diff --git a/Editor/MeshWindow.cpp b/Editor/MeshWindow.cpp index d8c4c942a..29ef62d70 100644 --- a/Editor/MeshWindow.cpp +++ b/Editor/MeshWindow.cpp @@ -1036,6 +1036,17 @@ void MeshWindow::SetEntity(Entity entity, int subset) if (mesh->so_nor.IsValid()) ss += "\tstreamout_normals;\n"; if (mesh->so_tan.IsValid()) ss += "\tstreamout_tangents;\n"; if (mesh->so_pre.IsValid()) ss += "\tprevious_position;\n"; + + ss += "\nSuballocation offset: "; + if (mesh->generalBufferOffsetAllocation.IsValid()) + { + ss += wi::helper::GetMemorySizeText(mesh->generalBufferOffsetAllocation.byte_offset); + } + else + { + ss += "suballocation is not used for this mesh"; + } + meshInfoLabel.SetText(ss); subsetComboBox.ClearItems(); diff --git a/WickedEngine/CommonInclude.h b/WickedEngine/CommonInclude.h index 6df060a88..51abc23b9 100644 --- a/WickedEngine/CommonInclude.h +++ b/WickedEngine/CommonInclude.h @@ -16,6 +16,12 @@ // Simple common math helpers: +template +constexpr T align(T value, T alignment) +{ + return ((value + alignment - T(1)) / alignment) * alignment; +} + template constexpr T sqr(T x) { return x * x; } diff --git a/WickedEngine/Utility/offsetAllocator.cpp b/WickedEngine/Utility/offsetAllocator.cpp new file mode 100644 index 000000000..9f4b0b203 --- /dev/null +++ b/WickedEngine/Utility/offsetAllocator.cpp @@ -0,0 +1,475 @@ +// (C) Sebastian Aaltonen 2023 +// MIT License (see file: LICENSE) + +#include "offsetAllocator.hpp" + +#ifdef DEBUG +#include +#define ASSERT(x) assert(x) +//#define DEBUG_VERBOSE +#else +#define ASSERT(x) +#endif + +#ifdef DEBUG_VERBOSE +#include +#endif + +#ifdef _MSC_VER +#include +#endif + +#include + +namespace OffsetAllocator +{ + inline uint32 lzcnt_nonzero(uint32 v) + { +#ifdef _MSC_VER + unsigned long retVal; + _BitScanReverse(&retVal, v); + return 31 - retVal; +#else + return __builtin_clz(v); +#endif + } + + inline uint32 tzcnt_nonzero(uint32 v) + { +#ifdef _MSC_VER + unsigned long retVal; + _BitScanForward(&retVal, v); + return retVal; 
+#else + return __builtin_ctz(v); +#endif + } + + namespace SmallFloat + { + static constexpr uint32 MANTISSA_BITS = 3; + static constexpr uint32 MANTISSA_VALUE = 1 << MANTISSA_BITS; + static constexpr uint32 MANTISSA_MASK = MANTISSA_VALUE - 1; + + // Bin sizes follow floating point (exponent + mantissa) distribution (piecewise linear log approx) + // This ensures that for each size class, the average overhead percentage stays the same + uint32 uintToFloatRoundUp(uint32 size) + { + uint32 exp = 0; + uint32 mantissa = 0; + + if (size < MANTISSA_VALUE) + { + // Denorm: 0..(MANTISSA_VALUE-1) + mantissa = size; + } + else + { + // Normalized: Hidden high bit always 1. Not stored. Just like float. + uint32 leadingZeros = lzcnt_nonzero(size); + uint32 highestSetBit = 31 - leadingZeros; + + uint32 mantissaStartBit = highestSetBit - MANTISSA_BITS; + exp = mantissaStartBit + 1; + mantissa = (size >> mantissaStartBit) & MANTISSA_MASK; + + uint32 lowBitsMask = (1 << mantissaStartBit) - 1; + + // Round up! + if ((size & lowBitsMask) != 0) + mantissa++; + } + + return (exp << MANTISSA_BITS) + mantissa; // + allows mantissa->exp overflow for round up + } + + uint32 uintToFloatRoundDown(uint32 size) + { + uint32 exp = 0; + uint32 mantissa = 0; + + if (size < MANTISSA_VALUE) + { + // Denorm: 0..(MANTISSA_VALUE-1) + mantissa = size; + } + else + { + // Normalized: Hidden high bit always 1. Not stored. Just like float. 
+ uint32 leadingZeros = lzcnt_nonzero(size); + uint32 highestSetBit = 31 - leadingZeros; + + uint32 mantissaStartBit = highestSetBit - MANTISSA_BITS; + exp = mantissaStartBit + 1; + mantissa = (size >> mantissaStartBit) & MANTISSA_MASK; + } + + return (exp << MANTISSA_BITS) | mantissa; + } + + uint32 floatToUint(uint32 floatValue) + { + uint32 exponent = floatValue >> MANTISSA_BITS; + uint32 mantissa = floatValue & MANTISSA_MASK; + if (exponent == 0) + { + // Denorms + return mantissa; + } + else + { + return (mantissa | MANTISSA_VALUE) << (exponent - 1); + } + } + } + + // Utility functions + uint32 findLowestSetBitAfter(uint32 bitMask, uint32 startBitIndex) + { + uint32 maskBeforeStartIndex = (1 << startBitIndex) - 1; + uint32 maskAfterStartIndex = ~maskBeforeStartIndex; + uint32 bitsAfter = bitMask & maskAfterStartIndex; + if (bitsAfter == 0) return Allocation::NO_SPACE; + return tzcnt_nonzero(bitsAfter); + } + + // Allocator... + void Allocator::init(uint32 size, uint32 maxAllocs) + { + m_size = size; + m_maxAllocs = maxAllocs; + m_nodes.reserve(maxAllocs); + m_freeNodes.reserve(maxAllocs); + if (sizeof(NodeIndex) == 2) + { + ASSERT(maxAllocs <= 65536); + } + reset(); + } + + void Allocator::reset() + { + m_freeStorage = 0; + m_usedBinsTop = 0; + m_freeOffset = m_maxAllocs - 1; + + for (uint32 i = 0 ; i < NUM_TOP_BINS; i++) + m_usedBins[i] = 0; + + for (uint32 i = 0 ; i < NUM_LEAF_BINS; i++) + m_binIndices[i] = Node::unused; + + m_nodes.clear(); + m_freeNodes.clear(); + + m_nodes.resize(m_maxAllocs); + m_freeNodes.resize(m_maxAllocs); + + // Freelist is a stack. Nodes in inverse order so that [0] pops first. + for (uint32 i = 0; i < m_maxAllocs; i++) + { + m_freeNodes[i] = m_maxAllocs - i - 1; + } + + // Start state: Whole storage as one big node + // Algorithm will split remainders and push them back as smaller nodes + insertNodeIntoBin(m_size, 0); + } + + Allocation Allocator::allocate(uint32 size) + { + // Out of allocations? 
+ if (m_freeOffset == 0) + { + Allocation ret; + ret.offset = Allocation::NO_SPACE; + ret.metadata = Allocation::NO_SPACE; + return ret; + } + + // Round up to bin index to ensure that alloc >= bin + // Gives us min bin index that fits the size + uint32 minBinIndex = SmallFloat::uintToFloatRoundUp(size); + + uint32 minTopBinIndex = minBinIndex >> TOP_BINS_INDEX_SHIFT; + uint32 minLeafBinIndex = minBinIndex & LEAF_BINS_INDEX_MASK; + + uint32 topBinIndex = minTopBinIndex; + uint32 leafBinIndex = Allocation::NO_SPACE; + + // If top bin exists, scan its leaf bin. This can fail (NO_SPACE). + if (m_usedBinsTop & (1 << topBinIndex)) + { + leafBinIndex = findLowestSetBitAfter(m_usedBins[topBinIndex], minLeafBinIndex); + } + + // If we didn't find space in top bin, we search top bin from +1 + if (leafBinIndex == Allocation::NO_SPACE) + { + topBinIndex = findLowestSetBitAfter(m_usedBinsTop, minTopBinIndex + 1); + + // Out of space? + if (topBinIndex == Allocation::NO_SPACE) + { + Allocation ret; + ret.offset = Allocation::NO_SPACE; + ret.metadata = Allocation::NO_SPACE; + return ret; + } + + // All leaf bins here fit the alloc, since the top bin was rounded up. Start leaf search from bit 0. + // NOTE: This search can't fail since at least one leaf bit was set because the top bit was set. + leafBinIndex = tzcnt_nonzero(m_usedBins[topBinIndex]); + } + + uint32 binIndex = (topBinIndex << TOP_BINS_INDEX_SHIFT) | leafBinIndex; + + // Pop the top node of the bin. Bin top = node.next. + uint32 nodeIndex = m_binIndices[binIndex]; + Node& node = m_nodes[nodeIndex]; + uint32 nodeTotalSize = node.dataSize; + node.dataSize = size; + node.used = true; + m_binIndices[binIndex] = node.binListNext; + if (node.binListNext != Node::unused) m_nodes[node.binListNext].binListPrev = Node::unused; + m_freeStorage -= nodeTotalSize; +#ifdef DEBUG_VERBOSE + printf("Free storage: %u (-%u) (allocate)\n", m_freeStorage, nodeTotalSize); +#endif + + // Bin empty? 
+ if (m_binIndices[binIndex] == Node::unused) + { + // Remove a leaf bin mask bit + m_usedBins[topBinIndex] &= ~(1 << leafBinIndex); + + // All leaf bins empty? + if (m_usedBins[topBinIndex] == 0) + { + // Remove a top bin mask bit + m_usedBinsTop &= ~(1 << topBinIndex); + } + } + + // Push back reminder N elements to a lower bin + uint32 reminderSize = nodeTotalSize - size; + if (reminderSize > 0) + { + uint32 newNodeIndex = insertNodeIntoBin(reminderSize, node.dataOffset + size); + + // Link nodes next to each other so that we can merge them later if both are free + // And update the old next neighbor to point to the new node (in middle) + if (node.neighborNext != Node::unused) m_nodes[node.neighborNext].neighborPrev = newNodeIndex; + m_nodes[newNodeIndex].neighborPrev = nodeIndex; + m_nodes[newNodeIndex].neighborNext = node.neighborNext; + node.neighborNext = newNodeIndex; + } + + Allocation ret; + ret.offset = node.dataOffset; + ret.metadata = nodeIndex; + return ret; + } + + void Allocator::free(Allocation allocation) + { + ASSERT(allocation.metadata != Allocation::NO_SPACE); + if (m_nodes.empty()) return; + + uint32 nodeIndex = allocation.metadata; + Node& node = m_nodes[nodeIndex]; + + // Double delete check + ASSERT(node.used == true); + + // Merge with neighbors... + uint32 offset = node.dataOffset; + uint32 size = node.dataSize; + + if ((node.neighborPrev != Node::unused) && (m_nodes[node.neighborPrev].used == false)) + { + // Previous (contiguous) free node: Change offset to previous node offset. 
Sum sizes + Node& prevNode = m_nodes[node.neighborPrev]; + offset = prevNode.dataOffset; + size += prevNode.dataSize; + + // Remove node from the bin linked list and put it in the freelist + removeNodeFromBin(node.neighborPrev); + + ASSERT(prevNode.neighborNext == nodeIndex); + node.neighborPrev = prevNode.neighborPrev; + } + + if ((node.neighborNext != Node::unused) && (m_nodes[node.neighborNext].used == false)) + { + // Next (contiguous) free node: Offset remains the same. Sum sizes. + Node& nextNode = m_nodes[node.neighborNext]; + size += nextNode.dataSize; + + // Remove node from the bin linked list and put it in the freelist + removeNodeFromBin(node.neighborNext); + + ASSERT(nextNode.neighborPrev == nodeIndex); + node.neighborNext = nextNode.neighborNext; + } + + uint32 neighborNext = node.neighborNext; + uint32 neighborPrev = node.neighborPrev; + + // Insert the removed node to freelist +#ifdef DEBUG_VERBOSE + printf("Putting node %u into freelist[%u] (free)\n", nodeIndex, m_freeOffset + 1); +#endif + m_freeNodes[++m_freeOffset] = nodeIndex; + + // Insert the (combined) free node to bin + uint32 combinedNodeIndex = insertNodeIntoBin(size, offset); + + // Connect neighbors with the new combined node + if (neighborNext != Node::unused) + { + m_nodes[combinedNodeIndex].neighborNext = neighborNext; + m_nodes[neighborNext].neighborPrev = combinedNodeIndex; + } + if (neighborPrev != Node::unused) + { + m_nodes[combinedNodeIndex].neighborPrev = neighborPrev; + m_nodes[neighborPrev].neighborNext = combinedNodeIndex; + } + } + + uint32 Allocator::insertNodeIntoBin(uint32 size, uint32 dataOffset) + { + // Round down to bin index to ensure that bin >= alloc + uint32 binIndex = SmallFloat::uintToFloatRoundDown(size); + + uint32 topBinIndex = binIndex >> TOP_BINS_INDEX_SHIFT; + uint32 leafBinIndex = binIndex & LEAF_BINS_INDEX_MASK; + + // Bin was empty before? 
+ if (m_binIndices[binIndex] == Node::unused) + { + // Set bin mask bits + m_usedBins[topBinIndex] |= 1 << leafBinIndex; + m_usedBinsTop |= 1 << topBinIndex; + } + + // Take a freelist node and insert on top of the bin linked list (next = old top) + uint32 topNodeIndex = m_binIndices[binIndex]; + uint32 nodeIndex = m_freeNodes[m_freeOffset--]; +#ifdef DEBUG_VERBOSE + printf("Getting node %u from freelist[%u]\n", nodeIndex, m_freeOffset + 1); +#endif + m_nodes[nodeIndex].dataOffset = dataOffset; + m_nodes[nodeIndex].dataSize = size; + m_nodes[nodeIndex].binListNext = topNodeIndex; + if (topNodeIndex != Node::unused) m_nodes[topNodeIndex].binListPrev = nodeIndex; + m_binIndices[binIndex] = nodeIndex; + + m_freeStorage += size; +#ifdef DEBUG_VERBOSE + printf("Free storage: %u (+%u) (insertNodeIntoBin)\n", m_freeStorage, size); +#endif + + return nodeIndex; + } + + void Allocator::removeNodeFromBin(uint32 nodeIndex) + { + Node &node = m_nodes[nodeIndex]; + + if (node.binListPrev != Node::unused) + { + // Easy case: We have previous node. Just remove this node from the middle of the list. + m_nodes[node.binListPrev].binListNext = node.binListNext; + if (node.binListNext != Node::unused) m_nodes[node.binListNext].binListPrev = node.binListPrev; + } + else + { + // Hard case: We are the first node in a bin. Find the bin. + + // Round down to bin index to ensure that bin >= alloc + uint32 binIndex = SmallFloat::uintToFloatRoundDown(node.dataSize); + + uint32 topBinIndex = binIndex >> TOP_BINS_INDEX_SHIFT; + uint32 leafBinIndex = binIndex & LEAF_BINS_INDEX_MASK; + + m_binIndices[binIndex] = node.binListNext; + if (node.binListNext != Node::unused) m_nodes[node.binListNext].binListPrev = Node::unused; + + // Bin empty? + if (m_binIndices[binIndex] == Node::unused) + { + // Remove a leaf bin mask bit + m_usedBins[topBinIndex] &= ~(1 << leafBinIndex); + + // All leaf bins empty? 
+ if (m_usedBins[topBinIndex] == 0) + { + // Remove a top bin mask bit + m_usedBinsTop &= ~(1 << topBinIndex); + } + } + } + + // Insert the node to freelist +#ifdef DEBUG_VERBOSE + printf("Putting node %u into freelist[%u] (removeNodeFromBin)\n", nodeIndex, m_freeOffset + 1); +#endif + m_freeNodes[++m_freeOffset] = nodeIndex; + + m_freeStorage -= node.dataSize; +#ifdef DEBUG_VERBOSE + printf("Free storage: %u (-%u) (removeNodeFromBin)\n", m_freeStorage, node.dataSize); +#endif + } + + uint32 Allocator::allocationSize(Allocation allocation) const + { + if (allocation.metadata == Allocation::NO_SPACE) return 0; + if (m_nodes.empty()) return 0; + + return m_nodes[allocation.metadata].dataSize; + } + + StorageReport Allocator::storageReport() const + { + uint32 largestFreeRegion = 0; + uint32 freeStorage = 0; + + // Out of allocations? -> Zero free space + if (m_freeOffset > 0) + { + freeStorage = m_freeStorage; + if (m_usedBinsTop) + { + uint32 topBinIndex = 31 - lzcnt_nonzero(m_usedBinsTop); + uint32 leafBinIndex = 31 - lzcnt_nonzero(m_usedBins[topBinIndex]); + largestFreeRegion = SmallFloat::floatToUint((topBinIndex << TOP_BINS_INDEX_SHIFT) | leafBinIndex); + ASSERT(freeStorage >= largestFreeRegion); + } + } + + StorageReport ret; + ret.totalFreeSpace = freeStorage; + ret.largestFreeRegion = largestFreeRegion; + return ret; + } + + StorageReportFull Allocator::storageReportFull() const + { + StorageReportFull report; + for (uint32 i = 0; i < NUM_LEAF_BINS; i++) + { + uint32 count = 0; + uint32 nodeIndex = m_binIndices[i]; + while (nodeIndex != Node::unused) + { + nodeIndex = m_nodes[nodeIndex].binListNext; + count++; + } + report.freeRegions[i].size = SmallFloat::floatToUint(i); report.freeRegions[i].count = count; + } + return report; + } +} diff --git a/WickedEngine/Utility/offsetAllocator.hpp b/WickedEngine/Utility/offsetAllocator.hpp new file mode 100644 index 000000000..ea0918cb0 --- /dev/null +++ b/WickedEngine/Utility/offsetAllocator.hpp @@ -0,0 +1,115 @@ 
+#pragma once +// (C) Sebastian Aaltonen 2023 +// MIT License (see file: LICENSE) + +// Modified for Wicked Engine +// - removed cpp20 features +// - removed constructors +// - changed node storage to std::vector +// - reduced size of Node structure + +//#define USE_16_BIT_NODE_INDICES + +#include + +namespace OffsetAllocator +{ + typedef unsigned char uint8; + typedef unsigned short uint16; + typedef unsigned int uint32; + + // 16 bit node indices mode (USE_16_BIT_NODE_INDICES) will halve the metadata storage cost + // But it only supports up to 65536 maximum allocation count +#ifdef USE_16_BIT_NODE_INDICES + typedef uint16 NodeIndex; + static constexpr uint32 default_maxallocations = 64 * 1024; +#else + typedef uint32 NodeIndex; + static constexpr uint32 default_maxallocations = 128 * 1024; +#endif + + static constexpr uint32 NUM_TOP_BINS = 32; + static constexpr uint32 BINS_PER_LEAF = 8; + static constexpr uint32 TOP_BINS_INDEX_SHIFT = 3; + static constexpr uint32 LEAF_BINS_INDEX_MASK = 0x7; + static constexpr uint32 NUM_LEAF_BINS = NUM_TOP_BINS * BINS_PER_LEAF; + + struct Allocation + { + static constexpr uint32 NO_SPACE = 0xffffffff; + + uint32 offset = NO_SPACE; + NodeIndex metadata = NO_SPACE; // internal: node index + }; + + struct StorageReport + { + uint32 totalFreeSpace = 0; + uint32 largestFreeRegion = 0; + }; + + struct StorageReportFull + { + struct Region + { + uint32 size = 0; + uint32 count = 0; + }; + + Region freeRegions[NUM_LEAF_BINS]; + }; + + class Allocator + { + public: + void init(uint32 size, uint32 maxAllocs = default_maxallocations); + void reset(); + + Allocation allocate(uint32 size); + void free(Allocation allocation); + + uint32 allocationSize(Allocation allocation) const; + StorageReport storageReport() const; + StorageReportFull storageReportFull() const; + + private: + uint32 insertNodeIntoBin(uint32 size, uint32 dataOffset); + void removeNodeFromBin(uint32 nodeIndex); + + struct Node + { + static constexpr NodeIndex unused = 0xffffffff; + + uint32 dataOffset : 32; 
+ uint32 dataSize : 31; + uint32 used : 1; + NodeIndex binListPrev : 32; + NodeIndex binListNext : 32; + NodeIndex neighborPrev : 32; + NodeIndex neighborNext : 32; + + Node() + { + dataOffset = 0; + dataSize = 0; + binListPrev = unused; + binListNext = unused; + neighborPrev = unused; + neighborNext = unused; + used = 0; + } + }; + + uint32 m_size = 0; + uint32 m_maxAllocs = 0; + uint32 m_freeStorage = 0; + + uint32 m_usedBinsTop = 0; + uint8 m_usedBins[NUM_TOP_BINS] = {}; + NodeIndex m_binIndices[NUM_LEAF_BINS] = {}; + + std::vector m_nodes; + std::vector m_freeNodes; + uint32 m_freeOffset = 0; + }; +} diff --git a/WickedEngine/WickedEngine_SOURCE.vcxitems b/WickedEngine/WickedEngine_SOURCE.vcxitems index 7c21e8574..f030cf153 100644 --- a/WickedEngine/WickedEngine_SOURCE.vcxitems +++ b/WickedEngine/WickedEngine_SOURCE.vcxitems @@ -314,6 +314,7 @@ + @@ -610,6 +611,7 @@ + diff --git a/WickedEngine/WickedEngine_SOURCE.vcxitems.filters b/WickedEngine/WickedEngine_SOURCE.vcxitems.filters index e97a7c894..f885da962 100644 --- a/WickedEngine/WickedEngine_SOURCE.vcxitems.filters +++ b/WickedEngine/WickedEngine_SOURCE.vcxitems.filters @@ -1389,6 +1389,9 @@ JOLT + + UTILITY + @@ -2189,6 +2192,9 @@ JOLT + + UTILITY + diff --git a/WickedEngine/wiAllocator.h b/WickedEngine/wiAllocator.h index 80addb116..65c0ca7aa 100644 --- a/WickedEngine/wiAllocator.h +++ b/WickedEngine/wiAllocator.h @@ -2,11 +2,18 @@ #include "CommonInclude.h" #include "wiVector.h" +#include "Utility/offsetAllocator.hpp" + +#include +#include +#include #include #include +#include namespace wi::allocator { + // Allocation of consecutive bytes, but no freeing, instead the whole allocator can be reset struct LinearAllocator { uint8_t* data = nullptr; @@ -38,6 +45,7 @@ namespace wi::allocator } }; + // Allocation and freeing of single elements of the same size template struct BlockAllocator { @@ -71,5 +79,174 @@ namespace wi::allocator ptr->~T(); free_list.push_back(ptr); } + + inline bool is_empty() const + { 
+ return (blocks.size() * block_size) == free_list.size(); + } + }; + + // Allocation and freeing of an arbitrary number of bytes, managed in pages of the same size + // - this is a wrapper around OffsetAllocator that adds thread safety and refcounting + // - also supports deferred release for suballocated GPU resources + struct PageAllocator + { + uint32_t page_count = 0; + uint32_t page_size = 0; + struct AllocationInternal + { + std::atomic refcount{ 0 }; + OffsetAllocator::Allocation allocation; + }; + struct AllocatorInternal + { + std::mutex locker; + OffsetAllocator::Allocator allocator; + BlockAllocator internal_blocks; + bool deferred_release_enabled = false; + uint64_t deferred_release_frame = 0; + std::deque> deferred_release_queue; + }; + std::shared_ptr allocator; // shared ptr is used to let any allocations extend the lifetime of the allocator + + // Returns the total size that the allocator manages: + constexpr uint64_t total_size_in_bytes() const { return uint64_t(page_count) * uint64_t(page_size); } + + // Calculates the page count that will accommodate an allocation size request + constexpr uint32_t page_count_from_bytes(uint64_t sizeInBytes) const { return uint32_t(align((uint64_t)sizeInBytes, (uint64_t)page_size) / (uint64_t)page_size); } + + // Initializes the allocator, only after which it can be used + // total_size_in_bytes : the allocator will manage this number of bytes + // page_size : the allocation granularity in bytes, each allocation will be aligned to this + // deferred_release : if false, allocations are freed immediately (suitable for CPU only allocations), otherwise they are freed after a number of frames passed (which should be used for GPU allocations) + void init(uint64_t total_size_in_bytes, uint32_t page_size = 64u * 1024u, bool deferred_release = false) + { + this->page_size = page_size; + this->page_count = page_count_from_bytes(total_size_in_bytes); + allocator = std::make_shared(); + allocator->allocator.init(page_count, 
std::min(page_count, OffsetAllocator::default_maxallocations)); + allocator->deferred_release_enabled = deferred_release; + allocator->deferred_release_frame = 0; + allocator->deferred_release_queue.clear(); + } + // This needs to be called every frame if deferred release is enabled: + void update_deferred_release(uint64_t framecount, uint32_t buffercount) + { + if (allocator == nullptr) + return; + std::scoped_lock lck(allocator->locker); + allocator->deferred_release_frame = framecount; + while (!allocator->deferred_release_queue.empty() && allocator->deferred_release_queue.front().second + buffercount < framecount) + { + allocator->allocator.free(allocator->deferred_release_queue.front().first); + allocator->deferred_release_queue.pop_front(); + } + } + + struct Allocation + { + std::shared_ptr allocator; // the allocator is retained so that allocation can deallocate itself + AllocationInternal* internal_state = nullptr; // this is pointing within the allocator which is retained by shared_ptr + uint64_t byte_offset = ~0ull; + + Allocation() + { + Reset(); + } + Allocation(const Allocation& other) + { + Reset(); + allocator = other.allocator; + internal_state = other.internal_state; + byte_offset = other.byte_offset; + if (internal_state != nullptr) + { + internal_state->refcount.fetch_add(1); + } + } + Allocation(Allocation&& other) noexcept + { + Reset(); + allocator = std::move(other.allocator); + internal_state = other.internal_state; + byte_offset = other.byte_offset; + other.allocator = nullptr; + other.internal_state = nullptr; + other.byte_offset = ~0ull; + } + ~Allocation() + { + Reset(); + } + void operator=(const Allocation& other) + { + Reset(); + allocator = other.allocator; + internal_state = other.internal_state; + byte_offset = other.byte_offset; + if (internal_state != nullptr) + { + internal_state->refcount.fetch_add(1); + } + } + void operator=(Allocation&& other) noexcept + { + Reset(); + allocator = std::move(other.allocator); + 
+ internal_state = other.internal_state; + byte_offset = other.byte_offset; + other.allocator = nullptr; + other.internal_state = nullptr; + other.byte_offset = ~0ull; + } + void Reset() + { + if (IsValid() && (internal_state->refcount.fetch_sub(1) <= 1)) + { + std::scoped_lock lck(allocator->locker); + if (allocator->deferred_release_enabled) + { + // can only be reclaimed after buffering amount of frames passed, this is usually used for GPU resources: + allocator->deferred_release_queue.push_back(std::make_pair(internal_state->allocation, allocator->deferred_release_frame)); + } + else + { + // reclaimed immediately: + allocator->allocator.free(internal_state->allocation); + } + allocator->internal_blocks.free(internal_state); + } + allocator = {}; + internal_state = nullptr; + byte_offset = ~0ull; + } + + constexpr bool IsValid() const { return internal_state != nullptr; } + }; + + // Allocates a reference counted allocation, viewing at least the requested amount of bytes + // To check if the allocation succeeded, call IsValid() on the returned object + inline Allocation allocate(size_t sizeInBytes) + { + const uint32_t pages = page_count_from_bytes(sizeInBytes); + std::scoped_lock lck(allocator->locker); + OffsetAllocator::Allocation offsetallocation = allocator->allocator.allocate(pages); + Allocation alloc; + if (offsetallocation.offset != OffsetAllocator::Allocation::NO_SPACE) + { + alloc.allocator = allocator; + alloc.internal_state = allocator->internal_blocks.allocate(); + alloc.internal_state->refcount.store(1); + alloc.internal_state->allocation = offsetallocation; + alloc.byte_offset = uint64_t(offsetallocation.offset) * uint64_t(page_size); // widen before multiplying: page offset and page_size are both 32-bit, so the product would wrap for allocators above 4 GiB + } + return alloc; + } + + // returns true if no pages are allocated + inline bool is_empty() + { + return allocator->allocator.storageReport().totalFreeSpace == page_count; + } }; } diff --git a/WickedEngine/wiApplication.cpp b/WickedEngine/wiApplication.cpp index c6e38be50..b5c0ef6a7 100644 --- a/WickedEngine/wiApplication.cpp +++ 
b/WickedEngine/wiApplication.cpp @@ -318,6 +318,7 @@ namespace wi wi::input::ClearForNextFrame(); wi::profiler::EndFrame(cmd); graphicsDevice->SubmitCommandLists(); + wi::renderer::UpdateGPUSuballocator(); } void Application::Update(float dt) diff --git a/WickedEngine/wiGraphicsDevice.h b/WickedEngine/wiGraphicsDevice.h index bc0d56835..b2b13e80b 100644 --- a/WickedEngine/wiGraphicsDevice.h +++ b/WickedEngine/wiGraphicsDevice.h @@ -250,14 +250,14 @@ namespace wi::graphics return CreateBuffer2(desc, [&](void* dest) { std::memcpy(dest, initial_data, desc->size); }, buffer, alias, alias_offset); } - bool CreateBufferCleared(const GPUBufferDesc* desc, uint8_t value, GPUBuffer* buffer) const + bool CreateBufferCleared(const GPUBufferDesc* desc, uint8_t value, GPUBuffer* buffer, const GPUResource* alias = nullptr, uint64_t alias_offset = 0ull) const { - return CreateBuffer2(desc, [&](void* dest) { std::memset(dest, value, desc->size); }, buffer); + return CreateBuffer2(desc, [&](void* dest) { std::memset(dest, value, desc->size); }, buffer, alias, alias_offset); } - bool CreateBufferZeroed(const GPUBufferDesc* desc, GPUBuffer* buffer) const + bool CreateBufferZeroed(const GPUBufferDesc* desc, GPUBuffer* buffer, const GPUResource* alias = nullptr, uint64_t alias_offset = 0ull) const { - return CreateBufferCleared(desc, 0, buffer); + return CreateBufferCleared(desc, 0, buffer, alias, alias_offset); } void Barrier(const GPUBarrier& barrier, CommandList cmd) diff --git a/WickedEngine/wiGraphicsDevice_DX12.cpp b/WickedEngine/wiGraphicsDevice_DX12.cpp index 50e398060..a20a205fa 100644 --- a/WickedEngine/wiGraphicsDevice_DX12.cpp +++ b/WickedEngine/wiGraphicsDevice_DX12.cpp @@ -2422,6 +2422,7 @@ std::mutex queue_locker; disabledMessages.push_back(D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE); disabledMessages.push_back(D3D12_MESSAGE_ID_SETPRIVATEDATA_CHANGINGPARAMS); + disabledMessages.push_back(D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_INTERSECTS_MULTIPLE_BUFFERS); 
D3D12_INFO_QUEUE_FILTER filter = {}; filter.AllowList.NumSeverities = static_cast(enabledSeverities.size()); diff --git a/WickedEngine/wiGraphicsDevice_DX12.h b/WickedEngine/wiGraphicsDevice_DX12.h index 7fc152553..5b40c2386 100644 --- a/WickedEngine/wiGraphicsDevice_DX12.h +++ b/WickedEngine/wiGraphicsDevice_DX12.h @@ -349,6 +349,18 @@ namespace wi::graphics { alignment = std::max(alignment, 16ull); } + if (has_flag(desc->misc_flags, ResourceMiscFlag::ALIASING_BUFFER)) + { + alignment = std::max(alignment, (uint64_t)D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT); + } + if (has_flag(desc->misc_flags, ResourceMiscFlag::ALIASING_TEXTURE_NON_RT_DS)) + { + alignment = std::max(alignment, (uint64_t)D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT); + } + if (has_flag(desc->misc_flags, ResourceMiscFlag::ALIASING_TEXTURE_RT_DS)) + { + alignment = std::max(alignment, (uint64_t)D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT); + } return alignment; } diff --git a/WickedEngine/wiGraphicsDevice_Vulkan.cpp b/WickedEngine/wiGraphicsDevice_Vulkan.cpp index 79c5de20c..0631e2c49 100644 --- a/WickedEngine/wiGraphicsDevice_Vulkan.cpp +++ b/WickedEngine/wiGraphicsDevice_Vulkan.cpp @@ -3750,12 +3750,6 @@ using namespace vulkan_internal; } bool GraphicsDevice_Vulkan::CreateBuffer2(const GPUBufferDesc* desc, const std::function& init_callback, GPUBuffer* buffer, const GPUResource* alias, uint64_t alias_offset) const { -#ifdef PLATFORM_LINUX - // Resource aliasing on Linux sometimes fails with VK_ERROR_UNKOWN so I disable it: - alias = nullptr; - alias_offset = 0; -#endif // PLATFORM_LINUX - auto internal_state = std::make_shared(); internal_state->allocationhandler = allocationhandler; buffer->internal_state = internal_state; @@ -3854,6 +3848,10 @@ using namespace vulkan_internal; { VkMemoryRequirements memory_requirements = {}; memory_requirements.alignment = desc->alignment; + if (memory_requirements.alignment == 0) + { + memory_requirements.alignment = GetMinOffsetAlignment(desc); + } 
memory_requirements.size = AlignTo(desc->size, memory_requirements.alignment); memory_requirements.memoryTypeBits = ~0u; diff --git a/WickedEngine/wiGraphicsDevice_Vulkan.h b/WickedEngine/wiGraphicsDevice_Vulkan.h index 4a6e13481..905510690 100644 --- a/WickedEngine/wiGraphicsDevice_Vulkan.h +++ b/WickedEngine/wiGraphicsDevice_Vulkan.h @@ -473,6 +473,10 @@ namespace wi::graphics { alignment = std::max(alignment, properties2.properties.limits.minTexelBufferOffsetAlignment); } + if (has_flag(desc->misc_flags, ResourceMiscFlag::ALIASING_BUFFER) || has_flag(desc->misc_flags, ResourceMiscFlag::ALIASING_TEXTURE_NON_RT_DS) || has_flag(desc->misc_flags, ResourceMiscFlag::ALIASING_TEXTURE_RT_DS)) + { + alignment = std::max(alignment, uint64_t(64 * 1024)); // 64KB safety to match DX12, because cannot use vkGetBufferMemoryRequirements here + } return alignment; } diff --git a/WickedEngine/wiRenderPath3D.cpp b/WickedEngine/wiRenderPath3D.cpp index e68fef363..c17f446e1 100644 --- a/WickedEngine/wiRenderPath3D.cpp +++ b/WickedEngine/wiRenderPath3D.cpp @@ -920,12 +920,6 @@ namespace wi wi::renderer::UpdateRaytracingAccelerationStructures(*scene, cmd); } - if (scene->weather.IsRealisticSky()) - { - wi::renderer::ComputeSkyAtmosphereTextures(cmd); - wi::renderer::ComputeSkyAtmosphereSkyViewLut(cmd); - } - if (wi::renderer::GetSurfelGIEnabled()) { wi::renderer::SurfelGI( @@ -1164,16 +1158,6 @@ namespace wi ); } - if (scene->weather.IsRealisticSky()) - { - wi::renderer::ComputeSkyAtmosphereSkyViewLut(cmd); - - if (scene->weather.IsRealisticSkyAerialPerspective()) - { - wi::renderer::ComputeSkyAtmosphereCameraVolumeLut(cmd); - } - } - if (scene->weather.IsVolumetricClouds() && !scene->weather.IsVolumetricCloudsReceiveShadow()) { // When volumetric cloud DOESN'T receive shadow it can be done async to shadow maps! 
@@ -1305,17 +1289,6 @@ namespace wi cmd ); - // Render SkyAtmosphere assets from planar reflections point of view - if (scene->weather.IsRealisticSky()) - { - wi::renderer::ComputeSkyAtmosphereSkyViewLut(cmd); - - if (scene->weather.IsRealisticSkyAerialPerspective()) - { - wi::renderer::ComputeSkyAtmosphereCameraVolumeLut(cmd); - } - } - device->EventBegin("Planar reflections Z-Prepass", cmd); auto range = wi::profiler::BeginRangeGPU("Planar Reflections Z-Prepass", cmd); diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp index f6c7bcf45..4d7319cf4 100644 --- a/WickedEngine/wiRenderer.cpp +++ b/WickedEngine/wiRenderer.cpp @@ -2623,6 +2623,73 @@ const GPUBuffer& GetIndexBufferForQuads(uint32_t max_quad_count) return indexBufferForQuads32; } +// This is responsible to manage big chunks of GPUBuffer, each of which will be used for suballocations: +struct GPUSubAllocator +{ + static constexpr uint64_t blocksize = 256ull * 1024ull * 1024ull; // 256 MB + struct Block + { + wi::allocator::PageAllocator allocator; + GPUBuffer buffer; + }; + wi::vector blocks; + std::mutex locker; +} static suballocator; +BufferSuballocation SuballocateGPUBuffer(uint64_t size) +{ + if (size > GPUSubAllocator::blocksize / 2) + return {}; // invalid, larger allocations than half block size will not be suballocated + + // scoped for locker + { + std::scoped_lock lock(suballocator.locker); + + // See if any of the large blocks can fulfill the allocation request: + BufferSuballocation allocation; + for (auto& block : suballocator.blocks) + { + allocation.allocation = block.allocator.allocate(size); + if (allocation.allocation.IsValid()) + { + allocation.alias = block.buffer; + //wilog("SuballocateGPUBuffer allocated size: %s, pages: %d, free space remaining: %s", wi::helper::GetMemorySizeText(size).c_str(), block.allocator.page_count_from_bytes(size), wi::helper::GetMemorySizeText(allocation.allocation.allocator->allocator.storageReport().totalFreeSpace * 
block.allocator.page_size).c_str()); + return allocation; + } + } + + // Allocation couldn't be fulfilled, create new block: + GPUBufferDesc desc; + desc.size = GPUSubAllocator::blocksize; + desc.usage = Usage::DEFAULT; + desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::VERTEX_BUFFER | BindFlag::INDEX_BUFFER; + desc.misc_flags = ResourceMiscFlag::ALIASING_BUFFER | ResourceMiscFlag::NO_DEFAULT_DESCRIPTORS; + desc.alignment = device->GetMinOffsetAlignment(&desc); + auto& block = suballocator.blocks.emplace_back(); + bool success = device->CreateBuffer(&desc, nullptr, &block.buffer); + assert(success); + device->SetName(&block.buffer, "GPUSubAllocator"); + block.allocator.init(desc.size, (uint32_t)desc.alignment, true); + wilog("SuballocateGPUBuffer created buffer block with size: %s, with page size: %s, page count: %d", wi::helper::GetMemorySizeText(block.allocator.total_size_in_bytes()).c_str(), wi::helper::GetMemorySizeText(block.allocator.page_size).c_str(), (int)block.allocator.page_count); + } + return SuballocateGPUBuffer(size); // retry +} +void UpdateGPUSuballocator() +{ + std::scoped_lock lock(suballocator.locker); + for (auto& block : suballocator.blocks) + { + block.allocator.update_deferred_release(device->GetFrameCount(), device->GetBufferCount()); + } + for (size_t i = 0; i < suballocator.blocks.size(); ++i) + { + if (suballocator.blocks[i].allocator.is_empty()) + { + suballocator.blocks.erase(suballocator.blocks.begin() + i); + break; + } + } +} + void ModifyObjectSampler(const SamplerDesc& desc) { if (initialized.load()) @@ -2961,7 +3028,8 @@ void RenderMeshes( uint32_t prev_stencilref = STENCILREF_DEFAULT; device->BindStencilRef(prev_stencilref, cmd); - const GPUBuffer* prev_ib = nullptr; + IndexBufferFormat prev_ibformat = IndexBufferFormat::UINT16; + const void* prev_ib_internal = nullptr; // This will be called every time we start a new draw call: auto batch_flush = [&]() @@ -3092,10 +3160,16 @@ void RenderMeshes( 
device->BindStencilRef(stencilRef, cmd); } - if (!meshShaderPSO && prev_ib != &mesh.generalBuffer) + // Note: the mesh.generalBuffer can be either a standalone allocated buffer, or a suballocated one (to reduce index buffer switching) + const GPUBuffer* ib = mesh.generalBufferOffsetAllocation.IsValid() ? &mesh.generalBufferOffsetAllocationAlias : &mesh.generalBuffer; + const IndexBufferFormat ibformat = mesh.GetIndexFormat(); + const void* ibinternal = ib->internal_state.get(); + + if (!meshShaderPSO && (prev_ib_internal != ibinternal || prev_ibformat != ibformat)) { - device->BindIndexBuffer(&mesh.generalBuffer, mesh.GetIndexFormat(), mesh.ib.offset, cmd); - prev_ib = &mesh.generalBuffer; + prev_ib_internal = ibinternal; + prev_ibformat = ibformat; + device->BindIndexBuffer(ib, ibformat, 0, cmd); } if ( @@ -3114,6 +3188,18 @@ void RenderMeshes( push.instances = instanceBufferDescriptorIndex; push.instance_offset = (uint)instancedBatch.dataOffset; + uint32_t indexOffset = 0; + if (mesh.generalBufferOffsetAllocation.IsValid()) + { + // In case the mesh general buffer is suballocated, the indexOffset is calculated relative to the beginning of the aliased buffer block: + indexOffset = uint32_t(((uint64_t)mesh.generalBufferOffsetAllocation.byte_offset + mesh.ib.offset) / mesh.GetIndexStride()) + subset.indexOffset; + } + else + { + // In case the mesh general buffer is not suballocated, it is a standalone buffer and index offset is relative to itself + indexOffset = uint32_t(mesh.ib.offset / mesh.GetIndexStride()) + subset.indexOffset; + } + if (pso_backside != nullptr) { device->BindPipelineState(pso_backside, cmd); @@ -3124,7 +3210,7 @@ void RenderMeshes( } else { - device->DrawIndexedInstanced(subset.indexCount, instancedBatch.instanceCount, subset.indexOffset, 0, 0, cmd); + device->DrawIndexedInstanced(subset.indexCount, instancedBatch.instanceCount, indexOffset, 0, 0, cmd); } } @@ -3136,7 +3222,7 @@ void RenderMeshes( } else { - 
device->DrawIndexedInstanced(subset.indexCount, instancedBatch.instanceCount, subset.indexOffset, 0, 0, cmd); + device->DrawIndexedInstanced(subset.indexCount, instancedBatch.instanceCount, indexOffset, 0, 0, cmd); } } @@ -5196,6 +5282,16 @@ void UpdateRenderDataAsync( ComputeVolumetricCloudShadows(cmd, weatherMapFirst, weatherMapSecond); } + if (vis.scene->weather.IsRealisticSky()) + { + wi::renderer::ComputeSkyAtmosphereTextures(cmd); + wi::renderer::ComputeSkyAtmosphereSkyViewLut(cmd); + if (vis.scene->weather.IsRealisticSkyAerialPerspective()) + { + wi::renderer::ComputeSkyAtmosphereCameraVolumeLut(cmd); + } + } + // GPU Particle systems simulation/sorting/culling: if (!vis.visibleEmitters.empty() || vis.scene->weather.rain_amount > 0) { diff --git a/WickedEngine/wiRenderer.h b/WickedEngine/wiRenderer.h index 59b6494cd..3149e50f5 100644 --- a/WickedEngine/wiRenderer.h +++ b/WickedEngine/wiRenderer.h @@ -12,6 +12,7 @@ #include "shaders/ShaderInterop_SurfelGI.h" #include "wiVector.h" #include "wiSpinLock.h" +#include "wiAllocator.h" #include #include @@ -66,8 +67,21 @@ namespace wi::renderer // Returns a buffer preinitialized for quad index buffer laid out as: // vertexID * 4 + [0, 1, 2, 2, 1, 3] + // Note: it will return 16-bit or 32-bit index buffer depending on max_quad_count const wi::graphics::GPUBuffer& GetIndexBufferForQuads(uint32_t max_quad_count); + struct BufferSuballocation + { + wi::graphics::GPUBuffer alias; + wi::allocator::PageAllocator::Allocation allocation; + }; + // Sub-allocate (thread-safe) from a global GPU buffer for memory aliasing purpose: + // The buffer will be DEFAULT usage, useable as vertex buffer, index buffer and shader resource + // The purpose is to suballocate smaller GPUBuffers inside a larger GPUBuffer and bind the large GPUBuffer once as index buffer, + // while the small buffers can be allocated/deallocated from it with memory aliasing and also used regularly by themselves + BufferSuballocation SuballocateGPUBuffer(uint64_t 
size); + void UpdateGPUSuballocator(); // called every frame for deferred release of GPU suballocations + void ModifyObjectSampler(const wi::graphics::SamplerDesc& desc); // Initializes the renderer diff --git a/WickedEngine/wiScene_Components.cpp b/WickedEngine/wiScene_Components.cpp index 2291d6ef4..917bf5896 100644 --- a/WickedEngine/wiScene_Components.cpp +++ b/WickedEngine/wiScene_Components.cpp @@ -586,6 +586,7 @@ namespace wi::scene void MeshComponent::DeleteRenderData() { + generalBufferOffsetAllocation = {}; generalBuffer = {}; streamoutBuffer = {}; ib = {}; @@ -1291,9 +1292,25 @@ namespace wi::scene } }; - bool success = device->CreateBuffer2(&bd, init_callback, &generalBuffer); - assert(success); - device->SetName(&generalBuffer, "MeshComponent::generalBuffer"); + // The suballocation strategy is used to have all mesh buffers reside in a global buffer + // With this we can avoid rebinding the index buffer for every mesh and can work with purely offsets + // Though the index buffer will still need to be rebound if the index format changes, but that happens less frequently + wi::renderer::BufferSuballocation suballoc = wi::renderer::SuballocateGPUBuffer(bd.size); + if (suballoc.allocation.IsValid()) + { + bool success = device->CreateBuffer2(&bd, init_callback, &generalBuffer, &suballoc.alias, suballoc.allocation.byte_offset); + assert(success); + device->SetName(&generalBuffer, "MeshComponent::generalBuffer (suballocated)"); + generalBufferOffsetAllocation = std::move(suballoc.allocation); + generalBufferOffsetAllocationAlias = std::move(suballoc.alias); + } + else + { + // If suballocation was not successful, a standalone buffer can be created instead: + bool success = device->CreateBuffer2(&bd, init_callback, &generalBuffer); + assert(success); + device->SetName(&generalBuffer, "MeshComponent::generalBuffer"); + } assert(ib.IsValid()); const Format ib_format = GetIndexFormat() == IndexBufferFormat::UINT32 ? 
Format::R32_UINT : Format::R16_UINT; diff --git a/WickedEngine/wiScene_Components.h b/WickedEngine/wiScene_Components.h index db5a262d7..238c46efa 100644 --- a/WickedEngine/wiScene_Components.h +++ b/WickedEngine/wiScene_Components.h @@ -15,6 +15,7 @@ #include "wiUnorderedSet.h" #include "wiBVH.h" #include "wiPathQuery.h" +#include "wiAllocator.h" namespace wi::scene { @@ -182,7 +183,7 @@ namespace wi::scene XMFLOAT4 emissiveColor = XMFLOAT4(1, 1, 1, 0); XMFLOAT4 subsurfaceScattering = XMFLOAT4(1, 1, 1, 0); XMFLOAT4 extinctionColor = XMFLOAT4(0, 0.9f, 1, 1); - XMFLOAT4 texMulAdd = XMFLOAT4(1, 1, 0, 0); + XMFLOAT4 texMulAdd = XMFLOAT4(1, 1, 0, 0); // dynamic multiplier (.xy) and addition (.zw) for UV coordinates float roughness = 0.2f; float reflectance = 0.02f; float metalness = 0.0f; @@ -655,7 +656,7 @@ namespace wi::scene BVH_ENABLED = 1 << 8, QUANTIZED_POSITIONS_DISABLED = 1 << 9, }; - uint32_t _flags = RENDERABLE; + // *uint32_t _flags is moved down for better struct padding... wi::vector vertex_positions; wi::vector vertex_normals; @@ -714,6 +715,8 @@ namespace wi::scene wi::primitive::AABB aabb; wi::graphics::GPUBuffer generalBuffer; // index buffer + all static vertex buffers wi::graphics::GPUBuffer streamoutBuffer; // all dynamic vertex buffers + wi::allocator::PageAllocator::Allocation generalBufferOffsetAllocation; + wi::graphics::GPUBuffer generalBufferOffsetAllocationAlias; struct BufferView { uint64_t offset = ~0ull; @@ -751,13 +754,6 @@ namespace wi::scene XMFLOAT2 uv_range_max = XMFLOAT2(1, 1); wi::vector BLASes; // one BLAS per LOD - enum BLAS_STATE - { - BLAS_STATE_NEEDS_REBUILD, - BLAS_STATE_NEEDS_REFIT, - BLAS_STATE_COMPLETE, - }; - mutable BLAS_STATE BLAS_state = BLAS_STATE_NEEDS_REBUILD; wi::vector bvh_leaf_aabbs; wi::BVH bvh; @@ -771,6 +767,16 @@ namespace wi::scene RigidBodyPhysicsComponent precomputed_rigidbody_physics_shape; // you can precompute a physics shape here if you need without using a real rigid body component yet + uint32_t 
_flags = RENDERABLE; // *this is serialized but put here for better struct padding + + enum BLAS_STATE + { + BLAS_STATE_NEEDS_REBUILD, + BLAS_STATE_NEEDS_REFIT, + BLAS_STATE_COMPLETE, + }; + mutable BLAS_STATE BLAS_state = BLAS_STATE_NEEDS_REBUILD; + constexpr void SetRenderable(bool value) { if (value) { _flags |= RENDERABLE; } else { _flags &= ~RENDERABLE; } } constexpr void SetDoubleSided(bool value) { if (value) { _flags |= DOUBLE_SIDED; } else { _flags &= ~DOUBLE_SIDED; } } constexpr void SetDoubleSidedShadow(bool value) { if (value) { _flags |= DOUBLE_SIDED_SHADOW; } else { _flags &= ~DOUBLE_SIDED_SHADOW; } } diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index 5618c5fb4..5ab44be5a 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wi::version // minor features, major updates, breaking compatibility changes const int minor = 71; // minor bug fixes, alterations, refactors, updates - const int revision = 750; + const int revision = 751; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision); diff --git a/third_party_software.txt b/third_party_software.txt index e97e1c144..6aac9a8e0 100644 --- a/third_party_software.txt +++ b/third_party_software.txt @@ -934,5 +934,28 @@ SOFTWARE. 
############################################################################################################################### +OffsetAllocator: https://github.com/sebbbi/OffsetAllocator +MIT License +Copyright (c) 2023 Sebastian Aaltonen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +###############################################################################################################################