GPU buffer suballocator for meshes to reduce index buffer switching (#1094)

This commit is contained in:
Turánszki János
2025-04-28 09:10:53 +02:00
committed by GitHub
parent 5ba77946d1
commit 30917c9e1f
20 changed files with 993 additions and 56 deletions
+11
View File
@@ -1036,6 +1036,17 @@ void MeshWindow::SetEntity(Entity entity, int subset)
if (mesh->so_nor.IsValid()) ss += "\tstreamout_normals;\n";
if (mesh->so_tan.IsValid()) ss += "\tstreamout_tangents;\n";
if (mesh->so_pre.IsValid()) ss += "\tprevious_position;\n";
ss += "\nSuballocation offset: ";
if (mesh->generalBufferOffsetAllocation.IsValid())
{
ss += wi::helper::GetMemorySizeText(mesh->generalBufferOffsetAllocation.byte_offset);
}
else
{
ss += "suballocation is not used for this mesh";
}
meshInfoLabel.SetText(ss);
subsetComboBox.ClearItems();
+6
View File
@@ -16,6 +16,12 @@
// Simple common math helpers:
// Rounds value up to the next multiple of alignment.
// alignment must be non-zero because the computation divides by it.
template<typename T>
constexpr T align(T value, T alignment)
{
	// intermediates are left as 'auto' so that small integer types keep their promoted width
	const auto padded = value + alignment - T(1);
	const auto multiples = padded / alignment;
	return multiples * alignment;
}
// Returns x multiplied by itself.
template <typename T>
constexpr T sqr(T x)
{
	return x * x;
}
+475
View File
@@ -0,0 +1,475 @@
// (C) Sebastian Aaltonen 2023
// MIT License (see file: LICENSE)
#include "offsetAllocator.hpp"
#ifdef DEBUG
#include <assert.h>
#define ASSERT(x) assert(x)
//#define DEBUG_VERBOSE
#else
#define ASSERT(x)
#endif
#ifdef DEBUG_VERBOSE
#include <stdio.h>
#endif
#ifdef _MSC_VER
#include <intrin.h>
#endif
#include <cstring>
namespace OffsetAllocator
{
// Counts the leading zero bits of v.
// As the name says, callers are expected to pass a non-zero v.
inline uint32 lzcnt_nonzero(uint32 v)
{
#ifdef _MSC_VER
    // _BitScanReverse reports the index of the highest set bit, convert it to a zero count
    unsigned long highestSetBit;
    _BitScanReverse(&highestSetBit, v);
    return 31 - highestSetBit;
#else
    return __builtin_clz(v);
#endif
}
// Counts the trailing zero bits of v, which is the index of its lowest set bit.
// As the name says, callers are expected to pass a non-zero v.
inline uint32 tzcnt_nonzero(uint32 v)
{
#ifdef _MSC_VER
    // _BitScanForward reports the index of the lowest set bit directly
    unsigned long lowestSetBit;
    _BitScanForward(&lowestSetBit, v);
    return lowestSetBit;
#else
    return __builtin_ctz(v);
#endif
}
namespace SmallFloat
{
static constexpr uint32 MANTISSA_BITS = 3;
static constexpr uint32 MANTISSA_VALUE = 1 << MANTISSA_BITS;
static constexpr uint32 MANTISSA_MASK = MANTISSA_VALUE - 1;
// Bin sizes follow floating point (exponent + mantissa) distribution (piecewise linear log approx)
// This ensures that for each size class, the average overhead percentage stays the same
// Converts size to its small float (exponent + mantissa) encoding, rounding up:
// the encoded value decodes to a size that is greater than or equal to the input.
uint32 uintToFloatRoundUp(uint32 size)
{
    // Denorm: 0..(MANTISSA_VALUE-1) is stored directly in the mantissa, exponent stays zero
    if (size < MANTISSA_VALUE)
    {
        return size;
    }

    // Normalized: Hidden high bit always 1. Not stored. Just like float.
    const uint32 topBit = 31 - lzcnt_nonzero(size);
    const uint32 shift = topBit - MANTISSA_BITS;
    const uint32 exponent = shift + 1;
    uint32 mantissa = (size >> shift) & MANTISSA_MASK;

    // Round up when any of the bits below the mantissa would be lost
    const uint32 droppedBits = size & ((1 << shift) - 1);
    if (droppedBits != 0)
    {
        ++mantissa;
    }

    // + (instead of |) allows mantissa->exp overflow for round up
    return (exponent << MANTISSA_BITS) + mantissa;
}
// Converts size to its small float (exponent + mantissa) encoding, rounding down:
// the bits below the mantissa are simply discarded.
uint32 uintToFloatRoundDown(uint32 size)
{
    // Denorm: 0..(MANTISSA_VALUE-1) is stored directly in the mantissa, exponent stays zero
    if (size < MANTISSA_VALUE)
    {
        return size;
    }

    // Normalized: Hidden high bit always 1. Not stored. Just like float.
    const uint32 topBit = 31 - lzcnt_nonzero(size);
    const uint32 shift = topBit - MANTISSA_BITS;
    const uint32 exponent = shift + 1;
    const uint32 mantissa = (size >> shift) & MANTISSA_MASK;

    return (exponent << MANTISSA_BITS) | mantissa;
}
// Decodes a small float (exponent + mantissa) value back into an unsigned integer.
uint32 floatToUint(uint32 floatValue)
{
    const uint32 mantissa = floatValue & MANTISSA_MASK;
    const uint32 exponent = floatValue >> MANTISSA_BITS;

    // Denorms hold the value as-is, normalized values get the hidden high bit restored before shifting
    return (exponent == 0) ? mantissa : ((mantissa | MANTISSA_VALUE) << (exponent - 1));
}
}
// Utility functions
// Returns the index of the lowest set bit of bitMask whose index is >= startBitIndex,
// or Allocation::NO_SPACE when no such bit exists.
uint32 findLowestSetBitAfter(uint32 bitMask, uint32 startBitIndex)
{
    // A 32 bit mask has no bits at index 32 or above; this also keeps the shift below in range
    if (startBitIndex >= 32) return Allocation::NO_SPACE;

    // The literal must be unsigned: with a signed 1 and startBitIndex == 31, the "- 1" would overflow int
    uint32 maskBeforeStartIndex = (uint32(1) << startBitIndex) - 1;
    uint32 maskAfterStartIndex = ~maskBeforeStartIndex;
    uint32 bitsAfter = bitMask & maskAfterStartIndex;
    if (bitsAfter == 0) return Allocation::NO_SPACE;
    return tzcnt_nonzero(bitsAfter);
}
// Allocator...
// Configures the allocator to manage 'size' units with at most 'maxAllocs' nodes,
// then brings it into the initial state through reset().
void Allocator::init(uint32 size, uint32 maxAllocs)
{
    // 16 bit node indices can not address more nodes than this
    if (sizeof(NodeIndex) == 2)
    {
        ASSERT(maxAllocs <= 65536);
    }

    m_maxAllocs = maxAllocs;
    m_size = size;

    m_freeNodes.reserve(maxAllocs);
    m_nodes.reserve(maxAllocs);

    reset();
}
void Allocator::reset()
{
m_freeStorage = 0;
m_usedBinsTop = 0;
m_freeOffset = m_maxAllocs - 1;
for (uint32 i = 0 ; i < NUM_TOP_BINS; i++)
m_usedBins[i] = 0;
for (uint32 i = 0 ; i < NUM_LEAF_BINS; i++)
m_binIndices[i] = Node::unused;
m_nodes.clear();
m_freeNodes.clear();
m_nodes.resize(m_maxAllocs);
m_freeNodes.resize(m_maxAllocs);
// Freelist is a stack. Nodes in inverse order so that [0] pops first.
for (uint32 i = 0; i < m_maxAllocs; i++)
{
m_freeNodes[i] = m_maxAllocs - i - 1;
}
// Start state: Whole storage as one big node
// Algorithm will split remainders and push them back as smaller nodes
insertNodeIntoBin(m_size, 0);
}
// Allocates 'size' units from the managed range.
// Returns an Allocation whose offset is the start of the reserved region and whose metadata is the
// internal node index (needed later by free() and allocationSize()).
// On failure (no free node left, or no bin large enough) both fields are Allocation::NO_SPACE.
Allocation Allocator::allocate(uint32 size)
{
// Out of allocations?
if (m_freeOffset == 0)
{
Allocation ret;
ret.offset = Allocation::NO_SPACE;
ret.metadata = Allocation::NO_SPACE;
return ret;
}
// Round up to bin index to ensure that alloc >= bin
// Gives us min bin index that fits the size
uint32 minBinIndex = SmallFloat::uintToFloatRoundUp(size);
// Split the bin index into the top level bin (high bits) and the leaf bin within it (low bits)
uint32 minTopBinIndex = minBinIndex >> TOP_BINS_INDEX_SHIFT;
uint32 minLeafBinIndex = minBinIndex & LEAF_BINS_INDEX_MASK;
uint32 topBinIndex = minTopBinIndex;
uint32 leafBinIndex = Allocation::NO_SPACE;
// If top bin exists, scan its leaf bin. This can fail (NO_SPACE).
if (m_usedBinsTop & (1 << topBinIndex))
{
leafBinIndex = findLowestSetBitAfter(m_usedBins[topBinIndex], minLeafBinIndex);
}
// If we didn't find space in top bin, we search top bin from +1
if (leafBinIndex == Allocation::NO_SPACE)
{
topBinIndex = findLowestSetBitAfter(m_usedBinsTop, minTopBinIndex + 1);
// Out of space?
if (topBinIndex == Allocation::NO_SPACE)
{
Allocation ret;
ret.offset = Allocation::NO_SPACE;
ret.metadata = Allocation::NO_SPACE;
return ret;
}
// All leaf bins here fit the alloc, since the top bin was rounded up. Start leaf search from bit 0.
// NOTE: This search can't fail since at least one leaf bit was set because the top bit was set.
leafBinIndex = tzcnt_nonzero(m_usedBins[topBinIndex]);
}
uint32 binIndex = (topBinIndex << TOP_BINS_INDEX_SHIFT) | leafBinIndex;
// Pop the top node of the bin. Bin top = node.next.
uint32 nodeIndex = m_binIndices[binIndex];
Node& node = m_nodes[nodeIndex];
// Remember the full size of the free node before it is shrunk to the requested size
uint32 nodeTotalSize = node.dataSize;
node.dataSize = size;
node.used = true;
m_binIndices[binIndex] = node.binListNext;
if (node.binListNext != Node::unused) m_nodes[node.binListNext].binListPrev = Node::unused;
// The whole node leaves the free pool here; the unused remainder is added back below by insertNodeIntoBin
m_freeStorage -= nodeTotalSize;
#ifdef DEBUG_VERBOSE
printf("Free storage: %u (-%u) (allocate)\n", m_freeStorage, nodeTotalSize);
#endif
// Bin empty?
if (m_binIndices[binIndex] == Node::unused)
{
// Remove a leaf bin mask bit
m_usedBins[topBinIndex] &= ~(1 << leafBinIndex);
// All leaf bins empty?
if (m_usedBins[topBinIndex] == 0)
{
// Remove a top bin mask bit
m_usedBinsTop &= ~(1 << topBinIndex);
}
}
// Push back remainder N elements to a lower bin
uint32 reminderSize = nodeTotalSize - size;
if (reminderSize > 0)
{
uint32 newNodeIndex = insertNodeIntoBin(reminderSize, node.dataOffset + size);
// Link nodes next to each other so that we can merge them later if both are free
// And update the old next neighbor to point to the new node (in middle)
if (node.neighborNext != Node::unused) m_nodes[node.neighborNext].neighborPrev = newNodeIndex;
m_nodes[newNodeIndex].neighborPrev = nodeIndex;
m_nodes[newNodeIndex].neighborNext = node.neighborNext;
node.neighborNext = newNodeIndex;
}
Allocation ret;
ret.offset = node.dataOffset;
ret.metadata = nodeIndex;
return ret;
}
// Releases an allocation that was returned by allocate().
// The freed region is merged with its previous and/or next neighbor when those are free,
// and the resulting (combined) region is inserted back into the matching bin.
// Does nothing when the node storage is empty.
void Allocator::free(Allocation allocation)
{
ASSERT(allocation.metadata != Allocation::NO_SPACE);
if (m_nodes.empty()) return;
uint32 nodeIndex = allocation.metadata;
Node& node = m_nodes[nodeIndex];
// Double delete check
ASSERT(node.used == true);
// Merge with neighbors...
uint32 offset = node.dataOffset;
uint32 size = node.dataSize;
if ((node.neighborPrev != Node::unused) && (m_nodes[node.neighborPrev].used == false))
{
// Previous (contiguous) free node: Change offset to previous node offset. Sum sizes
Node& prevNode = m_nodes[node.neighborPrev];
offset = prevNode.dataOffset;
size += prevNode.dataSize;
// Remove node from the bin linked list and put it in the freelist
removeNodeFromBin(node.neighborPrev);
ASSERT(prevNode.neighborNext == nodeIndex);
// Skip over the merged node in the neighbor chain
node.neighborPrev = prevNode.neighborPrev;
}
if ((node.neighborNext != Node::unused) && (m_nodes[node.neighborNext].used == false))
{
// Next (contiguous) free node: Offset remains the same. Sum sizes.
Node& nextNode = m_nodes[node.neighborNext];
size += nextNode.dataSize;
// Remove node from the bin linked list and put it in the freelist
removeNodeFromBin(node.neighborNext);
ASSERT(nextNode.neighborPrev == nodeIndex);
// Skip over the merged node in the neighbor chain
node.neighborNext = nextNode.neighborNext;
}
// Copy the neighbor links before the node is handed back to the freelist
uint32 neighborNext = node.neighborNext;
uint32 neighborPrev = node.neighborPrev;
// Insert the removed node to freelist
#ifdef DEBUG_VERBOSE
printf("Putting node %u into freelist[%u] (free)\n", nodeIndex, m_freeOffset + 1);
#endif
m_freeNodes[++m_freeOffset] = nodeIndex;
// Insert the (combined) free node to bin
uint32 combinedNodeIndex = insertNodeIntoBin(size, offset);
// Connect neighbors with the new combined node
if (neighborNext != Node::unused)
{
m_nodes[combinedNodeIndex].neighborNext = neighborNext;
m_nodes[neighborNext].neighborPrev = combinedNodeIndex;
}
if (neighborPrev != Node::unused)
{
m_nodes[combinedNodeIndex].neighborPrev = neighborPrev;
m_nodes[neighborPrev].neighborNext = combinedNodeIndex;
}
}
// Registers a free region of 'size' units starting at 'dataOffset'.
// A node is taken from the freelist, filled in, and pushed on top of the linked list of the bin
// that matches the size. The used-bin masks and the free storage counter are updated accordingly.
// Returns the index of the node that now represents the region.
uint32 Allocator::insertNodeIntoBin(uint32 size, uint32 dataOffset)
{
    // Round down to bin index to ensure that bin >= alloc
    uint32 binIndex = SmallFloat::uintToFloatRoundDown(size);

    uint32 topBinIndex = binIndex >> TOP_BINS_INDEX_SHIFT;
    uint32 leafBinIndex = binIndex & LEAF_BINS_INDEX_MASK;

    // Bin was empty before?
    if (m_binIndices[binIndex] == Node::unused)
    {
        // Set bin mask bits
        m_usedBins[topBinIndex] |= 1 << leafBinIndex;
        m_usedBinsTop |= 1 << topBinIndex;
    }

    // Take a freelist node and insert on top of the bin linked list (next = old top)
    uint32 topNodeIndex = m_binIndices[binIndex];
    uint32 nodeIndex = m_freeNodes[m_freeOffset--];
#ifdef DEBUG_VERBOSE
    printf("Getting node %u from freelist[%u]\n", nodeIndex, m_freeOffset + 1);
#endif

    // Nodes coming from the freelist are recycled, so they still carry the state of their previous life.
    // Reset to defaults first: a free node must have used == false (otherwise free() never merges with it),
    // no binListPrev (it becomes the top of the bin list) and no neighbors until the caller links them.
    m_nodes[nodeIndex] = Node();
    m_nodes[nodeIndex].dataOffset = dataOffset;
    m_nodes[nodeIndex].dataSize = size;
    m_nodes[nodeIndex].binListNext = topNodeIndex;
    if (topNodeIndex != Node::unused) m_nodes[topNodeIndex].binListPrev = nodeIndex;
    m_binIndices[binIndex] = nodeIndex;

    m_freeStorage += size;
#ifdef DEBUG_VERBOSE
    printf("Free storage: %u (+%u) (insertNodeIntoBin)\n", m_freeStorage, size);
#endif

    return nodeIndex;
}
// Unlinks a free node from the linked list of its bin, clears the used-bin mask bits when the bin
// becomes empty, hands the node back to the freelist and subtracts its size from the free storage.
void Allocator::removeNodeFromBin(uint32 nodeIndex)
{
    Node& removed = m_nodes[nodeIndex];
    const uint32 prevInBin = removed.binListPrev;
    const uint32 nextInBin = removed.binListNext;

    if (prevInBin == Node::unused)
    {
        // Hard case: We are the first node in a bin. Find the bin.

        // Round down to bin index to ensure that bin >= alloc
        const uint32 binIndex = SmallFloat::uintToFloatRoundDown(removed.dataSize);
        const uint32 topBinIndex = binIndex >> TOP_BINS_INDEX_SHIFT;
        const uint32 leafBinIndex = binIndex & LEAF_BINS_INDEX_MASK;

        // The next node (if any) becomes the new top of the bin
        m_binIndices[binIndex] = nextInBin;
        if (nextInBin != Node::unused)
        {
            m_nodes[nextInBin].binListPrev = Node::unused;
        }

        // Bin empty?
        if (m_binIndices[binIndex] == Node::unused)
        {
            // Remove a leaf bin mask bit
            m_usedBins[topBinIndex] &= ~(1 << leafBinIndex);

            // All leaf bins empty?
            if (m_usedBins[topBinIndex] == 0)
            {
                // Remove a top bin mask bit
                m_usedBinsTop &= ~(1 << topBinIndex);
            }
        }
    }
    else
    {
        // Easy case: We have previous node. Just remove this node from the middle of the list.
        m_nodes[prevInBin].binListNext = nextInBin;
        if (nextInBin != Node::unused)
        {
            m_nodes[nextInBin].binListPrev = prevInBin;
        }
    }

    // Insert the node to freelist
#ifdef DEBUG_VERBOSE
    printf("Putting node %u into freelist[%u] (removeNodeFromBin)\n", nodeIndex, m_freeOffset + 1);
#endif
    m_freeNodes[++m_freeOffset] = nodeIndex;

    m_freeStorage -= removed.dataSize;
#ifdef DEBUG_VERBOSE
    printf("Free storage: %u (-%u) (removeNodeFromBin)\n", m_freeStorage, removed.dataSize);
#endif
}
// Returns the size stored for the allocation's node.
// Yields 0 for a failed allocation (metadata == NO_SPACE) or when there is no node storage.
uint32 Allocator::allocationSize(Allocation allocation) const
{
    const bool hasNode = (allocation.metadata != Allocation::NO_SPACE) && !m_nodes.empty();
    if (!hasNode)
    {
        return 0;
    }
    return m_nodes[allocation.metadata].dataSize;
}
// Summarizes the free storage: the total free amount and the size class of the largest free region.
// Both values are reported as zero when the allocator has run out of nodes.
StorageReport Allocator::storageReport() const
{
    StorageReport report; // both fields start at zero

    // Out of allocations? -> Zero free space
    if (m_freeOffset == 0)
    {
        return report;
    }

    report.totalFreeSpace = m_freeStorage;

    if (m_usedBinsTop != 0)
    {
        // The highest used top bin and its highest used leaf bin identify the largest size class in use
        const uint32 topBinIndex = 31 - lzcnt_nonzero(m_usedBinsTop);
        const uint32 leafBinIndex = 31 - lzcnt_nonzero(m_usedBins[topBinIndex]);
        const uint32 largestBinIndex = (topBinIndex << TOP_BINS_INDEX_SHIFT) | leafBinIndex;
        report.largestFreeRegion = SmallFloat::floatToUint(largestBinIndex);
        ASSERT(report.totalFreeSpace >= report.largestFreeRegion);
    }

    return report;
}
// Builds a per-bin report: for every leaf bin, the size that the bin index decodes to
// and the number of free nodes currently linked into that bin.
StorageReportFull Allocator::storageReportFull() const
{
    StorageReportFull report;
    for (uint32 bin = 0; bin < NUM_LEAF_BINS; ++bin)
    {
        // Walk the bin's linked list to count its nodes
        uint32 nodesInBin = 0;
        for (uint32 cursor = m_binIndices[bin]; cursor != Node::unused; cursor = m_nodes[cursor].binListNext)
        {
            ++nodesInBin;
        }

        StorageReportFull::Region& region = report.freeRegions[bin];
        region.size = SmallFloat::floatToUint(bin);
        region.count = nodesInBin;
    }
    return report;
}
}
+115
View File
@@ -0,0 +1,115 @@
#pragma once
// (C) Sebastian Aaltonen 2023
// MIT License (see file: LICENSE)
// Modified for Wicked Engine
// - removed cpp20 features
// - removed constructors
// - changed node storage to std::vector
// - reduced size of Node structure
//#define USE_16_BIT_NODE_INDICES
#include <vector>
namespace OffsetAllocator
{
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;
// 16 bit node indices mode (USE_16_BIT_NODE_INDICES) will halve the metadata storage cost
// But it only supports up to 65536 maximum allocation count
#ifdef USE_16_BIT_NODE_INDICES
typedef uint16 NodeIndex;
static constexpr uint32 default_maxallocations = 64 * 1024;
#else
typedef uint32 NodeIndex;
static constexpr uint32 default_maxallocations = 128 * 1024;
#endif
static constexpr uint32 NUM_TOP_BINS = 32;
static constexpr uint32 BINS_PER_LEAF = 8;
static constexpr uint32 TOP_BINS_INDEX_SHIFT = 3;
static constexpr uint32 LEAF_BINS_INDEX_MASK = 0x7;
static constexpr uint32 NUM_LEAF_BINS = NUM_TOP_BINS * BINS_PER_LEAF;
// Result of Allocator::allocate().
// A failed allocation has both offset and metadata set to NO_SPACE.
struct Allocation
{
static constexpr uint32 NO_SPACE = 0xffffffff; // sentinel for "no allocation"
uint32 offset = NO_SPACE; // start of the allocated region within the managed range
NodeIndex metadata = NO_SPACE; // internal: node index
};
// Summary returned by Allocator::storageReport()
struct StorageReport
{
uint32 totalFreeSpace = 0; // sum of the sizes of all free regions
uint32 largestFreeRegion = 0; // size decoded from the index of the highest bin that contains a free region
};
// Per-bin breakdown returned by Allocator::storageReportFull()
struct StorageReportFull
{
struct Region
{
uint32 size = 0; // size that the bin index decodes to
uint32 count = 0; // number of free nodes linked into the bin
};
Region freeRegions[NUM_LEAF_BINS]; // one entry per leaf bin, indexed by bin index
};
// Offset allocator: hands out offsets (not memory) from a range of 'size' units.
// Free regions are tracked as nodes that are grouped into size bins; a two level bit mask
// (top bins / leaf bins) is used to find a bin that fits a request.
class Allocator
{
public:
// Sets the managed size and the maximum node count, then calls reset(). Must be called before use.
void init(uint32 size, uint32 maxAllocs = default_maxallocations);
// Drops all allocations, the whole range becomes one free region again
void reset();
// Returns an Allocation with offset/metadata == Allocation::NO_SPACE on failure
Allocation allocate(uint32 size);
// Releases an allocation returned by allocate(), merging it with free neighbors
void free(Allocation allocation);
// Size stored for the allocation, or 0 if it is not a valid allocation
uint32 allocationSize(Allocation allocation) const;
StorageReport storageReport() const;
StorageReportFull storageReportFull() const;
private:
// Adds a free region to its bin, returns the index of the node representing it
uint32 insertNodeIntoBin(uint32 size, uint32 dataOffset);
// Unlinks a free node from its bin and returns the node to the freelist
void removeNodeFromBin(uint32 nodeIndex);
// Describes one contiguous region, either allocated (used) or free (linked into a bin)
struct Node
{
static constexpr NodeIndex unused = 0xffffffff; // "no node" marker for the links below
uint32 dataOffset : 32; // start of the region
uint32 dataSize : 31; // size of the region
uint32 used : 1; // set while the region is allocated
NodeIndex binListPrev : 32; // previous node in the same bin's list
NodeIndex binListNext : 32; // next node in the same bin's list
NodeIndex neighborPrev : 32; // node of the region directly before this one
NodeIndex neighborNext : 32; // node of the region directly after this one
Node()
{
dataOffset = 0;
dataSize = 0;
binListPrev = unused;
binListNext = unused;
neighborPrev = unused;
neighborNext = unused;
used = 0;
}
};
uint32 m_size = 0; // size of the managed range
uint32 m_maxAllocs = 0; // number of nodes available
uint32 m_freeStorage = 0; // sum of the sizes of all regions that are currently in bins
uint32 m_usedBinsTop = 0; // bit N is set when top bin N has at least one non-empty leaf bin
uint8 m_usedBins[NUM_TOP_BINS] = {}; // per top bin: bit mask of its non-empty leaf bins
NodeIndex m_binIndices[NUM_LEAF_BINS] = {}; // per bin: first node of the bin's list (set to Node::unused by reset())
std::vector<Node> m_nodes; // node storage, indexed by node index
std::vector<NodeIndex> m_freeNodes; // stack of node indices that are not in use
uint32 m_freeOffset = 0; // index of the top of the m_freeNodes stack
};
}
@@ -314,6 +314,7 @@
<ClInclude Include="$(MSBuildThisFileDirectory)Utility\lodepng.h" />
<ClInclude Include="$(MSBuildThisFileDirectory)Utility\meshoptimizer\meshoptimizer.h" />
<ClInclude Include="$(MSBuildThisFileDirectory)Utility\minimp4.h" />
<ClInclude Include="$(MSBuildThisFileDirectory)Utility\offsetAllocator.hpp" />
<ClInclude Include="$(MSBuildThisFileDirectory)Utility\pugiconfig.hpp" />
<ClInclude Include="$(MSBuildThisFileDirectory)Utility\pugixml.hpp" />
<ClInclude Include="$(MSBuildThisFileDirectory)Utility\WinAdapter.h" />
@@ -610,6 +611,7 @@
<ClCompile Include="$(MSBuildThisFileDirectory)Utility\meshoptimizer\vertexfilter.cpp" />
<ClCompile Include="$(MSBuildThisFileDirectory)Utility\meshoptimizer\vfetchanalyzer.cpp" />
<ClCompile Include="$(MSBuildThisFileDirectory)Utility\meshoptimizer\vfetchoptimizer.cpp" />
<ClCompile Include="$(MSBuildThisFileDirectory)Utility\offsetAllocator.cpp" />
<ClCompile Include="$(MSBuildThisFileDirectory)Utility\pugixml.cpp" />
<ClCompile Include="$(MSBuildThisFileDirectory)wiAsync_BindLua.cpp" />
<ClCompile Include="$(MSBuildThisFileDirectory)wiConfig.cpp" />
@@ -1389,6 +1389,9 @@
<ClInclude Include="$(MSBuildThisFileDirectory)Jolt\RegisterTypes.h">
<Filter>JOLT</Filter>
</ClInclude>
<ClInclude Include="$(MSBuildThisFileDirectory)Utility\offsetAllocator.hpp">
<Filter>UTILITY</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="$(MSBuildThisFileDirectory)LUA\lapi.c">
@@ -2189,6 +2192,9 @@
<ClCompile Include="$(MSBuildThisFileDirectory)Jolt\RegisterTypes.cpp">
<Filter>JOLT</Filter>
</ClCompile>
<ClCompile Include="$(MSBuildThisFileDirectory)Utility\offsetAllocator.cpp">
<Filter>UTILITY</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="$(MSBuildThisFileDirectory)Utility\DirectXCollision.inl">
+177
View File
@@ -2,11 +2,18 @@
#include "CommonInclude.h"
#include "wiVector.h"
#include "Utility/offsetAllocator.hpp"
#include <mutex>
#include <atomic>
#include <memory>
#include <cassert>
#include <algorithm>
#include <deque>
namespace wi::allocator
{
// Allocation of consecutive bytes, but no freeing, instead the whole allocator can be reset
struct LinearAllocator
{
uint8_t* data = nullptr;
@@ -38,6 +45,7 @@ namespace wi::allocator
}
};
// Allocation and freeing of single elements of the same size
template<typename T, size_t block_size = 256>
struct BlockAllocator
{
@@ -71,5 +79,174 @@ namespace wi::allocator
ptr->~T();
free_list.push_back(ptr);
}
// Returns true when the free list holds as many entries as the blocks have slots in total
inline bool is_empty() const
{
	const size_t total_slots = blocks.size() * block_size;
	return free_list.size() == total_slots;
}
};
// Allocation and freeing of an arbitrary number of bytes, managed in pages of the same size
// - this is a wrapper around OffsetAllocator that adds thread safety and refcounting
// - also supports deferred release for suballocated GPU resources
struct PageAllocator
{
uint32_t page_count = 0;
uint32_t page_size = 0;
struct AllocationInternal
{
std::atomic<int> refcount{ 0 };
OffsetAllocator::Allocation allocation;
};
struct AllocatorInternal
{
std::mutex locker;
OffsetAllocator::Allocator allocator;
BlockAllocator<AllocationInternal> internal_blocks;
bool deferred_release_enabled = false;
uint64_t deferred_release_frame = 0;
std::deque<std::pair<OffsetAllocator::Allocation, uint64_t>> deferred_release_queue;
};
std::shared_ptr<AllocatorInternal> allocator; // shared ptr is used to let any allocations extend the lifeftime of the allocator
// Returns the total size that the allocator manages:
constexpr uint64_t total_size_in_bytes() const { return uint64_t(page_count) * uint64_t(page_size); }
// Calculates the page count that will accomodate an allocation size request
constexpr uint32_t page_count_from_bytes(uint64_t sizeInBytes) const { return uint32_t(align((uint64_t)sizeInBytes, (uint64_t)page_size) / (uint64_t)page_size); }
// Initializes the allocator, only after which it can be used
// total_size_in_bytes : the allocator will manage this number of bytes
// page_size : the allocation granularity in bytes, each allocation will be aligned to this
// deferred_release : if false, allocations are freed immediately (suitable for CPU only allocations), otherwise they are freed after a number of frames passed (which should be used for GPU allocations)
void init(uint64_t total_size_in_bytes, uint32_t page_size = 64u * 1024u, bool deferred_release = false)
{
this->page_size = page_size;
this->page_count = page_count_from_bytes(total_size_in_bytes);
allocator = std::make_shared<AllocatorInternal>();
allocator->allocator.init(page_count, std::min(page_count, OffsetAllocator::default_maxallocations));
allocator->deferred_release_enabled = deferred_release;
allocator->deferred_release_frame = 0;
allocator->deferred_release_queue.clear();
}
// This needs to be called every frame if deferred release is enabled:
void update_deferred_release(uint64_t framecount, uint32_t buffercount)
{
if (allocator == nullptr)
return;
std::scoped_lock lck(allocator->locker);
allocator->deferred_release_frame = framecount;
while (!allocator->deferred_release_queue.empty() && allocator->deferred_release_queue.front().second + buffercount < framecount)
{
allocator->allocator.free(allocator->deferred_release_queue.front().first);
allocator->deferred_release_queue.pop_front();
}
}
struct Allocation
{
std::shared_ptr<AllocatorInternal> allocator; // the allocator is retained so that allocation can deallocate itself
AllocationInternal* internal_state = nullptr; // this is pointing within the allocator which is retained by shared_ptr
uint64_t byte_offset = ~0ull;
Allocation()
{
Reset();
}
Allocation(const Allocation& other)
{
Reset();
allocator = other.allocator;
internal_state = other.internal_state;
byte_offset = other.byte_offset;
if (internal_state != nullptr)
{
internal_state->refcount.fetch_add(1);
}
}
Allocation(Allocation&& other) noexcept
{
Reset();
allocator = std::move(other.allocator);
internal_state = other.internal_state;
byte_offset = other.byte_offset;
other.allocator = nullptr;
other.internal_state = nullptr;
other.byte_offset = ~0ull;
}
~Allocation()
{
Reset();
}
void operator=(const Allocation& other)
{
Reset();
allocator = other.allocator;
internal_state = other.internal_state;
byte_offset = other.byte_offset;
if (internal_state != nullptr)
{
internal_state->refcount.fetch_add(1);
}
}
void operator=(Allocation&& other) noexcept
{
Reset();
allocator = std::move(other.allocator);
internal_state = other.internal_state;
byte_offset = other.byte_offset;
other.allocator = nullptr;
other.internal_state = nullptr;
other.byte_offset = ~0ull;
}
void Reset()
{
if (IsValid() && (internal_state->refcount.fetch_sub(1) <= 1))
{
std::scoped_lock lck(allocator->locker);
if (allocator->deferred_release_enabled)
{
// can only be reclaimed after buffering amount of frames passed, this is usually used for GPU resources:
allocator->deferred_release_queue.push_back(std::make_pair(internal_state->allocation, allocator->deferred_release_frame));
}
else
{
// reclaimed immediately:
allocator->allocator.free(internal_state->allocation);
}
allocator->internal_blocks.free(internal_state);
}
allocator = {};
internal_state = nullptr;
byte_offset = ~0ull;
}
constexpr bool IsValid() const { return internal_state != nullptr; }
};
// Allocates a reference counted allocation, viewing at least the requested amount of bytes
// To check if the allocation succeeded, call IsValid() on the returned object
inline Allocation allocate(size_t sizeInBytes)
{
const uint32_t pages = page_count_from_bytes(sizeInBytes);
std::scoped_lock lck(allocator->locker);
OffsetAllocator::Allocation offsetallocation = allocator->allocator.allocate(pages);
Allocation alloc;
if (offsetallocation.offset != OffsetAllocator::Allocation::NO_SPACE)
{
alloc.allocator = allocator;
alloc.internal_state = allocator->internal_blocks.allocate();
alloc.internal_state->refcount.store(1);
alloc.internal_state->allocation = offsetallocation;
alloc.byte_offset = offsetallocation.offset * page_size;
}
return alloc;
}
// returns true if no pages are allocated
inline bool is_empty()
{
return allocator->allocator.storageReport().totalFreeSpace == page_count;
}
};
}
+1
View File
@@ -318,6 +318,7 @@ namespace wi
wi::input::ClearForNextFrame();
wi::profiler::EndFrame(cmd);
graphicsDevice->SubmitCommandLists();
wi::renderer::UpdateGPUSuballocator();
}
void Application::Update(float dt)
+4 -4
View File
@@ -250,14 +250,14 @@ namespace wi::graphics
return CreateBuffer2(desc, [&](void* dest) { std::memcpy(dest, initial_data, desc->size); }, buffer, alias, alias_offset);
}
bool CreateBufferCleared(const GPUBufferDesc* desc, uint8_t value, GPUBuffer* buffer) const
bool CreateBufferCleared(const GPUBufferDesc* desc, uint8_t value, GPUBuffer* buffer, const GPUResource* alias = nullptr, uint64_t alias_offset = 0ull) const
{
return CreateBuffer2(desc, [&](void* dest) { std::memset(dest, value, desc->size); }, buffer);
return CreateBuffer2(desc, [&](void* dest) { std::memset(dest, value, desc->size); }, buffer, alias, alias_offset);
}
bool CreateBufferZeroed(const GPUBufferDesc* desc, GPUBuffer* buffer) const
bool CreateBufferZeroed(const GPUBufferDesc* desc, GPUBuffer* buffer, const GPUResource* alias = nullptr, uint64_t alias_offset = 0ull) const
{
return CreateBufferCleared(desc, 0, buffer);
return CreateBufferCleared(desc, 0, buffer, alias, alias_offset);
}
void Barrier(const GPUBarrier& barrier, CommandList cmd)
+1
View File
@@ -2422,6 +2422,7 @@ std::mutex queue_locker;
disabledMessages.push_back(D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE);
disabledMessages.push_back(D3D12_MESSAGE_ID_SETPRIVATEDATA_CHANGINGPARAMS);
disabledMessages.push_back(D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_INTERSECTS_MULTIPLE_BUFFERS);
D3D12_INFO_QUEUE_FILTER filter = {};
filter.AllowList.NumSeverities = static_cast<UINT>(enabledSeverities.size());
+12
View File
@@ -349,6 +349,18 @@ namespace wi::graphics
{
alignment = std::max(alignment, 16ull);
}
if (has_flag(desc->misc_flags, ResourceMiscFlag::ALIASING_BUFFER))
{
alignment = std::max(alignment, (uint64_t)D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT);
}
if (has_flag(desc->misc_flags, ResourceMiscFlag::ALIASING_TEXTURE_NON_RT_DS))
{
alignment = std::max(alignment, (uint64_t)D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT);
}
if (has_flag(desc->misc_flags, ResourceMiscFlag::ALIASING_TEXTURE_RT_DS))
{
alignment = std::max(alignment, (uint64_t)D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT);
}
return alignment;
}
+4 -6
View File
@@ -3750,12 +3750,6 @@ using namespace vulkan_internal;
}
bool GraphicsDevice_Vulkan::CreateBuffer2(const GPUBufferDesc* desc, const std::function<void(void*)>& init_callback, GPUBuffer* buffer, const GPUResource* alias, uint64_t alias_offset) const
{
#ifdef PLATFORM_LINUX
// Resource aliasing on Linux sometimes fails with VK_ERROR_UNKOWN so I disable it:
alias = nullptr;
alias_offset = 0;
#endif // PLATFORM_LINUX
auto internal_state = std::make_shared<Buffer_Vulkan>();
internal_state->allocationhandler = allocationhandler;
buffer->internal_state = internal_state;
@@ -3854,6 +3848,10 @@ using namespace vulkan_internal;
{
VkMemoryRequirements memory_requirements = {};
memory_requirements.alignment = desc->alignment;
if (memory_requirements.alignment == 0)
{
memory_requirements.alignment = GetMinOffsetAlignment(desc);
}
memory_requirements.size = AlignTo(desc->size, memory_requirements.alignment);
memory_requirements.memoryTypeBits = ~0u;
+4
View File
@@ -473,6 +473,10 @@ namespace wi::graphics
{
alignment = std::max(alignment, properties2.properties.limits.minTexelBufferOffsetAlignment);
}
if (has_flag(desc->misc_flags, ResourceMiscFlag::ALIASING_BUFFER) || has_flag(desc->misc_flags, ResourceMiscFlag::ALIASING_TEXTURE_NON_RT_DS) || has_flag(desc->misc_flags, ResourceMiscFlag::ALIASING_TEXTURE_RT_DS))
{
alignment = std::max(alignment, uint64_t(64 * 1024)); // 64KB safety to match DX12, because cannot use vkGetBufferMemoryRequirements here
}
return alignment;
}
-27
View File
@@ -920,12 +920,6 @@ namespace wi
wi::renderer::UpdateRaytracingAccelerationStructures(*scene, cmd);
}
if (scene->weather.IsRealisticSky())
{
wi::renderer::ComputeSkyAtmosphereTextures(cmd);
wi::renderer::ComputeSkyAtmosphereSkyViewLut(cmd);
}
if (wi::renderer::GetSurfelGIEnabled())
{
wi::renderer::SurfelGI(
@@ -1164,16 +1158,6 @@ namespace wi
);
}
if (scene->weather.IsRealisticSky())
{
wi::renderer::ComputeSkyAtmosphereSkyViewLut(cmd);
if (scene->weather.IsRealisticSkyAerialPerspective())
{
wi::renderer::ComputeSkyAtmosphereCameraVolumeLut(cmd);
}
}
if (scene->weather.IsVolumetricClouds() && !scene->weather.IsVolumetricCloudsReceiveShadow())
{
// When volumetric cloud DOESN'T receive shadow it can be done async to shadow maps!
@@ -1305,17 +1289,6 @@ namespace wi
cmd
);
// Render SkyAtmosphere assets from planar reflections point of view
if (scene->weather.IsRealisticSky())
{
wi::renderer::ComputeSkyAtmosphereSkyViewLut(cmd);
if (scene->weather.IsRealisticSkyAerialPerspective())
{
wi::renderer::ComputeSkyAtmosphereCameraVolumeLut(cmd);
}
}
device->EventBegin("Planar reflections Z-Prepass", cmd);
auto range = wi::profiler::BeginRangeGPU("Planar Reflections Z-Prepass", cmd);
+102 -6
View File
@@ -2623,6 +2623,73 @@ const GPUBuffer& GetIndexBufferForQuads(uint32_t max_quad_count)
return indexBufferForQuads32;
}
// This is responsible to manage big chunks of GPUBuffer, each of which will be used for suballocations:
struct GPUSubAllocator
{
// Size of each buffer block that suballocations are served from
static constexpr uint64_t blocksize = 256ull * 1024ull * 1024ull; // 256 MB
// One big GPU buffer together with the allocator that hands out ranges of it
struct Block
{
wi::allocator::PageAllocator allocator;
GPUBuffer buffer;
};
wi::vector<Block> blocks;
std::mutex locker; // guards the blocks list
} static suballocator;
// Suballocates the requested number of bytes from one of the big GPU buffer blocks.
//	The returned object contains the allocation and the buffer block that it aliases.
//	The result is invalid (allocation.IsValid() == false) if the request is larger than half of the block size,
//	or if a new buffer block was needed but could not be created.
BufferSuballocation SuballocateGPUBuffer(uint64_t size)
{
	if (size > GPUSubAllocator::blocksize / 2)
		return {}; // invalid, larger allocations than half block size will not be suballocated

	// scoped for locker
	{
		std::scoped_lock lock(suballocator.locker);

		// See if any of the large blocks can fulfill the allocation request:
		BufferSuballocation allocation;
		for (auto& block : suballocator.blocks)
		{
			allocation.allocation = block.allocator.allocate(size);
			if (allocation.allocation.IsValid())
			{
				allocation.alias = block.buffer;
				//wilog("SuballocateGPUBuffer allocated size: %s, pages: %d, free space remaining: %s", wi::helper::GetMemorySizeText(size).c_str(), block.allocator.page_count_from_bytes(size), wi::helper::GetMemorySizeText(allocation.allocation.allocator->allocator.storageReport().totalFreeSpace * block.allocator.page_size).c_str());
				return allocation;
			}
		}

		// Allocation couldn't be fulfilled, create new block:
		GPUBufferDesc desc;
		desc.size = GPUSubAllocator::blocksize;
		desc.usage = Usage::DEFAULT;
		desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::VERTEX_BUFFER | BindFlag::INDEX_BUFFER;
		desc.misc_flags = ResourceMiscFlag::ALIASING_BUFFER | ResourceMiscFlag::NO_DEFAULT_DESCRIPTORS;
		desc.alignment = device->GetMinOffsetAlignment(&desc);
		auto& block = suballocator.blocks.emplace_back();
		bool success = device->CreateBuffer(&desc, nullptr, &block.buffer);
		assert(success);
		if (!success)
		{
			// The assert is compiled out of release builds: don't keep a block without a backing buffer,
			//	otherwise the retry below would hand out suballocations that alias an invalid buffer
			suballocator.blocks.pop_back();
			return {};
		}
		device->SetName(&block.buffer, "GPUSubAllocator");
		// The page size is the buffer's offset alignment, so every suballocation starts at a valid aliasing offset
		block.allocator.init(desc.size, (uint32_t)desc.alignment, true);
		wilog("SuballocateGPUBuffer created buffer block with size: %s, with page size: %s, page count: %d", wi::helper::GetMemorySizeText(block.allocator.total_size_in_bytes()).c_str(), wi::helper::GetMemorySizeText(block.allocator.page_size).c_str(), (int)block.allocator.page_count);
	}

	return SuballocateGPUBuffer(size); // retry
}
// Per-frame maintenance of the GPU suballocator:
//	processes the deferred releases of every block, then removes at most one block that became empty
void UpdateGPUSuballocator()
{
	std::scoped_lock lock(suballocator.locker);

	for (size_t i = 0; i < suballocator.blocks.size(); ++i)
	{
		suballocator.blocks[i].allocator.update_deferred_release(device->GetFrameCount(), device->GetBufferCount());
	}

	// Only the first empty block is removed per call
	for (auto it = suballocator.blocks.begin(); it != suballocator.blocks.end(); ++it)
	{
		if (!it->allocator.is_empty())
			continue;
		suballocator.blocks.erase(it);
		break;
	}
}
void ModifyObjectSampler(const SamplerDesc& desc)
{
if (initialized.load())
@@ -2961,7 +3028,8 @@ void RenderMeshes(
uint32_t prev_stencilref = STENCILREF_DEFAULT;
device->BindStencilRef(prev_stencilref, cmd);
const GPUBuffer* prev_ib = nullptr;
IndexBufferFormat prev_ibformat = IndexBufferFormat::UINT16;
const void* prev_ib_internal = nullptr;
// This will be called every time we start a new draw call:
auto batch_flush = [&]()
@@ -3092,10 +3160,16 @@ void RenderMeshes(
device->BindStencilRef(stencilRef, cmd);
}
if (!meshShaderPSO && prev_ib != &mesh.generalBuffer)
// Note: the mesh.generalBuffer can be either a standalone allocated buffer, or a suballocated one (to reduce index buffer switching)
const GPUBuffer* ib = mesh.generalBufferOffsetAllocation.IsValid() ? &mesh.generalBufferOffsetAllocationAlias : &mesh.generalBuffer;
const IndexBufferFormat ibformat = mesh.GetIndexFormat();
const void* ibinternal = ib->internal_state.get();
if (!meshShaderPSO && (prev_ib_internal != ibinternal || prev_ibformat != ibformat))
{
device->BindIndexBuffer(&mesh.generalBuffer, mesh.GetIndexFormat(), mesh.ib.offset, cmd);
prev_ib = &mesh.generalBuffer;
prev_ib_internal = ibinternal;
prev_ibformat = ibformat;
device->BindIndexBuffer(ib, ibformat, 0, cmd);
}
if (
@@ -3114,6 +3188,18 @@ void RenderMeshes(
push.instances = instanceBufferDescriptorIndex;
push.instance_offset = (uint)instancedBatch.dataOffset;
uint32_t indexOffset = 0;
if (mesh.generalBufferOffsetAllocation.IsValid())
{
// In case the mesh general buffer is suballocated, the indexOffset is calculated relative to the beginning of the aliased buffer block:
indexOffset = uint32_t(((uint64_t)mesh.generalBufferOffsetAllocation.byte_offset + mesh.ib.offset) / mesh.GetIndexStride()) + subset.indexOffset;
}
else
{
// In case the mesh general buffer is not suballocated, it is a standalone buffer and index offset is relative to itself
indexOffset = uint32_t(mesh.ib.offset / mesh.GetIndexStride()) + subset.indexOffset;
}
if (pso_backside != nullptr)
{
device->BindPipelineState(pso_backside, cmd);
@@ -3124,7 +3210,7 @@ void RenderMeshes(
}
else
{
device->DrawIndexedInstanced(subset.indexCount, instancedBatch.instanceCount, subset.indexOffset, 0, 0, cmd);
device->DrawIndexedInstanced(subset.indexCount, instancedBatch.instanceCount, indexOffset, 0, 0, cmd);
}
}
@@ -3136,7 +3222,7 @@ void RenderMeshes(
}
else
{
device->DrawIndexedInstanced(subset.indexCount, instancedBatch.instanceCount, subset.indexOffset, 0, 0, cmd);
device->DrawIndexedInstanced(subset.indexCount, instancedBatch.instanceCount, indexOffset, 0, 0, cmd);
}
}
@@ -5196,6 +5282,16 @@ void UpdateRenderDataAsync(
ComputeVolumetricCloudShadows(cmd, weatherMapFirst, weatherMapSecond);
}
if (vis.scene->weather.IsRealisticSky())
{
wi::renderer::ComputeSkyAtmosphereTextures(cmd);
wi::renderer::ComputeSkyAtmosphereSkyViewLut(cmd);
if (vis.scene->weather.IsRealisticSkyAerialPerspective())
{
wi::renderer::ComputeSkyAtmosphereCameraVolumeLut(cmd);
}
}
// GPU Particle systems simulation/sorting/culling:
if (!vis.visibleEmitters.empty() || vis.scene->weather.rain_amount > 0)
{
+14
View File
@@ -12,6 +12,7 @@
#include "shaders/ShaderInterop_SurfelGI.h"
#include "wiVector.h"
#include "wiSpinLock.h"
#include "wiAllocator.h"
#include <memory>
#include <limits>
@@ -66,8 +67,21 @@ namespace wi::renderer
// Returns a buffer preinitialized for quad index buffer laid out as:
// vertexID * 4 + [0, 1, 2, 2, 1, 3]
// Note: it will return 16-bit or 32-bit index buffer depending on max_quad_count
const wi::graphics::GPUBuffer& GetIndexBufferForQuads(uint32_t max_quad_count);
// Result of SuballocateGPUBuffer(): pairs a GPUBuffer with the page allocation made for the request.
// Callers are expected to check allocation.IsValid() before using the result.
struct BufferSuballocation
{
// Buffer to alias against when creating the smaller resource.
// NOTE(review): presumably this refers to the large block buffer the allocation lives in - confirm in SuballocateGPUBuffer.
wi::graphics::GPUBuffer alias;
// Allocation record from the page allocator; exposes IsValid() and byte_offset.
// NOTE(review): byte_offset is presumably relative to the start of `alias` - confirm.
wi::allocator::PageAllocator::Allocation allocation;
};
// Suballocates (thread-safe) from a global GPU buffer for memory aliasing purposes:
// The buffer will have DEFAULT usage, and is usable as a vertex buffer, index buffer and shader resource
// The purpose is to suballocate smaller GPUBuffers inside a larger GPUBuffer and bind the large GPUBuffer once as an index buffer,
// while the small buffers can be allocated/deallocated from it with memory aliasing and can also be used on their own as regular buffers
BufferSuballocation SuballocateGPUBuffer(uint64_t size);
void UpdateGPUSuballocator(); // called every frame for deferred release of GPU suballocations
void ModifyObjectSampler(const wi::graphics::SamplerDesc& desc);
// Initializes the renderer
+20 -3
View File
@@ -586,6 +586,7 @@ namespace wi::scene
void MeshComponent::DeleteRenderData()
{
generalBufferOffsetAllocation = {};
generalBuffer = {};
streamoutBuffer = {};
ib = {};
@@ -1291,9 +1292,25 @@ namespace wi::scene
}
};
bool success = device->CreateBuffer2(&bd, init_callback, &generalBuffer);
assert(success);
device->SetName(&generalBuffer, "MeshComponent::generalBuffer");
// The suballocation strategy is used to have all mesh buffers reside in a global buffer
// With this we can avoid rebinding the index buffer for every mesh and can work purely with offsets
// The index buffer will still need to be rebound if the index format changes, but that happens less frequently
wi::renderer::BufferSuballocation suballoc = wi::renderer::SuballocateGPUBuffer(bd.size);
if (suballoc.allocation.IsValid())
{
bool success = device->CreateBuffer2(&bd, init_callback, &generalBuffer, &suballoc.alias, suballoc.allocation.byte_offset);
assert(success);
device->SetName(&generalBuffer, "MeshComponent::generalBuffer (suballocated)");
generalBufferOffsetAllocation = std::move(suballoc.allocation);
generalBufferOffsetAllocationAlias = std::move(suballoc.alias);
}
else
{
// If suballocation was not successful, a standalone buffer can be created instead:
bool success = device->CreateBuffer2(&bd, init_callback, &generalBuffer);
assert(success);
device->SetName(&generalBuffer, "MeshComponent::generalBuffer");
}
assert(ib.IsValid());
const Format ib_format = GetIndexFormat() == IndexBufferFormat::UINT32 ? Format::R32_UINT : Format::R16_UINT;
+15 -9
View File
@@ -15,6 +15,7 @@
#include "wiUnorderedSet.h"
#include "wiBVH.h"
#include "wiPathQuery.h"
#include "wiAllocator.h"
namespace wi::scene
{
@@ -182,7 +183,7 @@ namespace wi::scene
XMFLOAT4 emissiveColor = XMFLOAT4(1, 1, 1, 0);
XMFLOAT4 subsurfaceScattering = XMFLOAT4(1, 1, 1, 0);
XMFLOAT4 extinctionColor = XMFLOAT4(0, 0.9f, 1, 1);
XMFLOAT4 texMulAdd = XMFLOAT4(1, 1, 0, 0);
XMFLOAT4 texMulAdd = XMFLOAT4(1, 1, 0, 0); // dynamic multiplier (.xy) and addition (.zw) for UV coordinates
float roughness = 0.2f;
float reflectance = 0.02f;
float metalness = 0.0f;
@@ -655,7 +656,7 @@ namespace wi::scene
BVH_ENABLED = 1 << 8,
QUANTIZED_POSITIONS_DISABLED = 1 << 9,
};
uint32_t _flags = RENDERABLE;
// *uint32_t _flags is moved down for better struct padding...
wi::vector<XMFLOAT3> vertex_positions;
wi::vector<XMFLOAT3> vertex_normals;
@@ -714,6 +715,8 @@ namespace wi::scene
wi::primitive::AABB aabb;
wi::graphics::GPUBuffer generalBuffer; // index buffer + all static vertex buffers
wi::graphics::GPUBuffer streamoutBuffer; // all dynamic vertex buffers
wi::allocator::PageAllocator::Allocation generalBufferOffsetAllocation;
wi::graphics::GPUBuffer generalBufferOffsetAllocationAlias;
struct BufferView
{
uint64_t offset = ~0ull;
@@ -751,13 +754,6 @@ namespace wi::scene
XMFLOAT2 uv_range_max = XMFLOAT2(1, 1);
wi::vector<wi::graphics::RaytracingAccelerationStructure> BLASes; // one BLAS per LOD
enum BLAS_STATE
{
BLAS_STATE_NEEDS_REBUILD,
BLAS_STATE_NEEDS_REFIT,
BLAS_STATE_COMPLETE,
};
mutable BLAS_STATE BLAS_state = BLAS_STATE_NEEDS_REBUILD;
wi::vector<wi::primitive::AABB> bvh_leaf_aabbs;
wi::BVH bvh;
@@ -771,6 +767,16 @@ namespace wi::scene
RigidBodyPhysicsComponent precomputed_rigidbody_physics_shape; // you can precompute a physics shape here if you need without using a real rigid body component yet
uint32_t _flags = RENDERABLE; // *this is serialized but put here for better struct padding
// Update state tracked for this mesh's BLASes (see the BLASes member, one BLAS per LOD).
// NOTE(review): the per-value meanings below are inferred from the enumerator names - confirm against the code that reads BLAS_state.
enum BLAS_STATE
{
BLAS_STATE_NEEDS_REBUILD, // presumably: the acceleration structure must be built from scratch
BLAS_STATE_NEEDS_REFIT, // presumably: the existing acceleration structure only needs a refit/update
BLAS_STATE_COMPLETE, // presumably: the acceleration structure is up to date
};
// Starts out as NEEDS_REBUILD; declared mutable so it can be changed through a const MeshComponent.
mutable BLAS_STATE BLAS_state = BLAS_STATE_NEEDS_REBUILD;
// Sets (value == true) or clears (value == false) the RENDERABLE bit in _flags.
constexpr void SetRenderable(bool value) { _flags = value ? (_flags | RENDERABLE) : (_flags & ~RENDERABLE); }
// Sets (value == true) or clears (value == false) the DOUBLE_SIDED bit in _flags.
constexpr void SetDoubleSided(bool value) { _flags = value ? (_flags | DOUBLE_SIDED) : (_flags & ~DOUBLE_SIDED); }
// Sets (value == true) or clears (value == false) the DOUBLE_SIDED_SHADOW bit in _flags.
constexpr void SetDoubleSidedShadow(bool value) { _flags = value ? (_flags | DOUBLE_SIDED_SHADOW) : (_flags & ~DOUBLE_SIDED_SHADOW); }
+1 -1
View File
@@ -9,7 +9,7 @@ namespace wi::version
// minor features, major updates, breaking compatibility changes
const int minor = 71;
// minor bug fixes, alterations, refactors, updates
const int revision = 750;
const int revision = 751;
const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);
+23
View File
@@ -934,5 +934,28 @@ SOFTWARE.
###############################################################################################################################
OffsetAllocator: https://github.com/sebbbi/OffsetAllocator
MIT License
Copyright (c) 2023 Sebastian Aaltonen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
###############################################################################################################################