diff --git a/WickedEngine/Utility/D3D12MemAlloc.cpp b/WickedEngine/Utility/D3D12MemAlloc.cpp index f8048bd33..9e65a0d27 100644 --- a/WickedEngine/Utility/D3D12MemAlloc.cpp +++ b/WickedEngine/Utility/D3D12MemAlloc.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2019-2021 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,20 +22,12 @@ #include "D3D12MemAlloc.h" -#ifndef D3D12MA_D3D12_HEADERS_ALREADY_INCLUDED - #include - #if D3D12MA_DXGI_1_4 - #include - #endif -#endif - #include #include #include #include #include #include // for _aligned_malloc, _aligned_free - #ifndef _WIN32 #include #endif @@ -47,6 +39,24 @@ // //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// +#ifndef _D3D12MA_CONFIGURATION + +#ifdef _WIN32 + #if !defined(WINVER) || WINVER < 0x0600 + #error Required at least WinAPI version supporting: client = Windows Vista, server = Windows Server 2008. + #endif +#endif + +#ifndef D3D12MA_SORT + #define D3D12MA_SORT(beg, end, cmp) std::sort(beg, end, cmp) +#endif + +#ifndef D3D12MA_D3D12_HEADERS_ALREADY_INCLUDED + #include + #if D3D12MA_DXGI_1_4 + #include + #endif +#endif #ifndef D3D12MA_ASSERT #include @@ -96,6 +106,7 @@ especially to test compatibility with D3D12_RESOURCE_HEAP_TIER_1 on modern GPUs. #define D3D12MA_DEFAULT_BLOCK_SIZE (64ull * 1024 * 1024) #endif +#endif // _D3D12MA_CONFIGURATION //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// // @@ -108,11 +119,51 @@ especially to test compatibility with D3D12_RESOURCE_HEAP_TIER_1 on modern GPUs. namespace D3D12MA { +static constexpr UINT HEAP_TYPE_COUNT = 4; +static constexpr UINT STANDARD_HEAP_TYPE_COUNT = 3; // Only DEFAULT, UPLOAD, READBACK. +static constexpr UINT DEFAULT_POOL_MAX_COUNT = 9; +static const UINT NEW_BLOCK_SIZE_SHIFT_MAX = 3; +// Minimum size of a free suballocation to register it in the free suballocation collection. +static const UINT64 MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER = 16; -//////////////////////////////////////////////////////////////////////////////// -// Private globals - CPU memory allocation +static const WCHAR* const HeapTypeNames[] = +{ + L"DEFAULT", + L"UPLOAD", + L"READBACK", + L"CUSTOM", +}; -static void* DefaultAllocate(size_t Size, size_t Alignment, void* /*pUserData*/) +static const D3D12_HEAP_FLAGS RESOURCE_CLASS_HEAP_FLAGS = + D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES | D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES; + +#ifndef _D3D12MA_ENUM_DECLARATIONS + +// Local copy of this enum, as it is provided only by , so it may not be available. +enum DXGI_MEMORY_SEGMENT_GROUP_COPY +{ + DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY = 0, + DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY = 1, + DXGI_MEMORY_SEGMENT_GROUP_COUNT +}; + +enum class ResourceClass +{ + Unknown, Buffer, Non_RT_DS_Texture, RT_DS_Texture +}; + +enum SuballocationType +{ + SUBALLOCATION_TYPE_FREE = 0, + SUBALLOCATION_TYPE_ALLOCATION = 1, +}; + +#endif // _D3D12MA_ENUM_DECLARATIONS + + +#ifndef _D3D12MA_FUNCTIONS + +static void* DefaultAllocate(size_t Size, size_t Alignment, void* /*pPrivateData*/) { #ifdef _WIN32 return _aligned_malloc(Size, Alignment); @@ -120,7 +171,7 @@ static void* DefaultAllocate(size_t Size, size_t Alignment, void* /*pUserData*/) return aligned_alloc(Alignment, Size); #endif } -static void DefaultFree(void* pMemory, void* /*pUserData*/) +static void DefaultFree(void* pMemory, void* /*pPrivateData*/) { #ifdef _WIN32 return _aligned_free(pMemory); @@ -131,13 +182,13 @@ static void DefaultFree(void* pMemory, void* /*pUserData*/) static void* Malloc(const ALLOCATION_CALLBACKS& allocs, size_t size, size_t alignment) { - void* const result = (*allocs.pAllocate)(size, alignment, allocs.pUserData); + void* const result = (*allocs.pAllocate)(size, alignment, allocs.pPrivateData); D3D12MA_ASSERT(result); return result; } static void Free(const ALLOCATION_CALLBACKS& allocs, void* memory) { - (*allocs.pFree)(memory, allocs.pUserData); + (*allocs.pFree)(memory, allocs.pPrivateData); } template @@ -157,7 +208,7 @@ static T* AllocateArray(const ALLOCATION_CALLBACKS& allocs, size_t count) template void D3D12MA_DELETE(const ALLOCATION_CALLBACKS& allocs, T* memory) { - if(memory) + if (memory) { memory->~T(); Free(allocs, memory); @@ -166,9 +217,9 @@ void D3D12MA_DELETE(const ALLOCATION_CALLBACKS& allocs, T* memory) template void D3D12MA_DELETE_ARRAY(const ALLOCATION_CALLBACKS& allocs, T* memory, size_t count) { - if(memory) + if (memory) { - for(size_t i = count; i--; ) + for (size_t i = count; i--; ) { memory[i].~T(); } @@ -178,7 +229,7 @@ void D3D12MA_DELETE_ARRAY(const ALLOCATION_CALLBACKS& allocs, T* memory, size_t static void SetupAllocationCallbacks(ALLOCATION_CALLBACKS& outAllocs, const ALLOCATION_CALLBACKS* allocationCallbacks) { - if(allocationCallbacks) + if (allocationCallbacks) { outAllocs = *allocationCallbacks; D3D12MA_ASSERT(outAllocs.pAllocate != NULL && outAllocs.pFree != NULL); @@ -187,85 +238,114 @@ static void SetupAllocationCallbacks(ALLOCATION_CALLBACKS& outAllocs, const ALLO { outAllocs.pAllocate = &DefaultAllocate; outAllocs.pFree = &DefaultFree; - outAllocs.pUserData = NULL; + outAllocs.pPrivateData = NULL; } } -//////////////////////////////////////////////////////////////////////////////// -// Private globals - basic facilities - #define SAFE_RELEASE(ptr) do { if(ptr) { (ptr)->Release(); (ptr) = NULL; } } while(false) #define D3D12MA_VALIDATE(cond) do { if(!(cond)) { \ - D3D12MA_ASSERT(0 && "Validation failed: " #cond); \ - return false; \ - } } while(false) - -const UINT NEW_BLOCK_SIZE_SHIFT_MAX = 3; + D3D12MA_ASSERT(0 && "Validation failed: " #cond); \ + return false; \ +} } while(false) template -static inline T D3D12MA_MIN(const T& a, const T& b) -{ - return a <= b ? a : b; -} +static T D3D12MA_MIN(const T& a, const T& b) { return a <= b ? a : b; } template -static inline T D3D12MA_MAX(const T& a, const T& b) -{ - return a <= b ? b : a; -} +static T D3D12MA_MAX(const T& a, const T& b) { return a <= b ? b : a; } template -static inline void D3D12MA_SWAP(T& a, T& b) -{ - T tmp = a; a = b; b = tmp; -} +static void D3D12MA_SWAP(T& a, T& b) { T tmp = a; a = b; b = tmp; } -#ifndef D3D12MA_MUTEX - class Mutex +// Scans integer for index of first nonzero bit from the Least Significant Bit (LSB). If mask is 0 then returns UINT8_MAX +static UINT8 BitScanLSB(UINT64 mask) +{ +#if defined(_MSC_VER) && defined(_WIN64) + unsigned long pos; + if (_BitScanForward64(&pos, mask)) + return static_cast(pos); + return UINT8_MAX; +#elif defined __GNUC__ || defined __clang__ + return static_cast(__builtin_ffsll(mask)) - 1U; +#else + UINT8 pos = 0; + UINT64 bit = 1; + do { - public: - void Lock() { m_Mutex.lock(); } - void Unlock() { m_Mutex.unlock(); } - private: - std::mutex m_Mutex; - }; - #define D3D12MA_MUTEX Mutex + if (mask & bit) + return pos; + bit <<= 1; + } while (pos++ < 63); + return UINT8_MAX; #endif - -#ifdef _WIN32 - #if !defined(WINVER) || WINVER < 0x0600 - #error Required at least WinAPI version supporting: client = Windows Vista, server = Windows Server 2008. - #endif -#endif // #ifdef _WIN32 - -#ifndef D3D12MA_RW_MUTEX -#ifdef _WIN32 - class RWMutex +} +// Scans integer for index of first nonzero bit from the Least Significant Bit (LSB). If mask is 0 then returns UINT8_MAX +static UINT8 BitScanLSB(UINT32 mask) +{ +#ifdef _MSC_VER + unsigned long pos; + if (_BitScanForward(&pos, mask)) + return static_cast(pos); + return UINT8_MAX; +#elif defined __GNUC__ || defined __clang__ + return static_cast(__builtin_ffs(mask)) - 1U; +#else + UINT8 pos = 0; + UINT32 bit = 1; + do { - public: - RWMutex() { InitializeSRWLock(&m_Lock); } - void LockRead() { AcquireSRWLockShared(&m_Lock); } - void UnlockRead() { ReleaseSRWLockShared(&m_Lock); } - void LockWrite() { AcquireSRWLockExclusive(&m_Lock); } - void UnlockWrite() { ReleaseSRWLockExclusive(&m_Lock); } - private: - SRWLOCK m_Lock; - }; -#else // #ifdef _WIN32 - class RWMutex + if (mask & bit) + return pos; + bit <<= 1; + } while (pos++ < 31); + return UINT8_MAX; +#endif +} + +// Scans integer for index of first nonzero bit from the Most Significant Bit (MSB). If mask is 0 then returns UINT8_MAX +static UINT8 BitScanMSB(UINT64 mask) +{ +#if defined(_MSC_VER) && defined(_WIN64) + unsigned long pos; + if (_BitScanReverse64(&pos, mask)) + return static_cast(pos); +#elif defined __GNUC__ || defined __clang__ + if (mask) + return 63 - static_cast(__builtin_clzll(mask)); +#else + UINT8 pos = 63; + UINT64 bit = 1ULL << 63; + do { - public: - RWMutex() {} - void LockRead() { m_Mutex.lock_shared(); } - void UnlockRead() { m_Mutex.unlock_shared(); } - void LockWrite() { m_Mutex.lock(); } - void UnlockWrite() { m_Mutex.unlock(); } - private: - std::shared_timed_mutex m_Mutex; - }; -#endif // #ifdef _WIN32 - #define D3D12MA_RW_MUTEX RWMutex -#endif // #ifndef D3D12MA_RW_MUTEX + if (mask & bit) + return pos; + bit >>= 1; + } while (pos-- > 0); +#endif + return UINT8_MAX; +} +// Scans integer for index of first nonzero bit from the Most Significant Bit (MSB). If mask is 0 then returns UINT8_MAX +static UINT8 BitScanMSB(UINT32 mask) +{ +#ifdef _MSC_VER + unsigned long pos; + if (_BitScanReverse(&pos, mask)) + return static_cast(pos); +#elif defined __GNUC__ || defined __clang__ + if (mask) + return 31 - static_cast(__builtin_clz(mask)); +#else + UINT8 pos = 31; + UINT32 bit = 1UL << 31; + do + { + if (mask & bit) + return pos; + bit >>= 1; + } while (pos-- > 0); +#endif + return UINT8_MAX; +} /* Returns true if given number is a power of two. @@ -273,23 +353,20 @@ T must be unsigned integer number or signed integer but always nonnegative. For 0 returns true. */ template -inline bool IsPow2(T x) -{ - return (x & (x-1)) == 0; -} +static bool IsPow2(T x) { return (x & (x - 1)) == 0; } // Aligns given value up to nearest multiply of align value. For example: AlignUp(11, 8) = 16. // Use types like UINT, uint64_t as T. template -static inline T AlignUp(T val, T alignment) +static T AlignUp(T val, T alignment) { D3D12MA_HEAVY_ASSERT(IsPow2(alignment)); - return (val + alignment - 1) & ~(alignment - 1); + return (val + alignment - 1) & ~(alignment - 1); } // Aligns given value down to nearest multiply of align value. For example: AlignUp(11, 8) = 8. // Use types like UINT, uint64_t as T. template -static inline T AlignDown(T val, T alignment) +static T AlignDown(T val, T alignment) { D3D12MA_HEAVY_ASSERT(IsPow2(alignment)); return val & ~(alignment - 1); @@ -297,101 +374,10 @@ static inline T AlignDown(T val, T alignment) // Division with mathematical rounding to nearest number. template -static inline T RoundDiv(T x, T y) -{ - return (x + (y / (T)2)) / y; -} +static T RoundDiv(T x, T y) { return (x + (y / (T)2)) / y; } template -static inline T DivideRoudingUp(T x, T y) -{ - return (x + y - 1) / y; -} - -// Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). -struct MutexLock -{ -public: - MutexLock(D3D12MA_MUTEX& mutex, bool useMutex = true) : - m_pMutex(useMutex ? &mutex : NULL) - { - if(m_pMutex) - { - m_pMutex->Lock(); - } - } - ~MutexLock() - { - if(m_pMutex) - { - m_pMutex->Unlock(); - } - } -private: - D3D12MA_MUTEX* m_pMutex; - - D3D12MA_CLASS_NO_COPY(MutexLock) -}; - -// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for reading. -struct MutexLockRead -{ -public: - MutexLockRead(D3D12MA_RW_MUTEX& mutex, bool useMutex) : - m_pMutex(useMutex ? &mutex : NULL) - { - if(m_pMutex) - { - m_pMutex->LockRead(); - } - } - ~MutexLockRead() - { - if(m_pMutex) - { - m_pMutex->UnlockRead(); - } - } -private: - D3D12MA_RW_MUTEX* m_pMutex; - - D3D12MA_CLASS_NO_COPY(MutexLockRead) -}; - -// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for writing. -struct MutexLockWrite -{ -public: - MutexLockWrite(D3D12MA_RW_MUTEX& mutex, bool useMutex) : - m_pMutex(useMutex ? &mutex : NULL) - { - if(m_pMutex) - { - m_pMutex->LockWrite(); - } - } - ~MutexLockWrite() - { - if(m_pMutex) - { - m_pMutex->UnlockWrite(); - } - } -private: - D3D12MA_RW_MUTEX* m_pMutex; - - D3D12MA_CLASS_NO_COPY(MutexLockWrite) -}; - -#if D3D12MA_DEBUG_GLOBAL_MUTEX - static D3D12MA_MUTEX g_DebugGlobalMutex; - #define D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK MutexLock debugGlobalMutexLock(g_DebugGlobalMutex, true); -#else - #define D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK -#endif - -// Minimum size of a free suballocation to register it in the free suballocation collection. -static const UINT64 MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER = 16; - +static T DivideRoundingUp(T x, T y) { return (x + y - 1) / y; } + /* Performs binary search and returns iterator to first element that is greater or equal to `key`, according to comparison `cmp`. @@ -402,13 +388,13 @@ Returned value is the found element, if present in the collection or place where new element with value (key) should be inserted. */ template -static IterT BinaryFindFirstNotLess(IterT beg, IterT end, const KeyT &key, const CmpLess& cmp) +static IterT BinaryFindFirstNotLess(IterT beg, IterT end, const KeyT& key, const CmpLess& cmp) { size_t down = 0, up = (end - beg); - while(down < up) + while (down < up) { const size_t mid = (down + up) / 2; - if(cmp(*(beg+mid), key)) + if (cmp(*(beg + mid), key)) { down = mid + 1; } @@ -430,10 +416,10 @@ Returned value is the found element, if present in the collection or end if not found. */ template -IterT BinaryFindSorted(const IterT& beg, const IterT& end, const KeyT& value, const CmpLess& cmp) +static IterT BinaryFindSorted(const IterT& beg, const IterT& end, const KeyT& value, const CmpLess& cmp) { IterT it = BinaryFindFirstNotLess(beg, end, value, cmp); - if(it == end || + if (it == end || (!cmp(*it, value) && !cmp(value, *it))) { return it; @@ -443,7 +429,7 @@ IterT BinaryFindSorted(const IterT& beg, const IterT& end, const KeyT& value, co static UINT HeapTypeToIndex(D3D12_HEAP_TYPE type) { - switch(type) + switch (type) { case D3D12_HEAP_TYPE_DEFAULT: return 0; case D3D12_HEAP_TYPE_UPLOAD: return 1; @@ -453,37 +439,14 @@ static UINT HeapTypeToIndex(D3D12_HEAP_TYPE type) } } -static const WCHAR* const HeapTypeNames[] = { - L"DEFAULT", - L"UPLOAD", - L"READBACK", - L"CUSTOM", -}; - -// Stat helper functions - -static void AddStatInfo(StatInfo& dst, const StatInfo& src) +static D3D12_HEAP_TYPE IndexToHeapType(UINT heapTypeIndex) { - dst.BlockCount += src.BlockCount; - dst.AllocationCount += src.AllocationCount; - dst.UnusedRangeCount += src.UnusedRangeCount; - dst.UsedBytes += src.UsedBytes; - dst.UnusedBytes += src.UnusedBytes; - dst.AllocationSizeMin = D3D12MA_MIN(dst.AllocationSizeMin, src.AllocationSizeMin); - dst.AllocationSizeMax = D3D12MA_MAX(dst.AllocationSizeMax, src.AllocationSizeMax); - dst.UnusedRangeSizeMin = D3D12MA_MIN(dst.UnusedRangeSizeMin, src.UnusedRangeSizeMin); - dst.UnusedRangeSizeMax = D3D12MA_MAX(dst.UnusedRangeSizeMax, src.UnusedRangeSizeMax); + D3D12MA_ASSERT(heapTypeIndex < 4); + // D3D12_HEAP_TYPE_DEFAULT starts at 1. + return (D3D12_HEAP_TYPE)(heapTypeIndex + 1); } -static void PostProcessStatInfo(StatInfo& statInfo) -{ - statInfo.AllocationSizeAvg = statInfo.AllocationCount ? - statInfo.UsedBytes / statInfo.AllocationCount : 0; - statInfo.UnusedRangeSizeAvg = statInfo.UnusedRangeCount ? - statInfo.UnusedBytes / statInfo.UnusedRangeCount : 0; -} - -static UINT64 HeapFlagsToAlignment(D3D12_HEAP_FLAGS flags) +static UINT64 HeapFlagsToAlignment(D3D12_HEAP_FLAGS flags, bool denyMsaaTextures) { /* Documentation of D3D12_HEAP_DESC structure says: @@ -496,6 +459,9 @@ static UINT64 HeapFlagsToAlignment(D3D12_HEAP_FLAGS flags) https://docs.microsoft.com/en-us/windows/desktop/api/d3d12/ns-d3d12-d3d12_heap_desc */ + if (denyMsaaTextures) + return D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + const D3D12_HEAP_FLAGS denyAllTexturesFlags = D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES | D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES; const bool canContainAnyTextures = @@ -504,9 +470,41 @@ static UINT64 HeapFlagsToAlignment(D3D12_HEAP_FLAGS flags) D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT : D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; } +static ResourceClass HeapFlagsToResourceClass(D3D12_HEAP_FLAGS heapFlags) +{ + const bool allowBuffers = (heapFlags & D3D12_HEAP_FLAG_DENY_BUFFERS) == 0; + const bool allowRtDsTextures = (heapFlags & D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES) == 0; + const bool allowNonRtDsTextures = (heapFlags & D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES) == 0; + + const uint8_t allowedGroupCount = (allowBuffers ? 1 : 0) + (allowRtDsTextures ? 1 : 0) + (allowNonRtDsTextures ? 1 : 0); + if (allowedGroupCount != 1) + return ResourceClass::Unknown; + + if (allowRtDsTextures) + return ResourceClass::RT_DS_Texture; + if (allowNonRtDsTextures) + return ResourceClass::Non_RT_DS_Texture; + return ResourceClass::Buffer; +} + +static bool IsHeapTypeStandard(D3D12_HEAP_TYPE type) +{ + return type == D3D12_HEAP_TYPE_DEFAULT || + type == D3D12_HEAP_TYPE_UPLOAD || + type == D3D12_HEAP_TYPE_READBACK; +} + +static D3D12_HEAP_PROPERTIES StandardHeapTypeToHeapProperties(D3D12_HEAP_TYPE type) +{ + D3D12MA_ASSERT(IsHeapTypeStandard(type)); + D3D12_HEAP_PROPERTIES result = {}; + result.Type = type; + return result; +} + static bool IsFormatCompressed(DXGI_FORMAT format) { - switch(format) + switch (format) { case DXGI_FORMAT_BC1_TYPELESS: case DXGI_FORMAT_BC1_UNORM: @@ -538,7 +536,7 @@ static bool IsFormatCompressed(DXGI_FORMAT format) // Only some formats are supported. For others it returns 0. static UINT GetBitsPerPixel(DXGI_FORMAT format) { - switch(format) + switch (format) { case DXGI_FORMAT_R32G32B32A32_TYPELESS: case DXGI_FORMAT_R32G32B32A32_FLOAT: @@ -650,71 +648,46 @@ static UINT GetBitsPerPixel(DXGI_FORMAT format) return 0; } } - -static const D3D12_HEAP_FLAGS RESOURCE_CLASS_HEAP_FLAGS = - D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES | D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES; - -enum class ResourceClass -{ - Unknown, Buffer, Non_RT_DS_Texture, RT_DS_Texture -}; - + template -static inline ResourceClass ResourceDescToResourceClass(const D3D12_RESOURCE_DESC_T& resDesc) +static ResourceClass ResourceDescToResourceClass(const D3D12_RESOURCE_DESC_T& resDesc) { - if(resDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + if (resDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) return ResourceClass::Buffer; // Else: it's surely a texture. const bool isRenderTargetOrDepthStencil = (resDesc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) != 0; return isRenderTargetOrDepthStencil ? ResourceClass::RT_DS_Texture : ResourceClass::Non_RT_DS_Texture; } - -static inline ResourceClass HeapFlagsToResourceClass(D3D12_HEAP_FLAGS heapFlags) -{ - const bool allowBuffers = (heapFlags & D3D12_HEAP_FLAG_DENY_BUFFERS ) == 0; - const bool allowRtDsTextures = (heapFlags & D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES ) == 0; - const bool allowNonRtDsTextures = (heapFlags & D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES) == 0; - - const uint8_t allowedGroupCount = (allowBuffers ? 1 : 0) + (allowRtDsTextures ? 1 : 0) + (allowNonRtDsTextures ? 1 : 0); - if(allowedGroupCount != 1) - return ResourceClass::Unknown; - if(allowRtDsTextures) - return ResourceClass::RT_DS_Texture; - if(allowNonRtDsTextures) - return ResourceClass::Non_RT_DS_Texture; - return ResourceClass::Buffer; -} - // This algorithm is overly conservative. template static bool CanUseSmallAlignment(const D3D12_RESOURCE_DESC_T& resourceDesc) { - if(resourceDesc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE2D) + if (resourceDesc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE2D) return false; - if((resourceDesc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) != 0) + if ((resourceDesc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) != 0) return false; - if(resourceDesc.SampleDesc.Count > 1) + if (resourceDesc.SampleDesc.Count > 1) return false; - if(resourceDesc.DepthOrArraySize != 1) + if (resourceDesc.DepthOrArraySize != 1) return false; UINT sizeX = (UINT)resourceDesc.Width; UINT sizeY = resourceDesc.Height; UINT bitsPerPixel = GetBitsPerPixel(resourceDesc.Format); - if(bitsPerPixel == 0) + if (bitsPerPixel == 0) return false; - if(IsFormatCompressed(resourceDesc.Format)) + if (IsFormatCompressed(resourceDesc.Format)) { - sizeX = DivideRoudingUp(sizeX / 4, 1u); - sizeY = DivideRoudingUp(sizeY / 4, 1u); + sizeX = DivideRoundingUp(sizeX, 4u); + sizeY = DivideRoundingUp(sizeY, 4u); bitsPerPixel *= 16; } UINT tileSizeX = 0, tileSizeY = 0; - switch(bitsPerPixel) + switch (bitsPerPixel) { case 8: tileSizeX = 64; tileSizeY = 64; break; case 16: tileSizeX = 64; tileSizeY = 32; break; @@ -724,28 +697,189 @@ static bool CanUseSmallAlignment(const D3D12_RESOURCE_DESC_T& resourceDesc) default: return false; } - const UINT tileCount = DivideRoudingUp(sizeX, tileSizeX) * DivideRoudingUp(sizeY, tileSizeY); + const UINT tileCount = DivideRoundingUp(sizeX, tileSizeX) * DivideRoundingUp(sizeY, tileSizeY); return tileCount <= 16; } - -static inline bool IsHeapTypeStandard(D3D12_HEAP_TYPE type) + +static bool ValidateAllocateMemoryParameters( + const ALLOCATION_DESC* pAllocDesc, + const D3D12_RESOURCE_ALLOCATION_INFO* pAllocInfo, + Allocation** ppAllocation) { - return type == D3D12_HEAP_TYPE_DEFAULT || - type == D3D12_HEAP_TYPE_UPLOAD || - type == D3D12_HEAP_TYPE_READBACK; + return pAllocDesc && + pAllocInfo && + ppAllocation && + (pAllocInfo->Alignment == 0 || + pAllocInfo->Alignment == D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT || + pAllocInfo->Alignment == D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT) && + pAllocInfo->SizeInBytes != 0 && + pAllocInfo->SizeInBytes % (64ull * 1024) == 0; } -static inline D3D12_HEAP_PROPERTIES StandardHeapTypeToHeapProperties(D3D12_HEAP_TYPE type) +#endif // _D3D12MA_FUNCTIONS + +#ifndef _D3D12MA_STATISTICS_FUNCTIONS + +static void ClearStatistics(Statistics& outStats) { - D3D12MA_ASSERT(IsHeapTypeStandard(type)); - D3D12_HEAP_PROPERTIES result = {}; - result.Type = type; - return result; + outStats.BlockCount = 0; + outStats.AllocationCount = 0; + outStats.BlockBytes = 0; + outStats.AllocationBytes = 0; } -//////////////////////////////////////////////////////////////////////////////// -// Private class Vector +static void ClearDetailedStatistics(DetailedStatistics& outStats) +{ + ClearStatistics(outStats.Stats); + outStats.UnusedRangeCount = 0; + outStats.AllocationSizeMin = UINT64_MAX; + outStats.AllocationSizeMax = 0; + outStats.UnusedRangeSizeMin = UINT64_MAX; + outStats.UnusedRangeSizeMax = 0; +} +static void AddStatistics(Statistics& inoutStats, const Statistics& src) +{ + inoutStats.BlockCount += src.BlockCount; + inoutStats.AllocationCount += src.AllocationCount; + inoutStats.BlockBytes += src.BlockBytes; + inoutStats.AllocationBytes += src.AllocationBytes; +} + +static void AddDetailedStatistics(DetailedStatistics& inoutStats, const DetailedStatistics& src) +{ + AddStatistics(inoutStats.Stats, src.Stats); + inoutStats.UnusedRangeCount += src.UnusedRangeCount; + inoutStats.AllocationSizeMin = D3D12MA_MIN(inoutStats.AllocationSizeMin, src.AllocationSizeMin); + inoutStats.AllocationSizeMax = D3D12MA_MAX(inoutStats.AllocationSizeMax, src.AllocationSizeMax); + inoutStats.UnusedRangeSizeMin = D3D12MA_MIN(inoutStats.UnusedRangeSizeMin, src.UnusedRangeSizeMin); + inoutStats.UnusedRangeSizeMax = D3D12MA_MAX(inoutStats.UnusedRangeSizeMax, src.UnusedRangeSizeMax); +} + +static void AddDetailedStatisticsAllocation(DetailedStatistics& inoutStats, UINT64 size) +{ + inoutStats.Stats.AllocationCount++; + inoutStats.Stats.AllocationBytes += size; + inoutStats.AllocationSizeMin = D3D12MA_MIN(inoutStats.AllocationSizeMin, size); + inoutStats.AllocationSizeMax = D3D12MA_MAX(inoutStats.AllocationSizeMax, size); +} + +static void AddDetailedStatisticsUnusedRange(DetailedStatistics& inoutStats, UINT64 size) +{ + inoutStats.UnusedRangeCount++; + inoutStats.UnusedRangeSizeMin = D3D12MA_MIN(inoutStats.UnusedRangeSizeMin, size); + inoutStats.UnusedRangeSizeMax = D3D12MA_MAX(inoutStats.UnusedRangeSizeMax, size); +} + +#endif // _D3D12MA_STATISTICS_FUNCTIONS + + +#ifndef _D3D12MA_MUTEX + +#ifndef D3D12MA_MUTEX + class Mutex + { + public: + void Lock() { m_Mutex.lock(); } + void Unlock() { m_Mutex.unlock(); } + + private: + std::mutex m_Mutex; + }; + #define D3D12MA_MUTEX Mutex +#endif + +#ifndef D3D12MA_RW_MUTEX +#ifdef _WIN32 + class RWMutex + { + public: + RWMutex() { InitializeSRWLock(&m_Lock); } + void LockRead() { AcquireSRWLockShared(&m_Lock); } + void UnlockRead() { ReleaseSRWLockShared(&m_Lock); } + void LockWrite() { AcquireSRWLockExclusive(&m_Lock); } + void UnlockWrite() { ReleaseSRWLockExclusive(&m_Lock); } + + private: + SRWLOCK m_Lock; + }; +#else // #ifdef _WIN32 + class RWMutex + { + public: + RWMutex() {} + void LockRead() { m_Mutex.lock_shared(); } + void UnlockRead() { m_Mutex.unlock_shared(); } + void LockWrite() { m_Mutex.lock(); } + void UnlockWrite() { m_Mutex.unlock(); } + + private: + std::shared_timed_mutex m_Mutex; + }; +#endif // #ifdef _WIN32 + #define D3D12MA_RW_MUTEX RWMutex +#endif // #ifndef D3D12MA_RW_MUTEX + +// Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). +struct MutexLock +{ + D3D12MA_CLASS_NO_COPY(MutexLock); +public: + MutexLock(D3D12MA_MUTEX& mutex, bool useMutex = true) : + m_pMutex(useMutex ? &mutex : NULL) + { + if (m_pMutex) m_pMutex->Lock(); + } + ~MutexLock() { if (m_pMutex) m_pMutex->Unlock(); } + +private: + D3D12MA_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for reading. +struct MutexLockRead +{ + D3D12MA_CLASS_NO_COPY(MutexLockRead); +public: + MutexLockRead(D3D12MA_RW_MUTEX& mutex, bool useMutex) + : m_pMutex(useMutex ? &mutex : NULL) + { + if(m_pMutex) + { + m_pMutex->LockRead(); + } + } + ~MutexLockRead() { if (m_pMutex) m_pMutex->UnlockRead(); } + +private: + D3D12MA_RW_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for writing. +struct MutexLockWrite +{ + D3D12MA_CLASS_NO_COPY(MutexLockWrite); +public: + MutexLockWrite(D3D12MA_RW_MUTEX& mutex, bool useMutex) + : m_pMutex(useMutex ? &mutex : NULL) + { + if (m_pMutex) m_pMutex->LockWrite(); + } + ~MutexLockWrite() { if (m_pMutex) m_pMutex->UnlockWrite(); } + +private: + D3D12MA_RW_MUTEX* m_pMutex; +}; + +#if D3D12MA_DEBUG_GLOBAL_MUTEX + static D3D12MA_MUTEX g_DebugGlobalMutex; + #define D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK MutexLock debugGlobalMutexLock(g_DebugGlobalMutex, true); +#else + #define D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK +#endif +#endif // _D3D12MA_MUTEX + +#ifndef _D3D12MA_VECTOR /* Dynamically resizing continuous array. Class with interface similar to std::vector. T must be POD because constructors and destructors are not called and memcpy is @@ -756,226 +890,54 @@ class Vector { public: using value_type = T; + using iterator = T*; // allocationCallbacks externally owned, must outlive this object. - Vector(const ALLOCATION_CALLBACKS& allocationCallbacks) : - m_AllocationCallbacks(allocationCallbacks), - m_pArray(NULL), - m_Count(0), - m_Capacity(0) - { - } - - Vector(size_t count, const ALLOCATION_CALLBACKS& allocationCallbacks) : - m_AllocationCallbacks(allocationCallbacks), - m_pArray(count ? AllocateArray(allocationCallbacks, count) : NULL), - m_Count(count), - m_Capacity(count) - { - } - - Vector(const Vector& src) : - m_AllocationCallbacks(src.m_AllocationCallbacks), - m_pArray(src.m_Count ? AllocateArray(src.m_AllocationCallbacks, src.m_Count) : NULL), - m_Count(src.m_Count), - m_Capacity(src.m_Count) - { - if(m_Count > 0) - { - memcpy(m_pArray, src.m_pArray, m_Count * sizeof(T)); - } - } - - ~Vector() - { - Free(m_AllocationCallbacks, m_pArray); - } - - Vector& operator=(const Vector& rhs) - { - if(&rhs != this) - { - resize(rhs.m_Count); - if(m_Count != 0) - { - memcpy(m_pArray, rhs.m_pArray, m_Count * sizeof(T)); - } - } - return *this; - } + Vector(const ALLOCATION_CALLBACKS& allocationCallbacks); + Vector(size_t count, const ALLOCATION_CALLBACKS& allocationCallbacks); + Vector(const Vector& src); + ~Vector(); + const ALLOCATION_CALLBACKS& GetAllocs() const { return m_AllocationCallbacks; } bool empty() const { return m_Count == 0; } size_t size() const { return m_Count; } T* data() { return m_pArray; } const T* data() const { return m_pArray; } - - T& operator[](size_t index) - { - D3D12MA_HEAVY_ASSERT(index < m_Count); - return m_pArray[index]; - } - const T& operator[](size_t index) const - { - D3D12MA_HEAVY_ASSERT(index < m_Count); - return m_pArray[index]; - } - - T& front() - { - D3D12MA_HEAVY_ASSERT(m_Count > 0); - return m_pArray[0]; - } - const T& front() const - { - D3D12MA_HEAVY_ASSERT(m_Count > 0); - return m_pArray[0]; - } - T& back() - { - D3D12MA_HEAVY_ASSERT(m_Count > 0); - return m_pArray[m_Count - 1]; - } - const T& back() const - { - D3D12MA_HEAVY_ASSERT(m_Count > 0); - return m_pArray[m_Count - 1]; - } - - void reserve(size_t newCapacity, bool freeMemory = false) - { - newCapacity = D3D12MA_MAX(newCapacity, m_Count); - - if((newCapacity < m_Capacity) && !freeMemory) - { - newCapacity = m_Capacity; - } - - if(newCapacity != m_Capacity) - { - T* const newArray = newCapacity ? AllocateArray(m_AllocationCallbacks, newCapacity) : NULL; - if(m_Count != 0) - { - memcpy(newArray, m_pArray, m_Count * sizeof(T)); - } - Free(m_AllocationCallbacks, m_pArray); - m_Capacity = newCapacity; - m_pArray = newArray; - } - } - - void resize(size_t newCount, bool freeMemory = false) - { - size_t newCapacity = m_Capacity; - if(newCount > m_Capacity) - { - newCapacity = D3D12MA_MAX(newCount, D3D12MA_MAX(m_Capacity * 3 / 2, (size_t)8)); - } - else if(freeMemory) - { - newCapacity = newCount; - } - - if(newCapacity != m_Capacity) - { - T* const newArray = newCapacity ? AllocateArray(m_AllocationCallbacks, newCapacity) : NULL; - const size_t elementsToCopy = D3D12MA_MIN(m_Count, newCount); - if(elementsToCopy != 0) - { - memcpy(newArray, m_pArray, elementsToCopy * sizeof(T)); - } - Free(m_AllocationCallbacks, m_pArray); - m_Capacity = newCapacity; - m_pArray = newArray; - } - - m_Count = newCount; - } - - void clear(bool freeMemory = false) - { - resize(0, freeMemory); - } - - void insert(size_t index, const T& src) - { - D3D12MA_HEAVY_ASSERT(index <= m_Count); - const size_t oldCount = size(); - resize(oldCount + 1); - if(index < oldCount) - { - memmove(m_pArray + (index + 1), m_pArray + index, (oldCount - index) * sizeof(T)); - } - m_pArray[index] = src; - } - - void remove(size_t index) - { - D3D12MA_HEAVY_ASSERT(index < m_Count); - const size_t oldCount = size(); - if(index < oldCount - 1) - { - memmove(m_pArray + index, m_pArray + (index + 1), (oldCount - index - 1) * sizeof(T)); - } - resize(oldCount - 1); - } - - void push_back(const T& src) - { - const size_t newIndex = size(); - resize(newIndex + 1); - m_pArray[newIndex] = src; - } - - void pop_back() - { - D3D12MA_HEAVY_ASSERT(m_Count > 0); - resize(size() - 1); - } - - void push_front(const T& src) - { - insert(0, src); - } - - void pop_front() - { - D3D12MA_HEAVY_ASSERT(m_Count > 0); - remove(0); - } - - using iterator = T*; + void clear(bool freeMemory = false) { resize(0, freeMemory); } iterator begin() { return m_pArray; } iterator end() { return m_pArray + m_Count; } + iterator rend() { return begin() - 1; } + iterator rbegin() { return end() - 1; } + + const iterator cbegin() const { return m_pArray; } + const iterator cend() const { return m_pArray + m_Count; } + const iterator crbegin() const { return cend() - 1; } + const iterator crend() const { return cbegin() - 1; } + + void push_front(const T& src) { insert(0, src); } + void push_back(const T& src); + void pop_front(); + void pop_back(); + + T& front(); + T& back(); + const T& front() const; + const T& back() const; + + void reserve(size_t newCapacity, bool freeMemory = false); + void resize(size_t newCount, bool freeMemory = false); + void insert(size_t index, const T& src); + void remove(size_t index); template - size_t InsertSorted(const T& value, const CmpLess& cmp) - { - const size_t indexToInsert = BinaryFindFirstNotLess( - m_pArray, - m_pArray + m_Count, - value, - cmp) - m_pArray; - insert(indexToInsert, value); - return indexToInsert; - } - + size_t InsertSorted(const T& value, const CmpLess& cmp); template - bool RemoveSorted(const T& value, const CmpLess& cmp) - { - const iterator it = BinaryFindFirstNotLess( - m_pArray, - m_pArray + m_Count, - value, - cmp); - if((it != end()) && !cmp(*it, value) && !cmp(value, *it)) - { - size_t indexToRemove = it - begin(); - remove(indexToRemove); - return true; - } - return false; - } + bool RemoveSorted(const T& value, const CmpLess& cmp); + + Vector& operator=(const Vector& rhs); + T& operator[](size_t index); + const T& operator[](size_t index) const; private: const ALLOCATION_CALLBACKS& m_AllocationCallbacks; @@ -984,13 +946,231 @@ private: size_t m_Capacity; }; -//////////////////////////////////////////////////////////////////////////////// -// Private class StringBuilder +#ifndef _D3D12MA_VECTOR_FUNCTIONS +template +Vector::Vector(const ALLOCATION_CALLBACKS& allocationCallbacks) + : m_AllocationCallbacks(allocationCallbacks), + m_pArray(NULL), + m_Count(0), + m_Capacity(0) {} +template +Vector::Vector(size_t count, const ALLOCATION_CALLBACKS& allocationCallbacks) + : m_AllocationCallbacks(allocationCallbacks), + m_pArray(count ? AllocateArray(allocationCallbacks, count) : NULL), + m_Count(count), + m_Capacity(count) {} + +template +Vector::Vector(const Vector& src) + : m_AllocationCallbacks(src.m_AllocationCallbacks), + m_pArray(src.m_Count ? AllocateArray(src.m_AllocationCallbacks, src.m_Count) : NULL), + m_Count(src.m_Count), + m_Capacity(src.m_Count) +{ + if (m_Count > 0) + { + memcpy(m_pArray, src.m_pArray, m_Count * sizeof(T)); + } +} + +template +Vector::~Vector() +{ + Free(m_AllocationCallbacks, m_pArray); +} + +template +void Vector::push_back(const T& src) +{ + const size_t newIndex = size(); + resize(newIndex + 1); + m_pArray[newIndex] = src; +} + +template +void Vector::pop_front() +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + remove(0); +} + +template +void Vector::pop_back() +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + resize(size() - 1); +} + +template +T& Vector::front() +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + return m_pArray[0]; +} + +template +T& Vector::back() +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + return m_pArray[m_Count - 1]; +} + +template +const T& Vector::front() const +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + return m_pArray[0]; +} + +template +const T& Vector::back() const +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + return m_pArray[m_Count - 1]; +} + +template +void Vector::reserve(size_t newCapacity, bool freeMemory) +{ + newCapacity = D3D12MA_MAX(newCapacity, m_Count); + + if ((newCapacity < m_Capacity) && !freeMemory) + { + newCapacity = m_Capacity; + } + + if (newCapacity != m_Capacity) + { + T* const newArray = newCapacity ? AllocateArray(m_AllocationCallbacks, newCapacity) : NULL; + if (m_Count != 0) + { + memcpy(newArray, m_pArray, m_Count * sizeof(T)); + } + Free(m_AllocationCallbacks, m_pArray); + m_Capacity = newCapacity; + m_pArray = newArray; + } +} + +template +void Vector::resize(size_t newCount, bool freeMemory) +{ + size_t newCapacity = m_Capacity; + if (newCount > m_Capacity) + { + newCapacity = D3D12MA_MAX(newCount, D3D12MA_MAX(m_Capacity * 3 / 2, (size_t)8)); + } + else if (freeMemory) + { + newCapacity = newCount; + } + + if (newCapacity != m_Capacity) + { + T* const newArray = newCapacity ? AllocateArray(m_AllocationCallbacks, newCapacity) : NULL; + const size_t elementsToCopy = D3D12MA_MIN(m_Count, newCount); + if (elementsToCopy != 0) + { + memcpy(newArray, m_pArray, elementsToCopy * sizeof(T)); + } + Free(m_AllocationCallbacks, m_pArray); + m_Capacity = newCapacity; + m_pArray = newArray; + } + + m_Count = newCount; +} + +template +void Vector::insert(size_t index, const T& src) +{ + D3D12MA_HEAVY_ASSERT(index <= m_Count); + const size_t oldCount = size(); + resize(oldCount + 1); + if (index < oldCount) + { + memmove(m_pArray + (index + 1), m_pArray + index, (oldCount - index) * sizeof(T)); + } + m_pArray[index] = src; +} + +template +void Vector::remove(size_t index) +{ + D3D12MA_HEAVY_ASSERT(index < m_Count); + const size_t oldCount = size(); + if (index < oldCount - 1) + { + memmove(m_pArray + index, m_pArray + (index + 1), (oldCount - index - 1) * sizeof(T)); + } + resize(oldCount - 1); +} + +template template +size_t Vector::InsertSorted(const T& value, const CmpLess& cmp) +{ + const size_t indexToInsert = BinaryFindFirstNotLess( + m_pArray, + m_pArray + m_Count, + value, + cmp) - m_pArray; + insert(indexToInsert, value); + return indexToInsert; +} + +template template +bool Vector::RemoveSorted(const T& value, const CmpLess& cmp) +{ + const iterator it = BinaryFindFirstNotLess( + m_pArray, + m_pArray + m_Count, + value, + cmp); + if ((it != end()) && !cmp(*it, value) && !cmp(value, *it)) + { + size_t indexToRemove = it - begin(); + remove(indexToRemove); + return true; + } + return false; +} + +template +Vector& Vector::operator=(const Vector& rhs) +{ + if (&rhs != this) + { + resize(rhs.m_Count); + if (m_Count != 0) + { + memcpy(m_pArray, rhs.m_pArray, m_Count * sizeof(T)); + } + } + return *this; +} + +template +T& Vector::operator[](size_t index) +{ + D3D12MA_HEAVY_ASSERT(index < m_Count); + return m_pArray[index]; +} + +template +const T& Vector::operator[](size_t index) const +{ + D3D12MA_HEAVY_ASSERT(index < m_Count); + return m_pArray[index]; +} +#endif // _D3D12MA_VECTOR_FUNCTIONS +#endif // _D3D12MA_VECTOR + +#ifndef _D3D12MA_STRING_BUILDER class StringBuilder { public: - StringBuilder(const ALLOCATION_CALLBACKS& allocationCallbacks) : m_Data(allocationCallbacks) { } + StringBuilder(const ALLOCATION_CALLBACKS& allocationCallbacks) : m_Data(allocationCallbacks) {} size_t GetLength() const { return m_Data.size(); } LPCWSTR GetData() const { return m_Data.data(); } @@ -1005,6 +1185,7 @@ private: Vector m_Data; }; +#ifndef _D3D12MA_STRING_BUILDER_FUNCTIONS void StringBuilder::Add(LPCWSTR str) { const size_t len = wcslen(str); @@ -1043,10 +1224,10 @@ void StringBuilder::AddNumber(UINT64 num) while (num); Add(p); } +#endif // _D3D12MA_STRING_BUILDER_FUNCTIONS +#endif // _D3D12MA_STRING_BUILDER -//////////////////////////////////////////////////////////////////////////////// -// Private class JsonWriter - +#ifndef _D3D12MA_JSON_WRITER /* Allows to conveniently build a correct JSON document to be written to the StringBuilder passed to the constructor. @@ -1092,8 +1273,6 @@ public: // Ends writing a string value by writing '"'. void EndString(LPCWSTR pStr = NULL); - void AddAllocationToObject(const Allocation& alloc); - // Writes a number value. void WriteNumber(UINT num); void WriteNumber(UINT64 num); @@ -1102,6 +1281,9 @@ public: // Writes a null value. void WriteNull(); + void AddAllocationToObject(const Allocation& alloc); + void AddDetailedStatisticsInfoObject(const DetailedStatistics& stats); + private: static const WCHAR* const INDENT; @@ -1125,14 +1307,13 @@ private: void WriteIndent(bool oneLess = false); }; +#ifndef _D3D12MA_JSON_WRITER_FUNCTIONS const WCHAR* const JsonWriter::INDENT = L" "; -JsonWriter::JsonWriter(const ALLOCATION_CALLBACKS& allocationCallbacks, StringBuilder& stringBuilder) : - m_SB(stringBuilder), +JsonWriter::JsonWriter(const ALLOCATION_CALLBACKS& allocationCallbacks, StringBuilder& stringBuilder) + : m_SB(stringBuilder), m_Stack(allocationCallbacks), - m_InsideString(false) -{ -} + m_InsideString(false) {} JsonWriter::~JsonWriter() { @@ -1312,6 +1493,85 @@ void JsonWriter::WriteNull() m_SB.Add(L"null"); } +void JsonWriter::AddAllocationToObject(const Allocation& alloc) +{ + WriteString(L"Type"); + switch (alloc.m_PackedData.GetResourceDimension()) { + case D3D12_RESOURCE_DIMENSION_UNKNOWN: + WriteString(L"UNKNOWN"); + break; + case D3D12_RESOURCE_DIMENSION_BUFFER: + WriteString(L"BUFFER"); + break; + case D3D12_RESOURCE_DIMENSION_TEXTURE1D: + WriteString(L"TEXTURE1D"); + break; + case D3D12_RESOURCE_DIMENSION_TEXTURE2D: + WriteString(L"TEXTURE2D"); + break; + case D3D12_RESOURCE_DIMENSION_TEXTURE3D: + WriteString(L"TEXTURE3D"); + break; + default: D3D12MA_ASSERT(0); break; + } + + WriteString(L"Size"); + WriteNumber(alloc.GetSize()); + WriteString(L"Usage"); + WriteNumber((UINT)alloc.m_PackedData.GetResourceFlags()); + + void* privateData = alloc.GetPrivateData(); + if (privateData) + { + WriteString(L"CustomData"); + WriteNumber((uintptr_t)privateData); + } + + LPCWSTR name = alloc.GetName(); + if (name != NULL) + { + WriteString(L"Name"); + WriteString(name); + } + if (alloc.m_PackedData.GetTextureLayout()) + { + WriteString(L"Layout"); + WriteNumber((UINT)alloc.m_PackedData.GetTextureLayout()); + } +} + +void JsonWriter::AddDetailedStatisticsInfoObject(const DetailedStatistics& stats) +{ + BeginObject(); + + WriteString(L"BlockCount"); + WriteNumber(stats.Stats.BlockCount); + WriteString(L"BlockBytes"); + WriteNumber(stats.Stats.BlockBytes); + WriteString(L"AllocationCount"); + WriteNumber(stats.Stats.AllocationCount); + WriteString(L"AllocationBytes"); + WriteNumber(stats.Stats.AllocationBytes); + WriteString(L"UnusedRangeCount"); + WriteNumber(stats.UnusedRangeCount); + + if (stats.Stats.AllocationCount > 1) + { + WriteString(L"AllocationSizeMin"); + WriteNumber(stats.AllocationSizeMin); + WriteString(L"AllocationSizeMax"); + WriteNumber(stats.AllocationSizeMax); + } + if (stats.UnusedRangeCount > 1) + { + WriteString(L"UnusedRangeSizeMin"); + WriteNumber(stats.UnusedRangeSizeMin); + WriteString(L"UnusedRangeSizeMax"); + WriteNumber(stats.UnusedRangeSizeMax); + } + EndObject(); +} + void JsonWriter::BeginValue(bool isString) { if (!m_Stack.empty()) @@ -1356,56 +1616,10 @@ void JsonWriter::WriteIndent(bool oneLess) } } } +#endif // _D3D12MA_JSON_WRITER_FUNCTIONS +#endif // _D3D12MA_JSON_WRITER -void JsonWriter::AddAllocationToObject(const Allocation& alloc) -{ - WriteString(L"Type"); - switch (alloc.m_PackedData.GetResourceDimension()) { - case D3D12_RESOURCE_DIMENSION_UNKNOWN: - WriteString(L"UNKNOWN"); - break; - case D3D12_RESOURCE_DIMENSION_BUFFER: - WriteString(L"BUFFER"); - break; - case D3D12_RESOURCE_DIMENSION_TEXTURE1D: - WriteString(L"TEXTURE1D"); - break; - case D3D12_RESOURCE_DIMENSION_TEXTURE2D: - WriteString(L"TEXTURE2D"); - break; - case D3D12_RESOURCE_DIMENSION_TEXTURE3D: - WriteString(L"TEXTURE3D"); - break; - default: D3D12MA_ASSERT(0); break; - } - WriteString(L"Size"); - WriteNumber(alloc.GetSize()); - LPCWSTR name = alloc.GetName(); - if(name != NULL) - { - WriteString(L"Name"); - WriteString(name); - } - if(alloc.m_PackedData.GetResourceFlags()) - { - WriteString(L"Flags"); - WriteNumber((UINT)alloc.m_PackedData.GetResourceFlags()); - } - if(alloc.m_PackedData.GetTextureLayout()) - { - WriteString(L"Layout"); - WriteNumber((UINT)alloc.m_PackedData.GetTextureLayout()); - } - if(alloc.m_CreationFrameIndex) - { - WriteString(L"CreationFrameIndex"); - WriteNumber(alloc.m_CreationFrameIndex); - } -} - -//////////////////////////////////////////////////////////////////////////////// -// Private class PoolAllocator - +#ifndef _D3D12MA_POOL_ALLOCATOR /* Allocator for objects of type T using a list of arrays (pools) to speed up allocation. Number of elements that can be allocated is not bounded because @@ -1421,8 +1635,10 @@ public: // allocationCallbacks externally owned, must outlive this object. PoolAllocator(const ALLOCATION_CALLBACKS& allocationCallbacks, UINT firstBlockCapacity); ~PoolAllocator() { Clear(); } + void Clear(); - template T* Alloc(Types... args); + template + T* Alloc(Types... args); void Free(T* ptr); private: @@ -1446,9 +1662,10 @@ private: ItemBlock& CreateNewBlock(); }; +#ifndef _D3D12MA_POOL_ALLOCATOR_FUNCTIONS template -PoolAllocator::PoolAllocator(const ALLOCATION_CALLBACKS& allocationCallbacks, UINT firstBlockCapacity) : - m_AllocationCallbacks(allocationCallbacks), +PoolAllocator::PoolAllocator(const ALLOCATION_CALLBACKS& allocationCallbacks, UINT firstBlockCapacity) + : m_AllocationCallbacks(allocationCallbacks), m_FirstBlockCapacity(firstBlockCapacity), m_ItemBlocks(allocationCallbacks) { @@ -1465,8 +1682,8 @@ void PoolAllocator::Clear() m_ItemBlocks.clear(true); } -template -template T* PoolAllocator::Alloc(Types... args) +template template +T* PoolAllocator::Alloc(Types... args) { for(size_t i = m_ItemBlocks.size(); i--; ) { @@ -1536,10 +1753,10 @@ typename PoolAllocator::ItemBlock& PoolAllocator::CreateNewBlock() newBlock.pItems[newBlockCapacity - 1].NextFreeIndex = UINT32_MAX; return m_ItemBlocks.back(); } +#endif // _D3D12MA_POOL_ALLOCATOR_FUNCTIONS +#endif // _D3D12MA_POOL_ALLOCATOR -//////////////////////////////////////////////////////////////////////////////// -// Private class List - +#ifndef _D3D12MA_LIST /* Doubly linked list, with elements allocated out of PoolAllocator. Has custom interface, as well as STL-style interface, including iterator and @@ -1557,15 +1774,137 @@ public: T Value; }; + class reverse_iterator; + class const_reverse_iterator; + class iterator + { + friend class List; + friend class const_iterator; + + public: + iterator() = default; + iterator(const reverse_iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + T& operator*() const; + T* operator->() const; + + iterator& operator++(); + iterator& operator--(); + iterator operator++(int); + iterator operator--(int); + + bool operator==(const iterator& rhs) const; + bool operator!=(const iterator& rhs) const; + + private: + List* m_pList = NULL; + Item* m_pItem = NULL; + + iterator(List* pList, Item* pItem) : m_pList(pList), m_pItem(pItem) {} + }; + + class reverse_iterator + { + friend class List; + friend class const_reverse_iterator; + + public: + reverse_iterator() = default; + reverse_iterator(const iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + T& operator*() const; + T* operator->() const; + + reverse_iterator& operator++(); + reverse_iterator& operator--(); + reverse_iterator operator++(int); + reverse_iterator operator--(int); + + bool operator==(const reverse_iterator& rhs) const; + bool operator!=(const reverse_iterator& rhs) const; + + private: + List* m_pList = NULL; + Item* m_pItem = NULL; + + reverse_iterator(List* pList, Item* pItem) + : m_pList(pList), m_pItem(pItem) {} + }; + + class const_iterator + { + friend class List; + + public: + const_iterator() = default; + const_iterator(const iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + const_iterator(const reverse_iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + const_iterator(const const_reverse_iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + iterator dropConst() const; + const T& operator*() const; + const T* operator->() const; + + const_iterator& operator++(); + const_iterator& operator--(); + const_iterator operator++(int); + const_iterator operator--(int); + + bool operator==(const const_iterator& rhs) const; + bool operator!=(const const_iterator& rhs) const; + + private: + const List* m_pList = NULL; + const Item* m_pItem = NULL; + + const_iterator(const List* pList, const Item* pItem) + : m_pList(pList), m_pItem(pItem) {} + }; + + class const_reverse_iterator + { + friend class List; + + public: + const_reverse_iterator() = default; + const_reverse_iterator(const iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + const_reverse_iterator(const reverse_iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + const_reverse_iterator(const const_iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + reverse_iterator dropConst() const; + const T& operator*() const; + const T* operator->() const; + + const_reverse_iterator& operator++(); + const_reverse_iterator& operator--(); + const_reverse_iterator operator++(int); + const_reverse_iterator operator--(int); + + bool operator==(const const_reverse_iterator& rhs) const; + bool operator!=(const const_reverse_iterator& rhs) const; + + private: + const List* m_pList = NULL; + const Item* m_pItem = NULL; + + const_reverse_iterator(const List* pList, const Item* pItem) + : m_pList(pList), m_pItem(pItem) {} + }; + // allocationCallbacks externally owned, must outlive this object. List(const ALLOCATION_CALLBACKS& allocationCallbacks); - // Intentionally not calling Clear, because that would be unnecessary // computations to return all items to m_ItemAllocator as free. - // ~List() {} + ~List() = default; - void Clear(); - size_t GetCount() const { return m_Count; } bool IsEmpty() const { return m_Count == 0; } @@ -1574,6 +1913,28 @@ public: Item* Back() { return m_pBack; } const Item* Back() const { return m_pBack; } + bool empty() const { return IsEmpty(); } + size_t size() const { return GetCount(); } + void push_back(const T& value) { PushBack(value); } + iterator insert(iterator it, const T& value) { return iterator(this, InsertBefore(it.m_pItem, value)); } + void clear() { Clear(); } + void erase(iterator it) { Remove(it.m_pItem); } + + iterator begin() { return iterator(this, Front()); } + iterator end() { return iterator(this, NULL); } + reverse_iterator rbegin() { return reverse_iterator(this, Back()); } + reverse_iterator rend() { return reverse_iterator(this, NULL); } + + const_iterator cbegin() const { return const_iterator(this, Front()); } + const_iterator cend() const { return const_iterator(this, NULL); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + + const_reverse_iterator crbegin() const { return const_reverse_iterator(this, Back()); } + const_reverse_iterator crend() const { return const_reverse_iterator(this, NULL); } + const_reverse_iterator rbegin() const { return crbegin(); } + const_reverse_iterator rend() const { return crend(); } + Item* PushBack(); Item* PushFront(); Item* PushBack(const T& value); @@ -1585,408 +1946,12 @@ public: Item* InsertBefore(Item* pItem); // Item can be null - it means PushFront. Item* InsertAfter(Item* pItem); - Item* InsertBefore(Item* pItem, const T& value); Item* InsertAfter(Item* pItem, const T& value); + void Clear(); void Remove(Item* pItem); - class reverse_iterator; - class const_reverse_iterator; - class iterator - { - public: - iterator() : - m_pList(NULL), - m_pItem(NULL) - { - } - - iterator(const reverse_iterator& src) : - m_pList(src.m_pList), - m_pItem(src.m_pItem) - { - } - - T& operator*() const - { - D3D12MA_HEAVY_ASSERT(m_pItem != NULL); - return m_pItem->Value; - } - T* operator->() const - { - D3D12MA_HEAVY_ASSERT(m_pItem != NULL); - return &m_pItem->Value; - } - - iterator& operator++() - { - D3D12MA_HEAVY_ASSERT(m_pItem != NULL); - m_pItem = m_pItem->pNext; - return *this; - } - iterator& operator--() - { - if(m_pItem != NULL) - { - m_pItem = m_pItem->pPrev; - } - else - { - D3D12MA_HEAVY_ASSERT(!m_pList->IsEmpty()); - m_pItem = m_pList->Back(); - } - return *this; - } - - iterator operator++(int) - { - iterator result = *this; - ++*this; - return result; - } - iterator operator--(int) - { - iterator result = *this; - --*this; - return result; - } - - bool operator==(const iterator& rhs) const - { - D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem == rhs.m_pItem; - } - bool operator!=(const iterator& rhs) const - { - D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem != rhs.m_pItem; - } - - private: - List* m_pList; - Item* m_pItem; - - iterator(List* pList, Item* pItem) : - m_pList(pList), - m_pItem(pItem) - { - } - - friend class List; - friend class const_iterator; - }; - - class reverse_iterator - { - public: - reverse_iterator() : - m_pList(NULL), - m_pItem(NULL) - { - } - - reverse_iterator(const iterator& src) : - m_pList(src.m_pList), - m_pItem(src.m_pItem) - { - } - - T& operator*() const - { - D3D12MA_HEAVY_ASSERT(m_pItem != NULL); - return m_pItem->Value; - } - T* operator->() const - { - D3D12MA_HEAVY_ASSERT(m_pItem != NULL); - return &m_pItem->Value; - } - - reverse_iterator& operator++() - { - D3D12MA_HEAVY_ASSERT(m_pItem != NULL); - m_pItem = m_pItem->pPrev; - return *this; - } - reverse_iterator& operator--() - { - if(m_pItem != NULL) - { - m_pItem = m_pItem->pNext; - } - else - { - D3D12MA_HEAVY_ASSERT(!m_pList->IsEmpty()); - m_pItem = m_pList->Front(); - } - return *this; - } - - reverse_iterator operator++(int) - { - reverse_iterator result = *this; - ++*this; - return result; - } - reverse_iterator operator--(int) - { - reverse_iterator result = *this; - --*this; - return result; - } - - bool operator==(const reverse_iterator& rhs) const - { - D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem == rhs.m_pItem; - } - bool operator!=(const reverse_iterator& rhs) const - { - D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem != rhs.m_pItem; - } - - private: - List* m_pList; - Item* m_pItem; - - reverse_iterator(List* pList, Item* pItem) : - m_pList(pList), - m_pItem(pItem) - { - } - - friend class List; - friend class const_reverse_iterator; - }; - - class const_iterator - { - public: - const_iterator() : - m_pList(NULL), - m_pItem(NULL) - { - } - - const_iterator(const iterator& src) : - m_pList(src.m_pList), - m_pItem(src.m_pItem) - { - } - - const_iterator(const reverse_iterator& src) : - m_pList(src.m_pList), - m_pItem(src.m_pItem) - { - } - - const_iterator(const const_reverse_iterator& src) : - m_pList(src.m_pList), - m_pItem(src.m_pItem) - { - } - - iterator dropConst() const - { - return iterator(const_cast*>(m_pList), const_cast(m_pItem)); - } - - const T& operator*() const - { - D3D12MA_HEAVY_ASSERT(m_pItem != NULL); - return m_pItem->Value; - } - const T* operator->() const - { - D3D12MA_HEAVY_ASSERT(m_pItem != NULL); - return &m_pItem->Value; - } - - const_iterator& operator++() - { - D3D12MA_HEAVY_ASSERT(m_pItem != NULL); - m_pItem = m_pItem->pNext; - return *this; - } - const_iterator& operator--() - { - if(m_pItem != NULL) - { - m_pItem = m_pItem->pPrev; - } - else - { - D3D12MA_HEAVY_ASSERT(!m_pList->IsEmpty()); - m_pItem = m_pList->Back(); - } - return *this; - } - - const_iterator operator++(int) - { - const_iterator result = *this; - ++*this; - return result; - } - const_iterator operator--(int) - { - const_iterator result = *this; - --*this; - return result; - } - - bool operator==(const const_iterator& rhs) const - { - D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem == rhs.m_pItem; - } - bool operator!=(const const_iterator& rhs) const - { - D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem != rhs.m_pItem; - } - - private: - const_iterator(const List* pList, const Item* pItem) : - m_pList(pList), - m_pItem(pItem) - { - } - - const List* m_pList; - const Item* m_pItem; - - friend class List; - }; - - class const_reverse_iterator - { - public: - const_reverse_iterator() : - m_pList(NULL), - m_pItem(NULL) - { - } - - const_reverse_iterator(const iterator& src) : - m_pList(src.m_pList), - m_pItem(src.m_pItem) - { - } - - const_reverse_iterator(const reverse_iterator& src) : - m_pList(src.m_pList), - m_pItem(src.m_pItem) - { - } - - const_reverse_iterator(const const_iterator& src) : - m_pList(src.m_pList), - m_pItem(src.m_pItem) - { - } - - reverse_iterator dropConst() const - { - return reverse_iterator(const_cast*>(m_pList), const_cast(m_pItem)); - } - - const T& operator*() const - { - D3D12MA_HEAVY_ASSERT(m_pItem != NULL); - return m_pItem->Value; - } - const T* operator->() const - { - D3D12MA_HEAVY_ASSERT(m_pItem != NULL); - return &m_pItem->Value; - } - - const_reverse_iterator& operator++() - { - D3D12MA_HEAVY_ASSERT(m_pItem != NULL); - m_pItem = m_pItem->pPrev; - return *this; - } - const_reverse_iterator& operator--() - { - if(m_pItem != NULL) - { - m_pItem = m_pItem->pNext; - } - else - { - D3D12MA_HEAVY_ASSERT(!m_pList->IsEmpty()); - m_pItem = m_pList->Front(); - } - return *this; - } - - const_reverse_iterator operator++(int) - { - const_reverse_iterator result = *this; - ++*this; - return result; - } - const_reverse_iterator operator--(int) - { - const_reverse_iterator result = *this; - --*this; - return result; - } - - bool operator==(const const_reverse_iterator& rhs) const - { - D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem == rhs.m_pItem; - } - bool operator!=(const const_reverse_iterator& rhs) const - { - D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem != rhs.m_pItem; - } - - private: - const_reverse_iterator(const List* pList, const Item* pItem) : - m_pList(pList), - m_pItem(pItem) - { - } - - const List* m_pList; - const Item* m_pItem; - - friend class List; - }; - - bool empty() const { return IsEmpty(); } - size_t size() const { return GetCount(); } - - iterator begin() { return iterator(this, Front()); } - iterator end() { return iterator(this, NULL); } - - const_iterator cbegin() const { return const_iterator(this, Front()); } - const_iterator cend() const { return const_iterator(this, NULL); } - - const_iterator begin() const { return cbegin(); } - const_iterator end() const { return cend(); } - - reverse_iterator rbegin() { return reverse_iterator(this, Back()); } - reverse_iterator rend() { return reverse_iterator(this, NULL); } - - const_reverse_iterator crbegin() const { return const_reverse_iterator(this, Back()); } - const_reverse_iterator crend() const { return const_reverse_iterator(this, NULL); } - - const_reverse_iterator rbegin() const { return crbegin(); } - const_reverse_iterator rend() const { return crend(); } - - void clear() { Clear(); } - void push_back(const T& value) { PushBack(value); } - void erase(iterator it) { Remove(it.m_pItem); } - iterator insert(iterator it, const T& value) { return iterator(this, InsertBefore(it.m_pItem, value)); } - private: const ALLOCATION_CALLBACKS& m_AllocationCallbacks; PoolAllocator m_ItemAllocator; @@ -1995,15 +1960,302 @@ private: size_t m_Count; }; +#ifndef _D3D12MA_LIST_ITERATOR_FUNCTIONS template -List::List(const ALLOCATION_CALLBACKS& allocationCallbacks) : - m_AllocationCallbacks(allocationCallbacks), +T& List::iterator::operator*() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return m_pItem->Value; +} + +template +T* List::iterator::operator->() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return &m_pItem->Value; +} + +template +typename List::iterator& List::iterator::operator++() +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + m_pItem = m_pItem->pNext; + return *this; +} + +template +typename List::iterator& List::iterator::operator--() +{ + if (m_pItem != NULL) + { + m_pItem = m_pItem->pPrev; + } + else + { + D3D12MA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Back(); + } + return *this; +} + +template +typename List::iterator List::iterator::operator++(int) +{ + iterator result = *this; + ++* this; + return result; +} + +template +typename List::iterator List::iterator::operator--(int) +{ + iterator result = *this; + --* this; + return result; +} + +template +bool List::iterator::operator==(const iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem == rhs.m_pItem; +} + +template +bool List::iterator::operator!=(const iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem != rhs.m_pItem; +} +#endif // _D3D12MA_LIST_ITERATOR_FUNCTIONS + +#ifndef _D3D12MA_LIST_REVERSE_ITERATOR_FUNCTIONS +template +T& List::reverse_iterator::operator*() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return m_pItem->Value; +} + +template +T* List::reverse_iterator::operator->() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return &m_pItem->Value; +} + +template +typename List::reverse_iterator& List::reverse_iterator::operator++() +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + m_pItem = m_pItem->pPrev; + return *this; +} + +template +typename List::reverse_iterator& List::reverse_iterator::operator--() +{ + if (m_pItem != NULL) + { + m_pItem = m_pItem->pNext; + } + else + { + D3D12MA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Front(); + } + return *this; +} + +template +typename List::reverse_iterator List::reverse_iterator::operator++(int) +{ + reverse_iterator result = *this; + ++* this; + return result; +} + +template +typename List::reverse_iterator List::reverse_iterator::operator--(int) +{ + reverse_iterator result = *this; + --* this; + return result; +} + +template +bool List::reverse_iterator::operator==(const reverse_iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem == rhs.m_pItem; +} + +template +bool List::reverse_iterator::operator!=(const reverse_iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem != rhs.m_pItem; +} +#endif // _D3D12MA_LIST_REVERSE_ITERATOR_FUNCTIONS + +#ifndef _D3D12MA_LIST_CONST_ITERATOR_FUNCTIONS +template +typename List::iterator List::const_iterator::dropConst() const +{ + return iterator(const_cast*>(m_pList), const_cast(m_pItem)); +} + +template +const T& List::const_iterator::operator*() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return m_pItem->Value; +} + +template +const T* List::const_iterator::operator->() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return &m_pItem->Value; +} + +template +typename List::const_iterator& List::const_iterator::operator++() +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + m_pItem = m_pItem->pNext; + return *this; +} + +template +typename List::const_iterator& List::const_iterator::operator--() +{ + if (m_pItem != NULL) + { + m_pItem = m_pItem->pPrev; + } + else + { + D3D12MA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Back(); + } + return *this; +} + +template +typename List::const_iterator List::const_iterator::operator++(int) +{ + const_iterator result = *this; + ++* this; + return result; +} + +template +typename List::const_iterator List::const_iterator::operator--(int) +{ + const_iterator result = *this; + --* this; + return result; +} + +template +bool List::const_iterator::operator==(const const_iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem == rhs.m_pItem; +} + +template +bool List::const_iterator::operator!=(const const_iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem != rhs.m_pItem; +} +#endif // _D3D12MA_LIST_CONST_ITERATOR_FUNCTIONS + +#ifndef _D3D12MA_LIST_CONST_REVERSE_ITERATOR_FUNCTIONS +template +typename List::reverse_iterator List::const_reverse_iterator::dropConst() const +{ + return reverse_iterator(const_cast*>(m_pList), const_cast(m_pItem)); +} + +template +const T& List::const_reverse_iterator::operator*() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return m_pItem->Value; +} + +template +const T* List::const_reverse_iterator::operator->() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return &m_pItem->Value; +} + +template +typename List::const_reverse_iterator& List::const_reverse_iterator::operator++() +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + m_pItem = m_pItem->pPrev; + return *this; +} + +template +typename List::const_reverse_iterator& List::const_reverse_iterator::operator--() +{ + if (m_pItem != NULL) + { + m_pItem = m_pItem->pNext; + } + else + { + D3D12MA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Front(); + } + return *this; +} + +template +typename List::const_reverse_iterator List::const_reverse_iterator::operator++(int) +{ + const_reverse_iterator result = *this; + ++* this; + return result; +} + +template +typename List::const_reverse_iterator List::const_reverse_iterator::operator--(int) +{ + const_reverse_iterator result = *this; + --* this; + return result; +} + +template +bool List::const_reverse_iterator::operator==(const const_reverse_iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem == rhs.m_pItem; +} + +template +bool List::const_reverse_iterator::operator!=(const const_reverse_iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem != rhs.m_pItem; +} +#endif // _D3D12MA_LIST_CONST_REVERSE_ITERATOR_FUNCTIONS + +#ifndef _D3D12MA_LIST_FUNCTIONS +template +List::List(const ALLOCATION_CALLBACKS& allocationCallbacks) + : m_AllocationCallbacks(allocationCallbacks), m_ItemAllocator(allocationCallbacks, 128), m_pFront(NULL), m_pBack(NULL), - m_Count(0) -{ -} + m_Count(0) {} template void List::Clear() @@ -2212,10 +2464,10 @@ typename List::Item* List::InsertAfter(Item* pItem, const T& value) newItem->Value = value; return newItem; } +#endif // _D3D12MA_LIST_FUNCTIONS +#endif // _D3D12MA_LIST -//////////////////////////////////////////////////////////////////////////////// -// Private class IntrusiveLinkedList - +#ifndef _D3D12MA_INTRUSIVE_LINKED_LIST /* Expected interface of ItemTypeTraits: struct MyItemTypeTraits @@ -2234,204 +2486,238 @@ public: using ItemType = typename ItemTypeTraits::ItemType; static ItemType* GetPrev(const ItemType* item) { return ItemTypeTraits::GetPrev(item); } static ItemType* GetNext(const ItemType* item) { return ItemTypeTraits::GetNext(item); } + // Movable, not copyable. - IntrusiveLinkedList() { } - IntrusiveLinkedList(const IntrusiveLinkedList& src) = delete; - IntrusiveLinkedList(IntrusiveLinkedList&& src) : - m_Front(src.m_Front), m_Back(src.m_Back), m_Count(src.m_Count) - { - src.m_Front = src.m_Back = NULL; - src.m_Count = 0; - } - ~IntrusiveLinkedList() - { - D3D12MA_HEAVY_ASSERT(IsEmpty()); - } - IntrusiveLinkedList& operator=(const IntrusiveLinkedList& src) = delete; - IntrusiveLinkedList& operator=(IntrusiveLinkedList&& src) - { - if(&src != this) - { - D3D12MA_HEAVY_ASSERT(IsEmpty()); - m_Front = src.m_Front; - m_Back = src.m_Back; - m_Count = src.m_Count; - src.m_Front = src.m_Back = NULL; - src.m_Count = 0; - } - return *this; - } - void RemoveAll() - { - if(!IsEmpty()) - { - ItemType* item = m_Back; - while(item != NULL) - { - ItemType* const prevItem = ItemTypeTraits::AccessPrev(item); - ItemTypeTraits::AccessPrev(item) = NULL; - ItemTypeTraits::AccessNext(item) = NULL; - item = prevItem; - } - m_Front = NULL; - m_Back = NULL; - m_Count = 0; - } - } + IntrusiveLinkedList() = default; + IntrusiveLinkedList(const IntrusiveLinkedList&) = delete; + IntrusiveLinkedList(IntrusiveLinkedList&& src); + IntrusiveLinkedList& operator=(const IntrusiveLinkedList&) = delete; + IntrusiveLinkedList& operator=(IntrusiveLinkedList&& src); + ~IntrusiveLinkedList() { D3D12MA_HEAVY_ASSERT(IsEmpty()); } + size_t GetCount() const { return m_Count; } bool IsEmpty() const { return m_Count == 0; } + ItemType* Front() { return m_Front; } - const ItemType* Front() const { return m_Front; } ItemType* Back() { return m_Back; } + const ItemType* Front() const { return m_Front; } const ItemType* Back() const { return m_Back; } - void PushBack(ItemType* item) - { - D3D12MA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == NULL && ItemTypeTraits::GetNext(item) == NULL); - if(IsEmpty()) - { - m_Front = item; - m_Back = item; - m_Count = 1; - } - else - { - ItemTypeTraits::AccessPrev(item) = m_Back; - ItemTypeTraits::AccessNext(m_Back) = item; - m_Back = item; - ++m_Count; - } - } - void PushFront(ItemType* item) - { - D3D12MA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == NULL && ItemTypeTraits::GetNext(item) == NULL); - if(IsEmpty()) - { - m_Front = item; - m_Back = item; - m_Count = 1; - } - else - { - ItemTypeTraits::AccessNext(item) = m_Front; - ItemTypeTraits::AccessPrev(m_Front) = item; - m_Front = item; - ++m_Count; - } - } - ItemType* PopBack() - { - D3D12MA_HEAVY_ASSERT(m_Count > 0); - ItemType* const backItem = m_Back; - ItemType* const prevItem = ItemTypeTraits::GetPrev(backItem); - if(prevItem != NULL) - { - ItemTypeTraits::AccessNext(prevItem) = NULL; - } - m_Back = prevItem; - --m_Count; - ItemTypeTraits::AccessPrev(backItem) = NULL; - ItemTypeTraits::AccessNext(backItem) = NULL; - return backItem; - } - ItemType* PopFront() - { - D3D12MA_HEAVY_ASSERT(m_Count > 0); - ItemType* const frontItem = m_Front; - ItemType* const nextItem = ItemTypeTraits::GetNext(frontItem); - if(nextItem != NULL) - { - ItemTypeTraits::AccessPrev(nextItem) = NULL; - } - m_Front = nextItem; - --m_Count; - ItemTypeTraits::AccessPrev(frontItem) = NULL; - ItemTypeTraits::AccessNext(frontItem) = NULL; - return frontItem; - } + + void PushBack(ItemType* item); + void PushFront(ItemType* item); + ItemType* PopBack(); + ItemType* PopFront(); // MyItem can be null - it means PushBack. - void InsertBefore(ItemType* existingItem, ItemType* newItem) - { - D3D12MA_HEAVY_ASSERT(newItem != NULL && ItemTypeTraits::GetPrev(newItem) == NULL && ItemTypeTraits::GetNext(newItem) == NULL); - if(existingItem != NULL) - { - ItemType* const prevItem = ItemTypeTraits::GetPrev(existingItem); - ItemTypeTraits::AccessPrev(newItem) = prevItem; - ItemTypeTraits::AccessNext(newItem) = existingItem; - ItemTypeTraits::AccessPrev(existingItem) = newItem; - if(prevItem != NULL) - { - ItemTypeTraits::AccessNext(prevItem) = newItem; - } - else - { - D3D12MA_HEAVY_ASSERT(m_Front == existingItem); - m_Front = newItem; - } - ++m_Count; - } - else - PushBack(newItem); - } + void InsertBefore(ItemType* existingItem, ItemType* newItem); // MyItem can be null - it means PushFront. - void InsertAfter(ItemType* existingItem, ItemType* newItem) - { - D3D12MA_HEAVY_ASSERT(newItem != NULL && ItemTypeTraits::GetPrev(newItem) == NULL && ItemTypeTraits::GetNext(newItem) == NULL); - if(existingItem != NULL) - { - ItemType* const nextItem = ItemTypeTraits::GetNext(existingItem); - ItemTypeTraits::AccessNext(newItem) = nextItem; - ItemTypeTraits::AccessPrev(newItem) = existingItem; - ItemTypeTraits::AccessNext(existingItem) = newItem; - if(nextItem != NULL) - { - ItemTypeTraits::AccessPrev(nextItem) = newItem; - } - else - { - D3D12MA_HEAVY_ASSERT(m_Back == existingItem); - m_Back = newItem; - } - ++m_Count; - } - else - return PushFront(newItem); - } - void Remove(ItemType* item) - { - D3D12MA_HEAVY_ASSERT(item != NULL && m_Count > 0); - if(ItemTypeTraits::GetPrev(item) != NULL) - { - ItemTypeTraits::AccessNext(ItemTypeTraits::AccessPrev(item)) = ItemTypeTraits::GetNext(item); - } - else - { - D3D12MA_HEAVY_ASSERT(m_Front == item); - m_Front = ItemTypeTraits::GetNext(item); - } + void InsertAfter(ItemType* existingItem, ItemType* newItem); + + void Remove(ItemType* item); + void RemoveAll(); - if(ItemTypeTraits::GetNext(item) != NULL) - { - ItemTypeTraits::AccessPrev(ItemTypeTraits::AccessNext(item)) = ItemTypeTraits::GetPrev(item); - } - else - { - D3D12MA_HEAVY_ASSERT(m_Back == item); - m_Back = ItemTypeTraits::GetPrev(item); - } - ItemTypeTraits::AccessPrev(item) = NULL; - ItemTypeTraits::AccessNext(item) = NULL; - --m_Count; - } private: ItemType* m_Front = NULL; ItemType* m_Back = NULL; size_t m_Count = 0; }; -//////////////////////////////////////////////////////////////////////////////// -// Private class AllocationObjectAllocator definition +#ifndef _D3D12MA_INTRUSIVE_LINKED_LIST_FUNCTIONS +template +IntrusiveLinkedList::IntrusiveLinkedList(IntrusiveLinkedList&& src) + : m_Front(src.m_Front), m_Back(src.m_Back), m_Count(src.m_Count) +{ + src.m_Front = src.m_Back = NULL; + src.m_Count = 0; +} +template +IntrusiveLinkedList& IntrusiveLinkedList::operator=(IntrusiveLinkedList&& src) +{ + if (&src != this) + { + D3D12MA_HEAVY_ASSERT(IsEmpty()); + m_Front = src.m_Front; + m_Back = src.m_Back; + m_Count = src.m_Count; + src.m_Front = src.m_Back = NULL; + src.m_Count = 0; + } + return *this; +} + +template +void IntrusiveLinkedList::PushBack(ItemType* item) +{ + D3D12MA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == NULL && ItemTypeTraits::GetNext(item) == NULL); + if (IsEmpty()) + { + m_Front = item; + m_Back = item; + m_Count = 1; + } + else + { + ItemTypeTraits::AccessPrev(item) = m_Back; + ItemTypeTraits::AccessNext(m_Back) = item; + m_Back = item; + ++m_Count; + } +} + +template +void IntrusiveLinkedList::PushFront(ItemType* item) +{ + D3D12MA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == NULL && ItemTypeTraits::GetNext(item) == NULL); + if (IsEmpty()) + { + m_Front = item; + m_Back = item; + m_Count = 1; + } + else + { + ItemTypeTraits::AccessNext(item) = m_Front; + ItemTypeTraits::AccessPrev(m_Front) = item; + m_Front = item; + ++m_Count; + } +} + +template +typename IntrusiveLinkedList::ItemType* IntrusiveLinkedList::PopBack() +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + ItemType* const backItem = m_Back; + ItemType* const prevItem = ItemTypeTraits::GetPrev(backItem); + if (prevItem != NULL) + { + ItemTypeTraits::AccessNext(prevItem) = NULL; + } + m_Back = prevItem; + --m_Count; + ItemTypeTraits::AccessPrev(backItem) = NULL; + ItemTypeTraits::AccessNext(backItem) = NULL; + return backItem; +} + +template +typename IntrusiveLinkedList::ItemType* IntrusiveLinkedList::PopFront() +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + ItemType* const frontItem = m_Front; + ItemType* const nextItem = ItemTypeTraits::GetNext(frontItem); + if (nextItem != NULL) + { + ItemTypeTraits::AccessPrev(nextItem) = NULL; + } + m_Front = nextItem; + --m_Count; + ItemTypeTraits::AccessPrev(frontItem) = NULL; + ItemTypeTraits::AccessNext(frontItem) = NULL; + return frontItem; +} + +template +void IntrusiveLinkedList::InsertBefore(ItemType* existingItem, ItemType* newItem) +{ + D3D12MA_HEAVY_ASSERT(newItem != NULL && ItemTypeTraits::GetPrev(newItem) == NULL && ItemTypeTraits::GetNext(newItem) == NULL); + if (existingItem != NULL) + { + ItemType* const prevItem = ItemTypeTraits::GetPrev(existingItem); + ItemTypeTraits::AccessPrev(newItem) = prevItem; + ItemTypeTraits::AccessNext(newItem) = existingItem; + ItemTypeTraits::AccessPrev(existingItem) = newItem; + if (prevItem != NULL) + { + ItemTypeTraits::AccessNext(prevItem) = newItem; + } + else + { + D3D12MA_HEAVY_ASSERT(m_Front == existingItem); + m_Front = newItem; + } + ++m_Count; + } + else + PushBack(newItem); +} + +template +void IntrusiveLinkedList::InsertAfter(ItemType* existingItem, ItemType* newItem) +{ + D3D12MA_HEAVY_ASSERT(newItem != NULL && ItemTypeTraits::GetPrev(newItem) == NULL && ItemTypeTraits::GetNext(newItem) == NULL); + if (existingItem != NULL) + { + ItemType* const nextItem = ItemTypeTraits::GetNext(existingItem); + ItemTypeTraits::AccessNext(newItem) = nextItem; + ItemTypeTraits::AccessPrev(newItem) = existingItem; + ItemTypeTraits::AccessNext(existingItem) = newItem; + if (nextItem != NULL) + { + ItemTypeTraits::AccessPrev(nextItem) = newItem; + } + else + { + D3D12MA_HEAVY_ASSERT(m_Back == existingItem); + m_Back = newItem; + } + ++m_Count; + } + else + return PushFront(newItem); +} + +template +void IntrusiveLinkedList::Remove(ItemType* item) +{ + D3D12MA_HEAVY_ASSERT(item != NULL && m_Count > 0); + if (ItemTypeTraits::GetPrev(item) != NULL) + { + ItemTypeTraits::AccessNext(ItemTypeTraits::AccessPrev(item)) = ItemTypeTraits::GetNext(item); + } + else + { + D3D12MA_HEAVY_ASSERT(m_Front == item); + m_Front = ItemTypeTraits::GetNext(item); + } + + if (ItemTypeTraits::GetNext(item) != NULL) + { + ItemTypeTraits::AccessPrev(ItemTypeTraits::AccessNext(item)) = ItemTypeTraits::GetPrev(item); + } + else + { + D3D12MA_HEAVY_ASSERT(m_Back == item); + m_Back = ItemTypeTraits::GetPrev(item); + } + ItemTypeTraits::AccessPrev(item) = NULL; + ItemTypeTraits::AccessNext(item) = NULL; + --m_Count; +} + +template +void IntrusiveLinkedList::RemoveAll() +{ + if (!IsEmpty()) + { + ItemType* item = m_Back; + while (item != NULL) + { + ItemType* const prevItem = ItemTypeTraits::AccessPrev(item); + ItemTypeTraits::AccessPrev(item) = NULL; + ItemTypeTraits::AccessNext(item) = NULL; + item = prevItem; + } + m_Front = NULL; + m_Back = NULL; + m_Count = 0; + } +} +#endif // _D3D12MA_INTRUSIVE_LINKED_LIST_FUNCTIONS +#endif // _D3D12MA_INTRUSIVE_LINKED_LIST + +#ifndef _D3D12MA_ALLOCATION_OBJECT_ALLOCATOR /* Thread-safe wrapper over PoolAllocator free list, for allocation of Allocation objects. */ @@ -2439,9 +2725,11 @@ class AllocationObjectAllocator { D3D12MA_CLASS_NO_COPY(AllocationObjectAllocator); public: - AllocationObjectAllocator(const ALLOCATION_CALLBACKS& allocationCallbacks); + AllocationObjectAllocator(const ALLOCATION_CALLBACKS& allocationCallbacks) + : m_Allocator(allocationCallbacks, 1024) {} - template Allocation* Allocate(Types... args); + template + Allocation* Allocate(Types... args); void Free(Allocation* alloc); private: @@ -2449,15 +2737,23 @@ private: PoolAllocator m_Allocator; }; -//////////////////////////////////////////////////////////////////////////////// -// Private class BlockMetadata and derived classes - declarations - -enum SuballocationType +#ifndef _D3D12MA_ALLOCATION_OBJECT_ALLOCATOR_FUNCTIONS +template +Allocation* AllocationObjectAllocator::Allocate(Types... args) { - SUBALLOCATION_TYPE_FREE = 0, - SUBALLOCATION_TYPE_ALLOCATION = 1, -}; + MutexLock mutexLock(m_Mutex); + return m_Allocator.Alloc(std::forward(args)...); +} +void AllocationObjectAllocator::Free(Allocation* alloc) +{ + MutexLock mutexLock(m_Mutex); + m_Allocator.Free(alloc); +} +#endif // _D3D12MA_ALLOCATION_OBJECT_ALLOCATOR_FUNCTIONS +#endif // _D3D12MA_ALLOCATION_OBJECT_ALLOCATOR + +#ifndef _D3D12MA_SUBALLOCATION /* Represents a region of NormalBlock that is either assigned and returned as allocated memory block or free. @@ -2466,9 +2762,10 @@ struct Suballocation { UINT64 offset; UINT64 size; - void* userData; + void* privateData; SuballocationType type; }; +using SuballocationList = List; // Comparator for offsets. struct SuballocationOffsetLess @@ -2478,6 +2775,7 @@ struct SuballocationOffsetLess return lhs.offset < rhs.offset; } }; + struct SuballocationOffsetGreater { bool operator()(const Suballocation& lhs, const Suballocation& rhs) const @@ -2486,8 +2784,6 @@ struct SuballocationOffsetGreater } }; -using SuballocationList = List; - struct SuballocationItemSizeLess { bool operator()(const SuballocationList::iterator lhs, const SuballocationList::iterator rhs) const @@ -2499,19 +2795,25 @@ struct SuballocationItemSizeLess return lhs->size < rhsSize; } }; +#endif // _D3D12MA_SUBALLOCATION +#ifndef _D3D12MA_ALLOCATION_REQUEST /* Parameters of planned allocation inside a NormalBlock. */ struct AllocationRequest { - UINT64 offset; + AllocHandle allocHandle; + UINT64 size; + UINT64 algorithmData; UINT64 sumFreeSize; // Sum size of free items that overlap with proposed allocation. UINT64 sumItemSize; // Sum size of items to make lost that overlap with proposed allocation. SuballocationList::iterator item; BOOL zeroInitialized; }; +#endif // _D3D12MA_ALLOCATION_REQUEST +#ifndef _D3D12MA_ZERO_INITIALIZED_RANGE /* Keeps track of the range of bytes that are surely initialized with zeros. Everything outside of it is considered uninitialized memory that may contain @@ -2522,53 +2824,61 @@ The range is left-inclusive. class ZeroInitializedRange { public: - void Reset(UINT64 size) - { - D3D12MA_ASSERT(size > 0); - m_ZeroBeg = 0; - m_ZeroEnd = size; - } - - BOOL IsRangeZeroInitialized(UINT64 beg, UINT64 end) const - { - D3D12MA_ASSERT(beg < end); - return m_ZeroBeg <= beg && end <= m_ZeroEnd; - } - - void MarkRangeAsUsed(UINT64 usedBeg, UINT64 usedEnd) - { - D3D12MA_ASSERT(usedBeg < usedEnd); - // No new bytes marked. - if(usedEnd <= m_ZeroBeg || m_ZeroEnd <= usedBeg) - { - return; - } - // All bytes marked. - if(usedBeg <= m_ZeroBeg && m_ZeroEnd <= usedEnd) - { - m_ZeroBeg = m_ZeroEnd = 0; - } - // Some bytes marked. - else - { - const UINT64 remainingZeroBefore = usedBeg > m_ZeroBeg ? usedBeg - m_ZeroBeg : 0; - const UINT64 remainingZeroAfter = usedEnd < m_ZeroEnd ? m_ZeroEnd - usedEnd : 0; - D3D12MA_ASSERT(remainingZeroBefore > 0 || remainingZeroAfter > 0); - if(remainingZeroBefore > remainingZeroAfter) - { - m_ZeroEnd = usedBeg; - } - else - { - m_ZeroBeg = usedEnd; - } - } - } + void Reset(UINT64 size); + BOOL IsRangeZeroInitialized(UINT64 beg, UINT64 end) const; + void MarkRangeAsUsed(UINT64 usedBeg, UINT64 usedEnd); private: UINT64 m_ZeroBeg = 0, m_ZeroEnd = 0; }; +#ifndef _D3D12MA_ZERO_INITIALIZED_RANGE_FUNCTIONS +void ZeroInitializedRange::Reset(UINT64 size) +{ + D3D12MA_ASSERT(size > 0); + m_ZeroBeg = 0; + m_ZeroEnd = size; +} + +BOOL ZeroInitializedRange::IsRangeZeroInitialized(UINT64 beg, UINT64 end) const +{ + D3D12MA_ASSERT(beg < end); + return m_ZeroBeg <= beg && end <= m_ZeroEnd; +} + +void ZeroInitializedRange::MarkRangeAsUsed(UINT64 usedBeg, UINT64 usedEnd) +{ + D3D12MA_ASSERT(usedBeg < usedEnd); + // No new bytes marked. + if (usedEnd <= m_ZeroBeg || m_ZeroEnd <= usedBeg) + { + return; + } + // All bytes marked. + if (usedBeg <= m_ZeroBeg && m_ZeroEnd <= usedEnd) + { + m_ZeroBeg = m_ZeroEnd = 0; + } + // Some bytes marked. + else + { + const UINT64 remainingZeroBefore = usedBeg > m_ZeroBeg ? usedBeg - m_ZeroBeg : 0; + const UINT64 remainingZeroAfter = usedEnd < m_ZeroEnd ? m_ZeroEnd - usedEnd : 0; + D3D12MA_ASSERT(remainingZeroBefore > 0 || remainingZeroAfter > 0); + if (remainingZeroBefore > remainingZeroAfter) + { + m_ZeroEnd = usedBeg; + } + else + { + m_ZeroBeg = usedEnd; + } + } +} +#endif // _D3D12MA_ZERO_INITIALIZED_RANGE_FUNCTIONS +#endif // _D3D12MA_ZERO_INITIALIZED_RANGE + +#ifndef _D3D12MA_BLOCK_METADATA /* Data structure used for bookkeeping of allocations and unused ranges of memory in a single ID3D12Heap memory block. @@ -2577,20 +2887,21 @@ class BlockMetadata { public: BlockMetadata(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual); - virtual ~BlockMetadata() { } - virtual void Init(UINT64 size) { m_Size = size; } + virtual ~BlockMetadata() = default; + virtual void Init(UINT64 size) { m_Size = size; } // Validates all data structures inside this object. If not valid, returns false. virtual bool Validate() const = 0; UINT64 GetSize() const { return m_Size; } bool IsVirtual() const { return m_IsVirtual; } virtual size_t GetAllocationCount() const = 0; + virtual size_t GetFreeRegionsCount() const = 0; virtual UINT64 GetSumFreeSize() const = 0; - virtual UINT64 GetUnusedRangeSizeMax() const = 0; + virtual UINT64 GetAllocationOffset(AllocHandle allocHandle) const = 0; // Returns true if this block is empty - contains only single free suballocation. virtual bool IsEmpty() const = 0; - virtual void GetAllocationInfo(UINT64 offset, VIRTUAL_ALLOCATION_INFO& outInfo) const = 0; + virtual void GetAllocationInfo(AllocHandle allocHandle, VIRTUAL_ALLOCATION_INFO& outInfo) const = 0; // Tries to find a place for suballocation with given parameters inside this block. // If succeeded, fills pAllocationRequest and returns true. @@ -2598,26 +2909,44 @@ public: virtual bool CreateAllocationRequest( UINT64 allocSize, UINT64 allocAlignment, + bool upperAddress, + UINT32 strategy, AllocationRequest* pAllocationRequest) = 0; // Makes actual allocation based on request. Request must already be checked and valid. virtual void Alloc( const AllocationRequest& request, UINT64 allocSize, - void* userData) = 0; + void* PrivateData) = 0; - virtual void FreeAtOffset(UINT64 offset) = 0; + virtual void Free(AllocHandle allocHandle) = 0; // Frees all allocations. - // Careful! Don't call it if there are Allocation objects owned by pUserData of of cleared allocations! + // Careful! Don't call it if there are Allocation objects owned by pPrivateData of of cleared allocations! virtual void Clear() = 0; - virtual void SetAllocationUserData(UINT64 offset, void* userData) = 0; + virtual AllocHandle GetAllocationListBegin() const = 0; + virtual AllocHandle GetNextAllocation(AllocHandle prevAlloc) const = 0; + virtual UINT64 GetNextFreeRegionSize(AllocHandle alloc) const = 0; + virtual void* GetAllocationPrivateData(AllocHandle allocHandle) const = 0; + virtual void SetAllocationPrivateData(AllocHandle allocHandle, void* privateData) = 0; - virtual void CalcAllocationStatInfo(StatInfo& outInfo) const = 0; + virtual void AddStatistics(Statistics& inoutStats) const = 0; + virtual void AddDetailedStatistics(DetailedStatistics& inoutStats) const = 0; virtual void WriteAllocationInfoToJson(JsonWriter& json) const = 0; protected: const ALLOCATION_CALLBACKS* GetAllocs() const { return m_pAllocationCallbacks; } + UINT64 GetDebugMargin() const { return IsVirtual() ? 0 : D3D12MA_DEBUG_MARGIN; } + + void PrintDetailedMap_Begin(JsonWriter& json, + UINT64 unusedBytes, + size_t allocationCount, + size_t unusedRangeCount) const; + void PrintDetailedMap_Allocation(JsonWriter& json, + UINT64 offset, UINT64 size, void* privateData) const; + void PrintDetailedMap_UnusedRange(JsonWriter& json, + UINT64 offset, UINT64 size) const; + void PrintDetailedMap_End(JsonWriter& json) const; private: UINT64 m_Size; @@ -2627,38 +2956,121 @@ private: D3D12MA_CLASS_NO_COPY(BlockMetadata); }; +#ifndef _D3D12MA_BLOCK_METADATA_FUNCTIONS +BlockMetadata::BlockMetadata(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual) + : m_Size(0), + m_IsVirtual(isVirtual), + m_pAllocationCallbacks(allocationCallbacks) +{ + D3D12MA_ASSERT(allocationCallbacks); +} + +void BlockMetadata::PrintDetailedMap_Begin(JsonWriter& json, + UINT64 unusedBytes, size_t allocationCount, size_t unusedRangeCount) const +{ + json.WriteString(L"TotalBytes"); + json.WriteNumber(GetSize()); + + json.WriteString(L"UnusedBytes"); + json.WriteNumber(unusedBytes); + + json.WriteString(L"Allocations"); + json.WriteNumber(allocationCount); + + json.WriteString(L"UnusedRanges"); + json.WriteNumber(unusedRangeCount); + + json.WriteString(L"Suballocations"); + json.BeginArray(); +} + +void BlockMetadata::PrintDetailedMap_Allocation(JsonWriter& json, + UINT64 offset, UINT64 size, void* privateData) const +{ + json.BeginObject(true); + + json.WriteString(L"Offset"); + json.WriteNumber(offset); + + if (IsVirtual()) + { + json.WriteString(L"Size"); + json.WriteNumber(size); + if (privateData) + { + json.WriteString(L"CustomData"); + json.WriteNumber((uintptr_t)privateData); + } + } + else + { + const Allocation* const alloc = (const Allocation*)privateData; + D3D12MA_ASSERT(alloc); + json.AddAllocationToObject(*alloc); + } + json.EndObject(); +} + +void BlockMetadata::PrintDetailedMap_UnusedRange(JsonWriter& json, + UINT64 offset, UINT64 size) const +{ + json.BeginObject(true); + + json.WriteString(L"Offset"); + json.WriteNumber(offset); + + json.WriteString(L"Type"); + json.WriteString(L"FREE"); + + json.WriteString(L"Size"); + json.WriteNumber(size); + + json.EndObject(); +} + +void BlockMetadata::PrintDetailedMap_End(JsonWriter& json) const +{ + json.EndArray(); +} +#endif // _D3D12MA_BLOCK_METADATA_FUNCTIONS +#endif // _D3D12MA_BLOCK_METADATA + +#if 0 +#ifndef _D3D12MA_BLOCK_METADATA_GENERIC class BlockMetadata_Generic : public BlockMetadata { public: BlockMetadata_Generic(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual); - virtual ~BlockMetadata_Generic(); - virtual void Init(UINT64 size); + virtual ~BlockMetadata_Generic() = default; - virtual bool Validate() const; - virtual size_t GetAllocationCount() const { return m_Suballocations.size() - m_FreeCount; } - virtual UINT64 GetSumFreeSize() const { return m_SumFreeSize; } - virtual UINT64 GetUnusedRangeSizeMax() const; - virtual bool IsEmpty() const; + size_t GetAllocationCount() const override { return m_Suballocations.size() - m_FreeCount; } + UINT64 GetSumFreeSize() const override { return m_SumFreeSize; } + UINT64 GetAllocationOffset(AllocHandle allocHandle) const override { return (UINT64)allocHandle - 1; } - virtual void GetAllocationInfo(UINT64 offset, VIRTUAL_ALLOCATION_INFO& outInfo) const; + void Init(UINT64 size) override; + bool Validate() const override; + bool IsEmpty() const override; + void GetAllocationInfo(AllocHandle allocHandle, VIRTUAL_ALLOCATION_INFO& outInfo) const override; - virtual bool CreateAllocationRequest( + bool CreateAllocationRequest( UINT64 allocSize, UINT64 allocAlignment, - AllocationRequest* pAllocationRequest); + bool upperAddress, + AllocationRequest* pAllocationRequest) override; - virtual void Alloc( + void Alloc( const AllocationRequest& request, UINT64 allocSize, - void* userData); + void* privateData) override; - virtual void FreeAtOffset(UINT64 offset); - virtual void Clear(); + void Free(AllocHandle allocHandle) override; + void Clear() override; - virtual void SetAllocationUserData(UINT64 offset, void* userData); + void SetAllocationPrivateData(AllocHandle allocHandle, void* privateData) override; - virtual void CalcAllocationStatInfo(StatInfo& outInfo) const; - virtual void WriteAllocationInfoToJson(JsonWriter& json) const; + void AddStatistics(Statistics& inoutStats) const override; + void AddDetailedStatistics(DetailedStatistics& inoutStats) const override; + void WriteAllocationInfoToJson(JsonWriter& json) const override; private: UINT m_FreeCount; @@ -2678,7 +3090,7 @@ private: UINT64 allocSize, UINT64 allocAlignment, SuballocationList::const_iterator suballocItem, - UINT64* pOffset, + AllocHandle* pAllocHandle, UINT64* pSumFreeSize, UINT64* pSumItemSize, BOOL *pZeroInitialized) const; @@ -2698,579 +3110,9 @@ private: D3D12MA_CLASS_NO_COPY(BlockMetadata_Generic) }; -//////////////////////////////////////////////////////////////////////////////// -// Private class MemoryBlock definition - -/* -Represents a single block of device memory (heap). -Base class for inheritance. -Thread-safety: This class must be externally synchronized. -*/ -class MemoryBlock -{ -public: - MemoryBlock( - AllocatorPimpl* allocator, - const D3D12_HEAP_PROPERTIES& heapProps, - D3D12_HEAP_FLAGS heapFlags, - UINT64 size, - UINT id); - virtual ~MemoryBlock(); - // Creates the ID3D12Heap. - - const D3D12_HEAP_PROPERTIES& GetHeapProperties() const { return m_HeapProps; } - D3D12_HEAP_FLAGS GetHeapFlags() const { return m_HeapFlags; } - UINT64 GetSize() const { return m_Size; } - UINT GetId() const { return m_Id; } - ID3D12Heap* GetHeap() const { return m_Heap; } - -protected: - AllocatorPimpl* const m_Allocator; - const D3D12_HEAP_PROPERTIES m_HeapProps; - const D3D12_HEAP_FLAGS m_HeapFlags; - const UINT64 m_Size; - const UINT m_Id; - - HRESULT Init(ID3D12ProtectedResourceSession* pProtectedSession); - -private: - ID3D12Heap* m_Heap = NULL; - - D3D12MA_CLASS_NO_COPY(MemoryBlock) -}; - -//////////////////////////////////////////////////////////////////////////////// -// Private class NormalBlock definition - -/* -Represents a single block of device memory (heap) with all the data about its -regions (aka suballocations, Allocation), assigned and free. -Thread-safety: This class must be externally synchronized. -*/ -class NormalBlock : public MemoryBlock -{ -public: - BlockMetadata* m_pMetadata; - - NormalBlock( - AllocatorPimpl* allocator, - BlockVector* blockVector, - const D3D12_HEAP_PROPERTIES& heapProps, - D3D12_HEAP_FLAGS heapFlags, - UINT64 size, - UINT id); - virtual ~NormalBlock(); - HRESULT Init(ID3D12ProtectedResourceSession* pProtectedSession); - - BlockVector* GetBlockVector() const { return m_BlockVector; } - - // Validates all data structures inside this object. If not valid, returns false. - bool Validate() const; - -private: - BlockVector* m_BlockVector; - - D3D12MA_CLASS_NO_COPY(NormalBlock) -}; - -//////////////////////////////////////////////////////////////////////////////// -// Private class CommittedAllocationList definition - -struct CommittedAllocationListItemTraits -{ - using ItemType = Allocation; - static ItemType* GetPrev(const ItemType* item) - { - D3D12MA_ASSERT(item->m_PackedData.GetType() == Allocation::TYPE_COMMITTED || item->m_PackedData.GetType() == Allocation::TYPE_HEAP); - return item->m_Committed.prev; - } - static ItemType* GetNext(const ItemType* item) - { - D3D12MA_ASSERT(item->m_PackedData.GetType() == Allocation::TYPE_COMMITTED || item->m_PackedData.GetType() == Allocation::TYPE_HEAP); - return item->m_Committed.next; - } - static ItemType*& AccessPrev(ItemType* item) - { - D3D12MA_ASSERT(item->m_PackedData.GetType() == Allocation::TYPE_COMMITTED || item->m_PackedData.GetType() == Allocation::TYPE_HEAP); - return item->m_Committed.prev; - } - static ItemType*& AccessNext(ItemType* item) - { - D3D12MA_ASSERT(item->m_PackedData.GetType() == Allocation::TYPE_COMMITTED || item->m_PackedData.GetType() == Allocation::TYPE_HEAP); - return item->m_Committed.next; - } -}; - -/* -Stores linked list of Allocation objects that are of TYPE_COMMITTED or TYPE_HEAP. -Thread-safe, synchronized internally. -*/ -class CommittedAllocationList -{ -public: - CommittedAllocationList(); - void Init(bool useMutex, D3D12_HEAP_TYPE heapType, PoolPimpl* pool); - ~CommittedAllocationList(); - - D3D12_HEAP_TYPE GetHeapType() const { return m_HeapType; } - - void CalculateStats(StatInfo& outStats); - // Writes JSON array with the list of allocations. - void BuildStatsString(JsonWriter& json); - - void Register(Allocation* alloc); - void Unregister(Allocation* alloc); - -private: - bool m_UseMutex = true; - D3D12_HEAP_TYPE m_HeapType = D3D12_HEAP_TYPE_CUSTOM; - PoolPimpl* m_Pool = NULL; - - D3D12MA_RW_MUTEX m_Mutex; - using CommittedAllocationLinkedList = IntrusiveLinkedList; - CommittedAllocationLinkedList m_AllocationList; -}; - -//////////////////////////////////////////////////////////////////////////////// -// Private class BlockVector definition - -/* -Sequence of NormalBlock. Represents memory blocks allocated for a specific -heap type and possibly resource type (if only Tier 1 is supported). - -Synchronized internally with a mutex. -*/ -class BlockVector -{ - D3D12MA_CLASS_NO_COPY(BlockVector) -public: - BlockVector( - AllocatorPimpl* hAllocator, - const D3D12_HEAP_PROPERTIES& heapProps, - D3D12_HEAP_FLAGS heapFlags, - UINT64 preferredBlockSize, - size_t minBlockCount, - size_t maxBlockCount, - bool explicitBlockSize, - UINT64 minAllocationAlignment, - ID3D12ProtectedResourceSession* pProtectedSession); - ~BlockVector(); - - HRESULT CreateMinBlocks(); - - const D3D12_HEAP_PROPERTIES& GetHeapProperties() const { return m_HeapProps; } - UINT64 GetPreferredBlockSize() const { return m_PreferredBlockSize; } - - bool IsEmpty(); - - HRESULT Allocate( - UINT64 size, - UINT64 alignment, - const ALLOCATION_DESC& allocDesc, - size_t allocationCount, - Allocation** pAllocations); - - void Free( - Allocation* hAllocation); - - HRESULT CreateResource( - UINT64 size, - UINT64 alignment, - const ALLOCATION_DESC& allocDesc, - const D3D12_RESOURCE_DESC& resourceDesc, - D3D12_RESOURCE_STATES InitialResourceState, - const D3D12_CLEAR_VALUE *pOptimizedClearValue, - Allocation** ppAllocation, - REFIID riidResource, - void** ppvResource); - -#ifdef __ID3D12Device8_INTERFACE_DEFINED__ - HRESULT CreateResource2( - UINT64 size, - UINT64 alignment, - const ALLOCATION_DESC& allocDesc, - const D3D12_RESOURCE_DESC1& resourceDesc, - D3D12_RESOURCE_STATES InitialResourceState, - const D3D12_CLEAR_VALUE *pOptimizedClearValue, - Allocation** ppAllocation, - REFIID riidResource, - void** ppvResource); -#endif // #ifdef __ID3D12Device8_INTERFACE_DEFINED__ - - void AddStats(StatInfo& outStats); - void AddStats(Stats& outStats); - - void WriteBlockInfoToJson(JsonWriter& json); - -private: - AllocatorPimpl* const m_hAllocator; - const D3D12_HEAP_PROPERTIES m_HeapProps; - const D3D12_HEAP_FLAGS m_HeapFlags; - const UINT64 m_PreferredBlockSize; - const size_t m_MinBlockCount; - const size_t m_MaxBlockCount; - const bool m_ExplicitBlockSize; - const UINT64 m_MinAllocationAlignment; - ID3D12ProtectedResourceSession* const m_ProtectedSession; - /* There can be at most one allocation that is completely empty - a - hysteresis to avoid pessimistic case of alternating creation and destruction - of a VkDeviceMemory. */ - bool m_HasEmptyBlock; - D3D12MA_RW_MUTEX m_Mutex; - // Incrementally sorted by sumFreeSize, ascending. - Vector m_Blocks; - UINT m_NextBlockId; - - UINT64 CalcSumBlockSize() const; - UINT64 CalcMaxBlockSize() const; - - // Finds and removes given block from vector. - void Remove(NormalBlock* pBlock); - - // Performs single step in sorting m_Blocks. They may not be fully sorted - // after this call. - void IncrementallySortBlocks(); - - HRESULT AllocatePage( - UINT64 size, - UINT64 alignment, - const ALLOCATION_DESC& allocDesc, - Allocation** pAllocation); - - HRESULT AllocateFromBlock( - NormalBlock* pBlock, - UINT64 size, - UINT64 alignment, - ALLOCATION_FLAGS allocFlags, - Allocation** pAllocation); - - HRESULT CreateBlock( - UINT64 blockSize, - size_t* pNewBlockIndex); -}; - -//////////////////////////////////////////////////////////////////////////////// -// Private class AllocatorPimpl definition - -static const UINT STANDARD_HEAP_TYPE_COUNT = 3; // Only DEFAULT, UPLOAD, READBACK. -static const UINT DEFAULT_POOL_MAX_COUNT = 9; - -struct CurrentBudgetData -{ - D3D12MA_ATOMIC_UINT64 m_BlockBytes[HEAP_TYPE_COUNT]; - D3D12MA_ATOMIC_UINT64 m_AllocationBytes[HEAP_TYPE_COUNT]; - - D3D12MA_ATOMIC_UINT32 m_OperationsSinceBudgetFetch; - D3D12MA_RW_MUTEX m_BudgetMutex; - UINT64 m_D3D12UsageLocal, m_D3D12UsageNonLocal; - UINT64 m_D3D12BudgetLocal, m_D3D12BudgetNonLocal; - UINT64 m_BlockBytesAtBudgetFetch[HEAP_TYPE_COUNT]; - - CurrentBudgetData() - { - for(UINT i = 0; i < HEAP_TYPE_COUNT; ++i) - { - m_BlockBytes[i] = 0; - m_AllocationBytes[i] = 0; - m_BlockBytesAtBudgetFetch[i] = 0; - } - - m_D3D12UsageLocal = 0; - m_D3D12UsageNonLocal = 0; - m_D3D12BudgetLocal = 0; - m_D3D12BudgetNonLocal = 0; - m_OperationsSinceBudgetFetch = 0; - } - - void AddAllocation(UINT heapTypeIndex, UINT64 allocationSize) - { - m_AllocationBytes[heapTypeIndex] += allocationSize; - ++m_OperationsSinceBudgetFetch; - } - void RemoveAllocation(UINT heapTypeIndex, UINT64 allocationSize) - { - m_AllocationBytes[heapTypeIndex] -= allocationSize; - ++m_OperationsSinceBudgetFetch; - } - void AddCommittedAllocation(UINT heapTypeIndex, UINT64 allocationSize) - { - AddAllocation(heapTypeIndex, allocationSize); - m_BlockBytes[heapTypeIndex] += allocationSize; - } - void RemoveCommittedAllocation(UINT heapTypeIndex, UINT64 allocationSize) - { - m_BlockBytes[heapTypeIndex] -= allocationSize; - RemoveAllocation(heapTypeIndex, allocationSize); - } -}; - -class PoolPimpl -{ -public: - PoolPimpl(AllocatorPimpl* allocator, const POOL_DESC& desc); - HRESULT Init(); - ~PoolPimpl(); - - AllocatorPimpl* GetAllocator() const { return m_Allocator; } - const POOL_DESC& GetDesc() const { return m_Desc; } - bool SupportsCommittedAllocations() const { return m_Desc.BlockSize == 0; } - - BlockVector* GetBlockVector() { return m_BlockVector; } - CommittedAllocationList* GetCommittedAllocationList() { return SupportsCommittedAllocations() ? &m_CommittedAllocations : NULL; } - - void CalculateStats(StatInfo& outStats); - void AddStats(Stats& inoutStats); - - void SetName(LPCWSTR Name); - LPCWSTR GetName() const { return m_Name; } - -private: - friend class Allocator; - friend struct PoolListItemTraits; - - AllocatorPimpl* m_Allocator; // Externally owned object. - POOL_DESC m_Desc; - BlockVector* m_BlockVector; // Owned object. - CommittedAllocationList m_CommittedAllocations; - wchar_t* m_Name; - PoolPimpl* m_PrevPool = NULL; - PoolPimpl* m_NextPool = NULL; - - void FreeName(); -}; - -struct PoolListItemTraits -{ - using ItemType = PoolPimpl; - static ItemType* GetPrev(const ItemType* item) { return item->m_PrevPool; } - static ItemType* GetNext(const ItemType* item) { return item->m_NextPool; } - static ItemType*& AccessPrev(ItemType* item) { return item->m_PrevPool; } - static ItemType*& AccessNext(ItemType* item) { return item->m_NextPool; } -}; - -struct CommittedAllocationParameters -{ - CommittedAllocationList* m_List = NULL; - D3D12_HEAP_PROPERTIES m_HeapProperties = {}; - D3D12_HEAP_FLAGS m_HeapFlags = D3D12_HEAP_FLAG_NONE; - ID3D12ProtectedResourceSession* m_ProtectedSession = NULL; - - bool IsValid() const { return m_List != NULL; } -}; - -class AllocatorPimpl -{ -public: - std::atomic_uint32_t m_RefCount = 1; - CurrentBudgetData m_Budget; - - AllocatorPimpl(const ALLOCATION_CALLBACKS& allocationCallbacks, const ALLOCATOR_DESC& desc); - HRESULT Init(const ALLOCATOR_DESC& desc); - ~AllocatorPimpl(); - - ID3D12Device* GetDevice() const { return m_Device; } -#ifdef __ID3D12Device4_INTERFACE_DEFINED__ - ID3D12Device4* GetDevice4() const { return m_Device4; } -#endif -#ifdef __ID3D12Device8_INTERFACE_DEFINED__ - ID3D12Device8* GetDevice8() const { return m_Device8; } -#endif - // Shortcut for "Allocation Callbacks", because this function is called so often. - const ALLOCATION_CALLBACKS& GetAllocs() const { return m_AllocationCallbacks; } - const D3D12_FEATURE_DATA_D3D12_OPTIONS& GetD3D12Options() const { return m_D3D12Options; } - BOOL IsUMA() const { return m_D3D12Architecture.UMA; } - BOOL IsCacheCoherentUMA() const { return m_D3D12Architecture.CacheCoherentUMA; } - bool SupportsResourceHeapTier2() const { return m_D3D12Options.ResourceHeapTier >= D3D12_RESOURCE_HEAP_TIER_2; } - bool UseMutex() const { return m_UseMutex; } - AllocationObjectAllocator& GetAllocationObjectAllocator() { return m_AllocationObjectAllocator; } - bool HeapFlagsFulfillResourceHeapTier(D3D12_HEAP_FLAGS flags) const; - - HRESULT CreateResource( - const ALLOCATION_DESC* pAllocDesc, - const D3D12_RESOURCE_DESC* pResourceDesc, - D3D12_RESOURCE_STATES InitialResourceState, - const D3D12_CLEAR_VALUE *pOptimizedClearValue, - Allocation** ppAllocation, - REFIID riidResource, - void** ppvResource); - -#ifdef __ID3D12Device8_INTERFACE_DEFINED__ - HRESULT CreateResource2( - const ALLOCATION_DESC* pAllocDesc, - const D3D12_RESOURCE_DESC1* pResourceDesc, - D3D12_RESOURCE_STATES InitialResourceState, - const D3D12_CLEAR_VALUE *pOptimizedClearValue, - Allocation** ppAllocation, - REFIID riidResource, - void** ppvResource); -#endif // #ifdef __ID3D12Device8_INTERFACE_DEFINED__ - - HRESULT AllocateMemory( - const ALLOCATION_DESC* pAllocDesc, - const D3D12_RESOURCE_ALLOCATION_INFO* pAllocInfo, - Allocation** ppAllocation); - - HRESULT CreateAliasingResource( - Allocation* pAllocation, - UINT64 AllocationLocalOffset, - const D3D12_RESOURCE_DESC* pResourceDesc, - D3D12_RESOURCE_STATES InitialResourceState, - const D3D12_CLEAR_VALUE *pOptimizedClearValue, - REFIID riidResource, - void** ppvResource); - - // Unregisters allocation from the collection of dedicated allocations. - // Allocation object must be deleted externally afterwards. - void FreeCommittedMemory(Allocation* allocation); - // Unregisters allocation from the collection of placed allocations. - // Allocation object must be deleted externally afterwards. - void FreePlacedMemory(Allocation* allocation); - // Unregisters allocation from the collection of dedicated allocations and destroys associated heap. - // Allocation object must be deleted externally afterwards. - void FreeHeapMemory(Allocation* allocation); - - void SetCurrentFrameIndex(UINT frameIndex); - - UINT GetCurrentFrameIndex() const { return m_CurrentFrameIndex.load(); } - - void CalculateStats(Stats& outStats); - - void GetBudget(Budget* outGpuBudget, Budget* outCpuBudget); - void GetBudgetForHeapType(Budget& outBudget, D3D12_HEAP_TYPE heapType); - - void BuildStatsString(WCHAR** ppStatsString, BOOL DetailedMap); - - void FreeStatsString(WCHAR* pStatsString); - -private: - friend class Allocator; - friend class Pool; - - /* - Heuristics that decides whether a resource should better be placed in its own, - dedicated allocation (committed resource rather than placed resource). - */ - template - static bool PrefersCommittedAllocation(const D3D12_RESOURCE_DESC_T& resourceDesc); - - const bool m_UseMutex; - const bool m_AlwaysCommitted; - ID3D12Device* m_Device; // AddRef -#ifdef __ID3D12Device4_INTERFACE_DEFINED__ - ID3D12Device4* m_Device4 = NULL; // AddRef, optional -#endif -#ifdef __ID3D12Device8_INTERFACE_DEFINED__ - ID3D12Device8* m_Device8 = NULL; // AddRef, optional -#endif - IDXGIAdapter* m_Adapter; // AddRef -#if D3D12MA_DXGI_1_4 - IDXGIAdapter3* m_Adapter3 = NULL; // AddRef, optional -#endif - UINT64 m_PreferredBlockSize; - ALLOCATION_CALLBACKS m_AllocationCallbacks; - D3D12MA_ATOMIC_UINT32 m_CurrentFrameIndex; - DXGI_ADAPTER_DESC m_AdapterDesc; - D3D12_FEATURE_DATA_D3D12_OPTIONS m_D3D12Options; - D3D12_FEATURE_DATA_ARCHITECTURE m_D3D12Architecture; - AllocationObjectAllocator m_AllocationObjectAllocator; - - using PoolList = IntrusiveLinkedList; - PoolList m_Pools[HEAP_TYPE_COUNT]; - D3D12MA_RW_MUTEX m_PoolsMutex[HEAP_TYPE_COUNT]; - - // Default pools. - BlockVector* m_BlockVectors[DEFAULT_POOL_MAX_COUNT]; - - CommittedAllocationList m_CommittedAllocations[STANDARD_HEAP_TYPE_COUNT]; - - // Allocates and registers new committed resource with implicit heap, as dedicated allocation. - // Creates and returns Allocation object and optionally D3D12 resource. - HRESULT AllocateCommittedResource( - const CommittedAllocationParameters& committedAllocParams, - UINT64 resourceSize, bool withinBudget, - const D3D12_RESOURCE_DESC* pResourceDesc, - D3D12_RESOURCE_STATES InitialResourceState, const D3D12_CLEAR_VALUE *pOptimizedClearValue, - Allocation** ppAllocation, REFIID riidResource, void** ppvResource); - -#ifdef __ID3D12Device8_INTERFACE_DEFINED__ - HRESULT AllocateCommittedResource2( - const CommittedAllocationParameters& committedAllocParams, - UINT64 resourceSize, bool withinBudget, - const D3D12_RESOURCE_DESC1* pResourceDesc, - D3D12_RESOURCE_STATES InitialResourceState, const D3D12_CLEAR_VALUE *pOptimizedClearValue, - Allocation** ppAllocation, REFIID riidResource, void** ppvResource); -#endif // #ifdef __ID3D12Device8_INTERFACE_DEFINED__ - - // Allocates and registers new heap without any resources placed in it, as dedicated allocation. - // Creates and returns Allocation object. - HRESULT AllocateHeap( - const CommittedAllocationParameters& committedAllocParams, - const D3D12_RESOURCE_ALLOCATION_INFO& allocInfo, bool withinBudget, - Allocation** ppAllocation); - - template - HRESULT CalcAllocationParams(const ALLOCATION_DESC& allocDesc, UINT64 allocSize, - const D3D12_RESOURCE_DESC_T* resDesc, // Optional - BlockVector*& outBlockVector, CommittedAllocationParameters& outCommittedAllocationParams, bool& outPreferCommitted); - - /* - If SupportsResourceHeapTier2(): - 0: D3D12_HEAP_TYPE_DEFAULT - 1: D3D12_HEAP_TYPE_UPLOAD - 2: D3D12_HEAP_TYPE_READBACK - else: - 0: D3D12_HEAP_TYPE_DEFAULT + buffer - 1: D3D12_HEAP_TYPE_DEFAULT + texture - 2: D3D12_HEAP_TYPE_DEFAULT + texture RT or DS - 3: D3D12_HEAP_TYPE_UPLOAD + buffer - 4: D3D12_HEAP_TYPE_UPLOAD + texture - 5: D3D12_HEAP_TYPE_UPLOAD + texture RT or DS - 6: D3D12_HEAP_TYPE_READBACK + buffer - 7: D3D12_HEAP_TYPE_READBACK + texture - 8: D3D12_HEAP_TYPE_READBACK + texture RT or DS - */ - UINT CalcDefaultPoolCount() const; - // Returns UINT32_MAX if index cannot be calculcated. - UINT CalcDefaultPoolIndex(const ALLOCATION_DESC& allocDesc, ResourceClass resourceClass) const; - void CalcDefaultPoolParams(D3D12_HEAP_TYPE& outHeapType, D3D12_HEAP_FLAGS& outHeapFlags, UINT index) const; - - // Registers Pool object in m_Pools. - void RegisterPool(Pool* pool, D3D12_HEAP_TYPE heapType); - // Unregisters Pool object from m_Pools. - void UnregisterPool(Pool* pool, D3D12_HEAP_TYPE heapType); - - HRESULT UpdateD3D12Budget(); - - D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfoNative(const D3D12_RESOURCE_DESC& resourceDesc) const; -#ifdef __ID3D12Device8_INTERFACE_DEFINED__ - D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfoNative(const D3D12_RESOURCE_DESC1& resourceDesc) const; -#endif // #ifdef __ID3D12Device8_INTERFACE_DEFINED__ - - template - D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfo(D3D12_RESOURCE_DESC_T& inOutResourceDesc) const; - - bool NewAllocationWithinBudget(D3D12_HEAP_TYPE heapType, UINT64 size); - - // Writes object { } with data of given budget. - static void WriteBudgetToJson(JsonWriter& json, const Budget& budget); -}; - -//////////////////////////////////////////////////////////////////////////////// -// Private class BlockMetadata implementation - -BlockMetadata::BlockMetadata(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual) : - m_Size(0), - m_IsVirtual(isVirtual), - m_pAllocationCallbacks(allocationCallbacks) -{ - D3D12MA_ASSERT(allocationCallbacks); -} - -//////////////////////////////////////////////////////////////////////////////// -// Private class BlockMetadata_Generic implementation - -BlockMetadata_Generic::BlockMetadata_Generic(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual) : - BlockMetadata(allocationCallbacks, isVirtual), +#ifndef _D3D12MA_BLOCK_METADATA_GENERIC_FUNCTIONS +BlockMetadata_Generic::BlockMetadata_Generic(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual) + : BlockMetadata(allocationCallbacks, isVirtual), m_FreeCount(0), m_SumFreeSize(0), m_Suballocations(*allocationCallbacks), @@ -3279,10 +3121,6 @@ BlockMetadata_Generic::BlockMetadata_Generic(const ALLOCATION_CALLBACKS* allocat D3D12MA_ASSERT(allocationCallbacks); } -BlockMetadata_Generic::~BlockMetadata_Generic() -{ -} - void BlockMetadata_Generic::Init(UINT64 size) { BlockMetadata::Init(size); @@ -3295,7 +3133,7 @@ void BlockMetadata_Generic::Init(UINT64 size) suballoc.offset = 0; suballoc.size = size; suballoc.type = SUBALLOCATION_TYPE_FREE; - suballoc.userData = NULL; + suballoc.privateData = NULL; D3D12MA_ASSERT(size > MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER); m_Suballocations.push_back(suballoc); @@ -3320,7 +3158,7 @@ bool BlockMetadata_Generic::Validate() const // True if previous visited suballocation was free. bool prevFree = false; - for(const auto& subAlloc : m_Suballocations) + for (const auto& subAlloc : m_Suballocations) { // Actual offset of this suballocation doesn't match expected one. D3D12MA_VALIDATE(subAlloc.offset == calculatedOffset); @@ -3329,34 +3167,34 @@ bool BlockMetadata_Generic::Validate() const // Two adjacent free suballocations are invalid. They should be merged. D3D12MA_VALIDATE(!prevFree || !currFree); - const Allocation* const alloc = (Allocation*)subAlloc.userData; - if(!IsVirtual()) + const Allocation* const alloc = (Allocation*)subAlloc.privateData; + if (!IsVirtual()) { D3D12MA_VALIDATE(currFree == (alloc == NULL)); } - if(currFree) + if (currFree) { calculatedSumFreeSize += subAlloc.size; ++calculatedFreeCount; - if(subAlloc.size >= MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) + if (subAlloc.size >= MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) { ++freeSuballocationsToRegister; } // Margin required between allocations - every free space must be at least that large. - D3D12MA_VALIDATE(subAlloc.size >= D3D12MA_DEBUG_MARGIN); + D3D12MA_VALIDATE(subAlloc.size >= GetDebugMargin()); } else { - if(!IsVirtual()) + if (!IsVirtual()) { D3D12MA_VALIDATE(alloc->GetOffset() == subAlloc.offset); D3D12MA_VALIDATE(alloc->GetSize() == subAlloc.size); } // Margin required between allocations - previous allocation must be free. - D3D12MA_VALIDATE(D3D12MA_DEBUG_MARGIN == 0 || prevFree); + D3D12MA_VALIDATE(GetDebugMargin() == 0 || prevFree); } calculatedOffset += subAlloc.size; @@ -3368,7 +3206,7 @@ bool BlockMetadata_Generic::Validate() const D3D12MA_VALIDATE(m_FreeSuballocationsBySize.size() == freeSuballocationsToRegister); UINT64 lastSize = 0; - for(size_t i = 0; i < m_FreeSuballocationsBySize.size(); ++i) + for (size_t i = 0; i < m_FreeSuballocationsBySize.size(); ++i) { SuballocationList::iterator suballocItem = m_FreeSuballocationsBySize[i]; @@ -3389,63 +3227,54 @@ bool BlockMetadata_Generic::Validate() const return true; } -UINT64 BlockMetadata_Generic::GetUnusedRangeSizeMax() const -{ - if(!m_FreeSuballocationsBySize.empty()) - { - return m_FreeSuballocationsBySize.back()->size; - } - else - { - return 0; - } -} - bool BlockMetadata_Generic::IsEmpty() const { return (m_Suballocations.size() == 1) && (m_FreeCount == 1); } -void BlockMetadata_Generic::GetAllocationInfo(UINT64 offset, VIRTUAL_ALLOCATION_INFO& outInfo) const +void BlockMetadata_Generic::GetAllocationInfo(AllocHandle allocHandle, VIRTUAL_ALLOCATION_INFO& outInfo) const { - Suballocation& suballoc = *FindAtOffset(offset).dropConst(); - outInfo.size = suballoc.size; - outInfo.pUserData = suballoc.userData; + Suballocation& suballoc = *FindAtOffset((UINT64)allocHandle - 1).dropConst(); + outInfo.Offset = suballoc.offset; + outInfo.Size = suballoc.size; + outInfo.pPrivateData = suballoc.privateData; } bool BlockMetadata_Generic::CreateAllocationRequest( UINT64 allocSize, UINT64 allocAlignment, + bool upperAddress, AllocationRequest* pAllocationRequest) { D3D12MA_ASSERT(allocSize > 0); + D3D12MA_ASSERT(!upperAddress && "ALLOCATION_FLAG_UPPER_ADDRESS can be used only with linear algorithm."); D3D12MA_ASSERT(pAllocationRequest != NULL); D3D12MA_HEAVY_ASSERT(Validate()); // There is not enough total free space in this block to fullfill the request: Early return. - if(m_SumFreeSize < allocSize + 2 * D3D12MA_DEBUG_MARGIN) + if (m_SumFreeSize < allocSize + GetDebugMargin()) { return false; } // New algorithm, efficiently searching freeSuballocationsBySize. const size_t freeSuballocCount = m_FreeSuballocationsBySize.size(); - if(freeSuballocCount > 0) + if (freeSuballocCount > 0) { - // Find first free suballocation with size not less than allocSize + 2 * D3D12MA_DEBUG_MARGIN. + // Find first free suballocation with size not less than allocSize + GetDebugMargin(). SuballocationList::iterator* const it = BinaryFindFirstNotLess( m_FreeSuballocationsBySize.data(), m_FreeSuballocationsBySize.data() + freeSuballocCount, - allocSize + 2 * D3D12MA_DEBUG_MARGIN, + allocSize + GetDebugMargin(), SuballocationItemSizeLess()); size_t index = it - m_FreeSuballocationsBySize.data(); - for(; index < freeSuballocCount; ++index) + for (; index < freeSuballocCount; ++index) { - if(CheckAllocation( + if (CheckAllocation( allocSize, allocAlignment, m_FreeSuballocationsBySize[index], - &pAllocationRequest->offset, + &pAllocationRequest->allocHandle, &pAllocationRequest->sumFreeSize, &pAllocationRequest->sumItemSize, &pAllocationRequest->zeroInitialized)) @@ -3462,15 +3291,16 @@ bool BlockMetadata_Generic::CreateAllocationRequest( void BlockMetadata_Generic::Alloc( const AllocationRequest& request, UINT64 allocSize, - void* userData) + void* privateData) { D3D12MA_ASSERT(request.item != m_Suballocations.end()); Suballocation& suballoc = *request.item; // Given suballocation is a free block. D3D12MA_ASSERT(suballoc.type == SUBALLOCATION_TYPE_FREE); // Given offset is inside this suballocation. - D3D12MA_ASSERT(request.offset >= suballoc.offset); - const UINT64 paddingBegin = request.offset - suballoc.offset; + UINT64 offset = (UINT64)request.allocHandle - 1; + D3D12MA_ASSERT(offset >= suballoc.offset); + const UINT64 paddingBegin = offset - suballoc.offset; D3D12MA_ASSERT(suballoc.size >= paddingBegin + allocSize); const UINT64 paddingEnd = suballoc.size - paddingBegin - allocSize; @@ -3478,16 +3308,16 @@ void BlockMetadata_Generic::Alloc( // it to become used. UnregisterFreeSuballocation(request.item); - suballoc.offset = request.offset; + suballoc.offset = offset; suballoc.size = allocSize; suballoc.type = SUBALLOCATION_TYPE_ALLOCATION; - suballoc.userData = userData; + suballoc.privateData = privateData; // If there are any free bytes remaining at the end, insert new free suballocation after current one. - if(paddingEnd) + if (paddingEnd) { Suballocation paddingSuballoc = {}; - paddingSuballoc.offset = request.offset + allocSize; + paddingSuballoc.offset = offset + allocSize; paddingSuballoc.size = paddingEnd; paddingSuballoc.type = SUBALLOCATION_TYPE_FREE; SuballocationList::iterator next = request.item; @@ -3498,10 +3328,10 @@ void BlockMetadata_Generic::Alloc( } // If there are any free bytes remaining at the beginning, insert new free suballocation before current one. - if(paddingBegin) + if (paddingBegin) { Suballocation paddingSuballoc = {}; - paddingSuballoc.offset = request.offset - paddingBegin; + paddingSuballoc.offset = offset - paddingBegin; paddingSuballoc.size = paddingBegin; paddingSuballoc.type = SUBALLOCATION_TYPE_FREE; const SuballocationList::iterator paddingBeginItem = @@ -3511,22 +3341,22 @@ void BlockMetadata_Generic::Alloc( // Update totals. m_FreeCount = m_FreeCount - 1; - if(paddingBegin > 0) + if (paddingBegin > 0) { ++m_FreeCount; } - if(paddingEnd > 0) + if (paddingEnd > 0) { ++m_FreeCount; } m_SumFreeSize -= allocSize; - m_ZeroInitializedRange.MarkRangeAsUsed(request.offset, request.offset + allocSize); + m_ZeroInitializedRange.MarkRangeAsUsed(offset, offset + allocSize); } -void BlockMetadata_Generic::FreeAtOffset(UINT64 offset) +void BlockMetadata_Generic::Free(AllocHandle allocHandle) { - FreeSuballocation(FindAtOffset(offset).dropConst()); + FreeSuballocation(FindAtOffset((UINT64)allocHandle - 1).dropConst()); } void BlockMetadata_Generic::Clear() @@ -3580,7 +3410,7 @@ SuballocationList::const_iterator BlockMetadata_Generic::FindAtOffset(UINT64 off bool BlockMetadata_Generic::ValidateFreeSuballocationList() const { UINT64 lastSize = 0; - for(size_t i = 0, count = m_FreeSuballocationsBySize.size(); i < count; ++i) + for (size_t i = 0, count = m_FreeSuballocationsBySize.size(); i < count; ++i) { const SuballocationList::iterator it = m_FreeSuballocationsBySize[i]; @@ -3596,14 +3426,14 @@ bool BlockMetadata_Generic::CheckAllocation( UINT64 allocSize, UINT64 allocAlignment, SuballocationList::const_iterator suballocItem, - UINT64* pOffset, + AllocHandle* pAllocHandle, UINT64* pSumFreeSize, UINT64* pSumItemSize, - BOOL *pZeroInitialized) const + BOOL* pZeroInitialized) const { D3D12MA_ASSERT(allocSize > 0); D3D12MA_ASSERT(suballocItem != m_Suballocations.cend()); - D3D12MA_ASSERT(pOffset != NULL && pZeroInitialized != NULL); + D3D12MA_ASSERT(pAllocHandle != NULL && pZeroInitialized != NULL); *pSumFreeSize = 0; *pSumItemSize = 0; @@ -3615,37 +3445,29 @@ bool BlockMetadata_Generic::CheckAllocation( *pSumFreeSize = suballoc.size; // Size of this suballocation is too small for this request: Early return. - if(suballoc.size < allocSize) + if (suballoc.size < allocSize) { return false; } - // Start from offset equal to beginning of this suballocation. - *pOffset = suballoc.offset; - - // Apply D3D12MA_DEBUG_MARGIN at the beginning. - if(D3D12MA_DEBUG_MARGIN > 0) - { - *pOffset += D3D12MA_DEBUG_MARGIN; - } + // Start from offset equal to beginning of this suballocation and debug margin of previous allocation if present. + UINT64 offset = suballoc.offset + (suballocItem == m_Suballocations.cbegin() ? 0 : GetDebugMargin()); // Apply alignment. - *pOffset = AlignUp(*pOffset, allocAlignment); + offset = AlignUp(offset, allocAlignment); // Calculate padding at the beginning based on current offset. - const UINT64 paddingBegin = *pOffset - suballoc.offset; + const UINT64 paddingBegin = offset - suballoc.offset; - // Calculate required margin at the end. - const UINT64 requiredEndMargin = D3D12MA_DEBUG_MARGIN; - - // Fail if requested size plus margin before and after is bigger than size of this suballocation. - if(paddingBegin + allocSize + requiredEndMargin > suballoc.size) + // Fail if requested size plus margin after is bigger than size of this suballocation. + if (paddingBegin + allocSize + GetDebugMargin() > suballoc.size) { return false; } - // All tests passed: Success. pOffset is already filled. - *pZeroInitialized = m_ZeroInitializedRange.IsRangeZeroInitialized(*pOffset, *pOffset + allocSize); + // All tests passed: Success. Offset is already filled. + *pZeroInitialized = m_ZeroInitializedRange.IsRangeZeroInitialized(offset, offset + allocSize); + *pAllocHandle = (AllocHandle)(offset + 1); return true; } @@ -3669,7 +3491,7 @@ SuballocationList::iterator BlockMetadata_Generic::FreeSuballocation(Suballocati // Change this suballocation to be marked as free. Suballocation& suballoc = *suballocItem; suballoc.type = SUBALLOCATION_TYPE_FREE; - suballoc.userData = NULL; + suballoc.privateData = NULL; // Update totals. ++m_FreeCount; @@ -3681,28 +3503,28 @@ SuballocationList::iterator BlockMetadata_Generic::FreeSuballocation(Suballocati SuballocationList::iterator nextItem = suballocItem; ++nextItem; - if((nextItem != m_Suballocations.end()) && (nextItem->type == SUBALLOCATION_TYPE_FREE)) + if ((nextItem != m_Suballocations.end()) && (nextItem->type == SUBALLOCATION_TYPE_FREE)) { mergeWithNext = true; } SuballocationList::iterator prevItem = suballocItem; - if(suballocItem != m_Suballocations.begin()) + if (suballocItem != m_Suballocations.begin()) { --prevItem; - if(prevItem->type == SUBALLOCATION_TYPE_FREE) + if (prevItem->type == SUBALLOCATION_TYPE_FREE) { mergeWithPrev = true; } } - if(mergeWithNext) + if (mergeWithNext) { UnregisterFreeSuballocation(nextItem); MergeFreeWithNext(suballocItem); } - if(mergeWithPrev) + if (mergeWithPrev) { UnregisterFreeSuballocation(prevItem); MergeFreeWithNext(prevItem); @@ -3725,9 +3547,9 @@ void BlockMetadata_Generic::RegisterFreeSuballocation(SuballocationList::iterato // this function, depending on what do you want to check. D3D12MA_HEAVY_ASSERT(ValidateFreeSuballocationList()); - if(item->size >= MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) + if (item->size >= MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) { - if(m_FreeSuballocationsBySize.empty()) + if (m_FreeSuballocationsBySize.empty()) { m_FreeSuballocationsBySize.push_back(item); } @@ -3740,7 +3562,6 @@ void BlockMetadata_Generic::RegisterFreeSuballocation(SuballocationList::iterato //D3D12MA_HEAVY_ASSERT(ValidateFreeSuballocationList()); } - void BlockMetadata_Generic::UnregisterFreeSuballocation(SuballocationList::iterator item) { D3D12MA_ASSERT(item->type == SUBALLOCATION_TYPE_FREE); @@ -3750,18 +3571,18 @@ void BlockMetadata_Generic::UnregisterFreeSuballocation(SuballocationList::itera // this function, depending on what do you want to check. D3D12MA_HEAVY_ASSERT(ValidateFreeSuballocationList()); - if(item->size >= MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) + if (item->size >= MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) { SuballocationList::iterator* const it = BinaryFindFirstNotLess( m_FreeSuballocationsBySize.data(), m_FreeSuballocationsBySize.data() + m_FreeSuballocationsBySize.size(), item, SuballocationItemSizeLess()); - for(size_t index = it - m_FreeSuballocationsBySize.data(); + for (size_t index = it - m_FreeSuballocationsBySize.data(); index < m_FreeSuballocationsBySize.size(); ++index) { - if(m_FreeSuballocationsBySize[index] == item) + if (m_FreeSuballocationsBySize[index] == item) { m_FreeSuballocationsBySize.remove(index); return; @@ -3774,946 +3595,2930 @@ void BlockMetadata_Generic::UnregisterFreeSuballocation(SuballocationList::itera //D3D12MA_HEAVY_ASSERT(ValidateFreeSuballocationList()); } -void BlockMetadata_Generic::SetAllocationUserData(UINT64 offset, void* userData) +void BlockMetadata_Generic::SetAllocationPrivateData(AllocHandle allocHandle, void* privateData) { - Suballocation& suballoc = *FindAtOffset(offset).dropConst(); - suballoc.userData = userData; + Suballocation& suballoc = *FindAtOffset((UINT64)allocHandle - 1).dropConst(); + suballoc.privateData = privateData; } -void BlockMetadata_Generic::CalcAllocationStatInfo(StatInfo& outInfo) const +void BlockMetadata_Generic::AddStatistics(Statistics& inoutStats) const { - outInfo.BlockCount = 1; + inoutStats.BlockCount++; + inoutStats.AllocationCount += (UINT)m_Suballocations.size() - m_FreeCount; + inoutStats.BlockBytes += GetSize(); + inoutStats.AllocationBytes += GetSize() - m_SumFreeSize; +} - const UINT rangeCount = (UINT)m_Suballocations.size(); - outInfo.AllocationCount = rangeCount - m_FreeCount; - outInfo.UnusedRangeCount = m_FreeCount; +void BlockMetadata_Generic::AddDetailedStatistics(DetailedStatistics& inoutStats) const +{ + inoutStats.Stats.BlockCount++; + inoutStats.Stats.BlockBytes += GetSize(); - outInfo.UsedBytes = GetSize() - m_SumFreeSize; - outInfo.UnusedBytes = m_SumFreeSize; - - outInfo.AllocationSizeMin = UINT64_MAX; - outInfo.AllocationSizeMax = 0; - outInfo.UnusedRangeSizeMin = UINT64_MAX; - outInfo.UnusedRangeSizeMax = 0; - - for(const auto& suballoc : m_Suballocations) + for (const auto& suballoc : m_Suballocations) { - if(suballoc.type == SUBALLOCATION_TYPE_FREE) - { - outInfo.UnusedRangeSizeMin = D3D12MA_MIN(suballoc.size, outInfo.UnusedRangeSizeMin); - outInfo.UnusedRangeSizeMax = D3D12MA_MAX(suballoc.size, outInfo.UnusedRangeSizeMax); - } + if (suballoc.type == SUBALLOCATION_TYPE_FREE) + AddDetailedStatisticsUnusedRange(inoutStats, suballoc.size); else - { - outInfo.AllocationSizeMin = D3D12MA_MIN(suballoc.size, outInfo.AllocationSizeMin); - outInfo.AllocationSizeMax = D3D12MA_MAX(suballoc.size, outInfo.AllocationSizeMax); - } + AddDetailedStatisticsAllocation(inoutStats, suballoc.size); } } void BlockMetadata_Generic::WriteAllocationInfoToJson(JsonWriter& json) const { - json.BeginObject(); - json.WriteString(L"TotalBytes"); - json.WriteNumber(GetSize()); - json.WriteString(L"UnusuedBytes"); - json.WriteNumber(GetSumFreeSize()); - json.WriteString(L"Allocations"); - json.WriteNumber(GetAllocationCount()); - json.WriteString(L"UnusedRanges"); - json.WriteNumber(m_FreeCount); - json.WriteString(L"Suballocations"); - json.BeginArray(); - for(const auto& suballoc : m_Suballocations) + PrintDetailedMap_Begin(json, GetSumFreeSize(), GetAllocationCount(), m_FreeCount); + for (const auto& suballoc : m_Suballocations) { - json.BeginObject(true); - json.WriteString(L"Offset"); - json.WriteNumber(suballoc.offset); - if(suballoc.type == SUBALLOCATION_TYPE_FREE) + if (suballoc.type == SUBALLOCATION_TYPE_FREE) + PrintDetailedMap_UnusedRange(json, suballoc.offset, suballoc.size); + else + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.privateData); + } + PrintDetailedMap_End(json); +} +#endif // _D3D12MA_BLOCK_METADATA_GENERIC_FUNCTIONS +#endif // _D3D12MA_BLOCK_METADATA_GENERIC +#endif // #if 0 + +#ifndef _D3D12MA_BLOCK_METADATA_LINEAR +class BlockMetadata_Linear : public BlockMetadata +{ +public: + BlockMetadata_Linear(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual); + virtual ~BlockMetadata_Linear() = default; + + UINT64 GetSumFreeSize() const override { return m_SumFreeSize; } + bool IsEmpty() const override { return GetAllocationCount() == 0; } + UINT64 GetAllocationOffset(AllocHandle allocHandle) const override { return (UINT64)allocHandle - 1; }; + + void Init(UINT64 size) override; + bool Validate() const override; + size_t GetAllocationCount() const override; + size_t GetFreeRegionsCount() const override; + void GetAllocationInfo(AllocHandle allocHandle, VIRTUAL_ALLOCATION_INFO& outInfo) const override; + + bool CreateAllocationRequest( + UINT64 allocSize, + UINT64 allocAlignment, + bool upperAddress, + UINT32 strategy, + AllocationRequest* pAllocationRequest) override; + + void Alloc( + const AllocationRequest& request, + UINT64 allocSize, + void* privateData) override; + + void Free(AllocHandle allocHandle) override; + void Clear() override; + + AllocHandle GetAllocationListBegin() const override; + AllocHandle GetNextAllocation(AllocHandle prevAlloc) const override; + UINT64 GetNextFreeRegionSize(AllocHandle alloc) const override; + void* GetAllocationPrivateData(AllocHandle allocHandle) const override; + void SetAllocationPrivateData(AllocHandle allocHandle, void* privateData) override; + + void AddStatistics(Statistics& inoutStats) const override; + void AddDetailedStatistics(DetailedStatistics& inoutStats) const override; + void WriteAllocationInfoToJson(JsonWriter& json) const override; + +private: + /* + There are two suballocation vectors, used in ping-pong way. + The one with index m_1stVectorIndex is called 1st. + The one with index (m_1stVectorIndex ^ 1) is called 2nd. + 2nd can be non-empty only when 1st is not empty. + When 2nd is not empty, m_2ndVectorMode indicates its mode of operation. + */ + typedef Vector SuballocationVectorType; + + enum ALLOC_REQUEST_TYPE + { + ALLOC_REQUEST_UPPER_ADDRESS, + ALLOC_REQUEST_END_OF_1ST, + ALLOC_REQUEST_END_OF_2ND, + }; + + enum SECOND_VECTOR_MODE + { + SECOND_VECTOR_EMPTY, + /* + Suballocations in 2nd vector are created later than the ones in 1st, but they + all have smaller offset. + */ + SECOND_VECTOR_RING_BUFFER, + /* + Suballocations in 2nd vector are upper side of double stack. + They all have offsets higher than those in 1st vector. + Top of this stack means smaller offsets, but higher indices in this vector. + */ + SECOND_VECTOR_DOUBLE_STACK, + }; + + UINT64 m_SumFreeSize; + SuballocationVectorType m_Suballocations0, m_Suballocations1; + UINT32 m_1stVectorIndex; + SECOND_VECTOR_MODE m_2ndVectorMode; + // Number of items in 1st vector with hAllocation = null at the beginning. + size_t m_1stNullItemsBeginCount; + // Number of other items in 1st vector with hAllocation = null somewhere in the middle. + size_t m_1stNullItemsMiddleCount; + // Number of items in 2nd vector with hAllocation = null. + size_t m_2ndNullItemsCount; + + SuballocationVectorType& AccessSuballocations1st() { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; } + SuballocationVectorType& AccessSuballocations2nd() { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; } + const SuballocationVectorType& AccessSuballocations1st() const { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; } + const SuballocationVectorType& AccessSuballocations2nd() const { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; } + + Suballocation& FindSuballocation(UINT64 offset) const; + bool ShouldCompact1st() const; + void CleanupAfterFree(); + + bool CreateAllocationRequest_LowerAddress( + UINT64 allocSize, + UINT64 allocAlignment, + AllocationRequest* pAllocationRequest); + bool CreateAllocationRequest_UpperAddress( + UINT64 allocSize, + UINT64 allocAlignment, + AllocationRequest* pAllocationRequest); + + D3D12MA_CLASS_NO_COPY(BlockMetadata_Linear) +}; + +#ifndef _D3D12MA_BLOCK_METADATA_LINEAR_FUNCTIONS +BlockMetadata_Linear::BlockMetadata_Linear(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual) + : BlockMetadata(allocationCallbacks, isVirtual), + m_SumFreeSize(0), + m_Suballocations0(*allocationCallbacks), + m_Suballocations1(*allocationCallbacks), + m_1stVectorIndex(0), + m_2ndVectorMode(SECOND_VECTOR_EMPTY), + m_1stNullItemsBeginCount(0), + m_1stNullItemsMiddleCount(0), + m_2ndNullItemsCount(0) +{ + D3D12MA_ASSERT(allocationCallbacks); +} + +void BlockMetadata_Linear::Init(UINT64 size) +{ + BlockMetadata::Init(size); + m_SumFreeSize = size; +} + +bool BlockMetadata_Linear::Validate() const +{ + D3D12MA_VALIDATE(GetSumFreeSize() <= GetSize()); + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + D3D12MA_VALIDATE(suballocations2nd.empty() == (m_2ndVectorMode == SECOND_VECTOR_EMPTY)); + D3D12MA_VALIDATE(!suballocations1st.empty() || + suballocations2nd.empty() || + m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER); + + if (!suballocations1st.empty()) + { + // Null item at the beginning should be accounted into m_1stNullItemsBeginCount. + D3D12MA_VALIDATE(suballocations1st[m_1stNullItemsBeginCount].type != SUBALLOCATION_TYPE_FREE); + // Null item at the end should be just pop_back(). + D3D12MA_VALIDATE(suballocations1st.back().type != SUBALLOCATION_TYPE_FREE); + } + if (!suballocations2nd.empty()) + { + // Null item at the end should be just pop_back(). + D3D12MA_VALIDATE(suballocations2nd.back().type != SUBALLOCATION_TYPE_FREE); + } + + D3D12MA_VALIDATE(m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount <= suballocations1st.size()); + D3D12MA_VALIDATE(m_2ndNullItemsCount <= suballocations2nd.size()); + + UINT64 sumUsedSize = 0; + const size_t suballoc1stCount = suballocations1st.size(); + UINT64 offset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const size_t suballoc2ndCount = suballocations2nd.size(); + size_t nullItem2ndCount = 0; + for (size_t i = 0; i < suballoc2ndCount; ++i) { - json.WriteString(L"Type"); - json.WriteString(L"FREE"); - json.WriteString(L"Size"); - json.WriteNumber(suballoc.size); - } - else if(IsVirtual()) - { - json.WriteString(L"Type"); - json.WriteString(L"ALLOCATION"); - json.WriteString(L"Size"); - json.WriteNumber(suballoc.size); - if(suballoc.userData) + const Suballocation& suballoc = suballocations2nd[i]; + const bool currFree = (suballoc.type == SUBALLOCATION_TYPE_FREE); + + const Allocation* alloc = (Allocation*)suballoc.privateData; + if (!IsVirtual()) { - json.WriteString(L"UserData"); - json.WriteNumber((uintptr_t)suballoc.userData); + D3D12MA_VALIDATE(currFree == (alloc == NULL)); } + D3D12MA_VALIDATE(suballoc.offset >= offset); + + if (!currFree) + { + if (!IsVirtual()) + { + D3D12MA_VALIDATE(GetAllocationOffset(alloc->GetAllocHandle()) == suballoc.offset); + D3D12MA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem2ndCount; + } + + offset = suballoc.offset + suballoc.size + GetDebugMargin(); + } + + D3D12MA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); + } + + for (size_t i = 0; i < m_1stNullItemsBeginCount; ++i) + { + const Suballocation& suballoc = suballocations1st[i]; + D3D12MA_VALIDATE(suballoc.type == SUBALLOCATION_TYPE_FREE && + suballoc.privateData == NULL); + } + + size_t nullItem1stCount = m_1stNullItemsBeginCount; + + for (size_t i = m_1stNullItemsBeginCount; i < suballoc1stCount; ++i) + { + const Suballocation& suballoc = suballocations1st[i]; + const bool currFree = (suballoc.type == SUBALLOCATION_TYPE_FREE); + + const Allocation* alloc = (Allocation*)suballoc.privateData; + if (!IsVirtual()) + { + D3D12MA_VALIDATE(currFree == (alloc == NULL)); + } + D3D12MA_VALIDATE(suballoc.offset >= offset); + D3D12MA_VALIDATE(i >= m_1stNullItemsBeginCount || currFree); + + if (!currFree) + { + if (!IsVirtual()) + { + D3D12MA_VALIDATE(GetAllocationOffset(alloc->GetAllocHandle()) == suballoc.offset); + D3D12MA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; } else { - const Allocation* const alloc = (const Allocation*)suballoc.userData; - D3D12MA_ASSERT(alloc); - json.AddAllocationToObject(*alloc); + ++nullItem1stCount; } - json.EndObject(); + + offset = suballoc.offset + suballoc.size + GetDebugMargin(); } - json.EndArray(); - json.EndObject(); -} + D3D12MA_VALIDATE(nullItem1stCount == m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount); -//////////////////////////////////////////////////////////////////////////////// -// Private class NormalBlock implementation - -NormalBlock::NormalBlock( - AllocatorPimpl* allocator, - BlockVector* blockVector, - const D3D12_HEAP_PROPERTIES& heapProps, - D3D12_HEAP_FLAGS heapFlags, - UINT64 size, - UINT id) : - MemoryBlock(allocator, heapProps, heapFlags, size, id), - m_pMetadata(NULL), - m_BlockVector(blockVector) -{ -} - -NormalBlock::~NormalBlock() -{ - if(m_pMetadata != NULL) + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) { - // THIS IS THE MOST IMPORTANT ASSERT IN THE ENTIRE LIBRARY! - // Hitting it means you have some memory leak - unreleased Allocation objects. - D3D12MA_ASSERT(m_pMetadata->IsEmpty() && "Some allocations were not freed before destruction of this memory block!"); - - D3D12MA_DELETE(m_Allocator->GetAllocs(), m_pMetadata); - } -} - -HRESULT NormalBlock::Init(ID3D12ProtectedResourceSession* pProtectedSession) -{ - HRESULT hr = MemoryBlock::Init(pProtectedSession); - if(FAILED(hr)) - { - return hr; - } - - m_pMetadata = D3D12MA_NEW(m_Allocator->GetAllocs(), BlockMetadata_Generic)(&m_Allocator->GetAllocs(), false); - m_pMetadata->Init(m_Size); - - return hr; -} - -bool NormalBlock::Validate() const -{ - D3D12MA_VALIDATE(GetHeap() && - m_pMetadata && - m_pMetadata->GetSize() != 0 && - m_pMetadata->GetSize() == GetSize()); - return m_pMetadata->Validate(); -} - -//////////////////////////////////////////////////////////////////////////////// -// Private class MemoryBlock definition - -MemoryBlock::MemoryBlock( - AllocatorPimpl* allocator, - const D3D12_HEAP_PROPERTIES& heapProps, - D3D12_HEAP_FLAGS heapFlags, - UINT64 size, - UINT id) : - m_Allocator(allocator), - m_HeapProps(heapProps), - m_HeapFlags(heapFlags), - m_Size(size), - m_Id(id) -{ -} - -MemoryBlock::~MemoryBlock() -{ - if(m_Heap) - { - m_Allocator->m_Budget.m_BlockBytes[HeapTypeToIndex(m_HeapProps.Type)] -= m_Size; - m_Heap->Release(); - } -} - -HRESULT MemoryBlock::Init(ID3D12ProtectedResourceSession* pProtectedSession) -{ - D3D12MA_ASSERT(m_Heap == NULL && m_Size > 0); - - D3D12_HEAP_DESC heapDesc = {}; - heapDesc.SizeInBytes = m_Size; - heapDesc.Properties = m_HeapProps; - heapDesc.Alignment = HeapFlagsToAlignment(m_HeapFlags); - heapDesc.Flags = m_HeapFlags; - -#ifdef __ID3D12Device4_INTERFACE_DEFINED__ - HRESULT hr = m_Allocator->GetDevice4()->CreateHeap1(&heapDesc, pProtectedSession, D3D12MA_IID_PPV_ARGS(&m_Heap)); -#else - D3D12MA_ASSERT(pProtectedSession == NULL); - HRESULT hr = m_Allocator->GetDevice()->CreateHeap(&heapDesc, D3D12MA_IID_PPV_ARGS(&m_Heap)); -#endif - if(SUCCEEDED(hr)) - { - m_Allocator->m_Budget.m_BlockBytes[HeapTypeToIndex(m_HeapProps.Type)] += m_Size; - } - return hr; -} - -//////////////////////////////////////////////////////////////////////////////// -// Private class CommittedAllocationList implementation - -CommittedAllocationList::CommittedAllocationList() -{ -} - -void CommittedAllocationList::Init(bool useMutex, D3D12_HEAP_TYPE heapType, PoolPimpl* pool) -{ - m_UseMutex = useMutex; - m_HeapType = heapType; - m_Pool = pool; -} - -CommittedAllocationList::~CommittedAllocationList() -{ - if(!m_AllocationList.IsEmpty()) - { - D3D12MA_ASSERT(0 && "Unfreed committed allocations found!"); - } -} - -void CommittedAllocationList::CalculateStats(StatInfo& outStats) -{ - ZeroMemory(&outStats, sizeof(outStats)); - outStats.AllocationSizeMin = UINT64_MAX; - outStats.UnusedRangeSizeMin = UINT64_MAX; - - MutexLockRead lock(m_Mutex, m_UseMutex); - - for(Allocation* alloc = m_AllocationList.Front(); - alloc != NULL; alloc = m_AllocationList.GetNext(alloc)) - { - const UINT64 size = alloc->GetSize(); - ++outStats.BlockCount; - ++outStats.AllocationCount; - outStats.UsedBytes += size; - if(size > outStats.AllocationSizeMax) - outStats.AllocationSizeMax = size; - if(size < outStats.AllocationSizeMin) - outStats.AllocationSizeMin = size; - } -} - -void CommittedAllocationList::BuildStatsString(JsonWriter& json) -{ - MutexLockRead lock(m_Mutex, m_UseMutex); - - json.BeginArray(); - for(Allocation* alloc = m_AllocationList.Front(); - alloc != NULL; alloc = m_AllocationList.GetNext(alloc)) - { - json.BeginObject(true); - json.AddAllocationToObject(*alloc); - json.EndObject(); - } - json.EndArray(); -} - -void CommittedAllocationList::Register(Allocation* alloc) -{ - MutexLockWrite lock(m_Mutex, m_UseMutex); - m_AllocationList.PushBack(alloc); -} - -void CommittedAllocationList::Unregister(Allocation* alloc) -{ - MutexLockWrite lock(m_Mutex, m_UseMutex); - m_AllocationList.Remove(alloc); -} - -//////////////////////////////////////////////////////////////////////////////// -// Private class BlockVector implementation - -BlockVector::BlockVector( - AllocatorPimpl* hAllocator, - const D3D12_HEAP_PROPERTIES& heapProps, - D3D12_HEAP_FLAGS heapFlags, - UINT64 preferredBlockSize, - size_t minBlockCount, - size_t maxBlockCount, - bool explicitBlockSize, - UINT64 minAllocationAlignment, - ID3D12ProtectedResourceSession* pProtectedSession) : - m_hAllocator(hAllocator), - m_HeapProps(heapProps), - m_HeapFlags(heapFlags), - m_PreferredBlockSize(preferredBlockSize), - m_MinBlockCount(minBlockCount), - m_MaxBlockCount(maxBlockCount), - m_ExplicitBlockSize(explicitBlockSize), - m_MinAllocationAlignment(minAllocationAlignment), - m_ProtectedSession(pProtectedSession), - m_HasEmptyBlock(false), - m_Blocks(hAllocator->GetAllocs()), - m_NextBlockId(0) -{ -} - -BlockVector::~BlockVector() -{ - for(size_t i = m_Blocks.size(); i--; ) - { - D3D12MA_DELETE(m_hAllocator->GetAllocs(), m_Blocks[i]); - } -} - -HRESULT BlockVector::CreateMinBlocks() -{ - for(size_t i = 0; i < m_MinBlockCount; ++i) - { - HRESULT hr = CreateBlock(m_PreferredBlockSize, NULL); - if(FAILED(hr)) + const size_t suballoc2ndCount = suballocations2nd.size(); + size_t nullItem2ndCount = 0; + for (size_t i = suballoc2ndCount; i--; ) { - return hr; - } - } - return S_OK; -} + const Suballocation& suballoc = suballocations2nd[i]; + const bool currFree = (suballoc.type == SUBALLOCATION_TYPE_FREE); -bool BlockVector::IsEmpty() -{ - MutexLockRead lock(m_Mutex, m_hAllocator->UseMutex()); - return m_Blocks.empty(); -} - -HRESULT BlockVector::Allocate( - UINT64 size, - UINT64 alignment, - const ALLOCATION_DESC& allocDesc, - size_t allocationCount, - Allocation** pAllocations) -{ - size_t allocIndex; - HRESULT hr = S_OK; - - { - MutexLockWrite lock(m_Mutex, m_hAllocator->UseMutex()); - for(allocIndex = 0; allocIndex < allocationCount; ++allocIndex) - { - hr = AllocatePage( - size, - alignment, - allocDesc, - pAllocations + allocIndex); - if(FAILED(hr)) + const Allocation* alloc = (Allocation*)suballoc.privateData; + if (!IsVirtual()) { - break; + D3D12MA_VALIDATE(currFree == (alloc == NULL)); } - } - } + D3D12MA_VALIDATE(suballoc.offset >= offset); - if(FAILED(hr)) - { - // Free all already created allocations. - while(allocIndex--) - { - Free(pAllocations[allocIndex]); - } - ZeroMemory(pAllocations, sizeof(Allocation*) * allocationCount); - } - - return hr; -} - -HRESULT BlockVector::AllocatePage( - UINT64 size, - UINT64 alignment, - const ALLOCATION_DESC& allocDesc, - Allocation** pAllocation) -{ - // Early reject: requested allocation size is larger that maximum block size for this block vector. - if(size + 2 * D3D12MA_DEBUG_MARGIN > m_PreferredBlockSize) - { - return E_OUTOFMEMORY; - } - - UINT64 freeMemory = UINT64_MAX; - if(IsHeapTypeStandard(m_HeapProps.Type)) - { - Budget budget = {}; - m_hAllocator->GetBudgetForHeapType(budget, m_HeapProps.Type); - freeMemory = (budget.UsageBytes < budget.BudgetBytes) ? (budget.BudgetBytes - budget.UsageBytes) : 0; - } - - const bool canCreateNewBlock = - ((allocDesc.Flags & ALLOCATION_FLAG_NEVER_ALLOCATE) == 0) && - (m_Blocks.size() < m_MaxBlockCount) && - // Even if we don't have to stay within budget with this allocation, when the - // budget would be exceeded, we don't want to allocate new blocks, but always - // create resources as committed. - freeMemory >= size; - - // 1. Search existing allocations - { - // Forward order in m_Blocks - prefer blocks with smallest amount of free space. - for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex ) - { - NormalBlock* const pCurrBlock = m_Blocks[blockIndex]; - D3D12MA_ASSERT(pCurrBlock); - HRESULT hr = AllocateFromBlock( - pCurrBlock, - size, - alignment, - allocDesc.Flags, - pAllocation); - if(SUCCEEDED(hr)) + if (!currFree) { - return hr; - } - } - } - - // 2. Try to create new block. - if(canCreateNewBlock) - { - // Calculate optimal size for new block. - UINT64 newBlockSize = m_PreferredBlockSize; - UINT newBlockSizeShift = 0; - - if(!m_ExplicitBlockSize) - { - // Allocate 1/8, 1/4, 1/2 as first blocks. - const UINT64 maxExistingBlockSize = CalcMaxBlockSize(); - for(UINT i = 0; i < NEW_BLOCK_SIZE_SHIFT_MAX; ++i) - { - const UINT64 smallerNewBlockSize = newBlockSize / 2; - if(smallerNewBlockSize > maxExistingBlockSize && smallerNewBlockSize >= size * 2) + if (!IsVirtual()) { - newBlockSize = smallerNewBlockSize; - ++newBlockSizeShift; + D3D12MA_VALIDATE(GetAllocationOffset(alloc->GetAllocHandle()) == suballoc.offset); + D3D12MA_VALIDATE(alloc->GetSize() == suballoc.size); } - else - { - break; - } - } - } - - size_t newBlockIndex = 0; - HRESULT hr = newBlockSize <= freeMemory ? - CreateBlock(newBlockSize, &newBlockIndex) : E_OUTOFMEMORY; - // Allocation of this size failed? Try 1/2, 1/4, 1/8 of m_PreferredBlockSize. - if(!m_ExplicitBlockSize) - { - while(FAILED(hr) && newBlockSizeShift < NEW_BLOCK_SIZE_SHIFT_MAX) - { - const UINT64 smallerNewBlockSize = newBlockSize / 2; - if(smallerNewBlockSize >= size) - { - newBlockSize = smallerNewBlockSize; - ++newBlockSizeShift; - hr = newBlockSize <= freeMemory ? - CreateBlock(newBlockSize, &newBlockIndex) : E_OUTOFMEMORY; - } - else - { - break; - } - } - } - - if(SUCCEEDED(hr)) - { - NormalBlock* const pBlock = m_Blocks[newBlockIndex]; - D3D12MA_ASSERT(pBlock->m_pMetadata->GetSize() >= size); - - hr = AllocateFromBlock( - pBlock, - size, - alignment, - allocDesc.Flags, - pAllocation); - if(SUCCEEDED(hr)) - { - return hr; + sumUsedSize += suballoc.size; } else { - // Allocation from new block failed, possibly due to D3D12MA_DEBUG_MARGIN or alignment. - return E_OUTOFMEMORY; + ++nullItem2ndCount; } + + offset = suballoc.offset + suballoc.size + GetDebugMargin(); } + + D3D12MA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); } - return E_OUTOFMEMORY; + D3D12MA_VALIDATE(offset <= GetSize()); + D3D12MA_VALIDATE(m_SumFreeSize == GetSize() - sumUsedSize); + + return true; } -void BlockVector::Free(Allocation* hAllocation) +size_t BlockMetadata_Linear::GetAllocationCount() const { - NormalBlock* pBlockToDelete = NULL; + return AccessSuballocations1st().size() - m_1stNullItemsBeginCount - m_1stNullItemsMiddleCount + + AccessSuballocations2nd().size() - m_2ndNullItemsCount; +} - bool budgetExceeded = false; - if(IsHeapTypeStandard(m_HeapProps.Type)) +size_t BlockMetadata_Linear::GetFreeRegionsCount() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + D3D12MA_ASSERT(0); + return SIZE_MAX; +} + +void BlockMetadata_Linear::GetAllocationInfo(AllocHandle allocHandle, VIRTUAL_ALLOCATION_INFO& outInfo) const +{ + const Suballocation& suballoc = FindSuballocation((UINT64)allocHandle - 1); + outInfo.Offset = suballoc.offset; + outInfo.Size = suballoc.size; + outInfo.pPrivateData = suballoc.privateData; +} + +bool BlockMetadata_Linear::CreateAllocationRequest( + UINT64 allocSize, + UINT64 allocAlignment, + bool upperAddress, + UINT32 strategy, + AllocationRequest* pAllocationRequest) +{ + D3D12MA_ASSERT(allocSize > 0 && "Cannot allocate empty block!"); + D3D12MA_ASSERT(pAllocationRequest != NULL); + D3D12MA_HEAVY_ASSERT(Validate()); + pAllocationRequest->size = allocSize; + return upperAddress ? + CreateAllocationRequest_UpperAddress( + allocSize, allocAlignment, pAllocationRequest) : + CreateAllocationRequest_LowerAddress( + allocSize, allocAlignment, pAllocationRequest); +} + +void BlockMetadata_Linear::Alloc( + const AllocationRequest& request, + UINT64 allocSize, + void* privateData) +{ + UINT64 offset = (UINT64)request.allocHandle - 1; + const Suballocation newSuballoc = { offset, request.size, privateData, SUBALLOCATION_TYPE_ALLOCATION }; + + switch (request.algorithmData) { - Budget budget = {}; - m_hAllocator->GetBudgetForHeapType(budget, m_HeapProps.Type); - budgetExceeded = budget.UsageBytes >= budget.BudgetBytes; + case ALLOC_REQUEST_UPPER_ADDRESS: + { + D3D12MA_ASSERT(m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER && + "CRITICAL ERROR: Trying to use linear allocator as double stack while it was already used as ring buffer."); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + suballocations2nd.push_back(newSuballoc); + m_2ndVectorMode = SECOND_VECTOR_DOUBLE_STACK; + break; + } + case ALLOC_REQUEST_END_OF_1ST: + { + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + + D3D12MA_ASSERT(suballocations1st.empty() || + offset >= suballocations1st.back().offset + suballocations1st.back().size); + // Check if it fits before the end of the block. + D3D12MA_ASSERT(offset + request.size <= GetSize()); + + suballocations1st.push_back(newSuballoc); + break; + } + case ALLOC_REQUEST_END_OF_2ND: + { + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + // New allocation at the end of 2-part ring buffer, so before first allocation from 1st vector. + D3D12MA_ASSERT(!suballocations1st.empty() && + offset + request.size <= suballocations1st[m_1stNullItemsBeginCount].offset); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + switch (m_2ndVectorMode) + { + case SECOND_VECTOR_EMPTY: + // First allocation from second part ring buffer. + D3D12MA_ASSERT(suballocations2nd.empty()); + m_2ndVectorMode = SECOND_VECTOR_RING_BUFFER; + break; + case SECOND_VECTOR_RING_BUFFER: + // 2-part ring buffer is already started. + D3D12MA_ASSERT(!suballocations2nd.empty()); + break; + case SECOND_VECTOR_DOUBLE_STACK: + D3D12MA_ASSERT(0 && "CRITICAL ERROR: Trying to use linear allocator as ring buffer while it was already used as double stack."); + break; + default: + D3D12MA_ASSERT(0); + } + + suballocations2nd.push_back(newSuballoc); + break; + } + default: + D3D12MA_ASSERT(0 && "CRITICAL INTERNAL ERROR."); + } + m_SumFreeSize -= newSuballoc.size; +} + +void BlockMetadata_Linear::Free(AllocHandle allocHandle) +{ + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + UINT64 offset = (UINT64)allocHandle - 1; + + if (!suballocations1st.empty()) + { + // First allocation: Mark it as next empty at the beginning. + Suballocation& firstSuballoc = suballocations1st[m_1stNullItemsBeginCount]; + if (firstSuballoc.offset == offset) + { + firstSuballoc.type = SUBALLOCATION_TYPE_FREE; + firstSuballoc.privateData = NULL; + m_SumFreeSize += firstSuballoc.size; + ++m_1stNullItemsBeginCount; + CleanupAfterFree(); + return; + } } - // Scope for lock. + // Last allocation in 2-part ring buffer or top of upper stack (same logic). + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER || + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) { - MutexLockWrite lock(m_Mutex, m_hAllocator->UseMutex()); - - NormalBlock* pBlock = hAllocation->m_Placed.block; - - pBlock->m_pMetadata->FreeAtOffset(hAllocation->GetOffset()); - D3D12MA_HEAVY_ASSERT(pBlock->Validate()); - - const size_t blockCount = m_Blocks.size(); - // pBlock became empty after this deallocation. - if(pBlock->m_pMetadata->IsEmpty()) + Suballocation& lastSuballoc = suballocations2nd.back(); + if (lastSuballoc.offset == offset) { - // Already has empty Allocation. We don't want to have two, so delete this one. - if((m_HasEmptyBlock || budgetExceeded) && - blockCount > m_MinBlockCount) + m_SumFreeSize += lastSuballoc.size; + suballocations2nd.pop_back(); + CleanupAfterFree(); + return; + } + } + // Last allocation in 1st vector. + else if (m_2ndVectorMode == SECOND_VECTOR_EMPTY) + { + Suballocation& lastSuballoc = suballocations1st.back(); + if (lastSuballoc.offset == offset) + { + m_SumFreeSize += lastSuballoc.size; + suballocations1st.pop_back(); + CleanupAfterFree(); + return; + } + } + + Suballocation refSuballoc; + refSuballoc.offset = offset; + // Rest of members stays uninitialized intentionally for better performance. + + // Item from the middle of 1st vector. + { + const SuballocationVectorType::iterator it = BinaryFindSorted( + suballocations1st.begin() + m_1stNullItemsBeginCount, + suballocations1st.end(), + refSuballoc, + SuballocationOffsetLess()); + if (it != suballocations1st.end()) + { + it->type = SUBALLOCATION_TYPE_FREE; + it->privateData = NULL; + ++m_1stNullItemsMiddleCount; + m_SumFreeSize += it->size; + CleanupAfterFree(); + return; + } + } + + if (m_2ndVectorMode != SECOND_VECTOR_EMPTY) + { + // Item from the middle of 2nd vector. + const SuballocationVectorType::iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? + BinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, SuballocationOffsetLess()) : + BinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, SuballocationOffsetGreater()); + if (it != suballocations2nd.end()) + { + it->type = SUBALLOCATION_TYPE_FREE; + it->privateData = NULL; + ++m_2ndNullItemsCount; + m_SumFreeSize += it->size; + CleanupAfterFree(); + return; + } + } + + D3D12MA_ASSERT(0 && "Allocation to free not found in linear allocator!"); +} + +void BlockMetadata_Linear::Clear() +{ + m_SumFreeSize = GetSize(); + m_Suballocations0.clear(); + m_Suballocations1.clear(); + // Leaving m_1stVectorIndex unchanged - it doesn't matter. + m_2ndVectorMode = SECOND_VECTOR_EMPTY; + m_1stNullItemsBeginCount = 0; + m_1stNullItemsMiddleCount = 0; + m_2ndNullItemsCount = 0; +} + +AllocHandle BlockMetadata_Linear::GetAllocationListBegin() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + D3D12MA_ASSERT(0); + return (AllocHandle)0; +} + +AllocHandle BlockMetadata_Linear::GetNextAllocation(AllocHandle prevAlloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + D3D12MA_ASSERT(0); + return (AllocHandle)0; +} + +UINT64 BlockMetadata_Linear::GetNextFreeRegionSize(AllocHandle alloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + D3D12MA_ASSERT(0); + return 0; +} + +void* BlockMetadata_Linear::GetAllocationPrivateData(AllocHandle allocHandle) const +{ + return FindSuballocation((UINT64)allocHandle - 1).privateData; +} + +void BlockMetadata_Linear::SetAllocationPrivateData(AllocHandle allocHandle, void* privateData) +{ + Suballocation& suballoc = FindSuballocation((UINT64)allocHandle - 1); + suballoc.privateData = privateData; +} + +void BlockMetadata_Linear::AddStatistics(Statistics& inoutStats) const +{ + inoutStats.BlockCount++; + inoutStats.AllocationCount += (UINT)GetAllocationCount(); + inoutStats.BlockBytes += GetSize(); + inoutStats.AllocationBytes += GetSize() - m_SumFreeSize; +} + +void BlockMetadata_Linear::AddDetailedStatistics(DetailedStatistics& inoutStats) const +{ + inoutStats.Stats.BlockCount++; + inoutStats.Stats.BlockBytes += GetSize(); + + const UINT64 size = GetSize(); + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + UINT64 lastOffset = 0; + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const UINT64 freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].privateData == NULL) { - pBlockToDelete = pBlock; - Remove(pBlock); + ++nextAlloc2ndIndex; } - // We now have first empty block. + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const Suballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const UINT64 unusedRangeSize = suballoc.offset - lastOffset; + AddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + AddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. else { - m_HasEmptyBlock = true; - } - } - // pBlock didn't become empty, but we have another empty block - find and free that one. - // (This is optional, heuristics.) - else if(m_HasEmptyBlock && blockCount > m_MinBlockCount) - { - NormalBlock* pLastBlock = m_Blocks.back(); - if(pLastBlock->m_pMetadata->IsEmpty()) - { - pBlockToDelete = pLastBlock; - m_Blocks.pop_back(); - m_HasEmptyBlock = false; - } - } + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + if (lastOffset < freeSpace2ndTo1stEnd) + { + const UINT64 unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; + AddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } - IncrementallySortBlocks(); + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } } - // Destruction of a free Allocation. Deferred until this point, outside of mutex - // lock, for performance reason. - if(pBlockToDelete != NULL) + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + const UINT64 freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) { - D3D12MA_DELETE(m_hAllocator->GetAllocs(), pBlockToDelete); + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].privateData == NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const Suballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const UINT64 unusedRangeSize = suballoc.offset - lastOffset; + AddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + AddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + if (lastOffset < freeSpace1stTo2ndEnd) + { + const UINT64 unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; + AddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].privateData == NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const Suballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const UINT64 unusedRangeSize = suballoc.offset - lastOffset; + AddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + AddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to size. + if (lastOffset < size) + { + const UINT64 unusedRangeSize = size - lastOffset; + AddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. + lastOffset = size; + } + } } } -HRESULT BlockVector::CreateResource( - UINT64 size, - UINT64 alignment, - const ALLOCATION_DESC& allocDesc, - const D3D12_RESOURCE_DESC& resourceDesc, - D3D12_RESOURCE_STATES InitialResourceState, - const D3D12_CLEAR_VALUE *pOptimizedClearValue, - Allocation** ppAllocation, - REFIID riidResource, - void** ppvResource) +void BlockMetadata_Linear::WriteAllocationInfoToJson(JsonWriter& json) const { - HRESULT hr = Allocate(size, alignment, allocDesc, 1, ppAllocation); - if(SUCCEEDED(hr)) + const UINT64 size = GetSize(); + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + // FIRST PASS + + size_t unusedRangeCount = 0; + UINT64 usedBytes = 0; + + UINT64 lastOffset = 0; + + size_t alloc2ndCount = 0; + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) { - ID3D12Resource* res = NULL; - hr = m_hAllocator->GetDevice()->CreatePlacedResource( - (*ppAllocation)->m_Placed.block->GetHeap(), - (*ppAllocation)->GetOffset(), - &resourceDesc, - InitialResourceState, - pOptimizedClearValue, - D3D12MA_IID_PPV_ARGS(&res)); - if(SUCCEEDED(hr)) + const UINT64 freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) { - if(ppvResource != NULL) + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].privateData == NULL) { - hr = res->QueryInterface(riidResource, ppvResource); + ++nextAlloc2ndIndex; } - if(SUCCEEDED(hr)) + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) { - (*ppAllocation)->SetResource(res, &resourceDesc); + const Suballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc2ndCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; } + // We are at the end. else { - res->Release(); - SAFE_RELEASE(*ppAllocation); + if (lastOffset < freeSpace2ndTo1stEnd) + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + ++unusedRangeCount; + } + + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; } } + } + + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + size_t alloc1stCount = 0; + const UINT64 freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].privateData == NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const Suballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc1stCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + if (lastOffset < size) + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + ++unusedRangeCount; + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].privateData == NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const Suballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc2ndCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < size) + { + // There is free space from lastOffset to size. + ++unusedRangeCount; + } + + // End of loop. + lastOffset = size; + } + } + } + + const UINT64 unusedBytes = size - usedBytes; + PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount); + + // SECOND PASS + lastOffset = 0; + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const UINT64 freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].privateData == NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const Suballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const UINT64 unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.privateData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace2ndTo1stEnd) + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + const UINT64 unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + nextAlloc1stIndex = m_1stNullItemsBeginCount; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].privateData == NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const Suballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const UINT64 unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.privateData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace1stTo2ndEnd) + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + const UINT64 unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].privateData == NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const Suballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const UINT64 unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.privateData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < size) + { + // There is free space from lastOffset to size. + const UINT64 unusedRangeSize = size - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = size; + } + } + } + + PrintDetailedMap_End(json); +} + +Suballocation& BlockMetadata_Linear::FindSuballocation(UINT64 offset) const +{ + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + Suballocation refSuballoc; + refSuballoc.offset = offset; + // Rest of members stays uninitialized intentionally for better performance. + + // Item from the 1st vector. + { + const SuballocationVectorType::iterator it = BinaryFindSorted( + suballocations1st.cbegin() + m_1stNullItemsBeginCount, + suballocations1st.cend(), + refSuballoc, + SuballocationOffsetLess()); + if (it != suballocations1st.cend()) + { + return *it; + } + } + + if (m_2ndVectorMode != SECOND_VECTOR_EMPTY) + { + // Rest of members stays uninitialized intentionally for better performance. + const SuballocationVectorType::iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? + BinaryFindSorted(suballocations2nd.cbegin(), suballocations2nd.cend(), refSuballoc, SuballocationOffsetLess()) : + BinaryFindSorted(suballocations2nd.cbegin(), suballocations2nd.cend(), refSuballoc, SuballocationOffsetGreater()); + if (it != suballocations2nd.cend()) + { + return *it; + } + } + + D3D12MA_ASSERT(0 && "Allocation not found in linear allocator!"); + return *suballocations1st.crbegin(); // Should never occur. +} + +bool BlockMetadata_Linear::ShouldCompact1st() const +{ + const size_t nullItemCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount; + const size_t suballocCount = AccessSuballocations1st().size(); + return suballocCount > 32 && nullItemCount * 2 >= (suballocCount - nullItemCount) * 3; +} + +void BlockMetadata_Linear::CleanupAfterFree() +{ + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (IsEmpty()) + { + suballocations1st.clear(); + suballocations2nd.clear(); + m_1stNullItemsBeginCount = 0; + m_1stNullItemsMiddleCount = 0; + m_2ndNullItemsCount = 0; + m_2ndVectorMode = SECOND_VECTOR_EMPTY; + } + else + { + const size_t suballoc1stCount = suballocations1st.size(); + const size_t nullItem1stCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount; + D3D12MA_ASSERT(nullItem1stCount <= suballoc1stCount); + + // Find more null items at the beginning of 1st vector. + while (m_1stNullItemsBeginCount < suballoc1stCount && + suballocations1st[m_1stNullItemsBeginCount].type == SUBALLOCATION_TYPE_FREE) + { + ++m_1stNullItemsBeginCount; + --m_1stNullItemsMiddleCount; + } + + // Find more null items at the end of 1st vector. + while (m_1stNullItemsMiddleCount > 0 && + suballocations1st.back().type == SUBALLOCATION_TYPE_FREE) + { + --m_1stNullItemsMiddleCount; + suballocations1st.pop_back(); + } + + // Find more null items at the end of 2nd vector. + while (m_2ndNullItemsCount > 0 && + suballocations2nd.back().type == SUBALLOCATION_TYPE_FREE) + { + --m_2ndNullItemsCount; + suballocations2nd.pop_back(); + } + + // Find more null items at the beginning of 2nd vector. + while (m_2ndNullItemsCount > 0 && + suballocations2nd[0].type == SUBALLOCATION_TYPE_FREE) + { + --m_2ndNullItemsCount; + suballocations2nd.remove(0); + } + + if (ShouldCompact1st()) + { + const size_t nonNullItemCount = suballoc1stCount - nullItem1stCount; + size_t srcIndex = m_1stNullItemsBeginCount; + for (size_t dstIndex = 0; dstIndex < nonNullItemCount; ++dstIndex) + { + while (suballocations1st[srcIndex].type == SUBALLOCATION_TYPE_FREE) + { + ++srcIndex; + } + if (dstIndex != srcIndex) + { + suballocations1st[dstIndex] = suballocations1st[srcIndex]; + } + ++srcIndex; + } + suballocations1st.resize(nonNullItemCount); + m_1stNullItemsBeginCount = 0; + m_1stNullItemsMiddleCount = 0; + } + + // 2nd vector became empty. + if (suballocations2nd.empty()) + { + m_2ndVectorMode = SECOND_VECTOR_EMPTY; + } + + // 1st vector became empty. + if (suballocations1st.size() - m_1stNullItemsBeginCount == 0) + { + suballocations1st.clear(); + m_1stNullItemsBeginCount = 0; + + if (!suballocations2nd.empty() && m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + // Swap 1st with 2nd. Now 2nd is empty. + m_2ndVectorMode = SECOND_VECTOR_EMPTY; + m_1stNullItemsMiddleCount = m_2ndNullItemsCount; + while (m_1stNullItemsBeginCount < suballocations2nd.size() && + suballocations2nd[m_1stNullItemsBeginCount].type == SUBALLOCATION_TYPE_FREE) + { + ++m_1stNullItemsBeginCount; + --m_1stNullItemsMiddleCount; + } + m_2ndNullItemsCount = 0; + m_1stVectorIndex ^= 1; + } + } + } + + D3D12MA_HEAVY_ASSERT(Validate()); +} + +bool BlockMetadata_Linear::CreateAllocationRequest_LowerAddress( + UINT64 allocSize, + UINT64 allocAlignment, + AllocationRequest* pAllocationRequest) +{ + const UINT64 blockSize = GetSize(); + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + // Try to allocate at the end of 1st vector. + + UINT64 resultBaseOffset = 0; + if (!suballocations1st.empty()) + { + const Suballocation& lastSuballoc = suballocations1st.back(); + resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + GetDebugMargin(); + } + + // Start from offset equal to beginning of free space. + UINT64 resultOffset = resultBaseOffset; + // Apply alignment. + resultOffset = AlignUp(resultOffset, allocAlignment); + + const UINT64 freeSpaceEnd = m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? + suballocations2nd.back().offset : blockSize; + + // There is enough free space at the end after alignment. + if (resultOffset + allocSize + GetDebugMargin() <= freeSpaceEnd) + { + // All tests passed: Success. + pAllocationRequest->allocHandle = (AllocHandle)(resultOffset + 1); + // pAllocationRequest->item, customData unused. + pAllocationRequest->algorithmData = ALLOC_REQUEST_END_OF_1ST; + return true; + } + } + + // Wrap-around to end of 2nd vector. Try to allocate there, watching for the + // beginning of 1st vector as the end of free space. + if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + D3D12MA_ASSERT(!suballocations1st.empty()); + + UINT64 resultBaseOffset = 0; + if (!suballocations2nd.empty()) + { + const Suballocation& lastSuballoc = suballocations2nd.back(); + resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + GetDebugMargin(); + } + + // Start from offset equal to beginning of free space. + UINT64 resultOffset = resultBaseOffset; + + // Apply alignment. + resultOffset = AlignUp(resultOffset, allocAlignment); + + size_t index1st = m_1stNullItemsBeginCount; + // There is enough free space at the end after alignment. + if ((index1st == suballocations1st.size() && resultOffset + allocSize + GetDebugMargin() <= blockSize) || + (index1st < suballocations1st.size() && resultOffset + allocSize + GetDebugMargin() <= suballocations1st[index1st].offset)) + { + // All tests passed: Success. + pAllocationRequest->allocHandle = (AllocHandle)(resultOffset + 1); + pAllocationRequest->algorithmData = ALLOC_REQUEST_END_OF_2ND; + // pAllocationRequest->item, customData unused. + return true; + } + } + return false; +} + +bool BlockMetadata_Linear::CreateAllocationRequest_UpperAddress( + UINT64 allocSize, + UINT64 allocAlignment, + AllocationRequest* pAllocationRequest) +{ + const UINT64 blockSize = GetSize(); + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + D3D12MA_ASSERT(0 && "Trying to use pool with linear algorithm as double stack, while it is already being used as ring buffer."); + return false; + } + + // Try to allocate before 2nd.back(), or end of block if 2nd.empty(). + if (allocSize > blockSize) + { + return false; + } + UINT64 resultBaseOffset = blockSize - allocSize; + if (!suballocations2nd.empty()) + { + const Suballocation& lastSuballoc = suballocations2nd.back(); + resultBaseOffset = lastSuballoc.offset - allocSize; + if (allocSize > lastSuballoc.offset) + { + return false; + } + } + + // Start from offset equal to end of free space. + UINT64 resultOffset = resultBaseOffset; + // Apply debugMargin at the end. + if (GetDebugMargin() > 0) + { + if (resultOffset < GetDebugMargin()) + { + return false; + } + resultOffset -= GetDebugMargin(); + } + + // Apply alignment. + resultOffset = AlignDown(resultOffset, allocAlignment); + // There is enough free space. + const UINT64 endOf1st = !suballocations1st.empty() ? + suballocations1st.back().offset + suballocations1st.back().size : 0; + + if (endOf1st + GetDebugMargin() <= resultOffset) + { + // All tests passed: Success. + pAllocationRequest->allocHandle = (AllocHandle)(resultOffset + 1); + // pAllocationRequest->item unused. + pAllocationRequest->algorithmData = ALLOC_REQUEST_UPPER_ADDRESS; + return true; + } + return false; +} +#endif // _D3D12MA_BLOCK_METADATA_LINEAR_FUNCTIONS +#endif // _D3D12MA_BLOCK_METADATA_LINEAR + +#ifndef _D3D12MA_BLOCK_METADATA_TLSF +class BlockMetadata_TLSF : public BlockMetadata +{ +public: + BlockMetadata_TLSF(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual); + virtual ~BlockMetadata_TLSF(); + + size_t GetAllocationCount() const override { return m_AllocCount; } + size_t GetFreeRegionsCount() const override { return m_BlocksFreeCount + 1; } + UINT64 GetSumFreeSize() const override { return m_BlocksFreeSize + m_NullBlock->size; } + bool IsEmpty() const override { return m_NullBlock->offset == 0; } + UINT64 GetAllocationOffset(AllocHandle allocHandle) const override { return ((Block*)allocHandle)->offset; }; + + void Init(UINT64 size) override; + bool Validate() const override; + void GetAllocationInfo(AllocHandle allocHandle, VIRTUAL_ALLOCATION_INFO& outInfo) const override; + + bool CreateAllocationRequest( + UINT64 allocSize, + UINT64 allocAlignment, + bool upperAddress, + UINT32 strategy, + AllocationRequest* pAllocationRequest) override; + + void Alloc( + const AllocationRequest& request, + UINT64 allocSize, + void* privateData) override; + + void Free(AllocHandle allocHandle) override; + void Clear() override; + + AllocHandle GetAllocationListBegin() const override; + AllocHandle GetNextAllocation(AllocHandle prevAlloc) const override; + UINT64 GetNextFreeRegionSize(AllocHandle alloc) const override; + void* GetAllocationPrivateData(AllocHandle allocHandle) const override; + void SetAllocationPrivateData(AllocHandle allocHandle, void* privateData) override; + + void AddStatistics(Statistics& inoutStats) const override; + void AddDetailedStatistics(DetailedStatistics& inoutStats) const override; + void WriteAllocationInfoToJson(JsonWriter& json) const override; + +private: + // According to original paper it should be preferable 4 or 5: + // M. Masmano, I. Ripoll, A. Crespo, and J. Real "TLSF: a New Dynamic Memory Allocator for Real-Time Systems" + // http://www.gii.upv.es/tlsf/files/ecrts04_tlsf.pdf + static const UINT8 SECOND_LEVEL_INDEX = 5; + static const UINT16 SMALL_BUFFER_SIZE = 256; + static const UINT INITIAL_BLOCK_ALLOC_COUNT = 16; + static const UINT8 MEMORY_CLASS_SHIFT = 7; + static const UINT8 MAX_MEMORY_CLASSES = 65 - MEMORY_CLASS_SHIFT; + + class Block + { + public: + UINT64 offset; + UINT64 size; + Block* prevPhysical; + Block* nextPhysical; + + void MarkFree() { prevFree = NULL; } + void MarkTaken() { prevFree = this; } + bool IsFree() const { return prevFree != this; } + void*& PrivateData() { D3D12MA_HEAVY_ASSERT(!IsFree()); return privateData; } + Block*& PrevFree() { return prevFree; } + Block*& NextFree() { D3D12MA_HEAVY_ASSERT(IsFree()); return nextFree; } + + private: + Block* prevFree; // Address of the same block here indicates that block is taken + union + { + Block* nextFree; + void* privateData; + }; + }; + + size_t m_AllocCount = 0; + // Total number of free blocks besides null block + size_t m_BlocksFreeCount = 0; + // Total size of free blocks excluding null block + UINT64 m_BlocksFreeSize = 0; + UINT32 m_IsFreeBitmap = 0; + UINT8 m_MemoryClasses = 0; + UINT32 m_InnerIsFreeBitmap[MAX_MEMORY_CLASSES]; + UINT32 m_ListsCount = 0; + /* + * 0: 0-3 lists for small buffers + * 1+: 0-(2^SLI-1) lists for normal buffers + */ + Block** m_FreeList = NULL; + PoolAllocator m_BlockAllocator; + Block* m_NullBlock = NULL; + + UINT8 SizeToMemoryClass(UINT64 size) const; + UINT16 SizeToSecondIndex(UINT64 size, UINT8 memoryClass) const; + UINT32 GetListIndex(UINT8 memoryClass, UINT16 secondIndex) const; + UINT32 GetListIndex(UINT64 size) const; + + void RemoveFreeBlock(Block* block); + void InsertFreeBlock(Block* block); + void MergeBlock(Block* block, Block* prev); + + Block* FindFreeBlock(UINT64 size, UINT32& listIndex) const; + bool CheckBlock( + Block& block, + UINT32 listIndex, + UINT64 allocSize, + UINT64 allocAlignment, + AllocationRequest* pAllocationRequest); + + D3D12MA_CLASS_NO_COPY(BlockMetadata_TLSF) +}; + +#ifndef _D3D12MA_BLOCK_METADATA_TLSF_FUNCTIONS +BlockMetadata_TLSF::BlockMetadata_TLSF(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual) + : BlockMetadata(allocationCallbacks, isVirtual), + m_BlockAllocator(*allocationCallbacks, INITIAL_BLOCK_ALLOC_COUNT) +{ + D3D12MA_ASSERT(allocationCallbacks); +} + +BlockMetadata_TLSF::~BlockMetadata_TLSF() +{ + D3D12MA_DELETE_ARRAY(*GetAllocs(), m_FreeList, m_ListsCount); +} + +void BlockMetadata_TLSF::Init(UINT64 size) +{ + BlockMetadata::Init(size); + + m_NullBlock = m_BlockAllocator.Alloc(); + m_NullBlock->size = size; + m_NullBlock->offset = 0; + m_NullBlock->prevPhysical = NULL; + m_NullBlock->nextPhysical = NULL; + m_NullBlock->MarkFree(); + m_NullBlock->NextFree() = NULL; + m_NullBlock->PrevFree() = NULL; + UINT8 memoryClass = SizeToMemoryClass(size); + UINT16 sli = SizeToSecondIndex(size, memoryClass); + m_ListsCount = (memoryClass == 0 ? 0 : (memoryClass - 1) * (1UL << SECOND_LEVEL_INDEX) + sli) + 1; + if (IsVirtual()) + m_ListsCount += 1UL << SECOND_LEVEL_INDEX; + else + m_ListsCount += 4; + + m_MemoryClasses = memoryClass + 2; + memset(m_InnerIsFreeBitmap, 0, MAX_MEMORY_CLASSES * sizeof(UINT32)); + + m_FreeList = D3D12MA_NEW_ARRAY(*GetAllocs(), Block*, m_ListsCount); + memset(m_FreeList, 0, m_ListsCount * sizeof(Block*)); +} + +bool BlockMetadata_TLSF::Validate() const +{ + D3D12MA_VALIDATE(GetSumFreeSize() <= GetSize()); + + UINT64 calculatedSize = m_NullBlock->size; + UINT64 calculatedFreeSize = m_NullBlock->size; + size_t allocCount = 0; + size_t freeCount = 0; + + // Check integrity of free lists + for (UINT32 list = 0; list < m_ListsCount; ++list) + { + Block* block = m_FreeList[list]; + if (block != NULL) + { + D3D12MA_VALIDATE(block->IsFree()); + D3D12MA_VALIDATE(block->PrevFree() == NULL); + while (block->NextFree()) + { + D3D12MA_VALIDATE(block->NextFree()->IsFree()); + D3D12MA_VALIDATE(block->NextFree()->PrevFree() == block); + block = block->NextFree(); + } + } + } + + D3D12MA_VALIDATE(m_NullBlock->nextPhysical == NULL); + if (m_NullBlock->prevPhysical) + { + D3D12MA_VALIDATE(m_NullBlock->prevPhysical->nextPhysical == m_NullBlock); + } + + // Check all blocks + UINT64 nextOffset = m_NullBlock->offset; + for (Block* prev = m_NullBlock->prevPhysical; prev != NULL; prev = prev->prevPhysical) + { + D3D12MA_VALIDATE(prev->offset + prev->size == nextOffset); + nextOffset = prev->offset; + calculatedSize += prev->size; + + UINT32 listIndex = GetListIndex(prev->size); + if (prev->IsFree()) + { + ++freeCount; + // Check if free block belongs to free list + Block* freeBlock = m_FreeList[listIndex]; + D3D12MA_VALIDATE(freeBlock != NULL); + + bool found = false; + do + { + if (freeBlock == prev) + found = true; + + freeBlock = freeBlock->NextFree(); + } while (!found && freeBlock != NULL); + + D3D12MA_VALIDATE(found); + calculatedFreeSize += prev->size; + } else { - SAFE_RELEASE(*ppAllocation); + ++allocCount; + // Check if taken block is not on a free list + Block* freeBlock = m_FreeList[listIndex]; + while (freeBlock) + { + D3D12MA_VALIDATE(freeBlock != prev); + freeBlock = freeBlock->NextFree(); + } + } + + if (prev->prevPhysical) + { + D3D12MA_VALIDATE(prev->prevPhysical->nextPhysical == prev); } } - return hr; + + D3D12MA_VALIDATE(nextOffset == 0); + D3D12MA_VALIDATE(calculatedSize == GetSize()); + D3D12MA_VALIDATE(calculatedFreeSize == GetSumFreeSize()); + D3D12MA_VALIDATE(allocCount == m_AllocCount); + D3D12MA_VALIDATE(freeCount == m_BlocksFreeCount); + + return true; } +void BlockMetadata_TLSF::GetAllocationInfo(AllocHandle allocHandle, VIRTUAL_ALLOCATION_INFO& outInfo) const +{ + Block* block = (Block*)allocHandle; + D3D12MA_ASSERT(!block->IsFree() && "Cannot get allocation info for free block!"); + outInfo.Offset = block->offset; + outInfo.Size = block->size; + outInfo.pPrivateData = block->PrivateData(); +} + +bool BlockMetadata_TLSF::CreateAllocationRequest( + UINT64 allocSize, + UINT64 allocAlignment, + bool upperAddress, + UINT32 strategy, + AllocationRequest* pAllocationRequest) +{ + D3D12MA_ASSERT(allocSize > 0 && "Cannot allocate empty block!"); + D3D12MA_ASSERT(!upperAddress && "ALLOCATION_FLAG_UPPER_ADDRESS can be used only with linear algorithm."); + D3D12MA_ASSERT(pAllocationRequest != NULL); + D3D12MA_HEAVY_ASSERT(Validate()); + + allocSize += GetDebugMargin(); + // Quick check for too small pool + if (allocSize > GetSumFreeSize()) + return false; + + // If no free blocks in pool then check only null block + if (m_BlocksFreeCount == 0) + return CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, pAllocationRequest); + + // Round up to the next block + UINT64 sizeForNextList = allocSize; + UINT64 smallSizeStep = SMALL_BUFFER_SIZE / (IsVirtual() ? 1 << SECOND_LEVEL_INDEX : 4); + if (allocSize > SMALL_BUFFER_SIZE) + { + sizeForNextList += (1ULL << (BitScanMSB(allocSize) - SECOND_LEVEL_INDEX)); + } + else if (allocSize > SMALL_BUFFER_SIZE - smallSizeStep) + sizeForNextList = SMALL_BUFFER_SIZE + 1; + else + sizeForNextList += smallSizeStep; + + UINT32 nextListIndex = 0; + UINT32 prevListIndex = 0; + Block* nextListBlock = NULL; + Block* prevListBlock = NULL; + + // Check blocks according to strategies + if (strategy & ALLOCATION_FLAG_STRATEGY_MIN_TIME) + { + // Quick check for larger block first + nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex); + if (nextListBlock != NULL && CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, pAllocationRequest)) + return true; + + // If not fitted then null block + if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, pAllocationRequest)) + return true; + + // Null block failed, search larger bucket + while (nextListBlock) + { + if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, pAllocationRequest)) + return true; + nextListBlock = nextListBlock->NextFree(); + } + + // Failed again, check best fit bucket + prevListBlock = FindFreeBlock(allocSize, prevListIndex); + while (prevListBlock) + { + if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, pAllocationRequest)) + return true; + prevListBlock = prevListBlock->NextFree(); + } + } + else if (strategy & ALLOCATION_FLAG_STRATEGY_MIN_MEMORY) + { + // Check best fit bucket + prevListBlock = FindFreeBlock(allocSize, prevListIndex); + while (prevListBlock) + { + if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, pAllocationRequest)) + return true; + prevListBlock = prevListBlock->NextFree(); + } + + // If failed check null block + if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, pAllocationRequest)) + return true; + + // Check larger bucket + nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex); + while (nextListBlock) + { + if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, pAllocationRequest)) + return true; + nextListBlock = nextListBlock->NextFree(); + } + } + else if (strategy & ALLOCATION_FLAG_STRATEGY_MIN_OFFSET) + { + // Perform search from the start + Vector blockList(m_BlocksFreeCount, *GetAllocs()); + + size_t i = m_BlocksFreeCount; + for (Block* block = m_NullBlock->prevPhysical; block != NULL; block = block->prevPhysical) + { + if (block->IsFree() && block->size >= allocSize) + blockList[--i] = block; + } + + for (; i < m_BlocksFreeCount; ++i) + { + Block& block = *blockList[i]; + if (CheckBlock(block, GetListIndex(block.size), allocSize, allocAlignment, pAllocationRequest)) + return true; + } + + // If failed check null block + if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, pAllocationRequest)) + return true; + + // Whole range searched, no more memory + return false; + } + else + { + // Check larger bucket + nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex); + while (nextListBlock) + { + if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, pAllocationRequest)) + return true; + nextListBlock = nextListBlock->NextFree(); + } + + // If failed check null block + if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, pAllocationRequest)) + return true; + + // Check best fit bucket + prevListBlock = FindFreeBlock(allocSize, prevListIndex); + while (prevListBlock) + { + if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, pAllocationRequest)) + return true; + prevListBlock = prevListBlock->NextFree(); + } + } + + // Worst case, full search has to be done + while (++nextListIndex < m_ListsCount) + { + nextListBlock = m_FreeList[nextListIndex]; + while (nextListBlock) + { + if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, pAllocationRequest)) + return true; + nextListBlock = nextListBlock->NextFree(); + } + } + + // No more memory sadly + return false; +} + +void BlockMetadata_TLSF::Alloc( + const AllocationRequest& request, + UINT64 allocSize, + void* privateData) +{ + // Get block and pop it from the free list + Block* currentBlock = (Block*)request.allocHandle; + UINT64 offset = request.algorithmData; + D3D12MA_ASSERT(currentBlock != NULL); + D3D12MA_ASSERT(currentBlock->offset <= offset); + + if (currentBlock != m_NullBlock) + RemoveFreeBlock(currentBlock); + + // Append missing alignment to prev block or create new one + UINT64 misssingAlignment = offset - currentBlock->offset; + if (misssingAlignment) + { + Block* prevBlock = currentBlock->prevPhysical; + D3D12MA_ASSERT(prevBlock != NULL && "There should be no missing alignment at offset 0!"); + + if (prevBlock->IsFree() && prevBlock->size != GetDebugMargin()) + { + UINT32 oldList = GetListIndex(prevBlock->size); + prevBlock->size += misssingAlignment; + // Check if new size crosses list bucket + if (oldList != GetListIndex(prevBlock->size)) + { + prevBlock->size -= misssingAlignment; + RemoveFreeBlock(prevBlock); + prevBlock->size += misssingAlignment; + InsertFreeBlock(prevBlock); + } + else + m_BlocksFreeSize += misssingAlignment; + } + else + { + Block* newBlock = m_BlockAllocator.Alloc(); + currentBlock->prevPhysical = newBlock; + prevBlock->nextPhysical = newBlock; + newBlock->prevPhysical = prevBlock; + newBlock->nextPhysical = currentBlock; + newBlock->size = misssingAlignment; + newBlock->offset = currentBlock->offset; + newBlock->MarkTaken(); + + InsertFreeBlock(newBlock); + } + + currentBlock->size -= misssingAlignment; + currentBlock->offset += misssingAlignment; + } + + UINT64 size = request.size + GetDebugMargin(); + if (currentBlock->size == size) + { + if (currentBlock == m_NullBlock) + { + // Setup new null block + m_NullBlock = m_BlockAllocator.Alloc(); + m_NullBlock->size = 0; + m_NullBlock->offset = currentBlock->offset + size; + m_NullBlock->prevPhysical = currentBlock; + m_NullBlock->nextPhysical = NULL; + m_NullBlock->MarkFree(); + m_NullBlock->PrevFree() = NULL; + m_NullBlock->NextFree() = NULL; + currentBlock->nextPhysical = m_NullBlock; + currentBlock->MarkTaken(); + } + } + else + { + D3D12MA_ASSERT(currentBlock->size > size && "Proper block already found, shouldn't find smaller one!"); + + // Create new free block + Block* newBlock = m_BlockAllocator.Alloc(); + newBlock->size = currentBlock->size - size; + newBlock->offset = currentBlock->offset + size; + newBlock->prevPhysical = currentBlock; + newBlock->nextPhysical = currentBlock->nextPhysical; + currentBlock->nextPhysical = newBlock; + currentBlock->size = size; + + if (currentBlock == m_NullBlock) + { + m_NullBlock = newBlock; + m_NullBlock->MarkFree(); + m_NullBlock->NextFree() = NULL; + m_NullBlock->PrevFree() = NULL; + currentBlock->MarkTaken(); + } + else + { + newBlock->nextPhysical->prevPhysical = newBlock; + newBlock->MarkTaken(); + InsertFreeBlock(newBlock); + } + } + currentBlock->PrivateData() = privateData; + + if (GetDebugMargin() > 0) + { + currentBlock->size -= GetDebugMargin(); + Block* newBlock = m_BlockAllocator.Alloc(); + newBlock->size = GetDebugMargin(); + newBlock->offset = currentBlock->offset + currentBlock->size; + newBlock->prevPhysical = currentBlock; + newBlock->nextPhysical = currentBlock->nextPhysical; + newBlock->MarkTaken(); + currentBlock->nextPhysical->prevPhysical = newBlock; + currentBlock->nextPhysical = newBlock; + InsertFreeBlock(newBlock); + } + ++m_AllocCount; +} + +void BlockMetadata_TLSF::Free(AllocHandle allocHandle) +{ + Block* block = (Block*)allocHandle; + Block* next = block->nextPhysical; + D3D12MA_ASSERT(!block->IsFree() && "Block is already free!"); + + --m_AllocCount; + if (GetDebugMargin() > 0) + { + RemoveFreeBlock(next); + MergeBlock(next, block); + block = next; + next = next->nextPhysical; + } + + // Try merging + Block* prev = block->prevPhysical; + if (prev != NULL && prev->IsFree() && prev->size != GetDebugMargin()) + { + RemoveFreeBlock(prev); + MergeBlock(block, prev); + } + + if (!next->IsFree()) + InsertFreeBlock(block); + else if (next == m_NullBlock) + MergeBlock(m_NullBlock, block); + else + { + RemoveFreeBlock(next); + MergeBlock(next, block); + InsertFreeBlock(next); + } +} + +void BlockMetadata_TLSF::Clear() +{ + m_AllocCount = 0; + m_BlocksFreeCount = 0; + m_BlocksFreeSize = 0; + m_IsFreeBitmap = 0; + m_NullBlock->offset = 0; + m_NullBlock->size = GetSize(); + Block* block = m_NullBlock->prevPhysical; + m_NullBlock->prevPhysical = NULL; + while (block) + { + Block* prev = block->prevPhysical; + m_BlockAllocator.Free(block); + block = prev; + } + memset(m_FreeList, 0, m_ListsCount * sizeof(Block*)); + memset(m_InnerIsFreeBitmap, 0, m_MemoryClasses * sizeof(UINT32)); +} + +AllocHandle BlockMetadata_TLSF::GetAllocationListBegin() const +{ + if (m_AllocCount == 0) + return (AllocHandle)0; + + for (Block* block = m_NullBlock->prevPhysical; block; block = block->prevPhysical) + { + if (!block->IsFree()) + return (AllocHandle)block; + } + D3D12MA_ASSERT(false && "If m_AllocCount > 0 then should find any allocation!"); + return (AllocHandle)0; +} + +AllocHandle BlockMetadata_TLSF::GetNextAllocation(AllocHandle prevAlloc) const +{ + Block* startBlock = (Block*)prevAlloc; + D3D12MA_ASSERT(!startBlock->IsFree() && "Incorrect block!"); + + for (Block* block = startBlock->prevPhysical; block; block = block->prevPhysical) + { + if (!block->IsFree()) + return (AllocHandle)block; + } + return (AllocHandle)0; +} + +UINT64 BlockMetadata_TLSF::GetNextFreeRegionSize(AllocHandle alloc) const +{ + Block* block = (Block*)alloc; + D3D12MA_ASSERT(!block->IsFree() && "Incorrect block!"); + + if (block->prevPhysical) + return block->prevPhysical->IsFree() ? block->prevPhysical->size : 0; + return 0; +} + +void* BlockMetadata_TLSF::GetAllocationPrivateData(AllocHandle allocHandle) const +{ + Block* block = (Block*)allocHandle; + D3D12MA_ASSERT(!block->IsFree() && "Cannot get user data for free block!"); + return block->PrivateData(); +} + +void BlockMetadata_TLSF::SetAllocationPrivateData(AllocHandle allocHandle, void* privateData) +{ + Block* block = (Block*)allocHandle; + D3D12MA_ASSERT(!block->IsFree() && "Trying to set user data for not allocated block!"); + block->PrivateData() = privateData; +} + +void BlockMetadata_TLSF::AddStatistics(Statistics& inoutStats) const +{ + inoutStats.BlockCount++; + inoutStats.AllocationCount += static_cast(m_AllocCount); + inoutStats.BlockBytes += GetSize(); + inoutStats.AllocationBytes += GetSize() - GetSumFreeSize(); +} + +void BlockMetadata_TLSF::AddDetailedStatistics(DetailedStatistics& inoutStats) const +{ + inoutStats.Stats.BlockCount++; + inoutStats.Stats.BlockBytes += GetSize(); + + for (Block* block = m_NullBlock->prevPhysical; block != NULL; block = block->prevPhysical) + { + if (block->IsFree()) + AddDetailedStatisticsUnusedRange(inoutStats, block->size); + else + AddDetailedStatisticsAllocation(inoutStats, block->size); + } + + if (m_NullBlock->size > 0) + AddDetailedStatisticsUnusedRange(inoutStats, m_NullBlock->size); +} + +void BlockMetadata_TLSF::WriteAllocationInfoToJson(JsonWriter& json) const +{ + size_t blockCount = m_AllocCount + m_BlocksFreeCount; + Vector blockList(blockCount, *GetAllocs()); + + size_t i = blockCount; + if (m_NullBlock->size > 0) + { + ++blockCount; + blockList.push_back(m_NullBlock); + } + for (Block* block = m_NullBlock->prevPhysical; block != NULL; block = block->prevPhysical) + { + blockList[--i] = block; + } + D3D12MA_ASSERT(i == 0); + + PrintDetailedMap_Begin(json, GetSumFreeSize(), GetAllocationCount(), m_BlocksFreeCount + static_cast(m_NullBlock->size)); + for (; i < blockCount; ++i) + { + Block* block = blockList[i]; + if (block->IsFree()) + PrintDetailedMap_UnusedRange(json, block->offset, block->size); + else + PrintDetailedMap_Allocation(json, block->offset, block->size, block->PrivateData()); + } + PrintDetailedMap_End(json); +} + +UINT8 BlockMetadata_TLSF::SizeToMemoryClass(UINT64 size) const +{ + if (size > SMALL_BUFFER_SIZE) + return BitScanMSB(size) - MEMORY_CLASS_SHIFT; + return 0; +} + +UINT16 BlockMetadata_TLSF::SizeToSecondIndex(UINT64 size, UINT8 memoryClass) const +{ + if (memoryClass == 0) + { + if (IsVirtual()) + return static_cast((size - 1) / 8); + else + return static_cast((size - 1) / 64); + } + return static_cast((size >> (memoryClass + MEMORY_CLASS_SHIFT - SECOND_LEVEL_INDEX)) ^ (1U << SECOND_LEVEL_INDEX)); +} + +UINT32 BlockMetadata_TLSF::GetListIndex(UINT8 memoryClass, UINT16 secondIndex) const +{ + if (memoryClass == 0) + return secondIndex; + + const UINT32 index = static_cast(memoryClass - 1) * (1 << SECOND_LEVEL_INDEX) + secondIndex; + if (IsVirtual()) + return index + (1 << SECOND_LEVEL_INDEX); + else + return index + 4; +} + +UINT32 BlockMetadata_TLSF::GetListIndex(UINT64 size) const +{ + UINT8 memoryClass = SizeToMemoryClass(size); + return GetListIndex(memoryClass, SizeToSecondIndex(size, memoryClass)); +} + +void BlockMetadata_TLSF::RemoveFreeBlock(Block* block) +{ + D3D12MA_ASSERT(block != m_NullBlock); + D3D12MA_ASSERT(block->IsFree()); + + if (block->NextFree() != NULL) + block->NextFree()->PrevFree() = block->PrevFree(); + if (block->PrevFree() != NULL) + block->PrevFree()->NextFree() = block->NextFree(); + else + { + UINT8 memClass = SizeToMemoryClass(block->size); + UINT16 secondIndex = SizeToSecondIndex(block->size, memClass); + UINT32 index = GetListIndex(memClass, secondIndex); + m_FreeList[index] = block->NextFree(); + if (block->NextFree() == NULL) + { + m_InnerIsFreeBitmap[memClass] &= ~(1U << secondIndex); + if (m_InnerIsFreeBitmap[memClass] == 0) + m_IsFreeBitmap &= ~(1UL << memClass); + } + } + block->MarkTaken(); + block->PrivateData() = NULL; + --m_BlocksFreeCount; + m_BlocksFreeSize -= block->size; +} + +void BlockMetadata_TLSF::InsertFreeBlock(Block* block) +{ + D3D12MA_ASSERT(block != m_NullBlock); + D3D12MA_ASSERT(!block->IsFree() && "Cannot insert block twice!"); + + UINT8 memClass = SizeToMemoryClass(block->size); + UINT16 secondIndex = SizeToSecondIndex(block->size, memClass); + UINT32 index = GetListIndex(memClass, secondIndex); + block->PrevFree() = NULL; + block->NextFree() = m_FreeList[index]; + m_FreeList[index] = block; + if (block->NextFree() != NULL) + block->NextFree()->PrevFree() = block; + else + { + m_InnerIsFreeBitmap[memClass] |= 1U << secondIndex; + m_IsFreeBitmap |= 1UL << memClass; + } + ++m_BlocksFreeCount; + m_BlocksFreeSize += block->size; +} + +void BlockMetadata_TLSF::MergeBlock(Block* block, Block* prev) +{ + D3D12MA_ASSERT(block->prevPhysical == prev && "Cannot merge seperate physical regions!"); + D3D12MA_ASSERT(!prev->IsFree() && "Cannot merge block that belongs to free list!"); + + block->offset = prev->offset; + block->size += prev->size; + block->prevPhysical = prev->prevPhysical; + if (block->prevPhysical) + block->prevPhysical->nextPhysical = block; + m_BlockAllocator.Free(prev); +} + +BlockMetadata_TLSF::Block* BlockMetadata_TLSF::FindFreeBlock(UINT64 size, UINT32& listIndex) const +{ + UINT8 memoryClass = SizeToMemoryClass(size); + UINT32 innerFreeMap = m_InnerIsFreeBitmap[memoryClass] & (~0U << SizeToSecondIndex(size, memoryClass)); + if (!innerFreeMap) + { + // Check higher levels for avaiable blocks + UINT32 freeMap = m_IsFreeBitmap & (~0UL << (memoryClass + 1)); + if (!freeMap) + return NULL; // No more memory avaible + + // Find lowest free region + memoryClass = BitScanLSB(freeMap); + innerFreeMap = m_InnerIsFreeBitmap[memoryClass]; + D3D12MA_ASSERT(innerFreeMap != 0); + } + // Find lowest free subregion + listIndex = GetListIndex(memoryClass, BitScanLSB(innerFreeMap)); + return m_FreeList[listIndex]; +} + +bool BlockMetadata_TLSF::CheckBlock( + Block& block, + UINT32 listIndex, + UINT64 allocSize, + UINT64 allocAlignment, + AllocationRequest* pAllocationRequest) +{ + D3D12MA_ASSERT(block.IsFree() && "Block is already taken!"); + + UINT64 alignedOffset = AlignUp(block.offset, allocAlignment); + if (block.size < allocSize + alignedOffset - block.offset) + return false; + + // Alloc successful + pAllocationRequest->allocHandle = (AllocHandle)█ + pAllocationRequest->size = allocSize - GetDebugMargin(); + pAllocationRequest->algorithmData = alignedOffset; + + // Place block at the start of list if it's normal block + if (listIndex != m_ListsCount && block.PrevFree()) + { + block.PrevFree()->NextFree() = block.NextFree(); + if (block.NextFree()) + block.NextFree()->PrevFree() = block.PrevFree(); + block.PrevFree() = NULL; + block.NextFree() = m_FreeList[listIndex]; + m_FreeList[listIndex] = █ + if (block.NextFree()) + block.NextFree()->PrevFree() = █ + } + + return true; +} +#endif // _D3D12MA_BLOCK_METADATA_TLSF_FUNCTIONS +#endif // _D3D12MA_BLOCK_METADATA_TLSF + +#ifndef _D3D12MA_MEMORY_BLOCK +/* +Represents a single block of device memory (heap). +Base class for inheritance. +Thread-safety: This class must be externally synchronized. +*/ +class MemoryBlock +{ +public: + // Creates the ID3D12Heap. + MemoryBlock( + AllocatorPimpl* allocator, + const D3D12_HEAP_PROPERTIES& heapProps, + D3D12_HEAP_FLAGS heapFlags, + UINT64 size, + UINT id); + virtual ~MemoryBlock(); + + const D3D12_HEAP_PROPERTIES& GetHeapProperties() const { return m_HeapProps; } + D3D12_HEAP_FLAGS GetHeapFlags() const { return m_HeapFlags; } + UINT64 GetSize() const { return m_Size; } + UINT GetId() const { return m_Id; } + ID3D12Heap* GetHeap() const { return m_Heap; } + +protected: + AllocatorPimpl* const m_Allocator; + const D3D12_HEAP_PROPERTIES m_HeapProps; + const D3D12_HEAP_FLAGS m_HeapFlags; + const UINT64 m_Size; + const UINT m_Id; + + HRESULT Init(ID3D12ProtectedResourceSession* pProtectedSession, bool denyMsaaTextures); + +private: + ID3D12Heap* m_Heap = NULL; + + D3D12MA_CLASS_NO_COPY(MemoryBlock) +}; +#endif // _D3D12MA_MEMORY_BLOCK + +#ifndef _D3D12MA_NORMAL_BLOCK +/* +Represents a single block of device memory (heap) with all the data about its +regions (aka suballocations, Allocation), assigned and free. +Thread-safety: This class must be externally synchronized. +*/ +class NormalBlock : public MemoryBlock +{ +public: + BlockMetadata* m_pMetadata; + + NormalBlock( + AllocatorPimpl* allocator, + BlockVector* blockVector, + const D3D12_HEAP_PROPERTIES& heapProps, + D3D12_HEAP_FLAGS heapFlags, + UINT64 size, + UINT id); + virtual ~NormalBlock(); + + BlockVector* GetBlockVector() const { return m_BlockVector; } + + // 'algorithm' should be one of the *_ALGORITHM_* flags in enums POOL_FLAGS or VIRTUAL_BLOCK_FLAGS + HRESULT Init(UINT32 algorithm, ID3D12ProtectedResourceSession* pProtectedSession, bool denyMsaaTextures); + + // Validates all data structures inside this object. If not valid, returns false. + bool Validate() const; + +private: + BlockVector* m_BlockVector; + + D3D12MA_CLASS_NO_COPY(NormalBlock) +}; +#endif // _D3D12MA_NORMAL_BLOCK + +#ifndef _D3D12MA_COMMITTED_ALLOCATION_LIST_ITEM_TRAITS +struct CommittedAllocationListItemTraits +{ + using ItemType = Allocation; + + static ItemType* GetPrev(const ItemType* item) + { + D3D12MA_ASSERT(item->m_PackedData.GetType() == Allocation::TYPE_COMMITTED || item->m_PackedData.GetType() == Allocation::TYPE_HEAP); + return item->m_Committed.prev; + } + static ItemType* GetNext(const ItemType* item) + { + D3D12MA_ASSERT(item->m_PackedData.GetType() == Allocation::TYPE_COMMITTED || item->m_PackedData.GetType() == Allocation::TYPE_HEAP); + return item->m_Committed.next; + } + static ItemType*& AccessPrev(ItemType* item) + { + D3D12MA_ASSERT(item->m_PackedData.GetType() == Allocation::TYPE_COMMITTED || item->m_PackedData.GetType() == Allocation::TYPE_HEAP); + return item->m_Committed.prev; + } + static ItemType*& AccessNext(ItemType* item) + { + D3D12MA_ASSERT(item->m_PackedData.GetType() == Allocation::TYPE_COMMITTED || item->m_PackedData.GetType() == Allocation::TYPE_HEAP); + return item->m_Committed.next; + } +}; +#endif // _D3D12MA_COMMITTED_ALLOCATION_LIST_ITEM_TRAITS + +#ifndef _D3D12MA_COMMITTED_ALLOCATION_LIST +/* +Stores linked list of Allocation objects that are of TYPE_COMMITTED or TYPE_HEAP. +Thread-safe, synchronized internally. +*/ +class CommittedAllocationList +{ +public: + CommittedAllocationList() = default; + void Init(bool useMutex, D3D12_HEAP_TYPE heapType, PoolPimpl* pool); + ~CommittedAllocationList(); + + D3D12_HEAP_TYPE GetHeapType() const { return m_HeapType; } + PoolPimpl* GetPool() const { return m_Pool; } + UINT GetMemorySegmentGroup(AllocatorPimpl* allocator) const; + + void AddStatistics(Statistics& inoutStats); + void AddDetailedStatistics(DetailedStatistics& inoutStats); + // Writes JSON array with the list of allocations. + void BuildStatsString(JsonWriter& json); + + void Register(Allocation* alloc); + void Unregister(Allocation* alloc); + +private: + using CommittedAllocationLinkedList = IntrusiveLinkedList; + + bool m_UseMutex = true; + D3D12_HEAP_TYPE m_HeapType = D3D12_HEAP_TYPE_CUSTOM; + PoolPimpl* m_Pool = NULL; + + D3D12MA_RW_MUTEX m_Mutex; + CommittedAllocationLinkedList m_AllocationList; +}; +#endif // _D3D12MA_COMMITTED_ALLOCATION_LIST + +#ifndef _D3D12M_COMMITTED_ALLOCATION_PARAMETERS +struct CommittedAllocationParameters +{ + CommittedAllocationList* m_List = NULL; + D3D12_HEAP_PROPERTIES m_HeapProperties = {}; + D3D12_HEAP_FLAGS m_HeapFlags = D3D12_HEAP_FLAG_NONE; + ID3D12ProtectedResourceSession* m_ProtectedSession = NULL; + bool m_CanAlias = false; + + bool IsValid() const { return m_List != NULL; } +}; +#endif // _D3D12M_COMMITTED_ALLOCATION_PARAMETERS + +#ifndef _D3D12MA_BLOCK_VECTOR +/* +Sequence of NormalBlock. Represents memory blocks allocated for a specific +heap type and possibly resource type (if only Tier 1 is supported). + +Synchronized internally with a mutex. +*/ +class BlockVector +{ + friend class DefragmentationContextPimpl; + D3D12MA_CLASS_NO_COPY(BlockVector) +public: + BlockVector( + AllocatorPimpl* hAllocator, + const D3D12_HEAP_PROPERTIES& heapProps, + D3D12_HEAP_FLAGS heapFlags, + UINT64 preferredBlockSize, + size_t minBlockCount, + size_t maxBlockCount, + bool explicitBlockSize, + UINT64 minAllocationAlignment, + UINT32 algorithm, + bool denyMsaaTextures, + ID3D12ProtectedResourceSession* pProtectedSession); + ~BlockVector(); + + const D3D12_HEAP_PROPERTIES& GetHeapProperties() const { return m_HeapProps; } + D3D12_HEAP_FLAGS GetHeapFlags() const { return m_HeapFlags; } + UINT64 GetPreferredBlockSize() const { return m_PreferredBlockSize; } + UINT32 GetAlgorithm() const { return m_Algorithm; } + bool DeniesMsaaTextures() const { return m_DenyMsaaTextures; } + // To be used only while the m_Mutex is locked. Used during defragmentation. + size_t GetBlockCount() const { return m_Blocks.size(); } + // To be used only while the m_Mutex is locked. Used during defragmentation. + NormalBlock* GetBlock(size_t index) const { return m_Blocks[index]; } + D3D12MA_RW_MUTEX& GetMutex() { return m_Mutex; } + + HRESULT CreateMinBlocks(); + bool IsEmpty(); + + HRESULT Allocate( + UINT64 size, + UINT64 alignment, + const ALLOCATION_DESC& allocDesc, + size_t allocationCount, + Allocation** pAllocations); + + void Free(Allocation* hAllocation); + + HRESULT CreateResource( + UINT64 size, + UINT64 alignment, + const ALLOCATION_DESC& allocDesc, + const D3D12_RESOURCE_DESC& resourceDesc, + D3D12_RESOURCE_STATES InitialResourceState, + const D3D12_CLEAR_VALUE *pOptimizedClearValue, + Allocation** ppAllocation, + REFIID riidResource, + void** ppvResource); + #ifdef __ID3D12Device8_INTERFACE_DEFINED__ -HRESULT BlockVector::CreateResource2( - UINT64 size, - UINT64 alignment, - const ALLOCATION_DESC& allocDesc, - const D3D12_RESOURCE_DESC1& resourceDesc, - D3D12_RESOURCE_STATES InitialResourceState, - const D3D12_CLEAR_VALUE *pOptimizedClearValue, - Allocation** ppAllocation, - REFIID riidResource, - void** ppvResource) -{ - ID3D12Device8* const device8 = m_hAllocator->GetDevice8(); - if(device8 == NULL) - { - return E_NOINTERFACE; - } - - HRESULT hr = Allocate(size, alignment, allocDesc, 1, ppAllocation); - if(SUCCEEDED(hr)) - { - ID3D12Resource* res = NULL; - hr = device8->CreatePlacedResource1( - (*ppAllocation)->m_Placed.block->GetHeap(), - (*ppAllocation)->GetOffset(), - &resourceDesc, - InitialResourceState, - pOptimizedClearValue, - D3D12MA_IID_PPV_ARGS(&res)); - if(SUCCEEDED(hr)) - { - if(ppvResource != NULL) - { - hr = res->QueryInterface(riidResource, ppvResource); - } - if(SUCCEEDED(hr)) - { - (*ppAllocation)->SetResource(res, &resourceDesc); - } - else - { - res->Release(); - SAFE_RELEASE(*ppAllocation); - } - } - else - { - SAFE_RELEASE(*ppAllocation); - } - } - return hr; -} + HRESULT CreateResource2( + UINT64 size, + UINT64 alignment, + const ALLOCATION_DESC& allocDesc, + const D3D12_RESOURCE_DESC1& resourceDesc, + D3D12_RESOURCE_STATES InitialResourceState, + const D3D12_CLEAR_VALUE *pOptimizedClearValue, + Allocation** ppAllocation, + REFIID riidResource, + void** ppvResource); #endif // #ifdef __ID3D12Device8_INTERFACE_DEFINED__ -UINT64 BlockVector::CalcSumBlockSize() const + void AddStatistics(Statistics& inoutStats); + void AddDetailedStatistics(DetailedStatistics& inoutStats); + + void WriteBlockInfoToJson(JsonWriter& json); + +private: + AllocatorPimpl* const m_hAllocator; + const D3D12_HEAP_PROPERTIES m_HeapProps; + const D3D12_HEAP_FLAGS m_HeapFlags; + const UINT64 m_PreferredBlockSize; + const size_t m_MinBlockCount; + const size_t m_MaxBlockCount; + const bool m_ExplicitBlockSize; + const UINT64 m_MinAllocationAlignment; + const UINT32 m_Algorithm; + const bool m_DenyMsaaTextures; + ID3D12ProtectedResourceSession* const m_ProtectedSession; + /* There can be at most one allocation that is completely empty - a + hysteresis to avoid pessimistic case of alternating creation and destruction + of a ID3D12Heap. */ + bool m_HasEmptyBlock; + D3D12MA_RW_MUTEX m_Mutex; + // Incrementally sorted by sumFreeSize, ascending. + Vector m_Blocks; + UINT m_NextBlockId; + bool m_IncrementalSort = true; + + // Disable incremental sorting when freeing allocations + void SetIncrementalSort(bool val) { m_IncrementalSort = val; } + + UINT64 CalcSumBlockSize() const; + UINT64 CalcMaxBlockSize() const; + + // Finds and removes given block from vector. + void Remove(NormalBlock* pBlock); + + // Performs single step in sorting m_Blocks. They may not be fully sorted + // after this call. + void IncrementallySortBlocks(); + void SortByFreeSize(); + + HRESULT AllocatePage( + UINT64 size, + UINT64 alignment, + const ALLOCATION_DESC& allocDesc, + Allocation** pAllocation); + + HRESULT AllocateFromBlock( + NormalBlock* pBlock, + UINT64 size, + UINT64 alignment, + ALLOCATION_FLAGS allocFlags, + void* pPrivateData, + UINT32 strategy, + Allocation** pAllocation); + + HRESULT CommitAllocationRequest( + AllocationRequest& allocRequest, + NormalBlock* pBlock, + UINT64 size, + UINT64 alignment, + void* pPrivateData, + Allocation** pAllocation); + + HRESULT CreateBlock( + UINT64 blockSize, + size_t* pNewBlockIndex); +}; +#endif // _D3D12MA_BLOCK_VECTOR + +#ifndef _D3D12MA_CURRENT_BUDGET_DATA +class CurrentBudgetData { - UINT64 result = 0; - for(size_t i = m_Blocks.size(); i--; ) - { - result += m_Blocks[i]->m_pMetadata->GetSize(); - } - return result; -} +public: + bool ShouldUpdateBudget() const { return m_OperationsSinceBudgetFetch >= 30; } -UINT64 BlockVector::CalcMaxBlockSize() const -{ - UINT64 result = 0; - for(size_t i = m_Blocks.size(); i--; ) - { - result = D3D12MA_MAX(result, m_Blocks[i]->m_pMetadata->GetSize()); - if(result >= m_PreferredBlockSize) - { - break; - } - } - return result; -} + void GetStatistics(Statistics& outStats, UINT group) const; + void GetBudget(bool useMutex, + UINT64* outLocalUsage, UINT64* outLocalBudget, + UINT64* outNonLocalUsage, UINT64* outNonLocalBudget); -void BlockVector::Remove(NormalBlock* pBlock) -{ - for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) - { - if(m_Blocks[blockIndex] == pBlock) - { - m_Blocks.remove(blockIndex); - return; - } - } - D3D12MA_ASSERT(0); -} - -void BlockVector::IncrementallySortBlocks() -{ - // Bubble sort only until first swap. - for(size_t i = 1; i < m_Blocks.size(); ++i) - { - if(m_Blocks[i - 1]->m_pMetadata->GetSumFreeSize() > m_Blocks[i]->m_pMetadata->GetSumFreeSize()) - { - D3D12MA_SWAP(m_Blocks[i - 1], m_Blocks[i]); - return; - } - } -} - -HRESULT BlockVector::AllocateFromBlock( - NormalBlock* pBlock, - UINT64 size, - UINT64 alignment, - ALLOCATION_FLAGS allocFlags, - Allocation** pAllocation) -{ - alignment = D3D12MA_MAX(alignment, m_MinAllocationAlignment); - - AllocationRequest currRequest = {}; - if(pBlock->m_pMetadata->CreateAllocationRequest( - size, - alignment, - &currRequest)) - { - // We no longer have an empty Allocation. - if(pBlock->m_pMetadata->IsEmpty()) - { - m_HasEmptyBlock = false; - } - - *pAllocation = m_hAllocator->GetAllocationObjectAllocator().Allocate(m_hAllocator, size, currRequest.zeroInitialized); - pBlock->m_pMetadata->Alloc(currRequest, size, *pAllocation); - (*pAllocation)->InitPlaced(currRequest.offset, alignment, pBlock); - D3D12MA_HEAVY_ASSERT(pBlock->Validate()); - m_hAllocator->m_Budget.AddAllocation(HeapTypeToIndex(m_HeapProps.Type), size); - return S_OK; - } - return E_OUTOFMEMORY; -} - -HRESULT BlockVector::CreateBlock( - UINT64 blockSize, - size_t* pNewBlockIndex) -{ - NormalBlock* const pBlock = D3D12MA_NEW(m_hAllocator->GetAllocs(), NormalBlock)( - m_hAllocator, - this, - m_HeapProps, - m_HeapFlags, - blockSize, - m_NextBlockId++); - HRESULT hr = pBlock->Init(m_ProtectedSession); - if(FAILED(hr)) - { - D3D12MA_DELETE(m_hAllocator->GetAllocs(), pBlock); - return hr; - } - - m_Blocks.push_back(pBlock); - if(pNewBlockIndex != NULL) - { - *pNewBlockIndex = m_Blocks.size() - 1; - } - - return hr; -} - -void BlockVector::AddStats(StatInfo& outStats) -{ - MutexLockRead lock(m_Mutex, m_hAllocator->UseMutex()); - - for(size_t i = 0; i < m_Blocks.size(); ++i) - { - const NormalBlock* const pBlock = m_Blocks[i]; - D3D12MA_ASSERT(pBlock); - D3D12MA_HEAVY_ASSERT(pBlock->Validate()); - StatInfo blockStatInfo; - pBlock->m_pMetadata->CalcAllocationStatInfo(blockStatInfo); - AddStatInfo(outStats, blockStatInfo); - } -} - -void BlockVector::AddStats(Stats& outStats) -{ - const UINT heapTypeIndex = HeapTypeToIndex(m_HeapProps.Type); - StatInfo* const pStatInfo = &outStats.HeapType[heapTypeIndex]; - - MutexLockRead lock(m_Mutex, m_hAllocator->UseMutex()); - - for(size_t i = 0; i < m_Blocks.size(); ++i) - { - const NormalBlock* const pBlock = m_Blocks[i]; - D3D12MA_ASSERT(pBlock); - D3D12MA_HEAVY_ASSERT(pBlock->Validate()); - StatInfo blockStatInfo; - pBlock->m_pMetadata->CalcAllocationStatInfo(blockStatInfo); - AddStatInfo(outStats.Total, blockStatInfo); - AddStatInfo(*pStatInfo, blockStatInfo); - } -} - -void BlockVector::WriteBlockInfoToJson(JsonWriter& json) -{ - MutexLockRead lock(m_Mutex, m_hAllocator->UseMutex()); - - json.BeginObject(); - - for (size_t i = 0, count = m_Blocks.size(); i < count; ++i) - { - const NormalBlock* const pBlock = m_Blocks[i]; - D3D12MA_ASSERT(pBlock); - D3D12MA_HEAVY_ASSERT(pBlock->Validate()); - json.BeginString(); - json.ContinueString(pBlock->GetId()); - json.EndString(); - - pBlock->m_pMetadata->WriteAllocationInfoToJson(json); - } - - json.EndObject(); -} - -//////////////////////////////////////////////////////////////////////////////// -// Private class PoolPimpl - -PoolPimpl::PoolPimpl(AllocatorPimpl* allocator, const POOL_DESC& desc) : - m_Allocator(allocator), - m_Desc(desc), - m_BlockVector(NULL), - m_Name(NULL) -{ - const bool explicitBlockSize = desc.BlockSize != 0; - const UINT64 preferredBlockSize = explicitBlockSize ? desc.BlockSize : D3D12MA_DEFAULT_BLOCK_SIZE; - UINT maxBlockCount = desc.MaxBlockCount != 0 ? desc.MaxBlockCount : UINT_MAX; - -#ifndef __ID3D12Device4_INTERFACE_DEFINED__ - D3D12MA_ASSERT(m_Desc.pProtectedSession == NULL); +#if D3D12MA_DXGI_1_4 + HRESULT UpdateBudget(IDXGIAdapter3* adapter3, bool useMutex); #endif - m_BlockVector = D3D12MA_NEW(allocator->GetAllocs(), BlockVector)( - allocator, desc.HeapProperties, desc.HeapFlags, - preferredBlockSize, - desc.MinBlockCount, maxBlockCount, - explicitBlockSize, - D3D12MA_MAX(desc.MinAllocationAlignment, (UINT64)D3D12MA_DEBUG_ALIGNMENT), - desc.pProtectedSession); + void AddAllocation(UINT group, UINT64 allocationBytes); + void RemoveAllocation(UINT group, UINT64 allocationBytes); + + void AddBlock(UINT group, UINT64 blockBytes); + void RemoveBlock(UINT group, UINT64 blockBytes); + +private: + D3D12MA_ATOMIC_UINT32 m_BlockCount[DXGI_MEMORY_SEGMENT_GROUP_COUNT] = {}; + D3D12MA_ATOMIC_UINT32 m_AllocationCount[DXGI_MEMORY_SEGMENT_GROUP_COUNT] = {}; + D3D12MA_ATOMIC_UINT64 m_BlockBytes[DXGI_MEMORY_SEGMENT_GROUP_COUNT] = {}; + D3D12MA_ATOMIC_UINT64 m_AllocationBytes[DXGI_MEMORY_SEGMENT_GROUP_COUNT] = {}; + + D3D12MA_ATOMIC_UINT32 m_OperationsSinceBudgetFetch = 0; + D3D12MA_RW_MUTEX m_BudgetMutex; + UINT64 m_D3D12Usage[DXGI_MEMORY_SEGMENT_GROUP_COUNT] = {}; + UINT64 m_D3D12Budget[DXGI_MEMORY_SEGMENT_GROUP_COUNT] = {}; + UINT64 m_BlockBytesAtD3D12Fetch[DXGI_MEMORY_SEGMENT_GROUP_COUNT] = {}; +}; + +#ifndef _D3D12MA_CURRENT_BUDGET_DATA_FUNCTIONS +void CurrentBudgetData::GetStatistics(Statistics& outStats, UINT group) const +{ + outStats.BlockCount = m_BlockCount[group]; + outStats.AllocationCount = m_AllocationCount[group]; + outStats.BlockBytes = m_BlockBytes[group]; + outStats.AllocationBytes = m_AllocationBytes[group]; } -HRESULT PoolPimpl::Init() +void CurrentBudgetData::GetBudget(bool useMutex, + UINT64* outLocalUsage, UINT64* outLocalBudget, + UINT64* outNonLocalUsage, UINT64* outNonLocalBudget) { - m_CommittedAllocations.Init(m_Allocator->UseMutex(), m_Desc.HeapProperties.Type, this); - return m_BlockVector->CreateMinBlocks(); -} - -PoolPimpl::~PoolPimpl() -{ - D3D12MA_ASSERT(m_PrevPool == NULL && m_NextPool == NULL); - FreeName(); - D3D12MA_DELETE(m_Allocator->GetAllocs(), m_BlockVector); -} - -void PoolPimpl::CalculateStats(StatInfo& outStats) -{ - ZeroMemory(&outStats, sizeof(outStats)); - outStats.AllocationSizeMin = UINT64_MAX; - outStats.UnusedRangeSizeMin = UINT64_MAX; - - m_BlockVector->AddStats(outStats); + MutexLockRead lockRead(m_BudgetMutex, useMutex); + if (outLocalUsage) { - StatInfo committedStatInfo; // Uninitialized. - m_CommittedAllocations.CalculateStats(committedStatInfo); - AddStatInfo(outStats, committedStatInfo); + const UINT64 D3D12Usage = m_D3D12Usage[DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY]; + const UINT64 blockBytes = m_BlockBytes[DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY]; + const UINT64 blockBytesAtD3D12Fetch = m_BlockBytesAtD3D12Fetch[DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY]; + *outLocalUsage = D3D12Usage + blockBytes > blockBytesAtD3D12Fetch ? + D3D12Usage + blockBytes - blockBytesAtD3D12Fetch : 0; + } + if (outLocalBudget) + *outLocalBudget = m_D3D12Budget[DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY]; + + if (outNonLocalUsage) + { + const UINT64 D3D12Usage = m_D3D12Usage[DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY]; + const UINT64 blockBytes = m_BlockBytes[DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY]; + const UINT64 blockBytesAtD3D12Fetch = m_BlockBytesAtD3D12Fetch[DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY]; + *outNonLocalUsage = D3D12Usage + blockBytes > blockBytesAtD3D12Fetch ? + D3D12Usage + blockBytes - blockBytesAtD3D12Fetch : 0; + } + if (outNonLocalBudget) + *outNonLocalBudget = m_D3D12Budget[DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY]; +} + +#if D3D12MA_DXGI_1_4 +HRESULT CurrentBudgetData::UpdateBudget(IDXGIAdapter3* adapter3, bool useMutex) +{ + D3D12MA_ASSERT(adapter3); + + DXGI_QUERY_VIDEO_MEMORY_INFO infoLocal = {}; + DXGI_QUERY_VIDEO_MEMORY_INFO infoNonLocal = {}; + const HRESULT hrLocal = adapter3->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &infoLocal); + const HRESULT hrNonLocal = adapter3->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &infoNonLocal); + + if (SUCCEEDED(hrLocal) || SUCCEEDED(hrNonLocal)) + { + MutexLockWrite lockWrite(m_BudgetMutex, useMutex); + + if (SUCCEEDED(hrLocal)) + { + m_D3D12Usage[0] = infoLocal.CurrentUsage; + m_D3D12Budget[0] = infoLocal.Budget; + } + if (SUCCEEDED(hrNonLocal)) + { + m_D3D12Usage[1] = infoNonLocal.CurrentUsage; + m_D3D12Budget[1] = infoNonLocal.Budget; + } + + m_BlockBytesAtD3D12Fetch[0] = m_BlockBytes[0]; + m_BlockBytesAtD3D12Fetch[1] = m_BlockBytes[1]; + m_OperationsSinceBudgetFetch = 0; } - PostProcessStatInfo(outStats); + return FAILED(hrLocal) ? hrLocal : hrNonLocal; +} +#endif // #if D3D12MA_DXGI_1_4 + +void CurrentBudgetData::AddAllocation(UINT group, UINT64 allocationBytes) +{ + ++m_AllocationCount[group]; + m_AllocationBytes[group] += allocationBytes; + ++m_OperationsSinceBudgetFetch; } -void PoolPimpl::AddStats(Stats& inoutStats) +void CurrentBudgetData::RemoveAllocation(UINT group, UINT64 allocationBytes) { - StatInfo poolStatInfo = {}; - CalculateStats(poolStatInfo); - - AddStatInfo(inoutStats.Total, poolStatInfo); - AddStatInfo(inoutStats.HeapType[HeapTypeToIndex(m_Desc.HeapProperties.Type)], poolStatInfo); + D3D12MA_ASSERT(m_AllocationBytes[group] >= allocationBytes); + D3D12MA_ASSERT(m_AllocationCount[group] > 0); + m_AllocationBytes[group] -= allocationBytes; + --m_AllocationCount[group]; + ++m_OperationsSinceBudgetFetch; } -void PoolPimpl::SetName(LPCWSTR Name) +void CurrentBudgetData::AddBlock(UINT group, UINT64 blockBytes) { - FreeName(); + ++m_BlockCount[group]; + m_BlockBytes[group] += blockBytes; + ++m_OperationsSinceBudgetFetch; +} - if(Name) +void CurrentBudgetData::RemoveBlock(UINT group, UINT64 blockBytes) +{ + D3D12MA_ASSERT(m_BlockBytes[group] >= blockBytes); + D3D12MA_ASSERT(m_BlockCount[group] > 0); + m_BlockBytes[group] -= blockBytes; + --m_BlockCount[group]; + ++m_OperationsSinceBudgetFetch; +} +#endif // _D3D12MA_CURRENT_BUDGET_DATA_FUNCTIONS +#endif // _D3D12MA_CURRENT_BUDGET_DATA + +#ifndef _D3D12MA_DEFRAGMENTATION_CONTEXT_PIMPL +class DefragmentationContextPimpl +{ + D3D12MA_CLASS_NO_COPY(DefragmentationContextPimpl) +public: + DefragmentationContextPimpl( + AllocatorPimpl* hAllocator, + const DEFRAGMENTATION_DESC& desc, + BlockVector* poolVector); + ~DefragmentationContextPimpl(); + + void GetStats(DEFRAGMENTATION_STATS& outStats) { outStats = m_GlobalStats; } + const ALLOCATION_CALLBACKS& GetAllocs() const { return m_Moves.GetAllocs(); } + + HRESULT DefragmentPassBegin(DEFRAGMENTATION_PASS_MOVE_INFO& moveInfo); + HRESULT DefragmentPassEnd(DEFRAGMENTATION_PASS_MOVE_INFO& moveInfo); + +private: + // Max number of allocations to ignore due to size constraints before ending single pass + static const UINT8 MAX_ALLOCS_TO_IGNORE = 16; + enum class CounterStatus { Pass, Ignore, End }; + + struct FragmentedBlock { - const size_t nameCharCount = wcslen(Name) + 1; - m_Name = D3D12MA_NEW_ARRAY(m_Allocator->GetAllocs(), WCHAR, nameCharCount); - memcpy(m_Name, Name, nameCharCount * sizeof(WCHAR)); - } -} - -void PoolPimpl::FreeName() -{ - if(m_Name) + UINT32 data; + NormalBlock* block; + }; + struct StateBalanced { - const size_t nameCharCount = wcslen(m_Name) + 1; - D3D12MA_DELETE_ARRAY(m_Allocator->GetAllocs(), m_Name, nameCharCount); - m_Name = NULL; - } -} - -//////////////////////////////////////////////////////////////////////////////// -// Public class Pool implementation - -void Pool::ReleaseThis() -{ - if(this == NULL) + UINT64 avgFreeSize = 0; + UINT64 avgAllocSize = UINT64_MAX; + }; + struct MoveAllocationData { - return; - } + UINT64 size; + UINT64 alignment; + ALLOCATION_FLAGS flags; + DEFRAGMENTATION_MOVE move = {}; + }; - D3D12MA_DELETE(m_Pimpl->GetAllocator()->GetAllocs(), this); -} + const UINT64 m_MaxPassBytes; + const UINT32 m_MaxPassAllocations; -POOL_DESC Pool::GetDesc() const + Vector m_Moves; + + UINT8 m_IgnoredAllocs = 0; + UINT32 m_Algorithm; + UINT32 m_BlockVectorCount; + BlockVector* m_PoolBlockVector; + BlockVector** m_pBlockVectors; + size_t m_ImmovableBlockCount = 0; + DEFRAGMENTATION_STATS m_GlobalStats = { 0 }; + DEFRAGMENTATION_STATS m_PassStats = { 0 }; + void* m_AlgorithmState = NULL; + + static MoveAllocationData GetMoveData(AllocHandle handle, BlockMetadata* metadata); + CounterStatus CheckCounters(UINT64 bytes); + bool IncrementCounters(UINT64 bytes); + bool ReallocWithinBlock(BlockVector& vector, NormalBlock* block); + bool AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, BlockVector& vector); + + bool ComputeDefragmentation(BlockVector& vector, size_t index); + bool ComputeDefragmentation_Fast(BlockVector& vector); + bool ComputeDefragmentation_Balanced(BlockVector& vector, size_t index, bool update); + bool ComputeDefragmentation_Full(BlockVector& vector); + + void UpdateVectorStatistics(BlockVector& vector, StateBalanced& state); +}; +#endif // _D3D12MA_DEFRAGMENTATION_CONTEXT_PIMPL + +#ifndef _D3D12MA_POOL_PIMPL +class PoolPimpl { - return m_Pimpl->GetDesc(); -} + friend class Allocator; + friend struct PoolListItemTraits; +public: + PoolPimpl(AllocatorPimpl* allocator, const POOL_DESC& desc); + ~PoolPimpl(); -void Pool::CalculateStats(StatInfo* pStats) + AllocatorPimpl* GetAllocator() const { return m_Allocator; } + const POOL_DESC& GetDesc() const { return m_Desc; } + bool SupportsCommittedAllocations() const { return m_Desc.BlockSize == 0; } + LPCWSTR GetName() const { return m_Name; } + + BlockVector* GetBlockVector() { return m_BlockVector; } + CommittedAllocationList* GetCommittedAllocationList() { return SupportsCommittedAllocations() ? &m_CommittedAllocations : NULL; } + + HRESULT Init(); + void GetStatistics(Statistics& outStats); + void CalculateStatistics(DetailedStatistics& outStats); + void AddDetailedStatistics(DetailedStatistics& inoutStats); + void SetName(LPCWSTR Name); + +private: + AllocatorPimpl* m_Allocator; // Externally owned object. + POOL_DESC m_Desc; + BlockVector* m_BlockVector; // Owned object. + CommittedAllocationList m_CommittedAllocations; + wchar_t* m_Name; + PoolPimpl* m_PrevPool = NULL; + PoolPimpl* m_NextPool = NULL; + + void FreeName(); +}; + +struct PoolListItemTraits { - D3D12MA_ASSERT(pStats); - D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - m_Pimpl->CalculateStats(*pStats); -} + using ItemType = PoolPimpl; + static ItemType* GetPrev(const ItemType* item) { return item->m_PrevPool; } + static ItemType* GetNext(const ItemType* item) { return item->m_NextPool; } + static ItemType*& AccessPrev(ItemType* item) { return item->m_PrevPool; } + static ItemType*& AccessNext(ItemType* item) { return item->m_NextPool; } +}; +#endif // _D3D12MA_POOL_PIMPL -void Pool::SetName(LPCWSTR Name) + +#ifndef _D3D12MA_ALLOCATOR_PIMPL +class AllocatorPimpl { - D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - m_Pimpl->SetName(Name); -} + friend class Allocator; + friend class Pool; +public: + std::atomic_uint32_t m_RefCount = 1; + CurrentBudgetData m_Budget; -LPCWSTR Pool::GetName() const -{ - return m_Pimpl->GetName(); -} + AllocatorPimpl(const ALLOCATION_CALLBACKS& allocationCallbacks, const ALLOCATOR_DESC& desc); + ~AllocatorPimpl(); -Pool::Pool(Allocator* allocator, const POOL_DESC &desc) : - m_Pimpl(D3D12MA_NEW(allocator->m_Pimpl->GetAllocs(), PoolPimpl)(allocator->m_Pimpl, desc)) -{ -} + ID3D12Device* GetDevice() const { return m_Device; } +#ifdef __ID3D12Device4_INTERFACE_DEFINED__ + ID3D12Device4* GetDevice4() const { return m_Device4; } +#endif +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + ID3D12Device8* GetDevice8() const { return m_Device8; } +#endif + // Shortcut for "Allocation Callbacks", because this function is called so often. + const ALLOCATION_CALLBACKS& GetAllocs() const { return m_AllocationCallbacks; } + const D3D12_FEATURE_DATA_D3D12_OPTIONS& GetD3D12Options() const { return m_D3D12Options; } + BOOL IsUMA() const { return m_D3D12Architecture.UMA; } + BOOL IsCacheCoherentUMA() const { return m_D3D12Architecture.CacheCoherentUMA; } + bool SupportsResourceHeapTier2() const { return m_D3D12Options.ResourceHeapTier >= D3D12_RESOURCE_HEAP_TIER_2; } + bool UseMutex() const { return m_UseMutex; } + AllocationObjectAllocator& GetAllocationObjectAllocator() { return m_AllocationObjectAllocator; } + UINT GetCurrentFrameIndex() const { return m_CurrentFrameIndex.load(); } + /* + If SupportsResourceHeapTier2(): + 0: D3D12_HEAP_TYPE_DEFAULT + 1: D3D12_HEAP_TYPE_UPLOAD + 2: D3D12_HEAP_TYPE_READBACK + else: + 0: D3D12_HEAP_TYPE_DEFAULT + buffer + 1: D3D12_HEAP_TYPE_DEFAULT + texture + 2: D3D12_HEAP_TYPE_DEFAULT + texture RT or DS + 3: D3D12_HEAP_TYPE_UPLOAD + buffer + 4: D3D12_HEAP_TYPE_UPLOAD + texture + 5: D3D12_HEAP_TYPE_UPLOAD + texture RT or DS + 6: D3D12_HEAP_TYPE_READBACK + buffer + 7: D3D12_HEAP_TYPE_READBACK + texture + 8: D3D12_HEAP_TYPE_READBACK + texture RT or DS + */ + UINT GetDefaultPoolCount() const { return SupportsResourceHeapTier2() ? 3 : 9; } + BlockVector** GetDefaultPools() { return m_BlockVectors; } -Pool::~Pool() -{ - m_Pimpl->GetAllocator()->UnregisterPool(this, m_Pimpl->GetDesc().HeapProperties.Type); + HRESULT Init(const ALLOCATOR_DESC& desc); + bool HeapFlagsFulfillResourceHeapTier(D3D12_HEAP_FLAGS flags) const; + UINT StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE heapType) const; + UINT HeapPropertiesToMemorySegmentGroup(const D3D12_HEAP_PROPERTIES& heapProps) const; + UINT64 GetMemoryCapacity(UINT memorySegmentGroup) const; - D3D12MA_DELETE(m_Pimpl->GetAllocator()->GetAllocs(), m_Pimpl); -} + HRESULT CreateResource( + const ALLOCATION_DESC* pAllocDesc, + const D3D12_RESOURCE_DESC* pResourceDesc, + D3D12_RESOURCE_STATES InitialResourceState, + const D3D12_CLEAR_VALUE *pOptimizedClearValue, + Allocation** ppAllocation, + REFIID riidResource, + void** ppvResource); -//////////////////////////////////////////////////////////////////////////////// -// Private class AllocatorPimpl implementation +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + HRESULT CreateResource2( + const ALLOCATION_DESC* pAllocDesc, + const D3D12_RESOURCE_DESC1* pResourceDesc, + D3D12_RESOURCE_STATES InitialResourceState, + const D3D12_CLEAR_VALUE *pOptimizedClearValue, + Allocation** ppAllocation, + REFIID riidResource, + void** ppvResource); +#endif // #ifdef __ID3D12Device8_INTERFACE_DEFINED__ -AllocatorPimpl::AllocatorPimpl(const ALLOCATION_CALLBACKS& allocationCallbacks, const ALLOCATOR_DESC& desc) : - m_UseMutex((desc.Flags & ALLOCATOR_FLAG_SINGLETHREADED) == 0), + HRESULT CreateAliasingResource( + Allocation* pAllocation, + UINT64 AllocationLocalOffset, + const D3D12_RESOURCE_DESC* pResourceDesc, + D3D12_RESOURCE_STATES InitialResourceState, + const D3D12_CLEAR_VALUE *pOptimizedClearValue, + REFIID riidResource, + void** ppvResource); + + HRESULT AllocateMemory( + const ALLOCATION_DESC* pAllocDesc, + const D3D12_RESOURCE_ALLOCATION_INFO* pAllocInfo, + Allocation** ppAllocation); + + // Unregisters allocation from the collection of dedicated allocations. + // Allocation object must be deleted externally afterwards. + void FreeCommittedMemory(Allocation* allocation); + // Unregisters allocation from the collection of placed allocations. + // Allocation object must be deleted externally afterwards. + void FreePlacedMemory(Allocation* allocation); + // Unregisters allocation from the collection of dedicated allocations and destroys associated heap. + // Allocation object must be deleted externally afterwards. + void FreeHeapMemory(Allocation* allocation); + + void SetCurrentFrameIndex(UINT frameIndex); + // For more deailed stats use outCutomHeaps to access statistics divided into L0 and L1 group + void CalculateStatistics(TotalStatistics& outStats, DetailedStatistics outCutomHeaps[2] = NULL); + + void GetBudget(Budget* outLocalBudget, Budget* outNonLocalBudget); + void GetBudgetForHeapType(Budget& outBudget, D3D12_HEAP_TYPE heapType); + + void BuildStatsString(WCHAR** ppStatsString, BOOL detailedMap); + void FreeStatsString(WCHAR* pStatsString); + +private: + using PoolList = IntrusiveLinkedList; + + const bool m_UseMutex; + const bool m_AlwaysCommitted; + const bool m_MsaaAlwaysCommitted; + ID3D12Device* m_Device; // AddRef +#ifdef __ID3D12Device4_INTERFACE_DEFINED__ + ID3D12Device4* m_Device4 = NULL; // AddRef, optional +#endif +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + ID3D12Device8* m_Device8 = NULL; // AddRef, optional +#endif + IDXGIAdapter* m_Adapter; // AddRef +#if D3D12MA_DXGI_1_4 + IDXGIAdapter3* m_Adapter3 = NULL; // AddRef, optional +#endif + UINT64 m_PreferredBlockSize; + ALLOCATION_CALLBACKS m_AllocationCallbacks; + D3D12MA_ATOMIC_UINT32 m_CurrentFrameIndex; + DXGI_ADAPTER_DESC m_AdapterDesc; + D3D12_FEATURE_DATA_D3D12_OPTIONS m_D3D12Options; + D3D12_FEATURE_DATA_ARCHITECTURE m_D3D12Architecture; + AllocationObjectAllocator m_AllocationObjectAllocator; + + D3D12MA_RW_MUTEX m_PoolsMutex[HEAP_TYPE_COUNT]; + PoolList m_Pools[HEAP_TYPE_COUNT]; + // Default pools. + BlockVector* m_BlockVectors[DEFAULT_POOL_MAX_COUNT]; + CommittedAllocationList m_CommittedAllocations[STANDARD_HEAP_TYPE_COUNT]; + + /* + Heuristics that decides whether a resource should better be placed in its own, + dedicated allocation (committed resource rather than placed resource). + */ + template + static bool PrefersCommittedAllocation(const D3D12_RESOURCE_DESC_T& resourceDesc); + + // Allocates and registers new committed resource with implicit heap, as dedicated allocation. + // Creates and returns Allocation object and optionally D3D12 resource. + HRESULT AllocateCommittedResource( + const CommittedAllocationParameters& committedAllocParams, + UINT64 resourceSize, bool withinBudget, void* pPrivateData, + const D3D12_RESOURCE_DESC* pResourceDesc, + D3D12_RESOURCE_STATES InitialResourceState, const D3D12_CLEAR_VALUE *pOptimizedClearValue, + Allocation** ppAllocation, REFIID riidResource, void** ppvResource); + +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + HRESULT AllocateCommittedResource2( + const CommittedAllocationParameters& committedAllocParams, + UINT64 resourceSize, bool withinBudget, void* pPrivateData, + const D3D12_RESOURCE_DESC1* pResourceDesc, + D3D12_RESOURCE_STATES InitialResourceState, const D3D12_CLEAR_VALUE *pOptimizedClearValue, + Allocation** ppAllocation, REFIID riidResource, void** ppvResource); +#endif + + // Allocates and registers new heap without any resources placed in it, as dedicated allocation. + // Creates and returns Allocation object. + HRESULT AllocateHeap( + const CommittedAllocationParameters& committedAllocParams, + const D3D12_RESOURCE_ALLOCATION_INFO& allocInfo, bool withinBudget, + void* pPrivateData, Allocation** ppAllocation); + + template + HRESULT CalcAllocationParams(const ALLOCATION_DESC& allocDesc, UINT64 allocSize, + const D3D12_RESOURCE_DESC_T* resDesc, // Optional + BlockVector*& outBlockVector, CommittedAllocationParameters& outCommittedAllocationParams, bool& outPreferCommitted); + + // Returns UINT32_MAX if index cannot be calculcated. + UINT CalcDefaultPoolIndex(const ALLOCATION_DESC& allocDesc, ResourceClass resourceClass) const; + void CalcDefaultPoolParams(D3D12_HEAP_TYPE& outHeapType, D3D12_HEAP_FLAGS& outHeapFlags, UINT index) const; + + // Registers Pool object in m_Pools. + void RegisterPool(Pool* pool, D3D12_HEAP_TYPE heapType); + // Unregisters Pool object from m_Pools. + void UnregisterPool(Pool* pool, D3D12_HEAP_TYPE heapType); + + HRESULT UpdateD3D12Budget(); + + D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfoNative(const D3D12_RESOURCE_DESC& resourceDesc) const; +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfoNative(const D3D12_RESOURCE_DESC1& resourceDesc) const; +#endif + + template + D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfo(D3D12_RESOURCE_DESC_T& inOutResourceDesc) const; + + bool NewAllocationWithinBudget(D3D12_HEAP_TYPE heapType, UINT64 size); + + // Writes object { } with data of given budget. + static void WriteBudgetToJson(JsonWriter& json, const Budget& budget); +}; + +#ifndef _D3D12MA_ALLOCATOR_PIMPL_FUNCTINOS +AllocatorPimpl::AllocatorPimpl(const ALLOCATION_CALLBACKS& allocationCallbacks, const ALLOCATOR_DESC& desc) + : m_UseMutex((desc.Flags & ALLOCATOR_FLAG_SINGLETHREADED) == 0), m_AlwaysCommitted((desc.Flags & ALLOCATOR_FLAG_ALWAYS_COMMITTED) != 0), + m_MsaaAlwaysCommitted((desc.Flags & ALLOCATOR_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED) != 0), m_Device(desc.pDevice), m_Adapter(desc.pAdapter), m_PreferredBlockSize(desc.PreferredBlockSize != 0 ? desc.PreferredBlockSize : D3D12MA_DEFAULT_BLOCK_SIZE), @@ -4728,7 +6533,7 @@ AllocatorPimpl::AllocatorPimpl(const ALLOCATION_CALLBACKS& allocationCallbacks, ZeroMemory(m_BlockVectors, sizeof(m_BlockVectors)); - for(UINT i = 0; i < STANDARD_HEAP_TYPE_COUNT; ++i) + for (UINT i = 0; i < STANDARD_HEAP_TYPE_COUNT; ++i) { m_CommittedAllocations[i].Init( m_UseMutex, @@ -4755,13 +6560,13 @@ HRESULT AllocatorPimpl::Init(const ALLOCATOR_DESC& desc) #endif HRESULT hr = m_Adapter->GetDesc(&m_AdapterDesc); - if(FAILED(hr)) + if (FAILED(hr)) { return hr; } hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &m_D3D12Options, sizeof(m_D3D12Options)); - if(FAILED(hr)) + if (FAILED(hr)) { return hr; } @@ -4770,19 +6575,24 @@ HRESULT AllocatorPimpl::Init(const ALLOCATOR_DESC& desc) #endif hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE, &m_D3D12Architecture, sizeof(m_D3D12Architecture)); - if(FAILED(hr)) + if (FAILED(hr)) { m_D3D12Architecture.UMA = FALSE; m_D3D12Architecture.CacheCoherentUMA = FALSE; } D3D12_HEAP_PROPERTIES heapProps = {}; - const UINT defaultPoolCount = CalcDefaultPoolCount(); - for(UINT i = 0; i < defaultPoolCount; ++i) + const UINT defaultPoolCount = GetDefaultPoolCount(); + for (UINT i = 0; i < defaultPoolCount; ++i) { D3D12_HEAP_FLAGS heapFlags; CalcDefaultPoolParams(heapProps.Type, heapFlags, i); +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + if (desc.Flags & ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED) + heapFlags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; +#endif + m_BlockVectors[i] = D3D12MA_NEW(GetAllocs(), BlockVector)( this, // hAllocator heapProps, // heapType @@ -4792,15 +6602,14 @@ HRESULT AllocatorPimpl::Init(const ALLOCATOR_DESC& desc) SIZE_MAX, // maxBlockCount false, // explicitBlockSize D3D12MA_DEBUG_ALIGNMENT, // minAllocationAlignment + 0, // Default algorithm, + m_MsaaAlwaysCommitted, NULL); // pProtectedSession // No need to call m_pBlockVectors[i]->CreateMinBlocks here, becase minBlockCount is 0. } #if D3D12MA_DXGI_1_4 - if(m_Adapter3) - { - UpdateD3D12Budget(); - } + UpdateD3D12Budget(); #endif return S_OK; @@ -4820,14 +6629,14 @@ AllocatorPimpl::~AllocatorPimpl() SAFE_RELEASE(m_Adapter); SAFE_RELEASE(m_Device); - for(UINT i = DEFAULT_POOL_MAX_COUNT; i--; ) + for (UINT i = DEFAULT_POOL_MAX_COUNT; i--; ) { D3D12MA_DELETE(GetAllocs(), m_BlockVectors[i]); } - for(UINT i = HEAP_TYPE_COUNT; i--; ) + for (UINT i = HEAP_TYPE_COUNT; i--; ) { - if(!m_Pools[i].IsEmpty()) + if (!m_Pools[i].IsEmpty()) { D3D12MA_ASSERT(0 && "Unfreed pools found!"); } @@ -4836,25 +6645,59 @@ AllocatorPimpl::~AllocatorPimpl() bool AllocatorPimpl::HeapFlagsFulfillResourceHeapTier(D3D12_HEAP_FLAGS flags) const { - if(SupportsResourceHeapTier2()) + if (SupportsResourceHeapTier2()) { return true; } else { - const bool allowBuffers = (flags & D3D12_HEAP_FLAG_DENY_BUFFERS ) == 0; - const bool allowRtDsTextures = (flags & D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES ) == 0; + const bool allowBuffers = (flags & D3D12_HEAP_FLAG_DENY_BUFFERS) == 0; + const bool allowRtDsTextures = (flags & D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES) == 0; const bool allowNonRtDsTextures = (flags & D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES) == 0; const uint8_t allowedGroupCount = (allowBuffers ? 1 : 0) + (allowRtDsTextures ? 1 : 0) + (allowNonRtDsTextures ? 1 : 0); return allowedGroupCount == 1; } } +UINT AllocatorPimpl::StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE heapType) const +{ + D3D12MA_ASSERT(IsHeapTypeStandard(heapType)); + if (IsUMA()) + return DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY; + return heapType == D3D12_HEAP_TYPE_DEFAULT ? + DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY : DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY; +} + +UINT AllocatorPimpl::HeapPropertiesToMemorySegmentGroup(const D3D12_HEAP_PROPERTIES& heapProps) const +{ + if (IsUMA()) + return DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY; + if (heapProps.MemoryPoolPreference == D3D12_MEMORY_POOL_UNKNOWN) + return StandardHeapTypeToMemorySegmentGroup(heapProps.Type); + return heapProps.MemoryPoolPreference == D3D12_MEMORY_POOL_L1 ? + DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY : DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY; +} + +UINT64 AllocatorPimpl::GetMemoryCapacity(UINT memorySegmentGroup) const +{ + switch (memorySegmentGroup) + { + case DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY: + return IsUMA() ? + m_AdapterDesc.DedicatedVideoMemory + m_AdapterDesc.SharedSystemMemory : m_AdapterDesc.DedicatedVideoMemory; + case DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY: + return IsUMA() ? 0 : m_AdapterDesc.SharedSystemMemory; + default: + D3D12MA_ASSERT(0); + return UINT64_MAX; + } +} + HRESULT AllocatorPimpl::CreateResource( const ALLOCATION_DESC* pAllocDesc, const D3D12_RESOURCE_DESC* pResourceDesc, D3D12_RESOURCE_STATES InitialResourceState, - const D3D12_CLEAR_VALUE *pOptimizedClearValue, + const D3D12_CLEAR_VALUE* pOptimizedClearValue, Allocation** ppAllocation, REFIID riidResource, void** ppvResource) @@ -4862,7 +6705,7 @@ HRESULT AllocatorPimpl::CreateResource( D3D12MA_ASSERT(pAllocDesc && pResourceDesc && ppAllocation); *ppAllocation = NULL; - if(ppvResource) + if (ppvResource) { *ppvResource = NULL; } @@ -4878,36 +6721,36 @@ HRESULT AllocatorPimpl::CreateResource( HRESULT hr = CalcAllocationParams(*pAllocDesc, resAllocInfo.SizeInBytes, pResourceDesc, blockVector, committedAllocationParams, preferCommitted); - if(FAILED(hr)) + if (FAILED(hr)) return hr; const bool withinBudget = (pAllocDesc->Flags & ALLOCATION_FLAG_WITHIN_BUDGET) != 0; hr = E_INVALIDARG; - if(committedAllocationParams.IsValid() && preferCommitted) + if (committedAllocationParams.IsValid() && preferCommitted) { hr = AllocateCommittedResource(committedAllocationParams, - resAllocInfo.SizeInBytes, withinBudget, &finalResourceDesc, - InitialResourceState, pOptimizedClearValue, + resAllocInfo.SizeInBytes, withinBudget, pAllocDesc->pPrivateData, + &finalResourceDesc, InitialResourceState, pOptimizedClearValue, ppAllocation, riidResource, ppvResource); - if(SUCCEEDED(hr)) + if (SUCCEEDED(hr)) return hr; } - if(blockVector != NULL) + if (blockVector != NULL) { hr = blockVector->CreateResource(resAllocInfo.SizeInBytes, resAllocInfo.Alignment, *pAllocDesc, finalResourceDesc, InitialResourceState, pOptimizedClearValue, ppAllocation, riidResource, ppvResource); - if(SUCCEEDED(hr)) + if (SUCCEEDED(hr)) return hr; } - if(committedAllocationParams.IsValid() && !preferCommitted) + if (committedAllocationParams.IsValid() && !preferCommitted) { hr = AllocateCommittedResource(committedAllocationParams, - resAllocInfo.SizeInBytes, withinBudget, &finalResourceDesc, - InitialResourceState, pOptimizedClearValue, + resAllocInfo.SizeInBytes, withinBudget, pAllocDesc->pPrivateData, + &finalResourceDesc, InitialResourceState, pOptimizedClearValue, ppAllocation, riidResource, ppvResource); - if(SUCCEEDED(hr)) + if (SUCCEEDED(hr)) return hr; } return hr; @@ -4918,7 +6761,7 @@ HRESULT AllocatorPimpl::CreateResource2( const ALLOCATION_DESC* pAllocDesc, const D3D12_RESOURCE_DESC1* pResourceDesc, D3D12_RESOURCE_STATES InitialResourceState, - const D3D12_CLEAR_VALUE *pOptimizedClearValue, + const D3D12_CLEAR_VALUE* pOptimizedClearValue, Allocation** ppAllocation, REFIID riidResource, void** ppvResource) @@ -4926,11 +6769,11 @@ HRESULT AllocatorPimpl::CreateResource2( D3D12MA_ASSERT(pAllocDesc && pResourceDesc && ppAllocation); *ppAllocation = NULL; - if(ppvResource) + if (ppvResource) { *ppvResource = NULL; } - if(m_Device8 == NULL) + if (m_Device8 == NULL) { return E_NOINTERFACE; } @@ -4946,36 +6789,36 @@ HRESULT AllocatorPimpl::CreateResource2( HRESULT hr = CalcAllocationParams(*pAllocDesc, resAllocInfo.SizeInBytes, pResourceDesc, blockVector, committedAllocationParams, preferCommitted); - if(FAILED(hr)) + if (FAILED(hr)) return hr; - + const bool withinBudget = (pAllocDesc->Flags & ALLOCATION_FLAG_WITHIN_BUDGET) != 0; hr = E_INVALIDARG; - if(committedAllocationParams.IsValid() && preferCommitted) + if (committedAllocationParams.IsValid() && preferCommitted) { hr = AllocateCommittedResource2(committedAllocationParams, - resAllocInfo.SizeInBytes, withinBudget,&finalResourceDesc, - InitialResourceState, pOptimizedClearValue, + resAllocInfo.SizeInBytes, withinBudget, pAllocDesc->pPrivateData, + &finalResourceDesc, InitialResourceState, pOptimizedClearValue, ppAllocation, riidResource, ppvResource); - if(SUCCEEDED(hr)) + if (SUCCEEDED(hr)) return hr; } - if(blockVector != NULL) + if (blockVector != NULL) { hr = blockVector->CreateResource2(resAllocInfo.SizeInBytes, resAllocInfo.Alignment, *pAllocDesc, finalResourceDesc, InitialResourceState, pOptimizedClearValue, ppAllocation, riidResource, ppvResource); - if(SUCCEEDED(hr)) + if (SUCCEEDED(hr)) return hr; } - if(committedAllocationParams.IsValid() && !preferCommitted) + if (committedAllocationParams.IsValid() && !preferCommitted) { hr = AllocateCommittedResource2(committedAllocationParams, - resAllocInfo.SizeInBytes, withinBudget,&finalResourceDesc, - InitialResourceState, pOptimizedClearValue, + resAllocInfo.SizeInBytes, withinBudget, pAllocDesc->pPrivateData, + &finalResourceDesc, InitialResourceState, pOptimizedClearValue, ppAllocation, riidResource, ppvResource); - if(SUCCEEDED(hr)) + if (SUCCEEDED(hr)) return hr; } return hr; @@ -4995,28 +6838,28 @@ HRESULT AllocatorPimpl::AllocateMemory( HRESULT hr = CalcAllocationParams(*pAllocDesc, pAllocInfo->SizeInBytes, NULL, // pResDesc blockVector, committedAllocationParams, preferCommitted); - if(FAILED(hr)) + if (FAILED(hr)) return hr; - + const bool withinBudget = (pAllocDesc->Flags & ALLOCATION_FLAG_WITHIN_BUDGET) != 0; hr = E_INVALIDARG; - if(committedAllocationParams.IsValid() && preferCommitted) + if (committedAllocationParams.IsValid() && preferCommitted) { - hr = AllocateHeap(committedAllocationParams, *pAllocInfo, withinBudget, ppAllocation); - if(SUCCEEDED(hr)) + hr = AllocateHeap(committedAllocationParams, *pAllocInfo, withinBudget, pAllocDesc->pPrivateData, ppAllocation); + if (SUCCEEDED(hr)) return hr; } - if(blockVector != NULL) + if (blockVector != NULL) { hr = blockVector->Allocate(pAllocInfo->SizeInBytes, pAllocInfo->Alignment, *pAllocDesc, 1, (Allocation**)ppAllocation); - if(SUCCEEDED(hr)) + if (SUCCEEDED(hr)) return hr; } - if(committedAllocationParams.IsValid() && !preferCommitted) + if (committedAllocationParams.IsValid() && !preferCommitted) { - hr = AllocateHeap(committedAllocationParams, *pAllocInfo, withinBudget, ppAllocation); - if(SUCCEEDED(hr)) + hr = AllocateHeap(committedAllocationParams, *pAllocInfo, withinBudget, pAllocDesc->pPrivateData, ppAllocation); + if (SUCCEEDED(hr)) return hr; } return hr; @@ -5027,7 +6870,7 @@ HRESULT AllocatorPimpl::CreateAliasingResource( UINT64 AllocationLocalOffset, const D3D12_RESOURCE_DESC* pResourceDesc, D3D12_RESOURCE_STATES InitialResourceState, - const D3D12_CLEAR_VALUE *pOptimizedClearValue, + const D3D12_CLEAR_VALUE* pOptimizedClearValue, REFIID riidResource, void** ppvResource) { @@ -5043,7 +6886,7 @@ HRESULT AllocatorPimpl::CreateAliasingResource( const UINT64 existingSize = pAllocation->GetSize(); const UINT64 newOffset = existingOffset + AllocationLocalOffset; - if(existingHeap == NULL || + if (existingHeap == NULL || AllocationLocalOffset + resAllocInfo.SizeInBytes > existingSize || newOffset % resAllocInfo.Alignment != 0) { @@ -5060,6 +6903,582 @@ HRESULT AllocatorPimpl::CreateAliasingResource( ppvResource); } +void AllocatorPimpl::FreeCommittedMemory(Allocation* allocation) +{ + D3D12MA_ASSERT(allocation && allocation->m_PackedData.GetType() == Allocation::TYPE_COMMITTED); + + CommittedAllocationList* const allocList = allocation->m_Committed.list; + allocList->Unregister(allocation); + + const UINT memSegmentGroup = allocList->GetMemorySegmentGroup(this); + const UINT64 allocSize = allocation->GetSize(); + m_Budget.RemoveAllocation(memSegmentGroup, allocSize); + m_Budget.RemoveBlock(memSegmentGroup, allocSize); +} + +void AllocatorPimpl::FreePlacedMemory(Allocation* allocation) +{ + D3D12MA_ASSERT(allocation && allocation->m_PackedData.GetType() == Allocation::TYPE_PLACED); + + NormalBlock* const block = allocation->m_Placed.block; + D3D12MA_ASSERT(block); + BlockVector* const blockVector = block->GetBlockVector(); + D3D12MA_ASSERT(blockVector); + m_Budget.RemoveAllocation(HeapPropertiesToMemorySegmentGroup(block->GetHeapProperties()), allocation->GetSize()); + blockVector->Free(allocation); +} + +void AllocatorPimpl::FreeHeapMemory(Allocation* allocation) +{ + D3D12MA_ASSERT(allocation && allocation->m_PackedData.GetType() == Allocation::TYPE_HEAP); + + CommittedAllocationList* const allocList = allocation->m_Committed.list; + allocList->Unregister(allocation); + SAFE_RELEASE(allocation->m_Heap.heap); + + const UINT memSegmentGroup = allocList->GetMemorySegmentGroup(this); + const UINT64 allocSize = allocation->GetSize(); + m_Budget.RemoveAllocation(memSegmentGroup, allocSize); + m_Budget.RemoveBlock(memSegmentGroup, allocSize); +} + +void AllocatorPimpl::SetCurrentFrameIndex(UINT frameIndex) +{ + m_CurrentFrameIndex.store(frameIndex); + +#if D3D12MA_DXGI_1_4 + UpdateD3D12Budget(); +#endif +} + +void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStatistics outCutomHeaps[2]) +{ + // Init stats + for (size_t i = 0; i < HEAP_TYPE_COUNT; i++) + ClearDetailedStatistics(outStats.HeapType[i]); + for (size_t i = 0; i < DXGI_MEMORY_SEGMENT_GROUP_COUNT; i++) + ClearDetailedStatistics(outStats.MemorySegmentGroup[i]); + ClearDetailedStatistics(outStats.Total); + if (outCutomHeaps) + { + ClearDetailedStatistics(outCutomHeaps[0]); + ClearDetailedStatistics(outCutomHeaps[1]); + } + + // Process default pools. 3 standard heap types only. Add them to outStats.HeapType[i]. + if (SupportsResourceHeapTier2()) + { + // DEFAULT, UPLOAD, READBACK. + for (size_t heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) + { + BlockVector* const pBlockVector = m_BlockVectors[heapTypeIndex]; + D3D12MA_ASSERT(pBlockVector); + pBlockVector->AddDetailedStatistics(outStats.HeapType[heapTypeIndex]); + } + } + else + { + // DEFAULT, UPLOAD, READBACK. + for (size_t heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) + { + for (size_t heapSubType = 0; heapSubType < 3; ++heapSubType) + { + BlockVector* const pBlockVector = m_BlockVectors[heapTypeIndex * 3 + heapSubType]; + D3D12MA_ASSERT(pBlockVector); + pBlockVector->AddDetailedStatistics(outStats.HeapType[heapTypeIndex]); + } + } + } + + // Sum them up to memory segment groups. + AddDetailedStatistics( + outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE_DEFAULT)], + outStats.HeapType[0]); + AddDetailedStatistics( + outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE_UPLOAD)], + outStats.HeapType[1]); + AddDetailedStatistics( + outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE_READBACK)], + outStats.HeapType[2]); + + // Process custom pools. + DetailedStatistics tmpStats; + for (size_t heapTypeIndex = 0; heapTypeIndex < HEAP_TYPE_COUNT; ++heapTypeIndex) + { + MutexLockRead lock(m_PoolsMutex[heapTypeIndex], m_UseMutex); + PoolList& poolList = m_Pools[heapTypeIndex]; + for (PoolPimpl* pool = poolList.Front(); pool != NULL; pool = poolList.GetNext(pool)) + { + const D3D12_HEAP_PROPERTIES& poolHeapProps = pool->GetDesc().HeapProperties; + ClearDetailedStatistics(tmpStats); + pool->AddDetailedStatistics(tmpStats); + AddDetailedStatistics( + outStats.HeapType[heapTypeIndex], tmpStats); + + UINT memorySegment = HeapPropertiesToMemorySegmentGroup(poolHeapProps); + AddDetailedStatistics( + outStats.MemorySegmentGroup[memorySegment], tmpStats); + + if (outCutomHeaps) + AddDetailedStatistics(outCutomHeaps[memorySegment], tmpStats); + } + } + + // Process committed allocations. 3 standard heap types only. + for (UINT heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) + { + ClearDetailedStatistics(tmpStats); + m_CommittedAllocations[heapTypeIndex].AddDetailedStatistics(tmpStats); + AddDetailedStatistics( + outStats.HeapType[heapTypeIndex], tmpStats); + AddDetailedStatistics( + outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(IndexToHeapType(heapTypeIndex))], tmpStats); + } + + // Sum up memory segment groups to totals. + AddDetailedStatistics(outStats.Total, outStats.MemorySegmentGroup[0]); + AddDetailedStatistics(outStats.Total, outStats.MemorySegmentGroup[1]); + + D3D12MA_ASSERT(outStats.Total.Stats.BlockCount == + outStats.MemorySegmentGroup[0].Stats.BlockCount + outStats.MemorySegmentGroup[1].Stats.BlockCount); + D3D12MA_ASSERT(outStats.Total.Stats.AllocationCount == + outStats.MemorySegmentGroup[0].Stats.AllocationCount + outStats.MemorySegmentGroup[1].Stats.AllocationCount); + D3D12MA_ASSERT(outStats.Total.Stats.BlockBytes == + outStats.MemorySegmentGroup[0].Stats.BlockBytes + outStats.MemorySegmentGroup[1].Stats.BlockBytes); + D3D12MA_ASSERT(outStats.Total.Stats.AllocationBytes == + outStats.MemorySegmentGroup[0].Stats.AllocationBytes + outStats.MemorySegmentGroup[1].Stats.AllocationBytes); + D3D12MA_ASSERT(outStats.Total.UnusedRangeCount == + outStats.MemorySegmentGroup[0].UnusedRangeCount + outStats.MemorySegmentGroup[1].UnusedRangeCount); + + D3D12MA_ASSERT(outStats.Total.Stats.BlockCount == + outStats.HeapType[0].Stats.BlockCount + outStats.HeapType[1].Stats.BlockCount + + outStats.HeapType[2].Stats.BlockCount + outStats.HeapType[3].Stats.BlockCount); + D3D12MA_ASSERT(outStats.Total.Stats.AllocationCount == + outStats.HeapType[0].Stats.AllocationCount + outStats.HeapType[1].Stats.AllocationCount + + outStats.HeapType[2].Stats.AllocationCount + outStats.HeapType[3].Stats.AllocationCount); + D3D12MA_ASSERT(outStats.Total.Stats.BlockBytes == + outStats.HeapType[0].Stats.BlockBytes + outStats.HeapType[1].Stats.BlockBytes + + outStats.HeapType[2].Stats.BlockBytes + outStats.HeapType[3].Stats.BlockBytes); + D3D12MA_ASSERT(outStats.Total.Stats.AllocationBytes == + outStats.HeapType[0].Stats.AllocationBytes + outStats.HeapType[1].Stats.AllocationBytes + + outStats.HeapType[2].Stats.AllocationBytes + outStats.HeapType[3].Stats.AllocationBytes); + D3D12MA_ASSERT(outStats.Total.UnusedRangeCount == + outStats.HeapType[0].UnusedRangeCount + outStats.HeapType[1].UnusedRangeCount + + outStats.HeapType[2].UnusedRangeCount + outStats.HeapType[3].UnusedRangeCount); +} + +void AllocatorPimpl::GetBudget(Budget* outLocalBudget, Budget* outNonLocalBudget) +{ + if (outLocalBudget) + m_Budget.GetStatistics(outLocalBudget->Stats, DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY); + if (outNonLocalBudget) + m_Budget.GetStatistics(outNonLocalBudget->Stats, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY); + +#if D3D12MA_DXGI_1_4 + if (m_Adapter3) + { + if (!m_Budget.ShouldUpdateBudget()) + { + m_Budget.GetBudget(m_UseMutex, + outLocalBudget ? &outLocalBudget->UsageBytes : NULL, + outLocalBudget ? &outLocalBudget->BudgetBytes : NULL, + outNonLocalBudget ? &outNonLocalBudget->UsageBytes : NULL, + outNonLocalBudget ? &outNonLocalBudget->BudgetBytes : NULL); + } + else + { + UpdateD3D12Budget(); + GetBudget(outLocalBudget, outNonLocalBudget); // Recursion + } + } + else +#endif + { + if (outLocalBudget) + { + outLocalBudget->UsageBytes = outLocalBudget->Stats.BlockBytes; + outLocalBudget->BudgetBytes = GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY) * 8 / 10; // 80% heuristics. + } + if (outNonLocalBudget) + { + outNonLocalBudget->UsageBytes = outNonLocalBudget->Stats.BlockBytes; + outNonLocalBudget->BudgetBytes = GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY) * 8 / 10; // 80% heuristics. + } + } +} + +void AllocatorPimpl::GetBudgetForHeapType(Budget& outBudget, D3D12_HEAP_TYPE heapType) +{ + switch (heapType) + { + case D3D12_HEAP_TYPE_DEFAULT: + GetBudget(&outBudget, NULL); + break; + case D3D12_HEAP_TYPE_UPLOAD: + case D3D12_HEAP_TYPE_READBACK: + GetBudget(NULL, &outBudget); + break; + default: D3D12MA_ASSERT(0); + } +} + +void AllocatorPimpl::BuildStatsString(WCHAR** ppStatsString, BOOL detailedMap) +{ + StringBuilder sb(GetAllocs()); + { + Budget localBudget = {}, nonLocalBudget = {}; + GetBudget(&localBudget, &nonLocalBudget); + + TotalStatistics stats; + DetailedStatistics customHeaps[2]; + CalculateStatistics(stats, customHeaps); + + JsonWriter json(GetAllocs(), sb); + json.BeginObject(); + { + json.WriteString(L"General"); + json.BeginObject(); + { + json.WriteString(L"API"); + json.WriteString(L"Direct3D 12"); + + json.WriteString(L"GPU"); + json.WriteString(m_AdapterDesc.Description); + + json.WriteString(L"DedicatedVideoMemory"); + json.WriteNumber(m_AdapterDesc.DedicatedVideoMemory); + json.WriteString(L"DedicatedSystemMemory"); + json.WriteNumber(m_AdapterDesc.DedicatedSystemMemory); + json.WriteString(L"SharedSystemMemory"); + json.WriteNumber(m_AdapterDesc.SharedSystemMemory); + + json.WriteString(L"ResourceHeapTier"); + json.WriteNumber(static_cast(m_D3D12Options.ResourceHeapTier)); + + json.WriteString(L"ResourceBindingTier"); + json.WriteNumber(static_cast(m_D3D12Options.ResourceBindingTier)); + + json.WriteString(L"TiledResourcesTier"); + json.WriteNumber(static_cast(m_D3D12Options.TiledResourcesTier)); + + json.WriteString(L"TileBasedRenderer"); + json.WriteBool(m_D3D12Architecture.TileBasedRenderer); + + json.WriteString(L"UMA"); + json.WriteBool(m_D3D12Architecture.UMA); + json.WriteString(L"CacheCoherentUMA"); + json.WriteBool(m_D3D12Architecture.CacheCoherentUMA); + } + json.EndObject(); + } + { + json.WriteString(L"Total"); + json.AddDetailedStatisticsInfoObject(stats.Total); + } + { + json.WriteString(L"MemoryInfo"); + json.BeginObject(); + { + json.WriteString(L"L0"); + json.BeginObject(); + { + json.WriteString(L"Budget"); + WriteBudgetToJson(json, IsUMA() ? localBudget : nonLocalBudget); // When UMA device only L0 present as local + + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.MemorySegmentGroup[!IsUMA()]); + + json.WriteString(L"MemoryPools"); + json.BeginObject(); + { + if (IsUMA()) + { + json.WriteString(L"DEFAULT"); + json.BeginObject(); + { + json.WriteString(L"Flags"); + json.BeginArray(true); + json.EndArray(); + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.HeapType[0]); + } + json.EndObject(); + } + json.WriteString(L"UPLOAD"); + json.BeginObject(); + { + json.WriteString(L"Flags"); + json.BeginArray(true); + json.EndArray(); + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.HeapType[1]); + } + json.EndObject(); + + json.WriteString(L"READBACK"); + json.BeginObject(); + { + json.WriteString(L"Flags"); + json.BeginArray(true); + json.EndArray(); + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.HeapType[2]); + } + json.EndObject(); + + json.WriteString(L"CUSTOM"); + json.BeginObject(); + { + json.WriteString(L"Flags"); + json.BeginArray(true); + json.EndArray(); + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(customHeaps[!IsUMA()]); + } + json.EndObject(); + } + json.EndObject(); + } + json.EndObject(); + if (!IsUMA()) + { + json.WriteString(L"L1"); + json.BeginObject(); + { + json.WriteString(L"Flags"); + json.BeginArray(true); + json.EndArray(); + + json.WriteString(L"Size"); + json.WriteNumber(0U); + + json.WriteString(L"Budget"); + WriteBudgetToJson(json, localBudget); + + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.MemorySegmentGroup[0]); + + json.WriteString(L"MemoryPools"); + json.BeginObject(); + { + json.WriteString(L"DEFAULT"); + json.BeginObject(); + { + json.WriteString(L"Flags"); + json.BeginArray(true); + json.EndArray(); + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.HeapType[0]); + } + json.EndObject(); + + json.WriteString(L"CUSTOM"); + json.BeginObject(); + { + json.WriteString(L"Flags"); + json.BeginArray(true); + json.EndArray(); + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(customHeaps[0]); + } + json.EndObject(); + } + json.EndObject(); + } + json.EndObject(); + } + } + json.EndObject(); + } + + if (detailedMap) + { + const auto writeHeapInfo = [&](BlockVector* blockVector, CommittedAllocationList* committedAllocs, bool customHeap) + { + D3D12MA_ASSERT(blockVector); + + D3D12_HEAP_FLAGS flags = blockVector->GetHeapFlags(); + json.WriteString(L"Flags"); + json.BeginArray(true); + { + if (flags & D3D12_HEAP_FLAG_SHARED) + json.WriteString(L"HEAP_FLAG_SHARED"); + if (flags & D3D12_HEAP_FLAG_ALLOW_DISPLAY) + json.WriteString(L"HEAP_FLAG_ALLOW_DISPLAY"); + if (flags & D3D12_HEAP_FLAG_SHARED_CROSS_ADAPTER) + json.WriteString(L"HEAP_FLAG_CROSS_ADAPTER"); + if (flags & D3D12_HEAP_FLAG_HARDWARE_PROTECTED) + json.WriteString(L"HEAP_FLAG_HARDWARE_PROTECTED"); + if (flags & D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH) + json.WriteString(L"HEAP_FLAG_ALLOW_WRITE_WATCH"); + if (flags & D3D12_HEAP_FLAG_ALLOW_SHADER_ATOMICS) + json.WriteString(L"HEAP_FLAG_ALLOW_SHADER_ATOMICS"); +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + if (flags & D3D12_HEAP_FLAG_CREATE_NOT_RESIDENT) + json.WriteString(L"HEAP_FLAG_CREATE_NOT_RESIDENT"); + if (flags & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) + json.WriteString(L"HEAP_FLAG_CREATE_NOT_ZEROED"); +#endif + + if (flags & D3D12_HEAP_FLAG_DENY_BUFFERS) + json.WriteString(L"HEAP_FLAG_DENY_BUFFERS"); + if (flags & D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES) + json.WriteString(L"HEAP_FLAG_DENY_RT_DS_TEXTURES"); + if (flags & D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES) + json.WriteString(L"HEAP_FLAG_DENY_NON_RT_DS_TEXTURES"); + + flags &= ~(D3D12_HEAP_FLAG_SHARED + | D3D12_HEAP_FLAG_DENY_BUFFERS + | D3D12_HEAP_FLAG_ALLOW_DISPLAY + | D3D12_HEAP_FLAG_SHARED_CROSS_ADAPTER + | D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES + | D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES + | D3D12_HEAP_FLAG_HARDWARE_PROTECTED + | D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH + | D3D12_HEAP_FLAG_ALLOW_SHADER_ATOMICS); +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + flags &= ~(D3D12_HEAP_FLAG_CREATE_NOT_RESIDENT + | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED); +#endif + if (flags != 0) + json.WriteNumber((UINT)flags); + + if (customHeap) + { + const D3D12_HEAP_PROPERTIES& properties = blockVector->GetHeapProperties(); + switch (properties.MemoryPoolPreference) + { + default: + D3D12MA_ASSERT(0); + case D3D12_MEMORY_POOL_UNKNOWN: + json.WriteString(L"MEMORY_POOL_UNKNOWN"); + break; + case D3D12_MEMORY_POOL_L0: + json.WriteString(L"MEMORY_POOL_L0"); + break; + case D3D12_MEMORY_POOL_L1: + json.WriteString(L"MEMORY_POOL_L1"); + break; + } + switch (properties.CPUPageProperty) + { + default: + D3D12MA_ASSERT(0); + case D3D12_CPU_PAGE_PROPERTY_UNKNOWN: + json.WriteString(L"CPU_PAGE_PROPERTY_UNKNOWN"); + break; + case D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE: + json.WriteString(L"CPU_PAGE_PROPERTY_NOT_AVAILABLE"); + break; + case D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE: + json.WriteString(L"CPU_PAGE_PROPERTY_WRITE_COMBINE"); + break; + case D3D12_CPU_PAGE_PROPERTY_WRITE_BACK: + json.WriteString(L"CPU_PAGE_PROPERTY_WRITE_BACK"); + break; + } + } + } + json.EndArray(); + + json.WriteString(L"PreferredBlockSize"); + json.WriteNumber(blockVector->GetPreferredBlockSize()); + + json.WriteString(L"Blocks"); + blockVector->WriteBlockInfoToJson(json); + + json.WriteString(L"DedicatedAllocations"); + json.BeginArray(); + if (committedAllocs) + committedAllocs->BuildStatsString(json); + json.EndArray(); + }; + + json.WriteString(L"DefaultPools"); + json.BeginObject(); + { + if (SupportsResourceHeapTier2()) + { + for (uint8_t heapType = 0; heapType < STANDARD_HEAP_TYPE_COUNT; ++heapType) + { + json.WriteString(HeapTypeNames[heapType]); + json.BeginObject(); + writeHeapInfo(m_BlockVectors[heapType], m_CommittedAllocations + heapType, false); + json.EndObject(); + } + } + else + { + for (uint8_t heapType = 0; heapType < STANDARD_HEAP_TYPE_COUNT; ++heapType) + { + for (uint8_t heapSubType = 0; heapSubType < 3; ++heapSubType) + { + static const WCHAR* const heapSubTypeName[] = { + L" - Buffers", + L" - Textures", + L" - Textures RT/DS", + }; + json.BeginString(HeapTypeNames[heapType]); + json.EndString(heapSubTypeName[heapSubType]); + + json.BeginObject(); + writeHeapInfo(m_BlockVectors[heapType + heapSubType], m_CommittedAllocations + heapType, false); + json.EndObject(); + } + } + } + } + json.EndObject(); + + json.WriteString(L"CustomPools"); + json.BeginObject(); + for (uint8_t heapTypeIndex = 0; heapTypeIndex < HEAP_TYPE_COUNT; ++heapTypeIndex) + { + MutexLockRead mutex(m_PoolsMutex[heapTypeIndex], m_UseMutex); + auto* item = m_Pools[heapTypeIndex].Front(); + if (item != NULL) + { + size_t index = 0; + json.WriteString(HeapTypeNames[heapTypeIndex]); + json.BeginArray(); + do + { + json.BeginObject(); + json.WriteString(L"Name"); + json.BeginString(); + json.ContinueString(index++); + if (item->GetName()) + { + json.WriteString(L" - "); + json.WriteString(item->GetName()); + } + json.EndString(); + + writeHeapInfo(item->GetBlockVector(), item->GetCommittedAllocationList(), heapTypeIndex == 3); + json.EndObject(); + } while ((item = PoolList::GetNext(item)) != NULL); + json.EndArray(); + } + } + json.EndObject(); + } + json.EndObject(); + } + + const size_t length = sb.GetLength(); + WCHAR* result = AllocateArray(GetAllocs(), length + 2); + result[0] = 0xFEFF; + memcpy(result + 1, sb.GetData(), length * sizeof(WCHAR)); + result[length + 1] = L'\0'; + *ppStatsString = result; +} + +void AllocatorPimpl::FreeStatsString(WCHAR* pStatsString) +{ + D3D12MA_ASSERT(pStatsString); + Free(GetAllocs(), pStatsString); +} + template bool AllocatorPimpl::PrefersCommittedAllocation(const D3D12_RESOURCE_DESC_T& resourceDesc) { @@ -5069,66 +7488,104 @@ bool AllocatorPimpl::PrefersCommittedAllocation(const D3D12_RESOURCE_DESC_T& res HRESULT AllocatorPimpl::AllocateCommittedResource( const CommittedAllocationParameters& committedAllocParams, - UINT64 resourceSize, bool withinBudget, + UINT64 resourceSize, bool withinBudget, void* pPrivateData, const D3D12_RESOURCE_DESC* pResourceDesc, - D3D12_RESOURCE_STATES InitialResourceState, const D3D12_CLEAR_VALUE *pOptimizedClearValue, + D3D12_RESOURCE_STATES InitialResourceState, const D3D12_CLEAR_VALUE* pOptimizedClearValue, Allocation** ppAllocation, REFIID riidResource, void** ppvResource) { D3D12MA_ASSERT(committedAllocParams.IsValid()); - if(withinBudget && + HRESULT hr; + ID3D12Resource* res = NULL; + // Allocate aliasing memory with explicit heap + if (committedAllocParams.m_CanAlias) + { + D3D12_RESOURCE_ALLOCATION_INFO heapAllocInfo = {}; + heapAllocInfo.SizeInBytes = resourceSize; + heapAllocInfo.Alignment = HeapFlagsToAlignment(committedAllocParams.m_HeapFlags, m_MsaaAlwaysCommitted); + hr = AllocateHeap(committedAllocParams, heapAllocInfo, withinBudget, pPrivateData, ppAllocation); + if (SUCCEEDED(hr)) + { + hr = m_Device->CreatePlacedResource((*ppAllocation)->GetHeap(), 0, + pResourceDesc, InitialResourceState, + pOptimizedClearValue, D3D12MA_IID_PPV_ARGS(&res)); + if (SUCCEEDED(hr)) + { + if (ppvResource != NULL) + hr = res->QueryInterface(riidResource, ppvResource); + if (SUCCEEDED(hr)) + { + (*ppAllocation)->SetResourcePointer(res, pResourceDesc); + return hr; + } + res->Release(); + } + FreeHeapMemory(*ppAllocation); + } + return hr; + } + + if (withinBudget && !NewAllocationWithinBudget(committedAllocParams.m_HeapProperties.Type, resourceSize)) { return E_OUTOFMEMORY; } - ID3D12Resource* res = NULL; /* D3D12 ERROR: - * ID3D12Device::CreateCommittedResource: + * ID3D12Device::CreateCommittedResource: * When creating a committed resource, D3D12_HEAP_FLAGS must not have either * D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES, * D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES, * nor D3D12_HEAP_FLAG_DENY_BUFFERS set. * These flags will be set automatically to correspond with the committed resource type. - * + * * [ STATE_CREATION ERROR #640: CREATERESOURCEANDHEAP_INVALIDHEAPMISCFLAGS] */ #ifdef __ID3D12Device4_INTERFACE_DEFINED__ - if(m_Device4 == NULL) - return E_NOINTERFACE; - - HRESULT hr = m_Device4->CreateCommittedResource1( - &committedAllocParams.m_HeapProperties, - committedAllocParams.m_HeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS, - pResourceDesc, InitialResourceState, - pOptimizedClearValue, committedAllocParams.m_ProtectedSession, D3D12MA_IID_PPV_ARGS(&res)); -#else - D3D12MA_ASSERT(committedAllocParams.m_ProtectedSession == NULL); - HRESULT hr = m_Device->CreateCommittedResource( - &committedAllocParams.m_HeapProperties, - committedAllocParams.m_HeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS, - pResourceDesc, InitialResourceState, - pOptimizedClearValue, D3D12MA_IID_PPV_ARGS(&res)); -#endif - if(SUCCEEDED(hr)) + if (m_Device4) { - if(ppvResource != NULL) + hr = m_Device4->CreateCommittedResource1( + &committedAllocParams.m_HeapProperties, + committedAllocParams.m_HeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS, + pResourceDesc, InitialResourceState, + pOptimizedClearValue, committedAllocParams.m_ProtectedSession, D3D12MA_IID_PPV_ARGS(&res)); + } + else +#endif + { + if (committedAllocParams.m_ProtectedSession == NULL) + { + hr = m_Device->CreateCommittedResource( + &committedAllocParams.m_HeapProperties, + committedAllocParams.m_HeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS, + pResourceDesc, InitialResourceState, + pOptimizedClearValue, D3D12MA_IID_PPV_ARGS(&res)); + } + else + hr = E_NOINTERFACE; + } + + if (SUCCEEDED(hr)) + { + if (ppvResource != NULL) { hr = res->QueryInterface(riidResource, ppvResource); } - if(SUCCEEDED(hr)) + if (SUCCEEDED(hr)) { const BOOL wasZeroInitialized = TRUE; - Allocation* alloc = m_AllocationObjectAllocator.Allocate(this, resourceSize, wasZeroInitialized); + Allocation* alloc = m_AllocationObjectAllocator.Allocate(this, resourceSize, pResourceDesc->Alignment, wasZeroInitialized); alloc->InitCommitted(committedAllocParams.m_List); - alloc->SetResource(res, pResourceDesc); + alloc->SetResourcePointer(res, pResourceDesc); + alloc->SetPrivateData(pPrivateData); *ppAllocation = alloc; committedAllocParams.m_List->Register(alloc); - const UINT heapTypeIndex = HeapTypeToIndex(committedAllocParams.m_HeapProperties.Type); - m_Budget.AddCommittedAllocation(heapTypeIndex, resourceSize); + const UINT memSegmentGroup = HeapPropertiesToMemorySegmentGroup(committedAllocParams.m_HeapProperties); + m_Budget.AddBlock(memSegmentGroup, resourceSize); + m_Budget.AddAllocation(memSegmentGroup, resourceSize); } else { @@ -5141,49 +7598,80 @@ HRESULT AllocatorPimpl::AllocateCommittedResource( #ifdef __ID3D12Device8_INTERFACE_DEFINED__ HRESULT AllocatorPimpl::AllocateCommittedResource2( const CommittedAllocationParameters& committedAllocParams, - UINT64 resourceSize, bool withinBudget, + UINT64 resourceSize, bool withinBudget, void* pPrivateData, const D3D12_RESOURCE_DESC1* pResourceDesc, - D3D12_RESOURCE_STATES InitialResourceState, const D3D12_CLEAR_VALUE *pOptimizedClearValue, + D3D12_RESOURCE_STATES InitialResourceState, const D3D12_CLEAR_VALUE* pOptimizedClearValue, Allocation** ppAllocation, REFIID riidResource, void** ppvResource) { D3D12MA_ASSERT(committedAllocParams.IsValid()); - - if(m_Device8 == NULL) + + if (m_Device8 == NULL) { return E_NOINTERFACE; } - if(withinBudget && + HRESULT hr; + ID3D12Resource* res = NULL; + // Allocate aliasing memory with explicit heap + if (committedAllocParams.m_CanAlias) + { + D3D12_RESOURCE_ALLOCATION_INFO heapAllocInfo = {}; + heapAllocInfo.SizeInBytes = resourceSize; + heapAllocInfo.Alignment = HeapFlagsToAlignment(committedAllocParams.m_HeapFlags, m_MsaaAlwaysCommitted); + hr = AllocateHeap(committedAllocParams, heapAllocInfo, withinBudget, pPrivateData, ppAllocation); + if (SUCCEEDED(hr)) + { + hr = m_Device8->CreatePlacedResource1((*ppAllocation)->GetHeap(), 0, + pResourceDesc, InitialResourceState, + pOptimizedClearValue, D3D12MA_IID_PPV_ARGS(&res)); + if (SUCCEEDED(hr)) + { + if (ppvResource != NULL) + hr = res->QueryInterface(riidResource, ppvResource); + if (SUCCEEDED(hr)) + { + (*ppAllocation)->SetResourcePointer(res, pResourceDesc); + return hr; + } + res->Release(); + } + FreeHeapMemory(*ppAllocation); + } + return hr; + } + + if (withinBudget && !NewAllocationWithinBudget(committedAllocParams.m_HeapProperties.Type, resourceSize)) { return E_OUTOFMEMORY; } - ID3D12Resource* res = NULL; - HRESULT hr = m_Device8->CreateCommittedResource2( + hr = m_Device8->CreateCommittedResource2( &committedAllocParams.m_HeapProperties, committedAllocParams.m_HeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS, // D3D12 ERROR: ID3D12Device::CreateCommittedResource: When creating a committed resource, D3D12_HEAP_FLAGS must not have either D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES, D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES, nor D3D12_HEAP_FLAG_DENY_BUFFERS set. These flags will be set automatically to correspond with the committed resource type. [ STATE_CREATION ERROR #640: CREATERESOURCEANDHEAP_INVALIDHEAPMISCFLAGS] pResourceDesc, InitialResourceState, pOptimizedClearValue, committedAllocParams.m_ProtectedSession, D3D12MA_IID_PPV_ARGS(&res)); - if(SUCCEEDED(hr)) + if (SUCCEEDED(hr)) { - if(ppvResource != NULL) + if (ppvResource != NULL) { hr = res->QueryInterface(riidResource, ppvResource); } - if(SUCCEEDED(hr)) + if (SUCCEEDED(hr)) { const BOOL wasZeroInitialized = TRUE; - Allocation* alloc = m_AllocationObjectAllocator.Allocate(this, resourceSize, wasZeroInitialized); + Allocation* alloc = m_AllocationObjectAllocator.Allocate(this, resourceSize, pResourceDesc->Alignment, wasZeroInitialized); alloc->InitCommitted(committedAllocParams.m_List); - alloc->SetResource(res, pResourceDesc); + alloc->SetResourcePointer(res, pResourceDesc); + alloc->SetPrivateData(pPrivateData); *ppAllocation = alloc; committedAllocParams.m_List->Register(alloc); - const UINT heapTypeIndex = HeapTypeToIndex(committedAllocParams.m_HeapProperties.Type); - m_Budget.AddCommittedAllocation(heapTypeIndex, resourceSize); + const UINT memSegmentGroup = HeapPropertiesToMemorySegmentGroup(committedAllocParams.m_HeapProperties); + m_Budget.AddBlock(memSegmentGroup, resourceSize); + m_Budget.AddAllocation(memSegmentGroup, resourceSize); } else { @@ -5197,13 +7685,13 @@ HRESULT AllocatorPimpl::AllocateCommittedResource2( HRESULT AllocatorPimpl::AllocateHeap( const CommittedAllocationParameters& committedAllocParams, const D3D12_RESOURCE_ALLOCATION_INFO& allocInfo, bool withinBudget, - Allocation** ppAllocation) + void* pPrivateData, Allocation** ppAllocation) { D3D12MA_ASSERT(committedAllocParams.IsValid()); *ppAllocation = nullptr; - if(withinBudget && + if (withinBudget && !NewAllocationWithinBudget(committedAllocParams.m_HeapProperties.Type, allocInfo.SizeInBytes)) { return E_OUTOFMEMORY; @@ -5215,25 +7703,31 @@ HRESULT AllocatorPimpl::AllocateHeap( heapDesc.Alignment = allocInfo.Alignment; heapDesc.Flags = committedAllocParams.m_HeapFlags; + HRESULT hr; ID3D12Heap* heap = nullptr; #ifdef __ID3D12Device4_INTERFACE_DEFINED__ - if(m_Device4 == NULL) - return E_NOINTERFACE; - - HRESULT hr = m_Device4->CreateHeap1(&heapDesc, committedAllocParams.m_ProtectedSession, D3D12MA_IID_PPV_ARGS(&heap)); -#else - D3D12MA_ASSERT(committedAllocParams.m_ProtectedSession == NULL); - HRESULT hr = m_Device->CreateHeap(&heapDesc, D3D12MA_IID_PPV_ARGS(&heap)); + if (m_Device4) + hr = m_Device4->CreateHeap1(&heapDesc, committedAllocParams.m_ProtectedSession, D3D12MA_IID_PPV_ARGS(&heap)); + else #endif - if(SUCCEEDED(hr)) + { + if (committedAllocParams.m_ProtectedSession == NULL) + hr = m_Device->CreateHeap(&heapDesc, D3D12MA_IID_PPV_ARGS(&heap)); + else + hr = E_NOINTERFACE; + } + + if (SUCCEEDED(hr)) { const BOOL wasZeroInitialized = TRUE; - (*ppAllocation) = m_AllocationObjectAllocator.Allocate(this, allocInfo.SizeInBytes, wasZeroInitialized); + (*ppAllocation) = m_AllocationObjectAllocator.Allocate(this, allocInfo.SizeInBytes, allocInfo.Alignment, wasZeroInitialized); (*ppAllocation)->InitHeap(committedAllocParams.m_List, heap); + (*ppAllocation)->SetPrivateData(pPrivateData); committedAllocParams.m_List->Register(*ppAllocation); - const UINT heapTypeIndex = HeapTypeToIndex(committedAllocParams.m_HeapProperties.Type); - m_Budget.AddCommittedAllocation(heapTypeIndex, allocInfo.SizeInBytes); + const UINT memSegmentGroup = HeapPropertiesToMemorySegmentGroup(committedAllocParams.m_HeapProperties); + m_Budget.AddBlock(memSegmentGroup, allocInfo.SizeInBytes); + m_Budget.AddAllocation(memSegmentGroup, allocInfo.SizeInBytes); } return hr; } @@ -5247,10 +7741,12 @@ HRESULT AllocatorPimpl::CalcAllocationParams(const ALLOCATION_DESC& allocDesc, U outCommittedAllocationParams = CommittedAllocationParameters(); outPreferCommitted = false; - if(allocDesc.CustomPool != NULL) + bool msaaAlwaysCommitted; + if (allocDesc.CustomPool != NULL) { PoolPimpl* const pool = allocDesc.CustomPool->m_Pimpl; + msaaAlwaysCommitted = pool->GetBlockVector()->DeniesMsaaTextures(); outBlockVector = pool->GetBlockVector(); outCommittedAllocationParams.m_ProtectedSession = pool->GetDesc().pProtectedSession; @@ -5260,80 +7756,73 @@ HRESULT AllocatorPimpl::CalcAllocationParams(const ALLOCATION_DESC& allocDesc, U } else { - if(!IsHeapTypeStandard(allocDesc.HeapType)) + if (!IsHeapTypeStandard(allocDesc.HeapType)) { return E_INVALIDARG; } + msaaAlwaysCommitted = m_MsaaAlwaysCommitted; outCommittedAllocationParams.m_HeapProperties = StandardHeapTypeToHeapProperties(allocDesc.HeapType); outCommittedAllocationParams.m_HeapFlags = allocDesc.ExtraHeapFlags; outCommittedAllocationParams.m_List = &m_CommittedAllocations[HeapTypeToIndex(allocDesc.HeapType)]; - + const ResourceClass resourceClass = (resDesc != NULL) ? ResourceDescToResourceClass(*resDesc) : HeapFlagsToResourceClass(allocDesc.ExtraHeapFlags); const UINT defaultPoolIndex = CalcDefaultPoolIndex(allocDesc, resourceClass); - if(defaultPoolIndex != UINT32_MAX) + if (defaultPoolIndex != UINT32_MAX) { outBlockVector = m_BlockVectors[defaultPoolIndex]; const UINT64 preferredBlockSize = outBlockVector->GetPreferredBlockSize(); - if(allocSize > preferredBlockSize) + if (allocSize > preferredBlockSize) { outBlockVector = NULL; } - else if(allocSize > preferredBlockSize / 2) + else if (allocSize > preferredBlockSize / 2) { // Heuristics: Allocate committed memory if requested size if greater than half of preferred block size. outPreferCommitted = true; } } - + const D3D12_HEAP_FLAGS extraHeapFlags = allocDesc.ExtraHeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS; - if(outBlockVector != NULL && extraHeapFlags != 0) + if (outBlockVector != NULL && extraHeapFlags != 0) { outBlockVector = NULL; } } - if((allocDesc.Flags & ALLOCATION_FLAG_COMMITTED) != 0 || + if ((allocDesc.Flags & ALLOCATION_FLAG_COMMITTED) != 0 || m_AlwaysCommitted) { outBlockVector = NULL; } - if((allocDesc.Flags & ALLOCATION_FLAG_NEVER_ALLOCATE) != 0) + if ((allocDesc.Flags & ALLOCATION_FLAG_NEVER_ALLOCATE) != 0) { outCommittedAllocationParams.m_List = NULL; } + outCommittedAllocationParams.m_CanAlias = allocDesc.Flags & ALLOCATION_FLAG_CAN_ALIAS; - if(resDesc != NULL && !outPreferCommitted && PrefersCommittedAllocation(*resDesc)) + if (resDesc != NULL) { - outPreferCommitted = true; + if (resDesc->SampleDesc.Count > 1 && msaaAlwaysCommitted) + outBlockVector = NULL; + if (!outPreferCommitted && PrefersCommittedAllocation(*resDesc)) + outPreferCommitted = true; } return (outBlockVector != NULL || outCommittedAllocationParams.m_List != NULL) ? S_OK : E_INVALIDARG; } -UINT AllocatorPimpl::CalcDefaultPoolCount() const -{ - if(SupportsResourceHeapTier2()) - { - return 3; - } - else - { - return 9; - } -} - UINT AllocatorPimpl::CalcDefaultPoolIndex(const ALLOCATION_DESC& allocDesc, ResourceClass resourceClass) const { const D3D12_HEAP_FLAGS extraHeapFlags = allocDesc.ExtraHeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS; - if(extraHeapFlags != 0) + if (extraHeapFlags != 0) { return UINT32_MAX; } UINT poolIndex = UINT_MAX; - switch(allocDesc.HeapType) + switch (allocDesc.HeapType) { case D3D12_HEAP_TYPE_DEFAULT: poolIndex = 0; break; case D3D12_HEAP_TYPE_UPLOAD: poolIndex = 1; break; @@ -5341,11 +7830,11 @@ UINT AllocatorPimpl::CalcDefaultPoolIndex(const ALLOCATION_DESC& allocDesc, Reso default: D3D12MA_ASSERT(0); } - if(SupportsResourceHeapTier2()) + if (SupportsResourceHeapTier2()) return poolIndex; else { - switch(resourceClass) + switch (resourceClass) { case ResourceClass::Buffer: return poolIndex * 3; @@ -5364,9 +7853,9 @@ void AllocatorPimpl::CalcDefaultPoolParams(D3D12_HEAP_TYPE& outHeapType, D3D12_H outHeapType = D3D12_HEAP_TYPE_DEFAULT; outHeapFlags = D3D12_HEAP_FLAG_NONE; - if(!SupportsResourceHeapTier2()) + if (!SupportsResourceHeapTier2()) { - switch(index % 3) + switch (index % 3) { case 0: outHeapFlags = D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES | D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES; @@ -5382,7 +7871,7 @@ void AllocatorPimpl::CalcDefaultPoolParams(D3D12_HEAP_TYPE& outHeapType, D3D12_H index /= 3; } - switch(index) + switch (index) { case 0: outHeapType = D3D12_HEAP_TYPE_DEFAULT; @@ -5414,427 +7903,13 @@ void AllocatorPimpl::UnregisterPool(Pool* pool, D3D12_HEAP_TYPE heapType) m_Pools[heapTypeIndex].Remove(pool->m_Pimpl); } -void AllocatorPimpl::FreeCommittedMemory(Allocation* allocation) -{ - D3D12MA_ASSERT(allocation && allocation->m_PackedData.GetType() == Allocation::TYPE_COMMITTED); - - CommittedAllocationList* const allocList = allocation->m_Committed.list; - allocList->Unregister(allocation); - - const UINT64 allocationSize = allocation->GetSize(); - const UINT heapTypeIndex = HeapTypeToIndex(allocList->GetHeapType()); - m_Budget.RemoveCommittedAllocation(heapTypeIndex, allocationSize); -} - -void AllocatorPimpl::FreePlacedMemory(Allocation* allocation) -{ - D3D12MA_ASSERT(allocation && allocation->m_PackedData.GetType() == Allocation::TYPE_PLACED); - - NormalBlock* const block = allocation->m_Placed.block; - D3D12MA_ASSERT(block); - BlockVector* const blockVector = block->GetBlockVector(); - D3D12MA_ASSERT(blockVector); - m_Budget.RemoveAllocation(HeapTypeToIndex(block->GetHeapProperties().Type), allocation->GetSize()); - blockVector->Free(allocation); -} - -void AllocatorPimpl::FreeHeapMemory(Allocation* allocation) -{ - D3D12MA_ASSERT(allocation && allocation->m_PackedData.GetType() == Allocation::TYPE_HEAP); - - CommittedAllocationList* const allocList = allocation->m_Committed.list; - allocList->Unregister(allocation); - SAFE_RELEASE(allocation->m_Heap.heap); - - const UINT heapTypeIndex = HeapTypeToIndex(allocList->GetHeapType()); - const UINT64 allocationSize = allocation->GetSize(); - m_Budget.RemoveCommittedAllocation(heapTypeIndex, allocationSize); -} - -void AllocatorPimpl::SetCurrentFrameIndex(UINT frameIndex) -{ - m_CurrentFrameIndex.store(frameIndex); - -#if D3D12MA_DXGI_1_4 - if(m_Adapter3) - { - UpdateD3D12Budget(); - } -#endif -} - -void AllocatorPimpl::CalculateStats(Stats& outStats) -{ - // Init stats - ZeroMemory(&outStats, sizeof(outStats)); - outStats.Total.AllocationSizeMin = UINT64_MAX; - outStats.Total.UnusedRangeSizeMin = UINT64_MAX; - for(size_t i = 0; i < HEAP_TYPE_COUNT; i++) - { - outStats.HeapType[i].AllocationSizeMin = UINT64_MAX; - outStats.HeapType[i].UnusedRangeSizeMin = UINT64_MAX; - } - - // Process deafult pools. - if(SupportsResourceHeapTier2()) - { - for(size_t heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) - { - BlockVector* const pBlockVector = m_BlockVectors[heapTypeIndex]; - D3D12MA_ASSERT(pBlockVector); - pBlockVector->AddStats(outStats); - } - } - else - { - for(size_t heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) - { - for(size_t heapSubType = 0; heapSubType < 3; ++heapSubType) - { - BlockVector* const pBlockVector = m_BlockVectors[heapTypeIndex * 3 + heapSubType]; - D3D12MA_ASSERT(pBlockVector); - pBlockVector->AddStats(outStats); - } - } - } - - // Process custom pools - for(size_t heapTypeIndex = 0; heapTypeIndex < HEAP_TYPE_COUNT; ++heapTypeIndex) - { - MutexLockRead lock(m_PoolsMutex[heapTypeIndex], m_UseMutex); - PoolList& poolList = m_Pools[heapTypeIndex]; - for(PoolPimpl* pool = poolList.Front(); pool != NULL; pool = poolList.GetNext(pool)) - { - pool->AddStats(outStats); - } - } - - // Process committed allocations. - for(size_t heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) - { - StatInfo statInfo; // Uninitialized. - m_CommittedAllocations[heapTypeIndex].CalculateStats(statInfo); - AddStatInfo(outStats.Total, statInfo); - AddStatInfo(outStats.HeapType[heapTypeIndex], statInfo); - } - - // Post process - PostProcessStatInfo(outStats.Total); - for(size_t i = 0; i < HEAP_TYPE_COUNT; ++i) - PostProcessStatInfo(outStats.HeapType[i]); -} - -void AllocatorPimpl::GetBudget(Budget* outGpuBudget, Budget* outCpuBudget) -{ - if(outGpuBudget) - { - // Taking DEFAULT. - outGpuBudget->BlockBytes = m_Budget.m_BlockBytes[0]; - outGpuBudget->AllocationBytes = m_Budget.m_AllocationBytes[0]; - } - if(outCpuBudget) - { - // Taking UPLOAD + READBACK. - outCpuBudget->BlockBytes = m_Budget.m_BlockBytes[1] + m_Budget.m_BlockBytes[2]; - outCpuBudget->AllocationBytes = m_Budget.m_AllocationBytes[1] + m_Budget.m_AllocationBytes[2]; - } - // TODO: What to do with CUSTOM? - -#if D3D12MA_DXGI_1_4 - if(m_Adapter3) - { - if(m_Budget.m_OperationsSinceBudgetFetch < 30) - { - MutexLockRead lockRead(m_Budget.m_BudgetMutex, m_UseMutex); - if(outGpuBudget) - { - - if(m_Budget.m_D3D12UsageLocal + outGpuBudget->BlockBytes > m_Budget.m_BlockBytesAtBudgetFetch[0]) - { - outGpuBudget->UsageBytes = m_Budget.m_D3D12UsageLocal + - outGpuBudget->BlockBytes - m_Budget.m_BlockBytesAtBudgetFetch[0]; - } - else - { - outGpuBudget->UsageBytes = 0; - } - outGpuBudget->BudgetBytes = m_Budget.m_D3D12BudgetLocal; - } - if(outCpuBudget) - { - if(m_Budget.m_D3D12UsageNonLocal + outCpuBudget->BlockBytes > m_Budget.m_BlockBytesAtBudgetFetch[1] + m_Budget.m_BlockBytesAtBudgetFetch[2]) - { - outCpuBudget->UsageBytes = m_Budget.m_D3D12UsageNonLocal + - outCpuBudget->BlockBytes - (m_Budget.m_BlockBytesAtBudgetFetch[1] + m_Budget.m_BlockBytesAtBudgetFetch[2]); - } - else - { - outCpuBudget->UsageBytes = 0; - } - outCpuBudget->BudgetBytes = m_Budget.m_D3D12BudgetNonLocal; - } - } - else - { - UpdateD3D12Budget(); // Outside of mutex lock - GetBudget(outGpuBudget, outCpuBudget); // Recursion - } - } - else -#endif - { - if(outGpuBudget) - { - const UINT64 gpuMemorySize = m_AdapterDesc.DedicatedVideoMemory + m_AdapterDesc.DedicatedSystemMemory; // TODO: Is this right? - outGpuBudget->UsageBytes = outGpuBudget->BlockBytes; - outGpuBudget->BudgetBytes = gpuMemorySize * 8 / 10; // 80% heuristics. - } - if(outCpuBudget) - { - const UINT64 cpuMemorySize = m_AdapterDesc.SharedSystemMemory; // TODO: Is this right? - outCpuBudget->UsageBytes = outCpuBudget->BlockBytes; - outCpuBudget->BudgetBytes = cpuMemorySize * 8 / 10; // 80% heuristics. - } - } -} - -void AllocatorPimpl::GetBudgetForHeapType(Budget& outBudget, D3D12_HEAP_TYPE heapType) -{ - switch(heapType) - { - case D3D12_HEAP_TYPE_DEFAULT: - GetBudget(&outBudget, NULL); - break; - case D3D12_HEAP_TYPE_UPLOAD: - case D3D12_HEAP_TYPE_READBACK: - GetBudget(NULL, &outBudget); - break; - default: D3D12MA_ASSERT(0); - } -} - -static void AddStatInfoToJson(JsonWriter& json, const StatInfo& statInfo) -{ - json.BeginObject(); - json.WriteString(L"Blocks"); - json.WriteNumber(statInfo.BlockCount); - json.WriteString(L"Allocations"); - json.WriteNumber(statInfo.AllocationCount); - json.WriteString(L"UnusedRanges"); - json.WriteNumber(statInfo.UnusedRangeCount); - json.WriteString(L"UsedBytes"); - json.WriteNumber(statInfo.UsedBytes); - json.WriteString(L"UnusedBytes"); - json.WriteNumber(statInfo.UnusedBytes); - - json.WriteString(L"AllocationSize"); - json.BeginObject(true); - json.WriteString(L"Min"); - json.WriteNumber(statInfo.AllocationSizeMin); - json.WriteString(L"Avg"); - json.WriteNumber(statInfo.AllocationSizeAvg); - json.WriteString(L"Max"); - json.WriteNumber(statInfo.AllocationSizeMax); - json.EndObject(); - - json.WriteString(L"UnusedRangeSize"); - json.BeginObject(true); - json.WriteString(L"Min"); - json.WriteNumber(statInfo.UnusedRangeSizeMin); - json.WriteString(L"Avg"); - json.WriteNumber(statInfo.UnusedRangeSizeAvg); - json.WriteString(L"Max"); - json.WriteNumber(statInfo.UnusedRangeSizeMax); - json.EndObject(); - - json.EndObject(); -} - -void AllocatorPimpl::BuildStatsString(WCHAR** ppStatsString, BOOL DetailedMap) -{ - StringBuilder sb(GetAllocs()); - { - JsonWriter json(GetAllocs(), sb); - - Budget gpuBudget = {}, cpuBudget = {}; - GetBudget(&gpuBudget, &cpuBudget); - - Stats stats; - CalculateStats(stats); - - json.BeginObject(); - - json.WriteString(L"Total"); - AddStatInfoToJson(json, stats.Total); - for (size_t heapType = 0; heapType < HEAP_TYPE_COUNT; ++heapType) - { - json.WriteString(HeapTypeNames[heapType]); - AddStatInfoToJson(json, stats.HeapType[heapType]); - } - - json.WriteString(L"Budget"); - json.BeginObject(); - { - json.WriteString(L"GPU"); - WriteBudgetToJson(json, gpuBudget); - json.WriteString(L"CPU"); - WriteBudgetToJson(json, cpuBudget); - } - json.EndObject(); - - if (DetailedMap) - { - json.WriteString(L"DetailedMap"); - json.BeginObject(); - - json.WriteString(L"DefaultPools"); - json.BeginObject(); - - if (SupportsResourceHeapTier2()) - { - for (size_t heapType = 0; heapType < STANDARD_HEAP_TYPE_COUNT; ++heapType) - { - json.WriteString(HeapTypeNames[heapType]); - json.BeginObject(); - - json.WriteString(L"Blocks"); - - BlockVector* blockVector = m_BlockVectors[heapType]; - D3D12MA_ASSERT(blockVector); - blockVector->WriteBlockInfoToJson(json); - - json.EndObject(); // heap name - } - } - else - { - for (size_t heapType = 0; heapType < STANDARD_HEAP_TYPE_COUNT; ++heapType) - { - for (size_t heapSubType = 0; heapSubType < 3; ++heapSubType) - { - static const WCHAR* const heapSubTypeName[] = { - L" + buffer", - L" + texture", - L" + texture RT or DS", - }; - json.BeginString(); - json.ContinueString(HeapTypeNames[heapType]); - json.ContinueString(heapSubTypeName[heapSubType]); - json.EndString(); - json.BeginObject(); - - json.WriteString(L"Blocks"); - - BlockVector* blockVector = m_BlockVectors[heapType * 3 + heapSubType]; - D3D12MA_ASSERT(blockVector); - blockVector->WriteBlockInfoToJson(json); - - json.EndObject(); // heap name - } - } - } - - json.EndObject(); // DefaultPools - - json.WriteString(L"CommittedAllocations"); - json.BeginObject(); - - for (size_t heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) - { - json.WriteString(HeapTypeNames[heapTypeIndex]); - m_CommittedAllocations[heapTypeIndex].BuildStatsString(json); - } - - json.EndObject(); // CommittedAllocations - - json.WriteString(L"Pools"); - json.BeginObject(); - - for (size_t heapTypeIndex = 0; heapTypeIndex < HEAP_TYPE_COUNT; ++heapTypeIndex) - { - json.WriteString(HeapTypeNames[heapTypeIndex]); - json.BeginArray(); - MutexLockRead mutex(m_PoolsMutex[heapTypeIndex], m_UseMutex); - size_t index = 0; - for (auto* item = m_Pools[heapTypeIndex].Front(); item != nullptr; item = PoolList::GetNext(item)) - { - json.BeginObject(); - json.WriteString(L"Name"); - if (item->GetName() != nullptr) - { - json.WriteString(item->GetName()); - } - else - { - json.BeginString(); - json.ContinueString(index); - json.EndString(); - } - ++index; - - json.WriteString(L"Blocks"); - item->GetBlockVector()->WriteBlockInfoToJson(json); - json.WriteString(L"CommittedAllocations"); - item->GetCommittedAllocationList()->BuildStatsString(json); - - json.EndObject(); - } - json.EndArray(); - } - - json.EndObject(); // Pools - - json.EndObject(); // DetailedMap - } - json.EndObject(); - } - - const size_t length = sb.GetLength(); - WCHAR* result = AllocateArray(GetAllocs(), length + 1); - memcpy(result, sb.GetData(), length * sizeof(WCHAR)); - result[length] = L'\0'; - *ppStatsString = result; -} - -void AllocatorPimpl::FreeStatsString(WCHAR* pStatsString) -{ - D3D12MA_ASSERT(pStatsString); - Free(GetAllocs(), pStatsString); -} - HRESULT AllocatorPimpl::UpdateD3D12Budget() { #if D3D12MA_DXGI_1_4 - D3D12MA_ASSERT(m_Adapter3); - - DXGI_QUERY_VIDEO_MEMORY_INFO infoLocal = {}; - DXGI_QUERY_VIDEO_MEMORY_INFO infoNonLocal = {}; - HRESULT hrLocal = m_Adapter3->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &infoLocal); - HRESULT hrNonLocal = m_Adapter3->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &infoNonLocal); - - { - MutexLockWrite lockWrite(m_Budget.m_BudgetMutex, m_UseMutex); - - if(SUCCEEDED(hrLocal)) - { - m_Budget.m_D3D12UsageLocal = infoLocal.CurrentUsage; - m_Budget.m_D3D12BudgetLocal = infoLocal.Budget; - } - if(SUCCEEDED(hrNonLocal)) - { - m_Budget.m_D3D12UsageNonLocal = infoNonLocal.CurrentUsage; - m_Budget.m_D3D12BudgetNonLocal = infoNonLocal.Budget; - } - - for(UINT i = 0; i < HEAP_TYPE_COUNT; ++i) - { - m_Budget.m_BlockBytesAtBudgetFetch[i] = m_Budget.m_BlockBytes[i].load(); - } - - m_Budget.m_OperationsSinceBudgetFetch = 0; - } - - return FAILED(hrLocal) ? hrLocal : hrNonLocal; + if (m_Adapter3) + return m_Budget.UpdateBudget(m_Adapter3, m_UseMutex); + else + return E_NOINTERFACE; #else return S_OK; #endif @@ -5859,22 +7934,22 @@ D3D12_RESOURCE_ALLOCATION_INFO AllocatorPimpl::GetResourceAllocationInfo(D3D12_R { /* Optional optimization: Microsoft documentation says: https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12device-getresourceallocationinfo - + Your application can forgo using GetResourceAllocationInfo for buffer resources (D3D12_RESOURCE_DIMENSION_BUFFER). Buffers have the same size on all adapters, which is merely the smallest multiple of 64KB that's greater or equal to D3D12_RESOURCE_DESC::Width. */ - if(inOutResourceDesc.Alignment == 0 && + if (inOutResourceDesc.Alignment == 0 && inOutResourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { return { AlignUp(inOutResourceDesc.Width, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT), // SizeInBytes - D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT}; // Alignment + D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT }; // Alignment } #if D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT - if(inOutResourceDesc.Alignment == 0 && + if (inOutResourceDesc.Alignment == 0 && inOutResourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D && (inOutResourceDesc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) == 0 #if D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT == 1 @@ -5892,7 +7967,7 @@ D3D12_RESOURCE_ALLOCATION_INFO AllocatorPimpl::GetResourceAllocationInfo(D3D12_R inOutResourceDesc.Alignment = smallAlignmentToTry; const D3D12_RESOURCE_ALLOCATION_INFO smallAllocInfo = GetResourceAllocationInfoNative(inOutResourceDesc); // Check if alignment requested has been granted. - if(smallAllocInfo.Alignment == smallAlignmentToTry) + if (smallAllocInfo.Alignment == smallAlignmentToTry) { return smallAllocInfo; } @@ -5914,26 +7989,1575 @@ void AllocatorPimpl::WriteBudgetToJson(JsonWriter& json, const Budget& budget) { json.BeginObject(); { - json.WriteString(L"BlockBytes"); - json.WriteNumber(budget.BlockBytes); - json.WriteString(L"AllocationBytes"); - json.WriteNumber(budget.AllocationBytes); - json.WriteString(L"UsageBytes"); - json.WriteNumber(budget.UsageBytes); json.WriteString(L"BudgetBytes"); json.WriteNumber(budget.BudgetBytes); + json.WriteString(L"UsageBytes"); + json.WriteNumber(budget.UsageBytes); } json.EndObject(); } +#endif // _D3D12MA_ALLOCATOR_PIMPL +#endif // _D3D12MA_ALLOCATOR_PIMPL -//////////////////////////////////////////////////////////////////////////////// -// Public but internal class IUnknownImpl implementation +#ifndef _D3D12MA_VIRTUAL_BLOCK_PIMPL +class VirtualBlockPimpl +{ +public: + const ALLOCATION_CALLBACKS m_AllocationCallbacks; + const UINT64 m_Size; + BlockMetadata* m_Metadata; + VirtualBlockPimpl(const ALLOCATION_CALLBACKS& allocationCallbacks, const VIRTUAL_BLOCK_DESC& desc); + ~VirtualBlockPimpl(); +}; + +#ifndef _D3D12MA_VIRTUAL_BLOCK_PIMPL_FUNCTIONS +VirtualBlockPimpl::VirtualBlockPimpl(const ALLOCATION_CALLBACKS& allocationCallbacks, const VIRTUAL_BLOCK_DESC& desc) + : m_AllocationCallbacks(allocationCallbacks), m_Size(desc.Size) +{ + switch (desc.Flags & VIRTUAL_BLOCK_FLAG_ALGORITHM_MASK) + { + case VIRTUAL_BLOCK_FLAG_ALGORITHM_LINEAR: + m_Metadata = D3D12MA_NEW(allocationCallbacks, BlockMetadata_Linear)(&m_AllocationCallbacks, true); + break; + default: + D3D12MA_ASSERT(0); + case 0: + m_Metadata = D3D12MA_NEW(allocationCallbacks, BlockMetadata_TLSF)(&m_AllocationCallbacks, true); + break; + } + m_Metadata->Init(m_Size); +} + +VirtualBlockPimpl::~VirtualBlockPimpl() +{ + D3D12MA_DELETE(m_AllocationCallbacks, m_Metadata); +} +#endif // _D3D12MA_VIRTUAL_BLOCK_PIMPL_FUNCTIONS +#endif // _D3D12MA_VIRTUAL_BLOCK_PIMPL + + +#ifndef _D3D12MA_MEMORY_BLOCK_FUNCTIONS +MemoryBlock::MemoryBlock( + AllocatorPimpl* allocator, + const D3D12_HEAP_PROPERTIES& heapProps, + D3D12_HEAP_FLAGS heapFlags, + UINT64 size, + UINT id) + : m_Allocator(allocator), + m_HeapProps(heapProps), + m_HeapFlags(heapFlags), + m_Size(size), + m_Id(id) {} + +MemoryBlock::~MemoryBlock() +{ + if (m_Heap) + { + m_Heap->Release(); + m_Allocator->m_Budget.RemoveBlock( + m_Allocator->HeapPropertiesToMemorySegmentGroup(m_HeapProps), m_Size); + } +} + +HRESULT MemoryBlock::Init(ID3D12ProtectedResourceSession* pProtectedSession, bool denyMsaaTextures) +{ + D3D12MA_ASSERT(m_Heap == NULL && m_Size > 0); + + D3D12_HEAP_DESC heapDesc = {}; + heapDesc.SizeInBytes = m_Size; + heapDesc.Properties = m_HeapProps; + heapDesc.Alignment = HeapFlagsToAlignment(m_HeapFlags, denyMsaaTextures); + heapDesc.Flags = m_HeapFlags; + + HRESULT hr; +#ifdef __ID3D12Device4_INTERFACE_DEFINED__ + ID3D12Device4* const device4 = m_Allocator->GetDevice4(); + if (device4) + hr = m_Allocator->GetDevice4()->CreateHeap1(&heapDesc, pProtectedSession, D3D12MA_IID_PPV_ARGS(&m_Heap)); + else +#endif + { + if (pProtectedSession == NULL) + hr = m_Allocator->GetDevice()->CreateHeap(&heapDesc, D3D12MA_IID_PPV_ARGS(&m_Heap)); + else + hr = E_NOINTERFACE; + } + + if (SUCCEEDED(hr)) + { + m_Allocator->m_Budget.AddBlock( + m_Allocator->HeapPropertiesToMemorySegmentGroup(m_HeapProps), m_Size); + } + return hr; +} +#endif // _D3D12MA_MEMORY_BLOCK_FUNCTIONS + +#ifndef _D3D12MA_NORMAL_BLOCK_FUNCTIONS +NormalBlock::NormalBlock( + AllocatorPimpl* allocator, + BlockVector* blockVector, + const D3D12_HEAP_PROPERTIES& heapProps, + D3D12_HEAP_FLAGS heapFlags, + UINT64 size, + UINT id) + : MemoryBlock(allocator, heapProps, heapFlags, size, id), + m_pMetadata(NULL), + m_BlockVector(blockVector) {} + +NormalBlock::~NormalBlock() +{ + if (m_pMetadata != NULL) + { + // THIS IS THE MOST IMPORTANT ASSERT IN THE ENTIRE LIBRARY! + // Hitting it means you have some memory leak - unreleased Allocation objects. + D3D12MA_ASSERT(m_pMetadata->IsEmpty() && "Some allocations were not freed before destruction of this memory block!"); + + D3D12MA_DELETE(m_Allocator->GetAllocs(), m_pMetadata); + } +} + +HRESULT NormalBlock::Init(UINT32 algorithm, ID3D12ProtectedResourceSession* pProtectedSession, bool denyMsaaTextures) +{ + HRESULT hr = MemoryBlock::Init(pProtectedSession, denyMsaaTextures); + if (FAILED(hr)) + { + return hr; + } + + switch (algorithm) + { + case POOL_FLAG_ALGORITHM_LINEAR: + m_pMetadata = D3D12MA_NEW(m_Allocator->GetAllocs(), BlockMetadata_Linear)(&m_Allocator->GetAllocs(), false); + break; + default: + D3D12MA_ASSERT(0); + case 0: + m_pMetadata = D3D12MA_NEW(m_Allocator->GetAllocs(), BlockMetadata_TLSF)(&m_Allocator->GetAllocs(), false); + break; + } + m_pMetadata->Init(m_Size); + + return hr; +} + +bool NormalBlock::Validate() const +{ + D3D12MA_VALIDATE(GetHeap() && + m_pMetadata && + m_pMetadata->GetSize() != 0 && + m_pMetadata->GetSize() == GetSize()); + return m_pMetadata->Validate(); +} +#endif // _D3D12MA_NORMAL_BLOCK_FUNCTIONS + +#ifndef _D3D12MA_COMMITTED_ALLOCATION_LIST_FUNCTIONS +void CommittedAllocationList::Init(bool useMutex, D3D12_HEAP_TYPE heapType, PoolPimpl* pool) +{ + m_UseMutex = useMutex; + m_HeapType = heapType; + m_Pool = pool; +} + +CommittedAllocationList::~CommittedAllocationList() +{ + if (!m_AllocationList.IsEmpty()) + { + D3D12MA_ASSERT(0 && "Unfreed committed allocations found!"); + } +} + +UINT CommittedAllocationList::GetMemorySegmentGroup(AllocatorPimpl* allocator) const +{ + if (m_Pool) + return allocator->HeapPropertiesToMemorySegmentGroup(m_Pool->GetDesc().HeapProperties); + else + return allocator->StandardHeapTypeToMemorySegmentGroup(m_HeapType); +} + +void CommittedAllocationList::AddStatistics(Statistics& inoutStats) +{ + MutexLockRead lock(m_Mutex, m_UseMutex); + + for (Allocation* alloc = m_AllocationList.Front(); + alloc != NULL; alloc = m_AllocationList.GetNext(alloc)) + { + const UINT64 size = alloc->GetSize(); + inoutStats.BlockCount++; + inoutStats.AllocationCount++; + inoutStats.BlockBytes += size; + inoutStats.AllocationBytes += size; + } +} + +void CommittedAllocationList::AddDetailedStatistics(DetailedStatistics& inoutStats) +{ + MutexLockRead lock(m_Mutex, m_UseMutex); + + for (Allocation* alloc = m_AllocationList.Front(); + alloc != NULL; alloc = m_AllocationList.GetNext(alloc)) + { + const UINT64 size = alloc->GetSize(); + inoutStats.Stats.BlockCount++; + inoutStats.Stats.BlockBytes += size; + AddDetailedStatisticsAllocation(inoutStats, size); + } +} + +void CommittedAllocationList::BuildStatsString(JsonWriter& json) +{ + MutexLockRead lock(m_Mutex, m_UseMutex); + + for (Allocation* alloc = m_AllocationList.Front(); + alloc != NULL; alloc = m_AllocationList.GetNext(alloc)) + { + json.BeginObject(true); + json.AddAllocationToObject(*alloc); + json.EndObject(); + } +} + +void CommittedAllocationList::Register(Allocation* alloc) +{ + MutexLockWrite lock(m_Mutex, m_UseMutex); + m_AllocationList.PushBack(alloc); +} + +void CommittedAllocationList::Unregister(Allocation* alloc) +{ + MutexLockWrite lock(m_Mutex, m_UseMutex); + m_AllocationList.Remove(alloc); +} +#endif // _D3D12MA_COMMITTED_ALLOCATION_LIST_FUNCTIONS + +#ifndef _D3D12MA_BLOCK_VECTOR_FUNCTIONS +BlockVector::BlockVector( + AllocatorPimpl* hAllocator, + const D3D12_HEAP_PROPERTIES& heapProps, + D3D12_HEAP_FLAGS heapFlags, + UINT64 preferredBlockSize, + size_t minBlockCount, + size_t maxBlockCount, + bool explicitBlockSize, + UINT64 minAllocationAlignment, + UINT32 algorithm, + bool denyMsaaTextures, + ID3D12ProtectedResourceSession* pProtectedSession) + : m_hAllocator(hAllocator), + m_HeapProps(heapProps), + m_HeapFlags(heapFlags), + m_PreferredBlockSize(preferredBlockSize), + m_MinBlockCount(minBlockCount), + m_MaxBlockCount(maxBlockCount), + m_ExplicitBlockSize(explicitBlockSize), + m_MinAllocationAlignment(minAllocationAlignment), + m_Algorithm(algorithm), + m_DenyMsaaTextures(denyMsaaTextures), + m_ProtectedSession(pProtectedSession), + m_HasEmptyBlock(false), + m_Blocks(hAllocator->GetAllocs()), + m_NextBlockId(0) {} + +BlockVector::~BlockVector() +{ + for (size_t i = m_Blocks.size(); i--; ) + { + D3D12MA_DELETE(m_hAllocator->GetAllocs(), m_Blocks[i]); + } +} + +HRESULT BlockVector::CreateMinBlocks() +{ + for (size_t i = 0; i < m_MinBlockCount; ++i) + { + HRESULT hr = CreateBlock(m_PreferredBlockSize, NULL); + if (FAILED(hr)) + { + return hr; + } + } + return S_OK; +} + +bool BlockVector::IsEmpty() +{ + MutexLockRead lock(m_Mutex, m_hAllocator->UseMutex()); + return m_Blocks.empty(); +} + +HRESULT BlockVector::Allocate( + UINT64 size, + UINT64 alignment, + const ALLOCATION_DESC& allocDesc, + size_t allocationCount, + Allocation** pAllocations) +{ + size_t allocIndex; + HRESULT hr = S_OK; + + { + MutexLockWrite lock(m_Mutex, m_hAllocator->UseMutex()); + for (allocIndex = 0; allocIndex < allocationCount; ++allocIndex) + { + hr = AllocatePage( + size, + alignment, + allocDesc, + pAllocations + allocIndex); + if (FAILED(hr)) + { + break; + } + } + } + + if (FAILED(hr)) + { + // Free all already created allocations. + while (allocIndex--) + { + Free(pAllocations[allocIndex]); + } + ZeroMemory(pAllocations, sizeof(Allocation*) * allocationCount); + } + + return hr; +} + +void BlockVector::Free(Allocation* hAllocation) +{ + NormalBlock* pBlockToDelete = NULL; + + bool budgetExceeded = false; + if (IsHeapTypeStandard(m_HeapProps.Type)) + { + Budget budget = {}; + m_hAllocator->GetBudgetForHeapType(budget, m_HeapProps.Type); + budgetExceeded = budget.UsageBytes >= budget.BudgetBytes; + } + + // Scope for lock. + { + MutexLockWrite lock(m_Mutex, m_hAllocator->UseMutex()); + + NormalBlock* pBlock = hAllocation->m_Placed.block; + + pBlock->m_pMetadata->Free(hAllocation->GetAllocHandle()); + D3D12MA_HEAVY_ASSERT(pBlock->Validate()); + + const size_t blockCount = m_Blocks.size(); + // pBlock became empty after this deallocation. + if (pBlock->m_pMetadata->IsEmpty()) + { + // Already has empty Allocation. We don't want to have two, so delete this one. + if ((m_HasEmptyBlock || budgetExceeded) && + blockCount > m_MinBlockCount) + { + pBlockToDelete = pBlock; + Remove(pBlock); + } + // We now have first empty block. + else + { + m_HasEmptyBlock = true; + } + } + // pBlock didn't become empty, but we have another empty block - find and free that one. + // (This is optional, heuristics.) + else if (m_HasEmptyBlock && blockCount > m_MinBlockCount) + { + NormalBlock* pLastBlock = m_Blocks.back(); + if (pLastBlock->m_pMetadata->IsEmpty()) + { + pBlockToDelete = pLastBlock; + m_Blocks.pop_back(); + m_HasEmptyBlock = false; + } + } + + IncrementallySortBlocks(); + } + + // Destruction of a free Allocation. Deferred until this point, outside of mutex + // lock, for performance reason. + if (pBlockToDelete != NULL) + { + D3D12MA_DELETE(m_hAllocator->GetAllocs(), pBlockToDelete); + } +} + +HRESULT BlockVector::CreateResource( + UINT64 size, + UINT64 alignment, + const ALLOCATION_DESC& allocDesc, + const D3D12_RESOURCE_DESC& resourceDesc, + D3D12_RESOURCE_STATES InitialResourceState, + const D3D12_CLEAR_VALUE* pOptimizedClearValue, + Allocation** ppAllocation, + REFIID riidResource, + void** ppvResource) +{ + HRESULT hr = Allocate(size, alignment, allocDesc, 1, ppAllocation); + if (SUCCEEDED(hr)) + { + ID3D12Resource* res = NULL; + hr = m_hAllocator->GetDevice()->CreatePlacedResource( + (*ppAllocation)->m_Placed.block->GetHeap(), + (*ppAllocation)->GetOffset(), + &resourceDesc, + InitialResourceState, + pOptimizedClearValue, + D3D12MA_IID_PPV_ARGS(&res)); + if (SUCCEEDED(hr)) + { + if (ppvResource != NULL) + { + hr = res->QueryInterface(riidResource, ppvResource); + } + if (SUCCEEDED(hr)) + { + (*ppAllocation)->SetResourcePointer(res, &resourceDesc); + } + else + { + res->Release(); + SAFE_RELEASE(*ppAllocation); + } + } + else + { + SAFE_RELEASE(*ppAllocation); + } + } + return hr; +} + +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ +HRESULT BlockVector::CreateResource2( + UINT64 size, + UINT64 alignment, + const ALLOCATION_DESC& allocDesc, + const D3D12_RESOURCE_DESC1& resourceDesc, + D3D12_RESOURCE_STATES InitialResourceState, + const D3D12_CLEAR_VALUE* pOptimizedClearValue, + Allocation** ppAllocation, + REFIID riidResource, + void** ppvResource) +{ + ID3D12Device8* const device8 = m_hAllocator->GetDevice8(); + if (device8 == NULL) + { + return E_NOINTERFACE; + } + + HRESULT hr = Allocate(size, alignment, allocDesc, 1, ppAllocation); + if (SUCCEEDED(hr)) + { + ID3D12Resource* res = NULL; + hr = device8->CreatePlacedResource1( + (*ppAllocation)->m_Placed.block->GetHeap(), + (*ppAllocation)->GetOffset(), + &resourceDesc, + InitialResourceState, + pOptimizedClearValue, + D3D12MA_IID_PPV_ARGS(&res)); + if (SUCCEEDED(hr)) + { + if (ppvResource != NULL) + { + hr = res->QueryInterface(riidResource, ppvResource); + } + if (SUCCEEDED(hr)) + { + (*ppAllocation)->SetResourcePointer(res, &resourceDesc); + } + else + { + res->Release(); + SAFE_RELEASE(*ppAllocation); + } + } + else + { + SAFE_RELEASE(*ppAllocation); + } + } + return hr; +} +#endif // #ifdef __ID3D12Device8_INTERFACE_DEFINED__ + +void BlockVector::AddStatistics(Statistics& inoutStats) +{ + MutexLockRead lock(m_Mutex, m_hAllocator->UseMutex()); + + for (size_t i = 0; i < m_Blocks.size(); ++i) + { + const NormalBlock* const pBlock = m_Blocks[i]; + D3D12MA_ASSERT(pBlock); + D3D12MA_HEAVY_ASSERT(pBlock->Validate()); + pBlock->m_pMetadata->AddStatistics(inoutStats); + } +} + +void BlockVector::AddDetailedStatistics(DetailedStatistics& inoutStats) +{ + MutexLockRead lock(m_Mutex, m_hAllocator->UseMutex()); + + for (size_t i = 0; i < m_Blocks.size(); ++i) + { + const NormalBlock* const pBlock = m_Blocks[i]; + D3D12MA_ASSERT(pBlock); + D3D12MA_HEAVY_ASSERT(pBlock->Validate()); + pBlock->m_pMetadata->AddDetailedStatistics(inoutStats); + } +} + +void BlockVector::WriteBlockInfoToJson(JsonWriter& json) +{ + MutexLockRead lock(m_Mutex, m_hAllocator->UseMutex()); + + json.BeginObject(); + + for (size_t i = 0, count = m_Blocks.size(); i < count; ++i) + { + const NormalBlock* const pBlock = m_Blocks[i]; + D3D12MA_ASSERT(pBlock); + D3D12MA_HEAVY_ASSERT(pBlock->Validate()); + json.BeginString(); + json.ContinueString(pBlock->GetId()); + json.EndString(); + + json.BeginObject(); + pBlock->m_pMetadata->WriteAllocationInfoToJson(json); + json.EndObject(); + } + + json.EndObject(); +} + +UINT64 BlockVector::CalcSumBlockSize() const +{ + UINT64 result = 0; + for (size_t i = m_Blocks.size(); i--; ) + { + result += m_Blocks[i]->m_pMetadata->GetSize(); + } + return result; +} + +UINT64 BlockVector::CalcMaxBlockSize() const +{ + UINT64 result = 0; + for (size_t i = m_Blocks.size(); i--; ) + { + result = D3D12MA_MAX(result, m_Blocks[i]->m_pMetadata->GetSize()); + if (result >= m_PreferredBlockSize) + { + break; + } + } + return result; +} + +void BlockVector::Remove(NormalBlock* pBlock) +{ + for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + if (m_Blocks[blockIndex] == pBlock) + { + m_Blocks.remove(blockIndex); + return; + } + } + D3D12MA_ASSERT(0); +} + +void BlockVector::IncrementallySortBlocks() +{ + if (!m_IncrementalSort) + return; + // Bubble sort only until first swap. + for (size_t i = 1; i < m_Blocks.size(); ++i) + { + if (m_Blocks[i - 1]->m_pMetadata->GetSumFreeSize() > m_Blocks[i]->m_pMetadata->GetSumFreeSize()) + { + D3D12MA_SWAP(m_Blocks[i - 1], m_Blocks[i]); + return; + } + } +} + +void BlockVector::SortByFreeSize() +{ + D3D12MA_SORT(m_Blocks.begin(), m_Blocks.end(), + [](auto* b1, auto* b2) + { + return b1->m_pMetadata->GetSumFreeSize() < b2->m_pMetadata->GetSumFreeSize(); + }); +} + +HRESULT BlockVector::AllocatePage( + UINT64 size, + UINT64 alignment, + const ALLOCATION_DESC& allocDesc, + Allocation** pAllocation) +{ + // Early reject: requested allocation size is larger that maximum block size for this block vector. + if (size + D3D12MA_DEBUG_MARGIN > m_PreferredBlockSize) + { + return E_OUTOFMEMORY; + } + + UINT64 freeMemory = UINT64_MAX; + if (IsHeapTypeStandard(m_HeapProps.Type)) + { + Budget budget = {}; + m_hAllocator->GetBudgetForHeapType(budget, m_HeapProps.Type); + freeMemory = (budget.UsageBytes < budget.BudgetBytes) ? (budget.BudgetBytes - budget.UsageBytes) : 0; + } + + const bool canCreateNewBlock = + ((allocDesc.Flags & ALLOCATION_FLAG_NEVER_ALLOCATE) == 0) && + (m_Blocks.size() < m_MaxBlockCount) && + // Even if we don't have to stay within budget with this allocation, when the + // budget would be exceeded, we don't want to allocate new blocks, but always + // create resources as committed. + freeMemory >= size; + + // 1. Search existing allocations + { + // Forward order in m_Blocks - prefer blocks with smallest amount of free space. + for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + NormalBlock* const pCurrBlock = m_Blocks[blockIndex]; + D3D12MA_ASSERT(pCurrBlock); + HRESULT hr = AllocateFromBlock( + pCurrBlock, + size, + alignment, + allocDesc.Flags, + allocDesc.pPrivateData, + allocDesc.Flags & ALLOCATION_FLAG_STRATEGY_MASK, + pAllocation); + if (SUCCEEDED(hr)) + { + return hr; + } + } + } + + // 2. Try to create new block. + if (canCreateNewBlock) + { + // Calculate optimal size for new block. + UINT64 newBlockSize = m_PreferredBlockSize; + UINT newBlockSizeShift = 0; + + if (!m_ExplicitBlockSize) + { + // Allocate 1/8, 1/4, 1/2 as first blocks. + const UINT64 maxExistingBlockSize = CalcMaxBlockSize(); + for (UINT i = 0; i < NEW_BLOCK_SIZE_SHIFT_MAX; ++i) + { + const UINT64 smallerNewBlockSize = newBlockSize / 2; + if (smallerNewBlockSize > maxExistingBlockSize && smallerNewBlockSize >= size * 2) + { + newBlockSize = smallerNewBlockSize; + ++newBlockSizeShift; + } + else + { + break; + } + } + } + + size_t newBlockIndex = 0; + HRESULT hr = newBlockSize <= freeMemory ? + CreateBlock(newBlockSize, &newBlockIndex) : E_OUTOFMEMORY; + // Allocation of this size failed? Try 1/2, 1/4, 1/8 of m_PreferredBlockSize. + if (!m_ExplicitBlockSize) + { + while (FAILED(hr) && newBlockSizeShift < NEW_BLOCK_SIZE_SHIFT_MAX) + { + const UINT64 smallerNewBlockSize = newBlockSize / 2; + if (smallerNewBlockSize >= size) + { + newBlockSize = smallerNewBlockSize; + ++newBlockSizeShift; + hr = newBlockSize <= freeMemory ? + CreateBlock(newBlockSize, &newBlockIndex) : E_OUTOFMEMORY; + } + else + { + break; + } + } + } + + if (SUCCEEDED(hr)) + { + NormalBlock* const pBlock = m_Blocks[newBlockIndex]; + D3D12MA_ASSERT(pBlock->m_pMetadata->GetSize() >= size); + + hr = AllocateFromBlock( + pBlock, + size, + alignment, + allocDesc.Flags, + allocDesc.pPrivateData, + allocDesc.Flags & ALLOCATION_FLAG_STRATEGY_MASK, + pAllocation); + if (SUCCEEDED(hr)) + { + return hr; + } + else + { + // Allocation from new block failed, possibly due to D3D12MA_DEBUG_MARGIN or alignment. + return E_OUTOFMEMORY; + } + } + } + + return E_OUTOFMEMORY; +} + +HRESULT BlockVector::AllocateFromBlock( + NormalBlock* pBlock, + UINT64 size, + UINT64 alignment, + ALLOCATION_FLAGS allocFlags, + void* pPrivateData, + UINT32 strategy, + Allocation** pAllocation) +{ + alignment = D3D12MA_MAX(alignment, m_MinAllocationAlignment); + + AllocationRequest currRequest = {}; + if (pBlock->m_pMetadata->CreateAllocationRequest( + size, + alignment, + allocFlags & ALLOCATION_FLAG_UPPER_ADDRESS, + strategy, + &currRequest)) + { + return CommitAllocationRequest(currRequest, pBlock, size, alignment, pPrivateData, pAllocation); + } + return E_OUTOFMEMORY; +} + +HRESULT BlockVector::CommitAllocationRequest( + AllocationRequest& allocRequest, + NormalBlock* pBlock, + UINT64 size, + UINT64 alignment, + void* pPrivateData, + Allocation** pAllocation) +{ + // We no longer have an empty Allocation. + if (pBlock->m_pMetadata->IsEmpty()) + m_HasEmptyBlock = false; + + *pAllocation = m_hAllocator->GetAllocationObjectAllocator().Allocate(m_hAllocator, size, alignment, allocRequest.zeroInitialized); + pBlock->m_pMetadata->Alloc(allocRequest, size, *pAllocation); + + (*pAllocation)->InitPlaced(allocRequest.allocHandle, pBlock); + (*pAllocation)->SetPrivateData(pPrivateData); + + D3D12MA_HEAVY_ASSERT(pBlock->Validate()); + m_hAllocator->m_Budget.AddAllocation(m_hAllocator->HeapPropertiesToMemorySegmentGroup(m_HeapProps), size); + + return S_OK; +} + +HRESULT BlockVector::CreateBlock( + UINT64 blockSize, + size_t* pNewBlockIndex) +{ + NormalBlock* const pBlock = D3D12MA_NEW(m_hAllocator->GetAllocs(), NormalBlock)( + m_hAllocator, + this, + m_HeapProps, + m_HeapFlags, + blockSize, + m_NextBlockId++); + HRESULT hr = pBlock->Init(m_Algorithm, m_ProtectedSession, m_DenyMsaaTextures); + if (FAILED(hr)) + { + D3D12MA_DELETE(m_hAllocator->GetAllocs(), pBlock); + return hr; + } + + m_Blocks.push_back(pBlock); + if (pNewBlockIndex != NULL) + { + *pNewBlockIndex = m_Blocks.size() - 1; + } + + return hr; +} +#endif // _D3D12MA_BLOCK_VECTOR_FUNCTIONS + +#ifndef _D3D12MA_DEFRAGMENTATION_CONTEXT_PIMPL_FUNCTIONS +DefragmentationContextPimpl::DefragmentationContextPimpl( + AllocatorPimpl* hAllocator, + const DEFRAGMENTATION_DESC& desc, + BlockVector* poolVector) + : m_MaxPassBytes(desc.MaxBytesPerPass == 0 ? UINT64_MAX : desc.MaxBytesPerPass), + m_MaxPassAllocations(desc.MaxAllocationsPerPass == 0 ? UINT32_MAX : desc.MaxAllocationsPerPass), + m_Moves(hAllocator->GetAllocs()) +{ + m_Algorithm = desc.Flags & DEFRAGMENTATION_FLAG_ALGORITHM_MASK; + + if (poolVector != NULL) + { + m_BlockVectorCount = 1; + m_PoolBlockVector = poolVector; + m_pBlockVectors = &m_PoolBlockVector; + m_PoolBlockVector->SetIncrementalSort(false); + m_PoolBlockVector->SortByFreeSize(); + } + else + { + m_BlockVectorCount = hAllocator->GetDefaultPoolCount(); + m_PoolBlockVector = NULL; + m_pBlockVectors = hAllocator->GetDefaultPools(); + for (UINT32 i = 0; i < m_BlockVectorCount; ++i) + { + BlockVector* vector = m_pBlockVectors[i]; + if (vector != NULL) + { + vector->SetIncrementalSort(false); + vector->SortByFreeSize(); + } + } + } + + switch (m_Algorithm) + { + case 0: // Default algorithm + m_Algorithm = DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED; + case DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED: + { + m_AlgorithmState = D3D12MA_NEW_ARRAY(hAllocator->GetAllocs(), StateBalanced, m_BlockVectorCount); + break; + } + } +} + +DefragmentationContextPimpl::~DefragmentationContextPimpl() +{ + if (m_PoolBlockVector != NULL) + m_PoolBlockVector->SetIncrementalSort(true); + else + { + for (UINT32 i = 0; i < m_BlockVectorCount; ++i) + { + BlockVector* vector = m_pBlockVectors[i]; + if (vector != NULL) + vector->SetIncrementalSort(true); + } + } + + if (m_AlgorithmState) + { + switch (m_Algorithm) + { + case DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED: + D3D12MA_DELETE_ARRAY(m_Moves.GetAllocs(), reinterpret_cast(m_AlgorithmState), m_BlockVectorCount); + break; + default: + D3D12MA_ASSERT(0); + } + } +} + +HRESULT DefragmentationContextPimpl::DefragmentPassBegin(DEFRAGMENTATION_PASS_MOVE_INFO& moveInfo) +{ + if (m_PoolBlockVector != NULL) + { + MutexLockWrite lock(m_PoolBlockVector->GetMutex(), m_PoolBlockVector->m_hAllocator->UseMutex()); + + if (m_PoolBlockVector->GetBlockCount() > 1) + ComputeDefragmentation(*m_PoolBlockVector, 0); + else if (m_PoolBlockVector->GetBlockCount() == 1) + ReallocWithinBlock(*m_PoolBlockVector, m_PoolBlockVector->GetBlock(0)); + + // Setup index into block vector + for (size_t i = 0; i < m_Moves.size(); ++i) + m_Moves[i].pDstTmpAllocation->SetPrivateData(0); + } + else + { + for (UINT32 i = 0; i < m_BlockVectorCount; ++i) + { + if (m_pBlockVectors[i] != NULL) + { + MutexLockWrite lock(m_pBlockVectors[i]->GetMutex(), m_pBlockVectors[i]->m_hAllocator->UseMutex()); + + bool end = false; + size_t movesOffset = m_Moves.size(); + if (m_pBlockVectors[i]->GetBlockCount() > 1) + { + end = ComputeDefragmentation(*m_pBlockVectors[i], i); + } + else if (m_pBlockVectors[i]->GetBlockCount() == 1) + { + end = ReallocWithinBlock(*m_pBlockVectors[i], m_pBlockVectors[i]->GetBlock(0)); + } + + // Setup index into block vector + for (; movesOffset < m_Moves.size(); ++movesOffset) + m_Moves[movesOffset].pDstTmpAllocation->SetPrivateData(reinterpret_cast(static_cast(i))); + + if (end) + break; + } + } + } + + moveInfo.MoveCount = static_cast(m_Moves.size()); + if (moveInfo.MoveCount > 0) + { + moveInfo.pMoves = m_Moves.data(); + return S_FALSE; + } + + moveInfo.pMoves = NULL; + return S_OK; +} + +HRESULT DefragmentationContextPimpl::DefragmentPassEnd(DEFRAGMENTATION_PASS_MOVE_INFO& moveInfo) +{ + D3D12MA_ASSERT(moveInfo.MoveCount > 0 ? moveInfo.pMoves != NULL : true); + + HRESULT result = S_OK; + Vector immovableBlocks(m_Moves.GetAllocs()); + + for (uint32_t i = 0; i < moveInfo.MoveCount; ++i) + { + DEFRAGMENTATION_MOVE& move = moveInfo.pMoves[i]; + size_t prevCount = 0, currentCount = 0; + UINT64 freedBlockSize = 0; + + UINT32 vectorIndex; + BlockVector* vector; + if (m_PoolBlockVector != NULL) + { + vectorIndex = 0; + vector = m_PoolBlockVector; + } + else + { + vectorIndex = static_cast(reinterpret_cast(move.pDstTmpAllocation->GetPrivateData())); + vector = m_pBlockVectors[vectorIndex]; + D3D12MA_ASSERT(vector != NULL); + } + + switch (move.Operation) + { + case DEFRAGMENTATION_MOVE_OPERATION_COPY: + { + move.pSrcAllocation->SwapBlockAllocation(move.pDstTmpAllocation); + + // Scope for locks, Free have it's own lock + { + MutexLockRead lock(vector->GetMutex(), vector->m_hAllocator->UseMutex()); + prevCount = vector->GetBlockCount(); + freedBlockSize = move.pDstTmpAllocation->GetBlock()->m_pMetadata->GetSize(); + } + move.pDstTmpAllocation->Release(); + { + MutexLockRead lock(vector->GetMutex(), vector->m_hAllocator->UseMutex()); + currentCount = vector->GetBlockCount(); + } + + result = S_FALSE; + break; + } + case DEFRAGMENTATION_MOVE_OPERATION_IGNORE: + { + m_PassStats.BytesMoved -= move.pSrcAllocation->GetSize(); + --m_PassStats.AllocationsMoved; + move.pDstTmpAllocation->Release(); + + NormalBlock* newBlock = move.pSrcAllocation->GetBlock(); + bool notPresent = true; + for (const FragmentedBlock& block : immovableBlocks) + { + if (block.block == newBlock) + { + notPresent = false; + break; + } + } + if (notPresent) + immovableBlocks.push_back({ vectorIndex, newBlock }); + break; + } + case DEFRAGMENTATION_MOVE_OPERATION_DESTROY: + { + m_PassStats.BytesMoved -= move.pSrcAllocation->GetSize(); + --m_PassStats.AllocationsMoved; + // Scope for locks, Free have it's own lock + { + MutexLockRead lock(vector->GetMutex(), vector->m_hAllocator->UseMutex()); + prevCount = vector->GetBlockCount(); + freedBlockSize = move.pSrcAllocation->GetBlock()->m_pMetadata->GetSize(); + } + move.pSrcAllocation->Release(); + { + MutexLockRead lock(vector->GetMutex(), vector->m_hAllocator->UseMutex()); + currentCount = vector->GetBlockCount(); + } + freedBlockSize *= prevCount - currentCount; + + UINT64 dstBlockSize; + { + MutexLockRead lock(vector->GetMutex(), vector->m_hAllocator->UseMutex()); + dstBlockSize = move.pDstTmpAllocation->GetBlock()->m_pMetadata->GetSize(); + } + move.pDstTmpAllocation->Release(); + { + MutexLockRead lock(vector->GetMutex(), vector->m_hAllocator->UseMutex()); + freedBlockSize += dstBlockSize * (currentCount - vector->GetBlockCount()); + currentCount = vector->GetBlockCount(); + } + + result = S_FALSE; + break; + } + default: + D3D12MA_ASSERT(0); + } + + if (prevCount > currentCount) + { + size_t freedBlocks = prevCount - currentCount; + m_PassStats.HeapsFreed += static_cast(freedBlocks); + m_PassStats.BytesFreed += freedBlockSize; + } + } + moveInfo.MoveCount = 0; + moveInfo.pMoves = NULL; + m_Moves.clear(); + + // Update stats + m_GlobalStats.AllocationsMoved += m_PassStats.AllocationsMoved; + m_GlobalStats.BytesFreed += m_PassStats.BytesFreed; + m_GlobalStats.BytesMoved += m_PassStats.BytesMoved; + m_GlobalStats.HeapsFreed += m_PassStats.HeapsFreed; + m_PassStats = { 0 }; + + // Move blocks with immovable allocations according to algorithm + if (immovableBlocks.size() > 0) + { + // Move to the begining + for (const FragmentedBlock& block : immovableBlocks) + { + BlockVector* vector = m_pBlockVectors[block.data]; + MutexLockWrite lock(vector->GetMutex(), vector->m_hAllocator->UseMutex()); + + for (size_t i = m_ImmovableBlockCount; i < vector->GetBlockCount(); ++i) + { + if (vector->GetBlock(i) == block.block) + { + D3D12MA_SWAP(vector->m_Blocks[i], vector->m_Blocks[m_ImmovableBlockCount++]); + break; + } + } + } + } + return result; +} + +bool DefragmentationContextPimpl::ComputeDefragmentation(BlockVector& vector, size_t index) +{ + switch (m_Algorithm) + { + case DEFRAGMENTATION_FLAG_ALGORITHM_FAST: + return ComputeDefragmentation_Fast(vector); + default: + D3D12MA_ASSERT(0); + case DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED: + return ComputeDefragmentation_Balanced(vector, index, true); + case DEFRAGMENTATION_FLAG_ALGORITHM_FULL: + return ComputeDefragmentation_Full(vector); + } +} + +DefragmentationContextPimpl::MoveAllocationData DefragmentationContextPimpl::GetMoveData( + AllocHandle handle, BlockMetadata* metadata) +{ + MoveAllocationData moveData; + moveData.move.pSrcAllocation = (Allocation*)metadata->GetAllocationPrivateData(handle); + moveData.size = moveData.move.pSrcAllocation->GetSize(); + moveData.alignment = moveData.move.pSrcAllocation->GetAlignment(); + moveData.flags = ALLOCATION_FLAG_NONE; + + return moveData; +} + +DefragmentationContextPimpl::CounterStatus DefragmentationContextPimpl::CheckCounters(UINT64 bytes) +{ + // Ignore allocation if will exceed max size for copy + if (m_PassStats.BytesMoved + bytes > m_MaxPassBytes) + { + if (++m_IgnoredAllocs < MAX_ALLOCS_TO_IGNORE) + return CounterStatus::Ignore; + else + return CounterStatus::End; + } + return CounterStatus::Pass; +} + +bool DefragmentationContextPimpl::IncrementCounters(UINT64 bytes) +{ + m_PassStats.BytesMoved += bytes; + // Early return when max found + if (++m_PassStats.AllocationsMoved >= m_MaxPassAllocations || m_PassStats.BytesMoved >= m_MaxPassBytes) + { + D3D12MA_ASSERT(m_PassStats.AllocationsMoved == m_MaxPassAllocations || + m_PassStats.BytesMoved == m_MaxPassBytes && "Exceeded maximal pass threshold!"); + return true; + } + return false; +} + +bool DefragmentationContextPimpl::ReallocWithinBlock(BlockVector& vector, NormalBlock* block) +{ + BlockMetadata* metadata = block->m_pMetadata; + + for (AllocHandle handle = metadata->GetAllocationListBegin(); + handle != (AllocHandle)0; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.pSrcAllocation->GetPrivateData() == this) + continue; + switch (CheckCounters(moveData.move.pSrcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + D3D12MA_ASSERT(0); + case CounterStatus::Pass: + break; + } + + UINT64 offset = moveData.move.pSrcAllocation->GetOffset(); + if (offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + AllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + ALLOCATION_FLAG_STRATEGY_MIN_OFFSET, + &request)) + { + if (metadata->GetAllocationOffset(request.allocHandle) < offset) + { + if (SUCCEEDED(vector.CommitAllocationRequest( + request, + block, + moveData.size, + moveData.alignment, + this, + &moveData.move.pDstTmpAllocation))) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } + } + } + } + } + return false; +} + +bool DefragmentationContextPimpl::AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, BlockVector& vector) +{ + for (; start < end; ++start) + { + NormalBlock* dstBlock = vector.GetBlock(start); + if (dstBlock->m_pMetadata->GetSumFreeSize() >= data.size) + { + if (SUCCEEDED(vector.AllocateFromBlock(dstBlock, + data.size, + data.alignment, + data.flags, + this, + 0, + &data.move.pDstTmpAllocation))) + { + m_Moves.push_back(data.move); + if (IncrementCounters(data.size)) + return true; + break; + } + } + } + return false; +} + +bool DefragmentationContextPimpl::ComputeDefragmentation_Fast(BlockVector& vector) +{ + // Move only between blocks + + // Go through allocations in last blocks and try to fit them inside first ones + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) + { + BlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + + for (AllocHandle handle = metadata->GetAllocationListBegin(); + handle != (AllocHandle)0; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.pSrcAllocation->GetPrivateData() == this) + continue; + switch (CheckCounters(moveData.move.pSrcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + D3D12MA_ASSERT(0); + case CounterStatus::Pass: + break; + } + + // Check all previous blocks for free space + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + } + } + return false; +} + +bool DefragmentationContextPimpl::ComputeDefragmentation_Balanced(BlockVector& vector, size_t index, bool update) +{ + // Go over every allocation and try to fit it in previous blocks at lowest offsets, + // if not possible: realloc within single block to minimize offset (exclude offset == 0), + // but only if there are noticable gaps between them (some heuristic, ex. average size of allocation in block) + D3D12MA_ASSERT(m_AlgorithmState != NULL); + + StateBalanced& vectorState = reinterpret_cast(m_AlgorithmState)[index]; + if (update && vectorState.avgAllocSize == UINT64_MAX) + UpdateVectorStatistics(vector, vectorState); + + const size_t startMoveCount = m_Moves.size(); + UINT64 minimalFreeRegion = vectorState.avgFreeSize / 2; + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) + { + NormalBlock* block = vector.GetBlock(i); + BlockMetadata* metadata = block->m_pMetadata; + UINT64 prevFreeRegionSize = 0; + + for (AllocHandle handle = metadata->GetAllocationListBegin(); + handle != (AllocHandle)0; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.pSrcAllocation->GetPrivateData() == this) + continue; + switch (CheckCounters(moveData.move.pSrcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + D3D12MA_ASSERT(0); + case CounterStatus::Pass: + break; + } + + // Check all previous blocks for free space + const size_t prevMoveCount = m_Moves.size(); + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + + UINT64 nextFreeRegionSize = metadata->GetNextFreeRegionSize(handle); + // If no room found then realloc within block for lower offset + UINT64 offset = moveData.move.pSrcAllocation->GetOffset(); + if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + // Check if realloc will make sense + if (prevFreeRegionSize >= minimalFreeRegion || + nextFreeRegionSize >= minimalFreeRegion || + moveData.size <= vectorState.avgFreeSize || + moveData.size <= vectorState.avgAllocSize) + { + AllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + ALLOCATION_FLAG_STRATEGY_MIN_OFFSET, + &request)) + { + if (metadata->GetAllocationOffset(request.allocHandle) < offset) + { + if (SUCCEEDED(vector.CommitAllocationRequest( + request, + block, + moveData.size, + moveData.alignment, + this, + &moveData.move.pDstTmpAllocation))) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } + } + } + } + } + prevFreeRegionSize = nextFreeRegionSize; + } + } + + // No moves perfomed, update statistics to current vector state + if (startMoveCount == m_Moves.size() && !update) + { + vectorState.avgAllocSize = UINT64_MAX; + return ComputeDefragmentation_Balanced(vector, index, false); + } + return false; +} + +bool DefragmentationContextPimpl::ComputeDefragmentation_Full(BlockVector& vector) +{ + // Go over every allocation and try to fit it in previous blocks at lowest offsets, + // if not possible: realloc within single block to minimize offset (exclude offset == 0) + + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) + { + NormalBlock* block = vector.GetBlock(i); + BlockMetadata* metadata = block->m_pMetadata; + + for (AllocHandle handle = metadata->GetAllocationListBegin(); + handle != (AllocHandle)0; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.pSrcAllocation->GetPrivateData() == this) + continue; + switch (CheckCounters(moveData.move.pSrcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + D3D12MA_ASSERT(0); + case CounterStatus::Pass: + break; + } + + // Check all previous blocks for free space + const size_t prevMoveCount = m_Moves.size(); + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + + // If no room found then realloc within block for lower offset + UINT64 offset = moveData.move.pSrcAllocation->GetOffset(); + if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + AllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + ALLOCATION_FLAG_STRATEGY_MIN_OFFSET, + &request)) + { + if (metadata->GetAllocationOffset(request.allocHandle) < offset) + { + if (SUCCEEDED(vector.CommitAllocationRequest( + request, + block, + moveData.size, + moveData.alignment, + this, + &moveData.move.pDstTmpAllocation))) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } + } + } + } + } + } + return false; +} + +void DefragmentationContextPimpl::UpdateVectorStatistics(BlockVector& vector, StateBalanced& state) +{ + size_t allocCount = 0; + size_t freeCount = 0; + state.avgFreeSize = 0; + state.avgAllocSize = 0; + + for (size_t i = 0; i < vector.GetBlockCount(); ++i) + { + BlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + + allocCount += metadata->GetAllocationCount(); + freeCount += metadata->GetFreeRegionsCount(); + state.avgFreeSize += metadata->GetSumFreeSize(); + state.avgAllocSize += metadata->GetSize(); + } + + state.avgAllocSize = (state.avgAllocSize - state.avgFreeSize) / allocCount; + state.avgFreeSize /= freeCount; +} +#endif // _D3D12MA_DEFRAGMENTATION_CONTEXT_PIMPL_FUNCTIONS + +#ifndef _D3D12MA_POOL_PIMPL_FUNCTIONS +PoolPimpl::PoolPimpl(AllocatorPimpl* allocator, const POOL_DESC& desc) + : m_Allocator(allocator), + m_Desc(desc), + m_BlockVector(NULL), + m_Name(NULL) +{ + const bool explicitBlockSize = desc.BlockSize != 0; + const UINT64 preferredBlockSize = explicitBlockSize ? desc.BlockSize : D3D12MA_DEFAULT_BLOCK_SIZE; + UINT maxBlockCount = desc.MaxBlockCount != 0 ? desc.MaxBlockCount : UINT_MAX; + +#ifndef __ID3D12Device4_INTERFACE_DEFINED__ + D3D12MA_ASSERT(m_Desc.pProtectedSession == NULL); +#endif + + m_BlockVector = D3D12MA_NEW(allocator->GetAllocs(), BlockVector)( + allocator, desc.HeapProperties, desc.HeapFlags, + preferredBlockSize, + desc.MinBlockCount, maxBlockCount, + explicitBlockSize, + D3D12MA_MAX(desc.MinAllocationAlignment, (UINT64)D3D12MA_DEBUG_ALIGNMENT), + desc.Flags & POOL_FLAG_ALGORITHM_MASK, + desc.Flags & POOL_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED, + desc.pProtectedSession); +} + +PoolPimpl::~PoolPimpl() +{ + D3D12MA_ASSERT(m_PrevPool == NULL && m_NextPool == NULL); + FreeName(); + D3D12MA_DELETE(m_Allocator->GetAllocs(), m_BlockVector); +} + +HRESULT PoolPimpl::Init() +{ + m_CommittedAllocations.Init(m_Allocator->UseMutex(), m_Desc.HeapProperties.Type, this); + return m_BlockVector->CreateMinBlocks(); +} + +void PoolPimpl::GetStatistics(Statistics& outStats) +{ + ClearStatistics(outStats); + m_BlockVector->AddStatistics(outStats); + m_CommittedAllocations.AddStatistics(outStats); +} + +void PoolPimpl::CalculateStatistics(DetailedStatistics& outStats) +{ + ClearDetailedStatistics(outStats); + AddDetailedStatistics(outStats); +} + +void PoolPimpl::AddDetailedStatistics(DetailedStatistics& inoutStats) +{ + m_BlockVector->AddDetailedStatistics(inoutStats); + m_CommittedAllocations.AddDetailedStatistics(inoutStats); +} + +void PoolPimpl::SetName(LPCWSTR Name) +{ + FreeName(); + + if (Name) + { + const size_t nameCharCount = wcslen(Name) + 1; + m_Name = D3D12MA_NEW_ARRAY(m_Allocator->GetAllocs(), WCHAR, nameCharCount); + memcpy(m_Name, Name, nameCharCount * sizeof(WCHAR)); + } +} + +void PoolPimpl::FreeName() +{ + if (m_Name) + { + const size_t nameCharCount = wcslen(m_Name) + 1; + D3D12MA_DELETE_ARRAY(m_Allocator->GetAllocs(), m_Name, nameCharCount); + m_Name = NULL; + } +} +#endif // _D3D12MA_POOL_PIMPL_FUNCTIONS + + +#ifndef _D3D12MA_PUBLIC_INTERFACE +HRESULT CreateAllocator(const ALLOCATOR_DESC* pDesc, Allocator** ppAllocator) +{ + if (!pDesc || !ppAllocator || !pDesc->pDevice || !pDesc->pAdapter || + !(pDesc->PreferredBlockSize == 0 || (pDesc->PreferredBlockSize >= 16 && pDesc->PreferredBlockSize < 0x10000000000ull))) + { + D3D12MA_ASSERT(0 && "Invalid arguments passed to CreateAllocator."); + return E_INVALIDARG; + } + + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + + ALLOCATION_CALLBACKS allocationCallbacks; + SetupAllocationCallbacks(allocationCallbacks, pDesc->pAllocationCallbacks); + + *ppAllocator = D3D12MA_NEW(allocationCallbacks, Allocator)(allocationCallbacks, *pDesc); + HRESULT hr = (*ppAllocator)->m_Pimpl->Init(*pDesc); + if (FAILED(hr)) + { + D3D12MA_DELETE(allocationCallbacks, *ppAllocator); + *ppAllocator = NULL; + } + return hr; +} + +HRESULT CreateVirtualBlock(const VIRTUAL_BLOCK_DESC* pDesc, VirtualBlock** ppVirtualBlock) +{ + if (!pDesc || !ppVirtualBlock) + { + D3D12MA_ASSERT(0 && "Invalid arguments passed to CreateVirtualBlock."); + return E_INVALIDARG; + } + + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + + ALLOCATION_CALLBACKS allocationCallbacks; + SetupAllocationCallbacks(allocationCallbacks, pDesc->pAllocationCallbacks); + + *ppVirtualBlock = D3D12MA_NEW(allocationCallbacks, VirtualBlock)(allocationCallbacks, *pDesc); + return S_OK; +} + +#ifndef _D3D12MA_IUNKNOWN_IMPL_FUNCTIONS HRESULT STDMETHODCALLTYPE IUnknownImpl::QueryInterface(REFIID riid, void** ppvObject) { - if(ppvObject == NULL) + if (ppvObject == NULL) return E_POINTER; - if(riid == IID_IUnknown) + if (riid == IID_IUnknown) { ++m_RefCount; *ppvObject = this; @@ -5952,15 +9576,14 @@ ULONG STDMETHODCALLTYPE IUnknownImpl::Release() { D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - const uint32_t newRefCount = --m_RefCount; - if(newRefCount == 0) + const uint32_t newRefCount = --m_RefCount; + if (newRefCount == 0) ReleaseThis(); return newRefCount; } +#endif // _D3D12MA_IUNKNOWN_IMPL_FUNCTIONS -//////////////////////////////////////////////////////////////////////////////// -// Public class Allocation implementation - +#ifndef _D3D12MA_ALLOCATION_FUNCTIONS void Allocation::PackedData::SetType(Type type) { const UINT u = (UINT)type; @@ -5989,51 +9612,36 @@ void Allocation::PackedData::SetTextureLayout(D3D12_TEXTURE_LAYOUT textureLayout m_TextureLayout = u; } -void Allocation::ReleaseThis() -{ - if(this == NULL) - { - return; - } - - SAFE_RELEASE(m_Resource); - - switch(m_PackedData.GetType()) - { - case TYPE_COMMITTED: - m_Allocator->FreeCommittedMemory(this); - break; - case TYPE_PLACED: - m_Allocator->FreePlacedMemory(this); - break; - case TYPE_HEAP: - m_Allocator->FreeHeapMemory(this); - break; - } - - FreeName(); - - m_Allocator->GetAllocationObjectAllocator().Free(this); -} - UINT64 Allocation::GetOffset() const { - switch(m_PackedData.GetType()) + switch (m_PackedData.GetType()) { case TYPE_COMMITTED: case TYPE_HEAP: return 0; case TYPE_PLACED: - return m_Placed.offset; + return m_Placed.block->m_pMetadata->GetAllocationOffset(m_Placed.allocHandle); default: D3D12MA_ASSERT(0); return 0; } } +void Allocation::SetResource(ID3D12Resource* pResource) +{ + if (pResource != m_Resource) + { + if (m_Resource) + m_Resource->Release(); + m_Resource = pResource; + if (m_Resource) + m_Resource->AddRef(); + } +} + ID3D12Heap* Allocation::GetHeap() const { - switch(m_PackedData.GetType()) + switch (m_PackedData.GetType()) { case TYPE_COMMITTED: return NULL; @@ -6051,7 +9659,7 @@ void Allocation::SetName(LPCWSTR Name) { FreeName(); - if(Name) + if (Name) { const size_t nameCharCount = wcslen(Name) + 1; m_Name = D3D12MA_NEW_ARRAY(m_Allocator->GetAllocs(), WCHAR, nameCharCount); @@ -6059,12 +9667,39 @@ void Allocation::SetName(LPCWSTR Name) } } -Allocation::Allocation(AllocatorPimpl* allocator, UINT64 size, BOOL wasZeroInitialized) : - m_Allocator{allocator}, - m_Size{size}, - m_Resource{NULL}, - m_CreationFrameIndex{allocator->GetCurrentFrameIndex()}, - m_Name{NULL} +void Allocation::ReleaseThis() +{ + if (this == NULL) + { + return; + } + + SAFE_RELEASE(m_Resource); + + switch (m_PackedData.GetType()) + { + case TYPE_COMMITTED: + m_Allocator->FreeCommittedMemory(this); + break; + case TYPE_PLACED: + m_Allocator->FreePlacedMemory(this); + break; + case TYPE_HEAP: + m_Allocator->FreeHeapMemory(this); + break; + } + + FreeName(); + + m_Allocator->GetAllocationObjectAllocator().Free(this); +} + +Allocation::Allocation(AllocatorPimpl* allocator, UINT64 size, UINT64 alignment, BOOL wasZeroInitialized) + : m_Allocator{ allocator }, + m_Size{ size }, + m_Alignment{ alignment }, + m_Resource{ NULL }, + m_Name{ NULL } { D3D12MA_ASSERT(allocator); @@ -6083,10 +9718,10 @@ void Allocation::InitCommitted(CommittedAllocationList* list) m_Committed.next = NULL; } -void Allocation::InitPlaced(UINT64 offset, UINT64 alignment, NormalBlock* block) +void Allocation::InitPlaced(AllocHandle allocHandle, NormalBlock* block) { m_PackedData.SetType(TYPE_PLACED); - m_Placed.offset = offset; + m_Placed.allocHandle = allocHandle; m_Placed.block = block; } @@ -6099,8 +9734,51 @@ void Allocation::InitHeap(CommittedAllocationList* list, ID3D12Heap* heap) m_Heap.heap = heap; } +void Allocation::SwapBlockAllocation(Allocation* allocation) +{ + D3D12MA_ASSERT(allocation != NULL); + D3D12MA_ASSERT(m_PackedData.GetType() == TYPE_PLACED); + D3D12MA_ASSERT(allocation->m_PackedData.GetType() == TYPE_PLACED); + + D3D12MA_SWAP(m_Resource, allocation->m_Resource); + m_PackedData.SetWasZeroInitialized(allocation->m_PackedData.WasZeroInitialized()); + m_Placed.block->m_pMetadata->SetAllocationPrivateData(m_Placed.allocHandle, allocation); + D3D12MA_SWAP(m_Placed, allocation->m_Placed); + m_Placed.block->m_pMetadata->SetAllocationPrivateData(m_Placed.allocHandle, this); +} + +AllocHandle Allocation::GetAllocHandle() const +{ + switch (m_PackedData.GetType()) + { + case TYPE_COMMITTED: + case TYPE_HEAP: + return (AllocHandle)0; + case TYPE_PLACED: + return m_Placed.allocHandle; + default: + D3D12MA_ASSERT(0); + return (AllocHandle)0; + } +} + +NormalBlock* Allocation::GetBlock() +{ + switch (m_PackedData.GetType()) + { + case TYPE_COMMITTED: + case TYPE_HEAP: + return NULL; + case TYPE_PLACED: + return m_Placed.block; + default: + D3D12MA_ASSERT(0); + return NULL; + } +} + template -void Allocation::SetResource(ID3D12Resource* resource, const D3D12_RESOURCE_DESC_T* pResourceDesc) +void Allocation::SetResourcePointer(ID3D12Resource* resource, const D3D12_RESOURCE_DESC_T* pResourceDesc) { D3D12MA_ASSERT(m_Resource == NULL && pResourceDesc); m_Resource = resource; @@ -6111,83 +9789,157 @@ void Allocation::SetResource(ID3D12Resource* resource, const D3D12_RESOURCE_DESC void Allocation::FreeName() { - if(m_Name) + if (m_Name) { const size_t nameCharCount = wcslen(m_Name) + 1; D3D12MA_DELETE_ARRAY(m_Allocator->GetAllocs(), m_Name, nameCharCount); m_Name = NULL; } } +#endif // _D3D12MA_ALLOCATION_FUNCTIONS -//////////////////////////////////////////////////////////////////////////////// -// Private class AllocationObjectAllocator implementation - -AllocationObjectAllocator::AllocationObjectAllocator(const ALLOCATION_CALLBACKS& allocationCallbacks) : - m_Allocator(allocationCallbacks, 1024) +#ifndef _D3D12MA_DEFRAGMENTATION_CONTEXT_FUNCTIONS +HRESULT DefragmentationContext::BeginPass(DEFRAGMENTATION_PASS_MOVE_INFO* pPassInfo) { + D3D12MA_ASSERT(pPassInfo); + return m_Pimpl->DefragmentPassBegin(*pPassInfo); } -template Allocation* AllocationObjectAllocator::Allocate(Types... args) +HRESULT DefragmentationContext::EndPass(DEFRAGMENTATION_PASS_MOVE_INFO* pPassInfo) { - MutexLock mutexLock(m_Mutex); - return m_Allocator.Alloc(std::forward(args)...); + D3D12MA_ASSERT(pPassInfo); + return m_Pimpl->DefragmentPassEnd(*pPassInfo); } -void AllocationObjectAllocator::Free(Allocation* alloc) +void DefragmentationContext::GetStats(DEFRAGMENTATION_STATS* pStats) { - MutexLock mutexLock(m_Mutex); - m_Allocator.Free(alloc); + D3D12MA_ASSERT(pStats); + m_Pimpl->GetStats(*pStats); } -//////////////////////////////////////////////////////////////////////////////// -// Public class Allocator implementation - -Allocator::Allocator(const ALLOCATION_CALLBACKS& allocationCallbacks, const ALLOCATOR_DESC& desc) : - m_Pimpl(D3D12MA_NEW(allocationCallbacks, AllocatorPimpl)(allocationCallbacks, desc)) +void DefragmentationContext::ReleaseThis() { + if (this == NULL) + { + return; + } + + D3D12MA_DELETE(m_Pimpl->GetAllocs(), this); } -Allocator::~Allocator() +DefragmentationContext::DefragmentationContext(AllocatorPimpl* allocator, + const DEFRAGMENTATION_DESC& desc, + BlockVector* poolVector) + : m_Pimpl(D3D12MA_NEW(allocator->GetAllocs(), DefragmentationContextPimpl)(allocator, desc, poolVector)) {} + +DefragmentationContext::~DefragmentationContext() { D3D12MA_DELETE(m_Pimpl->GetAllocs(), m_Pimpl); } +#endif // _D3D12MA_DEFRAGMENTATION_CONTEXT_FUNCTIONS -void Allocator::ReleaseThis() +#ifndef _D3D12MA_POOL_FUNCTIONS +POOL_DESC Pool::GetDesc() const { - // Copy is needed because otherwise we would call destructor and invalidate the structure with callbacks before using it to free memory. - const ALLOCATION_CALLBACKS allocationCallbacksCopy = m_Pimpl->GetAllocs(); - D3D12MA_DELETE(allocationCallbacksCopy, this); + return m_Pimpl->GetDesc(); } +void Pool::GetStatistics(Statistics* pStats) +{ + D3D12MA_ASSERT(pStats); + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + m_Pimpl->GetStatistics(*pStats); +} + +void Pool::CalculateStatistics(DetailedStatistics* pStats) +{ + D3D12MA_ASSERT(pStats); + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + m_Pimpl->CalculateStatistics(*pStats); +} + +void Pool::SetName(LPCWSTR Name) +{ + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + m_Pimpl->SetName(Name); +} + +LPCWSTR Pool::GetName() const +{ + return m_Pimpl->GetName(); +} + +HRESULT Pool::BeginDefragmentation(const DEFRAGMENTATION_DESC* pDesc, DefragmentationContext** ppContext) +{ + D3D12MA_ASSERT(pDesc && ppContext); + + // Check for support + if (m_Pimpl->GetBlockVector()->GetAlgorithm() & POOL_FLAG_ALGORITHM_LINEAR) + return E_NOINTERFACE; + + AllocatorPimpl* allocator = m_Pimpl->GetAllocator(); + *ppContext = D3D12MA_NEW(allocator->GetAllocs(), DefragmentationContext)(allocator, *pDesc, m_Pimpl->GetBlockVector()); + return S_OK; +} + +void Pool::ReleaseThis() +{ + if (this == NULL) + { + return; + } + + D3D12MA_DELETE(m_Pimpl->GetAllocator()->GetAllocs(), this); +} + +Pool::Pool(Allocator* allocator, const POOL_DESC& desc) + : m_Pimpl(D3D12MA_NEW(allocator->m_Pimpl->GetAllocs(), PoolPimpl)(allocator->m_Pimpl, desc)) {} + +Pool::~Pool() +{ + m_Pimpl->GetAllocator()->UnregisterPool(this, m_Pimpl->GetDesc().HeapProperties.Type); + + D3D12MA_DELETE(m_Pimpl->GetAllocator()->GetAllocs(), m_Pimpl); +} +#endif // _D3D12MA_POOL_FUNCTIONS + +#ifndef _D3D12MA_ALLOCATOR_FUNCTIONS const D3D12_FEATURE_DATA_D3D12_OPTIONS& Allocator::GetD3D12Options() const { return m_Pimpl->GetD3D12Options(); } + BOOL Allocator::IsUMA() const { return m_Pimpl->IsUMA(); } + BOOL Allocator::IsCacheCoherentUMA() const { return m_Pimpl->IsCacheCoherentUMA(); } +UINT64 Allocator::GetMemoryCapacity(UINT memorySegmentGroup) const +{ + return m_Pimpl->GetMemoryCapacity(memorySegmentGroup); +} + HRESULT Allocator::CreateResource( const ALLOCATION_DESC* pAllocDesc, const D3D12_RESOURCE_DESC* pResourceDesc, D3D12_RESOURCE_STATES InitialResourceState, - const D3D12_CLEAR_VALUE *pOptimizedClearValue, + const D3D12_CLEAR_VALUE* pOptimizedClearValue, Allocation** ppAllocation, REFIID riidResource, void** ppvResource) { - if(!pAllocDesc || !pResourceDesc || !ppAllocation) + if (!pAllocDesc || !pResourceDesc || !ppAllocation) { D3D12MA_ASSERT(0 && "Invalid arguments passed to Allocator::CreateResource."); return E_INVALIDARG; } D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - return m_Pimpl->CreateResource(pAllocDesc, pResourceDesc, InitialResourceState, pOptimizedClearValue, ppAllocation, riidResource, ppvResource); + return m_Pimpl->CreateResource(pAllocDesc, pResourceDesc, InitialResourceState, pOptimizedClearValue, ppAllocation, riidResource, ppvResource); } #ifdef __ID3D12Device8_INTERFACE_DEFINED__ @@ -6195,48 +9947,33 @@ HRESULT Allocator::CreateResource2( const ALLOCATION_DESC* pAllocDesc, const D3D12_RESOURCE_DESC1* pResourceDesc, D3D12_RESOURCE_STATES InitialResourceState, - const D3D12_CLEAR_VALUE *pOptimizedClearValue, + const D3D12_CLEAR_VALUE* pOptimizedClearValue, Allocation** ppAllocation, REFIID riidResource, void** ppvResource) { - if(!pAllocDesc || !pResourceDesc || !ppAllocation) + if (!pAllocDesc || !pResourceDesc || !ppAllocation) { D3D12MA_ASSERT(0 && "Invalid arguments passed to Allocator::CreateResource2."); return E_INVALIDARG; } D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - return m_Pimpl->CreateResource2(pAllocDesc, pResourceDesc, InitialResourceState, pOptimizedClearValue, ppAllocation, riidResource, ppvResource); + return m_Pimpl->CreateResource2(pAllocDesc, pResourceDesc, InitialResourceState, pOptimizedClearValue, ppAllocation, riidResource, ppvResource); } #endif // #ifdef __ID3D12Device8_INTERFACE_DEFINED__ -static inline bool ValidateAllocateMemoryParameters( - const ALLOCATION_DESC* pAllocDesc, - const D3D12_RESOURCE_ALLOCATION_INFO* pAllocInfo, - Allocation** ppAllocation) -{ - return pAllocDesc && - pAllocInfo && - ppAllocation && - (pAllocInfo->Alignment == 0 || - pAllocInfo->Alignment == D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT || - pAllocInfo->Alignment == D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT) && - pAllocInfo->SizeInBytes != 0 && - pAllocInfo->SizeInBytes % (64ull * 1024) == 0; -} - HRESULT Allocator::AllocateMemory( const ALLOCATION_DESC* pAllocDesc, const D3D12_RESOURCE_ALLOCATION_INFO* pAllocInfo, Allocation** ppAllocation) { - if(!ValidateAllocateMemoryParameters(pAllocDesc, pAllocInfo, ppAllocation)) + if (!ValidateAllocateMemoryParameters(pAllocDesc, pAllocInfo, ppAllocation)) { D3D12MA_ASSERT(0 && "Invalid arguments passed to Allocator::AllocateMemory."); return E_INVALIDARG; } D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - return m_Pimpl->AllocateMemory(pAllocDesc, pAllocInfo, ppAllocation); + return m_Pimpl->AllocateMemory(pAllocDesc, pAllocInfo, ppAllocation); } HRESULT Allocator::CreateAliasingResource( @@ -6244,39 +9981,39 @@ HRESULT Allocator::CreateAliasingResource( UINT64 AllocationLocalOffset, const D3D12_RESOURCE_DESC* pResourceDesc, D3D12_RESOURCE_STATES InitialResourceState, - const D3D12_CLEAR_VALUE *pOptimizedClearValue, + const D3D12_CLEAR_VALUE* pOptimizedClearValue, REFIID riidResource, void** ppvResource) { - if(!pAllocation || !pResourceDesc || !ppvResource) + if (!pAllocation || !pResourceDesc || !ppvResource) { D3D12MA_ASSERT(0 && "Invalid arguments passed to Allocator::CreateAliasingResource."); return E_INVALIDARG; } D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - return m_Pimpl->CreateAliasingResource(pAllocation, AllocationLocalOffset, pResourceDesc, InitialResourceState, pOptimizedClearValue, riidResource, ppvResource); + return m_Pimpl->CreateAliasingResource(pAllocation, AllocationLocalOffset, pResourceDesc, InitialResourceState, pOptimizedClearValue, riidResource, ppvResource); } HRESULT Allocator::CreatePool( const POOL_DESC* pPoolDesc, Pool** ppPool) { - if(!pPoolDesc || !ppPool || + if (!pPoolDesc || !ppPool || (pPoolDesc->MaxBlockCount > 0 && pPoolDesc->MaxBlockCount < pPoolDesc->MinBlockCount) || (pPoolDesc->MinAllocationAlignment > 0 && !IsPow2(pPoolDesc->MinAllocationAlignment))) { D3D12MA_ASSERT(0 && "Invalid arguments passed to Allocator::CreatePool."); return E_INVALIDARG; } - if(!m_Pimpl->HeapFlagsFulfillResourceHeapTier(pPoolDesc->HeapFlags)) + if (!m_Pimpl->HeapFlagsFulfillResourceHeapTier(pPoolDesc->HeapFlags)) { D3D12MA_ASSERT(0 && "Invalid pPoolDesc->HeapFlags passed to Allocator::CreatePool. Did you forget to handle ResourceHeapTier=1?"); return E_INVALIDARG; } D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - *ppPool = D3D12MA_NEW(m_Pimpl->GetAllocs(), Pool)(this, *pPoolDesc); + * ppPool = D3D12MA_NEW(m_Pimpl->GetAllocs(), Pool)(this, *pPoolDesc); HRESULT hr = (*ppPool)->m_Pimpl->Init(); - if(SUCCEEDED(hr)) + if (SUCCEEDED(hr)) { m_Pimpl->RegisterPool(*ppPool, pPoolDesc->HeapProperties.Type); } @@ -6291,31 +10028,31 @@ HRESULT Allocator::CreatePool( void Allocator::SetCurrentFrameIndex(UINT frameIndex) { D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - m_Pimpl->SetCurrentFrameIndex(frameIndex); + m_Pimpl->SetCurrentFrameIndex(frameIndex); } -void Allocator::CalculateStats(Stats* pStats) +void Allocator::GetBudget(Budget* pLocalBudget, Budget* pNonLocalBudget) { - D3D12MA_ASSERT(pStats); - D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - m_Pimpl->CalculateStats(*pStats); -} - -void Allocator::GetBudget(Budget* pGpuBudget, Budget* pCpuBudget) -{ - if(pGpuBudget == NULL && pCpuBudget == NULL) + if (pLocalBudget == NULL && pNonLocalBudget == NULL) { return; } D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - m_Pimpl->GetBudget(pGpuBudget, pCpuBudget); + m_Pimpl->GetBudget(pLocalBudget, pNonLocalBudget); +} + +void Allocator::CalculateStatistics(TotalStatistics* pStats) +{ + D3D12MA_ASSERT(pStats); + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + m_Pimpl->CalculateStatistics(*pStats); } void Allocator::BuildStatsString(WCHAR** ppStatsString, BOOL DetailedMap) const { D3D12MA_ASSERT(ppStatsString); D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - m_Pimpl->BuildStatsString(ppStatsString, DetailedMap); + m_Pimpl->BuildStatsString(ppStatsString, DetailedMap); } void Allocator::FreeStatsString(WCHAR* pStatsString) const @@ -6323,134 +10060,129 @@ void Allocator::FreeStatsString(WCHAR* pStatsString) const if (pStatsString != NULL) { D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - m_Pimpl->FreeStatsString(pStatsString); + m_Pimpl->FreeStatsString(pStatsString); } } -//////////////////////////////////////////////////////////////////////////////// -// Private class VirtualBlockPimpl definition - -class VirtualBlockPimpl +void Allocator::BeginDefragmentation(const DEFRAGMENTATION_DESC* pDesc, DefragmentationContext** ppContext) { -public: - const ALLOCATION_CALLBACKS m_AllocationCallbacks; - const UINT64 m_Size; - BlockMetadata_Generic m_Metadata; + D3D12MA_ASSERT(pDesc && ppContext); - VirtualBlockPimpl(const ALLOCATION_CALLBACKS& allocationCallbacks, UINT64 size); -}; - -VirtualBlockPimpl::VirtualBlockPimpl(const ALLOCATION_CALLBACKS& allocationCallbacks, UINT64 size) : - m_AllocationCallbacks(allocationCallbacks), - m_Size(size), - m_Metadata(&m_AllocationCallbacks, - true) // isVirtual -{ - m_Metadata.Init(m_Size); + *ppContext = D3D12MA_NEW(m_Pimpl->GetAllocs(), DefragmentationContext)(m_Pimpl, *pDesc, NULL); } -//////////////////////////////////////////////////////////////////////////////// -// Public class VirtualBlock implementation - -VirtualBlock::VirtualBlock(const ALLOCATION_CALLBACKS& allocationCallbacks, const VIRTUAL_BLOCK_DESC& desc) : - m_Pimpl(D3D12MA_NEW(allocationCallbacks, VirtualBlockPimpl)(allocationCallbacks, desc.Size)) -{ -} - -VirtualBlock::~VirtualBlock() -{ - // THIS IS AN IMPORTANT ASSERT! - // Hitting it means you have some memory leak - unreleased allocations in this virtual block. - D3D12MA_ASSERT(m_Pimpl->m_Metadata.IsEmpty() && "Some allocations were not freed before destruction of this virtual block!"); - - D3D12MA_DELETE(m_Pimpl->m_AllocationCallbacks, m_Pimpl); -} - -void VirtualBlock::ReleaseThis() +void Allocator::ReleaseThis() { // Copy is needed because otherwise we would call destructor and invalidate the structure with callbacks before using it to free memory. - const ALLOCATION_CALLBACKS allocationCallbacksCopy = m_Pimpl->m_AllocationCallbacks; + const ALLOCATION_CALLBACKS allocationCallbacksCopy = m_Pimpl->GetAllocs(); D3D12MA_DELETE(allocationCallbacksCopy, this); } +Allocator::Allocator(const ALLOCATION_CALLBACKS& allocationCallbacks, const ALLOCATOR_DESC& desc) + : m_Pimpl(D3D12MA_NEW(allocationCallbacks, AllocatorPimpl)(allocationCallbacks, desc)) {} + +Allocator::~Allocator() +{ + D3D12MA_DELETE(m_Pimpl->GetAllocs(), m_Pimpl); +} +#endif // _D3D12MA_ALLOCATOR_FUNCTIONS + +#ifndef _D3D12MA_VIRTUAL_BLOCK_FUNCTIONS BOOL VirtualBlock::IsEmpty() const { D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - return m_Pimpl->m_Metadata.IsEmpty() ? TRUE : FALSE; + return m_Pimpl->m_Metadata->IsEmpty() ? TRUE : FALSE; } -void VirtualBlock::GetAllocationInfo(UINT64 offset, VIRTUAL_ALLOCATION_INFO* pInfo) const +void VirtualBlock::GetAllocationInfo(VirtualAllocation allocation, VIRTUAL_ALLOCATION_INFO* pInfo) const { - D3D12MA_ASSERT(offset != UINT64_MAX && pInfo); + D3D12MA_ASSERT(allocation.AllocHandle != (AllocHandle)0 && pInfo); D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - m_Pimpl->m_Metadata.GetAllocationInfo(offset, *pInfo); + m_Pimpl->m_Metadata->GetAllocationInfo(allocation.AllocHandle, *pInfo); } -HRESULT VirtualBlock::Allocate(const VIRTUAL_ALLOCATION_DESC* pDesc, UINT64* pOffset) +HRESULT VirtualBlock::Allocate(const VIRTUAL_ALLOCATION_DESC* pDesc, VirtualAllocation* pAllocation, UINT64* pOffset) { - if(!pDesc || !pOffset || pDesc->Size == 0 || !IsPow2(pDesc->Alignment)) + if (!pDesc || !pAllocation || pDesc->Size == 0 || !IsPow2(pDesc->Alignment)) { D3D12MA_ASSERT(0 && "Invalid arguments passed to VirtualBlock::Allocate."); return E_INVALIDARG; } - *pOffset = UINT64_MAX; - D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - - const UINT64 alignment = pDesc->Alignment != 0 ? pDesc->Alignment : 1; + + const UINT64 alignment = pDesc->Alignment != 0 ? pDesc->Alignment : 1; AllocationRequest allocRequest = {}; - if(m_Pimpl->m_Metadata.CreateAllocationRequest(pDesc->Size, alignment, &allocRequest)) + if (m_Pimpl->m_Metadata->CreateAllocationRequest( + pDesc->Size, + alignment, + pDesc->Flags & VIRTUAL_ALLOCATION_FLAG_UPPER_ADDRESS, + pDesc->Flags & VIRTUAL_ALLOCATION_FLAG_STRATEGY_MASK, + &allocRequest)) { - m_Pimpl->m_Metadata.Alloc(allocRequest, pDesc->Size, pDesc->pUserData); - D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata.Validate()); - *pOffset = allocRequest.offset; + m_Pimpl->m_Metadata->Alloc(allocRequest, pDesc->Size, pDesc->pPrivateData); + D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata->Validate()); + pAllocation->AllocHandle = allocRequest.allocHandle; + + if (pOffset) + *pOffset = m_Pimpl->m_Metadata->GetAllocationOffset(allocRequest.allocHandle); return S_OK; } - else - { - return E_OUTOFMEMORY; - } + + pAllocation->AllocHandle = (AllocHandle)0; + if (pOffset) + *pOffset = UINT64_MAX; + + return E_OUTOFMEMORY; } -void VirtualBlock::FreeAllocation(UINT64 offset) +void VirtualBlock::FreeAllocation(VirtualAllocation allocation) { + if (allocation.AllocHandle == (AllocHandle)0) + return; + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - D3D12MA_ASSERT(offset != UINT64_MAX); - - m_Pimpl->m_Metadata.FreeAtOffset(offset); - D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata.Validate()); + m_Pimpl->m_Metadata->Free(allocation.AllocHandle); + D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata->Validate()); } void VirtualBlock::Clear() { D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - m_Pimpl->m_Metadata.Clear(); - D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata.Validate()); + m_Pimpl->m_Metadata->Clear(); + D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata->Validate()); } -void VirtualBlock::SetAllocationUserData(UINT64 offset, void* pUserData) +void VirtualBlock::SetAllocationPrivateData(VirtualAllocation allocation, void* pPrivateData) { - D3D12MA_ASSERT(offset != UINT64_MAX); + D3D12MA_ASSERT(allocation.AllocHandle != (AllocHandle)0); D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - m_Pimpl->m_Metadata.SetAllocationUserData(offset, pUserData); + m_Pimpl->m_Metadata->SetAllocationPrivateData(allocation.AllocHandle, pPrivateData); } -void VirtualBlock::CalculateStats(StatInfo* pInfo) const +void VirtualBlock::GetStatistics(Statistics* pStats) const { - D3D12MA_ASSERT(pInfo); - + D3D12MA_ASSERT(pStats); D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata->Validate()); + ClearStatistics(*pStats); + m_Pimpl->m_Metadata->AddStatistics(*pStats); +} - D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata.Validate()); - m_Pimpl->m_Metadata.CalcAllocationStatInfo(*pInfo); +void VirtualBlock::CalculateStatistics(DetailedStatistics* pStats) const +{ + D3D12MA_ASSERT(pStats); + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata->Validate()); + ClearDetailedStatistics(*pStats); + m_Pimpl->m_Metadata->AddDetailedStatistics(*pStats); } void VirtualBlock::BuildStatsString(WCHAR** ppStatsString) const @@ -6459,11 +10191,11 @@ void VirtualBlock::BuildStatsString(WCHAR** ppStatsString) const D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - StringBuilder sb(m_Pimpl->m_AllocationCallbacks); + StringBuilder sb(m_Pimpl->m_AllocationCallbacks); { JsonWriter json(m_Pimpl->m_AllocationCallbacks, sb); - D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata.Validate()); - m_Pimpl->m_Metadata.WriteAllocationInfoToJson(json); + D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata->Validate()); + m_Pimpl->m_Metadata->WriteAllocationInfoToJson(json); } // Scope for JsonWriter const size_t length = sb.GetLength(); @@ -6478,53 +10210,28 @@ void VirtualBlock::FreeStatsString(WCHAR* pStatsString) const if (pStatsString != NULL) { D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - D3D12MA::Free(m_Pimpl->m_AllocationCallbacks, pStatsString); + D3D12MA::Free(m_Pimpl->m_AllocationCallbacks, pStatsString); } } - -//////////////////////////////////////////////////////////////////////////////// -// Public global functions - -HRESULT CreateAllocator(const ALLOCATOR_DESC* pDesc, Allocator** ppAllocator) +void VirtualBlock::ReleaseThis() { - if(!pDesc || !ppAllocator || !pDesc->pDevice || !pDesc->pAdapter || - !(pDesc->PreferredBlockSize == 0 || (pDesc->PreferredBlockSize >= 16 && pDesc->PreferredBlockSize < 0x10000000000ull))) - { - D3D12MA_ASSERT(0 && "Invalid arguments passed to CreateAllocator."); - return E_INVALIDARG; - } - - D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - - ALLOCATION_CALLBACKS allocationCallbacks; - SetupAllocationCallbacks(allocationCallbacks, pDesc->pAllocationCallbacks); - - *ppAllocator = D3D12MA_NEW(allocationCallbacks, Allocator)(allocationCallbacks, *pDesc); - HRESULT hr = (*ppAllocator)->m_Pimpl->Init(*pDesc); - if(FAILED(hr)) - { - D3D12MA_DELETE(allocationCallbacks, *ppAllocator); - *ppAllocator = NULL; - } - return hr; + // Copy is needed because otherwise we would call destructor and invalidate the structure with callbacks before using it to free memory. + const ALLOCATION_CALLBACKS allocationCallbacksCopy = m_Pimpl->m_AllocationCallbacks; + D3D12MA_DELETE(allocationCallbacksCopy, this); } -HRESULT CreateVirtualBlock(const VIRTUAL_BLOCK_DESC* pDesc, VirtualBlock** ppVirtualBlock) +VirtualBlock::VirtualBlock(const ALLOCATION_CALLBACKS& allocationCallbacks, const VIRTUAL_BLOCK_DESC& desc) + : m_Pimpl(D3D12MA_NEW(allocationCallbacks, VirtualBlockPimpl)(allocationCallbacks, desc)) {} + +VirtualBlock::~VirtualBlock() { - if(!pDesc || !ppVirtualBlock) - { - D3D12MA_ASSERT(0 && "Invalid arguments passed to CreateVirtualBlock."); - return E_INVALIDARG; - } + // THIS IS AN IMPORTANT ASSERT! + // Hitting it means you have some memory leak - unreleased allocations in this virtual block. + D3D12MA_ASSERT(m_Pimpl->m_Metadata->IsEmpty() && "Some allocations were not freed before destruction of this virtual block!"); - D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK - - ALLOCATION_CALLBACKS allocationCallbacks; - SetupAllocationCallbacks(allocationCallbacks, pDesc->pAllocationCallbacks); - - *ppVirtualBlock = D3D12MA_NEW(allocationCallbacks, VirtualBlock)(allocationCallbacks, *pDesc); - return S_OK; + D3D12MA_DELETE(m_Pimpl->m_AllocationCallbacks, m_Pimpl); } - +#endif // _D3D12MA_VIRTUAL_BLOCK_FUNCTIONS +#endif // _D3D12MA_PUBLIC_INTERFACE } // namespace D3D12MA diff --git a/WickedEngine/Utility/D3D12MemAlloc.h b/WickedEngine/Utility/D3D12MemAlloc.h index 9bb53bce3..d9e5f9666 100644 --- a/WickedEngine/Utility/D3D12MemAlloc.h +++ b/WickedEngine/Utility/D3D12MemAlloc.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2019-2021 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,28 +24,31 @@ /** \mainpage D3D12 Memory Allocator -Version 2.0.0-development (2021-07-26) +Version 2.0.0 (2022-03-25) -Copyright (c) 2019-2021 Advanced Micro Devices, Inc. All rights reserved. \n +Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. \n License: MIT Documentation of all members: D3D12MemAlloc.h \section main_table_of_contents Table of contents -- User guide - - \subpage quick_start - - [Project setup](@ref quick_start_project_setup) - - [Creating resources](@ref quick_start_creating_resources) - - [Mapping memory](@ref quick_start_mapping_memory) - - \subpage custom_pools - - \subpage resource_aliasing - - \subpage virtual_allocator +- \subpage quick_start + - [Project setup](@ref quick_start_project_setup) + - [Creating resources](@ref quick_start_creating_resources) + - [Mapping memory](@ref quick_start_mapping_memory) +- \subpage custom_pools +- \subpage defragmentation +- \subpage statistics +- \subpage resource_aliasing +- \subpage linear_algorithm +- \subpage virtual_allocator - \subpage configuration - [Custom CPU memory allocator](@ref custom_memory_allocator) + - [Debug margins](@ref debug_margins) - \subpage general_considerations - [Thread safety](@ref general_considerations_thread_safety) - - [Future plans](@ref general_considerations_future_plans) + - [Versioning and compatibility](@ref general_considerations_versioning_and_compatibility) - [Features not supported](@ref general_considerations_features_not_supported) \section main_see_also See also @@ -151,6 +154,7 @@ namespace D3D12MA { /// \cond INTERNAL +class DefragmentationContextPimpl; class AllocatorPimpl; class PoolPimpl; class NormalBlock; @@ -162,16 +166,21 @@ class VirtualBlockPimpl; class Pool; class Allocator; -struct StatInfo; +struct Statistics; +struct DetailedStatistics; +struct TotalStatistics; + +/// \brief Unique identifier of single allocation done inside the memory heap. +typedef UINT64 AllocHandle; /// Pointer to custom callback function that allocates CPU memory. -using ALLOCATE_FUNC_PTR = void* (*)(size_t Size, size_t Alignment, void* pUserData); +using ALLOCATE_FUNC_PTR = void* (*)(size_t Size, size_t Alignment, void* pPrivateData); /** \brief Pointer to custom callback function that deallocates CPU memory. `pMemory = null` should be accepted and ignored. */ -using FREE_FUNC_PTR = void (*)(void* pMemory, void* pUserData); +using FREE_FUNC_PTR = void (*)(void* pMemory, void* pPrivateData); /// Custom callbacks to CPU memory allocation functions. struct ALLOCATION_CALLBACKS @@ -181,9 +190,10 @@ struct ALLOCATION_CALLBACKS /// Dellocation function. FREE_FUNC_PTR pFree; /// Custom data that will be passed to allocation and deallocation functions as `pUserData` parameter. - void* pUserData; + void* pPrivateData; }; + /// \brief Bit flags to be used with ALLOCATION_DESC::Flags. enum ALLOCATION_FLAGS { @@ -194,6 +204,12 @@ enum ALLOCATION_FLAGS Set this flag if the allocation should have its own dedicated memory allocation (committed resource with implicit heap). Use it for special, big resources, like fullscreen textures used as render targets. + + - When used with functions like D3D12MA::Allocator::CreateResource, it will use `ID3D12Device::CreateCommittedResource`, + so the created allocation will contain a resource (D3D12MA::Allocation::GetResource() `!= NULL`) but will not have + a heap (D3D12MA::Allocation::GetHeap() `== NULL`), as the heap is implicit. + - When used with raw memory allocation like D3D12MA::Allocator::AllocateMemory, it will use `ID3D12Device::CreateHeap`, + so the created allocation will contain a heap (D3D12MA::Allocation::GetHeap() `!= NULL`) and its offset will always be 0. */ ALLOCATION_FLAG_COMMITTED = 0x1, @@ -212,6 +228,48 @@ enum ALLOCATION_FLAGS memory budget. Otherwise return `E_OUTOFMEMORY`. */ ALLOCATION_FLAG_WITHIN_BUDGET = 0x4, + + /** Allocation will be created from upper stack in a double stack pool. + + This flag is only allowed for custom pools created with #POOL_FLAG_ALGORITHM_LINEAR flag. + */ + ALLOCATION_FLAG_UPPER_ADDRESS = 0x8, + + /** Set this flag if the allocated memory will have aliasing resources. + + Use this when calling D3D12MA::Allocator::CreateResource() and similar to + guarantee creation of explicit heap for desired allocation and prevent it from using `CreateCommittedResource`, + so that new allocation object will always have `allocation->GetHeap() != NULL`. + */ + ALLOCATION_FLAG_CAN_ALIAS = 0x10, + + /** Allocation strategy that chooses smallest possible free range for the allocation + to minimize memory usage and fragmentation, possibly at the expense of allocation time. + */ + ALLOCATION_FLAG_STRATEGY_MIN_MEMORY = 0x00010000, + + /** Allocation strategy that chooses first suitable free range for the allocation - + not necessarily in terms of the smallest offset but the one that is easiest and fastest to find + to minimize allocation time, possibly at the expense of allocation quality. + */ + ALLOCATION_FLAG_STRATEGY_MIN_TIME = 0x00020000, + + /** Allocation strategy that chooses always the lowest offset in available space. + This is not the most efficient strategy but achieves highly packed data. + Used internally by defragmentation, not recomended in typical usage. + */ + ALLOCATION_FLAG_STRATEGY_MIN_OFFSET = 0x0004000, + + /// Alias to #ALLOCATION_FLAG_STRATEGY_MIN_MEMORY. + ALLOCATION_FLAG_STRATEGY_BEST_FIT = ALLOCATION_FLAG_STRATEGY_MIN_MEMORY, + /// Alias to #ALLOCATION_FLAG_STRATEGY_MIN_TIME. + ALLOCATION_FLAG_STRATEGY_FIRST_FIT = ALLOCATION_FLAG_STRATEGY_MIN_TIME, + + /// A bit mask to extract only `STRATEGY` bits from entire set of flags. + ALLOCATION_FLAG_STRATEGY_MASK = + ALLOCATION_FLAG_STRATEGY_MIN_MEMORY | + ALLOCATION_FLAG_STRATEGY_MIN_TIME | + ALLOCATION_FLAG_STRATEGY_MIN_OFFSET, }; /// \brief Parameters of created D3D12MA::Allocation object. To be used with Allocator::CreateResource. @@ -248,6 +306,141 @@ struct ALLOCATION_DESC It will then never be created as committed. */ Pool* CustomPool; + /// Custom general-purpose pointer that will be stored in D3D12MA::Allocation. + void* pPrivateData; +}; + +/** \brief Calculated statistics of memory usage e.g. in a specific memory heap type, +memory segment group, custom pool, or total. + +These are fast to calculate. +See functions: D3D12MA::Allocator::GetBudget(), D3D12MA::Pool::GetStatistics(). +*/ +struct Statistics +{ + /** \brief Number of D3D12 memory blocks allocated - `ID3D12Heap` objects and committed resources. + */ + UINT BlockCount; + /** \brief Number of D3D12MA::Allocation objects allocated. + + Committed allocations have their own blocks, so each one adds 1 to `AllocationCount` as well as `BlockCount`. + */ + UINT AllocationCount; + /** \brief Number of bytes allocated in memory blocks. + */ + UINT64 BlockBytes; + /** \brief Total number of bytes occupied by all D3D12MA::Allocation objects. + + Always less or equal than `BlockBytes`. + Difference `(BlockBytes - AllocationBytes)` is the amount of memory allocated from D3D12 + but unused by any D3D12MA::Allocation. + */ + UINT64 AllocationBytes; +}; + +/** \brief More detailed statistics than D3D12MA::Statistics. + +These are slower to calculate. Use for debugging purposes. +See functions: D3D12MA::Allocator::CalculateStatistics(), D3D12MA::Pool::CalculateStatistics(). + +Averages are not provided because they can be easily calculated as: + +\code +UINT64 AllocationSizeAvg = DetailedStats.Statistics.AllocationBytes / detailedStats.Statistics.AllocationCount; +UINT64 UnusedBytes = DetailedStats.Statistics.BlockBytes - DetailedStats.Statistics.AllocationBytes; +UINT64 UnusedRangeSizeAvg = UnusedBytes / DetailedStats.UnusedRangeCount; +\endcode +*/ +struct DetailedStatistics +{ + /// Basic statistics. + Statistics Stats; + /// Number of free ranges of memory between allocations. + UINT UnusedRangeCount; + /// Smallest allocation size. `UINT64_MAX` if there are 0 allocations. + UINT64 AllocationSizeMin; + /// Largest allocation size. 0 if there are 0 allocations. + UINT64 AllocationSizeMax; + /// Smallest empty range size. `UINT64_MAX` if there are 0 empty ranges. + UINT64 UnusedRangeSizeMin; + /// Largest empty range size. 0 if there are 0 empty ranges. + UINT64 UnusedRangeSizeMax; +}; + +/** \brief General statistics from current state of the allocator - +total memory usage across all memory heaps and segments. + +These are slower to calculate. Use for debugging purposes. +See function D3D12MA::Allocator::CalculateStatistics(). +*/ +struct TotalStatistics +{ + /** \brief One element for each type of heap located at the following indices: + + - 0 = `D3D12_HEAP_TYPE_DEFAULT` + - 1 = `D3D12_HEAP_TYPE_UPLOAD` + - 2 = `D3D12_HEAP_TYPE_READBACK` + - 3 = `D3D12_HEAP_TYPE_CUSTOM` + */ + DetailedStatistics HeapType[4]; + /** \brief One element for each memory segment group located at the following indices: + + - 0 = `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` + - 1 = `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL` + + Meaning of these segment groups is: + + - When `IsUMA() == FALSE` (discrete graphics card): + - `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` (index 0) represents GPU memory + (resources allocated in `D3D12_HEAP_TYPE_DEFAULT` or `D3D12_MEMORY_POOL_L1`). + - `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL` (index 1) represents system memory + (resources allocated in `D3D12_HEAP_TYPE_UPLOAD`, `D3D12_HEAP_TYPE_READBACK`, or `D3D12_MEMORY_POOL_L0`). + - When `IsUMA() == TRUE` (integrated graphics chip): + - `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` = (index 0) represents memory shared for all the resources. + - `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL` = (index 1) is unused and always 0. + */ + DetailedStatistics MemorySegmentGroup[2]; + /// Total statistics from all memory allocated from D3D12. + DetailedStatistics Total; +}; + +/** \brief %Statistics of current memory usage and available budget for a specific memory segment group. + +These are fast to calculate. See function D3D12MA::Allocator::GetBudget(). +*/ +struct Budget +{ + /** \brief %Statistics fetched from the library. + */ + Statistics Stats; + /** \brief Estimated current memory usage of the program. + + Fetched from system using `IDXGIAdapter3::QueryVideoMemoryInfo` if possible. + + It might be different than `BlockBytes` (usually higher) due to additional implicit objects + also occupying the memory, like swapchain, pipeline state objects, descriptor heaps, command lists, or + heaps and resources allocated outside of this library, if any. + */ + UINT64 UsageBytes; + /** \brief Estimated amount of memory available to the program. + + Fetched from system using `IDXGIAdapter3::QueryVideoMemoryInfo` if possible. + + It might be different (most probably smaller) than memory capacity returned + by D3D12MA::Allocator::GetMemoryCapacity() due to factors + external to the program, decided by the operating system. + Difference `BudgetBytes - UsageBytes` is the amount of additional memory that can probably + be allocated without problems. Exceeding the budget may result in various problems. + */ + UINT64 BudgetBytes; +}; + + +/// \brief Represents single memory allocation done inside VirtualBlock. +struct D3D12MA_API VirtualAllocation +{ + /// \brief Unique idenitfier of current allocation. 0 means null/invalid. + AllocHandle AllocHandle; }; /** \brief Represents single memory allocation. @@ -277,6 +470,9 @@ public: */ UINT64 GetOffset() const; + /// Returns alignment that resource was created with. + UINT64 GetAlignment() const { return m_Alignment; } + /** \brief Returns size in bytes of the allocation. - If you created a buffer or a texture together with the allocation using function D3D12MA::Allocator::CreateResource, @@ -293,12 +489,21 @@ public: */ ID3D12Resource* GetResource() const { return m_Resource; } + /// Releases the resource currently pointed by the allocation (if any), sets it to new one, incrementing its reference counter (if not null). + void SetResource(ID3D12Resource* pResource); + /** \brief Returns memory heap that the resource is created in. If the Allocation represents committed resource with implicit heap, returns NULL. */ ID3D12Heap* GetHeap() const; + /// Changes custom pointer for an allocation to a new value. + void SetPrivateData(void* pPrivateData) { m_pPrivateData = pPrivateData; } + + /// Get custom pointer associated with the allocation. + void* GetPrivateData() const { return m_pPrivateData; } + /** \brief Associates a name with the allocation object. This name is for use in debug diagnostics and tools. Internal copy of the string is made, so the memory pointed by the argument can be @@ -324,25 +529,28 @@ public: Returns `FALSE` if the memory could potentially contain garbage data. If it's a render-target or depth-stencil texture, it then needs proper initialization with `ClearRenderTargetView`, `ClearDepthStencilView`, `DiscardResource`, - or a copy operation, as described on page: - [ID3D12Device::CreatePlacedResource method - Notes on the required resource initialization](https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12device-createplacedresource#notes-on-the-required-resource-initialization). + or a copy operation, as described on page + "ID3D12Device::CreatePlacedResource method - Notes on the required resource initialization" in Microsoft documentation. Please note that rendering a fullscreen triangle or quad to the texture as a render target is not a proper way of initialization! See also articles: - ["Coming to DirectX 12: More control over memory allocation"](https://devblogs.microsoft.com/directx/coming-to-directx-12-more-control-over-memory-allocation/), - ["Initializing DX12 Textures After Allocation and Aliasing"](https://asawicki.info/news_1724_initializing_dx12_textures_after_allocation_and_aliasing). + + - "Coming to DirectX 12: More control over memory allocation" on DirectX Developer Blog + - ["Initializing DX12 Textures After Allocation and Aliasing"](https://asawicki.info/news_1724_initializing_dx12_textures_after_allocation_and_aliasing). */ BOOL WasZeroInitialized() const { return m_PackedData.WasZeroInitialized(); } protected: - virtual void ReleaseThis(); + void ReleaseThis() override; private: friend class AllocatorPimpl; friend class BlockVector; friend class CommittedAllocationList; friend class JsonWriter; + friend class BlockMetadata_Linear; + friend class DefragmentationContextPimpl; friend struct CommittedAllocationListItemTraits; template friend void D3D12MA_DELETE(const ALLOCATION_CALLBACKS&, T*); template friend class PoolAllocator; @@ -357,8 +565,9 @@ private: AllocatorPimpl* m_Allocator; UINT64 m_Size; + UINT64 m_Alignment; ID3D12Resource* m_Resource; - UINT m_CreationFrameIndex; + void* m_pPrivateData; wchar_t* m_Name; union @@ -372,7 +581,7 @@ private: struct { - UINT64 offset; + AllocHandle allocHandle; NormalBlock* block; } m_Placed; @@ -412,23 +621,236 @@ private: UINT m_WasZeroInitialized : 1; // BOOL } m_PackedData; - Allocation(AllocatorPimpl* allocator, UINT64 size, BOOL wasZeroInitialized); + Allocation(AllocatorPimpl* allocator, UINT64 size, UINT64 alignment, BOOL wasZeroInitialized); // Nothing here, everything already done in Release. - // ~Allocation() {} - + virtual ~Allocation() = default; + void InitCommitted(CommittedAllocationList* list); - void InitPlaced(UINT64 offset, UINT64 alignment, NormalBlock* block); + void InitPlaced(AllocHandle allocHandle, NormalBlock* block); void InitHeap(CommittedAllocationList* list, ID3D12Heap* heap); + void SwapBlockAllocation(Allocation* allocation); + // If the Allocation represents committed resource with implicit heap, returns UINT64_MAX. + AllocHandle GetAllocHandle() const; + NormalBlock* GetBlock(); template - void SetResource(ID3D12Resource* resource, const D3D12_RESOURCE_DESC_T* pResourceDesc); + void SetResourcePointer(ID3D12Resource* resource, const D3D12_RESOURCE_DESC_T* pResourceDesc); void FreeName(); D3D12MA_CLASS_NO_COPY(Allocation) }; + +/// Flags to be passed as DEFRAGMENTATION_DESC::Flags. +enum DEFRAGMENTATION_FLAGS +{ + /** Use simple but fast algorithm for defragmentation. + May not achieve best results but will require least time to compute and least allocations to copy. + */ + DEFRAGMENTATION_FLAG_ALGORITHM_FAST = 0x1, + /** Default defragmentation algorithm, applied also when no `ALGORITHM` flag is specified. + Offers a balance between defragmentation quality and the amount of allocations and bytes that need to be moved. + */ + DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED = 0x2, + /** Perform full defragmentation of memory. + Can result in notably more time to compute and allocations to copy, but will achieve best memory packing. + */ + DEFRAGMENTATION_FLAG_ALGORITHM_FULL = 0x4, + + /// A bit mask to extract only `ALGORITHM` bits from entire set of flags. + DEFRAGMENTATION_FLAG_ALGORITHM_MASK = + DEFRAGMENTATION_FLAG_ALGORITHM_FAST | + DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED | + DEFRAGMENTATION_FLAG_ALGORITHM_FULL +}; + +/** \brief Parameters for defragmentation. + +To be used with functions Allocator::BeginDefragmentation() and Pool::BeginDefragmentation(). +*/ +struct DEFRAGMENTATION_DESC +{ + /// Flags. + DEFRAGMENTATION_FLAGS Flags; + /** \brief Maximum numbers of bytes that can be copied during single pass, while moving allocations to different places. + + 0 means no limit. + */ + UINT64 MaxBytesPerPass; + /** \brief Maximum number of allocations that can be moved during single pass to a different place. + + 0 means no limit. + */ + UINT32 MaxAllocationsPerPass; +}; + +/// Operation performed on single defragmentation move. +enum DEFRAGMENTATION_MOVE_OPERATION +{ + /** Resource has been recreated at `pDstTmpAllocation`, data has been copied, old resource has been destroyed. + `pSrcAllocation` will be changed to point to the new place. This is the default value set by DefragmentationContext::BeginPass(). + */ + DEFRAGMENTATION_MOVE_OPERATION_COPY = 0, + /// Set this value if you cannot move the allocation. New place reserved at `pDstTmpAllocation` will be freed. `pSrcAllocation` will remain unchanged. + DEFRAGMENTATION_MOVE_OPERATION_IGNORE = 1, + /// Set this value if you decide to abandon the allocation and you destroyed the resource. New place reserved `pDstTmpAllocation` will be freed, along with `pSrcAllocation`. + DEFRAGMENTATION_MOVE_OPERATION_DESTROY = 2, +}; + +/// Single move of an allocation to be done for defragmentation. +struct DEFRAGMENTATION_MOVE +{ + /** \brief Operation to be performed on the allocation by DefragmentationContext::EndPass(). + Default value is #DEFRAGMENTATION_MOVE_OPERATION_COPY. You can modify it. + */ + DEFRAGMENTATION_MOVE_OPERATION Operation; + /// %Allocation that should be moved. + Allocation* pSrcAllocation; + /** \brief Temporary allocation pointing to destination memory that will replace `pSrcAllocation`. + + Use it to retrieve new `ID3D12Heap` and offset to create new `ID3D12Resource` and then store it here via Allocation::SetResource(). + + \warning Do not store this allocation in your data structures! It exists only temporarily, for the duration of the defragmentation pass, + to be used for storing newly created resource. DefragmentationContext::EndPass() will destroy it and make `pSrcAllocation` point to this memory. + */ + Allocation* pDstTmpAllocation; +}; + +/** \brief Parameters for incremental defragmentation steps. + +To be used with function DefragmentationContext::BeginPass(). +*/ +struct DEFRAGMENTATION_PASS_MOVE_INFO +{ + /// Number of elements in the `pMoves` array. + UINT32 MoveCount; + /** \brief Array of moves to be performed by the user in the current defragmentation pass. + + Pointer to an array of `MoveCount` elements, owned by %D3D12MA, created in DefragmentationContext::BeginPass(), destroyed in DefragmentationContext::EndPass(). + + For each element, you should: + + 1. Create a new resource in the place pointed by `pMoves[i].pDstTmpAllocation->GetHeap()` + `pMoves[i].pDstTmpAllocation->GetOffset()`. + 2. Store new resource in `pMoves[i].pDstTmpAllocation` by using Allocation::SetResource(). It will later replace old resource from `pMoves[i].pSrcAllocation`. + 3. Copy data from the `pMoves[i].pSrcAllocation` e.g. using `D3D12GraphicsCommandList::CopyResource`. + 4. Make sure these commands finished executing on the GPU. + + Only then you can finish defragmentation pass by calling DefragmentationContext::EndPass(). + After this call, the allocation will point to the new place in memory. + + Alternatively, if you cannot move specific allocation, + you can set DEFRAGMENTATION_MOVE::Operation to D3D12MA::DEFRAGMENTATION_MOVE_OPERATION_IGNORE. + + Alternatively, if you decide you want to completely remove the allocation, + set DEFRAGMENTATION_MOVE::Operation to D3D12MA::DEFRAGMENTATION_MOVE_OPERATION_DESTROY. + Then, after DefragmentationContext::EndPass() the allocation will be released. + */ + DEFRAGMENTATION_MOVE* pMoves; +}; + +/// %Statistics returned for defragmentation process by function DefragmentationContext::GetStats(). +struct DEFRAGMENTATION_STATS +{ + /// Total number of bytes that have been copied while moving allocations to different places. + UINT64 BytesMoved; + /// Total number of bytes that have been released to the system by freeing empty heaps. + UINT64 BytesFreed; + /// Number of allocations that have been moved to different places. + UINT32 AllocationsMoved; + /// Number of empty `ID3D12Heap` objects that have been released to the system. + UINT32 HeapsFreed; +}; + +/** \brief Represents defragmentation process in progress. + +You can create this object using Allocator::BeginDefragmentation (for default pools) or +Pool::BeginDefragmentation (for a custom pool). +*/ +class D3D12MA_API DefragmentationContext : public IUnknownImpl +{ +public: + /** \brief Starts single defragmentation pass. + + \param[out] pPassInfo Computed informations for current pass. + \returns + - `S_OK` if no more moves are possible. Then you can omit call to DefragmentationContext::EndPass() and simply end whole defragmentation. + - `S_FALSE` if there are pending moves returned in `pPassInfo`. You need to perform them, call DefragmentationContext::EndPass(), + and then preferably try another pass with DefragmentationContext::BeginPass(). + */ + HRESULT BeginPass(DEFRAGMENTATION_PASS_MOVE_INFO* pPassInfo); + /** \brief Ends single defragmentation pass. + + \param pPassInfo Computed informations for current pass filled by DefragmentationContext::BeginPass() and possibly modified by you. + \return Returns `S_OK` if no more moves are possible or `S_FALSE` if more defragmentations are possible. + + Ends incremental defragmentation pass and commits all defragmentation moves from `pPassInfo`. + After this call: + + - %Allocation at `pPassInfo[i].pSrcAllocation` that had `pPassInfo[i].Operation ==` #DEFRAGMENTATION_MOVE_OPERATION_COPY + (which is the default) will be pointing to the new destination place. + - %Allocation at `pPassInfo[i].pSrcAllocation` that had `pPassInfo[i].operation ==` #DEFRAGMENTATION_MOVE_OPERATION_DESTROY + will be released. + + If no more moves are possible you can end whole defragmentation. + */ + HRESULT EndPass(DEFRAGMENTATION_PASS_MOVE_INFO* pPassInfo); + /** \brief Returns statistics of the defragmentation performed so far. + */ + void GetStats(DEFRAGMENTATION_STATS* pStats); + +protected: + void ReleaseThis() override; + +private: + friend class Pool; + friend class Allocator; + template friend void D3D12MA_DELETE(const ALLOCATION_CALLBACKS&, T*); + + DefragmentationContextPimpl* m_Pimpl; + + DefragmentationContext(AllocatorPimpl* allocator, + const DEFRAGMENTATION_DESC& desc, + BlockVector* poolVector); + ~DefragmentationContext(); + + D3D12MA_CLASS_NO_COPY(DefragmentationContext) +}; + +/// \brief Bit flags to be used with POOL_DESC::Flags. +enum POOL_FLAGS +{ + /// Zero + POOL_FLAG_NONE = 0, + + /** \brief Enables alternative, linear allocation algorithm in this pool. + + Specify this flag to enable linear allocation algorithm, which always creates + new allocations after last one and doesn't reuse space from allocations freed in + between. It trades memory consumption for simplified algorithm and data + structure, which has better performance and uses less memory for metadata. + + By using this flag, you can achieve behavior of free-at-once, stack, + ring buffer, and double stack. + For details, see documentation chapter \ref linear_algorithm. + */ + POOL_FLAG_ALGORITHM_LINEAR = 0x1, + + /** \brief Optimization, allocate MSAA textures as committed resources always. + + Specify this flag to create MSAA textures with implicit heaps, as if they were created + with flag ALLOCATION_FLAG_COMMITTED. Usage of this flags enables pool to create its heaps + on smaller alignment not suitable for MSAA textures. + */ + POOL_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED = 0x2, + + // Bit mask to extract only `ALGORITHM` bits from entire set of flags. + POOL_FLAG_ALGORITHM_MASK = POOL_FLAG_ALGORITHM_LINEAR +}; + /// \brief Parameters of created D3D12MA::Pool object. To be used with D3D12MA::Allocator::CreatePool. struct POOL_DESC { + /// Flags. + POOL_FLAGS Flags; /** \brief The parameters of memory heap where allocations of this pool should be placed. In the simplest case, just fill it with zeros and set `Type` to one of: `D3D12_HEAP_TYPE_DEFAULT`, @@ -496,9 +918,17 @@ public: */ POOL_DESC GetDesc() const; - /** \brief Retrieves statistics from the current state of this pool. + /** \brief Retrieves basic statistics of the custom pool that are fast to calculate. + + \param[out] pStats %Statistics of the current pool. */ - void CalculateStats(StatInfo* pStats); + void GetStatistics(Statistics* pStats); + + /** \brief Retrieves detailed statistics of the custom pool that are slower to calculate. + + \param[out] pStats %Statistics of the current pool. + */ + void CalculateStatistics(DetailedStatistics* pStats); /** \brief Associates a name with the pool. This name is for use in debug diagnostics and tools. @@ -517,8 +947,21 @@ public: */ LPCWSTR GetName() const; + /** \brief Begins defragmentation process of the current pool. + + \param pDesc Structure filled with parameters of defragmentation. + \param[out] ppContext Context object that will manage defragmentation. + \returns + - `S_OK` if defragmentation can begin. + - `E_NOINTERFACE` if defragmentation is not supported. + + For more information about defragmentation, see documentation chapter: + [Defragmentation](@ref defragmentation). + */ + HRESULT BeginDefragmentation(const DEFRAGMENTATION_DESC* pDesc, DefragmentationContext** ppContext); + protected: - virtual void ReleaseThis(); + void ReleaseThis() override; private: friend class Allocator; @@ -533,6 +976,7 @@ private: D3D12MA_CLASS_NO_COPY(Pool) }; + /// \brief Bit flags to be used with ALLOCATOR_DESC::Flags. enum ALLOCATOR_FLAGS { @@ -551,8 +995,32 @@ enum ALLOCATOR_FLAGS /** Every allocation will have its own memory block. To be used for debugging purposes. - */ + */ ALLOCATOR_FLAG_ALWAYS_COMMITTED = 0x2, + + /** + Heaps created for the default pools will be created with flag `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED`, + allowing for their memory to be not zeroed by the system if possible, + which can speed up allocation. + + Only affects default pools. + To use the flag with @ref custom_pools, you need to add it manually: + + \code + poolDesc.heapFlags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + \endcode + + Only avaiable if `ID3D12Device8` is present. Otherwise, the flag is ignored. + */ + ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED = 0x4, + + /** \brief Optimization, allocate MSAA textures as committed resources always. + + Specify this flag to create MSAA textures with implicit heaps, as if they were created + with flag ALLOCATION_FLAG_COMMITTED. Usage of this flags enables all default pools + to create its heaps on smaller alignment not suitable for MSAA textures. + */ + ALLOCATOR_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED = 0x8, }; /// \brief Parameters of created Allocator object. To be used with CreateAllocator(). @@ -586,86 +1054,6 @@ struct ALLOCATOR_DESC IDXGIAdapter* pAdapter; }; -/** -\brief Number of D3D12 memory heap types supported. -*/ -const UINT HEAP_TYPE_COUNT = 4; - -/** -\brief Calculated statistics of memory usage in entire allocator. -*/ -struct StatInfo -{ - /// Number of memory blocks (heaps) allocated. - UINT BlockCount; - /// Number of D3D12MA::Allocation objects allocated. - UINT AllocationCount; - /// Number of free ranges of memory between allocations. - UINT UnusedRangeCount; - /// Total number of bytes occupied by all allocations. - UINT64 UsedBytes; - /// Total number of bytes occupied by unused ranges. - UINT64 UnusedBytes; - UINT64 AllocationSizeMin; - UINT64 AllocationSizeAvg; - UINT64 AllocationSizeMax; - UINT64 UnusedRangeSizeMin; - UINT64 UnusedRangeSizeAvg; - UINT64 UnusedRangeSizeMax; -}; - -/** -\brief General statistics from the current state of the allocator. -*/ -struct Stats -{ - /// Total statistics from all heap types. - StatInfo Total; - /** - One StatInfo for each type of heap located at the following indices: - 0 - DEFAULT, 1 - UPLOAD, 2 - READBACK, 3 - CUSTOM. - */ - StatInfo HeapType[HEAP_TYPE_COUNT]; -}; - -/** \brief Statistics of current memory usage and available budget, in bytes, for GPU or CPU memory. -*/ -struct Budget -{ - /** \brief Sum size of all memory blocks allocated from particular heap type, in bytes. - */ - UINT64 BlockBytes; - - /** \brief Sum size of all allocations created in particular heap type, in bytes. - - Always less or equal than `BlockBytes`. - Difference `BlockBytes - AllocationBytes` is the amount of memory allocated but unused - - available for new allocations or wasted due to fragmentation. - */ - UINT64 AllocationBytes; - - /** \brief Estimated current memory usage of the program, in bytes. - - Fetched from system using `IDXGIAdapter3::QueryVideoMemoryInfo` if enabled. - - It might be different than `BlockBytes` (usually higher) due to additional implicit objects - also occupying the memory, like swapchain, pipeline state objects, descriptor heaps, command lists, or - memory blocks allocated outside of this library, if any. - */ - UINT64 UsageBytes; - - /** \brief Estimated amount of memory available to the program, in bytes. - - Fetched from system using `IDXGIAdapter3::QueryVideoMemoryInfo` if enabled. - - It might be different (most probably smaller) than memory sizes reported in `DXGI_ADAPTER_DESC` due to factors - external to the program, like other programs also consuming system resources. - Difference `BudgetBytes - UsageBytes` is the amount of additional memory that can probably - be allocated without problems. Exceeding the budget may result in various problems. - */ - UINT64 BudgetBytes; -}; - /** \brief Represents main object of this library initialized for particular `ID3D12Device`. @@ -682,20 +1070,39 @@ public: const D3D12_FEATURE_DATA_D3D12_OPTIONS& GetD3D12Options() const; /** \brief Returns true if `D3D12_FEATURE_DATA_ARCHITECTURE1::UMA` was found to be true. - For more information about how to use it, see articles in Microsoft Docs: - - https://docs.microsoft.com/en-us/windows/win32/direct3d12/default-texture-mapping - - https://docs.microsoft.com/en-us/windows/win32/api/d3d12/ns-d3d12-d3d12_feature_data_architecture - - https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12device-getcustomheapproperties + For more information about how to use it, see articles in Microsoft Docs articles: + + - "UMA Optimizations: CPU Accessible Textures and Standard Swizzle" + - "D3D12_FEATURE_DATA_ARCHITECTURE structure (d3d12.h)" + - "ID3D12Device::GetCustomHeapProperties method (d3d12.h)" */ BOOL IsUMA() const; /** \brief Returns true if `D3D12_FEATURE_DATA_ARCHITECTURE1::CacheCoherentUMA` was found to be true. - For more information about how to use it, see articles in Microsoft Docs: - - https://docs.microsoft.com/en-us/windows/win32/direct3d12/default-texture-mapping - - https://docs.microsoft.com/en-us/windows/win32/api/d3d12/ns-d3d12-d3d12_feature_data_architecture - - https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12device-getcustomheapproperties + For more information about how to use it, see articles in Microsoft Docs articles: + + - "UMA Optimizations: CPU Accessible Textures and Standard Swizzle" + - "D3D12_FEATURE_DATA_ARCHITECTURE structure (d3d12.h)" + - "ID3D12Device::GetCustomHeapProperties method (d3d12.h)" */ BOOL IsCacheCoherentUMA() const; + /** \brief Returns total amount of memory of specific segment group, in bytes. + + \param memorySegmentGroup use `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` or DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL`. + + This information is taken from `DXGI_ADAPTER_DESC`. + It is not recommended to use this number. + You should preferably call GetBudget() and limit memory usage to D3D12MA::Budget::BudgetBytes instead. + + - When IsUMA() `== FALSE` (discrete graphics card): + - `GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_LOCAL)` returns the size of the video memory. + - `GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL)` returns the size of the system memory available for D3D12 resources. + - When IsUMA() `== TRUE` (integrated graphics chip): + - `GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_LOCAL)` returns the size of the shared memory available for all D3D12 resources. + All memory is considered "local". + - `GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL)` is not applicable and returns 0. + */ + UINT64 GetMemoryCapacity(UINT memorySegmentGroup) const; /** \brief Allocates memory and creates a D3D12 resource (buffer or texture). This is the main allocation function. @@ -818,25 +1225,42 @@ public: */ void SetCurrentFrameIndex(UINT frameIndex); - /** \brief Retrieves statistics from the current state of the allocator. - */ - void CalculateStats(Stats* pStats); + /** \brief Retrieves information about current memory usage and budget. - /** \brief Retrieves information about current memory budget. + \param[out] pLocalBudget Optional, can be null. + \param[out] pNonLocalBudget Optional, can be null. - \param[out] pGpuBudget Optional, can be null. - \param[out] pCpuBudget Optional, can be null. + - When IsUMA() `== FALSE` (discrete graphics card): + - `pLocalBudget` returns the budget of the video memory. + - `pNonLocalBudget` returns the budget of the system memory available for D3D12 resources. + - When IsUMA() `== TRUE` (integrated graphics chip): + - `pLocalBudget` returns the budget of the shared memory available for all D3D12 resources. + All memory is considered "local". + - `pNonLocalBudget` is not applicable and returns zeros. This function is called "get" not "calculate" because it is very fast, suitable to be called - every frame or every allocation. For more detailed statistics use CalculateStats(). + every frame or every allocation. For more detailed statistics use CalculateStatistics(). Note that when using allocator from multiple threads, returned information may immediately become outdated. */ - void GetBudget(Budget* pGpuBudget, Budget* pCpuBudget); + void GetBudget(Budget* pLocalBudget, Budget* pNonLocalBudget); - /// Builds and returns statistics as a string in JSON format. - /** @param[out] ppStatsString Must be freed using Allocator::FreeStatsString. + /** \brief Retrieves statistics from current state of the allocator. + + This function is called "calculate" not "get" because it has to traverse all + internal data structures, so it may be quite slow. Use it for debugging purposes. + For faster but more brief statistics suitable to be called every frame or every allocation, + use GetBudget(). + + Note that when using allocator from multiple threads, returned information may immediately + become outdated. + */ + void CalculateStatistics(TotalStatistics* pStats); + + /** \brief Builds and returns statistics as a string in JSON format. + * + @param[out] ppStatsString Must be freed using Allocator::FreeStatsString. @param DetailedMap `TRUE` to include full list of allocations (can make the string quite long), `FALSE` to only return statistics. */ void BuildStatsString(WCHAR** ppStatsString, BOOL DetailedMap) const; @@ -844,12 +1268,23 @@ public: /// Frees memory of a string returned from Allocator::BuildStatsString. void FreeStatsString(WCHAR* pStatsString) const; + /** \brief Begins defragmentation process of the default pools. + + \param pDesc Structure filled with parameters of defragmentation. + \param[out] ppContext Context object that will manage defragmentation. + + For more information about defragmentation, see documentation chapter: + [Defragmentation](@ref defragmentation). + */ + void BeginDefragmentation(const DEFRAGMENTATION_DESC* pDesc, DefragmentationContext** ppContext); + protected: - virtual void ReleaseThis(); + void ReleaseThis() override; private: friend D3D12MA_API HRESULT CreateAllocator(const ALLOCATOR_DESC*, Allocator**); template friend void D3D12MA_DELETE(const ALLOCATION_CALLBACKS&, T*); + friend class DefragmentationContext; friend class Pool; Allocator(const ALLOCATION_CALLBACKS& allocationCallbacks, const ALLOCATOR_DESC& desc); @@ -860,9 +1295,35 @@ private: D3D12MA_CLASS_NO_COPY(Allocator) }; + +/// \brief Bit flags to be used with VIRTUAL_BLOCK_DESC::Flags. +enum VIRTUAL_BLOCK_FLAGS +{ + /// Zero + VIRTUAL_BLOCK_FLAG_NONE = 0, + + /** \brief Enables alternative, linear allocation algorithm in this virtual block. + + Specify this flag to enable linear allocation algorithm, which always creates + new allocations after last one and doesn't reuse space from allocations freed in + between. It trades memory consumption for simplified algorithm and data + structure, which has better performance and uses less memory for metadata. + + By using this flag, you can achieve behavior of free-at-once, stack, + ring buffer, and double stack. + For details, see documentation chapter \ref linear_algorithm. + */ + VIRTUAL_BLOCK_FLAG_ALGORITHM_LINEAR = POOL_FLAG_ALGORITHM_LINEAR, + + // Bit mask to extract only `ALGORITHM` bits from entire set of flags. + VIRTUAL_BLOCK_FLAG_ALGORITHM_MASK = POOL_FLAG_ALGORITHM_MASK +}; + /// Parameters of created D3D12MA::VirtualBlock object to be passed to CreateVirtualBlock(). struct VIRTUAL_BLOCK_DESC { + /// Flags. + VIRTUAL_BLOCK_FLAGS Flags; /** \brief Total size of the block. Sizes can be expressed in bytes or any units you want as long as you are consistent in using them. @@ -876,9 +1337,38 @@ struct VIRTUAL_BLOCK_DESC const ALLOCATION_CALLBACKS* pAllocationCallbacks; }; +/// \brief Bit flags to be used with VIRTUAL_ALLOCATION_DESC::Flags. +enum VIRTUAL_ALLOCATION_FLAGS +{ + /// Zero + VIRTUAL_ALLOCATION_FLAG_NONE = 0, + + /** \brief Allocation will be created from upper stack in a double stack pool. + + This flag is only allowed for virtual blocks created with #VIRTUAL_BLOCK_FLAG_ALGORITHM_LINEAR flag. + */ + VIRTUAL_ALLOCATION_FLAG_UPPER_ADDRESS = ALLOCATION_FLAG_UPPER_ADDRESS, + + /// Allocation strategy that tries to minimize memory usage. + VIRTUAL_ALLOCATION_FLAG_STRATEGY_MIN_MEMORY = ALLOCATION_FLAG_STRATEGY_MIN_MEMORY, + /// Allocation strategy that tries to minimize allocation time. + VIRTUAL_ALLOCATION_FLAG_STRATEGY_MIN_TIME = ALLOCATION_FLAG_STRATEGY_MIN_TIME, + /** \brief Allocation strategy that chooses always the lowest offset in available space. + This is not the most efficient strategy but achieves highly packed data. + */ + VIRTUAL_ALLOCATION_FLAG_STRATEGY_MIN_OFFSET = ALLOCATION_FLAG_STRATEGY_MIN_OFFSET, + /** \brief A bit mask to extract only `STRATEGY` bits from entire set of flags. + + These strategy flags are binary compatible with equivalent flags in #ALLOCATION_FLAGS. + */ + VIRTUAL_ALLOCATION_FLAG_STRATEGY_MASK = ALLOCATION_FLAG_STRATEGY_MASK, +}; + /// Parameters of created virtual allocation to be passed to VirtualBlock::Allocate(). struct VIRTUAL_ALLOCATION_DESC { + /// Flags. + VIRTUAL_ALLOCATION_FLAGS Flags; /** \brief Size of the allocation. Cannot be zero. @@ -893,22 +1383,24 @@ struct VIRTUAL_ALLOCATION_DESC It can be fetched or changed later. */ - void* pUserData; + void* pPrivateData; }; /// Parameters of an existing virtual allocation, returned by VirtualBlock::GetAllocationInfo(). struct VIRTUAL_ALLOCATION_INFO { + /// \brief Offset of the allocation. + UINT64 Offset; /** \brief Size of the allocation. Same value as passed in VIRTUAL_ALLOCATION_DESC::Size. */ - UINT64 size; + UINT64 Size; /** \brief Custom pointer associated with the allocation. - Same value as passed in VIRTUAL_ALLOCATION_DESC::pUserData or VirtualBlock::SetAllocationUserData(). + Same value as passed in VIRTUAL_ALLOCATION_DESC::pPrivateData or VirtualBlock::SetAllocationPrivateData(). */ - void* pUserData; + void* pPrivateData; }; /** \brief Represents pure allocation algorithm and a data structure with allocations in some memory block, without actually allocating any GPU memory. @@ -928,29 +1420,40 @@ public: /** \brief Returns true if the block is empty - contains 0 allocations. */ BOOL IsEmpty() const; - /** \brief Returns information about an allocation at given offset - its size and custom pointer. + /** \brief Returns information about an allocation - its offset, size and custom pointer. */ - void GetAllocationInfo(UINT64 offset, VIRTUAL_ALLOCATION_INFO* pInfo) const; + void GetAllocationInfo(VirtualAllocation allocation, VIRTUAL_ALLOCATION_INFO* pInfo) const; /** \brief Creates new allocation. \param pDesc - \param[out] pOffset Offset of the new allocation, which can also be treated as an unique identifier of the allocation within this block. `UINT64_MAX` if allocation failed. + \param[out] pAllocation Unique indentifier of the new allocation within single block. + \param[out] pOffset Returned offset of the new allocation. Optional, can be null. \return `S_OK` if allocation succeeded, `E_OUTOFMEMORY` if it failed. + + If the allocation failed, `pAllocation->AllocHandle` is set to 0 and `pOffset`, if not null, is set to `UINT64_MAX`. */ - HRESULT Allocate(const VIRTUAL_ALLOCATION_DESC* pDesc, UINT64* pOffset); - /** \brief Frees the allocation at given offset. + HRESULT Allocate(const VIRTUAL_ALLOCATION_DESC* pDesc, VirtualAllocation* pAllocation, UINT64* pOffset); + /** \brief Frees the allocation. + + Calling this function with `allocation.AllocHandle == 0` is correct and does nothing. */ - void FreeAllocation(UINT64 offset); + void FreeAllocation(VirtualAllocation allocation); /** \brief Frees all the allocations. */ void Clear(); - /** \brief Changes custom pointer for an allocation at given offset to a new value. + /** \brief Changes custom pointer for an allocation to a new value. */ - void SetAllocationUserData(UINT64 offset, void* pUserData); + void SetAllocationPrivateData(VirtualAllocation allocation, void* pPrivateData); + /** \brief Retrieves basic statistics of the virtual block that are fast to calculate. - /** \brief Retrieves statistics from the current state of the block. + \param[out] pStats %Statistics of the virtual block. */ - void CalculateStats(StatInfo* pInfo) const; + void GetStatistics(Statistics* pStats) const; + /** \brief Retrieves detailed statistics of the virtual block that are slower to calculate. + + \param[out] pStats %Statistics of the virtual block. + */ + void CalculateStatistics(DetailedStatistics* pStats) const; /** \brief Builds and returns statistics as a string in JSON format, including the list of allocations with their parameters. @param[out] ppStatsString Must be freed using VirtualBlock::FreeStatsString. @@ -962,7 +1465,7 @@ public: void FreeStatsString(WCHAR* pStatsString) const; protected: - virtual void ReleaseThis(); + void ReleaseThis() override; private: friend D3D12MA_API HRESULT CreateVirtualBlock(const VIRTUAL_BLOCK_DESC*, VirtualBlock**); @@ -976,6 +1479,7 @@ private: D3D12MA_CLASS_NO_COPY(VirtualBlock) }; + /** \brief Creates new main D3D12MA::Allocator object and returns it through `ppAllocator`. You normally only need to call it once and keep a single Allocator object for your `ID3D12Device`. @@ -992,7 +1496,11 @@ D3D12MA_API HRESULT CreateVirtualBlock(const VIRTUAL_BLOCK_DESC* pDesc, VirtualB /// \cond INTERNAL DEFINE_ENUM_FLAG_OPERATORS(D3D12MA::ALLOCATION_FLAGS); +DEFINE_ENUM_FLAG_OPERATORS(D3D12MA::DEFRAGMENTATION_FLAGS); DEFINE_ENUM_FLAG_OPERATORS(D3D12MA::ALLOCATOR_FLAGS); +DEFINE_ENUM_FLAG_OPERATORS(D3D12MA::POOL_FLAGS); +DEFINE_ENUM_FLAG_OPERATORS(D3D12MA::VIRTUAL_BLOCK_FLAGS); +DEFINE_ENUM_FLAG_OPERATORS(D3D12MA::VIRTUAL_ALLOCATION_FLAGS); /// \endcond /** @@ -1083,7 +1591,7 @@ resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; D3D12MA::ALLOCATION_DESC allocationDesc = {}; -allocDesc.HeapType = D3D12_HEAP_TYPE_DEFAULT; +allocationDesc.HeapType = D3D12_HEAP_TYPE_DEFAULT; D3D12Resource* resource; D3D12MA::Allocation* allocation; @@ -1128,10 +1636,10 @@ manually. The process of getting regular CPU-side pointer to the memory of a resource in Direct3D is called "mapping". There are rules and restrictions to this process, -as described in D3D12 documentation of [ID3D12Resource::Map method](https://docs.microsoft.com/en-us/windows/desktop/api/d3d12/nf-d3d12-id3d12resource-map). +as described in D3D12 documentation of `ID3D12Resource::Map` method. Mapping happens on the level of particular resources, not entire memory heaps, -and so it is out of scope of this library. Just as the linked documentation says: +and so it is out of scope of this library. Just as the documentation of the `Map` function says: - Returned pointer refers to data of particular subresource, not entire memory heap. - You can map same resource multiple times. It is ref-counted internally. @@ -1231,8 +1739,8 @@ While it is recommended to use default pools whenever possible for simplicity an more opportunities for internal optimizations, custom pools may be useful in following cases: - To keep some resources separate from others in memory. -- To keep track of memory usage of just a specific group of resources. Statistics can be queried using - D3D12MA::Pool::CalculateStats. +- To keep track of memory usage of just a specific group of resources. %Statistics can be queried using + D3D12MA::Pool::CalculateStatistics. - To use specific size of a memory block (`ID3D12Heap`). To set it, use member D3D12MA::POOL_DESC::BlockSize. When set to 0, the library uses automatically determined, variable block sizes. - To reserve some minimum amount of memory allocated. To use it, set member D3D12MA::POOL_DESC::MinBlockCount. @@ -1265,6 +1773,206 @@ in some cases, e.g. to have separate memory usage statistics for some group of r extended allocation parameters, like custom `D3D12_HEAP_PROPERTIES`, which are available only in custom pools. +\page defragmentation Defragmentation + +Interleaved allocations and deallocations of many objects of varying size can +cause fragmentation over time, which can lead to a situation where the library is unable +to find a continuous range of free memory for a new allocation despite there is +enough free space, just scattered across many small free ranges between existing +allocations. + +To mitigate this problem, you can use defragmentation feature. +It doesn't happen automatically though and needs your cooperation, +because %D3D12MA is a low level library that only allocates memory. +It cannot recreate buffers and textures in a new place as it doesn't remember the contents of `D3D12_RESOURCE_DESC` structure. +It cannot copy their contents as it doesn't record any commands to a command list. + +Example: + +\code +D3D12MA::DEFRAGMENTATION_DESC defragDesc = {}; +defragDesc.Flags = D3D12MA::DEFRAGMENTATION_FLAG_ALGORITHM_FAST; + +D3D12MA::DefragmentationContext* defragCtx; +allocator->BeginDefragmentation(&defragDesc, &defragCtx); + +for(;;) +{ + D3D12MA::DEFRAGMENTATION_PASS_MOVE_INFO pass; + HRESULT hr = defragCtx->BeginPass(&pass); + if(hr == S_OK) + break; + else if(hr != S_FALSE) + // Handle error... + + for(UINT i = 0; i < pass.MoveCount; ++i) + { + // Inspect pass.pMoves[i].pSrcAllocation, identify what buffer/texture it represents. + MyEngineResourceData* resData = (MyEngineResourceData*)pMoves[i].pSrcAllocation->GetPrivateData(); + + // Recreate this buffer/texture as placed at pass.pMoves[i].pDstTmpAllocation. + D3D12_RESOURCE_DESC resDesc = ... + ID3D12Resource* newRes; + hr = device->CreatePlacedResource( + pass.pMoves[i].pDstTmpAllocation->GetHeap(), + pass.pMoves[i].pDstTmpAllocation->GetOffset(), &resDesc, + D3D12_RESOURCE_STATE_COPY_DEST, NULL, IID_PPV_ARGS(&newRes)); + // Check hr... + + // Store new resource in the pDstTmpAllocation. + pass.pMoves[i].pDstTmpAllocation->SetResource(newRes); + + // Copy its content to the new place. + cmdList->CopyResource( + pass.pMoves[i].pDstTmpAllocation->GetResource(), + pass.pMoves[i].pSrcAllocation->GetResource()); + } + + // Make sure the copy commands finished executing. + cmdQueue->ExecuteCommandLists(...); + // ... + WaitForSingleObject(fenceEvent, INFINITE); + + // Update appropriate descriptors to point to the new places... + + hr = defragCtx->EndPass(&pass); + if(hr == S_OK) + break; + else if(hr != S_FALSE) + // Handle error... +} + +defragCtx->Release(); +\endcode + +Although functions like D3D12MA::Allocator::CreateResource() +create an allocation and a buffer/texture at once, these are just a shortcut for +allocating memory and creating a placed resource. +Defragmentation works on memory allocations only. You must handle the rest manually. +Defragmentation is an iterative process that should repreat "passes" as long as related functions +return `S_FALSE` not `S_OK`. +In each pass: + +1. D3D12MA::DefragmentationContext::BeginPass() function call: + - Calculates and returns the list of allocations to be moved in this pass. + Note this can be a time-consuming process. + - Reserves destination memory for them by creating temporary destination allocations + that you can query for their `ID3D12Heap` + offset using methods like D3D12MA::Allocation::GetHeap(). +2. Inside the pass, **you should**: + - Inspect the returned list of allocations to be moved. + - Create new buffers/textures as placed at the returned destination temporary allocations. + - Copy data from source to destination resources if necessary. + - Store the pointer to the new resource in the temporary destination allocation. +3. D3D12MA::DefragmentationContext::EndPass() function call: + - Frees the source memory reserved for the allocations that are moved. + - Modifies source D3D12MA::Allocation objects that are moved to point to the destination reserved memory + and destination resource, while source resource is released. + - Frees `ID3D12Heap` blocks that became empty. + +Defragmentation algorithm tries to move all suitable allocations. +You can, however, refuse to move some of them inside a defragmentation pass, by setting +`pass.pMoves[i].Operation` to D3D12MA::DEFRAGMENTATION_MOVE_OPERATION_IGNORE. +This is not recommended and may result in suboptimal packing of the allocations after defragmentation. +If you cannot ensure any allocation can be moved, it is better to keep movable allocations separate in a custom pool. + +Inside a pass, for each allocation that should be moved: + +- You should copy its data from the source to the destination place by calling e.g. `CopyResource()`. + - You need to make sure these commands finished executing before the source buffers/textures are released by D3D12MA::DefragmentationContext::EndPass(). +- If a resource doesn't contain any meaningful data, e.g. it is a transient render-target texture to be cleared, + filled, and used temporarily in each rendering frame, you can just recreate this texture + without copying its data. +- If the resource is in `D3D12_HEAP_TYPE_READBACK` memory, you can copy its data on the CPU + using `memcpy()`. +- If you cannot move the allocation, you can set `pass.pMoves[i].Operation` to D3D12MA::DEFRAGMENTATION_MOVE_OPERATION_IGNORE. + This will cancel the move. + - D3D12MA::DefragmentationContext::EndPass() will then free the destination memory + not the source memory of the allocation, leaving it unchanged. +- If you decide the allocation is unimportant and can be destroyed instead of moved (e.g. it wasn't used for long time), + you can set `pass.pMoves[i].Operation` to D3D12MA::DEFRAGMENTATION_MOVE_OPERATION_DESTROY. + - D3D12MA::DefragmentationContext::EndPass() will then free both source and destination memory, and will destroy the source D3D12MA::Allocation object. + +You can defragment a specific custom pool by calling D3D12MA::Pool::BeginDefragmentation +or all the default pools by calling D3D12MA::Allocator::BeginDefragmentation (like in the example above). + +Defragmentation is always performed in each pool separately. +Allocations are never moved between different heap types. +The size of the destination memory reserved for a moved allocation is the same as the original one. +Alignment of an allocation as it was determined using `GetResourceAllocationInfo()` is also respected after defragmentation. +Buffers/textures should be recreated with the same `D3D12_RESOURCE_DESC` parameters as the original ones. + +You can perform the defragmentation incrementally to limit the number of allocations and bytes to be moved +in each pass, e.g. to call it in sync with render frames and not to experience too big hitches. +See members: D3D12MA::DEFRAGMENTATION_DESC::MaxBytesPerPass, D3D12MA::DEFRAGMENTATION_DESC::MaxAllocationsPerPass. + +It is also safe to perform the defragmentation asynchronously to render frames and other Direct3D 12 and %D3D12MA +usage, possibly from multiple threads, with the exception that allocations +returned in D3D12MA::DEFRAGMENTATION_PASS_MOVE_INFO::pMoves shouldn't be released until the defragmentation pass is ended. + +Mapping is out of scope of this library and so it is not preserved after an allocation is moved during defragmentation. +You need to map the new resource yourself if needed. + +\note Defragmentation is not supported in custom pools created with D3D12MA::POOL_FLAG_ALGORITHM_LINEAR. + + +\page statistics Statistics + +This library contains several functions that return information about its internal state, +especially the amount of memory allocated from D3D12. + +\section statistics_numeric_statistics Numeric statistics + +If you need to obtain basic statistics about memory usage per memory segment group, together with current budget, +you can call function D3D12MA::Allocator::GetBudget() and inspect structure D3D12MA::Budget. +This is useful to keep track of memory usage and stay withing budget. +Example: + +\code +D3D12MA::Budget localBudget; +allocator->GetBudget(&localBudget, NULL); + +printf("My GPU memory currently has %u allocations taking %llu B,\n", + localBudget.Statistics.AllocationCount, + localBudget.Statistics.AllocationBytes); +printf("allocated out of %u D3D12 memory heaps taking %llu B,\n", + localBudget.Statistics.BlockCount, + localBudget.Statistics.BlockBytes); +printf("D3D12 reports total usage %llu B with budget %llu B.\n", + localBudget.UsageBytes, + localBudget.BudgetBytes); +\endcode + +You can query for more detailed statistics per heap type, memory segment group, and totals, +including minimum and maximum allocation size and unused range size, +by calling function D3D12MA::Allocator::CalculateStatistics() and inspecting structure D3D12MA::TotalStatistics. +This function is slower though, as it has to traverse all the internal data structures, +so it should be used only for debugging purposes. + +You can query for statistics of a custom pool using function D3D12MA::Pool::GetStatistics() +or D3D12MA::Pool::CalculateStatistics(). + +You can query for information about a specific allocation using functions of the D3D12MA::Allocation class, +e.g. `GetSize()`, `GetOffset()`, `GetHeap()`. + +\section statistics_json_dump JSON dump + +You can dump internal state of the allocator to a string in JSON format using function D3D12MA::Allocator::BuildStatsString(). +The result is guaranteed to be correct JSON. +It uses Windows Unicode (UTF-16) encoding. +Any strings provided by user (see D3D12MA::Allocation::SetName()) +are copied as-is and properly escaped for JSON. +It must be freed using function D3D12MA::Allocator::FreeStatsString(). + +The format of this JSON string is not part of official documentation of the library, +but it will not change in backward-incompatible way without increasing library major version number +and appropriate mention in changelog. + +The JSON string contains all the data that can be obtained using D3D12MA::Allocator::CalculateStatistics(). +It can also contain detailed map of allocated memory blocks and their regions - +free and occupied by allocations. +This allows e.g. to visualize the memory or assess fragmentation. + + \page resource_aliasing Resource aliasing (overlap) New explicit graphics APIs (Vulkan and Direct3D 12), thanks to manual memory @@ -1368,7 +2076,7 @@ Additional considerations: - D3D12 also allows to interpret contents of memory between aliasing resources consistently in some cases, which is called "data inheritance". For details, see - Microsoft documentation, chapter [Memory Aliasing and Data Inheritance](https://docs.microsoft.com/en-us/windows/win32/direct3d12/memory-aliasing-and-data-inheritance). + Microsoft documentation chapter "Memory Aliasing and Data Inheritance". - You can create more complex layout where different textures and buffers are bound at different offsets inside one large allocation. For example, one can imagine a big texture used in some render passes, aliasing with a set of many small buffers @@ -1379,6 +2087,93 @@ Additional considerations: Otherwise they must be placed in different memory heap types, and thus aliasing them is not possible. +\page linear_algorithm Linear allocation algorithm + +Each D3D12 memory block managed by this library has accompanying metadata that +keeps track of used and unused regions. By default, the metadata structure and +algorithm tries to find best place for new allocations among free regions to +optimize memory usage. This way you can allocate and free objects in any order. + +![Default allocation algorithm](../gfx/Linear_allocator_1_algo_default.png) + +Sometimes there is a need to use simpler, linear allocation algorithm. You can +create custom pool that uses such algorithm by adding flag +D3D12MA::POOL_FLAG_ALGORITHM_LINEAR to D3D12MA::POOL_DESC::Flags while creating +D3D12MA::Pool object. Then an alternative metadata management is used. It always +creates new allocations after last one and doesn't reuse free regions after +allocations freed in the middle. It results in better allocation performance and +less memory consumed by metadata. + +![Linear allocation algorithm](../gfx/Linear_allocator_2_algo_linear.png) + +With this one flag, you can create a custom pool that can be used in many ways: +free-at-once, stack, double stack, and ring buffer. See below for details. +You don't need to specify explicitly which of these options you are going to use - it is detected automatically. + +\section linear_algorithm_free_at_once Free-at-once + +In a pool that uses linear algorithm, you still need to free all the allocations +individually by calling `allocation->Release()`. You can free +them in any order. New allocations are always made after last one - free space +in the middle is not reused. However, when you release all the allocation and +the pool becomes empty, allocation starts from the beginning again. This way you +can use linear algorithm to speed up creation of allocations that you are going +to release all at once. + +![Free-at-once](../gfx/Linear_allocator_3_free_at_once.png) + +This mode is also available for pools created with D3D12MA::POOL_DESC::MaxBlockCount +value that allows multiple memory blocks. + +\section linear_algorithm_stack Stack + +When you free an allocation that was created last, its space can be reused. +Thanks to this, if you always release allocations in the order opposite to their +creation (LIFO - Last In First Out), you can achieve behavior of a stack. + +![Stack](../gfx/Linear_allocator_4_stack.png) + +This mode is also available for pools created with D3D12MA::POOL_DESC::MaxBlockCount +value that allows multiple memory blocks. + +\section linear_algorithm_double_stack Double stack + +The space reserved by a custom pool with linear algorithm may be used by two +stacks: + +- First, default one, growing up from offset 0. +- Second, "upper" one, growing down from the end towards lower offsets. + +To make allocation from the upper stack, add flag D3D12MA::ALLOCATION_FLAG_UPPER_ADDRESS +to D3D12MA::ALLOCATION_DESC::Flags. + +![Double stack](../gfx/Linear_allocator_7_double_stack.png) + +Double stack is available only in pools with one memory block - +D3D12MA::POOL_DESC::MaxBlockCount must be 1. Otherwise behavior is undefined. + +When the two stacks' ends meet so there is not enough space between them for a +new allocation, such allocation fails with usual `E_OUTOFMEMORY` error. + +\section linear_algorithm_ring_buffer Ring buffer + +When you free some allocations from the beginning and there is not enough free space +for a new one at the end of a pool, allocator's "cursor" wraps around to the +beginning and starts allocation there. Thanks to this, if you always release +allocations in the same order as you created them (FIFO - First In First Out), +you can achieve behavior of a ring buffer / queue. + +![Ring buffer](../gfx/Linear_allocator_5_ring_buffer.png) + +Ring buffer is available only in pools with one memory block - +D3D12MA::POOL_DESC::MaxBlockCount must be 1. Otherwise behavior is undefined. + +\section linear_algorithm_additional_considerations Additional considerations + +Linear algorithm can also be used with \ref virtual_allocator. +See flag D3D12MA::VIRTUAL_BLOCK_FLAG_ALGORITHM_LINEAR. + + \page virtual_allocator Virtual allocator As an extra feature, the core allocation algorithm of the library is exposed through a simple and convenient API of "virtual allocator". @@ -1409,14 +2204,13 @@ HRESULT hr = CreateVirtualBlock(&blockDesc, &block); D3D12MA::VirtualBlock object contains internal data structure that keeps track of free and occupied regions using the same code as the main D3D12 memory allocator. -However, there is no "virtual allocation" object. -When you request a new allocation, a `UINT64` number is returned. -It is an offset inside the block where the allocation has been placed, but it also uniquely identifies the allocation within this block. +A single allocation is identified by a lightweight structure D3D12MA::VirtualAllocation. +You will also likely want to know the offset at which the allocation was made in the block. In order to make an allocation: -# Fill in D3D12MA::VIRTUAL_ALLOCATION_DESC structure. --# Call D3D12MA::VirtualBlock::Allocate. Get new `UINT64 offset` that identifies the allocation. +-# Call D3D12MA::VirtualBlock::Allocate. Get new D3D12MA::VirtualAllocation value that identifies the allocation. Example: @@ -1424,8 +2218,9 @@ Example: D3D12MA::VIRTUAL_ALLOCATION_DESC allocDesc = {}; allocDesc.Size = 4096; // 4 KB +D3D12MA::VirtualAllocation alloc; UINT64 allocOffset; -hr = block->Allocate(&allocDesc, &allocOffset); +hr = block->Allocate(&allocDesc, &alloc, &allocOffset); if(SUCCEEDED(hr)) { // Use the 4 KB of your memory starting at allocOffset. @@ -1439,22 +2234,22 @@ else \section virtual_allocator_deallocation Deallocation When no longer needed, an allocation can be freed by calling D3D12MA::VirtualBlock::FreeAllocation. -You can only pass to this function the exact offset that was previously returned by D3D12MA::VirtualBlock::Allocate -and not any other location within the memory. -When whole block is no longer needed, the block object can be released by calling D3D12MA::VirtualBlock::Release. +When whole block is no longer needed, the block object can be released by calling `block->Release()`. All allocations must be freed before the block is destroyed, which is checked internally by an assert. However, if you don't want to call `block->FreeAllocation` for each allocation, you can use D3D12MA::VirtualBlock::Clear to free them all at once - -a feature not available in normal D3D12 memory allocator. Example: +a feature not available in normal D3D12 memory allocator. + +Example: \code -block->FreeAllocation(allocOffset); +block->FreeAllocation(alloc); block->Release(); \endcode \section virtual_allocator_allocation_parameters Allocation parameters -You can attach a custom pointer to each allocation by using D3D12MA::VirtualBlock::SetAllocationUserData. +You can attach a custom pointer to each allocation by using D3D12MA::VirtualBlock::SetAllocationPrivateData. Its default value is `NULL`. It can be used to store any data that needs to be associated with that allocation - e.g. an index, a handle, or a pointer to some larger data structure containing more information. Example: @@ -1466,20 +2261,20 @@ struct CustomAllocData }; CustomAllocData* allocData = new CustomAllocData(); allocData->m_AllocName = "My allocation 1"; -block->SetAllocationUserData(allocOffset, allocData); +block->SetAllocationPrivateData(alloc, allocData); \endcode -The pointer can later be fetched, along with allocation size, by passing the allocation offset to function +The pointer can later be fetched, along with allocation offset and size, by passing the allocation handle to function D3D12MA::VirtualBlock::GetAllocationInfo and inspecting returned structure D3D12MA::VIRTUAL_ALLOCATION_INFO. If you allocated a new object to be used as the custom pointer, don't forget to delete that object before freeing the allocation! Example: \code VIRTUAL_ALLOCATION_INFO allocInfo; -block->GetAllocationInfo(allocOffset, &allocInfo); -delete (CustomAllocData*)allocInfo.pUserData; +block->GetAllocationInfo(alloc, &allocInfo); +delete (CustomAllocData*)allocInfo.pPrivateData; -block->FreeAllocation(allocOffset); +block->FreeAllocation(alloc); \endcode \section virtual_allocator_alignment_and_units Alignment and units @@ -1493,8 +2288,9 @@ D3D12MA::VIRTUAL_ALLOCATION_DESC allocDesc = {}; allocDesc.Size = 4096; // 4 KB allocDesc.Alignment = 4; // Returned offset must be a multiply of 4 B +D3D12MA::VirtualAllocation alloc; UINT64 allocOffset; -hr = block->Allocate(&allocDesc, &allocOffset); +hr = block->Allocate(&allocDesc, &alloc, &allocOffset); \endcode Alignments of different allocations made from one block may vary. @@ -1504,21 +2300,24 @@ It might be more convenient, but you need to make sure to use this new unit cons - D3D12MA::VIRTUAL_BLOCK_DESC::Size - D3D12MA::VIRTUAL_ALLOCATION_DESC::Size and D3D12MA::VIRTUAL_ALLOCATION_DESC::Alignment -- Using offset returned by D3D12MA::VirtualBlock::Allocate +- Using offset returned by D3D12MA::VirtualBlock::Allocate and D3D12MA::VIRTUAL_ALLOCATION_INFO::Offset \section virtual_allocator_statistics Statistics -You can obtain statistics of a virtual block using D3D12MA::VirtualBlock::CalculateStats. -The function fills structure D3D12MA::StatInfo - same as used by the normal D3D12 memory allocator. +You can obtain brief statistics of a virtual block using D3D12MA::VirtualBlock::GetStatistics(). +The function fills structure D3D12MA::Statistics - same as used by the normal D3D12 memory allocator. Example: \code -D3D12MA::StatInfo statInfo; -block->CalculateStats(&statInfo); +D3D12MA::Statistics stats; +block->GetStatistics(&stats); printf("My virtual block has %llu bytes used by %u virtual allocations\n", - statInfo.UsedBytes, statInfo.AllocationCount); + stats.AllocationBytes, stats.AllocationCount); \endcode +More detailed statistics can be obtained using function D3D12MA::VirtualBlock::CalculateStatistics(), +but they are slower to calculate. + You can also request a full list of allocations and free regions as a string in JSON format by calling D3D12MA::VirtualBlock::BuildStatsString. Returned string must be later freed using D3D12MA::VirtualBlock::FreeStatsString. @@ -1526,6 +2325,9 @@ The format of this string may differ from the one returned by the main D3D12 all \section virtual_allocator_additional_considerations Additional considerations +Alternative, linear algorithm can be used with virtual allocator - see flag +D3D12MA::VIRTUAL_BLOCK_FLAG_ALGORITHM_LINEAR and documentation: \ref linear_algorithm. + Note that the "virtual allocator" functionality is implemented on a level of individual memory blocks. Keeping track of a whole collection of blocks, allocating new ones when out of free space, deleting empty ones, and deciding which one to try first for a new allocation must be implemented by the user. @@ -1549,14 +2351,14 @@ allocations. Example: \code #include -void* CustomAllocate(size_t Size, size_t Alignment, void* pUserData) +void* CustomAllocate(size_t Size, size_t Alignment, void* pPrivateData) { void* memory = _aligned_malloc(Size, Alignment); // Your extra bookkeeping here... return memory; } -void CustomFree(void* pMemory, void* pUserData) +void CustomFree(void* pMemory, void* pPrivateData) { // Your extra bookkeeping here... _aligned_free(pMemory); @@ -1578,6 +2380,37 @@ HRESULT hr = D3D12MA::CreateAllocator(&allocatorDesc, &allocator); \endcode +\section debug_margins Debug margins + +By default, allocations are laid out in memory blocks next to each other if possible +(considering required alignment returned by `ID3D12Device::GetResourceAllocationInfo`). + +![Allocations without margin](../gfx/Margins_1.png) + +Define macro `D3D12MA_DEBUG_MARGIN` to some non-zero value (e.g. 16) inside "D3D12MemAlloc.cpp" +to enforce specified number of bytes as a margin after every allocation. + +![Allocations with margin](../gfx/Margins_2.png) + +If your bug goes away after enabling margins, it means it may be caused by memory +being overwritten outside of allocation boundaries. It is not 100% certain though. +Change in application behavior may also be caused by different order and distribution +of allocations across memory blocks after margins are applied. + +Margins work with all memory heap types. + +Margin is applied only to placed allocations made out of memory heaps and not to committed +allocations, which have their own, implicit memory heap of specific size. +It is thus not applied to allocations made using D3D12MA::ALLOCATION_FLAG_COMMITTED flag +or those automatically decided to put into committed allocations, e.g. due to its large size. + +Margins appear in [JSON dump](@ref statistics_json_dump) as part of free space. + +Note that enabling margins increases memory usage and fragmentation. + +Margins do not apply to \ref virtual_allocator. + + \page general_considerations General considerations \section general_considerations_thread_safety Thread safety @@ -1591,19 +2424,27 @@ HRESULT hr = D3D12MA::CreateAllocator(&allocatorDesc, &allocator); Using this flag may improve performance. - D3D12MA::VirtualBlock is not safe to be used from multiple threads simultaneously. -\section general_considerations_future_plans Future plans +\section general_considerations_versioning_and_compatibility Versioning and compatibility -Features planned for future releases: +The library uses [**Semantic Versioning**](https://semver.org/), +which means version numbers follow convention: Major.Minor.Patch (e.g. 2.3.0), where: -Near future: feature parity with [Vulkan Memory Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator/), including: +- Incremented Patch version means a release is backward- and forward-compatible, + introducing only some internal improvements, bug fixes, optimizations etc. + or changes that are out of scope of the official API described in this documentation. +- Incremented Minor version means a release is backward-compatible, + so existing code that uses the library should continue to work, while some new + symbols could have been added: new structures, functions, new values in existing + enums and bit flags, new structure members, but not new function parameters. +- Incrementing Major version means a release could break some backward compatibility. -- Alternative allocation algorithms: linear allocator, buddy allocator -- Support for priorities using `ID3D12Device1::SetResidencyPriority` +All changes between official releases are documented in file "CHANGELOG.md". -Later: - -- Memory defragmentation -- Support for multi-GPU (multi-adapter) +\warning Backward compatiblity is considered on the level of C++ source code, not binary linkage. +Adding new members to existing structures is treated as backward compatible if initializing +the new members to binary zero results in the old behavior. +You should always fully initialize all library structures to zeros and not rely on their +exact binary size. \section general_considerations_features_not_supported Features not supported @@ -1611,6 +2452,7 @@ Features deliberately excluded from the scope of this library: - **Descriptor allocation.** Although also called "heaps", objects that represent descriptors are separate part of the D3D12 API from buffers and textures. + You can still use \ref virtual_allocator to manage descriptors and their ranges inside a descriptor heap. - **Support for reserved (tiled) resources.** We don't recommend using them. - Support for `ID3D12Device::Evict` and `MakeResident`. We don't recommend using them. You can call them on the D3D12 objects manually. diff --git a/WickedEngine/Utility/vk_mem_alloc.h b/WickedEngine/Utility/vk_mem_alloc.h index 7e7ba314d..c8a986635 100644 --- a/WickedEngine/Utility/vk_mem_alloc.h +++ b/WickedEngine/Utility/vk_mem_alloc.h @@ -25,7 +25,7 @@ /** \mainpage Vulkan Memory Allocator -Version 3.0.0-development +Version 3.0.0 (2022-03-25) Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. \n License: MIT @@ -60,12 +60,7 @@ License: MIT - [Stack](@ref linear_algorithm_stack) - [Double stack](@ref linear_algorithm_double_stack) - [Ring buffer](@ref linear_algorithm_ring_buffer) - - [Buddy allocation algorithm](@ref buddy_algorithm) - \subpage defragmentation - - [Defragmenting CPU memory](@ref defragmentation_cpu) - - [Defragmenting GPU memory](@ref defragmentation_gpu) - - [Additional notes](@ref defragmentation_additional_notes) - - [Writing custom allocation algorithm](@ref defragmentation_custom_algorithm) - \subpage statistics - [Numeric statistics](@ref statistics_numeric_statistics) - [JSON dump](@ref statistics_json_dump) @@ -89,11 +84,14 @@ License: MIT - [Custom host memory allocator](@ref custom_memory_allocator) - [Device memory allocation callbacks](@ref allocation_callbacks) - [Device heap memory limit](@ref heap_memory_limit) -- \subpage vk_khr_dedicated_allocation -- \subpage enabling_buffer_device_address -- \subpage vk_amd_device_coherent_memory +- Extension support + - \subpage vk_khr_dedicated_allocation + - \subpage enabling_buffer_device_address + - \subpage vk_ext_memory_priority + - \subpage vk_amd_device_coherent_memory - \subpage general_considerations - [Thread safety](@ref general_considerations_thread_safety) + - [Versioning and compatibility](@ref general_considerations_versioning_and_compatibility) - [Validation layer warnings](@ref general_considerations_validation_layer_warnings) - [Allocation algorithm](@ref general_considerations_allocation_algorithm) - [Features not supported](@ref general_considerations_features_not_supported) @@ -120,6 +118,7 @@ for user-defined purpose without allocating any real GPU memory. \defgroup group_stats Statistics \brief API elements that query current status of the allocator, from memory usage, budget, to full dump of the internal state in JSON format. +See documentation chapter: \ref statistics. */ @@ -427,6 +426,7 @@ typedef enum VmaAllocatorCreateFlagBits VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaAllocatorCreateFlagBits; +/// See #VmaAllocatorCreateFlagBits. typedef VkFlags VmaAllocatorCreateFlags; /** @} */ @@ -534,8 +534,7 @@ typedef enum VmaAllocationCreateFlagBits You should not use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT and #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT at the same time. It makes no sense. - - If VmaAllocationCreateInfo::pool is not null, this flag is implied and ignored. */ + */ VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT = 0x00000002, /** \brief Set this flag to use a memory that will be persistently mapped and retrieve pointer to it. @@ -548,13 +547,11 @@ typedef enum VmaAllocationCreateFlagBits support it (e.g. Intel GPU). */ VMA_ALLOCATION_CREATE_MAPPED_BIT = 0x00000004, - /// \deprecated Removed. Do not use. - VMA_ALLOCATION_CREATE_RESERVED_1_BIT = 0x00000008, - /// \deprecated Removed. Do not use. - VMA_ALLOCATION_CREATE_RESERVED_2_BIT = 0x00000010, - /** Set this flag to treat VmaAllocationCreateInfo::pUserData as pointer to a + /** \deprecated Preserved for backward compatibility. Consider using vmaSetAllocationName() instead. + + Set this flag to treat VmaAllocationCreateInfo::pUserData as pointer to a null-terminated string. Instead of copying pointer value, a local copy of the - string is made and stored in allocation's `pUserData`. The string is automatically + string is made and stored in allocation's `pName`. The string is automatically freed together with the allocation. It is also used in vmaBuildStatsString(). */ VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT = 0x00000020, @@ -567,6 +564,10 @@ typedef enum VmaAllocationCreateFlagBits It is useful when you want to bind yourself to do some more advanced binding, e.g. using some extensions. The flag is meaningful only with functions that bind by default: vmaCreateBuffer(), vmaCreateImage(). Otherwise it is ignored. + + If you want to make sure the new buffer/image is not tied to the new memory allocation + through `VkMemoryDedicatedAllocateInfoKHR` structure in case the allocation ends up in its own memory block, + use also flag #VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT. */ VMA_ALLOCATION_CREATE_DONT_BIND_BIT = 0x00000080, /** Create allocation only if additional device memory required for it, if any, won't exceed @@ -591,7 +592,7 @@ typedef enum VmaAllocationCreateFlagBits never read or accessed randomly, so a memory type can be selected that is uncached and write-combined. \warning Violating this declaration may work correctly, but will likely be very slow. - Watch out for implicit reads introduces by doing e.g. `pMappedData[i] += x;` + Watch out for implicit reads introduced by doing e.g. `pMappedData[i] += x;` Better prepare your data in a local variable and `memcpy()` it to the mapped pointer all at once. */ VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT = 0x00000400, @@ -604,7 +605,7 @@ typedef enum VmaAllocationCreateFlagBits This includes allocations created in \ref custom_memory_pools. Declares that mapped memory can be read, written, and accessed in random order, - so a `HOST_CACHED` memory type is preferred. + so a `HOST_CACHED` memory type is required. */ VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT = 0x00000800, /** @@ -628,6 +629,11 @@ typedef enum VmaAllocationCreateFlagBits to minimize allocation time, possibly at the expense of allocation quality. */ VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = 0x00020000, + /** Allocation strategy that chooses always the lowest offset in available space. + This is not the most efficient strategy but achieves highly packed data. + Used internally by defragmentation, not recomended in typical usage. + */ + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = 0x00040000, /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT. */ VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT, @@ -638,10 +644,12 @@ typedef enum VmaAllocationCreateFlagBits */ VMA_ALLOCATION_CREATE_STRATEGY_MASK = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT | - VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT | + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, VMA_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaAllocationCreateFlagBits; +/// See #VmaAllocationCreateFlagBits. typedef VkFlags VmaAllocationCreateFlags; /// Flags to be passed as VmaPoolCreateInfo::flags. @@ -679,48 +687,61 @@ typedef enum VmaPoolCreateFlagBits */ VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT = 0x00000004, - /** \brief Enables alternative, buddy allocation algorithm in this pool. - - It operates on a tree of blocks, each having size that is a power of two and - a half of its parent's size. Comparing to default algorithm, this one provides - faster allocation and deallocation and decreased external fragmentation, - at the expense of more memory wasted (internal fragmentation). - For details, see documentation chapter \ref buddy_algorithm. - */ - VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT = 0x00000008, - - /** \brief Enables alternative, Two-Level Segregated Fit (TLSF) allocation algorithm in this pool. - - This algorithm is based on 2-level lists dividing address space into smaller - chunks. The first level is aligned to power of two which serves as buckets for requested - memory to fall into, and the second level is lineary subdivided into lists of free memory. - This algorithm aims to achieve bounded response time even in the worst case scenario. - Allocation time can be sometimes slightly longer than compared to other algorithms - but in return the application can avoid stalls in case of fragmentation, giving - predictable results, suitable for real-time use cases. - */ - VMA_POOL_CREATE_TLSF_ALGORITHM_BIT = 0x00000010, - /** Bit mask to extract only `ALGORITHM` bits from entire set of flags. */ VMA_POOL_CREATE_ALGORITHM_MASK = - VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT | - VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT | - VMA_POOL_CREATE_TLSF_ALGORITHM_BIT, + VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT, VMA_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaPoolCreateFlagBits; /// Flags to be passed as VmaPoolCreateInfo::flags. See #VmaPoolCreateFlagBits. typedef VkFlags VmaPoolCreateFlags; -/// Flags to be used in vmaDefragmentationBegin(). None at the moment. Reserved for future use. +/// Flags to be passed as VmaDefragmentationInfo::flags. typedef enum VmaDefragmentationFlagBits { - VMA_DEFRAGMENTATION_FLAG_INCREMENTAL = 0x1, + /* \brief Use simple but fast algorithm for defragmentation. + May not achieve best results but will require least time to compute and least allocations to copy. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT = 0x1, + /* \brief Default defragmentation algorithm, applied also when no `ALGORITHM` flag is specified. + Offers a balance between defragmentation quality and the amount of allocations and bytes that need to be moved. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT = 0x2, + /* \brief Perform full defragmentation of memory. + Can result in notably more time to compute and allocations to copy, but will achieve best memory packing. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT = 0x4, + /** \brief Use the most roboust algorithm at the cost of time to compute and number of copies to make. + Only available when bufferImageGranularity is greater than 1, since it aims to reduce + alignment issues between different types of resources. + Otherwise falls back to same behavior as #VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT = 0x8, + + /// A bit mask to extract only `ALGORITHM` bits from entire set of flags. + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK = + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT | + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT | + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT | + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT, + VMA_DEFRAGMENTATION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaDefragmentationFlagBits; +/// See #VmaDefragmentationFlagBits. typedef VkFlags VmaDefragmentationFlags; +/// Operation performed on single defragmentation move. See structure #VmaDefragmentationMove. +typedef enum VmaDefragmentationMoveOperation +{ + /// Buffer/image has been recreated at `dstTmpAllocation`, data has been copied, old buffer/image has been destroyed. `srcAllocation` should be changed to point to the new place. This is the default value set by vmaBeginDefragmentationPass(). + VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY = 0, + /// Set this value if you cannot move the allocation. New place reserved at `dstTmpAllocation` will be freed. `srcAllocation` will remain unchanged. + VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE = 1, + /// Set this value if you decide to abandon the allocation and you destroyed the buffer/image. New place reserved at `dstTmpAllocation` will be freed, along with `srcAllocation`, which will be destroyed. + VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY = 2, +} VmaDefragmentationMoveOperation; + /** @} */ /** @@ -744,34 +765,10 @@ typedef enum VmaVirtualBlockCreateFlagBits */ VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT = 0x00000001, - /** \brief Enables alternative, buddy allocation algorithm in this virtual block. - - It operates on a tree of blocks, each having size that is a power of two and - a half of its parent's size. Comparing to default algorithm, this one provides - faster allocation and deallocation and decreased external fragmentation, - at the expense of more memory wasted (internal fragmentation). - For details, see documentation chapter \ref buddy_algorithm. - */ - VMA_VIRTUAL_BLOCK_CREATE_BUDDY_ALGORITHM_BIT = 0x00000002, - - /** \brief Enables alternative, TLSF allocation algorithm in virtual block. - - This algorithm is based on 2-level lists dividing address space into smaller - chunks. The first level is aligned to power of two which serves as buckets for requested - memory to fall into, and the second level is lineary subdivided into lists of free memory. - This algorithm aims to achieve bounded response time even in the worst case scenario. - Allocation time can be sometimes slightly longer than compared to other algorithms - but in return the application can avoid stalls in case of fragmentation, giving - predictable results, suitable for real-time use cases. - */ - VMA_VIRTUAL_BLOCK_CREATE_TLSF_ALGORITHM_BIT = 0x00000004, - /** \brief Bit mask to extract only `ALGORITHM` bits from entire set of flags. */ VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK = - VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT | - VMA_VIRTUAL_BLOCK_CREATE_BUDDY_ALGORITHM_BIT | - VMA_VIRTUAL_BLOCK_CREATE_TLSF_ALGORITHM_BIT, + VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT, VMA_VIRTUAL_BLOCK_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaVirtualBlockCreateFlagBits; @@ -792,6 +789,10 @@ typedef enum VmaVirtualAllocationCreateFlagBits /** \brief Allocation strategy that tries to minimize allocation time. */ VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT, + /** Allocation strategy that chooses always the lowest offset in available space. + This is not the most efficient strategy but achieves highly packed data. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, /** \brief A bit mask to extract only `STRATEGY` bits from entire set of flags. These strategy flags are binary compatible with equivalent flags in #VmaAllocationCreateFlagBits. @@ -865,10 +866,10 @@ returned structure VmaAllocationInfo. VK_DEFINE_HANDLE(VmaAllocation) /** \struct VmaDefragmentationContext -\brief Represents Opaque object that represents started defragmentation process. +\brief An opaque object that represents started defragmentation process. -Fill structure #VmaDefragmentationInfo2 and call function vmaDefragmentationBegin() to create it. -Call function vmaDefragmentationEnd() to destroy it. +Fill structure #VmaDefragmentationInfo and call function vmaBeginDefragmentation() to create it. +Call function vmaEndDefragmentation() to destroy it. */ VK_DEFINE_HANDLE(VmaDefragmentationContext) @@ -1101,62 +1102,105 @@ typedef struct VmaAllocatorInfo @{ */ -/// Calculated statistics of memory usage in entire allocator. -typedef struct VmaStatInfo +/** \brief Calculated statistics of memory usage e.g. in a specific memory type, heap, custom pool, or total. + +These are fast to calculate. +See functions: vmaGetHeapBudgets(), vmaGetPoolStatistics(). +*/ +typedef struct VmaStatistics { - /// Number of `VkDeviceMemory` Vulkan memory blocks allocated. + /** \brief Number of `VkDeviceMemory` objects - Vulkan memory blocks allocated. + */ uint32_t blockCount; - /// Number of #VmaAllocation allocation objects allocated. + /** \brief Number of #VmaAllocation objects allocated. + + Dedicated allocations have their own blocks, so each one adds 1 to `allocationCount` as well as `blockCount`. + */ uint32_t allocationCount; - /// Number of free ranges of memory between allocations. - uint32_t unusedRangeCount; - /// Total number of bytes occupied by all allocations. - VkDeviceSize usedBytes; - /// Total number of bytes occupied by unused ranges. - VkDeviceSize unusedBytes; - VkDeviceSize allocationSizeMin, allocationSizeAvg, allocationSizeMax; - VkDeviceSize unusedRangeSizeMin, unusedRangeSizeAvg, unusedRangeSizeMax; -} VmaStatInfo; - -/// General statistics from current state of Allocator. -typedef struct VmaStats -{ - VmaStatInfo memoryType[VK_MAX_MEMORY_TYPES]; - VmaStatInfo memoryHeap[VK_MAX_MEMORY_HEAPS]; - VmaStatInfo total; -} VmaStats; - -/// Statistics of current memory usage and available budget, in bytes, for specific memory heap. -typedef struct VmaBudget -{ - /** \brief Sum size of all `VkDeviceMemory` blocks allocated from particular heap, in bytes. + /** \brief Number of bytes allocated in `VkDeviceMemory` blocks. + + \note To avoid confusion, please be aware that what Vulkan calls an "allocation" - a whole `VkDeviceMemory` object + (e.g. as in `VkPhysicalDeviceLimits::maxMemoryAllocationCount`) is called a "block" in VMA, while VMA calls + "allocation" a #VmaAllocation object that represents a memory region sub-allocated from such block, usually for a single buffer or image. */ VkDeviceSize blockBytes; - - /** \brief Sum size of all allocations created in particular heap, in bytes. - - Usually less or equal than `blockBytes`. - Difference `blockBytes - allocationBytes` is the amount of memory allocated but unused - - available for new allocations or wasted due to fragmentation. + /** \brief Total number of bytes occupied by all #VmaAllocation objects. + + Always less or equal than `blockBytes`. + Difference `(blockBytes - allocationBytes)` is the amount of memory allocated from Vulkan + but unused by any #VmaAllocation. */ VkDeviceSize allocationBytes; +} VmaStatistics; +/** \brief More detailed statistics than #VmaStatistics. + +These are slower to calculate. Use for debugging purposes. +See functions: vmaCalculateStatistics(), vmaCalculatePoolStatistics(). + +Previous version of the statistics API provided averages, but they have been removed +because they can be easily calculated as: + +\code +VkDeviceSize allocationSizeAvg = detailedStats.statistics.allocationBytes / detailedStats.statistics.allocationCount; +VkDeviceSize unusedBytes = detailedStats.statistics.blockBytes - detailedStats.statistics.allocationBytes; +VkDeviceSize unusedRangeSizeAvg = unusedBytes / detailedStats.unusedRangeCount; +\endcode +*/ +typedef struct VmaDetailedStatistics +{ + /// Basic statistics. + VmaStatistics statistics; + /// Number of free ranges of memory between allocations. + uint32_t unusedRangeCount; + /// Smallest allocation size. `VK_WHOLE_SIZE` if there are 0 allocations. + VkDeviceSize allocationSizeMin; + /// Largest allocation size. 0 if there are 0 allocations. + VkDeviceSize allocationSizeMax; + /// Smallest empty range size. `VK_WHOLE_SIZE` if there are 0 empty ranges. + VkDeviceSize unusedRangeSizeMin; + /// Largest empty range size. 0 if there are 0 empty ranges. + VkDeviceSize unusedRangeSizeMax; +} VmaDetailedStatistics; + +/** \brief General statistics from current state of the Allocator - +total memory usage across all memory heaps and types. + +These are slower to calculate. Use for debugging purposes. +See function vmaCalculateStatistics(). +*/ +typedef struct VmaTotalStatistics +{ + VmaDetailedStatistics memoryType[VK_MAX_MEMORY_TYPES]; + VmaDetailedStatistics memoryHeap[VK_MAX_MEMORY_HEAPS]; + VmaDetailedStatistics total; +} VmaTotalStatistics; + +/** \brief Statistics of current memory usage and available budget for a specific memory heap. + +These are fast to calculate. +See function vmaGetHeapBudgets(). +*/ +typedef struct VmaBudget +{ + /** \brief Statistics fetched from the library. + */ + VmaStatistics statistics; /** \brief Estimated current memory usage of the program, in bytes. - Fetched from system using `VK_EXT_memory_budget` extension if enabled. + Fetched from system using VK_EXT_memory_budget extension if enabled. - It might be different than `blockBytes` (usually higher) due to additional implicit objects + It might be different than `statistics.blockBytes` (usually higher) due to additional implicit objects also occupying the memory, like swapchain, pipelines, descriptor heaps, command buffers, or `VkDeviceMemory` blocks allocated outside of this library, if any. */ VkDeviceSize usage; - /** \brief Estimated amount of memory available to the program, in bytes. - Fetched from system using `VK_EXT_memory_budget` extension if enabled. + Fetched from system using VK_EXT_memory_budget extension if enabled. It might be different (most probably smaller) than `VkMemoryHeap::size[heapIndex]` due to factors - external to the program, like other programs also consuming system resources. + external to the program, decided by the operating system. Difference `budget - usage` is the amount of additional memory that can probably be allocated without problems. Exceeding the budget may result in various problems. */ @@ -1170,6 +1214,10 @@ typedef struct VmaBudget @{ */ +/** \brief Parameters of new #VmaAllocation. + +To be used with functions like vmaCreateBuffer(), vmaCreateImage(), and many others. +*/ typedef struct VmaAllocationCreateInfo { /// Use #VmaAllocationCreateFlagBits enum. @@ -1279,33 +1327,6 @@ typedef struct VmaPoolCreateInfo /** @} */ -/** -\addtogroup group_stats -@{ -*/ - -/// Describes parameter of existing #VmaPool. -typedef struct VmaPoolStats -{ - /** \brief Total amount of `VkDeviceMemory` allocated from Vulkan for this pool, in bytes. - */ - VkDeviceSize size; - /** \brief Total number of bytes in the pool not used by any #VmaAllocation. - */ - VkDeviceSize unusedSize; - /** \brief Number of #VmaAllocation objects created from this pool that were not destroyed. - */ - size_t allocationCount; - /** \brief Number of continuous memory ranges in the pool not used by any #VmaAllocation. - */ - size_t unusedRangeCount; - /** \brief Number of `VkDeviceMemory` blocks allocated for this pool. - */ - size_t blockCount; -} VmaPoolStats; - -/** @} */ - /** \addtogroup group_alloc @{ @@ -1323,7 +1344,7 @@ typedef struct VmaAllocationInfo Same memory object can be shared by multiple allocations. - It can change after call to vmaDefragment() if this allocation is passed to the function. + It can change after the allocation is moved during \ref defragmentation. */ VkDeviceMemory VMA_NULLABLE_NON_DISPATCHABLE deviceMemory; /** \brief Offset in `VkDeviceMemory` object to the beginning of this allocation, in bytes. `(deviceMemory, offset)` pair is unique to this allocation. @@ -1333,7 +1354,7 @@ typedef struct VmaAllocationInfo not entire device memory block. Functions like vmaMapMemory(), vmaBindBufferMemory() also refer to the beginning of the allocation and apply this offset automatically. - It can change after call to vmaDefragment() if this allocation is passed to the function. + It can change after the allocation is moved during \ref defragmentation. */ VkDeviceSize offset; /** \brief Size of this allocation, in bytes. @@ -1353,7 +1374,7 @@ typedef struct VmaAllocationInfo created with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag, this value is null. It can change after call to vmaMapMemory(), vmaUnmapMemory(). - It can also change after call to vmaDefragment() if this allocation is passed to the function. + It can also change after the allocation is moved during \ref defragmentation. */ void* VMA_NULLABLE pMappedData; /** \brief Custom general-purpose pointer that was passed as VmaAllocationCreateInfo::pUserData or set using vmaSetAllocationUserData(). @@ -1361,120 +1382,92 @@ typedef struct VmaAllocationInfo It can change after call to vmaSetAllocationUserData() for this allocation. */ void* VMA_NULLABLE pUserData; + /** \brief Custom allocation name that was set with vmaSetAllocationName(). + + It can change after call to vmaSetAllocationName() for this allocation. + + Another way to set custom name is to pass it in VmaAllocationCreateInfo::pUserData with + additional flag #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT set [DEPRECATED]. + */ + const char* VMA_NULLABLE pName; } VmaAllocationInfo; /** \brief Parameters for defragmentation. -To be used with function vmaDefragmentationBegin(). +To be used with function vmaBeginDefragmentation(). */ -typedef struct VmaDefragmentationInfo2 +typedef struct VmaDefragmentationInfo { - /** \brief Reserved for future use. Should be 0. - */ + /// \brief Use combination of #VmaDefragmentationFlagBits. VmaDefragmentationFlags flags; - /** \brief Number of allocations in `pAllocations` array. + /** \brief Custom pool to be defragmented. + + If null then default pools will undergo defragmentation process. */ - uint32_t allocationCount; - /** \brief Pointer to array of allocations that can be defragmented. + VmaPool VMA_NULLABLE pool; + /** \brief Maximum numbers of bytes that can be copied during single pass, while moving allocations to different places. - The array should have `allocationCount` elements. - The array should not contain nulls. - Elements in the array should be unique - same allocation cannot occur twice. - All allocations not present in this array are considered non-moveable during this defragmentation. + `0` means no limit. */ - const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations; - /** \brief Optional, output. Pointer to array that will be filled with information whether the allocation at certain index has been changed during defragmentation. + VkDeviceSize maxBytesPerPass; + /** \brief Maximum number of allocations that can be moved during single pass to a different place. - The array should have `allocationCount` elements. - You can pass null if you are not interested in this information. + `0` means no limit. */ - VkBool32* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationsChanged; - /** \brief Numer of pools in `pPools` array. - */ - uint32_t poolCount; - /** \brief Either null or pointer to array of pools to be defragmented. + uint32_t maxAllocationsPerPass; +} VmaDefragmentationInfo; - All the allocations in the specified pools can be moved during defragmentation - and there is no way to check if they were really moved as in `pAllocationsChanged`, - so you must query all the allocations in all these pools for new `VkDeviceMemory` - and offset using vmaGetAllocationInfo() if you might need to recreate buffers - and images bound to them. - - The array should have `poolCount` elements. - The array should not contain nulls. - Elements in the array should be unique - same pool cannot occur twice. - - Using this array is equivalent to specifying all allocations from the pools in `pAllocations`. - It might be more efficient. - */ - const VmaPool VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(poolCount) pPools; - /** \brief Maximum total numbers of bytes that can be copied while moving allocations to different places using transfers on CPU side, like `memcpy()`, `memmove()`. - - `VK_WHOLE_SIZE` means no limit. - */ - VkDeviceSize maxCpuBytesToMove; - /** \brief Maximum number of allocations that can be moved to a different place using transfers on CPU side, like `memcpy()`, `memmove()`. - - `UINT32_MAX` means no limit. - */ - uint32_t maxCpuAllocationsToMove; - /** \brief Maximum total numbers of bytes that can be copied while moving allocations to different places using transfers on GPU side, posted to `commandBuffer`. - - `VK_WHOLE_SIZE` means no limit. - */ - VkDeviceSize maxGpuBytesToMove; - /** \brief Maximum number of allocations that can be moved to a different place using transfers on GPU side, posted to `commandBuffer`. - - `UINT32_MAX` means no limit. - */ - uint32_t maxGpuAllocationsToMove; - /** \brief Optional. Command buffer where GPU copy commands will be posted. - - If not null, it must be a valid command buffer handle that supports Transfer queue type. - It must be in the recording state and outside of a render pass instance. - You need to submit it and make sure it finished execution before calling vmaDefragmentationEnd(). - - Passing null means that only CPU defragmentation will be performed. - */ - VkCommandBuffer VMA_NULLABLE commandBuffer; -} VmaDefragmentationInfo2; - -typedef struct VmaDefragmentationPassMoveInfo +/// Single move of an allocation to be done for defragmentation. +typedef struct VmaDefragmentationMove { - VmaAllocation VMA_NOT_NULL allocation; - VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory; - VkDeviceSize offset; -} VmaDefragmentationPassMoveInfo; + /// Operation to be performed on the allocation by vmaEndDefragmentationPass(). Default value is #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY. You can modify it. + VmaDefragmentationMoveOperation operation; + /// Allocation that should be moved. + VmaAllocation VMA_NOT_NULL srcAllocation; + /** \brief Temporary allocation pointing to destination memory that will replace `srcAllocation`. + + \warning Do not store this allocation in your data structures! It exists only temporarily, for the duration of the defragmentation pass, + to be used for binding new buffer/image to the destination memory using e.g. vmaBindBufferMemory(). + vmaEndDefragmentationPass() will destroy it and make `srcAllocation` point to this memory. + */ + VmaAllocation VMA_NOT_NULL dstTmpAllocation; +} VmaDefragmentationMove; /** \brief Parameters for incremental defragmentation steps. To be used with function vmaBeginDefragmentationPass(). */ -typedef struct VmaDefragmentationPassInfo +typedef struct VmaDefragmentationPassMoveInfo { + /// Number of elements in the `pMoves` array. uint32_t moveCount; - VmaDefragmentationPassMoveInfo* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(moveCount) pMoves; -} VmaDefragmentationPassInfo; + /** \brief Array of moves to be performed by the user in the current defragmentation pass. + + Pointer to an array of `moveCount` elements, owned by VMA, created in vmaBeginDefragmentationPass(), destroyed in vmaEndDefragmentationPass(). -/** \brief Deprecated. Optional configuration parameters to be passed to function vmaDefragment(). + For each element, you should: + + 1. Create a new buffer/image in the place pointed by VmaDefragmentationMove::dstMemory + VmaDefragmentationMove::dstOffset. + 2. Copy data from the VmaDefragmentationMove::srcAllocation e.g. using `vkCmdCopyBuffer`, `vkCmdCopyImage`. + 3. Make sure these commands finished executing on the GPU. + 4. Destroy the old buffer/image. + + Only then you can finish defragmentation pass by calling vmaEndDefragmentationPass(). + After this call, the allocation will point to the new place in memory. -\deprecated This is a part of the old interface. It is recommended to use structure #VmaDefragmentationInfo2 and function vmaDefragmentationBegin() instead. -*/ -typedef struct VmaDefragmentationInfo -{ - /** \brief Maximum total numbers of bytes that can be copied while moving allocations to different places. + Alternatively, if you cannot move specific allocation, you can set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. - Default is `VK_WHOLE_SIZE`, which means no limit. + Alternatively, if you decide you want to completely remove the allocation: + + 1. Destroy its buffer/image. + 2. Set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY. + + Then, after vmaEndDefragmentationPass() the allocation will be freed. */ - VkDeviceSize maxBytesToMove; - /** \brief Maximum number of allocations that can be moved to different place. + VmaDefragmentationMove* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(moveCount) pMoves; +} VmaDefragmentationPassMoveInfo; - Default is `UINT32_MAX`, which means no limit. - */ - uint32_t maxAllocationsToMove; -} VmaDefragmentationInfo; - -/// Statistics returned by function vmaDefragment(). +/// Statistics returned for defragmentation process in function vmaEndDefragmentation(). typedef struct VmaDefragmentationStats { /// Total number of bytes that have been copied while moving allocations to different places. @@ -1630,23 +1623,24 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( /** \brief Retrieves statistics from current state of the Allocator. This function is called "calculate" not "get" because it has to traverse all -internal data structures, so it may be quite slow. For faster but more brief statistics -suitable to be called every frame or every allocation, use vmaGetHeapBudgets(). +internal data structures, so it may be quite slow. Use it for debugging purposes. +For faster but more brief statistics suitable to be called every frame or every allocation, +use vmaGetHeapBudgets(). Note that when using allocator from multiple threads, returned information may immediately become outdated. */ -VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStats( +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics( VmaAllocator VMA_NOT_NULL allocator, - VmaStats* VMA_NOT_NULL pStats); + VmaTotalStatistics* VMA_NOT_NULL pStats); -/** \brief Retrieves information about current memory budget for all memory heaps. +/** \brief Retrieves information about current memory usage and budget for all memory heaps. \param allocator \param[out] pBudgets Must point to array with number of elements at least equal to number of memory heaps in physical device used. This function is called "get" not "calculate" because it is very fast, suitable to be called -every frame or every allocation. For more detailed statistics use vmaCalculateStats(). +every frame or every allocation. For more detailed statistics use vmaCalculateStatistics(). Note that when using allocator from multiple threads, returned information may immediately become outdated. @@ -1738,10 +1732,21 @@ VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( \param pool Pool object. \param[out] pPoolStats Statistics of specified pool. */ -VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStats( +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( VmaAllocator VMA_NOT_NULL allocator, VmaPool VMA_NOT_NULL pool, - VmaPoolStats* VMA_NOT_NULL pPoolStats); + VmaStatistics* VMA_NOT_NULL pPoolStats); + +/** \brief Retrieves detailed statistics of existing #VmaPool object. + +\param allocator Allocator object. +\param pool Pool object. +\param[out] pPoolStats Statistics of specified pool. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + VmaDetailedStatistics* VMA_NOT_NULL pPoolStats); /** @} */ @@ -1836,14 +1841,19 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages( VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations, VmaAllocationInfo* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationInfo); -/** +/** \brief Allocates memory suitable for given `VkBuffer`. + \param allocator \param buffer \param pCreateInfo \param[out] pAllocation Handle to allocated memory. \param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). -You should free the memory using vmaFreeMemory(). +It only creates #VmaAllocation. To bind the memory to the buffer, use vmaBindBufferMemory(). + +This is a special-purpose function. In most cases you should use vmaCreateBuffer(). + +You must free the allocation using vmaFreeMemory() when no longer needed. */ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( VmaAllocator VMA_NOT_NULL allocator, @@ -1852,7 +1862,20 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); -/// Function similar to vmaAllocateMemoryForBuffer(). +/** \brief Allocates memory suitable for given `VkImage`. + +\param allocator +\param image +\param pCreateInfo +\param[out] pAllocation Handle to allocated memory. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). + +It only creates #VmaAllocation. To bind the memory to the buffer, use vmaBindImageMemory(). + +This is a special-purpose function. In most cases you should use vmaCreateImage(). + +You must free the allocation using vmaFreeMemory() when no longer needed. +*/ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage( VmaAllocator VMA_NOT_NULL allocator, VkImage VMA_NOT_NULL_NON_DISPATCHABLE image, @@ -1900,15 +1923,8 @@ VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( /** \brief Sets pUserData in given allocation to new value. -If the allocation was created with VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT, -pUserData must be either null, or pointer to a null-terminated string. The function -makes local copy of the string and sets it as allocation's `pUserData`. String -passed as pUserData doesn't need to be valid for whole lifetime of the allocation - -you can free it after this call. String previously pointed by allocation's -pUserData is freed from memory. - -If the flag was not used, the value of pointer `pUserData` is just copied to -allocation's `pUserData`. It is opaque, so you can use it however you want - e.g. +The value of pointer `pUserData` is copied to allocation's `pUserData`. +It is opaque, so you can use it however you want - e.g. as a pointer, ordinal number or some handle to you own data. */ VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( @@ -1916,6 +1932,19 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( VmaAllocation VMA_NOT_NULL allocation, void* VMA_NULLABLE pUserData); +/** \brief Sets pName in given allocation to new value. + +`pName` must be either null, or pointer to a null-terminated string. The function +makes local copy of the string and sets it as allocation's `pName`. String +passed as pName doesn't need to be valid for whole lifetime of the allocation - +you can free it after this call. String previously pointed by allocation's +`pName` is freed from memory. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const char* VMA_NULLABLE pName); + /** \brief Given an allocation, returns Property Flags of its memory type. @@ -2102,103 +2131,69 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( \param allocator Allocator object. \param pInfo Structure filled with parameters of defragmentation. -\param[out] pStats Optional. Statistics of defragmentation. You can pass null if you are not interested in this information. -\param[out] pContext Context object that must be passed to vmaDefragmentationEnd() to finish defragmentation. -\return `VK_SUCCESS` and `*pContext == null` if defragmentation finished within this function call. `VK_NOT_READY` and `*pContext != null` if defragmentation has been started and you need to call vmaDefragmentationEnd() to finish it. Negative value in case of error. +\param[out] pContext Context object that must be passed to vmaEndDefragmentation() to finish defragmentation. +\returns +- `VK_SUCCESS` if defragmentation can begin. +- `VK_ERROR_FEATURE_NOT_PRESENT` if defragmentation is not supported. -Use this function instead of old, deprecated vmaDefragment(). - -Warning! Between the call to vmaDefragmentationBegin() and vmaDefragmentationEnd(): - -- You should not use any of allocations passed as `pInfo->pAllocations` or - any allocations that belong to pools passed as `pInfo->pPools`, - including calling vmaGetAllocationInfo(), or access - their data. -- Some mutexes protecting internal data structures may be locked, so trying to - make or free any allocations, bind buffers or images, map memory, or launch - another simultaneous defragmentation in between may cause stall (when done on - another thread) or deadlock (when done on the same thread), unless you are - 100% sure that defragmented allocations are in different pools. -- Information returned via `pStats` and `pInfo->pAllocationsChanged` are undefined. - They become valid after call to vmaDefragmentationEnd(). -- If `pInfo->commandBuffer` is not null, you must submit that command buffer - and make sure it finished execution before calling vmaDefragmentationEnd(). - -For more information and important limitations regarding defragmentation, see documentation chapter: +For more information about defragmentation, see documentation chapter: [Defragmentation](@ref defragmentation). */ -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationBegin( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( VmaAllocator VMA_NOT_NULL allocator, - const VmaDefragmentationInfo2* VMA_NOT_NULL pInfo, - VmaDefragmentationStats* VMA_NULLABLE pStats, + const VmaDefragmentationInfo* VMA_NOT_NULL pInfo, VmaDefragmentationContext VMA_NULLABLE* VMA_NOT_NULL pContext); /** \brief Ends defragmentation process. -Use this function to finish defragmentation started by vmaDefragmentationBegin(). -It is safe to pass `context == null`. The function then does nothing. -*/ -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationEnd( - VmaAllocator VMA_NOT_NULL allocator, - VmaDefragmentationContext VMA_NULLABLE context); +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pStats Optional stats for the defragmentation. Can be null. +Use this function to finish defragmentation started by vmaBeginDefragmentation(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationStats* VMA_NULLABLE pStats); + +/** \brief Starts single defragmentation pass. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pPassInfo Computed informations for current pass. +\returns +- `VK_SUCCESS` if no more moves are possible. Then you can omit call to vmaEndDefragmentationPass() and simply end whole defragmentation. +- `VK_INCOMPLETE` if there are pending moves returned in `pPassInfo`. You need to perform them, call vmaEndDefragmentationPass(), + and then preferably try another pass with vmaBeginDefragmentationPass(). +*/ VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( VmaAllocator VMA_NOT_NULL allocator, - VmaDefragmentationContext VMA_NULLABLE context, - VmaDefragmentationPassInfo* VMA_NOT_NULL pInfo); + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); +/** \brief Ends single defragmentation pass. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param pPassInfo Computed informations for current pass filled by vmaBeginDefragmentationPass() and possibly modified by you. + +Returns `VK_SUCCESS` if no more moves are possible or `VK_INCOMPLETE` if more defragmentations are possible. + +Ends incremental defragmentation pass and commits all defragmentation moves from `pPassInfo`. +After this call: + +- Allocations at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY + (which is the default) will be pointing to the new destination place. +- Allocation at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY + will be freed. + +If no more moves are possible you can end whole defragmentation. +*/ VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( VmaAllocator VMA_NOT_NULL allocator, - VmaDefragmentationContext VMA_NULLABLE context); - -/** \brief Deprecated. Compacts memory by moving allocations. - -\param allocator -\param pAllocations Array of allocations that can be moved during this compation. -\param allocationCount Number of elements in pAllocations and pAllocationsChanged arrays. -\param[out] pAllocationsChanged Array of boolean values that will indicate whether matching allocation in pAllocations array has been moved. This parameter is optional. Pass null if you don't need this information. -\param pDefragmentationInfo Configuration parameters. Optional - pass null to use default values. -\param[out] pDefragmentationStats Statistics returned by the function. Optional - pass null if you don't need this information. -\return `VK_SUCCESS` if completed, negative error code in case of error. - -\deprecated This is a part of the old interface. It is recommended to use structure #VmaDefragmentationInfo2 and function vmaDefragmentationBegin() instead. - -This function works by moving allocations to different places (different -`VkDeviceMemory` objects and/or different offsets) in order to optimize memory -usage. Only allocations that are in `pAllocations` array can be moved. All other -allocations are considered nonmovable in this call. Basic rules: - -- Only allocations made in memory types that have - `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` and `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT` - flags can be compacted. You may pass other allocations but it makes no sense - - these will never be moved. -- Custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT or - #VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT flag are not defragmented. Allocations - passed to this function that come from such pools are ignored. -- Allocations created with #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT or - created as dedicated allocations for any other reason are also ignored. -- Both allocations made with or without #VMA_ALLOCATION_CREATE_MAPPED_BIT - flag can be compacted. If not persistently mapped, memory will be mapped - temporarily inside this function if needed. -- You must not pass same #VmaAllocation object multiple times in `pAllocations` array. - -The function also frees empty `VkDeviceMemory` blocks. - -Warning: This function may be time-consuming, so you shouldn't call it too often -(like after every resource creation/destruction). -You can call it on special occasions (like when reloading a game level or -when you just destroyed a lot of objects). Calling it every frame may be OK, but -you should measure that on your platform. - -For more information, see [Defragmentation](@ref defragmentation) chapter. -*/ -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragment( - VmaAllocator VMA_NOT_NULL allocator, - const VmaAllocation VMA_NOT_NULL* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations, - size_t allocationCount, - VkBool32* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationsChanged, - const VmaDefragmentationInfo* VMA_NULLABLE pDefragmentationInfo, - VmaDefragmentationStats* VMA_NULLABLE pDefragmentationStats); + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); /** \brief Binds buffer to allocation. @@ -2274,7 +2269,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2( VkImage VMA_NOT_NULL_NON_DISPATCHABLE image, const void* VMA_NULLABLE pNext); -/** +/** \brief Creates a new `VkBuffer`, allocates and binds memory for it. + \param allocator \param pBufferCreateInfo \param pAllocationCreateInfo @@ -2289,7 +2285,7 @@ This function automatically: -# Binds the buffer with the memory. If any of these operations fail, buffer and allocation are not created, -returned value is negative error code, *pBuffer and *pAllocation are null. +returned value is negative error code, `*pBuffer` and `*pAllocation` are null. If the function succeeded, you must destroy both buffer and allocation when you no longer need them using either convenience function vmaDestroyBuffer() or @@ -2330,6 +2326,31 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); +/** \brief Creates a new `VkBuffer`, binds already created memory for it. + +\param allocator +\param allocation Allocation that provides memory to be used for binding new buffer to it. +\param pBufferCreateInfo +\param[out] pBuffer Buffer that was created. + +This function automatically: + +-# Creates buffer. +-# Binds the buffer with the supplied memory. + +If any of these operations fail, buffer is not created, +returned value is negative error code and `*pBuffer` is null. + +If the function succeeded, you must destroy the buffer when you +no longer need it using `vkDestroyBuffer()`. If you want to also destroy the corresponding +allocation you can use convenience function vmaDestroyBuffer(). +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer); + /** \brief Destroys Vulkan buffer and frees allocated memory. This is just a convenience function equivalent to: @@ -2355,6 +2376,13 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); +/// Function similar to vmaCreateAliasingBuffer(). +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage); + /** \brief Destroys Vulkan image and frees allocated memory. This is just a convenience function equivalent to: @@ -2454,10 +2482,21 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData( void* VMA_NULLABLE pUserData); /** \brief Calculates and returns statistics about virtual allocations and memory usage in given #VmaVirtualBlock. + +This function is fast to call. For more detailed statistics, see vmaCalculateVirtualBlockStatistics(). */ -VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStats( +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics( VmaVirtualBlock VMA_NOT_NULL virtualBlock, - VmaStatInfo* VMA_NOT_NULL pStatInfo); + VmaStatistics* VMA_NOT_NULL pStats); + +/** \brief Calculates and returns detailed statistics about virtual allocations and memory usage in given #VmaVirtualBlock. + +This function is slow to call. Use for debugging purposes. +For less detailed statistics, see vmaGetVirtualBlockStatistics(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaDetailedStatistics* VMA_NOT_NULL pStats); /** @} */ @@ -2470,7 +2509,7 @@ VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStats( /** \brief Builds and returns a null-terminated string in JSON format with information about given #VmaVirtualBlock. \param virtualBlock Virtual block. \param[out] ppStatsString Returned string. -\param detailedMap Pass `VK_FALSE` to only obtain statistics as returned by vmaCalculateVirtualBlockStats(). Pass `VK_TRUE` to also obtain full list of allocations and free spaces. +\param detailedMap Pass `VK_FALSE` to only obtain statistics as returned by vmaCalculateVirtualBlockStatistics(). Pass `VK_TRUE` to also obtain full list of allocations and free spaces. Returned string must be freed using vmaFreeVirtualBlockStatsString(). */ @@ -2726,6 +2765,11 @@ static void vma_aligned_free(void* VMA_NULLABLE ptr) #endif #endif +#ifndef VMA_COUNT_BITS_SET + // Returns number of bits set to 1 in (v) + #define VMA_COUNT_BITS_SET(v) VmaCountBitsSet(v) +#endif + #ifndef VMA_BITSCAN_LSB // Scans integer for index of first nonzero value from the Least Significant Bit (LSB). If mask is 0 then returns UINT8_MAX #define VMA_BITSCAN_LSB(mask) VmaBitScanLSB(mask) @@ -3100,23 +3144,13 @@ typedef VmaList> VmaSuballoc struct VmaAllocationRequest; class VmaBlockMetadata; -class VmaBlockMetadata_Generic; class VmaBlockMetadata_Linear; -class VmaBlockMetadata_Buddy; class VmaBlockMetadata_TLSF; class VmaBlockVector; -struct VmaDefragmentationMove; -class VmaDefragmentationAlgorithm; -class VmaDefragmentationAlgorithm_Generic; -class VmaDefragmentationAlgorithm_Fast; - struct VmaPoolListItemTraits; -struct VmaBlockDefragmentationContext; -class VmaBlockVectorDefragmentationContext; - struct VmaCurrentBudgetData; class VmaAllocationObjectAllocator; @@ -3125,21 +3159,28 @@ class VmaAllocationObjectAllocator; #ifndef _VMA_FUNCTIONS -// Returns number of bits set to 1 in (v). + +/* +Returns number of bits set to 1 in (v). + +On specific platforms and compilers you can use instrinsics like: + +Visual Studio: + return __popcnt(v); +GCC, Clang: + return static_cast(__builtin_popcount(v)); + +Define macro VMA_COUNT_BITS_SET to provide your optimized implementation. +But you need to check in runtime whether user's CPU supports these, as some old processors don't. +*/ static inline uint32_t VmaCountBitsSet(uint32_t v) { -#ifdef _MSC_VER - return __popcnt(v); -#elif defined __GNUC__ || defined __clang__ - return static_cast(__builtin_popcount(v)); -#else uint32_t c = v - ((v >> 1) & 0x55555555); c = ((c >> 2) & 0x33333333) + (c & 0x33333333); c = ((c >> 4) + c) & 0x0F0F0F0F; c = ((c >> 8) + c) & 0x00FF00FF; c = ((c >> 16) + c) & 0x0000FFFF; return c; -#endif } static inline uint8_t VmaBitScanLSB(uint64_t mask) @@ -3335,12 +3376,8 @@ static const char* VmaAlgorithmToStr(uint32_t algorithm) { case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT: return "Linear"; - case VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT: - return "Buddy"; - case VMA_POOL_CREATE_TLSF_ALGORITHM_BIT: - return "TLSF"; case 0: - return "Default"; + return "TLSF"; default: VMA_ASSERT(0); return ""; @@ -3682,7 +3719,11 @@ static bool FindMemoryPreferences( // GPU access, no CPU access (e.g. a color attachment image) - prefer GPU memory if(deviceAccess) { - outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + // ...unless there is a clear preference from the user not to do so. + if(preferHost) + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + else + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; } // No direct GPU access, no CPU access, just transfers. // It may be staging copy intended for e.g. preserving image for next frame (then better GPU memory) or @@ -3847,65 +3888,60 @@ bool VmaVectorRemoveSorted(VectorT& vector, const typename VectorT::value_type& } #endif // _VMA_FUNCTIONS -#ifndef _VMA_STAT_INFO_FUNCTIONS -static void VmaInitStatInfo(VmaStatInfo& outInfo) +#ifndef _VMA_STATISTICS_FUNCTIONS + +static void VmaClearStatistics(VmaStatistics& outStats) { - memset(&outInfo, 0, sizeof(outInfo)); - outInfo.allocationSizeMin = UINT64_MAX; - outInfo.unusedRangeSizeMin = UINT64_MAX; + outStats.blockCount = 0; + outStats.allocationCount = 0; + outStats.blockBytes = 0; + outStats.allocationBytes = 0; } -// Adds statistics srcInfo into inoutInfo, like: inoutInfo += srcInfo. -static void VmaAddStatInfo(VmaStatInfo& inoutInfo, const VmaStatInfo& srcInfo) +static void VmaAddStatistics(VmaStatistics& inoutStats, const VmaStatistics& src) { - inoutInfo.blockCount += srcInfo.blockCount; - inoutInfo.allocationCount += srcInfo.allocationCount; - inoutInfo.unusedRangeCount += srcInfo.unusedRangeCount; - inoutInfo.usedBytes += srcInfo.usedBytes; - inoutInfo.unusedBytes += srcInfo.unusedBytes; - inoutInfo.allocationSizeMin = VMA_MIN(inoutInfo.allocationSizeMin, srcInfo.allocationSizeMin); - inoutInfo.allocationSizeMax = VMA_MAX(inoutInfo.allocationSizeMax, srcInfo.allocationSizeMax); - inoutInfo.unusedRangeSizeMin = VMA_MIN(inoutInfo.unusedRangeSizeMin, srcInfo.unusedRangeSizeMin); - inoutInfo.unusedRangeSizeMax = VMA_MAX(inoutInfo.unusedRangeSizeMax, srcInfo.unusedRangeSizeMax); + inoutStats.blockCount += src.blockCount; + inoutStats.allocationCount += src.allocationCount; + inoutStats.blockBytes += src.blockBytes; + inoutStats.allocationBytes += src.allocationBytes; } -static void VmaAddStatInfoAllocation(VmaStatInfo& inoutInfo, VkDeviceSize size) +static void VmaClearDetailedStatistics(VmaDetailedStatistics& outStats) { - ++inoutInfo.allocationCount; - inoutInfo.usedBytes += size; - if (size < inoutInfo.allocationSizeMin) - { - inoutInfo.allocationSizeMin = size; - } - if (size > inoutInfo.allocationSizeMax) - { - inoutInfo.allocationSizeMax = size; - } + VmaClearStatistics(outStats.statistics); + outStats.unusedRangeCount = 0; + outStats.allocationSizeMin = VK_WHOLE_SIZE; + outStats.allocationSizeMax = 0; + outStats.unusedRangeSizeMin = VK_WHOLE_SIZE; + outStats.unusedRangeSizeMax = 0; } -static void VmaAddStatInfoUnusedRange(VmaStatInfo& inoutInfo, VkDeviceSize size) +static void VmaAddDetailedStatisticsAllocation(VmaDetailedStatistics& inoutStats, VkDeviceSize size) { - ++inoutInfo.unusedRangeCount; - inoutInfo.unusedBytes += size; - if (size < inoutInfo.unusedRangeSizeMin) - { - inoutInfo.unusedRangeSizeMin = size; - } - if (size > inoutInfo.unusedRangeSizeMax) - { - inoutInfo.unusedRangeSizeMax = size; - } + inoutStats.statistics.allocationCount++; + inoutStats.statistics.allocationBytes += size; + inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, size); + inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, size); } -static void VmaPostprocessCalcStatInfo(VmaStatInfo& inoutInfo) +static void VmaAddDetailedStatisticsUnusedRange(VmaDetailedStatistics& inoutStats, VkDeviceSize size) { - inoutInfo.allocationSizeAvg = (inoutInfo.allocationCount > 0) ? - VmaRoundDiv(inoutInfo.usedBytes, inoutInfo.allocationCount) : 0; - inoutInfo.unusedRangeSizeAvg = (inoutInfo.unusedRangeCount > 0) ? - VmaRoundDiv(inoutInfo.unusedBytes, inoutInfo.unusedRangeCount) : 0; + inoutStats.unusedRangeCount++; + inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, size); + inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, size); } -#endif // _VMA_STAT_INFO_FUNCTIONS +static void VmaAddDetailedStatistics(VmaDetailedStatistics& inoutStats, const VmaDetailedStatistics& src) +{ + VmaAddStatistics(inoutStats.statistics, src.statistics); + inoutStats.unusedRangeCount += src.unusedRangeCount; + inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, src.allocationSizeMin); + inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, src.allocationSizeMax); + inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, src.unusedRangeSizeMin); + inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, src.unusedRangeSizeMax); +} + +#endif // _VMA_STATISTICS_FUNCTIONS #ifndef _VMA_MUTEX_LOCK // Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). @@ -4301,8 +4337,9 @@ VmaSmallVector::VmaSmallVector(size_t count, const AllocatorT& template void VmaSmallVector::push_back(const T& src) { - resize(m_Count + 1); - data()[m_Count] = src; + const size_t newIndex = size(); + resize(newIndex + 1); + data()[newIndex] = src; } template @@ -4801,6 +4838,7 @@ public: class iterator { + friend class const_iterator; friend class VmaList; public: iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} @@ -4826,6 +4864,7 @@ public: }; class reverse_iterator { + friend class const_reverse_iterator; friend class VmaList; public: reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} @@ -4857,6 +4896,8 @@ public: const_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} const_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + iterator drop_const() { return { const_cast*>(m_pList), const_cast*>(m_pItem) }; } + const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } @@ -4883,6 +4924,8 @@ public: const_reverse_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} const_reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + reverse_iterator drop_const() { return { const_cast*>(m_pList), const_cast*>(m_pItem) }; } + const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } @@ -4919,8 +4962,8 @@ public: reverse_iterator rbegin() { return reverse_iterator(&m_RawList, m_RawList.Back()); } reverse_iterator rend() { return reverse_iterator(&m_RawList, VMA_NULL); } - const_reverse_iterator crbegin() { return const_reverse_iterator(&m_RawList, m_RawList.Back()); } - const_reverse_iterator crend() { return const_reverse_iterator(&m_RawList, VMA_NULL); } + const_reverse_iterator crbegin() const { return const_reverse_iterator(&m_RawList, m_RawList.Back()); } + const_reverse_iterator crend() const { return const_reverse_iterator(&m_RawList, VMA_NULL); } const_reverse_iterator rbegin() const { return crbegin(); } const_reverse_iterator rend() const { return crend(); } @@ -5289,13 +5332,14 @@ public: iterator begin() { return m_Vector.begin(); } iterator end() { return m_Vector.end(); } + size_t size() { return m_Vector.size(); } void insert(const PairType& pair); iterator find(const KeyT& key); void erase(iterator it); private: - VmaVector< PairType, VmaStlAllocator > m_Vector; + VmaVector< PairType, VmaStlAllocator> m_Vector; }; #ifndef _VMA_MAP_FUNCTIONS @@ -5715,51 +5759,35 @@ void VmaJsonWriter::WriteIndent(bool oneLess) } #endif // _VMA_JSON_WRITER_FUNCTIONS -static void VmaPrintStatInfo(VmaJsonWriter& json, const VmaStatInfo& stat) +static void VmaPrintDetailedStatistics(VmaJsonWriter& json, const VmaDetailedStatistics& stat) { json.BeginObject(); - json.WriteString("Blocks"); - json.WriteNumber(stat.blockCount); - - json.WriteString("Allocations"); - json.WriteNumber(stat.allocationCount); - - json.WriteString("UnusedRanges"); + json.WriteString("BlockCount"); + json.WriteNumber(stat.statistics.blockCount); + json.WriteString("BlockBytes"); + json.WriteNumber(stat.statistics.blockBytes); + json.WriteString("AllocationCount"); + json.WriteNumber(stat.statistics.allocationCount); + json.WriteString("AllocationBytes"); + json.WriteNumber(stat.statistics.allocationBytes); + json.WriteString("UnusedRangeCount"); json.WriteNumber(stat.unusedRangeCount); - json.WriteString("UsedBytes"); - json.WriteNumber(stat.usedBytes); - - json.WriteString("UnusedBytes"); - json.WriteNumber(stat.unusedBytes); - - if (stat.allocationCount > 1) + if (stat.statistics.allocationCount > 1) { - json.WriteString("AllocationSize"); - json.BeginObject(true); - json.WriteString("Min"); + json.WriteString("AllocationSizeMin"); json.WriteNumber(stat.allocationSizeMin); - json.WriteString("Avg"); - json.WriteNumber(stat.allocationSizeAvg); - json.WriteString("Max"); + json.WriteString("AllocationSizeMax"); json.WriteNumber(stat.allocationSizeMax); - json.EndObject(); } - if (stat.unusedRangeCount > 1) { - json.WriteString("UnusedRangeSize"); - json.BeginObject(true); - json.WriteString("Min"); + json.WriteString("UnusedRangeSizeMin"); json.WriteNumber(stat.unusedRangeSizeMin); - json.WriteString("Avg"); - json.WriteNumber(stat.unusedRangeSizeAvg); - json.WriteString("Max"); + json.WriteString("UnusedRangeSizeMax"); json.WriteNumber(stat.unusedRangeSizeMax); - json.EndObject(); } - json.EndObject(); } #endif // _VMA_JSON_WRITER @@ -5951,9 +5979,8 @@ struct VmaAllocation_T enum FLAGS { - FLAG_USER_DATA_STRING = 0x01, - FLAG_PERSISTENT_MAP = 0x02, - FLAG_MAPPING_ALLOWED = 0x04, + FLAG_PERSISTENT_MAP = 0x01, + FLAG_MAPPING_ALLOWED = 0x02, }; public: @@ -5965,7 +5992,7 @@ public: }; // This struct is allocated using VmaPoolAllocator. - VmaAllocation_T(bool userDataString, bool mappingAllowed); + VmaAllocation_T(bool mappingAllowed); ~VmaAllocation_T(); void InitBlockAllocation( @@ -5988,8 +6015,8 @@ public: ALLOCATION_TYPE GetType() const { return (ALLOCATION_TYPE)m_Type; } VkDeviceSize GetAlignment() const { return m_Alignment; } VkDeviceSize GetSize() const { return m_Size; } - bool IsUserDataString() const { return (m_Flags & FLAG_USER_DATA_STRING) != 0; } void* GetUserData() const { return m_pUserData; } + const char* GetName() const { return m_pName; } VmaSuballocationType GetSuballocationType() const { return (VmaSuballocationType)m_SuballocationType; } VmaDeviceMemoryBlock* GetBlock() const { VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); return m_BlockAllocation.m_Block; } @@ -5997,17 +6024,16 @@ public: bool IsPersistentMap() const { return (m_Flags & FLAG_PERSISTENT_MAP) != 0; } bool IsMappingAllowed() const { return (m_Flags & FLAG_MAPPING_ALLOWED) != 0; } - void SetUserData(VmaAllocator hAllocator, void* pUserData); - void ChangeBlockAllocation(VmaAllocator hAllocator, VmaDeviceMemoryBlock* block, VmaAllocHandle allocHandle); - void ChangeAllocHandle(VmaAllocHandle newAllocHandle); + void SetUserData(VmaAllocator hAllocator, void* pUserData) { m_pUserData = pUserData; } + void SetName(VmaAllocator hAllocator, const char* pName); + void FreeName(VmaAllocator hAllocator); + uint8_t SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation); VmaAllocHandle GetAllocHandle() const; VkDeviceSize GetOffset() const; VmaPool GetParentPool() const; VkDeviceMemory GetMemory() const; void* GetMappedData() const; - void DedicatedAllocCalcStatsInfo(VmaStatInfo& outInfo); - void BlockAllocMap(); void BlockAllocUnmap(); VkResult DedicatedAllocMap(VmaAllocator hAllocator, void** ppData); @@ -6047,6 +6073,7 @@ private: VkDeviceSize m_Alignment; VkDeviceSize m_Size; void* m_pUserData; + char* m_pName; uint32_t m_MemoryTypeIndex; uint8_t m_Type; // ALLOCATION_TYPE uint8_t m_SuballocationType; // VmaSuballocationType @@ -6056,8 +6083,6 @@ private: #if VMA_STATS_STRING_ENABLED uint32_t m_BufferImageUsage; // 0 if unknown. #endif - - void FreeUserDataString(VmaAllocator hAllocator); }; #endif // _VMA_ALLOCATION_T @@ -6103,8 +6128,8 @@ public: void Init(bool useMutex) { m_UseMutex = useMutex; } bool Validate(); - void AddStats(VmaStats* stats, uint32_t memTypeIndex, uint32_t memHeapIndex); - void AddPoolStats(VmaPoolStats* stats); + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats); + void AddStatistics(VmaStatistics& inoutStats); #if VMA_STATS_STRING_ENABLED // Writes JSON array with the list of allocations. void BuildStatsString(VmaJsonWriter& json); @@ -6149,31 +6174,30 @@ bool VmaDedicatedAllocationList::Validate() return true; } -void VmaDedicatedAllocationList::AddStats(VmaStats* stats, uint32_t memTypeIndex, uint32_t memHeapIndex) +void VmaDedicatedAllocationList::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) { - VmaMutexLockRead lock(m_Mutex, m_UseMutex); - for (VmaAllocation alloc = m_AllocationList.Front(); - alloc != VMA_NULL; alloc = m_AllocationList.GetNext(alloc)) + for(auto* item = m_AllocationList.Front(); item != nullptr; item = DedicatedAllocationLinkedList::GetNext(item)) { - VmaStatInfo allocationStatInfo; - alloc->DedicatedAllocCalcStatsInfo(allocationStatInfo); - VmaAddStatInfo(stats->total, allocationStatInfo); - VmaAddStatInfo(stats->memoryType[memTypeIndex], allocationStatInfo); - VmaAddStatInfo(stats->memoryHeap[memHeapIndex], allocationStatInfo); + const VkDeviceSize size = item->GetSize(); + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += size; + VmaAddDetailedStatisticsAllocation(inoutStats, item->GetSize()); } } -void VmaDedicatedAllocationList::AddPoolStats(VmaPoolStats* stats) +void VmaDedicatedAllocationList::AddStatistics(VmaStatistics& inoutStats) { VmaMutexLockRead lock(m_Mutex, m_UseMutex); - const size_t allocCount = m_AllocationList.GetCount(); - stats->allocationCount += allocCount; - stats->blockCount += allocCount; + const uint32_t allocCount = (uint32_t)m_AllocationList.GetCount(); + inoutStats.blockCount += allocCount; + inoutStats.allocationCount += allocCount; for(auto* item = m_AllocationList.Front(); item != nullptr; item = DedicatedAllocationLinkedList::GetNext(item)) { - stats->size += item->GetSize(); + const VkDeviceSize size = item->GetSize(); + inoutStats.blockBytes += size; + inoutStats.allocationBytes += size; } } @@ -6295,20 +6319,24 @@ public: // Validates all data structures inside this object. If not valid, returns false. virtual bool Validate() const = 0; virtual size_t GetAllocationCount() const = 0; + virtual size_t GetFreeRegionsCount() const = 0; virtual VkDeviceSize GetSumFreeSize() const = 0; // Returns true if this block is empty - contains only single free suballocation. virtual bool IsEmpty() const = 0; virtual void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) = 0; virtual VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const = 0; + virtual void* GetAllocationUserData(VmaAllocHandle allocHandle) const = 0; + + virtual VmaAllocHandle GetAllocationListBegin() const = 0; + virtual VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const = 0; + virtual VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const = 0; - // Must set blockCount to 1. - virtual void CalcAllocationStatInfo(VmaStatInfo& outInfo) const = 0; // Shouldn't modify blockCount. - virtual void AddPoolStats(VmaPoolStats& inoutStats) const = 0; + virtual void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const = 0; + virtual void AddStatistics(VmaStatistics& inoutStats) const = 0; #if VMA_STATS_STRING_ENABLED - // mapRefCount == UINT32_MAX means unspecified. - virtual void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const = 0; + virtual void PrintDetailedMap(class VmaJsonWriter& json) const = 0; #endif // Tries to find a place for suballocation with given parameters inside this block. @@ -6352,8 +6380,7 @@ protected: void PrintDetailedMap_Begin(class VmaJsonWriter& json, VkDeviceSize unusedBytes, size_t allocationCount, - size_t unusedRangeCount, - uint32_t mapRefCount) const; + size_t unusedRangeCount) const; void PrintDetailedMap_Allocation(class VmaJsonWriter& json, VkDeviceSize offset, VkDeviceSize size, void* userData) const; void PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, @@ -6389,35 +6416,17 @@ void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size VmaAllocation allocation = reinterpret_cast(userData); userData = allocation->GetUserData(); + const char* name = allocation->GetName(); #if VMA_STATS_STRING_ENABLED - if (userData != VMA_NULL && allocation->IsUserDataString()) - { - VMA_DEBUG_LOG("UNFREED ALLOCATION; Offset: %llu; Size: %llu; UserData: %s; Type: %s; Usage: %u", - offset, size, reinterpret_cast(userData), - VMA_SUBALLOCATION_TYPE_NAMES[allocation->GetSuballocationType()], - allocation->GetBufferImageUsage()); - } - else - { - VMA_DEBUG_LOG("UNFREED ALLOCATION; Offset: %llu; Size: %llu; UserData: %p; Type: %s; Usage: %u", - offset, size, userData, - VMA_SUBALLOCATION_TYPE_NAMES[allocation->GetSuballocationType()], - allocation->GetBufferImageUsage()); - } + VMA_DEBUG_LOG("UNFREED ALLOCATION; Offset: %llu; Size: %llu; UserData: %p; Name: %s; Type: %s; Usage: %u", + offset, size, userData, name ? name : "vma_empty", + VMA_SUBALLOCATION_TYPE_NAMES[allocation->GetSuballocationType()], + allocation->GetBufferImageUsage()); #else - if (userData != VMA_NULL && allocation->IsUserDataString()) - { - VMA_DEBUG_LOG("UNFREED ALLOCATION; Offset: %llu; Size: %llu; UserData: %s; Type: %u", - offset, size, reinterpret_cast(userData), - (uint32_t)allocation->GetSuballocationType()); - } - else - { - VMA_DEBUG_LOG("UNFREED ALLOCATION; Offset: %llu; Size: %llu; UserData: %p; Type: %u", - offset, size, userData, - (uint32_t)allocation->GetSuballocationType()); - } + VMA_DEBUG_LOG("UNFREED ALLOCATION; Offset: %llu; Size: %llu; UserData: %p; Name: %s; Type: %u", + offset, size, userData, name ? name : "vma_empty", + (uint32_t)allocation->GetSuballocationType()); #endif // VMA_STATS_STRING_ENABLED } @@ -6425,10 +6434,8 @@ void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size #if VMA_STATS_STRING_ENABLED void VmaBlockMetadata::PrintDetailedMap_Begin(class VmaJsonWriter& json, - VkDeviceSize unusedBytes, size_t allocationCount, size_t unusedRangeCount, uint32_t mapRefCount) const + VkDeviceSize unusedBytes, size_t allocationCount, size_t unusedRangeCount) const { - json.BeginObject(); - json.WriteString("TotalBytes"); json.WriteNumber(GetSize()); @@ -6436,16 +6443,10 @@ void VmaBlockMetadata::PrintDetailedMap_Begin(class VmaJsonWriter& json, json.WriteNumber(unusedBytes); json.WriteString("Allocations"); - json.WriteNumber((uint64_t)allocationCount); + json.WriteNumber(allocationCount); json.WriteString("UnusedRanges"); - json.WriteNumber((uint64_t)unusedRangeCount); - - if(mapRefCount != UINT32_MAX) - { - json.WriteString("MapRefCount"); - json.WriteNumber(mapRefCount); - } + json.WriteNumber(unusedRangeCount); json.WriteString("Suballocations"); json.BeginArray(); @@ -6461,15 +6462,11 @@ void VmaBlockMetadata::PrintDetailedMap_Allocation(class VmaJsonWriter& json, if (IsVirtual()) { - json.WriteString("Type"); - json.WriteString("VirtualAllocation"); - json.WriteString("Size"); json.WriteNumber(size); - - if (userData != VMA_NULL) + if (userData) { - json.WriteString("UserData"); + json.WriteString("CustomData"); json.BeginString(); json.ContinueString_Pointer(userData); json.EndString(); @@ -6503,7 +6500,6 @@ void VmaBlockMetadata::PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, void VmaBlockMetadata::PrintDetailedMap_End(class VmaJsonWriter& json) const { json.EndArray(); - json.EndObject(); } #endif // VMA_STATS_STRING_ENABLED #endif // _VMA_BLOCK_METADATA_FUNCTIONS @@ -6744,6 +6740,7 @@ void VmaBlockBufferImageGranularity::AllocPage(RegionInfo& page, uint8_t allocTy #endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS #endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY +#if 0 #ifndef _VMA_BLOCK_METADATA_GENERIC class VmaBlockMetadata_Generic : public VmaBlockMetadata { @@ -6764,8 +6761,8 @@ public: void Init(VkDeviceSize size) override; bool Validate() const override; - void CalcAllocationStatInfo(VmaStatInfo& outInfo) const override; - void AddPoolStats(VmaPoolStats& inoutStats) const override; + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; + void AddStatistics(VmaStatistics& inoutStats) const override; #if VMA_STATS_STRING_ENABLED void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; @@ -6787,15 +6784,13 @@ public: void* userData) override; void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; + void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; + VmaAllocHandle GetAllocationListBegin() const override; + VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; void Clear() override; void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; void DebugLogAllAllocations() const override; - // For defragmentation - bool IsBufferImageGranularityConflictPossible( - VkDeviceSize bufferImageGranularity, - VmaSuballocationType& inOutPrevSuballocType) const; - private: uint32_t m_FreeCount; VkDeviceSize m_SumFreeSize; @@ -6805,7 +6800,7 @@ private: VkDeviceSize AlignAllocationSize(VkDeviceSize size) const { return IsVirtual() ? size : VmaAlignUp(size, (VkDeviceSize)16); } - VmaSuballocationList::iterator FindAtOffset(VkDeviceSize offset); + VmaSuballocationList::iterator FindAtOffset(VkDeviceSize offset) const; bool ValidateFreeSuballocationList() const; // Checks if requested suballocation with given parameters can be placed in given pFreeSuballocItem. @@ -6940,33 +6935,27 @@ bool VmaBlockMetadata_Generic::Validate() const return true; } -void VmaBlockMetadata_Generic::CalcAllocationStatInfo(VmaStatInfo& outInfo) const +void VmaBlockMetadata_Generic::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const { const uint32_t rangeCount = (uint32_t)m_Suballocations.size(); - VmaInitStatInfo(outInfo); - outInfo.blockCount = 1; + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += GetSize(); for (const auto& suballoc : m_Suballocations) { if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) - { - VmaAddStatInfoAllocation(outInfo, suballoc.size); - } + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); else - { - VmaAddStatInfoUnusedRange(outInfo, suballoc.size); - } + VmaAddDetailedStatisticsUnusedRange(inoutStats, suballoc.size); } } -void VmaBlockMetadata_Generic::AddPoolStats(VmaPoolStats& inoutStats) const +void VmaBlockMetadata_Generic::AddStatistics(VmaStatistics& inoutStats) const { - const uint32_t rangeCount = (uint32_t)m_Suballocations.size(); - - inoutStats.size += GetSize(); - inoutStats.unusedSize += m_SumFreeSize; - inoutStats.allocationCount += rangeCount - m_FreeCount; - inoutStats.unusedRangeCount += m_FreeCount; + inoutStats.blockCount++; + inoutStats.allocationCount += (uint32_t)m_Suballocations.size() - m_FreeCount; + inoutStats.blockBytes += GetSize(); + inoutStats.allocationBytes += GetSize() - m_SumFreeSize; } #if VMA_STATS_STRING_ENABLED @@ -7069,7 +7058,7 @@ bool VmaBlockMetadata_Generic::CreateAllocationRequest( } else { - VMA_ASSERT(strategy == VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT); + VMA_ASSERT(strategy & (VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT | VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT )); // Search staring from biggest suballocations. for (size_t index = freeSuballocCount; index--; ) { @@ -7180,6 +7169,37 @@ void VmaBlockMetadata_Generic::GetAllocationInfo(VmaAllocHandle allocHandle, Vma outInfo.pUserData = suballoc.userData; } +void* VmaBlockMetadata_Generic::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ + return FindAtOffset((VkDeviceSize)allocHandle - 1)->userData; +} + +VmaAllocHandle VmaBlockMetadata_Generic::GetAllocationListBegin() const +{ + if (IsEmpty()) + return VK_NULL_HANDLE; + + for (const auto& suballoc : m_Suballocations) + { + if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) + return (VmaAllocHandle)(suballoc.offset + 1); + } + VMA_ASSERT(false && "Should contain at least 1 allocation!"); + return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_Generic::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ + VmaSuballocationList::const_iterator prev = FindAtOffset((VkDeviceSize)prevAlloc - 1); + + for (VmaSuballocationList::const_iterator it = ++prev; it != m_Suballocations.end(); ++it) + { + if (it->type != VMA_SUBALLOCATION_TYPE_FREE) + return (VmaAllocHandle)(it->offset + 1); + } + return VK_NULL_HANDLE; +} + void VmaBlockMetadata_Generic::Clear() { const VkDeviceSize size = GetSize(); @@ -7214,15 +7234,15 @@ void VmaBlockMetadata_Generic::DebugLogAllAllocations() const } } -VmaSuballocationList::iterator VmaBlockMetadata_Generic::FindAtOffset(VkDeviceSize offset) +VmaSuballocationList::iterator VmaBlockMetadata_Generic::FindAtOffset(VkDeviceSize offset) const { VMA_HEAVY_ASSERT(!m_Suballocations.empty()); const VkDeviceSize last = m_Suballocations.rbegin()->offset; if (last == offset) - return m_Suballocations.rbegin(); + return m_Suballocations.rbegin().drop_const(); const VkDeviceSize first = m_Suballocations.begin()->offset; if (first == offset) - return m_Suballocations.begin(); + return m_Suballocations.begin().drop_const(); const size_t suballocCount = m_Suballocations.size(); const VkDeviceSize step = (last - first + m_Suballocations.begin()->size) / suballocCount; @@ -7232,12 +7252,11 @@ VmaSuballocationList::iterator VmaBlockMetadata_Generic::FindAtOffset(VkDeviceSi suballocItem != end; ++suballocItem) { - VmaSuballocation& suballoc = *suballocItem; - if (suballoc.offset == offset) - return suballocItem; + if (suballocItem->offset == offset) + return suballocItem.drop_const(); } VMA_ASSERT(false && "Not found!"); - return m_Suballocations.end(); + return m_Suballocations.end().drop_const(); }; // If requested offset is closer to the end of range, search from the end if (offset - first > suballocCount * step / 2) @@ -7481,37 +7500,9 @@ void VmaBlockMetadata_Generic::UnregisterFreeSuballocation(VmaSuballocationList: //VMA_HEAVY_ASSERT(ValidateFreeSuballocationList()); } - -bool VmaBlockMetadata_Generic::IsBufferImageGranularityConflictPossible( - VkDeviceSize bufferImageGranularity, - VmaSuballocationType& inOutPrevSuballocType) const -{ - if (bufferImageGranularity == 1 || IsEmpty() || IsVirtual()) - { - return false; - } - - VkDeviceSize minAlignment = VK_WHOLE_SIZE; - bool typeConflictFound = false; - for (const auto& suballoc : m_Suballocations) - { - const VmaSuballocationType suballocType = suballoc.type; - if (suballocType != VMA_SUBALLOCATION_TYPE_FREE) - { - VmaAllocation const alloc = (VmaAllocation)suballoc.userData; - minAlignment = VMA_MIN(minAlignment, alloc->GetAlignment()); - if (VmaIsBufferImageGranularityConflict(inOutPrevSuballocType, suballocType)) - { - typeConflictFound = true; - } - inOutPrevSuballocType = suballocType; - } - } - - return typeConflictFound || minAlignment >= bufferImageGranularity; -} #endif // _VMA_BLOCK_METADATA_GENERIC_FUNCTIONS #endif // _VMA_BLOCK_METADATA_GENERIC +#endif // #if 0 #ifndef _VMA_BLOCK_METADATA_LINEAR /* @@ -7607,12 +7598,13 @@ public: void Init(VkDeviceSize size) override; bool Validate() const override; size_t GetAllocationCount() const override; + size_t GetFreeRegionsCount() const override; - void CalcAllocationStatInfo(VmaStatInfo& outInfo) const override; - void AddPoolStats(VmaPoolStats& inoutStats) const override; + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; + void AddStatistics(VmaStatistics& inoutStats) const override; #if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; + void PrintDetailedMap(class VmaJsonWriter& json) const override; #endif bool CreateAllocationRequest( @@ -7632,6 +7624,10 @@ public: void Free(VmaAllocHandle allocHandle) override; void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; + void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; + VmaAllocHandle GetAllocationListBegin() const override; + VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; + VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override; void Clear() override; void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; void DebugLogAllAllocations() const override; @@ -7678,7 +7674,7 @@ private: const SuballocationVectorType& AccessSuballocations1st() const { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; } const SuballocationVectorType& AccessSuballocations2nd() const { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; } - VmaSuballocation& FindSuballocation(VkDeviceSize offset); + VmaSuballocation& FindSuballocation(VkDeviceSize offset) const; bool ShouldCompact1st() const; void CleanupAfterFree(); @@ -7870,7 +7866,14 @@ size_t VmaBlockMetadata_Linear::GetAllocationCount() const AccessSuballocations2nd().size() - m_2ndNullItemsCount; } -void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const +size_t VmaBlockMetadata_Linear::GetFreeRegionsCount() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return SIZE_MAX; +} + +void VmaBlockMetadata_Linear::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const { const VkDeviceSize size = GetSize(); const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); @@ -7878,8 +7881,8 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const const size_t suballoc1stCount = suballocations1st.size(); const size_t suballoc2ndCount = suballocations2nd.size(); - VmaInitStatInfo(outInfo); - outInfo.blockCount = 1; + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += size; VkDeviceSize lastOffset = 0; @@ -7906,12 +7909,12 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const { // There is free space from lastOffset to suballoc.offset. const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - VmaAddStatInfoUnusedRange(outInfo, unusedRangeSize); + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); } // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. - VmaAddStatInfoAllocation(outInfo, suballoc.size); + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; @@ -7924,7 +7927,7 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const if (lastOffset < freeSpace2ndTo1stEnd) { const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; - VmaAddStatInfoUnusedRange(outInfo, unusedRangeSize); + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); } // End of loop. @@ -7955,12 +7958,12 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const { // There is free space from lastOffset to suballoc.offset. const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - VmaAddStatInfoUnusedRange(outInfo, unusedRangeSize); + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); } // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. - VmaAddStatInfoAllocation(outInfo, suballoc.size); + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; @@ -7973,7 +7976,7 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const if (lastOffset < freeSpace1stTo2ndEnd) { const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; - VmaAddStatInfoUnusedRange(outInfo, unusedRangeSize); + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); } // End of loop. @@ -8003,12 +8006,12 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const { // There is free space from lastOffset to suballoc.offset. const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - VmaAddStatInfoUnusedRange(outInfo, unusedRangeSize); + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); } // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. - VmaAddStatInfoAllocation(outInfo, suballoc.size); + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; @@ -8021,7 +8024,7 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const if (lastOffset < size) { const VkDeviceSize unusedRangeSize = size - lastOffset; - VmaAddStatInfoUnusedRange(outInfo, unusedRangeSize); + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); } // End of loop. @@ -8029,11 +8032,9 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const } } } - - outInfo.unusedBytes = size - outInfo.usedBytes; } -void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const +void VmaBlockMetadata_Linear::AddStatistics(VmaStatistics& inoutStats) const { const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); @@ -8041,7 +8042,9 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const const size_t suballoc1stCount = suballocations1st.size(); const size_t suballoc2ndCount = suballocations2nd.size(); - inoutStats.size += size; + inoutStats.blockCount++; + inoutStats.blockBytes += size; + inoutStats.allocationBytes += size - m_SumFreeSize; VkDeviceSize lastOffset = 0; @@ -8068,8 +8071,6 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const { // There is free space from lastOffset to suballoc.offset. const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; } // 2. Process this allocation. @@ -8087,8 +8088,6 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const { // There is free space from lastOffset to freeSpace2ndTo1stEnd. const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; } // End of loop. @@ -8119,8 +8118,6 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const { // There is free space from lastOffset to suballoc.offset. const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; } // 2. Process this allocation. @@ -8138,8 +8135,6 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const { // There is free space from lastOffset to freeSpace1stTo2ndEnd. const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; } // End of loop. @@ -8169,8 +8164,6 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const { // There is free space from lastOffset to suballoc.offset. const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; } // 2. Process this allocation. @@ -8188,8 +8181,6 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const { // There is free space from lastOffset to size. const VkDeviceSize unusedRangeSize = size - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; } // End of loop. @@ -8200,7 +8191,7 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const } #if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const +void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const { const VkDeviceSize size = GetSize(); const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); @@ -8362,7 +8353,7 @@ void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json, uint32 } const VkDeviceSize unusedBytes = size - usedBytes; - PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount, mapRefCount); + PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount); // SECOND PASS lastOffset = 0; @@ -8734,6 +8725,32 @@ void VmaBlockMetadata_Linear::GetAllocationInfo(VmaAllocHandle allocHandle, VmaV outInfo.pUserData = suballoc.userData; } +void* VmaBlockMetadata_Linear::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ + return FindSuballocation((VkDeviceSize)allocHandle - 1).userData; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetAllocationListBegin() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return VK_NULL_HANDLE; +} + +VkDeviceSize VmaBlockMetadata_Linear::GetNextFreeRegionSize(VmaAllocHandle alloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return 0; +} + void VmaBlockMetadata_Linear::Clear() { m_SumFreeSize = GetSize(); @@ -8765,10 +8782,10 @@ void VmaBlockMetadata_Linear::DebugLogAllAllocations() const DebugLogAllocation(it->offset, it->size, it->userData); } -VmaSuballocation& VmaBlockMetadata_Linear::FindSuballocation(VkDeviceSize offset) +VmaSuballocation& VmaBlockMetadata_Linear::FindSuballocation(VkDeviceSize offset) const { - SuballocationVectorType& suballocations1st = AccessSuballocations1st(); - SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); VmaSuballocation refSuballoc; refSuballoc.offset = offset; @@ -8776,31 +8793,31 @@ VmaSuballocation& VmaBlockMetadata_Linear::FindSuballocation(VkDeviceSize offset // Item from the 1st vector. { - const SuballocationVectorType::iterator it = VmaBinaryFindSorted( + SuballocationVectorType::const_iterator it = VmaBinaryFindSorted( suballocations1st.begin() + m_1stNullItemsBeginCount, suballocations1st.end(), refSuballoc, VmaSuballocationOffsetLess()); if (it != suballocations1st.end()) { - return *it; + return const_cast(*it); } } if (m_2ndVectorMode != SECOND_VECTOR_EMPTY) { // Rest of members stays uninitialized intentionally for better performance. - const SuballocationVectorType::iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? + SuballocationVectorType::const_iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) : VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater()); if (it != suballocations2nd.end()) { - return *it; + return const_cast(*it); } } VMA_ASSERT(0 && "Allocation not found in linear allocator!"); - return suballocations1st.back(); // Should never occur. + return const_cast(suballocations1st.back()); // Should never occur. } bool VmaBlockMetadata_Linear::ShouldCompact1st() const @@ -9211,6 +9228,7 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest_UpperAddress( #endif // _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS #endif // _VMA_BLOCK_METADATA_LINEAR +#if 0 #ifndef _VMA_BLOCK_METADATA_BUDDY /* - GetSize() is the original size of allocated memory block. @@ -9241,8 +9259,8 @@ public: void Init(VkDeviceSize size) override; bool Validate() const override; - void CalcAllocationStatInfo(VmaStatInfo& outInfo) const override; - void AddPoolStats(VmaPoolStats& inoutStats) const override; + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; + void AddStatistics(VmaStatistics& inoutStats) const override; #if VMA_STATS_STRING_ENABLED void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; @@ -9263,6 +9281,9 @@ public: void Free(VmaAllocHandle allocHandle) override; void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; + void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; + VmaAllocHandle GetAllocationListBegin() const override; + VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; void Clear() override; void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; @@ -9336,11 +9357,11 @@ private: } return VmaNextPow2(size); } - Node* FindAllocationNode(VkDeviceSize offset, uint32_t& outLevel); + Node* FindAllocationNode(VkDeviceSize offset, uint32_t& outLevel) const; void DeleteNodeChildren(Node* node); bool ValidateNode(ValidationContext& ctx, const Node* parent, const Node* curr, uint32_t level, VkDeviceSize levelNodeSize) const; uint32_t AllocSizeToLevel(VkDeviceSize allocSize) const; - void CalcAllocationStatInfoNode(VmaStatInfo& inoutInfo, const Node* node, VkDeviceSize levelNodeSize) const; + void AddNodeToDetailedStatistics(VmaDetailedStatistics& inoutStats, const Node* node, VkDeviceSize levelNodeSize) const; // Adds node to the front of FreeList at given level. // node->type must be FREE. // node->free.prev, next can be undefined. @@ -9444,46 +9465,38 @@ bool VmaBlockMetadata_Buddy::Validate() const return true; } -void VmaBlockMetadata_Buddy::CalcAllocationStatInfo(VmaStatInfo& outInfo) const +void VmaBlockMetadata_Buddy::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const { - VmaInitStatInfo(outInfo); - outInfo.blockCount = 1; + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += GetSize(); - CalcAllocationStatInfoNode(outInfo, m_Root, LevelToNodeSize(0)); + AddNodeToDetailedStatistics(inoutStats, m_Root, LevelToNodeSize(0)); const VkDeviceSize unusableSize = GetUnusableSize(); if (unusableSize > 0) - { - VmaAddStatInfoUnusedRange(outInfo, unusableSize); - } + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusableSize); } -void VmaBlockMetadata_Buddy::AddPoolStats(VmaPoolStats& inoutStats) const +void VmaBlockMetadata_Buddy::AddStatistics(VmaStatistics& inoutStats) const { - const VkDeviceSize unusableSize = GetUnusableSize(); - - inoutStats.size += GetSize(); - inoutStats.unusedSize += m_SumFreeSize + unusableSize; - inoutStats.allocationCount += m_AllocationCount; - inoutStats.unusedRangeCount += m_FreeCount; - - if (unusableSize > 0) - { - ++inoutStats.unusedRangeCount; - } + inoutStats.blockCount++; + inoutStats.allocationCount += (uint32_t)m_AllocationCount; + inoutStats.blockBytes += GetSize(); + inoutStats.allocationBytes += GetSize() - m_SumFreeSize; } #if VMA_STATS_STRING_ENABLED void VmaBlockMetadata_Buddy::PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const { - VmaStatInfo stat; - CalcAllocationStatInfo(stat); + VmaDetailedStatistics stats; + VmaClearDetailedStatistics(stats); + AddDetailedStatistics(stats); PrintDetailedMap_Begin( json, - stat.unusedBytes, - stat.allocationCount, - stat.unusedRangeCount, + stats.statistics.blockBytes - stats.statistics.allocationBytes, + stats.statistics.allocationCount, + stats.unusedRangeCount, mapRefCount); PrintDetailedMapNode(json, m_Root, LevelToNodeSize(0)); @@ -9632,6 +9645,25 @@ void VmaBlockMetadata_Buddy::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVi outInfo.pUserData = node->allocation.userData; } +void* VmaBlockMetadata_Buddy::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ + uint32_t level = 0; + const Node* const node = FindAllocationNode((VkDeviceSize)allocHandle - 1, level); + return node->allocation.userData; +} + +VmaAllocHandle VmaBlockMetadata_Buddy::GetAllocationListBegin() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_Buddy::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + return VK_NULL_HANDLE; +} + void VmaBlockMetadata_Buddy::DeleteNodeChildren(Node* node) { if (node->type == Node::TYPE_SPLIT) @@ -9660,7 +9692,7 @@ void VmaBlockMetadata_Buddy::SetAllocationUserData(VmaAllocHandle allocHandle, v node->allocation.userData = userData; } -VmaBlockMetadata_Buddy::Node* VmaBlockMetadata_Buddy::FindAllocationNode(VkDeviceSize offset, uint32_t& outLevel) +VmaBlockMetadata_Buddy::Node* VmaBlockMetadata_Buddy::FindAllocationNode(VkDeviceSize offset, uint32_t& outLevel) const { Node* node = m_Root; VkDeviceSize nodeOffset = 0; @@ -9776,23 +9808,23 @@ void VmaBlockMetadata_Buddy::Free(VmaAllocHandle allocHandle) AddToFreeListFront(level, node); } -void VmaBlockMetadata_Buddy::CalcAllocationStatInfoNode(VmaStatInfo& inoutInfo, const Node* node, VkDeviceSize levelNodeSize) const +void VmaBlockMetadata_Buddy::AddNodeToDetailedStatistics(VmaDetailedStatistics& inoutStats, const Node* node, VkDeviceSize levelNodeSize) const { switch (node->type) { case Node::TYPE_FREE: - VmaAddStatInfoUnusedRange(inoutInfo, levelNodeSize); + VmaAddDetailedStatisticsUnusedRange(inoutStats, levelNodeSize); break; case Node::TYPE_ALLOCATION: - VmaAddStatInfoAllocation(inoutInfo, levelNodeSize); + VmaAddDetailedStatisticsAllocation(inoutStats, levelNodeSize); break; case Node::TYPE_SPLIT: { const VkDeviceSize childrenNodeSize = levelNodeSize / 2; const Node* const leftChild = node->split.leftChild; - CalcAllocationStatInfoNode(inoutInfo, leftChild, childrenNodeSize); + AddNodeToDetailedStatistics(inoutStats, leftChild, childrenNodeSize); const Node* const rightChild = leftChild->buddy; - CalcAllocationStatInfoNode(inoutInfo, rightChild, childrenNodeSize); + AddNodeToDetailedStatistics(inoutStats, rightChild, childrenNodeSize); } break; default: @@ -9857,6 +9889,8 @@ void VmaBlockMetadata_Buddy::DebugLogAllAllocationNode(Node* node, uint32_t leve { switch (node->type) { + case Node::TYPE_FREE: + break; case Node::TYPE_ALLOCATION: DebugLogAllocation(node->offset, LevelToNodeSize(level), node->allocation.userData); break; @@ -9899,6 +9933,7 @@ void VmaBlockMetadata_Buddy::PrintDetailedMapNode(class VmaJsonWriter& json, con #endif // VMA_STATS_STRING_ENABLED #endif // _VMA_BLOCK_METADATA_BUDDY_FUNCTIONS #endif // _VMA_BLOCK_METADATA_BUDDY +#endif // #if 0 #ifndef _VMA_BLOCK_METADATA_TLSF // To not search current larger region if first allocation won't succeed and skip to smaller range @@ -9914,6 +9949,7 @@ public: virtual ~VmaBlockMetadata_TLSF(); size_t GetAllocationCount() const override { return m_AllocCount; } + size_t GetFreeRegionsCount() const override { return m_BlocksFreeCount + 1; } VkDeviceSize GetSumFreeSize() const override { return m_BlocksFreeSize + m_NullBlock->size; } bool IsEmpty() const override { return m_NullBlock->offset == 0; } VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return ((Block*)allocHandle)->offset; }; @@ -9921,11 +9957,11 @@ public: void Init(VkDeviceSize size) override; bool Validate() const override; - void CalcAllocationStatInfo(VmaStatInfo& outInfo) const override; - void AddPoolStats(VmaPoolStats& inoutStats) const override; + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; + void AddStatistics(VmaStatistics& inoutStats) const override; #if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; + void PrintDetailedMap(class VmaJsonWriter& json) const override; #endif bool CreateAllocationRequest( @@ -9944,6 +9980,10 @@ public: void Free(VmaAllocHandle allocHandle) override; void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; + void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; + VmaAllocHandle GetAllocationListBegin() const override; + VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; + VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override; void Clear() override; void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; void DebugLogAllAllocations() const override; @@ -10169,34 +10209,32 @@ bool VmaBlockMetadata_TLSF::Validate() const return true; } -void VmaBlockMetadata_TLSF::CalcAllocationStatInfo(VmaStatInfo& outInfo) const +void VmaBlockMetadata_TLSF::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const { - VmaInitStatInfo(outInfo); - outInfo.blockCount = 1; + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += GetSize(); if (m_NullBlock->size > 0) - VmaAddStatInfoUnusedRange(outInfo, m_NullBlock->size); + VmaAddDetailedStatisticsUnusedRange(inoutStats, m_NullBlock->size); for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) { if (block->IsFree()) - VmaAddStatInfoUnusedRange(outInfo, block->size); + VmaAddDetailedStatisticsUnusedRange(inoutStats, block->size); else - VmaAddStatInfoAllocation(outInfo, block->size); + VmaAddDetailedStatisticsAllocation(inoutStats, block->size); } } -void VmaBlockMetadata_TLSF::AddPoolStats(VmaPoolStats& inoutStats) const +void VmaBlockMetadata_TLSF::AddStatistics(VmaStatistics& inoutStats) const { - inoutStats.size += GetSize(); - inoutStats.unusedSize += GetSumFreeSize(); - inoutStats.allocationCount += m_AllocCount; - inoutStats.unusedRangeCount += m_BlocksFreeCount; - if(m_NullBlock->size > 0) - ++inoutStats.unusedRangeCount; + inoutStats.blockCount++; + inoutStats.allocationCount += (uint32_t)m_AllocCount; + inoutStats.blockBytes += GetSize(); + inoutStats.allocationBytes += GetSize() - GetSumFreeSize(); } #if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const +void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json) const { size_t blockCount = m_AllocCount + m_BlocksFreeCount; VmaStlAllocator allocator(GetAllocationCallbacks()); @@ -10209,14 +10247,14 @@ void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json, uint32_t } VMA_ASSERT(i == 0); - VmaStatInfo stat; - CalcAllocationStatInfo(stat); + VmaDetailedStatistics stats; + VmaClearDetailedStatistics(stats); + AddDetailedStatistics(stats); PrintDetailedMap_Begin(json, - stat.unusedBytes, - stat.allocationCount, - stat.unusedRangeCount, - mapRefCount); + stats.statistics.blockBytes - stats.statistics.allocationBytes, + stats.statistics.allocationCount, + stats.unusedRangeCount); for (; i < blockCount; ++i) { @@ -10327,6 +10365,33 @@ bool VmaBlockMetadata_TLSF::CreateAllocationRequest( nextListBlock = nextListBlock->NextFree(); } } + else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT ) + { + // Perform search from the start + VmaStlAllocator allocator(GetAllocationCallbacks()); + VmaVector> blockList(m_BlocksFreeCount, allocator); + + size_t i = m_BlocksFreeCount; + for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) + { + if (block->IsFree() && block->size >= allocSize) + blockList[--i] = block; + } + + for (; i < m_BlocksFreeCount; ++i) + { + Block& block = *blockList[i]; + if (CheckBlock(block, GetListIndex(block.size), allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + } + + // If failed check null block + if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + + // Whole range searched, no more memory + return false; + } else { // Check larger bucket @@ -10559,6 +10624,50 @@ void VmaBlockMetadata_TLSF::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVir outInfo.pUserData = block->UserData(); } +void* VmaBlockMetadata_TLSF::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ + Block* block = (Block*)allocHandle; + VMA_ASSERT(!block->IsFree() && "Cannot get user data for free block!"); + return block->UserData(); +} + +VmaAllocHandle VmaBlockMetadata_TLSF::GetAllocationListBegin() const +{ + if (m_AllocCount == 0) + return VK_NULL_HANDLE; + + for (Block* block = m_NullBlock->prevPhysical; block; block = block->prevPhysical) + { + if (!block->IsFree()) + return (VmaAllocHandle)block; + } + VMA_ASSERT(false && "If m_AllocCount > 0 then should find any allocation!"); + return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_TLSF::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ + Block* startBlock = (Block*)prevAlloc; + VMA_ASSERT(!startBlock->IsFree() && "Incorrect block!"); + + for (Block* block = startBlock->prevPhysical; block; block = block->prevPhysical) + { + if (!block->IsFree()) + return (VmaAllocHandle)block; + } + return VK_NULL_HANDLE; +} + +VkDeviceSize VmaBlockMetadata_TLSF::GetNextFreeRegionSize(VmaAllocHandle alloc) const +{ + Block* block = (Block*)alloc; + VMA_ASSERT(!block->IsFree() && "Incorrect block!"); + + if (block->prevPhysical) + return block->prevPhysical->IsFree() ? block->prevPhysical->size : 0; + return 0; +} + void VmaBlockMetadata_TLSF::Clear() { m_AllocCount = 0; @@ -10771,7 +10880,7 @@ Synchronized internally with a mutex. */ class VmaBlockVector { - friend class VmaDefragmentationAlgorithm_Generic; + friend struct VmaDefragmentationContext_T; VMA_CLASS_NO_COPY(VmaBlockVector) public: VmaBlockVector( @@ -10798,10 +10907,16 @@ public: uint32_t GetAlgorithm() const { return m_Algorithm; } bool HasExplicitBlockSize() const { return m_ExplicitBlockSize; } float GetPriority() const { return m_Priority; } - void* GetAllocationNextPtr() const { return m_pMemoryAllocateNext; } + void* const GetAllocationNextPtr() const { return m_pMemoryAllocateNext; } + // To be used only while the m_Mutex is locked. Used during defragmentation. + size_t GetBlockCount() const { return m_Blocks.size(); } + // To be used only while the m_Mutex is locked. Used during defragmentation. + VmaDeviceMemoryBlock* GetBlock(size_t index) const { return m_Blocks[index]; } + VMA_RW_MUTEX &GetMutex() { return m_Mutex; } VkResult CreateMinBlocks(); - void AddPoolStats(VmaPoolStats* pStats); + void AddStatistics(VmaStatistics& inoutStats); + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats); bool IsEmpty(); bool IsCorruptionDetectionEnabled() const; @@ -10814,8 +10929,6 @@ public: VmaAllocation* pAllocations); void Free(const VmaAllocation hAllocation); - // Adds statistics of this BlockVector to pStats. - void AddStats(VmaStats* pStats); #if VMA_STATS_STRING_ENABLED void PrintDetailedMap(class VmaJsonWriter& json); @@ -10823,34 +10936,6 @@ public: VkResult CheckCorruption(); - // Saves results in pCtx->res. - void Defragment( - class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags, - VkDeviceSize& maxCpuBytesToMove, uint32_t& maxCpuAllocationsToMove, - VkDeviceSize& maxGpuBytesToMove, uint32_t& maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer); - void DefragmentationEnd( - class VmaBlockVectorDefragmentationContext* pCtx, - uint32_t flags, - VmaDefragmentationStats* pStats); - - uint32_t ProcessDefragmentations( - class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationPassMoveInfo* pMove, uint32_t maxMoves); - - void CommitDefragmentations( - class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationStats* pStats); - - //////////////////////////////////////////////////////////////////////////////// - // To be used only while the m_Mutex is locked. Used during defragmentation. - - size_t GetBlockCount() const { return m_Blocks.size(); } - VmaDeviceMemoryBlock* GetBlock(size_t index) const { return m_Blocks[index]; } - size_t CalcAllocationCount() const; - bool IsBufferImageGranularityConflictPossible() const; - private: const VmaAllocator m_hAllocator; const VmaPool m_hParentPool; @@ -10869,6 +10954,9 @@ private: // Incrementally sorted by sumFreeSize, ascending. VmaVector> m_Blocks; uint32_t m_NextBlockId; + bool m_IncrementalSort = true; + + void SetIncrementalSort(bool val) { m_IncrementalSort = val; } VkDeviceSize CalcMaxBlockSize() const; // Finds and removes given block from vector. @@ -10876,6 +10964,7 @@ private: // Performs single step in sorting m_Blocks. They may not be fully sorted // after this call. void IncrementallySortBlocks(); + void SortByFreeSize(); VkResult AllocatePage( VkDeviceSize size, @@ -10894,327 +10983,103 @@ private: uint32_t strategy, VmaAllocation* pAllocation); - VkResult CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex); - // Saves result to pCtx->res. - void ApplyDefragmentationMovesCpu( - VmaBlockVectorDefragmentationContext* pDefragCtx, - const VmaVector>& moves); - // Saves result to pCtx->res. - void ApplyDefragmentationMovesGpu( - VmaBlockVectorDefragmentationContext* pDefragCtx, - VmaVector>& moves, - VkCommandBuffer commandBuffer); + VkResult CommitAllocationRequest( + VmaAllocationRequest& allocRequest, + VmaDeviceMemoryBlock* pBlock, + VkDeviceSize alignment, + VmaAllocationCreateFlags allocFlags, + void* pUserData, + VmaSuballocationType suballocType, + VmaAllocation* pAllocation); - /* - Used during defragmentation. pDefragmentationStats is optional. It is in/out - - updated with new data. - */ - void FreeEmptyBlocks(VmaDefragmentationStats* pDefragmentationStats); - bool HasEmptyBlock() const; + VkResult CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex); + bool HasEmptyBlock(); }; #endif // _VMA_BLOCK_VECTOR -#ifndef _VMA_DEFRAGMENTATION_ALGORITHM -struct VmaDefragmentationMove -{ - size_t srcBlockIndex; - size_t dstBlockIndex; - VkDeviceSize srcOffset; - VkDeviceSize dstOffset; - VmaAllocHandle dstHandle; - VkDeviceSize size; - VmaAllocation hAllocation; - VmaDeviceMemoryBlock* pSrcBlock; - VmaDeviceMemoryBlock* pDstBlock; -}; - -/* -Performs defragmentation: - -- Updates `pBlockVector->m_pMetadata`. -- Updates allocations by calling ChangeBlockAllocation() or ChangeOffset(). -- Does not move actual data, only returns requested moves as `moves`. -*/ -class VmaDefragmentationAlgorithm -{ - VMA_CLASS_NO_COPY(VmaDefragmentationAlgorithm) -public: - VmaDefragmentationAlgorithm( - VmaAllocator hAllocator, - VmaBlockVector* pBlockVector) - : m_hAllocator(hAllocator), - m_pBlockVector(pBlockVector) {} - virtual ~VmaDefragmentationAlgorithm() = default; - - virtual void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged) = 0; - virtual void AddAll() = 0; - - virtual VkResult Defragment( - VmaVector>& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - VmaDefragmentationFlags flags) = 0; - - virtual VkDeviceSize GetBytesMoved() const = 0; - virtual uint32_t GetAllocationsMoved() const = 0; - -protected: - struct AllocationInfo - { - VmaAllocation m_hAllocation; - VkBool32* m_pChanged; - - AllocationInfo() : m_hAllocation(VK_NULL_HANDLE), m_pChanged(VMA_NULL) {} - AllocationInfo(VmaAllocation hAlloc, VkBool32* pChanged) : m_hAllocation(hAlloc), m_pChanged(pChanged) {} - }; - - VmaAllocator const m_hAllocator; - VmaBlockVector* const m_pBlockVector; -}; - -#endif // _VMA_DEFRAGMENTATION_ALGORITHM - -#ifndef _VMA_DEFRAGMENTATION_ALGORITHM_GENERIC -class VmaDefragmentationAlgorithm_Generic : public VmaDefragmentationAlgorithm -{ - VMA_CLASS_NO_COPY(VmaDefragmentationAlgorithm_Generic) -public: - VmaDefragmentationAlgorithm_Generic( - VmaAllocator hAllocator, - VmaBlockVector* pBlockVector, - bool overlappingMoveSupported); - virtual ~VmaDefragmentationAlgorithm_Generic(); - - virtual void AddAll() { m_AllAllocations = true; } - virtual VkDeviceSize GetBytesMoved() const { return m_BytesMoved; } - virtual uint32_t GetAllocationsMoved() const { return m_AllocationsMoved; } - - virtual void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged); - virtual VkResult Defragment( - VmaVector>& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - VmaDefragmentationFlags flags); - -private: - struct AllocationInfoSizeGreater - { - bool operator()(const AllocationInfo& lhs, const AllocationInfo& rhs) const; - }; - struct AllocationInfoOffsetGreater - { - bool operator()(const AllocationInfo& lhs, const AllocationInfo& rhs) const; - }; - struct BlockInfo - { - size_t m_OriginalBlockIndex; - VmaDeviceMemoryBlock* m_pBlock; - bool m_HasNonMovableAllocations; - VmaVector> m_Allocations; - - BlockInfo(const VkAllocationCallbacks* pAllocationCallbacks); - - void CalcHasNonMovableAllocations(); - void SortAllocationsBySizeDescending(); - void SortAllocationsByOffsetDescending(); - }; - struct BlockPointerLess - { - bool operator()(const BlockInfo* pLhsBlockInfo, const VmaDeviceMemoryBlock* pRhsBlock) const; - bool operator()(const BlockInfo* pLhsBlockInfo, const BlockInfo* pRhsBlockInfo) const; - }; - // 1. Blocks with some non-movable allocations go first. - // 2. Blocks with smaller sumFreeSize go first. - struct BlockInfoCompareMoveDestination - { - bool operator()(const BlockInfo* pLhsBlockInfo, const BlockInfo* pRhsBlockInfo) const; - }; - typedef VmaVector> BlockInfoVector; - - BlockInfoVector m_Blocks; - uint32_t m_AllocationCount; - bool m_AllAllocations; - VkDeviceSize m_BytesMoved; - uint32_t m_AllocationsMoved; - - static bool MoveMakesSense( - size_t dstBlockIndex, VkDeviceSize dstOffset, - size_t srcBlockIndex, VkDeviceSize srcOffset); - - size_t CalcBlocksWithNonMovableCount() const; - VkResult DefragmentRound( - VmaVector>& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - bool freeOldAllocations); -}; -#endif // _VMA_DEFRAGMENTATION_ALGORITHM_GENERIC - -#ifndef _VMA_DEFRAGMENTATION_ALGORITHM_FAST -class VmaDefragmentationAlgorithm_Fast : public VmaDefragmentationAlgorithm -{ - VMA_CLASS_NO_COPY(VmaDefragmentationAlgorithm_Fast) -public: - VmaDefragmentationAlgorithm_Fast( - VmaAllocator hAllocator, - VmaBlockVector* pBlockVector, - bool overlappingMoveSupported); - virtual ~VmaDefragmentationAlgorithm_Fast() = default; - - virtual void AddAll() { m_AllAllocations = true; } - virtual VkDeviceSize GetBytesMoved() const { return m_BytesMoved; } - virtual uint32_t GetAllocationsMoved() const { return m_AllocationsMoved; } - virtual void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged) { ++m_AllocationCount; } - - virtual VkResult Defragment( - VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - VmaDefragmentationFlags flags); - -private: - struct BlockInfo - { - size_t origBlockIndex; - }; - class FreeSpaceDatabase - { - public: - FreeSpaceDatabase(); - - void Register(size_t blockInfoIndex, VkDeviceSize offset, VkDeviceSize size); - bool Fetch(VkDeviceSize alignment, VkDeviceSize size, - size_t& outBlockInfoIndex, VkDeviceSize& outDstOffset); - - private: - static const size_t MAX_COUNT = 4; - - struct FreeSpace - { - size_t blockInfoIndex; // SIZE_MAX means this structure is invalid. - VkDeviceSize offset; - VkDeviceSize size; - } m_FreeSpaces[MAX_COUNT]; - }; - - const bool m_OverlappingMoveSupported; - - uint32_t m_AllocationCount; - bool m_AllAllocations; - VkDeviceSize m_BytesMoved; - uint32_t m_AllocationsMoved; - - VmaVector> m_BlockInfos; - - void PreprocessMetadata(); - void PostprocessMetadata(); - void InsertSuballoc(VmaBlockMetadata_Generic* pMetadata, const VmaSuballocation& suballoc); -}; -#endif // _VMA_DEFRAGMENTATION_ALGORITHM_FAST - -#ifndef _VMA_BLOCK_VECTOR_DEFRAGMENTATION_CONTEXT -struct VmaBlockDefragmentationContext -{ - enum BLOCK_FLAG - { - BLOCK_FLAG_USED = 0x00000001, - }; - uint32_t flags; - VkBuffer hBuffer; -}; - -class VmaBlockVectorDefragmentationContext -{ - VMA_CLASS_NO_COPY(VmaBlockVectorDefragmentationContext) -public: - VkResult res; - bool mutexLocked; - VmaVector> blockContexts; - VmaVector> defragmentationMoves; - uint32_t defragmentationMovesProcessed; - uint32_t defragmentationMovesCommitted; - bool hasDefragmentationPlan; - - VmaBlockVectorDefragmentationContext( - VmaAllocator hAllocator, - VmaPool hCustomPool, // Optional. - VmaBlockVector* pBlockVector); - ~VmaBlockVectorDefragmentationContext(); - - VmaPool GetCustomPool() const { return m_hCustomPool; } - VmaBlockVector* GetBlockVector() const { return m_pBlockVector; } - VmaDefragmentationAlgorithm* GetAlgorithm() const { return m_pAlgorithm; } - void AddAll() { m_AllAllocations = true; } - - void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged); - void Begin(bool overlappingMoveSupported, VmaDefragmentationFlags flags); - -private: - struct AllocInfo - { - VmaAllocation hAlloc; - VkBool32* pChanged; - }; - - const VmaAllocator m_hAllocator; - // Null if not from custom pool. - const VmaPool m_hCustomPool; - // Redundant, for convenience not to fetch from m_hCustomPool->m_BlockVector or m_hAllocator->m_pBlockVectors. - VmaBlockVector* const m_pBlockVector; - // Owner of this object. - VmaDefragmentationAlgorithm* m_pAlgorithm; - // Used between constructor and Begin. - VmaVector> m_Allocations; - bool m_AllAllocations; -}; -#endif // _VMA_BLOCK_VECTOR_DEFRAGMENTATION_CONTEXT - #ifndef _VMA_DEFRAGMENTATION_CONTEXT struct VmaDefragmentationContext_T { -private: VMA_CLASS_NO_COPY(VmaDefragmentationContext_T) public: VmaDefragmentationContext_T( VmaAllocator hAllocator, - uint32_t flags, - VmaDefragmentationStats* pStats); + const VmaDefragmentationInfo& info); ~VmaDefragmentationContext_T(); - void AddPools(uint32_t poolCount, const VmaPool* pPools); - void AddAllocations( - uint32_t allocationCount, - const VmaAllocation* pAllocations, - VkBool32* pAllocationsChanged); + void GetStats(VmaDefragmentationStats& outStats) { outStats = m_GlobalStats; } - /* - Returns: - - `VK_SUCCESS` if succeeded and object can be destroyed immediately. - - `VK_NOT_READY` if succeeded but the object must remain alive until vmaDefragmentationEnd(). - - Negative value if error occurred and object can be destroyed immediately. - */ - VkResult Defragment( - VkDeviceSize maxCpuBytesToMove, uint32_t maxCpuAllocationsToMove, - VkDeviceSize maxGpuBytesToMove, uint32_t maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags); - - VkResult DefragmentPassBegin(VmaDefragmentationPassInfo* pInfo); - VkResult DefragmentPassEnd(); + VkResult DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo); + VkResult DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo); private: - const VmaAllocator m_hAllocator; - const uint32_t m_Flags; - VmaDefragmentationStats* const m_pStats; + // Max number of allocations to ignore due to size constraints before ending single pass + static const uint8_t MAX_ALLOCS_TO_IGNORE = 16; + enum class CounterStatus { Pass, Ignore, End }; - VkDeviceSize m_MaxCpuBytesToMove; - uint32_t m_MaxCpuAllocationsToMove; - VkDeviceSize m_MaxGpuBytesToMove; - uint32_t m_MaxGpuAllocationsToMove; + struct FragmentedBlock + { + uint32_t data; + VmaDeviceMemoryBlock* block; + }; + struct StateBalanced + { + VkDeviceSize avgFreeSize = 0; + VkDeviceSize avgAllocSize = UINT64_MAX; + }; + struct StateExtensive + { + enum class Operation : uint8_t + { + FindFreeBlockBuffer, FindFreeBlockTexture, FindFreeBlockAll, + MoveBuffers, MoveTextures, MoveAll, + Cleanup, Done + }; - // Owner of these objects. - VmaBlockVectorDefragmentationContext* m_DefaultPoolContexts[VK_MAX_MEMORY_TYPES]; - // Owner of these objects. - VmaVector> m_CustomPoolContexts; + Operation operation = Operation::FindFreeBlockTexture; + size_t firstFreeBlock = SIZE_MAX; + }; + struct MoveAllocationData + { + VkDeviceSize size; + VkDeviceSize alignment; + VmaSuballocationType type; + VmaAllocationCreateFlags flags; + VmaDefragmentationMove move = {}; + }; + + const VkDeviceSize m_MaxPassBytes; + const uint32_t m_MaxPassAllocations; + + VmaStlAllocator m_MoveAllocator; + VmaVector> m_Moves; + + uint8_t m_IgnoredAllocs = 0; + uint32_t m_Algorithm; + uint32_t m_BlockVectorCount; + VmaBlockVector* m_PoolBlockVector; + VmaBlockVector** m_pBlockVectors; + size_t m_ImmovableBlockCount = 0; + VmaDefragmentationStats m_GlobalStats = { 0 }; + VmaDefragmentationStats m_PassStats = { 0 }; + void* m_AlgorithmState = VMA_NULL; + + static MoveAllocationData GetMoveData(VmaAllocHandle handle, VmaBlockMetadata* metadata); + CounterStatus CheckCounters(VkDeviceSize bytes); + bool IncrementCounters(VkDeviceSize bytes); + bool ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block); + bool AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector); + + bool ComputeDefragmentation(VmaBlockVector& vector, size_t index); + bool ComputeDefragmentation_Fast(VmaBlockVector& vector); + bool ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update); + bool ComputeDefragmentation_Full(VmaBlockVector& vector); + bool ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index); + + void UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state); + bool MoveDataToFreeBlocks(VmaSuballocationType currentType, + VmaBlockVector& vector, size_t firstFreeBlock, + bool& texturePresent, bool& bufferPresent, bool& otherPresent); }; #endif // _VMA_DEFRAGMENTATION_CONTEXT @@ -11264,6 +11129,8 @@ struct VmaPoolListItemTraits #ifndef _VMA_CURRENT_BUDGET_DATA struct VmaCurrentBudgetData { + VMA_ATOMIC_UINT32 m_BlockCount[VK_MAX_MEMORY_HEAPS]; + VMA_ATOMIC_UINT32 m_AllocationCount[VK_MAX_MEMORY_HEAPS]; VMA_ATOMIC_UINT64 m_BlockBytes[VK_MAX_MEMORY_HEAPS]; VMA_ATOMIC_UINT64 m_AllocationBytes[VK_MAX_MEMORY_HEAPS]; @@ -11286,6 +11153,8 @@ VmaCurrentBudgetData::VmaCurrentBudgetData() { for (uint32_t heapIndex = 0; heapIndex < VK_MAX_MEMORY_HEAPS; ++heapIndex) { + m_BlockCount[heapIndex] = 0; + m_AllocationCount[heapIndex] = 0; m_BlockBytes[heapIndex] = 0; m_AllocationBytes[heapIndex] = 0; #if VMA_MEMORY_BUDGET @@ -11303,6 +11172,7 @@ VmaCurrentBudgetData::VmaCurrentBudgetData() void VmaCurrentBudgetData::AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) { m_AllocationBytes[heapIndex] += allocationSize; + ++m_AllocationCount[heapIndex]; #if VMA_MEMORY_BUDGET ++m_OperationsSinceBudgetFetch; #endif @@ -11312,6 +11182,8 @@ void VmaCurrentBudgetData::RemoveAllocation(uint32_t heapIndex, VkDeviceSize all { VMA_ASSERT(m_AllocationBytes[heapIndex] >= allocationSize); m_AllocationBytes[heapIndex] -= allocationSize; + VMA_ASSERT(m_AllocationCount[heapIndex] > 0); + --m_AllocationCount[heapIndex]; #if VMA_MEMORY_BUDGET ++m_OperationsSinceBudgetFetch; #endif @@ -11373,7 +11245,8 @@ public: void GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo); VkResult Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation, VkDeviceSize* outOffset); - void CalculateStats(VmaStatInfo& outStatInfo) const; + void GetStatistics(VmaStatistics& outStats) const; + void CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const; #if VMA_STATS_STRING_ENABLED void BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const; #endif @@ -11390,20 +11263,14 @@ VmaVirtualBlock_T::VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo const uint32_t algorithm = createInfo.flags & VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK; switch (algorithm) { + default: + VMA_ASSERT(0); case 0: - m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_Generic)(VK_NULL_HANDLE, 1, true); - break; - case VMA_VIRTUAL_BLOCK_CREATE_BUDDY_ALGORITHM_BIT: - m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_Buddy)(VK_NULL_HANDLE, 1, true); + m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true); break; case VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT: m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_Linear)(VK_NULL_HANDLE, 1, true); break; - case VMA_VIRTUAL_BLOCK_CREATE_TLSF_ALGORITHM_BIT: - m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true); - break; - default: - VMA_ASSERT(0); } m_Metadata->Init(createInfo.size); @@ -11457,10 +11324,16 @@ VkResult VmaVirtualBlock_T::Allocate(const VmaVirtualAllocationCreateInfo& creat return VK_ERROR_OUT_OF_DEVICE_MEMORY; } -void VmaVirtualBlock_T::CalculateStats(VmaStatInfo& outStatInfo) const +void VmaVirtualBlock_T::GetStatistics(VmaStatistics& outStats) const { - m_Metadata->CalcAllocationStatInfo(outStatInfo); - VmaPostprocessCalcStatInfo(outStatInfo); + VmaClearStatistics(outStats); + m_Metadata->AddStatistics(outStats); +} + +void VmaVirtualBlock_T::CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const +{ + VmaClearDetailedStatistics(outStats); + m_Metadata->AddDetailedStatistics(outStats); } #if VMA_STATS_STRING_ENABLED @@ -11469,17 +11342,18 @@ void VmaVirtualBlock_T::BuildStatsString(bool detailedMap, VmaStringBuilder& sb) VmaJsonWriter json(GetAllocationCallbacks(), sb); json.BeginObject(); - VmaStatInfo stat = {}; - CalculateStats(stat); + VmaDetailedStatistics stats; + CalculateDetailedStatistics(stats); json.WriteString("Stats"); - VmaPrintStatInfo(json, stat); + VmaPrintDetailedStatistics(json, stats); if (detailedMap) { json.WriteString("Details"); - m_Metadata->PrintDetailedMap(json, - UINT32_MAX); // mapRefCount + json.BeginObject(); + m_Metadata->PrintDetailedMap(json); + json.EndObject(); } json.EndObject(); @@ -11488,6 +11362,7 @@ void VmaVirtualBlock_T::BuildStatsString(bool detailedMap, VmaStringBuilder& sb) #endif // _VMA_VIRTUAL_BLOCK_T_FUNCTIONS #endif // _VMA_VIRTUAL_BLOCK_T + // Main allocator object. struct VmaAllocator_T { @@ -11606,7 +11481,7 @@ public: size_t allocationCount, const VmaAllocation* pAllocations); - void CalculateStats(VmaStats* pStats); + void CalculateStatistics(VmaTotalStatistics* pStats); void GetHeapBudgets( VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount); @@ -11615,24 +11490,12 @@ public: void PrintDetailedMap(class VmaJsonWriter& json); #endif - VkResult DefragmentationBegin( - const VmaDefragmentationInfo2& info, - VmaDefragmentationStats* pStats, - VmaDefragmentationContext* pContext); - VkResult DefragmentationEnd( - VmaDefragmentationContext context); - - VkResult DefragmentationPassBegin( - VmaDefragmentationPassInfo* pInfo, - VmaDefragmentationContext context); - VkResult DefragmentationPassEnd( - VmaDefragmentationContext context); - void GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo); VkResult CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool); void DestroyPool(VmaPool pool); - void GetPoolStats(VmaPool pool, VmaPoolStats* pPoolStats); + void GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats); + void CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats); void SetCurrentFrameIndex(uint32_t frameIndex); uint32_t GetCurrentFrameIndex() const { return m_CurrentFrameIndex.load(); } @@ -11895,19 +11758,11 @@ void VmaDeviceMemoryBlock::Init( m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Linear)(hAllocator->GetAllocationCallbacks(), bufferImageGranularity, false); // isVirtual break; - case VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT: - m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Buddy)(hAllocator->GetAllocationCallbacks(), - bufferImageGranularity, false); // isVirtual - break; - case VMA_POOL_CREATE_TLSF_ALGORITHM_BIT: - m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(), - bufferImageGranularity, false); // isVirtual - break; default: VMA_ASSERT(0); // Fall-through. case 0: - m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Generic)(hAllocator->GetAllocationCallbacks(), + m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(), bufferImageGranularity, false); // isVirtual } m_pMetadata->Init(newSize); @@ -12106,18 +11961,17 @@ VkResult VmaDeviceMemoryBlock::BindImageMemory( #endif // _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS #ifndef _VMA_ALLOCATION_T_FUNCTIONS -VmaAllocation_T::VmaAllocation_T(bool userDataString, bool mappingAllowed) +VmaAllocation_T::VmaAllocation_T(bool mappingAllowed) : m_Alignment{ 1 }, m_Size{ 0 }, m_pUserData{ VMA_NULL }, + m_pName{ VMA_NULL }, m_MemoryTypeIndex{ 0 }, m_Type{ (uint8_t)ALLOCATION_TYPE_NONE }, m_SuballocationType{ (uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN }, m_MapCount{ 0 }, m_Flags{ 0 } { - if(userDataString) - m_Flags |= (uint8_t)FLAG_USER_DATA_STRING; if(mappingAllowed) m_Flags |= (uint8_t)FLAG_MAPPING_ALLOWED; @@ -12131,7 +11985,7 @@ VmaAllocation_T::~VmaAllocation_T() VMA_ASSERT(m_MapCount == 0 && "Allocation was not unmapped before destruction."); // Check if owned string was freed. - VMA_ASSERT(m_pUserData == VMA_NULL); + VMA_ASSERT(m_pName == VMA_NULL); } void VmaAllocation_T::InitBlockAllocation( @@ -12186,51 +12040,33 @@ void VmaAllocation_T::InitDedicatedAllocation( m_DedicatedAllocation.m_Next = VMA_NULL; } -void VmaAllocation_T::SetUserData(VmaAllocator hAllocator, void* pUserData) +void VmaAllocation_T::SetName(VmaAllocator hAllocator, const char* pName) { - if (IsUserDataString()) - { - VMA_ASSERT(pUserData == VMA_NULL || pUserData != m_pUserData); + VMA_ASSERT(pName == VMA_NULL || pName != m_pName); - FreeUserDataString(hAllocator); + FreeName(hAllocator); - if (pUserData != VMA_NULL) - { - m_pUserData = VmaCreateStringCopy(hAllocator->GetAllocationCallbacks(), (const char*)pUserData); - } - } - else - { - m_pUserData = pUserData; - } + if (pName != VMA_NULL) + m_pName = VmaCreateStringCopy(hAllocator->GetAllocationCallbacks(), pName); } -void VmaAllocation_T::ChangeBlockAllocation( - VmaAllocator hAllocator, - VmaDeviceMemoryBlock* block, - VmaAllocHandle allocHandle) +uint8_t VmaAllocation_T::SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation) { - VMA_ASSERT(block != VMA_NULL); + VMA_ASSERT(allocation != VMA_NULL); VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); + VMA_ASSERT(allocation->m_Type == ALLOCATION_TYPE_BLOCK); - // Move mapping reference counter from old block to new block. - if (block != m_BlockAllocation.m_Block) - { - uint32_t mapRefCount = m_MapCount; - if (IsPersistentMap()) - ++mapRefCount; - m_BlockAllocation.m_Block->Unmap(hAllocator, mapRefCount); - block->Map(hAllocator, mapRefCount, VMA_NULL); - } + if (m_MapCount != 0) + m_BlockAllocation.m_Block->Unmap(hAllocator, m_MapCount); - m_BlockAllocation.m_Block = block; - m_BlockAllocation.m_AllocHandle = allocHandle; -} + m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, allocation); + VMA_SWAP(m_BlockAllocation, allocation->m_BlockAllocation); + m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, this); -void VmaAllocation_T::ChangeAllocHandle(VmaAllocHandle newAllocHandle) -{ - VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); - m_BlockAllocation.m_AllocHandle = newAllocHandle; +#if VMA_STATS_STRING_ENABLED + VMA_SWAP(m_BufferImageUsage, allocation->m_BufferImageUsage); +#endif + return m_MapCount; } VmaAllocHandle VmaAllocation_T::GetAllocHandle() const @@ -12314,19 +12150,6 @@ void* VmaAllocation_T::GetMappedData() const } } -void VmaAllocation_T::DedicatedAllocCalcStatsInfo(VmaStatInfo& outInfo) -{ - VMA_ASSERT(m_Type == ALLOCATION_TYPE_DEDICATED); - outInfo.blockCount = 1; - outInfo.allocationCount = 1; - outInfo.unusedRangeCount = 0; - outInfo.usedBytes = m_Size; - outInfo.unusedBytes = 0; - outInfo.allocationSizeMin = outInfo.allocationSizeMax = m_Size; - outInfo.unusedRangeSizeMin = UINT64_MAX; - outInfo.unusedRangeSizeMax = 0; -} - void VmaAllocation_T::BlockAllocMap() { VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); @@ -12429,35 +12252,31 @@ void VmaAllocation_T::PrintParameters(class VmaJsonWriter& json) const json.WriteString("Size"); json.WriteNumber(m_Size); + json.WriteString("Usage"); + json.WriteNumber(m_BufferImageUsage); if (m_pUserData != VMA_NULL) { - json.WriteString("UserData"); - if (IsUserDataString()) - { - json.WriteString((const char*)m_pUserData); - } - else - { - json.BeginString(); - json.ContinueString_Pointer(m_pUserData); - json.EndString(); - } + json.WriteString("CustomData"); + json.BeginString(); + json.ContinueString_Pointer(m_pUserData); + json.EndString(); } - - if (m_BufferImageUsage != 0) + if (m_pName != VMA_NULL) { - json.WriteString("Usage"); - json.WriteNumber(m_BufferImageUsage); + json.WriteString("Name"); + json.WriteString(m_pName); } } #endif // VMA_STATS_STRING_ENABLED -void VmaAllocation_T::FreeUserDataString(VmaAllocator hAllocator) +void VmaAllocation_T::FreeName(VmaAllocator hAllocator) { - VMA_ASSERT(IsUserDataString()); - VmaFreeString(hAllocator->GetAllocationCallbacks(), (char*)m_pUserData); - m_pUserData = VMA_NULL; + if(m_pName) + { + VmaFreeString(hAllocator->GetAllocationCallbacks(), m_pName); + m_pName = VMA_NULL; + } } #endif // _VMA_ALLOCATION_T_FUNCTIONS @@ -12512,19 +12331,31 @@ VkResult VmaBlockVector::CreateMinBlocks() return VK_SUCCESS; } -void VmaBlockVector::AddPoolStats(VmaPoolStats* pStats) +void VmaBlockVector::AddStatistics(VmaStatistics& inoutStats) { VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); const size_t blockCount = m_Blocks.size(); - pStats->blockCount += blockCount; - for (uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) { const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; VMA_ASSERT(pBlock); VMA_HEAVY_ASSERT(pBlock->Validate()); - pBlock->m_pMetadata->AddPoolStats(*pStats); + pBlock->m_pMetadata->AddStatistics(inoutStats); + } +} + +void VmaBlockVector::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) +{ + VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + + const size_t blockCount = m_Blocks.size(); + for (uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) + { + const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pBlock); + VMA_HEAVY_ASSERT(pBlock->Validate()); + pBlock->m_pMetadata->AddDetailedStatistics(inoutStats); } } @@ -12582,14 +12413,8 @@ VkResult VmaBlockVector::Allocate( if (res != VK_SUCCESS) { // Free all already created allocations. - const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex); while (allocIndex--) - { - VmaAllocation_T* const alloc = pAllocations[allocIndex]; - const VkDeviceSize allocSize = alloc->GetSize(); - Free(alloc); - m_hAllocator->m_Budget.RemoveAllocation(heapIndex, allocSize); - } + Free(pAllocations[allocIndex]); memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); } @@ -12800,8 +12625,7 @@ VkResult VmaBlockVector::AllocatePage( return VK_ERROR_OUT_OF_DEVICE_MEMORY; } -void VmaBlockVector::Free( - const VmaAllocation hAllocation) +void VmaBlockVector::Free(const VmaAllocation hAllocation) { VmaDeviceMemoryBlock* pBlockToDelete = VMA_NULL; @@ -12872,6 +12696,9 @@ void VmaBlockVector::Free( pBlockToDelete->Destroy(m_hAllocator); vma_delete(m_hAllocator, pBlockToDelete); } + + m_hAllocator->m_Budget.RemoveAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), hAllocation->GetSize()); + m_hAllocator->m_AllocationObjectAllocator.Free(hAllocation); } VkDeviceSize VmaBlockVector::CalcMaxBlockSize() const @@ -12903,6 +12730,8 @@ void VmaBlockVector::Remove(VmaDeviceMemoryBlock* pBlock) void VmaBlockVector::IncrementallySortBlocks() { + if (!m_IncrementalSort) + return; if (m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) { // Bubble sort only until first swap. @@ -12917,6 +12746,15 @@ void VmaBlockVector::IncrementallySortBlocks() } } +void VmaBlockVector::SortByFreeSize() +{ + VMA_SORT(m_Blocks.begin(), m_Blocks.end(), + [](auto* b1, auto* b2) + { + return b1->m_pMetadata->GetSumFreeSize() < b2->m_pMetadata->GetSumFreeSize(); + }); +} + VkResult VmaBlockVector::AllocateFromBlock( VmaDeviceMemoryBlock* pBlock, VkDeviceSize size, @@ -12928,10 +12766,6 @@ VkResult VmaBlockVector::AllocateFromBlock( VmaAllocation* pAllocation) { const bool isUpperAddress = (allocFlags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0; - const bool mapped = (allocFlags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; - const bool isUserDataString = (allocFlags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; - const bool isMappingAllowed = (allocFlags & - (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; VmaAllocationRequest currRequest = {}; if (pBlock->m_pMetadata->CreateAllocationRequest( @@ -12942,45 +12776,64 @@ VkResult VmaBlockVector::AllocateFromBlock( strategy, &currRequest)) { - pBlock->PostAlloc(); - - // Allocate from pCurrBlock. - if (mapped) - { - VkResult res = pBlock->Map(m_hAllocator, 1, VMA_NULL); - if (res != VK_SUCCESS) - { - return res; - } - } - - *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(isUserDataString, isMappingAllowed); - pBlock->m_pMetadata->Alloc(currRequest, suballocType, *pAllocation); - (*pAllocation)->InitBlockAllocation( - pBlock, - currRequest.allocHandle, - alignment, - currRequest.size, // Not size, as actual allocation size may be larger than requested! - m_MemoryTypeIndex, - suballocType, - mapped); - VMA_HEAVY_ASSERT(pBlock->Validate()); - (*pAllocation)->SetUserData(m_hAllocator, pUserData); - m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), currRequest.size); - if (VMA_DEBUG_INITIALIZE_ALLOCATIONS) - { - m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); - } - if (IsCorruptionDetectionEnabled()) - { - VkResult res = pBlock->WriteMagicValueAfterAllocation(m_hAllocator, (*pAllocation)->GetOffset(), currRequest.size); - VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to write magic value."); - } - return VK_SUCCESS; + return CommitAllocationRequest(currRequest, pBlock, alignment, allocFlags, pUserData, suballocType, pAllocation); } return VK_ERROR_OUT_OF_DEVICE_MEMORY; } +VkResult VmaBlockVector::CommitAllocationRequest( + VmaAllocationRequest& allocRequest, + VmaDeviceMemoryBlock* pBlock, + VkDeviceSize alignment, + VmaAllocationCreateFlags allocFlags, + void* pUserData, + VmaSuballocationType suballocType, + VmaAllocation* pAllocation) +{ + const bool mapped = (allocFlags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; + const bool isUserDataString = (allocFlags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; + const bool isMappingAllowed = (allocFlags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; + + pBlock->PostAlloc(); + // Allocate from pCurrBlock. + if (mapped) + { + VkResult res = pBlock->Map(m_hAllocator, 1, VMA_NULL); + if (res != VK_SUCCESS) + { + return res; + } + } + + *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(isMappingAllowed); + pBlock->m_pMetadata->Alloc(allocRequest, suballocType, *pAllocation); + (*pAllocation)->InitBlockAllocation( + pBlock, + allocRequest.allocHandle, + alignment, + allocRequest.size, // Not size, as actual allocation size may be larger than requested! + m_MemoryTypeIndex, + suballocType, + mapped); + VMA_HEAVY_ASSERT(pBlock->Validate()); + if (isUserDataString) + (*pAllocation)->SetName(m_hAllocator, (const char*)pUserData); + else + (*pAllocation)->SetUserData(m_hAllocator, pUserData); + m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), allocRequest.size); + if (VMA_DEBUG_INITIALIZE_ALLOCATIONS) + { + m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); + } + if (IsCorruptionDetectionEnabled()) + { + VkResult res = pBlock->WriteMagicValueAfterAllocation(m_hAllocator, (*pAllocation)->GetOffset(), allocRequest.size); + VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to write magic value."); + } + return VK_SUCCESS; +} + VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex) { VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; @@ -13002,6 +12855,7 @@ VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIn VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; if (m_hAllocator->m_UseExtMemoryPriority) { + VMA_ASSERT(m_Priority >= 0.f && m_Priority <= 1.f); priorityInfo.priority = m_Priority; VmaPnextChainPushFront(&allocInfo, &priorityInfo); } @@ -13047,229 +12901,7 @@ VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIn return VK_SUCCESS; } -void VmaBlockVector::ApplyDefragmentationMovesCpu( - VmaBlockVectorDefragmentationContext* pDefragCtx, - const VmaVector>& moves) -{ - const size_t blockCount = m_Blocks.size(); - const bool isNonCoherent = m_hAllocator->IsMemoryTypeNonCoherent(m_MemoryTypeIndex); - - enum BLOCK_FLAG - { - BLOCK_FLAG_USED = 0x00000001, - BLOCK_FLAG_MAPPED_FOR_DEFRAGMENTATION = 0x00000002, - }; - - struct BlockInfo - { - uint32_t flags; - void* pMappedData; - }; - VmaVector< BlockInfo, VmaStlAllocator > - blockInfo(blockCount, BlockInfo(), VmaStlAllocator(m_hAllocator->GetAllocationCallbacks())); - memset(blockInfo.data(), 0, blockCount * sizeof(BlockInfo)); - - // Go over all moves. Mark blocks that are used with BLOCK_FLAG_USED. - const size_t moveCount = moves.size(); - for (size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex) - { - const VmaDefragmentationMove& move = moves[moveIndex]; - blockInfo[move.srcBlockIndex].flags |= BLOCK_FLAG_USED; - blockInfo[move.dstBlockIndex].flags |= BLOCK_FLAG_USED; - } - - VMA_ASSERT(pDefragCtx->res == VK_SUCCESS); - - // Go over all blocks. Get mapped pointer or map if necessary. - for (size_t blockIndex = 0; pDefragCtx->res == VK_SUCCESS && blockIndex < blockCount; ++blockIndex) - { - BlockInfo& currBlockInfo = blockInfo[blockIndex]; - VmaDeviceMemoryBlock* pBlock = m_Blocks[blockIndex]; - if ((currBlockInfo.flags & BLOCK_FLAG_USED) != 0) - { - currBlockInfo.pMappedData = pBlock->GetMappedData(); - // It is not originally mapped - map it. - if (currBlockInfo.pMappedData == VMA_NULL) - { - pDefragCtx->res = pBlock->Map(m_hAllocator, 1, &currBlockInfo.pMappedData); - if (pDefragCtx->res == VK_SUCCESS) - { - currBlockInfo.flags |= BLOCK_FLAG_MAPPED_FOR_DEFRAGMENTATION; - } - } - } - } - - // Go over all moves. Do actual data transfer. - if (pDefragCtx->res == VK_SUCCESS) - { - const VkDeviceSize nonCoherentAtomSize = m_hAllocator->m_PhysicalDeviceProperties.limits.nonCoherentAtomSize; - VkMappedMemoryRange memRange = { VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE }; - - for (size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex) - { - const VmaDefragmentationMove& move = moves[moveIndex]; - - const BlockInfo& srcBlockInfo = blockInfo[move.srcBlockIndex]; - const BlockInfo& dstBlockInfo = blockInfo[move.dstBlockIndex]; - - VMA_ASSERT(srcBlockInfo.pMappedData && dstBlockInfo.pMappedData); - - // Invalidate source. - if (isNonCoherent) - { - VmaDeviceMemoryBlock* const pSrcBlock = m_Blocks[move.srcBlockIndex]; - memRange.memory = pSrcBlock->GetDeviceMemory(); - memRange.offset = VmaAlignDown(move.srcOffset, nonCoherentAtomSize); - memRange.size = VMA_MIN( - VmaAlignUp(move.size + (move.srcOffset - memRange.offset), nonCoherentAtomSize), - pSrcBlock->m_pMetadata->GetSize() - memRange.offset); - (*m_hAllocator->GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hAllocator->m_hDevice, 1, &memRange); - } - - // THE PLACE WHERE ACTUAL DATA COPY HAPPENS. - memmove( - reinterpret_cast(dstBlockInfo.pMappedData) + move.dstOffset, - reinterpret_cast(srcBlockInfo.pMappedData) + move.srcOffset, - static_cast(move.size)); - - if (IsCorruptionDetectionEnabled()) - { - VmaWriteMagicValue(dstBlockInfo.pMappedData, move.dstOffset + move.size); - } - - // Flush destination. - if (isNonCoherent) - { - VmaDeviceMemoryBlock* const pDstBlock = m_Blocks[move.dstBlockIndex]; - memRange.memory = pDstBlock->GetDeviceMemory(); - memRange.offset = VmaAlignDown(move.dstOffset, nonCoherentAtomSize); - memRange.size = VMA_MIN( - VmaAlignUp(move.size + (move.dstOffset - memRange.offset), nonCoherentAtomSize), - pDstBlock->m_pMetadata->GetSize() - memRange.offset); - (*m_hAllocator->GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hAllocator->m_hDevice, 1, &memRange); - } - } - } - - // Go over all blocks in reverse order. Unmap those that were mapped just for defragmentation. - // Regardless of pCtx->res == VK_SUCCESS. - for (size_t blockIndex = blockCount; blockIndex--; ) - { - const BlockInfo& currBlockInfo = blockInfo[blockIndex]; - if ((currBlockInfo.flags & BLOCK_FLAG_MAPPED_FOR_DEFRAGMENTATION) != 0) - { - VmaDeviceMemoryBlock* pBlock = m_Blocks[blockIndex]; - pBlock->Unmap(m_hAllocator, 1); - } - } -} - -void VmaBlockVector::ApplyDefragmentationMovesGpu( - VmaBlockVectorDefragmentationContext* pDefragCtx, - VmaVector>& moves, - VkCommandBuffer commandBuffer) -{ - const size_t blockCount = m_Blocks.size(); - - pDefragCtx->blockContexts.resize(blockCount); - memset(pDefragCtx->blockContexts.data(), 0, blockCount * sizeof(VmaBlockDefragmentationContext)); - - // Go over all moves. Mark blocks that are used with BLOCK_FLAG_USED. - const size_t moveCount = moves.size(); - for (size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex) - { - const VmaDefragmentationMove& move = moves[moveIndex]; - - //if(move.type == VMA_ALLOCATION_TYPE_UNKNOWN) - { - // Old school move still require us to map the whole block - pDefragCtx->blockContexts[move.srcBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED; - pDefragCtx->blockContexts[move.dstBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED; - } - } - - VMA_ASSERT(pDefragCtx->res == VK_SUCCESS); - - // Go over all blocks. Create and bind buffer for whole block if necessary. - { - VkBufferCreateInfo bufCreateInfo; - VmaFillGpuDefragmentationBufferCreateInfo(bufCreateInfo); - - for (size_t blockIndex = 0; pDefragCtx->res == VK_SUCCESS && blockIndex < blockCount; ++blockIndex) - { - VmaBlockDefragmentationContext& currBlockCtx = pDefragCtx->blockContexts[blockIndex]; - VmaDeviceMemoryBlock* pBlock = m_Blocks[blockIndex]; - if ((currBlockCtx.flags & VmaBlockDefragmentationContext::BLOCK_FLAG_USED) != 0) - { - bufCreateInfo.size = pBlock->m_pMetadata->GetSize(); - pDefragCtx->res = (*m_hAllocator->GetVulkanFunctions().vkCreateBuffer)( - m_hAllocator->m_hDevice, &bufCreateInfo, m_hAllocator->GetAllocationCallbacks(), &currBlockCtx.hBuffer); - if (pDefragCtx->res == VK_SUCCESS) - { - pDefragCtx->res = (*m_hAllocator->GetVulkanFunctions().vkBindBufferMemory)( - m_hAllocator->m_hDevice, currBlockCtx.hBuffer, pBlock->GetDeviceMemory(), 0); - } - } - } - } - - // Go over all moves. Post data transfer commands to command buffer. - if (pDefragCtx->res == VK_SUCCESS) - { - for (size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex) - { - const VmaDefragmentationMove& move = moves[moveIndex]; - - const VmaBlockDefragmentationContext& srcBlockCtx = pDefragCtx->blockContexts[move.srcBlockIndex]; - const VmaBlockDefragmentationContext& dstBlockCtx = pDefragCtx->blockContexts[move.dstBlockIndex]; - - VMA_ASSERT(srcBlockCtx.hBuffer && dstBlockCtx.hBuffer); - - VkBufferCopy region = { - move.srcOffset, - move.dstOffset, - move.size }; - (*m_hAllocator->GetVulkanFunctions().vkCmdCopyBuffer)( - commandBuffer, srcBlockCtx.hBuffer, dstBlockCtx.hBuffer, 1, ®ion); - } - } - - // Save buffers to defrag context for later destruction. - if (pDefragCtx->res == VK_SUCCESS && moveCount > 0) - { - pDefragCtx->res = VK_NOT_READY; - } -} - -void VmaBlockVector::FreeEmptyBlocks(VmaDefragmentationStats* pDefragmentationStats) -{ - for (size_t blockIndex = m_Blocks.size(); blockIndex--; ) - { - VmaDeviceMemoryBlock* pBlock = m_Blocks[blockIndex]; - if (pBlock->m_pMetadata->IsEmpty()) - { - if (m_Blocks.size() > m_MinBlockCount) - { - if (pDefragmentationStats != VMA_NULL) - { - ++pDefragmentationStats->deviceMemoryBlocksFreed; - pDefragmentationStats->bytesFreed += pBlock->m_pMetadata->GetSize(); - } - - VmaVectorRemove(m_Blocks, blockIndex); - pBlock->Destroy(m_hAllocator); - vma_delete(m_hAllocator, pBlock); - } - else - { - break; - } - } - } -} - -bool VmaBlockVector::HasEmptyBlock() const +bool VmaBlockVector::HasEmptyBlock() { for (size_t index = 0, count = m_Blocks.size(); index < count; ++index) { @@ -13287,50 +12919,7 @@ void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) { VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); - if (IsCustomPool()) - { - const char* poolName = m_hParentPool->GetName(); - if (poolName != VMA_NULL && poolName[0] != '\0') - { - json.WriteString("Name"); - json.WriteString(poolName); - } - json.WriteString("MemoryTypeIndex"); - json.WriteNumber(m_MemoryTypeIndex); - - json.WriteString("BlockSize"); - json.WriteNumber(m_PreferredBlockSize); - - json.WriteString("BlockCount"); - json.BeginObject(true); - if (m_MinBlockCount > 0) - { - json.WriteString("Min"); - json.WriteNumber((uint64_t)m_MinBlockCount); - } - if (m_MaxBlockCount < SIZE_MAX) - { - json.WriteString("Max"); - json.WriteNumber((uint64_t)m_MaxBlockCount); - } - json.WriteString("Cur"); - json.WriteNumber((uint64_t)m_Blocks.size()); - json.EndObject(); - - if (m_Algorithm != 0) - { - json.WriteString("Algorithm"); - json.WriteString(VmaAlgorithmToStr(m_Algorithm)); - } - } - else - { - json.WriteString("PreferredBlockSize"); - json.WriteNumber(m_PreferredBlockSize); - } - - json.WriteString("Blocks"); json.BeginObject(); for (size_t i = 0; i < m_Blocks.size(); ++i) { @@ -13338,234 +12927,17 @@ void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) json.ContinueString(m_Blocks[i]->GetId()); json.EndString(); - m_Blocks[i]->m_pMetadata->PrintDetailedMap(json, m_Blocks[i]->GetMapRefCount()); + json.BeginObject(); + json.WriteString("MapRefCount"); + json.WriteNumber(m_Blocks[i]->GetMapRefCount()); + + m_Blocks[i]->m_pMetadata->PrintDetailedMap(json); + json.EndObject(); } json.EndObject(); } #endif // VMA_STATS_STRING_ENABLED -void VmaBlockVector::Defragment( - class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags, - VkDeviceSize& maxCpuBytesToMove, uint32_t& maxCpuAllocationsToMove, - VkDeviceSize& maxGpuBytesToMove, uint32_t& maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer) -{ - pCtx->res = VK_SUCCESS; - - const VkMemoryPropertyFlags memPropFlags = - m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags; - const bool isHostVisible = (memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; - - const bool canDefragmentOnCpu = maxCpuBytesToMove > 0 && maxCpuAllocationsToMove > 0 && - isHostVisible; - const bool canDefragmentOnGpu = maxGpuBytesToMove > 0 && maxGpuAllocationsToMove > 0 && - !IsCorruptionDetectionEnabled() && - ((1u << m_MemoryTypeIndex) & m_hAllocator->GetGpuDefragmentationMemoryTypeBits()) != 0; - - // There are options to defragment this memory type. - if (canDefragmentOnCpu || canDefragmentOnGpu) - { - bool defragmentOnGpu; - // There is only one option to defragment this memory type. - if (canDefragmentOnGpu != canDefragmentOnCpu) - { - defragmentOnGpu = canDefragmentOnGpu; - } - // Both options are available: Heuristics to choose the best one. - else - { - defragmentOnGpu = (memPropFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0 || - m_hAllocator->IsIntegratedGpu(); - } - - bool overlappingMoveSupported = !defragmentOnGpu; - - if (m_hAllocator->m_UseMutex) - { - if (flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL) - { - if (!m_Mutex.TryLockWrite()) - { - pCtx->res = VK_ERROR_INITIALIZATION_FAILED; - return; - } - } - else - { - m_Mutex.LockWrite(); - pCtx->mutexLocked = true; - } - } - - pCtx->Begin(overlappingMoveSupported, flags); - - // Defragment. - - const VkDeviceSize maxBytesToMove = defragmentOnGpu ? maxGpuBytesToMove : maxCpuBytesToMove; - const uint32_t maxAllocationsToMove = defragmentOnGpu ? maxGpuAllocationsToMove : maxCpuAllocationsToMove; - VmaDefragmentationAlgorithm* algo = pCtx->GetAlgorithm(); - pCtx->res = algo->Defragment(pCtx->defragmentationMoves, maxBytesToMove, maxAllocationsToMove, flags); - - // Accumulate statistics. - if (pStats != VMA_NULL) - { - const VkDeviceSize bytesMoved = algo->GetBytesMoved(); - const uint32_t allocationsMoved = algo->GetAllocationsMoved(); - pStats->bytesMoved += bytesMoved; - pStats->allocationsMoved += allocationsMoved; - VMA_ASSERT(bytesMoved <= maxBytesToMove); - VMA_ASSERT(allocationsMoved <= maxAllocationsToMove); - if (defragmentOnGpu) - { - maxGpuBytesToMove -= bytesMoved; - maxGpuAllocationsToMove -= allocationsMoved; - } - else - { - maxCpuBytesToMove -= bytesMoved; - maxCpuAllocationsToMove -= allocationsMoved; - } - } - - if (flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL) - { - if (m_hAllocator->m_UseMutex) - m_Mutex.UnlockWrite(); - - if (pCtx->res >= VK_SUCCESS && !pCtx->defragmentationMoves.empty()) - pCtx->res = VK_NOT_READY; - - return; - } - - if (pCtx->res >= VK_SUCCESS) - { - if (defragmentOnGpu) - { - ApplyDefragmentationMovesGpu(pCtx, pCtx->defragmentationMoves, commandBuffer); - } - else - { - ApplyDefragmentationMovesCpu(pCtx, pCtx->defragmentationMoves); - } - } - } -} - -void VmaBlockVector::DefragmentationEnd( - class VmaBlockVectorDefragmentationContext* pCtx, - uint32_t flags, - VmaDefragmentationStats* pStats) -{ - if (flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL && m_hAllocator->m_UseMutex) - { - VMA_ASSERT(pCtx->mutexLocked == false); - - // Incremental defragmentation doesn't hold the lock, so when we enter here we don't actually have any - // lock protecting us. Since we mutate state here, we have to take the lock out now - m_Mutex.LockWrite(); - pCtx->mutexLocked = true; - } - - // If the mutex isn't locked we didn't do any work and there is nothing to delete. - if (pCtx->mutexLocked || !m_hAllocator->m_UseMutex) - { - // Destroy buffers. - for (size_t blockIndex = pCtx->blockContexts.size(); blockIndex--;) - { - VmaBlockDefragmentationContext& blockCtx = pCtx->blockContexts[blockIndex]; - if (blockCtx.hBuffer) - { - (*m_hAllocator->GetVulkanFunctions().vkDestroyBuffer)(m_hAllocator->m_hDevice, blockCtx.hBuffer, m_hAllocator->GetAllocationCallbacks()); - } - } - - if (pCtx->res >= VK_SUCCESS) - { - FreeEmptyBlocks(pStats); - } - } - - if (pCtx->mutexLocked) - { - VMA_ASSERT(m_hAllocator->m_UseMutex); - m_Mutex.UnlockWrite(); - } -} - -uint32_t VmaBlockVector::ProcessDefragmentations( - class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationPassMoveInfo* pMove, uint32_t maxMoves) -{ - VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); - - const uint32_t moveCount = VMA_MIN(uint32_t(pCtx->defragmentationMoves.size()) - pCtx->defragmentationMovesProcessed, maxMoves); - - for (uint32_t i = 0; i < moveCount; ++i) - { - VmaDefragmentationMove& move = pCtx->defragmentationMoves[pCtx->defragmentationMovesProcessed + i]; - - pMove->allocation = move.hAllocation; - pMove->memory = move.pDstBlock->GetDeviceMemory(); - pMove->offset = move.dstOffset; - - ++pMove; - } - - pCtx->defragmentationMovesProcessed += moveCount; - - return moveCount; -} - -void VmaBlockVector::CommitDefragmentations( - class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationStats* pStats) -{ - VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); - - for (uint32_t i = pCtx->defragmentationMovesCommitted; i < pCtx->defragmentationMovesProcessed; ++i) - { - const VmaDefragmentationMove& move = pCtx->defragmentationMoves[i]; - - move.pSrcBlock->m_pMetadata->Free(move.hAllocation->GetAllocHandle()); - move.hAllocation->ChangeBlockAllocation(m_hAllocator, move.pDstBlock, move.dstHandle); - } - - pCtx->defragmentationMovesCommitted = pCtx->defragmentationMovesProcessed; - FreeEmptyBlocks(pStats); -} - -size_t VmaBlockVector::CalcAllocationCount() const -{ - size_t result = 0; - for (size_t i = 0; i < m_Blocks.size(); ++i) - { - result += m_Blocks[i]->m_pMetadata->GetAllocationCount(); - } - return result; -} - -bool VmaBlockVector::IsBufferImageGranularityConflictPossible() const -{ - if (m_BufferImageGranularity == 1) - { - return false; - } - VmaSuballocationType lastSuballocType = VMA_SUBALLOCATION_TYPE_FREE; - for (size_t i = 0, count = m_Blocks.size(); i < count; ++i) - { - VmaDeviceMemoryBlock* const pBlock = m_Blocks[i]; - VMA_ASSERT(m_Algorithm == 0); - VmaBlockMetadata_Generic* const pMetadata = (VmaBlockMetadata_Generic*)pBlock->m_pMetadata; - if (pMetadata->IsBufferImageGranularityConflictPossible(m_BufferImageGranularity, lastSuballocType)) - { - return true; - } - } - return false; -} - VkResult VmaBlockVector::CheckCorruption() { if (!IsCorruptionDetectionEnabled()) @@ -13587,1265 +12959,974 @@ VkResult VmaBlockVector::CheckCorruption() return VK_SUCCESS; } -void VmaBlockVector::AddStats(VmaStats* pStats) -{ - const uint32_t memTypeIndex = m_MemoryTypeIndex; - const uint32_t memHeapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(memTypeIndex); - - VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); - - for (uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) - { - const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; - VMA_ASSERT(pBlock); - VMA_HEAVY_ASSERT(pBlock->Validate()); - VmaStatInfo allocationStatInfo; - pBlock->m_pMetadata->CalcAllocationStatInfo(allocationStatInfo); - VmaAddStatInfo(pStats->total, allocationStatInfo); - VmaAddStatInfo(pStats->memoryType[memTypeIndex], allocationStatInfo); - VmaAddStatInfo(pStats->memoryHeap[memHeapIndex], allocationStatInfo); - } -} #endif // _VMA_BLOCK_VECTOR_FUNCTIONS -#ifndef _VMA_DEFRAGMENTATION_ALGORITHM_GENERIC_FUNCTIONS -VmaDefragmentationAlgorithm_Generic::VmaDefragmentationAlgorithm_Generic( - VmaAllocator hAllocator, - VmaBlockVector* pBlockVector, - bool overlappingMoveSupported) - : VmaDefragmentationAlgorithm(hAllocator, pBlockVector), - m_Blocks(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), - m_AllocationCount(0), - m_AllAllocations(false), - m_BytesMoved(0), - m_AllocationsMoved(0) -{ - // Create block info for each block. - const size_t blockCount = m_pBlockVector->m_Blocks.size(); - for (size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) - { - BlockInfo* pBlockInfo = vma_new(m_hAllocator, BlockInfo)(m_hAllocator->GetAllocationCallbacks()); - pBlockInfo->m_OriginalBlockIndex = blockIndex; - pBlockInfo->m_pBlock = m_pBlockVector->m_Blocks[blockIndex]; - m_Blocks.push_back(pBlockInfo); - } - - // Sort them by m_pBlock pointer value. - VMA_SORT(m_Blocks.begin(), m_Blocks.end(), BlockPointerLess()); -} - -VmaDefragmentationAlgorithm_Generic::~VmaDefragmentationAlgorithm_Generic() -{ - for (size_t i = m_Blocks.size(); i--; ) - { - vma_delete(m_hAllocator, m_Blocks[i]); - } -} - -void VmaDefragmentationAlgorithm_Generic::AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged) -{ - VmaDeviceMemoryBlock* pBlock = hAlloc->GetBlock(); - BlockInfoVector::iterator it = VmaBinaryFindFirstNotLess(m_Blocks.begin(), m_Blocks.end(), pBlock, BlockPointerLess()); - if (it != m_Blocks.end() && (*it)->m_pBlock == pBlock) - { - AllocationInfo allocInfo = AllocationInfo(hAlloc, pChanged); - (*it)->m_Allocations.push_back(allocInfo); - } - else - { - VMA_ASSERT(0); - } - - ++m_AllocationCount; -} - -VkResult VmaDefragmentationAlgorithm_Generic::DefragmentRound( - VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - bool freeOldAllocations) -{ - if (m_Blocks.empty()) - { - return VK_SUCCESS; - } - - // This is a choice based on research. - // Option 1: - uint32_t strategy = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT; - // Option 2: - //uint32_t strategy = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT; - - size_t srcBlockMinIndex = 0; - // When FAST_ALGORITHM, move allocations from only last out of blocks that contain non-movable allocations. - /* - if(m_AlgorithmFlags & VMA_DEFRAGMENTATION_FAST_ALGORITHM_BIT) - { - const size_t blocksWithNonMovableCount = CalcBlocksWithNonMovableCount(); - if(blocksWithNonMovableCount > 0) - { - srcBlockMinIndex = blocksWithNonMovableCount - 1; - } - } - */ - - size_t srcBlockIndex = m_Blocks.size() - 1; - size_t srcAllocIndex = SIZE_MAX; - for (;;) - { - // 1. Find next allocation to move. - // 1.1. Start from last to first m_Blocks - they are sorted from most "destination" to most "source". - // 1.2. Then start from last to first m_Allocations. - while (srcAllocIndex >= m_Blocks[srcBlockIndex]->m_Allocations.size()) - { - if (m_Blocks[srcBlockIndex]->m_Allocations.empty()) - { - // Finished: no more allocations to process. - if (srcBlockIndex == srcBlockMinIndex) - { - return VK_SUCCESS; - } - else - { - --srcBlockIndex; - srcAllocIndex = SIZE_MAX; - } - } - else - { - srcAllocIndex = m_Blocks[srcBlockIndex]->m_Allocations.size() - 1; - } - } - - BlockInfo* pSrcBlockInfo = m_Blocks[srcBlockIndex]; - AllocationInfo& allocInfo = pSrcBlockInfo->m_Allocations[srcAllocIndex]; - - const VkDeviceSize size = allocInfo.m_hAllocation->GetSize(); - const VkDeviceSize srcOffset = allocInfo.m_hAllocation->GetOffset(); - const VkDeviceSize alignment = allocInfo.m_hAllocation->GetAlignment(); - const VmaSuballocationType suballocType = allocInfo.m_hAllocation->GetSuballocationType(); - - // 2. Try to find new place for this allocation in preceding or current block. - for (size_t dstBlockIndex = 0; dstBlockIndex <= srcBlockIndex; ++dstBlockIndex) - { - BlockInfo* pDstBlockInfo = m_Blocks[dstBlockIndex]; - VmaBlockMetadata* pMetadata = pDstBlockInfo->m_pBlock->m_pMetadata; - VmaAllocationRequest dstAllocRequest; - if (pMetadata->CreateAllocationRequest( - size, - alignment, - false, // upperAddress - suballocType, - strategy, - &dstAllocRequest) && - MoveMakesSense( - dstBlockIndex, pMetadata->GetAllocationOffset(dstAllocRequest.allocHandle), srcBlockIndex, srcOffset)) - { - // Reached limit on number of allocations or bytes to move. - if ((m_AllocationsMoved + 1 > maxAllocationsToMove) || - (m_BytesMoved + size > maxBytesToMove)) - { - return VK_SUCCESS; - } - - VmaDefragmentationMove move = {}; - move.srcBlockIndex = pSrcBlockInfo->m_OriginalBlockIndex; - move.dstBlockIndex = pDstBlockInfo->m_OriginalBlockIndex; - move.srcOffset = srcOffset; - move.dstOffset = pMetadata->GetAllocationOffset(dstAllocRequest.allocHandle); - move.size = size; - move.hAllocation = allocInfo.m_hAllocation; - move.pSrcBlock = pSrcBlockInfo->m_pBlock; - move.pDstBlock = pDstBlockInfo->m_pBlock; - move.dstHandle = dstAllocRequest.allocHandle; - - moves.push_back(move); - - pDstBlockInfo->m_pBlock->m_pMetadata->Alloc(dstAllocRequest, suballocType, allocInfo.m_hAllocation); - - if (freeOldAllocations) - { - pSrcBlockInfo->m_pBlock->m_pMetadata->Free(allocInfo.m_hAllocation->GetAllocHandle()); - allocInfo.m_hAllocation->ChangeBlockAllocation(m_hAllocator, pDstBlockInfo->m_pBlock, dstAllocRequest.allocHandle); - } - - if (allocInfo.m_pChanged != VMA_NULL) - { - *allocInfo.m_pChanged = VK_TRUE; - } - - ++m_AllocationsMoved; - m_BytesMoved += size; - - VmaVectorRemove(pSrcBlockInfo->m_Allocations, srcAllocIndex); - - break; - } - } - - // If not processed, this allocInfo remains in pBlockInfo->m_Allocations for next round. - - if (srcAllocIndex > 0) - { - --srcAllocIndex; - } - else - { - if (srcBlockIndex > 0) - { - --srcBlockIndex; - srcAllocIndex = SIZE_MAX; - } - else - { - return VK_SUCCESS; - } - } - } -} - -bool VmaDefragmentationAlgorithm_Generic::AllocationInfoSizeGreater::operator()(const AllocationInfo& lhs, const AllocationInfo& rhs) const -{ - return lhs.m_hAllocation->GetSize() > rhs.m_hAllocation->GetSize(); -} - -bool VmaDefragmentationAlgorithm_Generic::AllocationInfoOffsetGreater::operator()(const AllocationInfo& lhs, const AllocationInfo& rhs) const -{ - return lhs.m_hAllocation->GetOffset() > rhs.m_hAllocation->GetOffset(); -} - -VmaDefragmentationAlgorithm_Generic::BlockInfo::BlockInfo(const VkAllocationCallbacks* pAllocationCallbacks) - : m_OriginalBlockIndex(SIZE_MAX), - m_pBlock(VMA_NULL), - m_HasNonMovableAllocations(true), - m_Allocations(pAllocationCallbacks) {} - -void VmaDefragmentationAlgorithm_Generic::BlockInfo::CalcHasNonMovableAllocations() -{ - const size_t blockAllocCount = m_pBlock->m_pMetadata->GetAllocationCount(); - const size_t defragmentAllocCount = m_Allocations.size(); - m_HasNonMovableAllocations = blockAllocCount != defragmentAllocCount; -} - -void VmaDefragmentationAlgorithm_Generic::BlockInfo::SortAllocationsBySizeDescending() -{ - VMA_SORT(m_Allocations.begin(), m_Allocations.end(), AllocationInfoSizeGreater()); -} - -void VmaDefragmentationAlgorithm_Generic::BlockInfo::SortAllocationsByOffsetDescending() -{ - VMA_SORT(m_Allocations.begin(), m_Allocations.end(), AllocationInfoOffsetGreater()); -} - -bool VmaDefragmentationAlgorithm_Generic::BlockPointerLess::operator()(const BlockInfo* pLhsBlockInfo, const VmaDeviceMemoryBlock* pRhsBlock) const -{ - return pLhsBlockInfo->m_pBlock < pRhsBlock; -} -bool VmaDefragmentationAlgorithm_Generic::BlockPointerLess::operator()(const BlockInfo* pLhsBlockInfo, const BlockInfo* pRhsBlockInfo) const -{ - return pLhsBlockInfo->m_pBlock < pRhsBlockInfo->m_pBlock; -} - -bool VmaDefragmentationAlgorithm_Generic::BlockInfoCompareMoveDestination::operator()(const BlockInfo* pLhsBlockInfo, const BlockInfo* pRhsBlockInfo) const -{ - if (pLhsBlockInfo->m_HasNonMovableAllocations && !pRhsBlockInfo->m_HasNonMovableAllocations) - { - return true; - } - if (!pLhsBlockInfo->m_HasNonMovableAllocations && pRhsBlockInfo->m_HasNonMovableAllocations) - { - return false; - } - if (pLhsBlockInfo->m_pBlock->m_pMetadata->GetSumFreeSize() < pRhsBlockInfo->m_pBlock->m_pMetadata->GetSumFreeSize()) - { - return true; - } - return false; -} - -bool VmaDefragmentationAlgorithm_Generic::MoveMakesSense( - size_t dstBlockIndex, VkDeviceSize dstOffset, - size_t srcBlockIndex, VkDeviceSize srcOffset) -{ - if (dstBlockIndex < srcBlockIndex) - { - return true; - } - if (dstBlockIndex > srcBlockIndex) - { - return false; - } - if (dstOffset < srcOffset) - { - return true; - } - return false; -} - -size_t VmaDefragmentationAlgorithm_Generic::CalcBlocksWithNonMovableCount() const -{ - size_t result = 0; - for (size_t i = 0; i < m_Blocks.size(); ++i) - { - if (m_Blocks[i]->m_HasNonMovableAllocations) - { - ++result; - } - } - return result; -} - -VkResult VmaDefragmentationAlgorithm_Generic::Defragment( - VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - VmaDefragmentationFlags flags) -{ - if (!m_AllAllocations && m_AllocationCount == 0) - { - return VK_SUCCESS; - } - - const size_t blockCount = m_Blocks.size(); - for (size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) - { - BlockInfo* pBlockInfo = m_Blocks[blockIndex]; - - if (m_AllAllocations) - { - VmaBlockMetadata_Generic* pMetadata = (VmaBlockMetadata_Generic*)pBlockInfo->m_pBlock->m_pMetadata; - VMA_ASSERT(!pMetadata->IsVirtual()); - for (VmaSuballocationList::const_iterator it = pMetadata->m_Suballocations.begin(); - it != pMetadata->m_Suballocations.end(); - ++it) - { - if (it->type != VMA_SUBALLOCATION_TYPE_FREE) - { - AllocationInfo allocInfo = AllocationInfo((VmaAllocation)it->userData, VMA_NULL); - pBlockInfo->m_Allocations.push_back(allocInfo); - } - } - } - - pBlockInfo->CalcHasNonMovableAllocations(); - - // This is a choice based on research. - // Option 1: - pBlockInfo->SortAllocationsByOffsetDescending(); - // Option 2: - //pBlockInfo->SortAllocationsBySizeDescending(); - } - - // Sort m_Blocks this time by the main criterium, from most "destination" to most "source" blocks. - VMA_SORT(m_Blocks.begin(), m_Blocks.end(), BlockInfoCompareMoveDestination()); - - // This is a choice based on research. - const uint32_t roundCount = 2; - - // Execute defragmentation rounds (the main part). - VkResult result = VK_SUCCESS; - for (uint32_t round = 0; (round < roundCount) && (result == VK_SUCCESS); ++round) - { - result = DefragmentRound(moves, maxBytesToMove, maxAllocationsToMove, !(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)); - } - - return result; -} -#endif // _VMA_DEFRAGMENTATION_ALGORITHM_GENERIC_FUNCTIONS - -#ifndef _VMA_DEFRAGMENTATION_ALGORITHM_FAST_FUNCTIONS -VmaDefragmentationAlgorithm_Fast::VmaDefragmentationAlgorithm_Fast( - VmaAllocator hAllocator, - VmaBlockVector* pBlockVector, - bool overlappingMoveSupported) - : VmaDefragmentationAlgorithm(hAllocator, pBlockVector), - m_OverlappingMoveSupported(overlappingMoveSupported), - m_AllocationCount(0), - m_AllAllocations(false), - m_BytesMoved(0), - m_AllocationsMoved(0), - m_BlockInfos(VmaStlAllocator(hAllocator->GetAllocationCallbacks())) -{ - VMA_ASSERT(VMA_DEBUG_MARGIN == 0); -} - -VkResult VmaDefragmentationAlgorithm_Fast::Defragment( - VmaVector>& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - VmaDefragmentationFlags flags) -{ - VMA_ASSERT(m_AllAllocations || m_pBlockVector->CalcAllocationCount() == m_AllocationCount); - - const size_t blockCount = m_pBlockVector->GetBlockCount(); - if (blockCount == 0 || maxBytesToMove == 0 || maxAllocationsToMove == 0) - { - return VK_SUCCESS; - } - - PreprocessMetadata(); - - // Sort blocks in order from most destination. - - m_BlockInfos.resize(blockCount); - for (size_t i = 0; i < blockCount; ++i) - { - m_BlockInfos[i].origBlockIndex = i; - } - - VMA_SORT(m_BlockInfos.begin(), m_BlockInfos.end(), [this](const BlockInfo& lhs, const BlockInfo& rhs) -> bool { - return m_pBlockVector->GetBlock(lhs.origBlockIndex)->m_pMetadata->GetSumFreeSize() < - m_pBlockVector->GetBlock(rhs.origBlockIndex)->m_pMetadata->GetSumFreeSize(); - }); - - // THE MAIN ALGORITHM - - FreeSpaceDatabase freeSpaceDb; - - size_t dstBlockInfoIndex = 0; - size_t dstOrigBlockIndex = m_BlockInfos[dstBlockInfoIndex].origBlockIndex; - VmaDeviceMemoryBlock* pDstBlock = m_pBlockVector->GetBlock(dstOrigBlockIndex); - VmaBlockMetadata_Generic* pDstMetadata = (VmaBlockMetadata_Generic*)pDstBlock->m_pMetadata; - VkDeviceSize dstBlockSize = pDstMetadata->GetSize(); - VkDeviceSize dstOffset = 0; - - bool end = false; - for (size_t srcBlockInfoIndex = 0; !end && srcBlockInfoIndex < blockCount; ++srcBlockInfoIndex) - { - const size_t srcOrigBlockIndex = m_BlockInfos[srcBlockInfoIndex].origBlockIndex; - VmaDeviceMemoryBlock* const pSrcBlock = m_pBlockVector->GetBlock(srcOrigBlockIndex); - VmaBlockMetadata_Generic* const pSrcMetadata = (VmaBlockMetadata_Generic*)pSrcBlock->m_pMetadata; - for (VmaSuballocationList::iterator srcSuballocIt = pSrcMetadata->m_Suballocations.begin(); - !end && srcSuballocIt != pSrcMetadata->m_Suballocations.end(); ) - { - VmaAllocation const pAlloc = (VmaAllocation)srcSuballocIt->userData; - const VkDeviceSize srcAllocAlignment = pAlloc->GetAlignment(); - const VkDeviceSize srcAllocSize = srcSuballocIt->size; - if (m_AllocationsMoved == maxAllocationsToMove || - m_BytesMoved + srcAllocSize > maxBytesToMove) - { - end = true; - break; - } - const VkDeviceSize srcAllocOffset = srcSuballocIt->offset; - - VmaDefragmentationMove move = {}; - // Try to place it in one of free spaces from the database. - size_t freeSpaceInfoIndex; - VkDeviceSize dstAllocOffset; - if (freeSpaceDb.Fetch(srcAllocAlignment, srcAllocSize, - freeSpaceInfoIndex, dstAllocOffset)) - { - size_t freeSpaceOrigBlockIndex = m_BlockInfos[freeSpaceInfoIndex].origBlockIndex; - VmaDeviceMemoryBlock* pFreeSpaceBlock = m_pBlockVector->GetBlock(freeSpaceOrigBlockIndex); - VmaBlockMetadata_Generic* pFreeSpaceMetadata = (VmaBlockMetadata_Generic*)pFreeSpaceBlock->m_pMetadata; - - // Same block - if (freeSpaceInfoIndex == srcBlockInfoIndex) - { - VMA_ASSERT(dstAllocOffset <= srcAllocOffset); - - // MOVE OPTION 1: Move the allocation inside the same block by decreasing offset. - - VmaSuballocation suballoc = *srcSuballocIt; - suballoc.offset = dstAllocOffset; - ((VmaAllocation)(suballoc.userData))->ChangeAllocHandle((VmaAllocHandle)(dstAllocOffset + 1)); - m_BytesMoved += srcAllocSize; - ++m_AllocationsMoved; - - VmaSuballocationList::iterator nextSuballocIt = srcSuballocIt; - ++nextSuballocIt; - pSrcMetadata->m_Suballocations.erase(srcSuballocIt); - srcSuballocIt = nextSuballocIt; - - InsertSuballoc(pFreeSpaceMetadata, suballoc); - - move.srcBlockIndex = srcOrigBlockIndex; - move.dstBlockIndex = freeSpaceOrigBlockIndex; - move.srcOffset = srcAllocOffset; - move.dstOffset = dstAllocOffset; - move.dstHandle = (VmaAllocHandle)(dstAllocOffset + 1); - move.size = srcAllocSize; - - moves.push_back(move); - } - // Different block - else - { - // MOVE OPTION 2: Move the allocation to a different block. - - VMA_ASSERT(freeSpaceInfoIndex < srcBlockInfoIndex); - - VmaSuballocation suballoc = *srcSuballocIt; - suballoc.offset = dstAllocOffset; - ((VmaAllocation)(suballoc.userData))->ChangeBlockAllocation(m_hAllocator, pFreeSpaceBlock, (VmaAllocHandle)(dstAllocOffset + 1)); - m_BytesMoved += srcAllocSize; - ++m_AllocationsMoved; - - VmaSuballocationList::iterator nextSuballocIt = srcSuballocIt; - ++nextSuballocIt; - pSrcMetadata->m_Suballocations.erase(srcSuballocIt); - srcSuballocIt = nextSuballocIt; - - InsertSuballoc(pFreeSpaceMetadata, suballoc); - - move.srcBlockIndex = srcOrigBlockIndex; - move.dstBlockIndex = freeSpaceOrigBlockIndex; - move.srcOffset = srcAllocOffset; - move.dstOffset = dstAllocOffset; - move.dstHandle = (VmaAllocHandle)(dstAllocOffset + 1); - move.size = srcAllocSize; - - moves.push_back(move); - } - } - else - { - dstAllocOffset = VmaAlignUp(dstOffset, srcAllocAlignment); - - // If the allocation doesn't fit before the end of dstBlock, forward to next block. - while (dstBlockInfoIndex < srcBlockInfoIndex && - dstAllocOffset + srcAllocSize > dstBlockSize) - { - // But before that, register remaining free space at the end of dst block. - freeSpaceDb.Register(dstBlockInfoIndex, dstOffset, dstBlockSize - dstOffset); - - ++dstBlockInfoIndex; - dstOrigBlockIndex = m_BlockInfos[dstBlockInfoIndex].origBlockIndex; - pDstBlock = m_pBlockVector->GetBlock(dstOrigBlockIndex); - pDstMetadata = (VmaBlockMetadata_Generic*)pDstBlock->m_pMetadata; - dstBlockSize = pDstMetadata->GetSize(); - dstOffset = 0; - dstAllocOffset = 0; - } - - // Same block - if (dstBlockInfoIndex == srcBlockInfoIndex) - { - VMA_ASSERT(dstAllocOffset <= srcAllocOffset); - - const bool overlap = dstAllocOffset + srcAllocSize > srcAllocOffset; - - bool skipOver = overlap; - if (overlap && m_OverlappingMoveSupported && dstAllocOffset < srcAllocOffset) - { - // If destination and source place overlap, skip if it would move it - // by only < 1/64 of its size. - skipOver = (srcAllocOffset - dstAllocOffset) * 64 < srcAllocSize; - } - - if (skipOver) - { - freeSpaceDb.Register(dstBlockInfoIndex, dstOffset, srcAllocOffset - dstOffset); - - dstOffset = srcAllocOffset + srcAllocSize; - ++srcSuballocIt; - } - // MOVE OPTION 1: Move the allocation inside the same block by decreasing offset. - else - { - srcSuballocIt->offset = dstAllocOffset; - ((VmaAllocation)(srcSuballocIt->userData))->ChangeAllocHandle((VmaAllocHandle)(dstAllocOffset + 1)); - dstOffset = dstAllocOffset + srcAllocSize; - m_BytesMoved += srcAllocSize; - ++m_AllocationsMoved; - ++srcSuballocIt; - - move.srcBlockIndex = srcOrigBlockIndex; - move.dstBlockIndex = dstOrigBlockIndex; - move.srcOffset = srcAllocOffset; - move.dstOffset = dstAllocOffset; - move.dstHandle = (VmaAllocHandle)(dstAllocOffset + 1); - move.size = srcAllocSize; - - moves.push_back(move); - } - } - // Different block - else - { - // MOVE OPTION 2: Move the allocation to a different block. - - VMA_ASSERT(dstBlockInfoIndex < srcBlockInfoIndex); - VMA_ASSERT(dstAllocOffset + srcAllocSize <= dstBlockSize); - - VmaSuballocation suballoc = *srcSuballocIt; - suballoc.offset = dstAllocOffset; - ((VmaAllocation)(suballoc.userData))->ChangeBlockAllocation(m_hAllocator, pDstBlock, (VmaAllocHandle)(dstAllocOffset + 1)); - dstOffset = dstAllocOffset + srcAllocSize; - m_BytesMoved += srcAllocSize; - ++m_AllocationsMoved; - - VmaSuballocationList::iterator nextSuballocIt = srcSuballocIt; - ++nextSuballocIt; - pSrcMetadata->m_Suballocations.erase(srcSuballocIt); - srcSuballocIt = nextSuballocIt; - - pDstMetadata->m_Suballocations.push_back(suballoc); - - move.srcBlockIndex = srcOrigBlockIndex; - move.dstBlockIndex = dstOrigBlockIndex; - move.srcOffset = srcAllocOffset; - move.dstOffset = dstAllocOffset; - move.dstHandle = (VmaAllocHandle)(dstAllocOffset + 1); - move.size = srcAllocSize; - - moves.push_back(move); - } - } - } - } - - m_BlockInfos.clear(); - - PostprocessMetadata(); - - return VK_SUCCESS; -} - -VmaDefragmentationAlgorithm_Fast::FreeSpaceDatabase::FreeSpaceDatabase() -{ - FreeSpace s = {}; - s.blockInfoIndex = SIZE_MAX; - for (size_t i = 0; i < MAX_COUNT; ++i) - { - m_FreeSpaces[i] = s; - } -} - -void VmaDefragmentationAlgorithm_Fast::FreeSpaceDatabase::Register(size_t blockInfoIndex, VkDeviceSize offset, VkDeviceSize size) -{ - // Find first invalid or the smallest structure. - size_t bestIndex = SIZE_MAX; - for (size_t i = 0; i < MAX_COUNT; ++i) - { - // Empty structure. - if (m_FreeSpaces[i].blockInfoIndex == SIZE_MAX) - { - bestIndex = i; - break; - } - if (m_FreeSpaces[i].size < size && - (bestIndex == SIZE_MAX || m_FreeSpaces[bestIndex].size > m_FreeSpaces[i].size)) - { - bestIndex = i; - } - } - - if (bestIndex != SIZE_MAX) - { - m_FreeSpaces[bestIndex].blockInfoIndex = blockInfoIndex; - m_FreeSpaces[bestIndex].offset = offset; - m_FreeSpaces[bestIndex].size = size; - } -} - -bool VmaDefragmentationAlgorithm_Fast::FreeSpaceDatabase::Fetch(VkDeviceSize alignment, VkDeviceSize size, - size_t& outBlockInfoIndex, VkDeviceSize& outDstOffset) -{ - size_t bestIndex = SIZE_MAX; - VkDeviceSize bestFreeSpaceAfter = 0; - for (size_t i = 0; i < MAX_COUNT; ++i) - { - // Structure is valid. - if (m_FreeSpaces[i].blockInfoIndex != SIZE_MAX) - { - const VkDeviceSize dstOffset = VmaAlignUp(m_FreeSpaces[i].offset, alignment); - // Allocation fits into this structure. - if (dstOffset + size <= m_FreeSpaces[i].offset + m_FreeSpaces[i].size) - { - const VkDeviceSize freeSpaceAfter = (m_FreeSpaces[i].offset + m_FreeSpaces[i].size) - - (dstOffset + size); - if (bestIndex == SIZE_MAX || freeSpaceAfter > bestFreeSpaceAfter) - { - bestIndex = i; - bestFreeSpaceAfter = freeSpaceAfter; - } - } - } - } - - if (bestIndex != SIZE_MAX) - { - outBlockInfoIndex = m_FreeSpaces[bestIndex].blockInfoIndex; - outDstOffset = VmaAlignUp(m_FreeSpaces[bestIndex].offset, alignment); - - // Leave this structure for remaining empty space. - const VkDeviceSize alignmentPlusSize = (outDstOffset - m_FreeSpaces[bestIndex].offset) + size; - m_FreeSpaces[bestIndex].offset += alignmentPlusSize; - m_FreeSpaces[bestIndex].size -= alignmentPlusSize; - - return true; - } - - return false; -} - -void VmaDefragmentationAlgorithm_Fast::PreprocessMetadata() -{ - const size_t blockCount = m_pBlockVector->GetBlockCount(); - for (size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) - { - VmaBlockMetadata_Generic* const pMetadata = - (VmaBlockMetadata_Generic*)m_pBlockVector->GetBlock(blockIndex)->m_pMetadata; - pMetadata->m_FreeCount = 0; - pMetadata->m_SumFreeSize = pMetadata->GetSize(); - pMetadata->m_FreeSuballocationsBySize.clear(); - for (VmaSuballocationList::iterator it = pMetadata->m_Suballocations.begin(); - it != pMetadata->m_Suballocations.end(); ) - { - if (it->type == VMA_SUBALLOCATION_TYPE_FREE) - { - VmaSuballocationList::iterator nextIt = it; - ++nextIt; - pMetadata->m_Suballocations.erase(it); - it = nextIt; - } - else - { - ++it; - } - } - } -} - -void VmaDefragmentationAlgorithm_Fast::PostprocessMetadata() -{ - const size_t blockCount = m_pBlockVector->GetBlockCount(); - for (size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) - { - VmaBlockMetadata_Generic* const pMetadata = - (VmaBlockMetadata_Generic*)m_pBlockVector->GetBlock(blockIndex)->m_pMetadata; - const VkDeviceSize blockSize = pMetadata->GetSize(); - - // No allocations in this block - entire area is free. - if (pMetadata->m_Suballocations.empty()) - { - pMetadata->m_FreeCount = 1; - //pMetadata->m_SumFreeSize is already set to blockSize. - VmaSuballocation suballoc = { - 0, // offset - blockSize, // size - VMA_NULL, // hAllocation - VMA_SUBALLOCATION_TYPE_FREE }; - pMetadata->m_Suballocations.push_back(suballoc); - pMetadata->RegisterFreeSuballocation(pMetadata->m_Suballocations.begin()); - } - // There are some allocations in this block. - else - { - VkDeviceSize offset = 0; - VmaSuballocationList::iterator it; - for (it = pMetadata->m_Suballocations.begin(); - it != pMetadata->m_Suballocations.end(); - ++it) - { - VMA_ASSERT(it->type != VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(it->offset >= offset); - - // Need to insert preceding free space. - if (it->offset > offset) - { - ++pMetadata->m_FreeCount; - const VkDeviceSize freeSize = it->offset - offset; - VmaSuballocation suballoc = { - offset, // offset - freeSize, // size - VMA_NULL, // hAllocation - VMA_SUBALLOCATION_TYPE_FREE }; - VmaSuballocationList::iterator precedingFreeIt = pMetadata->m_Suballocations.insert(it, suballoc); - pMetadata->m_FreeSuballocationsBySize.push_back(precedingFreeIt); - } - - pMetadata->m_SumFreeSize -= it->size; - offset = it->offset + it->size; - } - - // Need to insert trailing free space. - if (offset < blockSize) - { - ++pMetadata->m_FreeCount; - const VkDeviceSize freeSize = blockSize - offset; - VmaSuballocation suballoc = { - offset, // offset - freeSize, // size - VMA_NULL, // hAllocation - VMA_SUBALLOCATION_TYPE_FREE }; - VMA_ASSERT(it == pMetadata->m_Suballocations.end()); - VmaSuballocationList::iterator trailingFreeIt = pMetadata->m_Suballocations.insert(it, suballoc); - pMetadata->m_FreeSuballocationsBySize.push_back(trailingFreeIt); - } - - VMA_SORT( - pMetadata->m_FreeSuballocationsBySize.begin(), - pMetadata->m_FreeSuballocationsBySize.end(), - VmaSuballocationItemSizeLess()); - } - - VMA_HEAVY_ASSERT(pMetadata->Validate()); - } -} - -void VmaDefragmentationAlgorithm_Fast::InsertSuballoc(VmaBlockMetadata_Generic* pMetadata, const VmaSuballocation& suballoc) -{ - VmaSuballocationList& suballocs = pMetadata->m_Suballocations; - VmaSuballocationList::iterator elementAfter; - const VkDeviceSize last = suballocs.rbegin()->offset; - const VkDeviceSize first = suballocs.begin()->offset; - - if (last <= suballoc.offset) - elementAfter = suballocs.end(); - else if (first >= suballoc.offset) - elementAfter = suballocs.begin(); - else - { - const size_t suballocCount = suballocs.size(); - const VkDeviceSize step = (last - first + suballocs.begin()->size) / suballocCount; - // If offset to be inserted is closer to the end of range, search from the end - if ((suballoc.offset - first) / step > suballocCount / 2) - { - elementAfter = suballocs.begin(); - for (VmaSuballocationList::reverse_iterator suballocItem = ++suballocs.rbegin(); - suballocItem != suballocs.rend(); - ++suballocItem) - { - if (suballocItem->offset <= suballoc.offset) - { - elementAfter = --suballocItem; - break; - } - } - } - else - { - elementAfter = suballocs.end(); - for (VmaSuballocationList::iterator suballocItem = ++suballocs.begin(); - suballocItem != suballocs.end(); - ++suballocItem) - { - if (suballocItem->offset >= suballoc.offset) - { - elementAfter = suballocItem; - break; - } - } - } - } - pMetadata->m_Suballocations.insert(elementAfter, suballoc); -} -#endif // _VMA_DEFRAGMENTATION_ALGORITHM_FAST_FUNCTIONS - -#ifndef _VMA_BLOCK_VECTOR_DEFRAGMENTATION_CONTEXT_FUNCTIONS -VmaBlockVectorDefragmentationContext::VmaBlockVectorDefragmentationContext( - VmaAllocator hAllocator, - VmaPool hCustomPool, - VmaBlockVector* pBlockVector) - : res(VK_SUCCESS), - mutexLocked(false), - blockContexts(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), - defragmentationMoves(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), - defragmentationMovesProcessed(0), - defragmentationMovesCommitted(0), - hasDefragmentationPlan(0), - m_hAllocator(hAllocator), - m_hCustomPool(hCustomPool), - m_pBlockVector(pBlockVector), - m_pAlgorithm(VMA_NULL), - m_Allocations(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), - m_AllAllocations(false) {} - -VmaBlockVectorDefragmentationContext::~VmaBlockVectorDefragmentationContext() -{ - vma_delete(m_hAllocator, m_pAlgorithm); -} - -void VmaBlockVectorDefragmentationContext::AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged) -{ - AllocInfo info = { hAlloc, pChanged }; - m_Allocations.push_back(info); -} - -void VmaBlockVectorDefragmentationContext::Begin(bool overlappingMoveSupported, VmaDefragmentationFlags flags) -{ - const bool allAllocations = m_AllAllocations || - m_Allocations.size() == m_pBlockVector->CalcAllocationCount(); - - /******************************** - HERE IS THE CHOICE OF DEFRAGMENTATION ALGORITHM. - ********************************/ - - /* - Fast algorithm is supported only when certain criteria are met: - - VMA_DEBUG_MARGIN is 0. - - All allocations in this block vector are movable. - - There is no possibility of image/buffer granularity conflict. - - The defragmentation is not incremental - */ - if (VMA_DEBUG_MARGIN == 0 && - allAllocations && - !m_pBlockVector->IsBufferImageGranularityConflictPossible() && - !(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)) - { - m_pAlgorithm = vma_new(m_hAllocator, VmaDefragmentationAlgorithm_Fast)( - m_hAllocator, m_pBlockVector, overlappingMoveSupported); - } - else - { - m_pAlgorithm = vma_new(m_hAllocator, VmaDefragmentationAlgorithm_Generic)( - m_hAllocator, m_pBlockVector, overlappingMoveSupported); - } - - if (allAllocations) - { - m_pAlgorithm->AddAll(); - } - else - { - for (size_t i = 0, count = m_Allocations.size(); i < count; ++i) - { - m_pAlgorithm->AddAllocation(m_Allocations[i].hAlloc, m_Allocations[i].pChanged); - } - } -} -#endif // _VMA_BLOCK_VECTOR_DEFRAGMENTATION_CONTEXT_FUNCTIONS - #ifndef _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS VmaDefragmentationContext_T::VmaDefragmentationContext_T( VmaAllocator hAllocator, - uint32_t flags, - VmaDefragmentationStats* pStats) - : m_hAllocator(hAllocator), - m_Flags(flags), - m_pStats(pStats), - m_CustomPoolContexts(VmaStlAllocator(hAllocator->GetAllocationCallbacks())) + const VmaDefragmentationInfo& info) + : m_MaxPassBytes(info.maxBytesPerPass == 0 ? VK_WHOLE_SIZE : info.maxBytesPerPass), + m_MaxPassAllocations(info.maxAllocationsPerPass == 0 ? UINT32_MAX : info.maxAllocationsPerPass), + m_MoveAllocator(hAllocator->GetAllocationCallbacks()), + m_Moves(m_MoveAllocator) { - memset(m_DefaultPoolContexts, 0, sizeof(m_DefaultPoolContexts)); + m_Algorithm = info.flags & VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK; + + if (info.pool != VMA_NULL) + { + m_BlockVectorCount = 1; + m_PoolBlockVector = &info.pool->m_BlockVector; + m_pBlockVectors = &m_PoolBlockVector; + m_PoolBlockVector->SetIncrementalSort(false); + m_PoolBlockVector->SortByFreeSize(); + } + else + { + m_BlockVectorCount = hAllocator->GetMemoryTypeCount(); + m_PoolBlockVector = VMA_NULL; + m_pBlockVectors = hAllocator->m_pBlockVectors; + for (uint32_t i = 0; i < m_BlockVectorCount; ++i) + { + VmaBlockVector* vector = m_pBlockVectors[i]; + if (vector != VMA_NULL) + { + vector->SetIncrementalSort(false); + vector->SortByFreeSize(); + } + } + } + + switch (m_Algorithm) + { + case 0: // Default algorithm + m_Algorithm = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT; + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: + { + m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount); + break; + } + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + { + if (hAllocator->GetBufferImageGranularity() > 1) + { + m_AlgorithmState = vma_new_array(hAllocator, StateExtensive, m_BlockVectorCount); + } + break; + } + } } VmaDefragmentationContext_T::~VmaDefragmentationContext_T() { - for (size_t i = m_CustomPoolContexts.size(); i--; ) + if (m_PoolBlockVector != VMA_NULL) { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_CustomPoolContexts[i]; - pBlockVectorCtx->GetBlockVector()->DefragmentationEnd(pBlockVectorCtx, m_Flags, m_pStats); - vma_delete(m_hAllocator, pBlockVectorCtx); + m_PoolBlockVector->SetIncrementalSort(true); } - for (size_t i = m_hAllocator->m_MemProps.memoryTypeCount; i--; ) + else { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_DefaultPoolContexts[i]; - if (pBlockVectorCtx) + for (uint32_t i = 0; i < m_BlockVectorCount; ++i) { - pBlockVectorCtx->GetBlockVector()->DefragmentationEnd(pBlockVectorCtx, m_Flags, m_pStats); - vma_delete(m_hAllocator, pBlockVectorCtx); + VmaBlockVector* vector = m_pBlockVectors[i]; + if (vector != VMA_NULL) + vector->SetIncrementalSort(true); + } + } + + if (m_AlgorithmState) + { + switch (m_Algorithm) + { + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: + vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast(m_AlgorithmState), m_BlockVectorCount); + break; + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast(m_AlgorithmState), m_BlockVectorCount); + break; + default: + VMA_ASSERT(0); } } } -void VmaDefragmentationContext_T::AddPools(uint32_t poolCount, const VmaPool* pPools) +VkResult VmaDefragmentationContext_T::DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo) { - for (uint32_t poolIndex = 0; poolIndex < poolCount; ++poolIndex) + if (m_PoolBlockVector != VMA_NULL) { - VmaPool pool = pPools[poolIndex]; - VMA_ASSERT(pool); - // Pools with algorithm other than default are not defragmented. - if (pool->m_BlockVector.GetAlgorithm() == 0) - { - VmaBlockVectorDefragmentationContext* pBlockVectorDefragCtx = VMA_NULL; + VmaMutexLockWrite lock(m_PoolBlockVector->GetMutex(), m_PoolBlockVector->GetAllocator()->m_UseMutex); - for (size_t i = m_CustomPoolContexts.size(); i--; ) + if (m_PoolBlockVector->GetBlockCount() > 1) + ComputeDefragmentation(*m_PoolBlockVector, 0); + else if (m_PoolBlockVector->GetBlockCount() == 1) + ReallocWithinBlock(*m_PoolBlockVector, m_PoolBlockVector->GetBlock(0)); + } + else + { + for (uint32_t i = 0; i < m_BlockVectorCount; ++i) + { + if (m_pBlockVectors[i] != VMA_NULL) { - if (m_CustomPoolContexts[i]->GetCustomPool() == pool) + VmaMutexLockWrite lock(m_pBlockVectors[i]->GetMutex(), m_pBlockVectors[i]->GetAllocator()->m_UseMutex); + + if (m_pBlockVectors[i]->GetBlockCount() > 1) { - pBlockVectorDefragCtx = m_CustomPoolContexts[i]; - break; + if (ComputeDefragmentation(*m_pBlockVectors[i], i)) + break; + } + else if (m_pBlockVectors[i]->GetBlockCount() == 1) + { + if (ReallocWithinBlock(*m_pBlockVectors[i], m_pBlockVectors[i]->GetBlock(0))) + break; } } - - if (!pBlockVectorDefragCtx) - { - pBlockVectorDefragCtx = vma_new(m_hAllocator, VmaBlockVectorDefragmentationContext)( - m_hAllocator, - pool, - &pool->m_BlockVector); - m_CustomPoolContexts.push_back(pBlockVectorDefragCtx); - } - - pBlockVectorDefragCtx->AddAll(); - } - } -} - -void VmaDefragmentationContext_T::AddAllocations( - uint32_t allocationCount, - const VmaAllocation* pAllocations, - VkBool32* pAllocationsChanged) -{ - // Dispatch pAllocations among defragmentators. Create them when necessary. - for (uint32_t allocIndex = 0; allocIndex < allocationCount; ++allocIndex) - { - const VmaAllocation hAlloc = pAllocations[allocIndex]; - VMA_ASSERT(hAlloc); - // DedicatedAlloc cannot be defragmented. - if (hAlloc->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK) - { - VmaBlockVectorDefragmentationContext* pBlockVectorDefragCtx = VMA_NULL; - - const VmaPool hAllocPool = hAlloc->GetBlock()->GetParentPool(); - // This allocation belongs to custom pool. - if (hAllocPool != VK_NULL_HANDLE) - { - // Pools with algorithm other than default are not defragmented. - if (hAllocPool->m_BlockVector.GetAlgorithm() == 0) - { - for (size_t i = m_CustomPoolContexts.size(); i--; ) - { - if (m_CustomPoolContexts[i]->GetCustomPool() == hAllocPool) - { - pBlockVectorDefragCtx = m_CustomPoolContexts[i]; - break; - } - } - if (!pBlockVectorDefragCtx) - { - pBlockVectorDefragCtx = vma_new(m_hAllocator, VmaBlockVectorDefragmentationContext)( - m_hAllocator, - hAllocPool, - &hAllocPool->m_BlockVector); - m_CustomPoolContexts.push_back(pBlockVectorDefragCtx); - } - } - } - // This allocation belongs to default pool. - else - { - const uint32_t memTypeIndex = hAlloc->GetMemoryTypeIndex(); - pBlockVectorDefragCtx = m_DefaultPoolContexts[memTypeIndex]; - if (!pBlockVectorDefragCtx) - { - VMA_ASSERT(m_hAllocator->m_pBlockVectors[memTypeIndex] && "Trying to use unsupported memory type!"); - - pBlockVectorDefragCtx = vma_new(m_hAllocator, VmaBlockVectorDefragmentationContext)( - m_hAllocator, - VMA_NULL, // hCustomPool - m_hAllocator->m_pBlockVectors[memTypeIndex]); - m_DefaultPoolContexts[memTypeIndex] = pBlockVectorDefragCtx; - } - } - - if (pBlockVectorDefragCtx) - { - VkBool32* const pChanged = (pAllocationsChanged != VMA_NULL) ? - &pAllocationsChanged[allocIndex] : VMA_NULL; - pBlockVectorDefragCtx->AddAllocation(hAlloc, pChanged); - } - } - } -} - -VkResult VmaDefragmentationContext_T::Defragment( - VkDeviceSize maxCpuBytesToMove, uint32_t maxCpuAllocationsToMove, - VkDeviceSize maxGpuBytesToMove, uint32_t maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags) -{ - if (pStats) - { - memset(pStats, 0, sizeof(VmaDefragmentationStats)); - } - - if (flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL) - { - // For incremental defragmetnations, we just earmark how much we can move - // The real meat is in the defragmentation steps - m_MaxCpuBytesToMove = maxCpuBytesToMove; - m_MaxCpuAllocationsToMove = maxCpuAllocationsToMove; - - m_MaxGpuBytesToMove = maxGpuBytesToMove; - m_MaxGpuAllocationsToMove = maxGpuAllocationsToMove; - - if (m_MaxCpuBytesToMove == 0 && m_MaxCpuAllocationsToMove == 0 && - m_MaxGpuBytesToMove == 0 && m_MaxGpuAllocationsToMove == 0) - return VK_SUCCESS; - - return VK_NOT_READY; - } - - if (commandBuffer == VK_NULL_HANDLE) - { - maxGpuBytesToMove = 0; - maxGpuAllocationsToMove = 0; - } - - VkResult res = VK_SUCCESS; - - // Process default pools. - for (uint32_t memTypeIndex = 0; - memTypeIndex < m_hAllocator->GetMemoryTypeCount() && res >= VK_SUCCESS; - ++memTypeIndex) - { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex]; - if (pBlockVectorCtx) - { - VMA_ASSERT(pBlockVectorCtx->GetBlockVector()); - pBlockVectorCtx->GetBlockVector()->Defragment( - pBlockVectorCtx, - pStats, flags, - maxCpuBytesToMove, maxCpuAllocationsToMove, - maxGpuBytesToMove, maxGpuAllocationsToMove, - commandBuffer); - if (pBlockVectorCtx->res != VK_SUCCESS) - { - res = pBlockVectorCtx->res; - } } } - // Process custom pools. - for (size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size(); - customCtxIndex < customCtxCount && res >= VK_SUCCESS; - ++customCtxIndex) + moveInfo.moveCount = static_cast(m_Moves.size()); + if (moveInfo.moveCount > 0) { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex]; - VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector()); - pBlockVectorCtx->GetBlockVector()->Defragment( - pBlockVectorCtx, - pStats, flags, - maxCpuBytesToMove, maxCpuAllocationsToMove, - maxGpuBytesToMove, maxGpuAllocationsToMove, - commandBuffer); - if (pBlockVectorCtx->res != VK_SUCCESS) - { - res = pBlockVectorCtx->res; - } + moveInfo.pMoves = m_Moves.data(); + return VK_INCOMPLETE; } - return res; -} - -VkResult VmaDefragmentationContext_T::DefragmentPassBegin(VmaDefragmentationPassInfo* pInfo) -{ - VmaDefragmentationPassMoveInfo* pCurrentMove = pInfo->pMoves; - uint32_t movesLeft = pInfo->moveCount; - - // Process default pools. - for (uint32_t memTypeIndex = 0; - memTypeIndex < m_hAllocator->GetMemoryTypeCount(); - ++memTypeIndex) - { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex]; - if (pBlockVectorCtx) - { - VMA_ASSERT(pBlockVectorCtx->GetBlockVector()); - - if (!pBlockVectorCtx->hasDefragmentationPlan) - { - pBlockVectorCtx->GetBlockVector()->Defragment( - pBlockVectorCtx, - m_pStats, m_Flags, - m_MaxCpuBytesToMove, m_MaxCpuAllocationsToMove, - m_MaxGpuBytesToMove, m_MaxGpuAllocationsToMove, - VK_NULL_HANDLE); - - if (pBlockVectorCtx->res < VK_SUCCESS) - continue; - - pBlockVectorCtx->hasDefragmentationPlan = true; - } - - const uint32_t processed = pBlockVectorCtx->GetBlockVector()->ProcessDefragmentations( - pBlockVectorCtx, - pCurrentMove, movesLeft); - - movesLeft -= processed; - pCurrentMove += processed; - } - } - - // Process custom pools. - for (size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size(); - customCtxIndex < customCtxCount; - ++customCtxIndex) - { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex]; - VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector()); - - if (!pBlockVectorCtx->hasDefragmentationPlan) - { - pBlockVectorCtx->GetBlockVector()->Defragment( - pBlockVectorCtx, - m_pStats, m_Flags, - m_MaxCpuBytesToMove, m_MaxCpuAllocationsToMove, - m_MaxGpuBytesToMove, m_MaxGpuAllocationsToMove, - VK_NULL_HANDLE); - - if (pBlockVectorCtx->res < VK_SUCCESS) - continue; - - pBlockVectorCtx->hasDefragmentationPlan = true; - } - - const uint32_t processed = pBlockVectorCtx->GetBlockVector()->ProcessDefragmentations( - pBlockVectorCtx, - pCurrentMove, movesLeft); - - movesLeft -= processed; - pCurrentMove += processed; - } - - pInfo->moveCount = pInfo->moveCount - movesLeft; - + moveInfo.pMoves = VMA_NULL; return VK_SUCCESS; } -VkResult VmaDefragmentationContext_T::DefragmentPassEnd() +VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo) { - VkResult res = VK_SUCCESS; + VMA_ASSERT(moveInfo.moveCount > 0 ? moveInfo.pMoves != VMA_NULL : true); - // Process default pools. - for (uint32_t memTypeIndex = 0; - memTypeIndex < m_hAllocator->GetMemoryTypeCount(); - ++memTypeIndex) + VkResult result = VK_SUCCESS; + VmaStlAllocator blockAllocator(m_MoveAllocator.m_pCallbacks); + VmaVector> immovableBlocks(blockAllocator); + VmaVector> mappedBlocks(blockAllocator); + + VmaAllocator allocator = VMA_NULL; + for (uint32_t i = 0; i < moveInfo.moveCount; ++i) { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex]; - if (pBlockVectorCtx) - { - VMA_ASSERT(pBlockVectorCtx->GetBlockVector()); + VmaDefragmentationMove& move = moveInfo.pMoves[i]; + size_t prevCount = 0, currentCount = 0; + VkDeviceSize freedBlockSize = 0; - if (!pBlockVectorCtx->hasDefragmentationPlan) + uint32_t vectorIndex; + VmaBlockVector* vector; + if (m_PoolBlockVector != VMA_NULL) + { + vectorIndex = 0; + vector = m_PoolBlockVector; + } + else + { + vectorIndex = move.srcAllocation->GetMemoryTypeIndex(); + vector = m_pBlockVectors[vectorIndex]; + VMA_ASSERT(vector != VMA_NULL); + } + + switch (move.operation) + { + case VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY: + { + uint8_t mapCount = move.srcAllocation->SwapBlockAllocation(vector->m_hAllocator, move.dstTmpAllocation); + if (mapCount > 0) { - res = VK_NOT_READY; - continue; + allocator = vector->m_hAllocator; + VmaDeviceMemoryBlock* newMapBlock = move.srcAllocation->GetBlock(); + bool notPresent = true; + for (FragmentedBlock& block : mappedBlocks) + { + if (block.block == newMapBlock) + { + notPresent = false; + block.data += mapCount; + break; + } + } + if (notPresent) + mappedBlocks.push_back({ mapCount, newMapBlock }); } - pBlockVectorCtx->GetBlockVector()->CommitDefragmentations( - pBlockVectorCtx, m_pStats); + // Scope for locks, Free have it's own lock + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + prevCount = vector->GetBlockCount(); + freedBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize(); + } + vector->Free(move.dstTmpAllocation); + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + currentCount = vector->GetBlockCount(); + } - if (pBlockVectorCtx->defragmentationMoves.size() != pBlockVectorCtx->defragmentationMovesCommitted) - res = VK_NOT_READY; + result = VK_INCOMPLETE; + break; } - } - - // Process custom pools. - for (size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size(); - customCtxIndex < customCtxCount; - ++customCtxIndex) - { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex]; - VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector()); - - if (!pBlockVectorCtx->hasDefragmentationPlan) + case VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE: { - res = VK_NOT_READY; - continue; + m_PassStats.bytesMoved -= move.srcAllocation->GetSize(); + --m_PassStats.allocationsMoved; + vector->Free(move.dstTmpAllocation); + + VmaDeviceMemoryBlock* newBlock = move.srcAllocation->GetBlock(); + bool notPresent = true; + for (const FragmentedBlock& block : immovableBlocks) + { + if (block.block == newBlock) + { + notPresent = false; + break; + } + } + if (notPresent) + immovableBlocks.push_back({ vectorIndex, newBlock }); + break; + } + case VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY: + { + m_PassStats.bytesMoved -= move.srcAllocation->GetSize(); + --m_PassStats.allocationsMoved; + // Scope for locks, Free have it's own lock + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + prevCount = vector->GetBlockCount(); + freedBlockSize = move.srcAllocation->GetBlock()->m_pMetadata->GetSize(); + } + vector->Free(move.srcAllocation); + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + currentCount = vector->GetBlockCount(); + } + freedBlockSize *= prevCount - currentCount; + + VkDeviceSize dstBlockSize; + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + dstBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize(); + } + vector->Free(move.dstTmpAllocation); + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + freedBlockSize += dstBlockSize * (currentCount - vector->GetBlockCount()); + currentCount = vector->GetBlockCount(); + } + + result = VK_INCOMPLETE; + break; + } + default: + VMA_ASSERT(0); } - pBlockVectorCtx->GetBlockVector()->CommitDefragmentations( - pBlockVectorCtx, m_pStats); + if (prevCount > currentCount) + { + size_t freedBlocks = prevCount - currentCount; + m_PassStats.deviceMemoryBlocksFreed += static_cast(freedBlocks); + m_PassStats.bytesFreed += freedBlockSize; + } - if (pBlockVectorCtx->defragmentationMoves.size() != pBlockVectorCtx->defragmentationMovesCommitted) - res = VK_NOT_READY; + switch (m_Algorithm) + { + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + { + if (m_AlgorithmState != VMA_NULL) + { + // Avoid unnecessary tries to allocate when new free block is avaiable + StateExtensive& state = reinterpret_cast(m_AlgorithmState)[vectorIndex]; + if (state.firstFreeBlock != SIZE_MAX) + { + const size_t diff = prevCount - currentCount; + if (state.firstFreeBlock >= diff) + { + state.firstFreeBlock -= diff; + if (state.firstFreeBlock != 0) + state.firstFreeBlock -= vector->GetBlock(state.firstFreeBlock - 1)->m_pMetadata->IsEmpty(); + } + else + state.firstFreeBlock = 0; + } + } + } + } + } + moveInfo.moveCount = 0; + moveInfo.pMoves = VMA_NULL; + m_Moves.clear(); + + // Update stats + m_GlobalStats.allocationsMoved += m_PassStats.allocationsMoved; + m_GlobalStats.bytesFreed += m_PassStats.bytesFreed; + m_GlobalStats.bytesMoved += m_PassStats.bytesMoved; + m_GlobalStats.deviceMemoryBlocksFreed += m_PassStats.deviceMemoryBlocksFreed; + m_PassStats = { 0 }; + + // Move blocks with immovable allocations according to algorithm + if (immovableBlocks.size() > 0) + { + switch (m_Algorithm) + { + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + { + if (m_AlgorithmState != VMA_NULL) + { + bool swapped = false; + // Move to the start of free blocks range + for (const FragmentedBlock& block : immovableBlocks) + { + StateExtensive& state = reinterpret_cast(m_AlgorithmState)[block.data]; + if (state.operation != StateExtensive::Operation::Cleanup) + { + VmaBlockVector* vector = m_pBlockVectors[block.data]; + VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + + for (size_t i = 0, count = vector->GetBlockCount() - m_ImmovableBlockCount; i < count; ++i) + { + if (vector->GetBlock(i) == block.block) + { + VMA_SWAP(vector->m_Blocks[i], vector->m_Blocks[vector->GetBlockCount() - ++m_ImmovableBlockCount]); + if (state.firstFreeBlock != SIZE_MAX) + { + if (i < state.firstFreeBlock - 1) + { + if (state.firstFreeBlock > 1) + VMA_SWAP(vector->m_Blocks[i], vector->m_Blocks[--state.firstFreeBlock]); + else + --state.firstFreeBlock; + } + } + swapped = true; + break; + } + } + } + } + if (swapped) + result = VK_INCOMPLETE; + break; + } + } + default: + { + // Move to the begining + for (const FragmentedBlock& block : immovableBlocks) + { + VmaBlockVector* vector = m_pBlockVectors[block.data]; + VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + + for (size_t i = m_ImmovableBlockCount; i < vector->GetBlockCount(); ++i) + { + if (vector->GetBlock(i) == block.block) + { + VMA_SWAP(vector->m_Blocks[i], vector->m_Blocks[m_ImmovableBlockCount++]); + break; + } + } + } + break; + } + } } - return res; + // Bulk-map destination blocks + for (const FragmentedBlock& block : mappedBlocks) + { + VkResult res = block.block->Map(allocator, block.data, VMA_NULL); + VMA_ASSERT(res == VK_SUCCESS); + } + return result; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation(VmaBlockVector& vector, size_t index) +{ + switch (m_Algorithm) + { + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT: + return ComputeDefragmentation_Fast(vector); + default: + VMA_ASSERT(0); + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: + return ComputeDefragmentation_Balanced(vector, index, true); + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT: + return ComputeDefragmentation_Full(vector); + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + return ComputeDefragmentation_Extensive(vector, index); + } +} + +VmaDefragmentationContext_T::MoveAllocationData VmaDefragmentationContext_T::GetMoveData( + VmaAllocHandle handle, VmaBlockMetadata* metadata) +{ + MoveAllocationData moveData; + moveData.move.srcAllocation = (VmaAllocation)metadata->GetAllocationUserData(handle); + moveData.size = moveData.move.srcAllocation->GetSize(); + moveData.alignment = moveData.move.srcAllocation->GetAlignment(); + moveData.type = moveData.move.srcAllocation->GetSuballocationType(); + moveData.flags = 0; + + if (moveData.move.srcAllocation->IsPersistentMap()) + moveData.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT; + if (moveData.move.srcAllocation->IsMappingAllowed()) + moveData.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + + return moveData; +} + +VmaDefragmentationContext_T::CounterStatus VmaDefragmentationContext_T::CheckCounters(VkDeviceSize bytes) +{ + // Ignore allocation if will exceed max size for copy + if (m_PassStats.bytesMoved + bytes > m_MaxPassBytes) + { + if (++m_IgnoredAllocs < MAX_ALLOCS_TO_IGNORE) + return CounterStatus::Ignore; + else + return CounterStatus::End; + } + return CounterStatus::Pass; +} + +bool VmaDefragmentationContext_T::IncrementCounters(VkDeviceSize bytes) +{ + m_PassStats.bytesMoved += bytes; + // Early return when max found + if (++m_PassStats.allocationsMoved >= m_MaxPassAllocations || m_PassStats.bytesMoved >= m_MaxPassBytes) + { + VMA_ASSERT(m_PassStats.allocationsMoved == m_MaxPassAllocations || + m_PassStats.bytesMoved == m_MaxPassBytes && "Exceeded maximal pass threshold!"); + return true; + } + return false; +} + +bool VmaDefragmentationContext_T::ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block) +{ + VmaBlockMetadata* metadata = block->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + VMA_ASSERT(0); + case CounterStatus::Pass: + break; + } + + VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); + if (offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + VmaAllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + moveData.type, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + &request)) + { + if (metadata->GetAllocationOffset(request.allocHandle) < offset) + { + if (vector.CommitAllocationRequest( + request, + block, + moveData.alignment, + moveData.flags, + this, + moveData.type, + &moveData.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } + } + } + } + } + return false; +} + +bool VmaDefragmentationContext_T::AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector) +{ + for (; start < end; ++start) + { + VmaDeviceMemoryBlock* dstBlock = vector.GetBlock(start); + if (dstBlock->m_pMetadata->GetSumFreeSize() >= data.size) + { + if (vector.AllocateFromBlock(dstBlock, + data.size, + data.alignment, + data.flags, + this, + data.type, + 0, + &data.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(data.move); + if (IncrementCounters(data.size)) + return true; + break; + } + } + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Fast(VmaBlockVector& vector) +{ + // Move only between blocks + + // Go through allocations in last blocks and try to fit them inside first ones + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) + { + VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + VMA_ASSERT(0); + case CounterStatus::Pass: + break; + } + + // Check all previous blocks for free space + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + } + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update) +{ + // Go over every allocation and try to fit it in previous blocks at lowest offsets, + // if not possible: realloc within single block to minimize offset (exclude offset == 0), + // but only if there are noticable gaps between them (some heuristic, ex. average size of allocation in block) + VMA_ASSERT(m_AlgorithmState != VMA_NULL); + + StateBalanced& vectorState = reinterpret_cast(m_AlgorithmState)[index]; + if (update && vectorState.avgAllocSize == UINT64_MAX) + UpdateVectorStatistics(vector, vectorState); + + const size_t startMoveCount = m_Moves.size(); + VkDeviceSize minimalFreeRegion = vectorState.avgFreeSize / 2; + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) + { + VmaDeviceMemoryBlock* block = vector.GetBlock(i); + VmaBlockMetadata* metadata = block->m_pMetadata; + VkDeviceSize prevFreeRegionSize = 0; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + VMA_ASSERT(0); + case CounterStatus::Pass: + break; + } + + // Check all previous blocks for free space + const size_t prevMoveCount = m_Moves.size(); + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + + VkDeviceSize nextFreeRegionSize = metadata->GetNextFreeRegionSize(handle); + // If no room found then realloc within block for lower offset + VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); + if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + // Check if realloc will make sense + if (prevFreeRegionSize >= minimalFreeRegion || + nextFreeRegionSize >= minimalFreeRegion || + moveData.size <= vectorState.avgFreeSize || + moveData.size <= vectorState.avgAllocSize) + { + VmaAllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + moveData.type, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + &request)) + { + if (metadata->GetAllocationOffset(request.allocHandle) < offset) + { + if (vector.CommitAllocationRequest( + request, + block, + moveData.alignment, + moveData.flags, + this, + moveData.type, + &moveData.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } + } + } + } + } + prevFreeRegionSize = nextFreeRegionSize; + } + } + + // No moves perfomed, update statistics to current vector state + if (startMoveCount == m_Moves.size() && !update) + { + vectorState.avgAllocSize = UINT64_MAX; + return ComputeDefragmentation_Balanced(vector, index, false); + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Full(VmaBlockVector& vector) +{ + // Go over every allocation and try to fit it in previous blocks at lowest offsets, + // if not possible: realloc within single block to minimize offset (exclude offset == 0) + + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) + { + VmaDeviceMemoryBlock* block = vector.GetBlock(i); + VmaBlockMetadata* metadata = block->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + VMA_ASSERT(0); + case CounterStatus::Pass: + break; + } + + // Check all previous blocks for free space + const size_t prevMoveCount = m_Moves.size(); + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + + // If no room found then realloc within block for lower offset + VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); + if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + VmaAllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + moveData.type, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + &request)) + { + if (metadata->GetAllocationOffset(request.allocHandle) < offset) + { + if (vector.CommitAllocationRequest( + request, + block, + moveData.alignment, + moveData.flags, + this, + moveData.type, + &moveData.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } + } + } + } + } + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index) +{ + // First free single block, then populate it to the brim, then free another block, and so on + + // Fallback to previous algorithm since without granularity conflicts it can achieve max packing + if (vector.m_BufferImageGranularity == 1) + return ComputeDefragmentation_Full(vector); + + VMA_ASSERT(m_AlgorithmState != VMA_NULL); + + StateExtensive& vectorState = reinterpret_cast(m_AlgorithmState)[index]; + + bool texturePresent = false, bufferPresent = false, otherPresent = false; + switch (vectorState.operation) + { + case StateExtensive::Operation::Done: // Vector defragmented + return false; + case StateExtensive::Operation::FindFreeBlockBuffer: + case StateExtensive::Operation::FindFreeBlockTexture: + case StateExtensive::Operation::FindFreeBlockAll: + { + // No free blocks, have to clear last one + size_t last = (vectorState.firstFreeBlock == SIZE_MAX ? vector.GetBlockCount() : vectorState.firstFreeBlock) - 1; + VmaBlockMetadata* freeMetadata = vector.GetBlock(last)->m_pMetadata; + + const size_t prevMoveCount = m_Moves.size(); + for (VmaAllocHandle handle = freeMetadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = freeMetadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, freeMetadata); + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + VMA_ASSERT(0); + case CounterStatus::Pass: + break; + } + + // Check all previous blocks for free space + if (AllocInOtherBlock(0, last, moveData, vector)) + { + // Full clear performed already + if (prevMoveCount != m_Moves.size() && freeMetadata->GetNextAllocation(handle) == VK_NULL_HANDLE) + reinterpret_cast(m_AlgorithmState)[index] = last; + return true; + } + } + + if (prevMoveCount == m_Moves.size()) + { + // Cannot perform full clear, have to move data in other blocks around + if (last != 0) + { + for (size_t i = last - 1; i; --i) + { + if (ReallocWithinBlock(vector, vector.GetBlock(i))) + return true; + } + } + + if (prevMoveCount == m_Moves.size()) + { + // No possible reallocs within blocks, try to move them around fast + return ComputeDefragmentation_Fast(vector); + } + } + else + { + switch (vectorState.operation) + { + case StateExtensive::Operation::FindFreeBlockBuffer: + vectorState.operation = StateExtensive::Operation::MoveBuffers; + break; + default: + VMA_ASSERT(0); + case StateExtensive::Operation::FindFreeBlockTexture: + vectorState.operation = StateExtensive::Operation::MoveTextures; + break; + case StateExtensive::Operation::FindFreeBlockAll: + vectorState.operation = StateExtensive::Operation::MoveAll; + break; + } + vectorState.firstFreeBlock = last; + // Nothing done, block found without reallocations, can perform another reallocs in same pass + if (prevMoveCount == m_Moves.size()) + return ComputeDefragmentation_Extensive(vector, index); + } + break; + } + case StateExtensive::Operation::MoveTextures: + { + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) + { + if (texturePresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockTexture; + return ComputeDefragmentation_Extensive(vector, index); + } + + if (!bufferPresent && !otherPresent) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + break; + } + + // No more textures to move, check buffers + vectorState.operation = StateExtensive::Operation::MoveBuffers; + bufferPresent = false; + otherPresent = false; + } + else + break; + } + case StateExtensive::Operation::MoveBuffers: + { + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_BUFFER, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) + { + if (bufferPresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; + return ComputeDefragmentation_Extensive(vector, index); + } + + if (!otherPresent) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + break; + } + + // No more buffers to move, check all others + vectorState.operation = StateExtensive::Operation::MoveAll; + otherPresent = false; + } + else + break; + } + case StateExtensive::Operation::MoveAll: + { + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_FREE, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) + { + if (otherPresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; + return ComputeDefragmentation_Extensive(vector, index); + } + // Everything moved + vectorState.operation = StateExtensive::Operation::Cleanup; + } + break; + } + } + + if (vectorState.operation == StateExtensive::Operation::Cleanup) + { + // All other work done, pack data in blocks even tighter if possible + const size_t prevMoveCount = m_Moves.size(); + for (size_t i = 0; i < vector.GetBlockCount(); ++i) + { + if (ReallocWithinBlock(vector, vector.GetBlock(i))) + return true; + } + + if (prevMoveCount == m_Moves.size()) + vectorState.operation = StateExtensive::Operation::Done; + } + return false; +} + +void VmaDefragmentationContext_T::UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state) +{ + size_t allocCount = 0; + size_t freeCount = 0; + state.avgFreeSize = 0; + state.avgAllocSize = 0; + + for (size_t i = 0; i < vector.GetBlockCount(); ++i) + { + VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + + allocCount += metadata->GetAllocationCount(); + freeCount += metadata->GetFreeRegionsCount(); + state.avgFreeSize += metadata->GetSumFreeSize(); + state.avgAllocSize += metadata->GetSize(); + } + + state.avgAllocSize = (state.avgAllocSize - state.avgFreeSize) / allocCount; + state.avgFreeSize /= freeCount; +} + +bool VmaDefragmentationContext_T::MoveDataToFreeBlocks(VmaSuballocationType currentType, + VmaBlockVector& vector, size_t firstFreeBlock, + bool& texturePresent, bool& bufferPresent, bool& otherPresent) +{ + const size_t prevMoveCount = m_Moves.size(); + for (size_t i = firstFreeBlock ; i;) + { + VmaDeviceMemoryBlock* block = vector.GetBlock(--i); + VmaBlockMetadata* metadata = block->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + VMA_ASSERT(0); + case CounterStatus::Pass: + break; + } + + // Move only single type of resources at once + if (!VmaIsBufferImageGranularityConflict(moveData.type, currentType)) + { + // Try to fit allocation into free blocks + if (AllocInOtherBlock(firstFreeBlock, vector.GetBlockCount(), moveData, vector)) + return false; + } + + if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL)) + texturePresent = true; + else if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_BUFFER)) + bufferPresent = true; + else + otherPresent = true; + } + } + return prevMoveCount == m_Moves.size(); } #endif // _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS @@ -15559,6 +14640,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; if(m_UseExtMemoryPriority) { + VMA_ASSERT(priority >= 0.f && priority <= 1.f); priorityInfo.priority = priority; VmaPnextChainPushFront(&allocInfo, &priorityInfo); } @@ -15623,7 +14705,6 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( FreeVulkanMemory(memTypeIndex, currAlloc->GetSize(), hMemory); m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), currAlloc->GetSize()); - currAlloc->SetUserData(this, VMA_NULL); m_AllocationObjectAllocator.Free(currAlloc); } @@ -15671,9 +14752,12 @@ VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( } } - *pAllocation = m_AllocationObjectAllocator.Allocate(isUserDataString, isMappingAllowed); + *pAllocation = m_AllocationObjectAllocator.Allocate(isMappingAllowed); (*pAllocation)->InitDedicatedAllocation(pool, memTypeIndex, hMemory, suballocType, pMappedData, size); - (*pAllocation)->SetUserData(this, pUserData); + if (isUserDataString) + (*pAllocation)->SetName(this, (const char*)pUserData); + else + (*pAllocation)->SetUserData(this, pUserData); m_Budget.AddAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), size); if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) { @@ -15785,8 +14869,8 @@ VkResult VmaAllocator_T::FindMemoryTypeIndex( if((requiredFlags & ~currFlags) == 0) { // Calculate cost as number of bits from preferredFlags not present in this memory type. - uint32_t currCost = VmaCountBitsSet(preferredFlags & ~currFlags) + - VmaCountBitsSet(currFlags & notPreferredFlags); + uint32_t currCost = VMA_COUNT_BITS_SET(preferredFlags & ~currFlags) + + VMA_COUNT_BITS_SET(currFlags & notPreferredFlags); // Remember memory type with lowest cost. if(currCost < minCost) { @@ -16005,6 +15089,8 @@ void VmaAllocator_T::FreeMemory( FillAllocation(allocation, VMA_ALLOCATION_FILL_PATTERN_DESTROYED); } + allocation->FreeName(this); + switch(allocation->GetType()) { case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: @@ -16030,29 +15116,25 @@ void VmaAllocator_T::FreeMemory( default: VMA_ASSERT(0); } - - m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); - allocation->SetUserData(this, VMA_NULL); - m_AllocationObjectAllocator.Free(allocation); } } } -void VmaAllocator_T::CalculateStats(VmaStats* pStats) +void VmaAllocator_T::CalculateStatistics(VmaTotalStatistics* pStats) { // Initialize. - VmaInitStatInfo(pStats->total); - for(size_t i = 0; i < VK_MAX_MEMORY_TYPES; ++i) - VmaInitStatInfo(pStats->memoryType[i]); - for(size_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) - VmaInitStatInfo(pStats->memoryHeap[i]); + VmaClearDetailedStatistics(pStats->total); + for(uint32_t i = 0; i < VK_MAX_MEMORY_TYPES; ++i) + VmaClearDetailedStatistics(pStats->memoryType[i]); + for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) + VmaClearDetailedStatistics(pStats->memoryHeap[i]); // Process default pools. for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) { VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex]; if (pBlockVector != VMA_NULL) - pBlockVector->AddStats(pStats); + pBlockVector->AddDetailedStatistics(pStats->memoryType[memTypeIndex]); } // Process custom pools. @@ -16061,26 +15143,33 @@ void VmaAllocator_T::CalculateStats(VmaStats* pStats) for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) { VmaBlockVector& blockVector = pool->m_BlockVector; - blockVector.AddStats(pStats); const uint32_t memTypeIndex = blockVector.GetMemoryTypeIndex(); - const uint32_t memHeapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); - pool->m_DedicatedAllocations.AddStats(pStats, memTypeIndex, memHeapIndex); + blockVector.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + pool->m_DedicatedAllocations.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); } } // Process dedicated allocations. for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) { - const uint32_t memHeapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); - m_DedicatedAllocations[memTypeIndex].AddStats(pStats, memTypeIndex, memHeapIndex); + m_DedicatedAllocations[memTypeIndex].AddDetailedStatistics(pStats->memoryType[memTypeIndex]); } - // Postprocess. - VmaPostprocessCalcStatInfo(pStats->total); - for(size_t i = 0; i < GetMemoryTypeCount(); ++i) - VmaPostprocessCalcStatInfo(pStats->memoryType[i]); - for(size_t i = 0; i < GetMemoryHeapCount(); ++i) - VmaPostprocessCalcStatInfo(pStats->memoryHeap[i]); + // Sum from memory types to memory heaps. + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + const uint32_t memHeapIndex = m_MemProps.memoryTypes[memTypeIndex].heapIndex; + VmaAddDetailedStatistics(pStats->memoryHeap[memHeapIndex], pStats->memoryType[memTypeIndex]); + } + + // Sum from memory heaps to total. + for(uint32_t memHeapIndex = 0; memHeapIndex < GetMemoryHeapCount(); ++memHeapIndex) + VmaAddDetailedStatistics(pStats->total, pStats->memoryHeap[memHeapIndex]); + + VMA_ASSERT(pStats->total.statistics.allocationCount == 0 || + pStats->total.allocationSizeMax >= pStats->total.allocationSizeMin); + VMA_ASSERT(pStats->total.unusedRangeCount == 0 || + pStats->total.unusedRangeSizeMax >= pStats->total.unusedRangeSizeMin); } void VmaAllocator_T::GetHeapBudgets(VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount) @@ -16095,13 +15184,15 @@ void VmaAllocator_T::GetHeapBudgets(VmaBudget* outBudgets, uint32_t firstHeap, u { const uint32_t heapIndex = firstHeap + i; - outBudgets->blockBytes = m_Budget.m_BlockBytes[heapIndex]; - outBudgets->allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex]; + outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex]; + outBudgets->statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex]; + outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; - if(m_Budget.m_VulkanUsage[heapIndex] + outBudgets->blockBytes > m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]) + if(m_Budget.m_VulkanUsage[heapIndex] + outBudgets->statistics.blockBytes > m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]) { outBudgets->usage = m_Budget.m_VulkanUsage[heapIndex] + - outBudgets->blockBytes - m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; + outBudgets->statistics.blockBytes - m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; } else { @@ -16126,66 +15217,17 @@ void VmaAllocator_T::GetHeapBudgets(VmaBudget* outBudgets, uint32_t firstHeap, u { const uint32_t heapIndex = firstHeap + i; - outBudgets->blockBytes = m_Budget.m_BlockBytes[heapIndex]; - outBudgets->allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex]; + outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex]; + outBudgets->statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex]; + outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; - outBudgets->usage = outBudgets->blockBytes; + outBudgets->usage = outBudgets->statistics.blockBytes; outBudgets->budget = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics. } } } -VkResult VmaAllocator_T::DefragmentationBegin( - const VmaDefragmentationInfo2& info, - VmaDefragmentationStats* pStats, - VmaDefragmentationContext* pContext) -{ - if(info.pAllocationsChanged != VMA_NULL) - { - memset(info.pAllocationsChanged, 0, info.allocationCount * sizeof(VkBool32)); - } - - *pContext = vma_new(this, VmaDefragmentationContext_T)( - this, info.flags, pStats); - - (*pContext)->AddPools(info.poolCount, info.pPools); - (*pContext)->AddAllocations( - info.allocationCount, info.pAllocations, info.pAllocationsChanged); - - VkResult res = (*pContext)->Defragment( - info.maxCpuBytesToMove, info.maxCpuAllocationsToMove, - info.maxGpuBytesToMove, info.maxGpuAllocationsToMove, - info.commandBuffer, pStats, info.flags); - - if(res != VK_NOT_READY) - { - vma_delete(this, *pContext); - *pContext = VMA_NULL; - } - - return res; -} - -VkResult VmaAllocator_T::DefragmentationEnd( - VmaDefragmentationContext context) -{ - vma_delete(this, context); - return VK_SUCCESS; -} - -VkResult VmaAllocator_T::DefragmentationPassBegin( - VmaDefragmentationPassInfo* pInfo, - VmaDefragmentationContext context) -{ - return context->DefragmentPassBegin(pInfo); -} - -VkResult VmaAllocator_T::DefragmentationPassEnd( - VmaDefragmentationContext context) -{ - return context->DefragmentPassEnd(); -} - void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo) { pAllocationInfo->memoryType = hAllocation->GetMemoryTypeIndex(); @@ -16194,6 +15236,7 @@ void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationI pAllocationInfo->size = hAllocation->GetSize(); pAllocationInfo->pMappedData = hAllocation->GetMappedData(); pAllocationInfo->pUserData = hAllocation->GetUserData(); + pAllocationInfo->pName = hAllocation->GetName(); } VkResult VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool) @@ -16260,16 +15303,18 @@ void VmaAllocator_T::DestroyPool(VmaPool pool) vma_delete(this, pool); } -void VmaAllocator_T::GetPoolStats(VmaPool pool, VmaPoolStats* pPoolStats) +void VmaAllocator_T::GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats) { - pPoolStats->size = 0; - pPoolStats->unusedSize = 0; - pPoolStats->allocationCount = 0; - pPoolStats->unusedRangeCount = 0; - pPoolStats->blockCount = 0; + VmaClearStatistics(*pPoolStats); + pool->m_BlockVector.AddStatistics(*pPoolStats); + pool->m_DedicatedAllocations.AddStatistics(*pPoolStats); +} - pool->m_BlockVector.AddPoolStats(pPoolStats); - pool->m_DedicatedAllocations.AddPoolStats(pPoolStats); +void VmaAllocator_T::CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats) +{ + VmaClearDetailedStatistics(*pPoolStats); + pool->m_BlockVector.AddDetailedStatistics(*pPoolStats); + pool->m_DedicatedAllocations.AddDetailedStatistics(*pPoolStats); } void VmaAllocator_T::SetCurrentFrameIndex(uint32_t frameIndex) @@ -16373,6 +15418,7 @@ VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAlloc { m_Budget.m_BlockBytes[heapIndex] += pAllocateInfo->allocationSize; } + ++m_Budget.m_BlockCount[heapIndex]; // VULKAN CALL vkAllocateMemory. VkResult res = (*m_VulkanFunctions.vkAllocateMemory)(m_hDevice, pAllocateInfo, GetAllocationCallbacks(), pMemory); @@ -16393,6 +15439,7 @@ VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAlloc } else { + --m_Budget.m_BlockCount[heapIndex]; m_Budget.m_BlockBytes[heapIndex] -= pAllocateInfo->allocationSize; } @@ -16410,7 +15457,9 @@ void VmaAllocator_T::FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, Vk // VULKAN CALL vkFreeMemory. (*m_VulkanFunctions.vkFreeMemory)(m_hDevice, hMemory, GetAllocationCallbacks()); - m_Budget.m_BlockBytes[MemoryTypeIndexToHeapIndex(memoryType)] -= size; + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memoryType); + --m_Budget.m_BlockCount[heapIndex]; + m_Budget.m_BlockBytes[heapIndex] -= size; --m_DeviceMemoryCount; } @@ -16668,6 +15717,9 @@ void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation) FreeVulkanMemory(memTypeIndex, allocation->GetSize(), hMemory); + m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); + m_AllocationObjectAllocator.Free(allocation); + VMA_DEBUG_LOG(" Freed DedicatedMemory MemoryTypeIndex=%u", memTypeIndex); } @@ -16855,89 +15907,90 @@ uint32_t VmaAllocator_T::GetGpuDefragmentationMemoryTypeBits() #if VMA_STATS_STRING_ENABLED void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json) { - bool dedicatedAllocationsStarted = false; - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + json.WriteString("DefaultPools"); + json.BeginObject(); { - VmaDedicatedAllocationList& dedicatedAllocList = m_DedicatedAllocations[memTypeIndex]; - if(!dedicatedAllocList.IsEmpty()) - { - if(dedicatedAllocationsStarted == false) - { - dedicatedAllocationsStarted = true; - json.WriteString("DedicatedAllocations"); - json.BeginObject(); - } - - json.BeginString("Type "); - json.ContinueString(memTypeIndex); - json.EndString(); - - dedicatedAllocList.BuildStatsString(json); - } - } - if(dedicatedAllocationsStarted) - { - json.EndObject(); - } - - { - bool allocationsStarted = false; - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) { VmaBlockVector* pBlockVector = m_pBlockVectors[memTypeIndex]; - if(pBlockVector != VMA_NULL) + VmaDedicatedAllocationList& dedicatedAllocList = m_DedicatedAllocations[memTypeIndex]; + if (pBlockVector != VMA_NULL) { - if (pBlockVector->IsEmpty() == false) - { - if (allocationsStarted == false) - { - allocationsStarted = true; - json.WriteString("DefaultPools"); - json.BeginObject(); - } - - json.BeginString("Type "); - json.ContinueString(memTypeIndex); - json.EndString(); - - json.BeginObject(); - pBlockVector->PrintDetailedMap(json); - json.EndObject(); - } - } - } - if(allocationsStarted) - { - json.EndObject(); - } - } - - // Custom pools - { - VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); - if(!m_Pools.IsEmpty()) - { - json.WriteString("Pools"); - json.BeginObject(); - for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) - { - json.BeginString(); - json.ContinueString(pool->GetId()); + json.BeginString("Type "); + json.ContinueString(memTypeIndex); json.EndString(); - json.BeginObject(); - pool->m_BlockVector.PrintDetailedMap(json); - - if (!pool->m_DedicatedAllocations.IsEmpty()) { + json.WriteString("PreferredBlockSize"); + json.WriteNumber(pBlockVector->GetPreferredBlockSize()); + + json.WriteString("Blocks"); + pBlockVector->PrintDetailedMap(json); + json.WriteString("DedicatedAllocations"); - pool->m_DedicatedAllocations.BuildStatsString(json); + dedicatedAllocList.BuildStatsString(json); } json.EndObject(); } - json.EndObject(); } } + json.EndObject(); + + json.WriteString("CustomPools"); + json.BeginObject(); + { + VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); + if (!m_Pools.IsEmpty()) + { + for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + bool displayType = true; + size_t index = 0; + for (VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) + { + VmaBlockVector& blockVector = pool->m_BlockVector; + if (blockVector.GetMemoryTypeIndex() == memTypeIndex) + { + if (displayType) + { + json.BeginString("Type "); + json.ContinueString(memTypeIndex); + json.EndString(); + json.BeginArray(); + displayType = false; + } + + json.BeginObject(); + { + json.WriteString("Name"); + json.BeginString(); + json.ContinueString(index++); + if (pool->GetName()) + { + json.WriteString(" - "); + json.WriteString(pool->GetName()); + } + json.EndString(); + + json.WriteString("PreferredBlockSize"); + json.WriteNumber(blockVector.GetPreferredBlockSize()); + + json.WriteString("Blocks"); + blockVector.PrintDetailedMap(json); + + json.WriteString("DedicatedAllocations"); + pool->m_DedicatedAllocations.BuildStatsString(json); + } + json.EndObject(); + } + } + + if (!displayType) + json.EndArray(); + } + } + } + json.EndObject(); } #endif // VMA_STATS_STRING_ENABLED #endif // _VMA_ALLOCATOR_T_FUNCTIONS @@ -17018,13 +16071,13 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( allocator->SetCurrentFrameIndex(frameIndex); } -VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStats( +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics( VmaAllocator allocator, - VmaStats* pStats) + VmaTotalStatistics* pStats) { VMA_ASSERT(allocator && pStats); VMA_DEBUG_GLOBAL_MUTEX_LOCK - allocator->CalculateStats(pStats); + allocator->CalculateStatistics(pStats); } VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets( @@ -17048,123 +16101,176 @@ VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( VmaStringBuilder sb(allocator->GetAllocationCallbacks()); { - VmaJsonWriter json(allocator->GetAllocationCallbacks(), sb); - json.BeginObject(); - VmaBudget budgets[VK_MAX_MEMORY_HEAPS]; allocator->GetHeapBudgets(budgets, 0, allocator->GetMemoryHeapCount()); - VmaStats stats; - allocator->CalculateStats(&stats); + VmaTotalStatistics stats; + allocator->CalculateStatistics(&stats); - json.WriteString("Total"); - VmaPrintStatInfo(json, stats.total); - - for(uint32_t heapIndex = 0; heapIndex < allocator->GetMemoryHeapCount(); ++heapIndex) + VmaJsonWriter json(allocator->GetAllocationCallbacks(), sb); + json.BeginObject(); { - json.BeginString("Heap "); - json.ContinueString(heapIndex); - json.EndString(); - json.BeginObject(); - - json.WriteString("Size"); - json.WriteNumber(allocator->m_MemProps.memoryHeaps[heapIndex].size); - - json.WriteString("Flags"); - json.BeginArray(true); - if((allocator->m_MemProps.memoryHeaps[heapIndex].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) - { - json.WriteString("DEVICE_LOCAL"); - } - json.EndArray(); - - json.WriteString("Budget"); + json.WriteString("General"); json.BeginObject(); { - json.WriteString("BlockBytes"); - json.WriteNumber(budgets[heapIndex].blockBytes); - json.WriteString("AllocationBytes"); - json.WriteNumber(budgets[heapIndex].allocationBytes); - json.WriteString("Usage"); - json.WriteNumber(budgets[heapIndex].usage); - json.WriteString("Budget"); - json.WriteNumber(budgets[heapIndex].budget); + const VkPhysicalDeviceProperties& deviceProperties = allocator->m_PhysicalDeviceProperties; + const VkPhysicalDeviceMemoryProperties& memoryProperties = allocator->m_MemProps; + + json.WriteString("API"); + json.WriteString("Vulkan"); + + json.WriteString("apiVersion"); + json.BeginString(); + json.ContinueString(VK_API_VERSION_MAJOR(deviceProperties.apiVersion)); + json.ContinueString("."); + json.ContinueString(VK_API_VERSION_MINOR(deviceProperties.apiVersion)); + json.ContinueString("."); + json.ContinueString(VK_API_VERSION_PATCH(deviceProperties.apiVersion)); + json.EndString(); + + json.WriteString("GPU"); + json.WriteString(deviceProperties.deviceName); + json.WriteString("deviceType"); + json.WriteNumber(static_cast(deviceProperties.deviceType)); + + json.WriteString("maxMemoryAllocationCount"); + json.WriteNumber(deviceProperties.limits.maxMemoryAllocationCount); + json.WriteString("bufferImageGranularity"); + json.WriteNumber(deviceProperties.limits.bufferImageGranularity); + json.WriteString("nonCoherentAtomSize"); + json.WriteNumber(deviceProperties.limits.nonCoherentAtomSize); + + json.WriteString("memoryHeapCount"); + json.WriteNumber(memoryProperties.memoryHeapCount); + json.WriteString("memoryTypeCount"); + json.WriteNumber(memoryProperties.memoryTypeCount); } json.EndObject(); - - if(stats.memoryHeap[heapIndex].blockCount > 0) + } + { + json.WriteString("Total"); + VmaPrintDetailedStatistics(json, stats.total); + } + { + json.WriteString("MemoryInfo"); + json.BeginObject(); { - json.WriteString("Stats"); - VmaPrintStatInfo(json, stats.memoryHeap[heapIndex]); - } - - for(uint32_t typeIndex = 0; typeIndex < allocator->GetMemoryTypeCount(); ++typeIndex) - { - if(allocator->MemoryTypeIndexToHeapIndex(typeIndex) == heapIndex) + for (uint32_t heapIndex = 0; heapIndex < allocator->GetMemoryHeapCount(); ++heapIndex) { - json.BeginString("Type "); - json.ContinueString(typeIndex); + json.BeginString("Heap "); + json.ContinueString(heapIndex); json.EndString(); - json.BeginObject(); + { + const VkMemoryHeap& heapInfo = allocator->m_MemProps.memoryHeaps[heapIndex]; + json.WriteString("Flags"); + json.BeginArray(true); + { + if (heapInfo.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) + json.WriteString("DEVICE_LOCAL"); + #if VMA_VULKAN_VERSION >= 1001000 + if (heapInfo.flags & VK_MEMORY_HEAP_MULTI_INSTANCE_BIT) + json.WriteString("MULTI_INSTANCE"); + #endif - json.WriteString("Flags"); - json.BeginArray(true); - VkMemoryPropertyFlags flags = allocator->m_MemProps.memoryTypes[typeIndex].propertyFlags; - if((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) - { - json.WriteString("DEVICE_LOCAL"); - } - if((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) - { - json.WriteString("HOST_VISIBLE"); - } - if((flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0) - { - json.WriteString("HOST_COHERENT"); - } - if((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0) - { - json.WriteString("HOST_CACHED"); - } - if((flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) != 0) - { - json.WriteString("LAZILY_ALLOCATED"); - } -#if VMA_VULKAN_VERSION >= 1001000 - if((flags & VK_MEMORY_PROPERTY_PROTECTED_BIT) != 0) - { - json.WriteString("PROTECTED"); - } -#endif // #if VMA_VULKAN_VERSION >= 1001000 -#if VK_AMD_device_coherent_memory - if((flags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) != 0) - { - json.WriteString("DEVICE_COHERENT"); - } - if((flags & VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY) != 0) - { - json.WriteString("DEVICE_UNCACHED"); - } -#endif // #if VK_AMD_device_coherent_memory - json.EndArray(); + VkMemoryHeapFlags flags = heapInfo.flags & + ~(VK_MEMORY_HEAP_DEVICE_LOCAL_BIT + #if VMA_VULKAN_VERSION >= 1001000 + | VK_MEMORY_HEAP_MULTI_INSTANCE_BIT + #endif + ); + if (flags != 0) + json.WriteNumber(flags); + } + json.EndArray(); + + json.WriteString("Size"); + json.WriteNumber(heapInfo.size); + + json.WriteString("Budget"); + json.BeginObject(); + { + json.WriteString("BudgetBytes"); + json.WriteNumber(budgets[heapIndex].budget); + json.WriteString("UsageBytes"); + json.WriteNumber(budgets[heapIndex].usage); + } + json.EndObject(); - if(stats.memoryType[typeIndex].blockCount > 0) - { json.WriteString("Stats"); - VmaPrintStatInfo(json, stats.memoryType[typeIndex]); - } + VmaPrintDetailedStatistics(json, stats.memoryHeap[heapIndex]); + json.WriteString("MemoryPools"); + json.BeginObject(); + { + for (uint32_t typeIndex = 0; typeIndex < allocator->GetMemoryTypeCount(); ++typeIndex) + { + if (allocator->MemoryTypeIndexToHeapIndex(typeIndex) == heapIndex) + { + json.BeginString("Type "); + json.ContinueString(typeIndex); + json.EndString(); + json.BeginObject(); + { + json.WriteString("Flags"); + json.BeginArray(true); + { + VkMemoryPropertyFlags flags = allocator->m_MemProps.memoryTypes[typeIndex].propertyFlags; + if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + json.WriteString("DEVICE_LOCAL"); + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + json.WriteString("HOST_VISIBLE"); + if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + json.WriteString("HOST_COHERENT"); + if (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) + json.WriteString("HOST_CACHED"); + if (flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) + json.WriteString("LAZILY_ALLOCATED"); + #if VMA_VULKAN_VERSION >= 1001000 + if (flags & VK_MEMORY_PROPERTY_PROTECTED_BIT) + json.WriteString("PROTECTED"); + #endif + #if VK_AMD_device_coherent_memory + if (flags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) + json.WriteString("DEVICE_COHERENT_AMD"); + if (flags & VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY) + json.WriteString("DEVICE_UNCACHED_AMD"); + #endif + + flags &= ~(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + #if VMA_VULKAN_VERSION >= 1001000 + | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT + #endif + #if VK_AMD_device_coherent_memory + | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY + | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY + #endif + | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT); + if (flags != 0) + json.WriteNumber(flags); + } + json.EndArray(); + + json.WriteString("Stats"); + VmaPrintDetailedStatistics(json, stats.memoryType[typeIndex]); + } + json.EndObject(); + } + } + + } + json.EndObject(); + } json.EndObject(); } } - json.EndObject(); } - if(detailedMap == VK_TRUE) - { + + if (detailedMap == VK_TRUE) allocator->PrintDetailedMap(json); - } json.EndObject(); } @@ -17335,16 +16441,28 @@ VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( allocator->DestroyPool(pool); } -VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStats( +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( VmaAllocator allocator, VmaPool pool, - VmaPoolStats* pPoolStats) + VmaStatistics* pPoolStats) { VMA_ASSERT(allocator && pool && pPoolStats); VMA_DEBUG_GLOBAL_MUTEX_LOCK - allocator->GetPoolStats(pool, pPoolStats); + allocator->GetPoolStatistics(pool, pPoolStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( + VmaAllocator allocator, + VmaPool pool, + VmaDetailedStatistics* pPoolStats) +{ + VMA_ASSERT(allocator && pool && pPoolStats); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->CalculatePoolStatistics(pool, pPoolStats); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption(VmaAllocator allocator, VmaPool pool) @@ -17603,6 +16721,14 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( allocation->SetUserData(allocator, pUserData); } +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const char* VMA_NULLABLE pName) +{ + allocation->SetName(allocator, pName); +} + VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties( VmaAllocator VMA_NOT_NULL allocator, VmaAllocation VMA_NOT_NULL allocation, @@ -17733,123 +16859,70 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( return allocator->CheckCorruption(memoryTypeBits); } -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragment( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( VmaAllocator allocator, - const VmaAllocation* pAllocations, - size_t allocationCount, - VkBool32* pAllocationsChanged, - const VmaDefragmentationInfo *pDefragmentationInfo, - VmaDefragmentationStats* pDefragmentationStats) -{ - // Deprecated interface, reimplemented using new one. - - VmaDefragmentationInfo2 info2 = {}; - info2.allocationCount = (uint32_t)allocationCount; - info2.pAllocations = pAllocations; - info2.pAllocationsChanged = pAllocationsChanged; - if(pDefragmentationInfo != VMA_NULL) - { - info2.maxCpuAllocationsToMove = pDefragmentationInfo->maxAllocationsToMove; - info2.maxCpuBytesToMove = pDefragmentationInfo->maxBytesToMove; - } - else - { - info2.maxCpuAllocationsToMove = UINT32_MAX; - info2.maxCpuBytesToMove = VK_WHOLE_SIZE; - } - // info2.flags, maxGpuAllocationsToMove, maxGpuBytesToMove, commandBuffer deliberately left zero. - - VmaDefragmentationContext ctx; - VkResult res = vmaDefragmentationBegin(allocator, &info2, pDefragmentationStats, &ctx); - if(res == VK_NOT_READY) - { - res = vmaDefragmentationEnd( allocator, ctx); - } - return res; -} - -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationBegin( - VmaAllocator allocator, - const VmaDefragmentationInfo2* pInfo, - VmaDefragmentationStats* pStats, - VmaDefragmentationContext *pContext) + const VmaDefragmentationInfo* pInfo, + VmaDefragmentationContext* pContext) { VMA_ASSERT(allocator && pInfo && pContext); - // Degenerate case: Nothing to defragment. - if(pInfo->allocationCount == 0 && pInfo->poolCount == 0) + VMA_DEBUG_LOG("vmaBeginDefragmentation"); + + if (pInfo->pool != VMA_NULL) { - return VK_SUCCESS; + // Check if run on supported algorithms + if (pInfo->pool->m_BlockVector.GetAlgorithm() & VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) + return VK_ERROR_FEATURE_NOT_PRESENT; } - VMA_ASSERT(pInfo->allocationCount == 0 || pInfo->pAllocations != VMA_NULL); - VMA_ASSERT(pInfo->poolCount == 0 || pInfo->pPools != VMA_NULL); - VMA_HEAVY_ASSERT(VmaValidatePointerArray(pInfo->allocationCount, pInfo->pAllocations)); - VMA_HEAVY_ASSERT(VmaValidatePointerArray(pInfo->poolCount, pInfo->pPools)); - - VMA_DEBUG_LOG("vmaDefragmentationBegin"); - VMA_DEBUG_GLOBAL_MUTEX_LOCK - VkResult res = allocator->DefragmentationBegin(*pInfo, pStats, pContext); - - return res; + *pContext = vma_new(allocator, VmaDefragmentationContext_T)(allocator, *pInfo); + return VK_SUCCESS; } -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationEnd( +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( VmaAllocator allocator, - VmaDefragmentationContext context) + VmaDefragmentationContext context, + VmaDefragmentationStats* pStats) { - VMA_ASSERT(allocator); + VMA_ASSERT(allocator && context); - VMA_DEBUG_LOG("vmaDefragmentationEnd"); + VMA_DEBUG_LOG("vmaEndDefragmentation"); - if(context != VK_NULL_HANDLE) - { - VMA_DEBUG_GLOBAL_MUTEX_LOCK - return allocator->DefragmentationEnd(context); - } - else - { - return VK_SUCCESS; - } + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + if (pStats) + context->GetStats(*pStats); + vma_delete(allocator, context); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( - VmaAllocator allocator, - VmaDefragmentationContext context, - VmaDefragmentationPassInfo* pInfo - ) + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) { - VMA_ASSERT(allocator); - VMA_ASSERT(pInfo); + VMA_ASSERT(context && pPassInfo); VMA_DEBUG_LOG("vmaBeginDefragmentationPass"); VMA_DEBUG_GLOBAL_MUTEX_LOCK - if(context == VK_NULL_HANDLE) - { - pInfo->moveCount = 0; - return VK_SUCCESS; - } - - return allocator->DefragmentationPassBegin(pInfo, context); + return context->DefragmentPassBegin(*pPassInfo); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( - VmaAllocator allocator, - VmaDefragmentationContext context) + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) { - VMA_ASSERT(allocator); + VMA_ASSERT(context && pPassInfo); VMA_DEBUG_LOG("vmaEndDefragmentationPass"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK - if(context == VK_NULL_HANDLE) - return VK_SUCCESS; - - return allocator->DefragmentationPassEnd(context); + return context->DefragmentPassEnd(*pPassInfo); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( @@ -18098,6 +17171,50 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( return res; } +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer) +{ + VMA_ASSERT(allocator && pBufferCreateInfo && pBuffer && allocation); + + VMA_DEBUG_LOG("vmaCreateAliasingBuffer"); + + *pBuffer = VK_NULL_HANDLE; + + if (pBufferCreateInfo->size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + if ((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && + !allocator->m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + // 1. Create VkBuffer. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( + allocator->m_hDevice, + pBufferCreateInfo, + allocator->GetAllocationCallbacks(), + pBuffer); + if (res >= 0) + { + // 2. Bind buffer with memory. + res = allocator->BindBufferMemory(allocation, 0, *pBuffer, VMA_NULL); + if (res >= 0) + { + return VK_SUCCESS; + } + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + } + return res; +} + VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer( VmaAllocator allocator, VkBuffer buffer, @@ -18219,10 +17336,52 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( return res; } +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage) +{ + VMA_ASSERT(allocator && pImageCreateInfo && pImage && allocation); + + *pImage = VK_NULL_HANDLE; + + VMA_DEBUG_LOG("vmaCreateImage"); + + if (pImageCreateInfo->extent.width == 0 || + pImageCreateInfo->extent.height == 0 || + pImageCreateInfo->extent.depth == 0 || + pImageCreateInfo->mipLevels == 0 || + pImageCreateInfo->arrayLayers == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + // 1. Create VkImage. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)( + allocator->m_hDevice, + pImageCreateInfo, + allocator->GetAllocationCallbacks(), + pImage); + if (res >= 0) + { + // 2. Bind image with memory. + res = allocator->BindImageMemory(allocation, 0, *pImage, VMA_NULL); + if (res >= 0) + { + return VK_SUCCESS; + } + (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); + } + return res; +} + VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( - VmaAllocator allocator, - VkImage image, - VmaAllocation allocation) + VmaAllocator VMA_NOT_NULL allocator, + VkImage VMA_NULLABLE_NON_DISPATCHABLE image, + VmaAllocation VMA_NULLABLE allocation) { VMA_ASSERT(allocator); @@ -18331,13 +17490,22 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData(VmaVirtualBlock virtualBlock->SetAllocationUserData(allocation, pUserData); } -VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStats(VmaVirtualBlock VMA_NOT_NULL virtualBlock, - VmaStatInfo* VMA_NOT_NULL pStatInfo) +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaStatistics* VMA_NOT_NULL pStats) { - VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStatInfo != VMA_NULL); - VMA_DEBUG_LOG("vmaCalculateVirtualBlockStats"); + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); + VMA_DEBUG_LOG("vmaGetVirtualBlockStatistics"); VMA_DEBUG_GLOBAL_MUTEX_LOCK; - virtualBlock->CalculateStats(*pStatInfo); + virtualBlock->GetStatistics(*pStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaDetailedStatistics* VMA_NOT_NULL pStats) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); + VMA_DEBUG_LOG("vmaCalculateVirtualBlockStatistics"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->CalculateDetailedStatistics(*pStats); } #if VMA_STATS_STRING_ENABLED @@ -18809,8 +17977,8 @@ To query for current memory usage and available budget, use function vmaGetHeapB Returned structure #VmaBudget contains quantities expressed in bytes, per Vulkan memory heap. Please note that this function returns different information and works faster than -vmaCalculateStats(). vmaGetHeapBudgets() can be called every frame or even before every -allocation, while vmaCalculateStats() is intended to be used rarely, +vmaCalculateStatistics(). vmaGetHeapBudgets() can be called every frame or even before every +allocation, while vmaCalculateStatistics() is intended to be used rarely, only to obtain statistical information, e.g. for debugging purposes. It is recommended to use VK_EXT_memory_budget device extension to obtain information @@ -18840,20 +18008,27 @@ budget, by default the library still tries to create it, leaving it to the Vulka implementation whether the allocation succeeds or fails. You can change this behavior by using #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag. With it, the allocation is not made if it would exceed the budget or if the budget is already exceeded. -The allocation then fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +VMA then tries to make the allocation from the next eligible Vulkan memory type. +The all of them fail, the call then fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. Example usage pattern may be to pass the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag when creating resources that are not essential for the application (e.g. the texture of a specific object) and not to pass it when creating critically important resources (e.g. render targets). +On AMD graphics cards there is a custom vendor extension available: VK_AMD_memory_overallocation_behavior +that allows to control the behavior of the Vulkan implementation in out-of-memory cases - +whether it should fail with an error code or still allow the allocation. +Usage of this extension involves only passing extra structure on Vulkan device creation, +so it is out of scope of this library. + Finally, you can also use #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT flag to make sure a new allocation is created only when it fits inside one of the existing memory blocks. If it would require to allocate a new block, if fails instead with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. This also ensures that the function call is very fast because it never goes to Vulkan to obtain a new block. -Please note that creating \ref custom_memory_pools with VmaPoolCreateInfo::minBlockCount -set to more than 0 will try to allocate memory blocks without checking whether they +\note Creating \ref custom_memory_pools with VmaPoolCreateInfo::minBlockCount +set to more than 0 will currently try to allocate memory blocks without checking whether they fit within budget. @@ -18993,14 +18168,28 @@ To use custom memory pools: Example: \code +// Find memoryTypeIndex for the pool. +VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +sampleBufCreateInfo.size = 0x10000; // Doesn't matter. +sampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo sampleAllocCreateInfo = {}; +sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + +uint32_t memTypeIndex; +VkResult res = vmaFindMemoryTypeIndexForBufferInfo(allocator, + &sampleBufCreateInfo, &sampleAllocCreateInfo, &memTypeIndex); +// Check res... + // Create a pool that can have at most 2 blocks, 128 MiB each. VmaPoolCreateInfo poolCreateInfo = {}; -poolCreateInfo.memoryTypeIndex = ... +poolCreateInfo.memoryTypeIndex = memTypeIndex; poolCreateInfo.blockSize = 128ull * 1024 * 1024; poolCreateInfo.maxBlockCount = 2; VmaPool pool; -vmaCreatePool(allocator, &poolCreateInfo, &pool); +res = vmaCreatePool(allocator, &poolCreateInfo, &pool); +// Check res... // Allocate a buffer out of it. VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; @@ -19012,8 +18201,8 @@ allocCreateInfo.pool = pool; VkBuffer buf; VmaAllocation alloc; -VmaAllocationInfo allocInfo; -vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); +res = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr); +// Check res... \endcode You have to free all allocations made from this pool before destroying it. @@ -19151,39 +18340,8 @@ you can achieve behavior of a ring buffer / queue. Ring buffer is available only in pools with one memory block - VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined. -\section buddy_algorithm Buddy allocation algorithm +\note \ref defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT. -There is another allocation algorithm that can be used with custom pools, called -"buddy". Its internal data structure is based on a binary tree of blocks, each having -size that is a power of two and a half of its parent's size. When you want to -allocate memory of certain size, a free node in the tree is located. If it is too -large, it is recursively split into two halves (called "buddies"). However, if -requested allocation size is not a power of two, the size of the allocation is -aligned up to the nearest power of two and the remaining space is wasted. When -two buddy nodes become free, they are merged back into one larger node. - -![Buddy allocator](../gfx/Buddy_allocator.png) - -The advantage of buddy allocation algorithm over default algorithm is faster -allocation and deallocation, as well as smaller external fragmentation. The -disadvantage is more wasted space (internal fragmentation). -For more information, please search the Internet for "Buddy memory allocation" - -sources that describe this concept in general. - -To use buddy allocation algorithm with a custom pool, add flag -#VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT to VmaPoolCreateInfo::flags while creating -#VmaPool object. - -Several limitations apply to pools that use buddy algorithm: - -- It is recommended to use VmaPoolCreateInfo::blockSize that is a power of two. - Otherwise, only largest power of two smaller than the size is used for - allocations. The remaining space always stays unused. -- [Margins](@ref debugging_memory_usage_margins) and - [corruption detection](@ref debugging_memory_usage_corruption_detection) - don't work in such pools. -- [Defragmentation](@ref defragmentation) doesn't work with allocations made from - such pool. \page defragmentation Defragmentation @@ -19193,236 +18351,185 @@ to find a continuous range of free memory for a new allocation despite there is enough free space, just scattered across many small free ranges between existing allocations. -To mitigate this problem, you can use defragmentation feature: -structure #VmaDefragmentationInfo2, function vmaDefragmentationBegin(), vmaDefragmentationEnd(). -Given set of allocations, -this function can move them to compact used memory, ensure more continuous free -space and possibly also free some `VkDeviceMemory` blocks. - -What the defragmentation does is: - -- Updates #VmaAllocation objects to point to new `VkDeviceMemory` and offset. - After allocation has been moved, its VmaAllocationInfo::deviceMemory and/or - VmaAllocationInfo::offset changes. You must query them again using - vmaGetAllocationInfo() if you need them. -- Moves actual data in memory. - -What it doesn't do, so you need to do it yourself: - -- Recreate buffers and images that were bound to allocations that were defragmented and - bind them with their new places in memory. - You must use `vkDestroyBuffer()`, `vkDestroyImage()`, - `vkCreateBuffer()`, `vkCreateImage()`, vmaBindBufferMemory(), vmaBindImageMemory() - for that purpose and NOT vmaDestroyBuffer(), - vmaDestroyImage(), vmaCreateBuffer(), vmaCreateImage(), because you don't need to - destroy or create allocation objects! -- Recreate views and update descriptors that point to these buffers and images. - -\section defragmentation_cpu Defragmenting CPU memory - -Following example demonstrates how you can run defragmentation on CPU. -Only allocations created in memory types that are `HOST_VISIBLE` can be defragmented. -Others are ignored. - -The way it works is: - -- It temporarily maps entire memory blocks when necessary. -- It moves data using `memmove()` function. - -\code -// Given following variables already initialized: -VkDevice device; -VmaAllocator allocator; -std::vector buffers; -std::vector allocations; - - -const uint32_t allocCount = (uint32_t)allocations.size(); -std::vector allocationsChanged(allocCount); - -VmaDefragmentationInfo2 defragInfo = {}; -defragInfo.allocationCount = allocCount; -defragInfo.pAllocations = allocations.data(); -defragInfo.pAllocationsChanged = allocationsChanged.data(); -defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE; // No limit. -defragInfo.maxCpuAllocationsToMove = UINT32_MAX; // No limit. - -VmaDefragmentationContext defragCtx; -vmaDefragmentationBegin(allocator, &defragInfo, nullptr, &defragCtx); -vmaDefragmentationEnd(allocator, defragCtx); - -for(uint32_t i = 0; i < allocCount; ++i) -{ - if(allocationsChanged[i]) - { - // Destroy buffer that is immutably bound to memory region which is no longer valid. - vkDestroyBuffer(device, buffers[i], nullptr); - - // Create new buffer with same parameters. - VkBufferCreateInfo bufferInfo = ...; - vkCreateBuffer(device, &bufferInfo, nullptr, &buffers[i]); - - // You can make dummy call to vkGetBufferMemoryRequirements here to silence validation layer warning. - - // Bind new buffer to new memory region. Data contained in it is already moved. - VmaAllocationInfo allocInfo; - vmaGetAllocationInfo(allocator, allocations[i], &allocInfo); - vmaBindBufferMemory(allocator, allocations[i], buffers[i]); - } -} -\endcode - -Setting VmaDefragmentationInfo2::pAllocationsChanged is optional. -This output array tells whether particular allocation in VmaDefragmentationInfo2::pAllocations at the same index -has been modified during defragmentation. -You can pass null, but you then need to query every allocation passed to defragmentation -for new parameters using vmaGetAllocationInfo() if you might need to recreate and rebind a buffer or image associated with it. - -If you use [Custom memory pools](@ref choosing_memory_type_custom_memory_pools), -you can fill VmaDefragmentationInfo2::poolCount and VmaDefragmentationInfo2::pPools -instead of VmaDefragmentationInfo2::allocationCount and VmaDefragmentationInfo2::pAllocations -to defragment all allocations in given pools. -You cannot use VmaDefragmentationInfo2::pAllocationsChanged in that case. -You can also combine both methods. - -\section defragmentation_gpu Defragmenting GPU memory - -It is also possible to defragment allocations created in memory types that are not `HOST_VISIBLE`. -To do that, you need to pass a command buffer that meets requirements as described in -VmaDefragmentationInfo2::commandBuffer. The way it works is: - -- It creates temporary buffers and binds them to entire memory blocks when necessary. -- It issues `vkCmdCopyBuffer()` to passed command buffer. +To mitigate this problem, you can use defragmentation feature. +It doesn't happen automatically though and needs your cooperation, +because VMA is a low level library that only allocates memory. +It cannot recreate buffers and images in a new place as it doesn't remember the contents of `VkBufferCreateInfo` / `VkImageCreateInfo` structures. +It cannot copy their contents as it doesn't record any commands to a command buffer. Example: \code -// Given following variables already initialized: -VkDevice device; -VmaAllocator allocator; -VkCommandBuffer commandBuffer; -std::vector buffers; -std::vector allocations; - - -const uint32_t allocCount = (uint32_t)allocations.size(); -std::vector allocationsChanged(allocCount); - -VkCommandBufferBeginInfo cmdBufBeginInfo = ...; -vkBeginCommandBuffer(commandBuffer, &cmdBufBeginInfo); - -VmaDefragmentationInfo2 defragInfo = {}; -defragInfo.allocationCount = allocCount; -defragInfo.pAllocations = allocations.data(); -defragInfo.pAllocationsChanged = allocationsChanged.data(); -defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE; // Notice it is "GPU" this time. -defragInfo.maxGpuAllocationsToMove = UINT32_MAX; // Notice it is "GPU" this time. -defragInfo.commandBuffer = commandBuffer; +VmaDefragmentationInfo defragInfo = {}; +defragInfo.pool = myPool; +defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT; VmaDefragmentationContext defragCtx; -vmaDefragmentationBegin(allocator, &defragInfo, nullptr, &defragCtx); +VkResult res = vmaBeginDefragmentation(allocator, &defragInfo, &defragCtx); +// Check res... -vkEndCommandBuffer(commandBuffer); - -// Submit commandBuffer. -// Wait for a fence that ensures commandBuffer execution finished. - -vmaDefragmentationEnd(allocator, defragCtx); - -for(uint32_t i = 0; i < allocCount; ++i) +for(;;) { - if(allocationsChanged[i]) + VmaDefragmentationPassMoveInfo pass; + res = vmaBeginDefragmentationPass(allocator, defragCtx, &pass); + if(res == VK_SUCCESS) + break; + else if(res != VK_INCOMPLETE) + // Handle error... + + for(uint32_t i = 0; i < pass.moveCount; ++i) { - // Destroy buffer that is immutably bound to memory region which is no longer valid. - vkDestroyBuffer(device, buffers[i], nullptr); - - // Create new buffer with same parameters. - VkBufferCreateInfo bufferInfo = ...; - vkCreateBuffer(device, &bufferInfo, nullptr, &buffers[i]); - - // You can make dummy call to vkGetBufferMemoryRequirements here to silence validation layer warning. - - // Bind new buffer to new memory region. Data contained in it is already moved. + // Inspect pass.pMoves[i].srcAllocation, identify what buffer/image it represents. VmaAllocationInfo allocInfo; - vmaGetAllocationInfo(allocator, allocations[i], &allocInfo); - vmaBindBufferMemory(allocator, allocations[i], buffers[i]); + vmaGetAllocationInfo(allocator, pMoves[i].srcAllocation, &allocInfo); + MyEngineResourceData* resData = (MyEngineResourceData*)allocInfo.pUserData; + + // Recreate and bind this buffer/image at: pass.pMoves[i].dstMemory, pass.pMoves[i].dstOffset. + VkImageCreateInfo imgCreateInfo = ... + VkImage newImg; + res = vkCreateImage(device, &imgCreateInfo, nullptr, &newImg); + // Check res... + res = vmaBindImageMemory(allocator, pMoves[i].dstTmpAllocation, newImg); + // Check res... + + // Issue a vkCmdCopyBuffer/vkCmdCopyImage to copy its content to the new place. + vkCmdCopyImage(cmdBuf, resData->img, ..., newImg, ...); } + + // Make sure the copy commands finished executing. + vkWaitForFences(...); + + // Destroy old buffers/images bound with pass.pMoves[i].srcAllocation. + for(uint32_t i = 0; i < pass.moveCount; ++i) + { + // ... + vkDestroyImage(device, resData->img, nullptr); + } + + // Update appropriate descriptors to point to the new places... + + res = vmaEndDefragmentationPass(allocator, defragCtx, &pass); + if(res == VK_SUCCESS) + break; + else if(res != VK_INCOMPLETE) + // Handle error... } + +vmaEndDefragmentation(allocator, defragCtx, nullptr); \endcode -You can combine these two methods by specifying non-zero `maxGpu*` as well as `maxCpu*` parameters. -The library automatically chooses best method to defragment each memory pool. +Although functions like vmaCreateBuffer(), vmaCreateImage(), vmaDestroyBuffer(), vmaDestroyImage() +create/destroy an allocation and a buffer/image at once, these are just a shortcut for +creating the resource, allocating memory, and binding them together. +Defragmentation works on memory allocations only. You must handle the rest manually. +Defragmentation is an iterative process that should repreat "passes" as long as related functions +return `VK_INCOMPLETE` not `VK_SUCCESS`. +In each pass: -You may try not to block your entire program to wait until defragmentation finishes, -but do it in the background, as long as you carefully fullfill requirements described -in function vmaDefragmentationBegin(). +1. vmaBeginDefragmentationPass() function call: + - Calculates and returns the list of allocations to be moved in this pass. + Note this can be a time-consuming process. + - Reserves destination memory for them by creating temporary destination allocations + that you can query for their `VkDeviceMemory` + offset using vmaGetAllocationInfo(). +2. Inside the pass, **you should**: + - Inspect the returned list of allocations to be moved. + - Create new buffers/images and bind them at the returned destination temporary allocations. + - Copy data from source to destination resources if necessary. + - Destroy the source buffers/images, but NOT their allocations. +3. vmaEndDefragmentationPass() function call: + - Frees the source memory reserved for the allocations that are moved. + - Modifies source #VmaAllocation objects that are moved to point to the destination reserved memory. + - Frees `VkDeviceMemory` blocks that became empty. -\section defragmentation_additional_notes Additional notes +Unlike in previous iterations of the defragmentation API, there is no list of "movable" allocations passed as a parameter. +Defragmentation algorithm tries to move all suitable allocations. +You can, however, refuse to move some of them inside a defragmentation pass, by setting +`pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. +This is not recommended and may result in suboptimal packing of the allocations after defragmentation. +If you cannot ensure any allocation can be moved, it is better to keep movable allocations separate in a custom pool. -It is only legal to defragment allocations bound to: +Inside a pass, for each allocation that should be moved: -- buffers -- images created with `VK_IMAGE_CREATE_ALIAS_BIT`, `VK_IMAGE_TILING_LINEAR`, and - being currently in `VK_IMAGE_LAYOUT_GENERAL` or `VK_IMAGE_LAYOUT_PREINITIALIZED`. +- You should copy its data from the source to the destination place by calling e.g. `vkCmdCopyBuffer()`, `vkCmdCopyImage()`. + - You need to make sure these commands finished executing before destroying the source buffers/images and before calling vmaEndDefragmentationPass(). +- If a resource doesn't contain any meaningful data, e.g. it is a transient color attachment image to be cleared, + filled, and used temporarily in each rendering frame, you can just recreate this image + without copying its data. +- If the resource is in `HOST_VISIBLE` and `HOST_CACHED` memory, you can copy its data on the CPU + using `memcpy()`. +- If you cannot move the allocation, you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. + This will cancel the move. + - vmaEndDefragmentationPass() will then free the destination memory + not the source memory of the allocation, leaving it unchanged. +- If you decide the allocation is unimportant and can be destroyed instead of moved (e.g. it wasn't used for long time), + you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY. + - vmaEndDefragmentationPass() will then free both source and destination memory, and will destroy the source #VmaAllocation object. -Defragmentation of images created with `VK_IMAGE_TILING_OPTIMAL` or in any other -layout may give undefined results. +You can defragment a specific custom pool by setting VmaDefragmentationInfo::pool +(like in the example above) or all the default pools by setting this member to null. -If you defragment allocations bound to images, new images to be bound to new -memory region after defragmentation should be created with `VK_IMAGE_LAYOUT_PREINITIALIZED` -and then transitioned to their original layout from before defragmentation if -needed using an image memory barrier. +Defragmentation is always performed in each pool separately. +Allocations are never moved between different Vulkan memory types. +The size of the destination memory reserved for a moved allocation is the same as the original one. +Alignment of an allocation as it was determined using `vkGetBufferMemoryRequirements()` etc. is also respected after defragmentation. +Buffers/images should be recreated with the same `VkBufferCreateInfo` / `VkImageCreateInfo` parameters as the original ones. -While using defragmentation, you may experience validation layer warnings, which you just need to ignore. -See [Validation layer warnings](@ref general_considerations_validation_layer_warnings). +You can perform the defragmentation incrementally to limit the number of allocations and bytes to be moved +in each pass, e.g. to call it in sync with render frames and not to experience too big hitches. +See members: VmaDefragmentationInfo::maxBytesPerPass, VmaDefragmentationInfo::maxAllocationsPerPass. -Please don't expect memory to be fully compacted after defragmentation. -Algorithms inside are based on some heuristics that try to maximize number of Vulkan -memory blocks to make totally empty to release them, as well as to maximize continuous -empty space inside remaining blocks, while minimizing the number and size of allocations that -need to be moved. Some fragmentation may still remain - this is normal. +It is also safe to perform the defragmentation asynchronously to render frames and other Vulkan and VMA +usage, possibly from multiple threads, with the exception that allocations +returned in VmaDefragmentationPassMoveInfo::pMoves shouldn't be destroyed until the defragmentation pass is ended. -\section defragmentation_custom_algorithm Writing custom defragmentation algorithm +Mapping is preserved on allocations that are moved during defragmentation. +Whether through #VMA_ALLOCATION_CREATE_MAPPED_BIT or vmaMapMemory(), the allocations +are mapped at their new place. Of course, pointer to the mapped data changes, so it needs to be queried +using VmaAllocationInfo::pMappedData. -If you want to implement your own, custom defragmentation algorithm, -there is infrastructure prepared for that, -but it is not exposed through the library API - you need to hack its source code. -Here are steps needed to do this: - --# Main thing you need to do is to define your own class derived from base abstract - class `VmaDefragmentationAlgorithm` and implement your version of its pure virtual methods. - See definition and comments of this class for details. --# Your code needs to interact with device memory block metadata. - If you need more access to its data than it is provided by its public interface, - declare your new class as a friend class e.g. in class `VmaBlockMetadata_Generic`. --# If you want to create a flag that would enable your algorithm or pass some additional - flags to configure it, add them to `VmaDefragmentationFlagBits` and use them in - VmaDefragmentationInfo2::flags. --# Modify function `VmaBlockVectorDefragmentationContext::Begin` to create object - of your new class whenever needed. +\note Defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT. \page statistics Statistics -This library contains functions that return information about its internal state, +This library contains several functions that return information about its internal state, especially the amount of memory allocated from Vulkan. -Please keep in mind that these functions need to traverse all internal data structures -to gather these information, so they may be quite time-consuming. -Don't call them too often. \section statistics_numeric_statistics Numeric statistics -You can query for overall statistics of the allocator using function vmaCalculateStats(). -Information are returned using structure #VmaStats. -It contains #VmaStatInfo - number of allocated blocks, number of allocations -(occupied ranges in these blocks), number of unused (free) ranges in these blocks, -number of bytes used and unused (but still allocated from Vulkan) and other information. -They are summed across memory heaps, memory types and total for whole allocator. +If you need to obtain basic statistics about memory usage per heap, together with current budget, +you can call function vmaGetHeapBudgets() and inspect structure #VmaBudget. +This is useful to keep track of memory usage and stay withing budget +(see also \ref staying_within_budget). +Example: -You can query for statistics of a custom pool using function vmaGetPoolStats(). -Information are returned using structure #VmaPoolStats. +\code +uint32_t heapIndex = ... -You can query for information about specific allocation using function vmaGetAllocationInfo(). +VmaBudget budgets[VK_MAX_MEMORY_HEAPS]; +vmaGetHeapBudgets(allocator, budgets); + +printf("My heap currently has %u allocations taking %llu B,\n", + budgets[heapIndex].statistics.allocationCount, + budgets[heapIndex].statistics.allocationBytes); +printf("allocated out of %u Vulkan device memory blocks taking %llu B,\n", + budgets[heapIndex].statistics.blockCount, + budgets[heapIndex].statistics.blockBytes); +printf("Vulkan reports total usage %llu B with budget %llu B.\n", + budgets[heapIndex].usage, + budgets[heapIndex].budget); +\endcode + +You can query for more detailed statistics per memory heap, type, and totals, +including minimum and maximum allocation size and unused range size, +by calling function vmaCalculateStatistics() and inspecting structure #VmaTotalStatistics. +This function is slower though, as it has to traverse all the internal data structures, +so it should be used only for debugging purposes. + +You can query for statistics of a custom pool using function vmaGetPoolStatistics() +or vmaCalculatePoolStatistics(). + +You can query for information about a specific allocation using function vmaGetAllocationInfo(). It fill structure #VmaAllocationInfo. \section statistics_json_dump JSON dump @@ -19439,7 +18546,7 @@ The format of this JSON string is not part of official documentation of the libr but it will not change in backward-incompatible way without increasing library major version number and appropriate mention in changelog. -The JSON string contains all the data that can be obtained using vmaCalculateStats(). +The JSON string contains all the data that can be obtained using vmaCalculateStatistics(). It can also contain detailed map of allocated memory blocks and their regions - free and occupied by allocations. This allows e.g. to visualize the memory or assess fragmentation. @@ -19454,6 +18561,8 @@ To do that, fill VmaAllocationCreateInfo::pUserData field when creating an allocation. It is an opaque `void*` pointer. You can use it e.g. as a pointer, some handle, index, key, ordinal number or any other value that would associate the allocation with your custom metadata. +It it useful to identify appropriate data structures in your engine given #VmaAllocation, +e.g. when doing \ref defragmentation. \code VkBufferCreateInfo bufCreateInfo = ... @@ -19484,44 +18593,21 @@ vmaBuildStatsString() in hexadecimal form. \section allocation_names Allocation names -There is alternative mode available where `pUserData` pointer is used to point to -a null-terminated string, giving a name to the allocation. To use this mode, -set #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT flag in VmaAllocationCreateInfo::flags. -Then `pUserData` passed as VmaAllocationCreateInfo::pUserData or argument to -vmaSetAllocationUserData() must be either null or a pointer to a null-terminated string. +An allocation can also carry a null-terminated string, giving a name to the allocation. +To set it, call vmaSetAllocationName(). The library creates internal copy of the string, so the pointer you pass doesn't need to be valid for whole lifetime of the allocation. You can free it after the call. \code -VkImageCreateInfo imageInfo = ... - std::string imageName = "Texture: "; imageName += fileName; - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; -allocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT; -allocCreateInfo.pUserData = imageName.c_str(); - -VkImage image; -VmaAllocation allocation; -vmaCreateImage(allocator, &imageInfo, &allocCreateInfo, &image, &allocation, nullptr); +vmaSetAllocationName(allocator, allocation, imageName.c_str()); \endcode -The value of `pUserData` pointer of the allocation will be different than the one -you passed when setting allocation's name - pointing to a buffer managed -internally that holds copy of the string. +The string can be later retrieved by inspecting VmaAllocationInfo::pName. +It is also printed in JSON report created by vmaBuildStatsString(). -\code -VmaAllocationInfo allocInfo; -vmaGetAllocationInfo(allocator, allocation, &allocInfo); -const char* imageName = (const char*)allocInfo.pUserData; -printf("Image name: %s\n", imageName); -\endcode - -That string is also printed in JSON report created by vmaBuildStatsString(). - -\note Passing string name to VMA allocation doesn't automatically set it to the Vulkan buffer or image created with it. +\note Setting string name to VMA allocation doesn't automatically set it to the Vulkan buffer or image created with it. You must do it manually using an extension like VK_EXT_debug_utils, which is independent of this library. @@ -19655,15 +18741,17 @@ It might be more convenient, but you need to make sure to use this new unit cons \section virtual_allocator_statistics Statistics -You can obtain statistics of a virtual block using vmaCalculateVirtualBlockStats(). -The function fills structure #VmaStatInfo - same as used by the normal Vulkan memory allocator. +You can obtain statistics of a virtual block using vmaGetVirtualBlockStatistics() +(to get brief statistics that are fast to calculate) +or vmaCalculateVirtualBlockStatistics() (to get more detailed statistics, slower to calculate). +The functions fill structures #VmaStatistics, #VmaDetailedStatistics respectively - same as used by the normal Vulkan memory allocator. Example: \code -VmaStatInfo statInfo; -vmaCalculateVirtualBlockStats(block, &statInfo); +VmaStatistics stats; +vmaGetVirtualBlockStatistics(block, &stats); printf("My virtual block has %llu bytes used by %u virtual allocations\n", - statInfo.usedBytes, statInfo.allocationCount); + stats.allocationBytes, stats.allocationCount); \endcode You can also request a full list of allocations and free regions as a string in JSON format by calling @@ -19743,7 +18831,6 @@ allocations, which have their own memory block of specific size. It is thus not applied to allocations made using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag or those automatically decided to put into dedicated allocations, e.g. due to its large size or recommended by VK_KHR_dedicated_allocation extension. -Margins are also not active in custom pools created with #VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT flag. Margins appear in [JSON dump](@ref statistics_json_dump) as part of free space. @@ -19863,6 +18950,7 @@ imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; VmaAllocationCreateInfo allocCreateInfo = {}; allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +allocCreateInfo.priority = 1.0f; VkImage img; VmaAllocation alloc; @@ -19874,7 +18962,8 @@ Consider creating them as dedicated allocations using #VMA_ALLOCATION_CREATE_DED especially if they are large or if you plan to destroy and recreate them with different sizes e.g. when display resolution changes. Prefer to create such resources first and all other GPU resources (like textures and vertex buffers) later. - +When VK_EXT_memory_priority extension is enabled, it is also worth setting high priority to such allocation +to decrease chances to be evicted to system memory by the operating system. \section usage_patterns_staging_copy_upload Staging copy for upload @@ -20025,6 +19114,7 @@ else // [Executed in runtime]: memcpy(stagingAllocInfo.pMappedData, myData, myDataSize); + //vkCmdPipelineBarrier: VK_ACCESS_HOST_WRITE_BIT --> VK_ACCESS_TRANSFER_READ_BIT VkBufferCopy bufCopy = { 0, // srcOffset 0, // dstOffset, @@ -20187,6 +19277,86 @@ To learn more about this extension, see: +\page vk_ext_memory_priority VK_EXT_memory_priority + +VK_EXT_memory_priority is a device extension that allows to pass additional "priority" +value to Vulkan memory allocations that the implementation may use prefer certain +buffers and images that are critical for performance to stay in device-local memory +in cases when the memory is over-subscribed, while some others may be moved to the system memory. + +VMA offers convenient usage of this extension. +If you enable it, you can pass "priority" parameter when creating allocations or custom pools +and the library automatically passes the value to Vulkan using this extension. + +If you want to use this extension in connection with VMA, follow these steps: + +\section vk_ext_memory_priority_initialization Initialization + +1) Call `vkEnumerateDeviceExtensionProperties` for the physical device. +Check if the extension is supported - if returned array of `VkExtensionProperties` contains "VK_EXT_memory_priority". + +2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`. +Attach additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to `VkPhysicalDeviceFeatures2::pNext` to be returned. +Check if the device feature is really supported - check if `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority` is true. + +3) While creating device with `vkCreateDevice`, enable this extension - add "VK_EXT_memory_priority" +to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`. + +4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`. +Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`. +Enable this device feature - attach additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to +`VkPhysicalDeviceFeatures2::pNext` chain and set its member `memoryPriority` to `VK_TRUE`. + +5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you +have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT +to VmaAllocatorCreateInfo::flags. + +\section vk_ext_memory_priority_usage Usage + +When using this extension, you should initialize following member: + +- VmaAllocationCreateInfo::priority when creating a dedicated allocation with #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +- VmaPoolCreateInfo::priority when creating a custom pool. + +It should be a floating-point value between `0.0f` and `1.0f`, where recommended default is `0.5f`. +Memory allocated with higher value can be treated by the Vulkan implementation as higher priority +and so it can have lower chances of being pushed out to system memory, experiencing degraded performance. + +It might be a good idea to create performance-critical resources like color-attachment or depth-stencil images +as dedicated and set high priority to them. For example: + +\code +VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +imgCreateInfo.imageType = VK_IMAGE_TYPE_2D; +imgCreateInfo.extent.width = 3840; +imgCreateInfo.extent.height = 2160; +imgCreateInfo.extent.depth = 1; +imgCreateInfo.mipLevels = 1; +imgCreateInfo.arrayLayers = 1; +imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; +imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; +imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +allocCreateInfo.priority = 1.0f; + +VkImage img; +VmaAllocation alloc; +vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr); +\endcode + +`priority` member is ignored in the following situations: + +- Allocations created in custom pools: They inherit the priority, along with all other allocation parameters + from the parametrs passed in #VmaPoolCreateInfo when the pool was created. +- Allocations created in default pools: They inherit the priority from the parameters + VMA used when creating default pools, which means `priority == 0.5f`. + + \page vk_amd_device_coherent_memory VK_AMD_device_coherent_memory VK_AMD_device_coherent_memory is a device extension that enables access to @@ -20311,6 +19481,28 @@ accompanying this library. functions. - #VmaVirtualBlock is not safe to be used from multiple threads simultaneously. +\section general_considerations_versioning_and_compatibility Versioning and compatibility + +The library uses [**Semantic Versioning**](https://semver.org/), +which means version numbers follow convention: Major.Minor.Patch (e.g. 2.3.0), where: + +- Incremented Patch version means a release is backward- and forward-compatible, + introducing only some internal improvements, bug fixes, optimizations etc. + or changes that are out of scope of the official API described in this documentation. +- Incremented Minor version means a release is backward-compatible, + so existing code that uses the library should continue to work, while some new + symbols could have been added: new structures, functions, new values in existing + enums and bit flags, new structure members, but not new function parameters. +- Incrementing Major version means a release could break some backward compatibility. + +All changes between official releases are documented in file "CHANGELOG.md". + +\warning Backward compatiblity is considered on the level of C++ source code, not binary linkage. +Adding new members to existing structures is treated as backward compatible if initializing +the new members to binary zero results in the old behavior. +You should always fully initialize all library structures to zeros and not rely on their +exact binary size. + \section general_considerations_validation_layer_warnings Validation layer warnings When using this library, you can meet following types of warnings issued by