From 63d4e4967b2fb1e540e544830d810ca89839493a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tur=C3=A1nszki=20J=C3=A1nos?= Date: Thu, 28 Sep 2023 07:28:25 +0200 Subject: [PATCH] fix for hair particles and ray tracing #755 --- WickedEngine/wiGraphicsDevice.h | 5 ++ WickedEngine/wiGraphicsDevice_DX12.cpp | 28 +++++---- WickedEngine/wiHairParticle.cpp | 11 +++- WickedEngine/wiHairParticle.h | 3 + WickedEngine/wiRenderer.cpp | 79 ++++++++++++++++---------- WickedEngine/wiVersion.cpp | 3 +- 6 files changed, 87 insertions(+), 42 deletions(-) diff --git a/WickedEngine/wiGraphicsDevice.h b/WickedEngine/wiGraphicsDevice.h index d20d87537..e4f5b0cd2 100644 --- a/WickedEngine/wiGraphicsDevice.h +++ b/WickedEngine/wiGraphicsDevice.h @@ -243,6 +243,11 @@ namespace wi::graphics return CreateBuffer2(desc, [&](void* dest) { std::memcpy(dest, initial_data, desc->size); }, buffer); } + void Barrier(const GPUBarrier& barrier, CommandList cmd) + { + Barrier(&barrier, 1, cmd); + } + struct GPULinearAllocator { GPUBuffer buffer; diff --git a/WickedEngine/wiGraphicsDevice_DX12.cpp b/WickedEngine/wiGraphicsDevice_DX12.cpp index 6acb93dca..6ca8d6985 100644 --- a/WickedEngine/wiGraphicsDevice_DX12.cpp +++ b/WickedEngine/wiGraphicsDevice_DX12.cpp @@ -3396,20 +3396,26 @@ using namespace dx12_internal; if (has_flag(desc->bind_flags, BindFlag::UNORDERED_ACCESS)) { CreateSubresource(buffer, SubresourceType::UAV, 0); - - if (has_flag(desc->bind_flags, BindFlag::UNORDERED_ACCESS) && !has_flag(desc->misc_flags, ResourceMiscFlag::BUFFER_RAW)) - { - // Create raw buffer if doesn't exist for ClearUAV: - D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {}; - uav_desc.Format = DXGI_FORMAT_R32_TYPELESS; - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; - uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; - uav_desc.Buffer.NumElements = uint32_t(desc->size / sizeof(uint32_t)); - internal_state->uav_raw.init(this, uav_desc, internal_state->resource.Get()); - } } } + if ( + has_flag(desc->bind_flags, BindFlag::UNORDERED_ACCESS) && + ( + !has_flag(desc->misc_flags, ResourceMiscFlag::BUFFER_RAW) || + has_flag(desc->misc_flags, ResourceMiscFlag::NO_DEFAULT_DESCRIPTORS) + ) + ) + { + // Create raw buffer if doesn't exist for ClearUAV: + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {}; + uav_desc.Format = DXGI_FORMAT_R32_TYPELESS; + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + uav_desc.Buffer.NumElements = uint32_t(desc->size / sizeof(uint32_t)); + internal_state->uav_raw.init(this, uav_desc, internal_state->resource.Get()); + } + return SUCCEEDED(hr); } bool GraphicsDevice_DX12::CreateTexture(const TextureDesc* desc, const SubresourceData* initial_data, Texture* texture) const diff --git a/WickedEngine/wiHairParticle.cpp b/WickedEngine/wiHairParticle.cpp index 753906c96..b7e5ffc9a 100644 --- a/WickedEngine/wiHairParticle.cpp +++ b/WickedEngine/wiHairParticle.cpp @@ -221,7 +221,6 @@ namespace wi { RaytracingAccelerationStructureDesc desc; desc.type = RaytracingAccelerationStructureDesc::Type::BOTTOMLEVEL; - desc.flags |= RaytracingAccelerationStructureDesc::FLAG_ALLOW_UPDATE; desc.flags |= RaytracingAccelerationStructureDesc::FLAG_PREFER_FAST_BUILD; desc.bottom_level.geometries.emplace_back(); @@ -400,6 +399,16 @@ namespace wi device->EventEnd(cmd); } + void HairParticleSystem::InitializeGPUDataIfNeeded(wi::graphics::CommandList cmd) + { + if (gpu_initialized) + return; + GraphicsDevice* device = wi::graphics::GetDevice(); + device->ClearUAV(&generalBuffer, 0, cmd); + device->Barrier(GPUBarrier::Buffer(&generalBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::COPY_DST), cmd); + gpu_initialized = true; + } + void HairParticleSystem::Draw(const MaterialComponent& material, wi::enums::RENDERPASS renderPass, CommandList cmd) const { if (strandCount == 0 || !constantBuffer.IsValid()) diff --git a/WickedEngine/wiHairParticle.h b/WickedEngine/wiHairParticle.h index d8343e072..92d30c184 100644 --- a/WickedEngine/wiHairParticle.h +++ b/WickedEngine/wiHairParticle.h @@ -57,6 +57,9 @@ namespace wi wi::graphics::CommandList cmd ); + mutable bool gpu_initialized = false; + void InitializeGPUDataIfNeeded(wi::graphics::CommandList cmd); + void Draw( const wi::scene::MaterialComponent& material, wi::enums::RENDERPASS renderPass, diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp index 03aaa7d89..4b63b3e3b 100644 --- a/WickedEngine/wiRenderer.cpp +++ b/WickedEngine/wiRenderer.cpp @@ -2276,14 +2276,17 @@ void SetUpStates() const GPUBuffer& GetIndexBufferForQuads(uint32_t max_quad_count) { - const size_t required_index_count = max_quad_count * 6; - const size_t required_max_index = max_quad_count * 4; + const size_t required_max_index = max_quad_count * 4u; static std::mutex locker; std::scoped_lock lock(locker); - if (required_max_index < 65536) + if (required_max_index < 65536u) { + // 16-bit request: + max_quad_count = std::max(65535u / 4u, max_quad_count); // minimum a full 16 bit index buffer request, avoid allocating multiple 16-bit small requests + const size_t required_index_count = max_quad_count * 6u; + static GPUBuffer indexBufferForQuads16; if (!indexBufferForQuads16.IsValid() || indexBufferForQuads16.desc.size / indexBufferForQuads16.desc.stride < required_index_count) { @@ -2296,24 +2299,31 @@ const GPUBuffer& GetIndexBufferForQuads(uint32_t max_quad_count) bd.format = Format::R16_UINT; bd.stride = GetFormatStride(bd.format); bd.size = bd.stride * required_index_count; - wi::vector primitiveData(required_index_count); - for (uint16_t particleID = 0; particleID < uint16_t(max_quad_count); ++particleID) + auto fill_ib = [&](void* dst) { - uint16_t v0 = particleID * 4; - uint32_t i0 = particleID * 6; - primitiveData[i0 + 0] = v0 + 0; - primitiveData[i0 + 1] = v0 + 1; - primitiveData[i0 + 2] = v0 + 2; - primitiveData[i0 + 3] = v0 + 2; - primitiveData[i0 + 4] = v0 + 1; - primitiveData[i0 + 5] = v0 + 3; - } - device->CreateBuffer(&bd, primitiveData.data(), &indexBufferForQuads16); - device->SetName(&indexBufferForQuads16, "wi::renderer::indexBufferForQuads16"); + uint16_t* primitiveData = (uint16_t*)dst; + for (uint16_t particleID = 0; particleID < uint16_t(max_quad_count); ++particleID) + { + uint16_t v0 = particleID * 4; + uint32_t i0 = particleID * 6; + primitiveData[i0 + 0] = v0 + 0; + primitiveData[i0 + 1] = v0 + 1; + primitiveData[i0 + 2] = v0 + 2; + primitiveData[i0 + 3] = v0 + 2; + primitiveData[i0 + 4] = v0 + 1; + primitiveData[i0 + 5] = v0 + 3; + } + }; + device->CreateBuffer2(&bd, fill_ib, &indexBufferForQuads16); + device->SetName(&indexBufferForQuads16, "wi::renderer::indexBufferForQuads16bit"); } return indexBufferForQuads16; } + // 32-bit request below: + max_quad_count = wi::math::GetNextPowerOfTwo(max_quad_count); // reduce allocations by making larger fitting allocations + const size_t required_index_count = max_quad_count * 6u; + static GPUBuffer indexBufferForQuads32; if (!indexBufferForQuads32.IsValid() || indexBufferForQuads32.desc.size / indexBufferForQuads32.desc.stride < required_index_count) { @@ -2326,20 +2336,23 @@ const GPUBuffer& GetIndexBufferForQuads(uint32_t max_quad_count) bd.format = Format::R32_UINT; bd.stride = GetFormatStride(bd.format); bd.size = bd.stride * required_index_count; - wi::vector primitiveData(required_index_count); - for (uint particleID = 0; particleID < max_quad_count; ++particleID) + auto fill_ib = [&](void* dst) { - uint32_t v0 = particleID * 4; - uint32_t i0 = particleID * 6; - primitiveData[i0 + 0] = v0 + 0; - primitiveData[i0 + 1] = v0 + 1; - primitiveData[i0 + 2] = v0 + 2; - primitiveData[i0 + 3] = v0 + 2; - primitiveData[i0 + 4] = v0 + 1; - primitiveData[i0 + 5] = v0 + 3; - } - device->CreateBuffer(&bd, primitiveData.data(), &indexBufferForQuads32); - device->SetName(&indexBufferForQuads32, "wi::renderer::indexBufferForQuads32"); + uint32_t* primitiveData = (uint32_t*)dst; + for (uint particleID = 0; particleID < max_quad_count; ++particleID) + { + uint32_t v0 = particleID * 4; + uint32_t i0 = particleID * 6; + primitiveData[i0 + 0] = v0 + 0; + primitiveData[i0 + 1] = v0 + 1; + primitiveData[i0 + 2] = v0 + 2; + primitiveData[i0 + 3] = v0 + 2; + primitiveData[i0 + 4] = v0 + 1; + primitiveData[i0 + 5] = v0 + 3; + } + }; + device->CreateBuffer2(&bd, fill_ib, &indexBufferForQuads32); + device->SetName(&indexBufferForQuads32, "wi::renderer::indexBufferForQuads32bit"); } return indexBufferForQuads32; @@ -4243,6 +4256,14 @@ void UpdateRenderData( barrier_stack_flush(cmd); // wind/skinning flush + // Hair particle initialization is needed for all, not just visible ones: + // This fixes an issue when hair is included in ray tracing acceleration + // structure, but not yet updated properly, because it was not yet visible + for (size_t i = 0; i < vis.scene->hairs.GetCount(); ++i) + { + vis.scene->hairs[i].InitializeGPUDataIfNeeded(cmd); + } + // Hair particle systems GPU simulation: // (This must be non-async too, as prepass will render hairs!) static thread_local wi::vector hair_updates; diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index d47ddd187..3c177d682 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wi::version // minor features, major updates, breaking compatibility changes const int minor = 71; // minor bug fixes, alterations, refactors, updates - const int revision = 298; + const int revision = 299; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision); @@ -149,6 +149,7 @@ Patreon supporters - Anthony Python - Gnowos - Klaus +- slaughternaut )"; return credits;