diff --git a/WickedEngine/offlineshadercompiler.cpp b/WickedEngine/offlineshadercompiler.cpp index 88a59f152..b890f9b82 100644 --- a/WickedEngine/offlineshadercompiler.cpp +++ b/WickedEngine/offlineshadercompiler.cpp @@ -20,7 +20,6 @@ struct ShaderEntry }; wi::vector shaders = { {"hairparticle_simulateCS", wi::graphics::ShaderStage::CS}, - {"hairparticle_finishUpdateCS", wi::graphics::ShaderStage::CS}, {"emittedparticle_simulateCS", wi::graphics::ShaderStage::CS}, {"generateMIPChainCubeCS_float4", wi::graphics::ShaderStage::CS}, {"generateMIPChainCubeCS_unorm4", wi::graphics::ShaderStage::CS}, diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems b/WickedEngine/shaders/Shaders_SOURCE.vcxitems index 487484915..5f274fdb7 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems @@ -688,16 +688,6 @@ Compute 4.0 - - Compute - Compute - Compute - Compute - Compute - Compute - Compute - Compute - Compute 5.0 diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters index 0701abc44..9b975b528 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters @@ -746,9 +746,6 @@ VS - - CS - VS diff --git a/WickedEngine/shaders/hairparticle_finishUpdateCS.hlsl b/WickedEngine/shaders/hairparticle_finishUpdateCS.hlsl deleted file mode 100644 index 0619d5a73..000000000 --- a/WickedEngine/shaders/hairparticle_finishUpdateCS.hlsl +++ /dev/null @@ -1,16 +0,0 @@ -#include "globals.hlsli" -#include "ShaderInterop_HairParticle.h" - -RWByteAddressBuffer counterBuffer : register(u0); - -[numthreads(1, 1, 1)] -void main(uint3 DTid : SV_DispatchThreadID) -{ - uint particleCount = counterBuffer.Load(0); - - // Reset counter for next frame: - counterBuffer.Store(0, 0); - - // Create draw argument buffer (IndexCountPerInstance, InstanceCount, StartIndexLocation, BaseVertexLocation, StartInstanceLocation): - counterBuffer.Store4(4, uint4(particleCount * 6, 1, 0, 0)); -} diff --git a/WickedEngine/shaders/hairparticle_simulateCS.hlsl b/WickedEngine/shaders/hairparticle_simulateCS.hlsl index 5ddb1cfa2..c8c81a843 100644 --- a/WickedEngine/shaders/hairparticle_simulateCS.hlsl +++ b/WickedEngine/shaders/hairparticle_simulateCS.hlsl @@ -17,7 +17,7 @@ RWStructuredBuffer simulationBuffer : register(u0); RWByteAddressBuffer vertexBuffer_POS : register(u1); RWByteAddressBuffer vertexBuffer_UVS : register(u2); RWBuffer culledIndexBuffer : register(u3); -RWByteAddressBuffer counterBuffer : register(u4); +RWStructuredBuffer indirectBuffer : register(u4); [numthreads(THREADCOUNT_SIMULATEHAIR, 1, 1)] void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex) @@ -288,9 +288,9 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint groupIn uint waveOffset; if (WaveIsFirstLane() && waveAppendCount > 0) { - counterBuffer.InterlockedAdd(0, waveAppendCount, waveOffset); + InterlockedAdd(indirectBuffer[0].IndexCountPerInstance, waveAppendCount * 6, waveOffset); } - waveOffset = WaveReadLaneFirst(waveOffset); + waveOffset = WaveReadLaneFirst(waveOffset) / 6; if (visible) { diff --git a/WickedEngine/wiHairParticle.cpp b/WickedEngine/wiHairParticle.cpp index 021674223..9b95a5547 100644 --- a/WickedEngine/wiHairParticle.cpp +++ b/WickedEngine/wiHairParticle.cpp @@ -24,7 +24,6 @@ namespace wi static Shader ps; static Shader ps_simple; static Shader cs_simulate; - static Shader cs_finishUpdate; static DepthStencilState dss_default, dss_equal; static RasterizerState rs, ncrs, wirers; static BlendState bs; @@ -171,8 +170,9 @@ namespace wi if (!indirectBuffer.IsValid()) { GPUBufferDesc desc; - desc.size = sizeof(uint) + sizeof(IndirectDrawArgsIndexedInstanced); // counter + draw args - desc.misc_flags = ResourceMiscFlag::BUFFER_RAW | ResourceMiscFlag::INDIRECT_ARGS; + desc.stride = sizeof(IndirectDrawArgsIndexedInstanced); + desc.size = desc.stride; + desc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED | ResourceMiscFlag::INDIRECT_ARGS; desc.bind_flags = BindFlag::UNORDERED_ACCESS; device->CreateBuffer(&desc, nullptr, &indirectBuffer); } @@ -296,6 +296,16 @@ namespace wi hcb.xHairLayerMask = hair.layerMask; hcb.xHairInstanceIndex = item.instanceIndex; device->UpdateBuffer(&hair.constantBuffer, &hcb, cmd); + barrier_stack.push_back(GPUBarrier::Buffer(&hair.constantBuffer, ResourceState::COPY_DST, ResourceState::CONSTANT_BUFFER)); + + IndirectDrawArgsIndexedInstanced args = {}; + args.BaseVertexLocation = 0; + args.IndexCountPerInstance = 0; // this will use shader atomic + args.InstanceCount = 1; + args.StartIndexLocation = 0; + args.StartInstanceLocation = 0; + device->UpdateBuffer(&hair.indirectBuffer, &args, cmd); + barrier_stack.push_back(GPUBarrier::Buffer(&hair.indirectBuffer, ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS)); if (hair.regenerate_frame) { @@ -305,17 +315,14 @@ namespace wi device->ClearUAV(&hair.vertexBuffer_POS[1], 0, cmd); device->ClearUAV(&hair.vertexBuffer_UVS, 0, cmd); device->ClearUAV(&hair.culledIndexBuffer, 0, cmd); - device->ClearUAV(&hair.indirectBuffer, 0, cmd); barrier_stack.push_back(GPUBarrier::Memory(&hair.simulationBuffer)); barrier_stack.push_back(GPUBarrier::Memory(&hair.vertexBuffer_POS[0])); barrier_stack.push_back(GPUBarrier::Memory(&hair.vertexBuffer_POS[1])); barrier_stack.push_back(GPUBarrier::Memory(&hair.vertexBuffer_UVS)); barrier_stack.push_back(GPUBarrier::Memory(&hair.culledIndexBuffer)); - barrier_stack.push_back(GPUBarrier::Memory(&hair.indirectBuffer)); } - barrier_stack.push_back(GPUBarrier::Buffer(&hair.constantBuffer, ResourceState::COPY_DST, ResourceState::CONSTANT_BUFFER)); } barrier_stack_flush(); @@ -364,7 +371,7 @@ namespace wi device->Dispatch((hair.strandCount + THREADCOUNT_SIMULATEHAIR - 1) / THREADCOUNT_SIMULATEHAIR, 1, 1, cmd); barrier_stack.push_back(GPUBarrier::Memory(&hair.simulationBuffer)); - barrier_stack.push_back(GPUBarrier::Memory(&hair.indirectBuffer)); + barrier_stack.push_back(GPUBarrier::Buffer(&hair.indirectBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::INDIRECT_ARGUMENT)); barrier_stack.push_back(GPUBarrier::Buffer(&hair.vertexBuffer_POS[0], ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE)); barrier_stack.push_back(GPUBarrier::Buffer(&hair.vertexBuffer_UVS, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE)); barrier_stack.push_back(GPUBarrier::Buffer(&hair.culledIndexBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::INDEX_BUFFER)); @@ -372,29 +379,6 @@ namespace wi barrier_stack_flush(); - // Finish update (reset counter, create indirect draw args): - device->BindComputeShader(&cs_finishUpdate, cmd); - for (uint32_t i = 0; i < itemCount; ++i) - { - const UpdateGPUItem& item = items[i]; - const HairParticleSystem& hair = *item.hair; - if (hair.strandCount == 0 || !hair.simulationBuffer.IsValid()) - { - continue; - } - - const GPUResource* uavs[] = { - &hair.indirectBuffer - }; - device->BindUAVs(uavs, 0, arraysize(uavs), cmd); - - device->Dispatch(1, 1, 1, cmd); - - barrier_stack.push_back(GPUBarrier::Buffer(&hair.indirectBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::INDIRECT_ARGUMENT)); - } - - barrier_stack_flush(); - device->EventEnd(cmd); } @@ -433,7 +417,7 @@ namespace wi device->BindIndexBuffer(&culledIndexBuffer, GetIndexBufferFormat(culledIndexBuffer.desc.format), 0, cmd); - device->DrawIndexedInstancedIndirect(&indirectBuffer, 4, cmd); + device->DrawIndexedInstancedIndirect(&indirectBuffer, 0, cmd); device->EventEnd(cmd); } @@ -506,7 +490,6 @@ namespace wi wi::renderer::LoadShader(ShaderStage::PS, ps, "hairparticlePS.cso"); wi::renderer::LoadShader(ShaderStage::CS, cs_simulate, "hairparticle_simulateCS.cso"); - wi::renderer::LoadShader(ShaderStage::CS, cs_finishUpdate, "hairparticle_finishUpdateCS.cso"); GraphicsDevice* device = wi::graphics::GetDevice(); diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index f78324c8a..c950040b9 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wi::version // minor features, major updates, breaking compatibility changes const int minor = 71; // minor bug fixes, alterations, refactors, updates - const int revision = 206; + const int revision = 207; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);