optimization for hair particle indirect draw argument creation

This commit is contained in:
Turánszki János
2023-05-19 21:37:24 +02:00
parent bfbb1852e3
commit 2f8a48257c
7 changed files with 19 additions and 66 deletions
-1
View File
@@ -20,7 +20,6 @@ struct ShaderEntry
};
wi::vector<ShaderEntry> shaders = {
{"hairparticle_simulateCS", wi::graphics::ShaderStage::CS},
{"hairparticle_finishUpdateCS", wi::graphics::ShaderStage::CS},
{"emittedparticle_simulateCS", wi::graphics::ShaderStage::CS},
{"generateMIPChainCubeCS_float4", wi::graphics::ShaderStage::CS},
{"generateMIPChainCubeCS_unorm4", wi::graphics::ShaderStage::CS},
@@ -688,16 +688,6 @@
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4.0</ShaderModel>
</FxCompile>
<FxCompile Include="$(MSBuildThisFileDirectory)hairparticle_finishUpdateCS.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Compute</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">Compute</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">Compute</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Compute</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">Compute</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">Compute</ShaderType>
</FxCompile>
<FxCompile Include="$(MSBuildThisFileDirectory)normalsfromdepthCS.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
@@ -746,9 +746,6 @@
<FxCompile Include="$(MSBuildThisFileDirectory)volumetriclight_spotVS.hlsl">
<Filter>VS</Filter>
</FxCompile>
<FxCompile Include="$(MSBuildThisFileDirectory)hairparticle_finishUpdateCS.hlsl">
<Filter>CS</Filter>
</FxCompile>
<FxCompile Include="$(MSBuildThisFileDirectory)shadowVS_emulation.hlsl">
<Filter>VS</Filter>
</FxCompile>
@@ -1,16 +0,0 @@
#include "globals.hlsli"
#include "ShaderInterop_HairParticle.h"
RWByteAddressBuffer counterBuffer : register(u0);
// Finish-update pass: turns the counter stored at the start of counterBuffer
// into indexed-instanced indirect draw arguments and resets the counter.
// The thread group is a single thread (numthreads 1,1,1); DTid is not used.
// Byte layout touched here:
//   offset 0      : counter, read then reset to 0
//   offset 4..19  : four uints written as the leading draw arguments
[numthreads(1, 1, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
// Read the counter before it is cleared below; the code treats it as the particle count.
uint particleCount = counterBuffer.Load(0);
// Reset counter for next frame:
counterBuffer.Store(0, 0);
// Create draw argument buffer (IndexCountPerInstance, InstanceCount, StartIndexLocation, BaseVertexLocation, StartInstanceLocation):
// Values written: index count = particleCount * 6, instance count = 1, start index = 0, base vertex = 0.
// (presumably 6 indices per particle, i.e. two triangles — confirm against the index buffer setup)
// NOTE(review): Store4 writes only four uints, so the fifth field listed above
// (StartInstanceLocation, at byte offset 20) is not written by this shader;
// it is presumably left at zero by an earlier clear of the buffer — confirm.
counterBuffer.Store4(4, uint4(particleCount * 6, 1, 0, 0));
}
@@ -17,7 +17,7 @@ RWStructuredBuffer<PatchSimulationData> simulationBuffer : register(u0);
RWByteAddressBuffer vertexBuffer_POS : register(u1);
RWByteAddressBuffer vertexBuffer_UVS : register(u2);
RWBuffer<uint> culledIndexBuffer : register(u3);
RWByteAddressBuffer counterBuffer : register(u4);
RWStructuredBuffer<IndirectDrawArgsIndexedInstanced> indirectBuffer : register(u4);
[numthreads(THREADCOUNT_SIMULATEHAIR, 1, 1)]
void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex)
@@ -288,9 +288,9 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint groupIn
uint waveOffset;
if (WaveIsFirstLane() && waveAppendCount > 0)
{
counterBuffer.InterlockedAdd(0, waveAppendCount, waveOffset);
InterlockedAdd(indirectBuffer[0].IndexCountPerInstance, waveAppendCount * 6, waveOffset);
}
waveOffset = WaveReadLaneFirst(waveOffset);
waveOffset = WaveReadLaneFirst(waveOffset) / 6;
if (visible)
{
+15 -32
View File
@@ -24,7 +24,6 @@ namespace wi
static Shader ps;
static Shader ps_simple;
static Shader cs_simulate;
static Shader cs_finishUpdate;
static DepthStencilState dss_default, dss_equal;
static RasterizerState rs, ncrs, wirers;
static BlendState bs;
@@ -171,8 +170,9 @@ namespace wi
if (!indirectBuffer.IsValid())
{
GPUBufferDesc desc;
desc.size = sizeof(uint) + sizeof(IndirectDrawArgsIndexedInstanced); // counter + draw args
desc.misc_flags = ResourceMiscFlag::BUFFER_RAW | ResourceMiscFlag::INDIRECT_ARGS;
desc.stride = sizeof(IndirectDrawArgsIndexedInstanced);
desc.size = desc.stride;
desc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED | ResourceMiscFlag::INDIRECT_ARGS;
desc.bind_flags = BindFlag::UNORDERED_ACCESS;
device->CreateBuffer(&desc, nullptr, &indirectBuffer);
}
@@ -296,6 +296,16 @@ namespace wi
hcb.xHairLayerMask = hair.layerMask;
hcb.xHairInstanceIndex = item.instanceIndex;
device->UpdateBuffer(&hair.constantBuffer, &hcb, cmd);
barrier_stack.push_back(GPUBarrier::Buffer(&hair.constantBuffer, ResourceState::COPY_DST, ResourceState::CONSTANT_BUFFER));
IndirectDrawArgsIndexedInstanced args = {};
args.BaseVertexLocation = 0;
args.IndexCountPerInstance = 0; // this will use shader atomic
args.InstanceCount = 1;
args.StartIndexLocation = 0;
args.StartInstanceLocation = 0;
device->UpdateBuffer(&hair.indirectBuffer, &args, cmd);
barrier_stack.push_back(GPUBarrier::Buffer(&hair.indirectBuffer, ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS));
if (hair.regenerate_frame)
{
@@ -305,17 +315,14 @@ namespace wi
device->ClearUAV(&hair.vertexBuffer_POS[1], 0, cmd);
device->ClearUAV(&hair.vertexBuffer_UVS, 0, cmd);
device->ClearUAV(&hair.culledIndexBuffer, 0, cmd);
device->ClearUAV(&hair.indirectBuffer, 0, cmd);
barrier_stack.push_back(GPUBarrier::Memory(&hair.simulationBuffer));
barrier_stack.push_back(GPUBarrier::Memory(&hair.vertexBuffer_POS[0]));
barrier_stack.push_back(GPUBarrier::Memory(&hair.vertexBuffer_POS[1]));
barrier_stack.push_back(GPUBarrier::Memory(&hair.vertexBuffer_UVS));
barrier_stack.push_back(GPUBarrier::Memory(&hair.culledIndexBuffer));
barrier_stack.push_back(GPUBarrier::Memory(&hair.indirectBuffer));
}
barrier_stack.push_back(GPUBarrier::Buffer(&hair.constantBuffer, ResourceState::COPY_DST, ResourceState::CONSTANT_BUFFER));
}
barrier_stack_flush();
@@ -364,7 +371,7 @@ namespace wi
device->Dispatch((hair.strandCount + THREADCOUNT_SIMULATEHAIR - 1) / THREADCOUNT_SIMULATEHAIR, 1, 1, cmd);
barrier_stack.push_back(GPUBarrier::Memory(&hair.simulationBuffer));
barrier_stack.push_back(GPUBarrier::Memory(&hair.indirectBuffer));
barrier_stack.push_back(GPUBarrier::Buffer(&hair.indirectBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::INDIRECT_ARGUMENT));
barrier_stack.push_back(GPUBarrier::Buffer(&hair.vertexBuffer_POS[0], ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE));
barrier_stack.push_back(GPUBarrier::Buffer(&hair.vertexBuffer_UVS, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE));
barrier_stack.push_back(GPUBarrier::Buffer(&hair.culledIndexBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::INDEX_BUFFER));
@@ -372,29 +379,6 @@ namespace wi
barrier_stack_flush();
// Finish update (reset counter, create indirect draw args):
device->BindComputeShader(&cs_finishUpdate, cmd);
for (uint32_t i = 0; i < itemCount; ++i)
{
const UpdateGPUItem& item = items[i];
const HairParticleSystem& hair = *item.hair;
if (hair.strandCount == 0 || !hair.simulationBuffer.IsValid())
{
continue;
}
const GPUResource* uavs[] = {
&hair.indirectBuffer
};
device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
device->Dispatch(1, 1, 1, cmd);
barrier_stack.push_back(GPUBarrier::Buffer(&hair.indirectBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::INDIRECT_ARGUMENT));
}
barrier_stack_flush();
device->EventEnd(cmd);
}
@@ -433,7 +417,7 @@ namespace wi
device->BindIndexBuffer(&culledIndexBuffer, GetIndexBufferFormat(culledIndexBuffer.desc.format), 0, cmd);
device->DrawIndexedInstancedIndirect(&indirectBuffer, 4, cmd);
device->DrawIndexedInstancedIndirect(&indirectBuffer, 0, cmd);
device->EventEnd(cmd);
}
@@ -506,7 +490,6 @@ namespace wi
wi::renderer::LoadShader(ShaderStage::PS, ps, "hairparticlePS.cso");
wi::renderer::LoadShader(ShaderStage::CS, cs_simulate, "hairparticle_simulateCS.cso");
wi::renderer::LoadShader(ShaderStage::CS, cs_finishUpdate, "hairparticle_finishUpdateCS.cso");
GraphicsDevice* device = wi::graphics::GetDevice();
+1 -1
View File
@@ -9,7 +9,7 @@ namespace wi::version
// minor features, major updates, breaking compatibility changes
const int minor = 71;
// minor bug fixes, alterations, refactors, updates
const int revision = 206;
const int revision = 207;
const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);