optimization for hair particle indirect draw argument creation
This commit is contained in:
@@ -20,7 +20,6 @@ struct ShaderEntry
|
||||
};
|
||||
wi::vector<ShaderEntry> shaders = {
|
||||
{"hairparticle_simulateCS", wi::graphics::ShaderStage::CS},
|
||||
{"hairparticle_finishUpdateCS", wi::graphics::ShaderStage::CS},
|
||||
{"emittedparticle_simulateCS", wi::graphics::ShaderStage::CS},
|
||||
{"generateMIPChainCubeCS_float4", wi::graphics::ShaderStage::CS},
|
||||
{"generateMIPChainCubeCS_unorm4", wi::graphics::ShaderStage::CS},
|
||||
|
||||
@@ -688,16 +688,6 @@
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4.0</ShaderModel>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)hairparticle_finishUpdateCS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">Compute</ShaderType>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)normalsfromdepthCS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
|
||||
|
||||
@@ -746,9 +746,6 @@
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)volumetriclight_spotVS.hlsl">
|
||||
<Filter>VS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)hairparticle_finishUpdateCS.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)shadowVS_emulation.hlsl">
|
||||
<Filter>VS</Filter>
|
||||
</FxCompile>
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
#include "globals.hlsli"
|
||||
#include "ShaderInterop_HairParticle.h"
|
||||
|
||||
RWByteAddressBuffer counterBuffer : register(u0);
|
||||
|
||||
// Single-thread compute entry point operating on counterBuffer (u0):
// reads the uint stored at byte offset 0 as the particle count, resets that uint to 0,
// then writes indexed-draw arguments starting at byte offset 4.
// DTid is declared but not used by the body.
[numthreads(1, 1, 1)]
|
||||
void main(uint3 DTid : SV_DispatchThreadID)
|
||||
{
|
||||
// Read the count first; the same location is overwritten with 0 just below.
uint particleCount = counterBuffer.Load(0);
|
||||
|
||||
// Reset counter for next frame:
|
||||
counterBuffer.Store(0, 0);
|
||||
|
||||
// Create draw argument buffer (IndexCountPerInstance, InstanceCount, StartIndexLocation, BaseVertexLocation, StartInstanceLocation):
// IndexCountPerInstance is particleCount * 6 and InstanceCount is 1.
// NOTE(review): Store4 writes only four uints (byte offsets 4..19), so the fifth argument
// listed above (StartInstanceLocation, byte offset 20) is not written by this shader —
// presumably it is zeroed elsewhere; confirm against the buffer's initialization.
|
||||
counterBuffer.Store4(4, uint4(particleCount * 6, 1, 0, 0));
|
||||
}
|
||||
@@ -17,7 +17,7 @@ RWStructuredBuffer<PatchSimulationData> simulationBuffer : register(u0);
|
||||
RWByteAddressBuffer vertexBuffer_POS : register(u1);
|
||||
RWByteAddressBuffer vertexBuffer_UVS : register(u2);
|
||||
RWBuffer<uint> culledIndexBuffer : register(u3);
|
||||
RWByteAddressBuffer counterBuffer : register(u4);
|
||||
RWStructuredBuffer<IndirectDrawArgsIndexedInstanced> indirectBuffer : register(u4);
|
||||
|
||||
[numthreads(THREADCOUNT_SIMULATEHAIR, 1, 1)]
|
||||
void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex)
|
||||
@@ -288,9 +288,9 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint groupIn
|
||||
uint waveOffset;
|
||||
if (WaveIsFirstLane() && waveAppendCount > 0)
|
||||
{
|
||||
counterBuffer.InterlockedAdd(0, waveAppendCount, waveOffset);
|
||||
InterlockedAdd(indirectBuffer[0].IndexCountPerInstance, waveAppendCount * 6, waveOffset);
|
||||
}
|
||||
waveOffset = WaveReadLaneFirst(waveOffset);
|
||||
waveOffset = WaveReadLaneFirst(waveOffset) / 6;
|
||||
|
||||
if (visible)
|
||||
{
|
||||
|
||||
@@ -24,7 +24,6 @@ namespace wi
|
||||
static Shader ps;
|
||||
static Shader ps_simple;
|
||||
static Shader cs_simulate;
|
||||
static Shader cs_finishUpdate;
|
||||
static DepthStencilState dss_default, dss_equal;
|
||||
static RasterizerState rs, ncrs, wirers;
|
||||
static BlendState bs;
|
||||
@@ -171,8 +170,9 @@ namespace wi
|
||||
if (!indirectBuffer.IsValid())
|
||||
{
|
||||
GPUBufferDesc desc;
|
||||
desc.size = sizeof(uint) + sizeof(IndirectDrawArgsIndexedInstanced); // counter + draw args
|
||||
desc.misc_flags = ResourceMiscFlag::BUFFER_RAW | ResourceMiscFlag::INDIRECT_ARGS;
|
||||
desc.stride = sizeof(IndirectDrawArgsIndexedInstanced);
|
||||
desc.size = desc.stride;
|
||||
desc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED | ResourceMiscFlag::INDIRECT_ARGS;
|
||||
desc.bind_flags = BindFlag::UNORDERED_ACCESS;
|
||||
device->CreateBuffer(&desc, nullptr, &indirectBuffer);
|
||||
}
|
||||
@@ -296,6 +296,16 @@ namespace wi
|
||||
hcb.xHairLayerMask = hair.layerMask;
|
||||
hcb.xHairInstanceIndex = item.instanceIndex;
|
||||
device->UpdateBuffer(&hair.constantBuffer, &hcb, cmd);
|
||||
barrier_stack.push_back(GPUBarrier::Buffer(&hair.constantBuffer, ResourceState::COPY_DST, ResourceState::CONSTANT_BUFFER));
|
||||
|
||||
IndirectDrawArgsIndexedInstanced args = {};
|
||||
args.BaseVertexLocation = 0;
|
||||
args.IndexCountPerInstance = 0; // this will use shader atomic
|
||||
args.InstanceCount = 1;
|
||||
args.StartIndexLocation = 0;
|
||||
args.StartInstanceLocation = 0;
|
||||
device->UpdateBuffer(&hair.indirectBuffer, &args, cmd);
|
||||
barrier_stack.push_back(GPUBarrier::Buffer(&hair.indirectBuffer, ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS));
|
||||
|
||||
if (hair.regenerate_frame)
|
||||
{
|
||||
@@ -305,17 +315,14 @@ namespace wi
|
||||
device->ClearUAV(&hair.vertexBuffer_POS[1], 0, cmd);
|
||||
device->ClearUAV(&hair.vertexBuffer_UVS, 0, cmd);
|
||||
device->ClearUAV(&hair.culledIndexBuffer, 0, cmd);
|
||||
device->ClearUAV(&hair.indirectBuffer, 0, cmd);
|
||||
|
||||
barrier_stack.push_back(GPUBarrier::Memory(&hair.simulationBuffer));
|
||||
barrier_stack.push_back(GPUBarrier::Memory(&hair.vertexBuffer_POS[0]));
|
||||
barrier_stack.push_back(GPUBarrier::Memory(&hair.vertexBuffer_POS[1]));
|
||||
barrier_stack.push_back(GPUBarrier::Memory(&hair.vertexBuffer_UVS));
|
||||
barrier_stack.push_back(GPUBarrier::Memory(&hair.culledIndexBuffer));
|
||||
barrier_stack.push_back(GPUBarrier::Memory(&hair.indirectBuffer));
|
||||
}
|
||||
|
||||
barrier_stack.push_back(GPUBarrier::Buffer(&hair.constantBuffer, ResourceState::COPY_DST, ResourceState::CONSTANT_BUFFER));
|
||||
}
|
||||
|
||||
barrier_stack_flush();
|
||||
@@ -364,7 +371,7 @@ namespace wi
|
||||
device->Dispatch((hair.strandCount + THREADCOUNT_SIMULATEHAIR - 1) / THREADCOUNT_SIMULATEHAIR, 1, 1, cmd);
|
||||
|
||||
barrier_stack.push_back(GPUBarrier::Memory(&hair.simulationBuffer));
|
||||
barrier_stack.push_back(GPUBarrier::Memory(&hair.indirectBuffer));
|
||||
barrier_stack.push_back(GPUBarrier::Buffer(&hair.indirectBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::INDIRECT_ARGUMENT));
|
||||
barrier_stack.push_back(GPUBarrier::Buffer(&hair.vertexBuffer_POS[0], ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE));
|
||||
barrier_stack.push_back(GPUBarrier::Buffer(&hair.vertexBuffer_UVS, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE));
|
||||
barrier_stack.push_back(GPUBarrier::Buffer(&hair.culledIndexBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::INDEX_BUFFER));
|
||||
@@ -372,29 +379,6 @@ namespace wi
|
||||
|
||||
barrier_stack_flush();
|
||||
|
||||
// Finish update (reset counter, create indirect draw args):
|
||||
device->BindComputeShader(&cs_finishUpdate, cmd);
|
||||
for (uint32_t i = 0; i < itemCount; ++i)
|
||||
{
|
||||
const UpdateGPUItem& item = items[i];
|
||||
const HairParticleSystem& hair = *item.hair;
|
||||
if (hair.strandCount == 0 || !hair.simulationBuffer.IsValid())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
const GPUResource* uavs[] = {
|
||||
&hair.indirectBuffer
|
||||
};
|
||||
device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
|
||||
|
||||
device->Dispatch(1, 1, 1, cmd);
|
||||
|
||||
barrier_stack.push_back(GPUBarrier::Buffer(&hair.indirectBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::INDIRECT_ARGUMENT));
|
||||
}
|
||||
|
||||
barrier_stack_flush();
|
||||
|
||||
device->EventEnd(cmd);
|
||||
}
|
||||
|
||||
@@ -433,7 +417,7 @@ namespace wi
|
||||
|
||||
device->BindIndexBuffer(&culledIndexBuffer, GetIndexBufferFormat(culledIndexBuffer.desc.format), 0, cmd);
|
||||
|
||||
device->DrawIndexedInstancedIndirect(&indirectBuffer, 4, cmd);
|
||||
device->DrawIndexedInstancedIndirect(&indirectBuffer, 0, cmd);
|
||||
|
||||
device->EventEnd(cmd);
|
||||
}
|
||||
@@ -506,7 +490,6 @@ namespace wi
|
||||
wi::renderer::LoadShader(ShaderStage::PS, ps, "hairparticlePS.cso");
|
||||
|
||||
wi::renderer::LoadShader(ShaderStage::CS, cs_simulate, "hairparticle_simulateCS.cso");
|
||||
wi::renderer::LoadShader(ShaderStage::CS, cs_finishUpdate, "hairparticle_finishUpdateCS.cso");
|
||||
|
||||
GraphicsDevice* device = wi::graphics::GetDevice();
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ namespace wi::version
|
||||
// minor features, major updates, breaking compatibility changes
|
||||
const int minor = 71;
|
||||
// minor bug fixes, alterations, refactors, updates
|
||||
const int revision = 206;
|
||||
const int revision = 207;
|
||||
|
||||
const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user