gpu particle updates

This commit is contained in:
turanszkij
2018-05-10 17:44:11 +01:00
parent 757b754745
commit 4c97db1c13
9 changed files with 155 additions and 53 deletions
@@ -8,7 +8,7 @@ RWRAWBUFFER(indirectBuffers, 1);
void main( uint3 DTid : SV_DispatchThreadID )
{
// read real alivecount from after simulation:
int aliveCount_afterSimulation = indirectBuffers.Load(24) / 6;
int aliveCount_afterSimulation = indirectBuffers.Load(ARGUMENTBUFFER_OFFSET_DRAWPARTICLES) / 6;
// and store it for the sorting shaders to read:
counterBuffer[0].aliveCount_afterSimulation = aliveCount_afterSimulation;
+1 -1
View File
@@ -136,7 +136,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint Gid : SV_GroupIndex)
// store squared distance to main camera:
float3 eyeVector = particle.position - g_xFrame_MainCamera_CamPos;
float distSQ = dot(eyeVector, eyeVector);
distanceBuffer[newAliveIndex] = distSQ;
distanceBuffer[particleIndex] = -distSQ; // this can be negated to modify sorting order here instead of rewriting sorting shaders...
#endif // SORTING
}
+9 -11
View File
@@ -40,8 +40,8 @@
// Structured Buffers
//--------------------------------------------------------------------------------------
STRUCTUREDBUFFER(counterBuffer, ParticleCounters, 0);
STRUCTUREDBUFFER(distanceBuffer, float, 1);
RWSTRUCTUREDBUFFER(indexBuffer, uint, 0);
RWSTRUCTUREDBUFFER(distanceBuffer, float, 1);
#define NumElements counterBuffer[0].aliveCount_afterSimulation
@@ -61,16 +61,17 @@ void main(uint3 Gid : SV_GroupID,
int GlobalBaseIndex = (Gid.x * SORT_SIZE) + GTid.x;
int LocalBaseIndex = GI;
uint numElementsInThreadGroup = min(SORT_SIZE, NumElements - (Gid.x * SORT_SIZE));
int numElementsInThreadGroup = min(SORT_SIZE, NumElements - (Gid.x * SORT_SIZE));
// Load shared data
uint i;
int i;
[unroll]for (i = 0; i < 2 * ITERATIONS; ++i)
{
if (GI + i * NUM_THREADS < numElementsInThreadGroup)
{
uint loadIndex = GlobalBaseIndex + i * NUM_THREADS;
g_LDS[LocalBaseIndex + i * NUM_THREADS] = float2(distanceBuffer[loadIndex], (float)indexBuffer[loadIndex]);
uint particleIndex = indexBuffer[GlobalBaseIndex + i * NUM_THREADS];
float dist = distanceBuffer[particleIndex];
g_LDS[LocalBaseIndex + i * NUM_THREADS] = float2(dist, (float)particleIndex);
}
}
GroupMemoryBarrierWithGroupSync();
@@ -78,7 +79,7 @@ void main(uint3 Gid : SV_GroupID,
// Bitonic sort
for (unsigned int nMergeSize = 2; nMergeSize <= SORT_SIZE; nMergeSize = nMergeSize * 2)
{
for (uint nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1)
for (int nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1)
{
[unroll]for (i = 0; i < ITERATIONS; ++i)
{
@@ -93,7 +94,7 @@ void main(uint3 Gid : SV_GroupID,
float2 a = g_LDS[index];
float2 b = g_LDS[nSwapElem];
if (a.x < b.x)
if (a.x > b.x)
{
g_LDS[index] = b;
g_LDS[nSwapElem] = a;
@@ -109,10 +110,7 @@ void main(uint3 Gid : SV_GroupID,
{
if (GI + i * NUM_THREADS < numElementsInThreadGroup)
{
uint loadIndex = LocalBaseIndex + i * NUM_THREADS;
uint storeIndex = GlobalBaseIndex + i * NUM_THREADS;
distanceBuffer[storeIndex] = g_LDS[loadIndex].x;
indexBuffer[storeIndex] = (uint)g_LDS[loadIndex].y;
indexBuffer[GlobalBaseIndex + i * NUM_THREADS] = (uint)g_LDS[LocalBaseIndex + i * NUM_THREADS].y;
}
}
}
+8 -10
View File
@@ -35,8 +35,8 @@
// Structured Buffers
//--------------------------------------------------------------------------------------
STRUCTUREDBUFFER(counterBuffer, ParticleCounters, 0);
STRUCTUREDBUFFER(distanceBuffer, float, 1);
RWSTRUCTUREDBUFFER(indexBuffer, uint, 0);
RWSTRUCTUREDBUFFER(distanceBuffer, float, 1);
#define NumElements counterBuffer[0].aliveCount_afterSimulation
@@ -53,7 +53,7 @@ void main(uint3 Gid : SV_GroupID,
uint3 GTid : SV_GroupThreadID,
uint GI : SV_GroupIndex)
{
uint4 tgp;
int4 tgp;
tgp.x = Gid.x * 256;
tgp.y = 0;
@@ -62,15 +62,16 @@ void main(uint3 Gid : SV_GroupID,
int GlobalBaseIndex = tgp.y + tgp.x * 2 + GTid.x;
int LocalBaseIndex = GI;
uint i;
int i;
// Load shared data
[unroll]for (i = 0; i < 2; ++i)
{
if (GI + i * NUM_THREADS < tgp.w)
{
uint loadIndex = GlobalBaseIndex + i * NUM_THREADS;
g_LDS[LocalBaseIndex + i * NUM_THREADS] = float2(distanceBuffer[loadIndex], (float)indexBuffer[loadIndex]);
uint particleIndex = indexBuffer[GlobalBaseIndex + i * NUM_THREADS];
float dist = distanceBuffer[particleIndex];
g_LDS[LocalBaseIndex + i * NUM_THREADS] = float2(dist, (float)particleIndex);
}
}
GroupMemoryBarrierWithGroupSync();
@@ -90,7 +91,7 @@ void main(uint3 Gid : SV_GroupID,
float2 a = g_LDS[index];
float2 b = g_LDS[nSwapElem];
if (a.x < b.x)
if (a.x > b.x)
{
g_LDS[index] = b;
g_LDS[nSwapElem] = a;
@@ -104,10 +105,7 @@ void main(uint3 Gid : SV_GroupID,
{
if (GI + i * NUM_THREADS < tgp.w)
{
uint loadIndex = LocalBaseIndex + i * NUM_THREADS;
uint storeIndex = GlobalBaseIndex + i * NUM_THREADS;
distanceBuffer[storeIndex] = g_LDS[loadIndex].x;
indexBuffer[storeIndex] = (uint)g_LDS[loadIndex].y;
indexBuffer[GlobalBaseIndex + i * NUM_THREADS] = (uint)g_LDS[LocalBaseIndex + i * NUM_THREADS].y;
}
}
}
+9 -12
View File
@@ -26,8 +26,8 @@
// Structured Buffers
//--------------------------------------------------------------------------------------
STRUCTUREDBUFFER(counterBuffer, ParticleCounters, 0);
STRUCTUREDBUFFER(distanceBuffer, float, 1);
RWSTRUCTUREDBUFFER(indexBuffer, uint, 0);
RWSTRUCTUREDBUFFER(distanceBuffer, float, 1);
#define NumElements counterBuffer[0].aliveCount_afterSimulation
@@ -35,7 +35,7 @@ RWSTRUCTUREDBUFFER(distanceBuffer, float, 1);
void main(uint3 Gid : SV_GroupID,
uint3 GTid : SV_GroupThreadID)
{
uint4 tgp;
int4 tgp;
tgp.x = Gid.x * 256;
tgp.y = 0;
@@ -52,18 +52,15 @@ void main(uint3 Gid : SV_GroupID,
if (nSwapElem < tgp.y + tgp.z)
{
float a = distanceBuffer[index];
float b = distanceBuffer[nSwapElem];
uint index_a = indexBuffer[index];
uint index_b = indexBuffer[nSwapElem];
float a = distanceBuffer[index_a];
float b = distanceBuffer[index_b];
if (a < b)
if (a > b)
{
distanceBuffer[index] = b;
distanceBuffer[nSwapElem] = a;
uint aI = indexBuffer[index];
uint bI = indexBuffer[nSwapElem];
indexBuffer[index] = bI;
indexBuffer[nSwapElem] = aI;
indexBuffer[index] = index_b;
indexBuffer[nSwapElem] = index_a;
}
}
}
+10 -6
View File
@@ -13,7 +13,7 @@ STRUCTUREDBUFFER(densityBuffer, float, 2);
RWSTRUCTUREDBUFFER(particleBuffer, Particle, 0);
groupshared float4 positions_densities[THREADCOUNT_SIMULATION];
groupshared float3 velocities[THREADCOUNT_SIMULATION];
groupshared float4 velocities_pressures[THREADCOUNT_SIMULATION];
[numthreads(THREADCOUNT_SIMULATION, 1, 1)]
void main( uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex, uint3 Gid : SV_GroupID )
@@ -62,13 +62,17 @@ void main( uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex, ui
if (id < aliveCount)
{
uint particleIndex = aliveBuffer_CURRENT[id];
positions_densities[groupIndex] = float4(particleBuffer[particleIndex].position, densityBuffer[particleIndex]);
velocities[groupIndex] = particleBuffer[particleIndex].velocity;
float density = densityBuffer[particleIndex];
positions_densities[groupIndex] = float4(particleBuffer[particleIndex].position, density);
float pressure = K * (density - p0);
velocities_pressures[groupIndex] = float4(particleBuffer[particleIndex].velocity, pressure);
}
else
{
positions_densities[groupIndex] = float4(1000000, 1000000, 1000000, 0); // "infinitely far" try to not contribute non existing particles, zero density
velocities[groupIndex] = float3(0, 0, 0);
velocities_pressures[groupIndex] = float4(0, 0, 0, 0);
}
GroupMemoryBarrierWithGroupSync();
@@ -86,9 +90,9 @@ void main( uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex, ui
if (r < h)
{
float3 velocityB = velocities[i];
float3 velocityB = velocities_pressures[i].xyz;
float densityB = positions_densities[i].w;
float pressureB = K * (densityB - p0);
float pressureB = velocities_pressures[i].w;
float3 rNorm = normalize(diff);
float W = (-45 / (PI * h6)) * pow(h - r, 2); // spiky kernel smoothing function
+114 -11
View File
@@ -60,6 +60,8 @@ wiEmittedParticle::wiEmittedParticle()
SAFE_INIT(indirectBuffers);
SAFE_INIT(constantBuffer);
SAFE_INIT(debugDataReadbackBuffer);
SAFE_INIT(debugDataReadbackIndexBuffer);
SAFE_INIT(debugDataReadbackDistanceBuffer);
SetMaxParticleCount(10000);
}
@@ -103,6 +105,8 @@ wiEmittedParticle::wiEmittedParticle(const std::string& newName, const std::stri
SAFE_INIT(indirectBuffers);
SAFE_INIT(constantBuffer);
SAFE_INIT(debugDataReadbackBuffer);
SAFE_INIT(debugDataReadbackIndexBuffer);
SAFE_INIT(debugDataReadbackDistanceBuffer);
SetMaxParticleCount(10000);
}
@@ -135,6 +139,8 @@ wiEmittedParticle::wiEmittedParticle(const wiEmittedParticle& other)
SAFE_INIT(indirectBuffers);
SAFE_INIT(constantBuffer);
SAFE_INIT(debugDataReadbackBuffer);
SAFE_INIT(debugDataReadbackIndexBuffer);
SAFE_INIT(debugDataReadbackDistanceBuffer);
SetMaxParticleCount(other.GetMaxParticleCount());
}
@@ -163,6 +169,8 @@ void wiEmittedParticle::CreateSelfBuffers()
SAFE_DELETE(indirectBuffers);
SAFE_DELETE(constantBuffer);
SAFE_DELETE(debugDataReadbackBuffer);
SAFE_DELETE(debugDataReadbackIndexBuffer);
SAFE_DELETE(debugDataReadbackDistanceBuffer);
particleBuffer = new GPUBuffer;
aliveList[0] = new GPUBuffer;
@@ -174,7 +182,11 @@ void wiEmittedParticle::CreateSelfBuffers()
indirectBuffers = new GPUBuffer;
constantBuffer = new GPUBuffer;
debugDataReadbackBuffer = new GPUBuffer;
debugDataReadbackIndexBuffer = new GPUBuffer;
debugDataReadbackDistanceBuffer = new GPUBuffer;
// GPU-local buffer descriptors:
GPUBufferDesc bd;
bd.Usage = USAGE_DEFAULT;
bd.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
@@ -182,16 +194,18 @@ void wiEmittedParticle::CreateSelfBuffers()
bd.MiscFlags = RESOURCE_MISC_BUFFER_STRUCTURED;
SubresourceData data;
bd.ByteWidth = sizeof(Particle) * MAX_PARTICLES;
// Particle buffer:
bd.StructureByteStride = sizeof(Particle);
bd.ByteWidth = bd.StructureByteStride * MAX_PARTICLES;
wiRenderer::GetDevice()->CreateBuffer(&bd, nullptr, particleBuffer);
bd.ByteWidth = sizeof(uint32_t) * MAX_PARTICLES;
// Alive index lists (double buffered):
bd.StructureByteStride = sizeof(uint32_t);
bd.ByteWidth = bd.StructureByteStride * MAX_PARTICLES;
wiRenderer::GetDevice()->CreateBuffer(&bd, nullptr, aliveList[0]);
wiRenderer::GetDevice()->CreateBuffer(&bd, nullptr, aliveList[1]);
// Dead index list:
uint32_t* indices = new uint32_t[MAX_PARTICLES];
for (uint32_t i = 0; i < MAX_PARTICLES; ++i)
{
@@ -202,22 +216,25 @@ void wiEmittedParticle::CreateSelfBuffers()
SAFE_DELETE_ARRAY(indices);
data.pSysMem = nullptr;
// Distance buffer:
bd.StructureByteStride = sizeof(float);
bd.ByteWidth = bd.StructureByteStride * MAX_PARTICLES;
float* distances = new float[MAX_PARTICLES];
for (uint32_t i = 0; i < MAX_PARTICLES; ++i)
{
distances[i] = -1;
distances[i] = 0;
}
data.pSysMem = distances;
wiRenderer::GetDevice()->CreateBuffer(&bd, &data, distanceBuffer);
SAFE_DELETE_ARRAY(distances);
data.pSysMem = nullptr;
// Density buffer (for SPH simulation):
bd.StructureByteStride = sizeof(float);
bd.ByteWidth = bd.StructureByteStride * MAX_PARTICLES;
wiRenderer::GetDevice()->CreateBuffer(&bd, nullptr, densityBuffer);
// Particle System statistics:
ParticleCounters counters;
counters.aliveCount = 0;
counters.deadCount = MAX_PARTICLES;
@@ -230,7 +247,7 @@ void wiEmittedParticle::CreateSelfBuffers()
wiRenderer::GetDevice()->CreateBuffer(&bd, &data, counterBuffer);
data.pSysMem = nullptr;
// Indirect Execution buffer:
bd.BindFlags = BIND_UNORDERED_ACCESS;
bd.MiscFlags = RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS | RESOURCE_MISC_DRAWINDIRECT_ARGS;
bd.ByteWidth =
@@ -240,7 +257,7 @@ void wiEmittedParticle::CreateSelfBuffers()
sizeof(wiGraphicsTypes::IndirectDispatchArgs);
wiRenderer::GetDevice()->CreateBuffer(&bd, nullptr, indirectBuffers);
// Constant buffer:
bd.Usage = USAGE_DYNAMIC;
bd.ByteWidth = sizeof(EmittedParticleCB);
bd.BindFlags = BIND_CONSTANT_BUFFER;
@@ -248,6 +265,7 @@ void wiEmittedParticle::CreateSelfBuffers()
bd.MiscFlags = 0;
wiRenderer::GetDevice()->CreateBuffer(&bd, nullptr, constantBuffer);
// Debug information CPU-readback buffer:
{
GPUBufferDesc debugBufDesc = counterBuffer->GetDesc();
debugBufDesc.Usage = USAGE_STAGING;
@@ -255,6 +273,22 @@ void wiEmittedParticle::CreateSelfBuffers()
debugBufDesc.BindFlags = 0;
wiRenderer::GetDevice()->CreateBuffer(&debugBufDesc, nullptr, debugDataReadbackBuffer);
}
// Sorting debug buffers:
{
GPUBufferDesc debugBufDesc = aliveList[0]->GetDesc();
debugBufDesc.Usage = USAGE_STAGING;
debugBufDesc.CPUAccessFlags = CPU_ACCESS_READ;
debugBufDesc.BindFlags = 0;
wiRenderer::GetDevice()->CreateBuffer(&debugBufDesc, nullptr, debugDataReadbackIndexBuffer);
}
{
GPUBufferDesc debugBufDesc = distanceBuffer->GetDesc();
debugBufDesc.Usage = USAGE_STAGING;
debugBufDesc.CPUAccessFlags = CPU_ACCESS_READ;
debugBufDesc.BindFlags = 0;
wiRenderer::GetDevice()->CreateBuffer(&debugBufDesc, nullptr, debugDataReadbackDistanceBuffer);
}
}
uint32_t wiEmittedParticle::GetMemorySizeInBytes() const
@@ -302,6 +336,7 @@ void wiEmittedParticle::Restart()
PAUSED = false;
}
//#define DEBUG_SORTING // slow but great for debug!!
void wiEmittedParticle::UpdateRenderData(GRAPHICSTHREAD threadID)
{
@@ -463,7 +498,6 @@ void wiEmittedParticle::UpdateRenderData(GRAPHICSTHREAD threadID)
{
device->EventBegin("SortEmittedParticles", threadID);
// initialize sorting arguments:
{
GPUResource* uavs[] = {
@@ -475,17 +509,28 @@ void wiEmittedParticle::UpdateRenderData(GRAPHICSTHREAD threadID)
device->BindComputePSO(&CPSO_kickoffSort, threadID);
device->Dispatch(1, 1, 1, threadID);
device->UAVBarrier(uavs, ARRAYSIZE(uavs), threadID);
device->UnBindUnorderedAccessResources(0, ARRAYSIZE(uavs), threadID);
}
#ifdef DEBUG_SORTING
vector<uint32_t> before(MAX_PARTICLES);
device->DownloadBuffer(aliveList[1], debugDataReadbackIndexBuffer, before.data(), threadID);
device->DownloadBuffer(counterBuffer, debugDataReadbackBuffer, &debugData, threadID);
uint32_t particleCount = debugData.aliveCount_afterSimulation;
#endif // DEBUG_SORTING
GPUResource* uavs[] = {
aliveList[1], // NEW alivelist
distanceBuffer,
};
device->BindUnorderedAccessResourcesCS(uavs, 0, ARRAYSIZE(uavs), threadID);
GPUResource* resources[] = {
counterBuffer,
distanceBuffer,
};
device->BindResources(CS, resources, 0, ARRAYSIZE(resources), threadID);
@@ -508,7 +553,8 @@ void wiEmittedParticle::UpdateRenderData(GRAPHICSTHREAD threadID)
// sort all buffers of size 512 (and presort bigger ones)
device->BindComputePSO(&CPSO_sort, threadID);
device->DispatchIndirect(indirectBuffers, ARGUMENTBUFFER_OFFSET_DISPATCHSORT, threadID);
//device->DispatchIndirect(indirectBuffers, ARGUMENTBUFFER_OFFSET_DISPATCHSORT, threadID);
device->Dispatch(numThreadGroups, 1, 1, threadID);
device->UAVBarrier(uavs, ARRAYSIZE(uavs), threadID);
}
@@ -567,10 +613,67 @@ void wiEmittedParticle::UpdateRenderData(GRAPHICSTHREAD threadID)
device->UnBindUnorderedAccessResources(0, ARRAYSIZE(uavs), threadID);
device->UnBindResources(0, ARRAYSIZE(resources), threadID);
#ifdef DEBUG_SORTING
vector<uint32_t> after(MAX_PARTICLES);
device->DownloadBuffer(aliveList[1], debugDataReadbackIndexBuffer, after.data(), threadID);
vector<float> distances(MAX_PARTICLES);
device->DownloadBuffer(distanceBuffer, debugDataReadbackDistanceBuffer, distances.data(), threadID);
if (particleCount > 1)
{
// CPU sort:
for (uint32_t i = 0; i < particleCount - 1; ++i)
{
for (uint32_t j = i + 1; j < particleCount; ++j)
{
uint32_t particleIndexA = before[i];
uint32_t particleIndexB = before[j];
float distA = distances[particleIndexA];
float distB = distances[particleIndexB];
if (distA > distB)
{
before[i] = particleIndexB;
before[j] = particleIndexA;
}
}
}
// Validate:
bool valid = true;
uint32_t i = 0;
for (i = 0; i < particleCount; ++i)
{
if (before[i] != after[i])
{
if (distances[before[i]] != distances[after[i]]) // if distances are equal, we just don't care...
{
valid = false;
break;
}
}
}
assert(valid && "Invalid GPU sorting result!");
// Also we can reupload CPU sorted particles to verify:
if (!valid)
{
device->UpdateBuffer(aliveList[1], before.data(), threadID);
}
}
#endif // DEBUG_SORTING
device->EventEnd(threadID);
}
if (!PAUSED)
{
// Swap CURRENT alivelist with NEW alivelist
+2
View File
@@ -24,6 +24,8 @@ public:
private:
ParticleCounters debugData = {};
wiGraphicsTypes::GPUBuffer* debugDataReadbackBuffer;
wiGraphicsTypes::GPUBuffer* debugDataReadbackIndexBuffer;
wiGraphicsTypes::GPUBuffer* debugDataReadbackDistanceBuffer;
wiGraphicsTypes::GPUBuffer* particleBuffer;
wiGraphicsTypes::GPUBuffer* aliveList[2];
+1 -1
View File
@@ -9,7 +9,7 @@ namespace wiVersion
// minor features, major updates
const int minor = 17;
// minor bug fixes, alterations, refactors, updates
const int revision = 14;
const int revision = 15;
long GetVersion()