gpu particle updates
This commit is contained in:
@@ -8,7 +8,7 @@ RWRAWBUFFER(indirectBuffers, 1);
|
||||
void main( uint3 DTid : SV_DispatchThreadID )
|
||||
{
|
||||
// read real alivecount from after simulation:
|
||||
int aliveCount_afterSimulation = indirectBuffers.Load(24) / 6;
|
||||
int aliveCount_afterSimulation = indirectBuffers.Load(ARGUMENTBUFFER_OFFSET_DRAWPARTICLES) / 6;
|
||||
|
||||
// and store it for the sorting shaders to read:
|
||||
counterBuffer[0].aliveCount_afterSimulation = aliveCount_afterSimulation;
|
||||
|
||||
@@ -136,7 +136,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint Gid : SV_GroupIndex)
|
||||
// store squared distance to main camera:
|
||||
float3 eyeVector = particle.position - g_xFrame_MainCamera_CamPos;
|
||||
float distSQ = dot(eyeVector, eyeVector);
|
||||
distanceBuffer[newAliveIndex] = distSQ;
|
||||
distanceBuffer[particleIndex] = -distSQ; // this can be negated to modify sorting order here instead of rewriting sorting shaders...
|
||||
#endif // SORTING
|
||||
|
||||
}
|
||||
|
||||
@@ -40,8 +40,8 @@
|
||||
// Structured Buffers
|
||||
//--------------------------------------------------------------------------------------
|
||||
STRUCTUREDBUFFER(counterBuffer, ParticleCounters, 0);
|
||||
STRUCTUREDBUFFER(distanceBuffer, float, 1);
|
||||
RWSTRUCTUREDBUFFER(indexBuffer, uint, 0);
|
||||
RWSTRUCTUREDBUFFER(distanceBuffer, float, 1);
|
||||
|
||||
#define NumElements counterBuffer[0].aliveCount_afterSimulation
|
||||
|
||||
@@ -61,16 +61,17 @@ void main(uint3 Gid : SV_GroupID,
|
||||
int GlobalBaseIndex = (Gid.x * SORT_SIZE) + GTid.x;
|
||||
int LocalBaseIndex = GI;
|
||||
|
||||
uint numElementsInThreadGroup = min(SORT_SIZE, NumElements - (Gid.x * SORT_SIZE));
|
||||
int numElementsInThreadGroup = min(SORT_SIZE, NumElements - (Gid.x * SORT_SIZE));
|
||||
|
||||
// Load shared data
|
||||
uint i;
|
||||
int i;
|
||||
[unroll]for (i = 0; i < 2 * ITERATIONS; ++i)
|
||||
{
|
||||
if (GI + i * NUM_THREADS < numElementsInThreadGroup)
|
||||
{
|
||||
uint loadIndex = GlobalBaseIndex + i * NUM_THREADS;
|
||||
g_LDS[LocalBaseIndex + i * NUM_THREADS] = float2(distanceBuffer[loadIndex], (float)indexBuffer[loadIndex]);
|
||||
uint particleIndex = indexBuffer[GlobalBaseIndex + i * NUM_THREADS];
|
||||
float dist = distanceBuffer[particleIndex];
|
||||
g_LDS[LocalBaseIndex + i * NUM_THREADS] = float2(dist, (float)particleIndex);
|
||||
}
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
@@ -78,7 +79,7 @@ void main(uint3 Gid : SV_GroupID,
|
||||
// Bitonic sort
|
||||
for (unsigned int nMergeSize = 2; nMergeSize <= SORT_SIZE; nMergeSize = nMergeSize * 2)
|
||||
{
|
||||
for (uint nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1)
|
||||
for (int nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1)
|
||||
{
|
||||
[unroll]for (i = 0; i < ITERATIONS; ++i)
|
||||
{
|
||||
@@ -93,7 +94,7 @@ void main(uint3 Gid : SV_GroupID,
|
||||
float2 a = g_LDS[index];
|
||||
float2 b = g_LDS[nSwapElem];
|
||||
|
||||
if (a.x < b.x)
|
||||
if (a.x > b.x)
|
||||
{
|
||||
g_LDS[index] = b;
|
||||
g_LDS[nSwapElem] = a;
|
||||
@@ -109,10 +110,7 @@ void main(uint3 Gid : SV_GroupID,
|
||||
{
|
||||
if (GI + i * NUM_THREADS < numElementsInThreadGroup)
|
||||
{
|
||||
uint loadIndex = LocalBaseIndex + i * NUM_THREADS;
|
||||
uint storeIndex = GlobalBaseIndex + i * NUM_THREADS;
|
||||
distanceBuffer[storeIndex] = g_LDS[loadIndex].x;
|
||||
indexBuffer[storeIndex] = (uint)g_LDS[loadIndex].y;
|
||||
indexBuffer[GlobalBaseIndex + i * NUM_THREADS] = (uint)g_LDS[LocalBaseIndex + i * NUM_THREADS].y;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,8 +35,8 @@
|
||||
// Structured Buffers
|
||||
//--------------------------------------------------------------------------------------
|
||||
STRUCTUREDBUFFER(counterBuffer, ParticleCounters, 0);
|
||||
STRUCTUREDBUFFER(distanceBuffer, float, 1);
|
||||
RWSTRUCTUREDBUFFER(indexBuffer, uint, 0);
|
||||
RWSTRUCTUREDBUFFER(distanceBuffer, float, 1);
|
||||
|
||||
#define NumElements counterBuffer[0].aliveCount_afterSimulation
|
||||
|
||||
@@ -53,7 +53,7 @@ void main(uint3 Gid : SV_GroupID,
|
||||
uint3 GTid : SV_GroupThreadID,
|
||||
uint GI : SV_GroupIndex)
|
||||
{
|
||||
uint4 tgp;
|
||||
int4 tgp;
|
||||
|
||||
tgp.x = Gid.x * 256;
|
||||
tgp.y = 0;
|
||||
@@ -62,15 +62,16 @@ void main(uint3 Gid : SV_GroupID,
|
||||
|
||||
int GlobalBaseIndex = tgp.y + tgp.x * 2 + GTid.x;
|
||||
int LocalBaseIndex = GI;
|
||||
uint i;
|
||||
int i;
|
||||
|
||||
// Load shared data
|
||||
[unroll]for (i = 0; i < 2; ++i)
|
||||
{
|
||||
if (GI + i * NUM_THREADS < tgp.w)
|
||||
{
|
||||
uint loadIndex = GlobalBaseIndex + i * NUM_THREADS;
|
||||
g_LDS[LocalBaseIndex + i * NUM_THREADS] = float2(distanceBuffer[loadIndex], (float)indexBuffer[loadIndex]);
|
||||
uint particleIndex = indexBuffer[GlobalBaseIndex + i * NUM_THREADS];
|
||||
float dist = distanceBuffer[particleIndex];
|
||||
g_LDS[LocalBaseIndex + i * NUM_THREADS] = float2(dist, (float)particleIndex);
|
||||
}
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
@@ -90,7 +91,7 @@ void main(uint3 Gid : SV_GroupID,
|
||||
float2 a = g_LDS[index];
|
||||
float2 b = g_LDS[nSwapElem];
|
||||
|
||||
if (a.x < b.x)
|
||||
if (a.x > b.x)
|
||||
{
|
||||
g_LDS[index] = b;
|
||||
g_LDS[nSwapElem] = a;
|
||||
@@ -104,10 +105,7 @@ void main(uint3 Gid : SV_GroupID,
|
||||
{
|
||||
if (GI + i * NUM_THREADS < tgp.w)
|
||||
{
|
||||
uint loadIndex = LocalBaseIndex + i * NUM_THREADS;
|
||||
uint storeIndex = GlobalBaseIndex + i * NUM_THREADS;
|
||||
distanceBuffer[storeIndex] = g_LDS[loadIndex].x;
|
||||
indexBuffer[storeIndex] = (uint)g_LDS[loadIndex].y;
|
||||
indexBuffer[GlobalBaseIndex + i * NUM_THREADS] = (uint)g_LDS[LocalBaseIndex + i * NUM_THREADS].y;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,8 +26,8 @@
|
||||
// Structured Buffers
|
||||
//--------------------------------------------------------------------------------------
|
||||
STRUCTUREDBUFFER(counterBuffer, ParticleCounters, 0);
|
||||
STRUCTUREDBUFFER(distanceBuffer, float, 1);
|
||||
RWSTRUCTUREDBUFFER(indexBuffer, uint, 0);
|
||||
RWSTRUCTUREDBUFFER(distanceBuffer, float, 1);
|
||||
|
||||
#define NumElements counterBuffer[0].aliveCount_afterSimulation
|
||||
|
||||
@@ -35,7 +35,7 @@ RWSTRUCTUREDBUFFER(distanceBuffer, float, 1);
|
||||
void main(uint3 Gid : SV_GroupID,
|
||||
uint3 GTid : SV_GroupThreadID)
|
||||
{
|
||||
uint4 tgp;
|
||||
int4 tgp;
|
||||
|
||||
tgp.x = Gid.x * 256;
|
||||
tgp.y = 0;
|
||||
@@ -52,18 +52,15 @@ void main(uint3 Gid : SV_GroupID,
|
||||
|
||||
if (nSwapElem < tgp.y + tgp.z)
|
||||
{
|
||||
float a = distanceBuffer[index];
|
||||
float b = distanceBuffer[nSwapElem];
|
||||
uint index_a = indexBuffer[index];
|
||||
uint index_b = indexBuffer[nSwapElem];
|
||||
float a = distanceBuffer[index_a];
|
||||
float b = distanceBuffer[index_b];
|
||||
|
||||
if (a < b)
|
||||
if (a > b)
|
||||
{
|
||||
distanceBuffer[index] = b;
|
||||
distanceBuffer[nSwapElem] = a;
|
||||
|
||||
uint aI = indexBuffer[index];
|
||||
uint bI = indexBuffer[nSwapElem];
|
||||
indexBuffer[index] = bI;
|
||||
indexBuffer[nSwapElem] = aI;
|
||||
indexBuffer[index] = index_b;
|
||||
indexBuffer[nSwapElem] = index_a;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ STRUCTUREDBUFFER(densityBuffer, float, 2);
|
||||
RWSTRUCTUREDBUFFER(particleBuffer, Particle, 0);
|
||||
|
||||
groupshared float4 positions_densities[THREADCOUNT_SIMULATION];
|
||||
groupshared float3 velocities[THREADCOUNT_SIMULATION];
|
||||
groupshared float4 velocities_pressures[THREADCOUNT_SIMULATION];
|
||||
|
||||
[numthreads(THREADCOUNT_SIMULATION, 1, 1)]
|
||||
void main( uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex, uint3 Gid : SV_GroupID )
|
||||
@@ -62,13 +62,17 @@ void main( uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex, ui
|
||||
if (id < aliveCount)
|
||||
{
|
||||
uint particleIndex = aliveBuffer_CURRENT[id];
|
||||
positions_densities[groupIndex] = float4(particleBuffer[particleIndex].position, densityBuffer[particleIndex]);
|
||||
velocities[groupIndex] = particleBuffer[particleIndex].velocity;
|
||||
|
||||
float density = densityBuffer[particleIndex];
|
||||
positions_densities[groupIndex] = float4(particleBuffer[particleIndex].position, density);
|
||||
|
||||
float pressure = K * (density - p0);
|
||||
velocities_pressures[groupIndex] = float4(particleBuffer[particleIndex].velocity, pressure);
|
||||
}
|
||||
else
|
||||
{
|
||||
positions_densities[groupIndex] = float4(1000000, 1000000, 1000000, 0); // "infinitely far" try to not contribute non existing particles, zero density
|
||||
velocities[groupIndex] = float3(0, 0, 0);
|
||||
velocities_pressures[groupIndex] = float4(0, 0, 0, 0);
|
||||
}
|
||||
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
@@ -86,9 +90,9 @@ void main( uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex, ui
|
||||
|
||||
if (r < h)
|
||||
{
|
||||
float3 velocityB = velocities[i];
|
||||
float3 velocityB = velocities_pressures[i].xyz;
|
||||
float densityB = positions_densities[i].w;
|
||||
float pressureB = K * (densityB - p0);
|
||||
float pressureB = velocities_pressures[i].w;
|
||||
|
||||
float3 rNorm = normalize(diff);
|
||||
float W = (-45 / (PI * h6)) * pow(h - r, 2); // spiky kernel smoothing function
|
||||
|
||||
@@ -60,6 +60,8 @@ wiEmittedParticle::wiEmittedParticle()
|
||||
SAFE_INIT(indirectBuffers);
|
||||
SAFE_INIT(constantBuffer);
|
||||
SAFE_INIT(debugDataReadbackBuffer);
|
||||
SAFE_INIT(debugDataReadbackIndexBuffer);
|
||||
SAFE_INIT(debugDataReadbackDistanceBuffer);
|
||||
|
||||
SetMaxParticleCount(10000);
|
||||
}
|
||||
@@ -103,6 +105,8 @@ wiEmittedParticle::wiEmittedParticle(const std::string& newName, const std::stri
|
||||
SAFE_INIT(indirectBuffers);
|
||||
SAFE_INIT(constantBuffer);
|
||||
SAFE_INIT(debugDataReadbackBuffer);
|
||||
SAFE_INIT(debugDataReadbackIndexBuffer);
|
||||
SAFE_INIT(debugDataReadbackDistanceBuffer);
|
||||
|
||||
SetMaxParticleCount(10000);
|
||||
}
|
||||
@@ -135,6 +139,8 @@ wiEmittedParticle::wiEmittedParticle(const wiEmittedParticle& other)
|
||||
SAFE_INIT(indirectBuffers);
|
||||
SAFE_INIT(constantBuffer);
|
||||
SAFE_INIT(debugDataReadbackBuffer);
|
||||
SAFE_INIT(debugDataReadbackIndexBuffer);
|
||||
SAFE_INIT(debugDataReadbackDistanceBuffer);
|
||||
|
||||
SetMaxParticleCount(other.GetMaxParticleCount());
|
||||
}
|
||||
@@ -163,6 +169,8 @@ void wiEmittedParticle::CreateSelfBuffers()
|
||||
SAFE_DELETE(indirectBuffers);
|
||||
SAFE_DELETE(constantBuffer);
|
||||
SAFE_DELETE(debugDataReadbackBuffer);
|
||||
SAFE_DELETE(debugDataReadbackIndexBuffer);
|
||||
SAFE_DELETE(debugDataReadbackDistanceBuffer);
|
||||
|
||||
particleBuffer = new GPUBuffer;
|
||||
aliveList[0] = new GPUBuffer;
|
||||
@@ -174,7 +182,11 @@ void wiEmittedParticle::CreateSelfBuffers()
|
||||
indirectBuffers = new GPUBuffer;
|
||||
constantBuffer = new GPUBuffer;
|
||||
debugDataReadbackBuffer = new GPUBuffer;
|
||||
debugDataReadbackIndexBuffer = new GPUBuffer;
|
||||
debugDataReadbackDistanceBuffer = new GPUBuffer;
|
||||
|
||||
|
||||
// GPU-local buffer descriptors:
|
||||
GPUBufferDesc bd;
|
||||
bd.Usage = USAGE_DEFAULT;
|
||||
bd.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
|
||||
@@ -182,16 +194,18 @@ void wiEmittedParticle::CreateSelfBuffers()
|
||||
bd.MiscFlags = RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||
SubresourceData data;
|
||||
|
||||
|
||||
bd.ByteWidth = sizeof(Particle) * MAX_PARTICLES;
|
||||
// Particle buffer:
|
||||
bd.StructureByteStride = sizeof(Particle);
|
||||
bd.ByteWidth = bd.StructureByteStride * MAX_PARTICLES;
|
||||
wiRenderer::GetDevice()->CreateBuffer(&bd, nullptr, particleBuffer);
|
||||
|
||||
bd.ByteWidth = sizeof(uint32_t) * MAX_PARTICLES;
|
||||
// Alive index lists (double buffered):
|
||||
bd.StructureByteStride = sizeof(uint32_t);
|
||||
bd.ByteWidth = bd.StructureByteStride * MAX_PARTICLES;
|
||||
wiRenderer::GetDevice()->CreateBuffer(&bd, nullptr, aliveList[0]);
|
||||
wiRenderer::GetDevice()->CreateBuffer(&bd, nullptr, aliveList[1]);
|
||||
|
||||
// Dead index list:
|
||||
uint32_t* indices = new uint32_t[MAX_PARTICLES];
|
||||
for (uint32_t i = 0; i < MAX_PARTICLES; ++i)
|
||||
{
|
||||
@@ -202,22 +216,25 @@ void wiEmittedParticle::CreateSelfBuffers()
|
||||
SAFE_DELETE_ARRAY(indices);
|
||||
data.pSysMem = nullptr;
|
||||
|
||||
// Distance buffer:
|
||||
bd.StructureByteStride = sizeof(float);
|
||||
bd.ByteWidth = bd.StructureByteStride * MAX_PARTICLES;
|
||||
float* distances = new float[MAX_PARTICLES];
|
||||
for (uint32_t i = 0; i < MAX_PARTICLES; ++i)
|
||||
{
|
||||
distances[i] = -1;
|
||||
distances[i] = 0;
|
||||
}
|
||||
data.pSysMem = distances;
|
||||
wiRenderer::GetDevice()->CreateBuffer(&bd, &data, distanceBuffer);
|
||||
SAFE_DELETE_ARRAY(distances);
|
||||
data.pSysMem = nullptr;
|
||||
|
||||
|
||||
// Density buffer (for SPH simulation):
|
||||
bd.StructureByteStride = sizeof(float);
|
||||
bd.ByteWidth = bd.StructureByteStride * MAX_PARTICLES;
|
||||
wiRenderer::GetDevice()->CreateBuffer(&bd, nullptr, densityBuffer);
|
||||
|
||||
|
||||
// Particle System statistics:
|
||||
ParticleCounters counters;
|
||||
counters.aliveCount = 0;
|
||||
counters.deadCount = MAX_PARTICLES;
|
||||
@@ -230,7 +247,7 @@ void wiEmittedParticle::CreateSelfBuffers()
|
||||
wiRenderer::GetDevice()->CreateBuffer(&bd, &data, counterBuffer);
|
||||
data.pSysMem = nullptr;
|
||||
|
||||
|
||||
// Indirect Execution buffer:
|
||||
bd.BindFlags = BIND_UNORDERED_ACCESS;
|
||||
bd.MiscFlags = RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS | RESOURCE_MISC_DRAWINDIRECT_ARGS;
|
||||
bd.ByteWidth =
|
||||
@@ -240,7 +257,7 @@ void wiEmittedParticle::CreateSelfBuffers()
|
||||
sizeof(wiGraphicsTypes::IndirectDispatchArgs);
|
||||
wiRenderer::GetDevice()->CreateBuffer(&bd, nullptr, indirectBuffers);
|
||||
|
||||
|
||||
// Constant buffer:
|
||||
bd.Usage = USAGE_DYNAMIC;
|
||||
bd.ByteWidth = sizeof(EmittedParticleCB);
|
||||
bd.BindFlags = BIND_CONSTANT_BUFFER;
|
||||
@@ -248,6 +265,7 @@ void wiEmittedParticle::CreateSelfBuffers()
|
||||
bd.MiscFlags = 0;
|
||||
wiRenderer::GetDevice()->CreateBuffer(&bd, nullptr, constantBuffer);
|
||||
|
||||
// Debug information CPU-readback buffer:
|
||||
{
|
||||
GPUBufferDesc debugBufDesc = counterBuffer->GetDesc();
|
||||
debugBufDesc.Usage = USAGE_STAGING;
|
||||
@@ -255,6 +273,22 @@ void wiEmittedParticle::CreateSelfBuffers()
|
||||
debugBufDesc.BindFlags = 0;
|
||||
wiRenderer::GetDevice()->CreateBuffer(&debugBufDesc, nullptr, debugDataReadbackBuffer);
|
||||
}
|
||||
|
||||
// Sorting debug buffers:
|
||||
{
|
||||
GPUBufferDesc debugBufDesc = aliveList[0]->GetDesc();
|
||||
debugBufDesc.Usage = USAGE_STAGING;
|
||||
debugBufDesc.CPUAccessFlags = CPU_ACCESS_READ;
|
||||
debugBufDesc.BindFlags = 0;
|
||||
wiRenderer::GetDevice()->CreateBuffer(&debugBufDesc, nullptr, debugDataReadbackIndexBuffer);
|
||||
}
|
||||
{
|
||||
GPUBufferDesc debugBufDesc = distanceBuffer->GetDesc();
|
||||
debugBufDesc.Usage = USAGE_STAGING;
|
||||
debugBufDesc.CPUAccessFlags = CPU_ACCESS_READ;
|
||||
debugBufDesc.BindFlags = 0;
|
||||
wiRenderer::GetDevice()->CreateBuffer(&debugBufDesc, nullptr, debugDataReadbackDistanceBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t wiEmittedParticle::GetMemorySizeInBytes() const
|
||||
@@ -302,6 +336,7 @@ void wiEmittedParticle::Restart()
|
||||
PAUSED = false;
|
||||
}
|
||||
|
||||
//#define DEBUG_SORTING // slow but great for debug!!
|
||||
|
||||
void wiEmittedParticle::UpdateRenderData(GRAPHICSTHREAD threadID)
|
||||
{
|
||||
@@ -463,7 +498,6 @@ void wiEmittedParticle::UpdateRenderData(GRAPHICSTHREAD threadID)
|
||||
{
|
||||
device->EventBegin("SortEmittedParticles", threadID);
|
||||
|
||||
|
||||
// initialize sorting arguments:
|
||||
{
|
||||
GPUResource* uavs[] = {
|
||||
@@ -475,17 +509,28 @@ void wiEmittedParticle::UpdateRenderData(GRAPHICSTHREAD threadID)
|
||||
device->BindComputePSO(&CPSO_kickoffSort, threadID);
|
||||
device->Dispatch(1, 1, 1, threadID);
|
||||
device->UAVBarrier(uavs, ARRAYSIZE(uavs), threadID);
|
||||
|
||||
device->UnBindUnorderedAccessResources(0, ARRAYSIZE(uavs), threadID);
|
||||
}
|
||||
|
||||
|
||||
#ifdef DEBUG_SORTING
|
||||
vector<uint32_t> before(MAX_PARTICLES);
|
||||
device->DownloadBuffer(aliveList[1], debugDataReadbackIndexBuffer, before.data(), threadID);
|
||||
|
||||
device->DownloadBuffer(counterBuffer, debugDataReadbackBuffer, &debugData, threadID);
|
||||
uint32_t particleCount = debugData.aliveCount_afterSimulation;
|
||||
#endif // DEBUG_SORTING
|
||||
|
||||
|
||||
GPUResource* uavs[] = {
|
||||
aliveList[1], // NEW alivelist
|
||||
distanceBuffer,
|
||||
};
|
||||
device->BindUnorderedAccessResourcesCS(uavs, 0, ARRAYSIZE(uavs), threadID);
|
||||
|
||||
GPUResource* resources[] = {
|
||||
counterBuffer,
|
||||
distanceBuffer,
|
||||
};
|
||||
device->BindResources(CS, resources, 0, ARRAYSIZE(resources), threadID);
|
||||
|
||||
@@ -508,7 +553,8 @@ void wiEmittedParticle::UpdateRenderData(GRAPHICSTHREAD threadID)
|
||||
|
||||
// sort all buffers of size 512 (and presort bigger ones)
|
||||
device->BindComputePSO(&CPSO_sort, threadID);
|
||||
device->DispatchIndirect(indirectBuffers, ARGUMENTBUFFER_OFFSET_DISPATCHSORT, threadID);
|
||||
//device->DispatchIndirect(indirectBuffers, ARGUMENTBUFFER_OFFSET_DISPATCHSORT, threadID);
|
||||
device->Dispatch(numThreadGroups, 1, 1, threadID);
|
||||
device->UAVBarrier(uavs, ARRAYSIZE(uavs), threadID);
|
||||
}
|
||||
|
||||
@@ -567,10 +613,67 @@ void wiEmittedParticle::UpdateRenderData(GRAPHICSTHREAD threadID)
|
||||
device->UnBindUnorderedAccessResources(0, ARRAYSIZE(uavs), threadID);
|
||||
device->UnBindResources(0, ARRAYSIZE(resources), threadID);
|
||||
|
||||
|
||||
|
||||
#ifdef DEBUG_SORTING
|
||||
vector<uint32_t> after(MAX_PARTICLES);
|
||||
device->DownloadBuffer(aliveList[1], debugDataReadbackIndexBuffer, after.data(), threadID);
|
||||
|
||||
vector<float> distances(MAX_PARTICLES);
|
||||
device->DownloadBuffer(distanceBuffer, debugDataReadbackDistanceBuffer, distances.data(), threadID);
|
||||
|
||||
if (particleCount > 1)
|
||||
{
|
||||
// CPU sort:
|
||||
for (uint32_t i = 0; i < particleCount - 1; ++i)
|
||||
{
|
||||
for (uint32_t j = i + 1; j < particleCount; ++j)
|
||||
{
|
||||
uint32_t particleIndexA = before[i];
|
||||
uint32_t particleIndexB = before[j];
|
||||
|
||||
float distA = distances[particleIndexA];
|
||||
float distB = distances[particleIndexB];
|
||||
|
||||
if (distA > distB)
|
||||
{
|
||||
before[i] = particleIndexB;
|
||||
before[j] = particleIndexA;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Validate:
|
||||
bool valid = true;
|
||||
uint32_t i = 0;
|
||||
for (i = 0; i < particleCount; ++i)
|
||||
{
|
||||
if (before[i] != after[i])
|
||||
{
|
||||
if (distances[before[i]] != distances[after[i]]) // if distances are equal, we just don't care...
|
||||
{
|
||||
valid = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(valid && "Invalid GPU sorting result!");
|
||||
|
||||
// Also we can reupload CPU sorted particles to verify:
|
||||
if (!valid)
|
||||
{
|
||||
device->UpdateBuffer(aliveList[1], before.data(), threadID);
|
||||
}
|
||||
}
|
||||
#endif // DEBUG_SORTING
|
||||
|
||||
|
||||
device->EventEnd(threadID);
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (!PAUSED)
|
||||
{
|
||||
// Swap CURRENT alivelist with NEW alivelist
|
||||
|
||||
@@ -24,6 +24,8 @@ public:
|
||||
private:
|
||||
ParticleCounters debugData = {};
|
||||
wiGraphicsTypes::GPUBuffer* debugDataReadbackBuffer;
|
||||
wiGraphicsTypes::GPUBuffer* debugDataReadbackIndexBuffer;
|
||||
wiGraphicsTypes::GPUBuffer* debugDataReadbackDistanceBuffer;
|
||||
|
||||
wiGraphicsTypes::GPUBuffer* particleBuffer;
|
||||
wiGraphicsTypes::GPUBuffer* aliveList[2];
|
||||
|
||||
@@ -9,7 +9,7 @@ namespace wiVersion
|
||||
// minor features, major updates
|
||||
const int minor = 17;
|
||||
// minor bug fixes, alterations, refactors, updates
|
||||
const int revision = 14;
|
||||
const int revision = 15;
|
||||
|
||||
|
||||
long GetVersion()
|
||||
|
||||
Reference in New Issue
Block a user