diff --git a/WickedEngine/WickedEngine_SHADERS.vcxproj b/WickedEngine/WickedEngine_SHADERS.vcxproj
index f0a1fc8ca..a7ab47df4 100644
--- a/WickedEngine/WickedEngine_SHADERS.vcxproj
+++ b/WickedEngine/WickedEngine_SHADERS.vcxproj
@@ -133,6 +133,10 @@
Compute
5.0
+
+ Compute
+ 5.0
+
Compute
5.0
diff --git a/WickedEngine/WickedEngine_SHADERS.vcxproj.filters b/WickedEngine/WickedEngine_SHADERS.vcxproj.filters
index f50f21ef8..878c27cdd 100644
--- a/WickedEngine/WickedEngine_SHADERS.vcxproj.filters
+++ b/WickedEngine/WickedEngine_SHADERS.vcxproj.filters
@@ -702,6 +702,9 @@
PS
+
+ CS
+
diff --git a/WickedEngine/emittedparticle_nbodyCS.hlsl b/WickedEngine/emittedparticle_nbodyCS.hlsl
new file mode 100644
index 000000000..24fcd34e2
--- /dev/null
+++ b/WickedEngine/emittedparticle_nbodyCS.hlsl
@@ -0,0 +1,201 @@
+#include "globals.hlsli"
+#include "ShaderInterop_EmittedParticle.h"
+
+RWSTRUCTUREDBUFFER(particleBuffer, Particle, 0); // main reads one Particle per thread and writes back position, velocity and color_mirror
+RWSTRUCTUREDBUFFER(aliveBuffer_CURRENT, uint, 1); // maps DTid.x to an index into particleBuffer
+RWSTRUCTUREDBUFFER(aliveBuffer_NEW, uint, 2); // declared but not referenced by this shader
+RWSTRUCTUREDBUFFER(deadBuffer, uint, 3); // declared but not referenced by this shader
+RWSTRUCTUREDBUFFER(counterBuffer, ParticleCounters, 4); // only element 0's aliveCount is read; it bounds the active threads
+
+struct LDSParticle // per-thread particle snapshot that main stages in groupshared memory
+{
+ float3 position; // copied from Particle.position
+ float size; // Particle.sizeBeginEnd interpolated by (1 - life / maxLife)
+ float3 v; // velocity
+ float m; // mass (main always sets this to 1)
+ float p; // density, accumulated by main's first neighbor loop
+ float P; // Pressure, derived from p in main
+};
+groupshared LDSParticle LDSParticles[THREADCOUNT_SIMULATION]; // one slot per thread of the group, indexed by SV_GroupIndex
+
+// N-body (SPH-style) interaction pass: one thread per alive particle. Each thread stages its
+// particle in groupshared memory, accumulates density/pressure from the other particles of its
+// own thread group, applies pressure + viscosity acceleration, a ground-plane bounce, damping
+// and gravity, then writes position and velocity back to particleBuffer.
+// DTid.x indexes aliveBuffer_CURRENT, groupIndex is the LDSParticles slot, Gid.x is the group.
+[numthreads(THREADCOUNT_SIMULATION, 1, 1)]
+void main( uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex, uint3 Gid : SV_GroupID )
+{
+    const uint aliveCount = counterBuffer[0].aliveCount;
+    const bool isAlive = DTid.x < aliveCount;
+
+    // Threads past the end of the alive list must still reach both group barriers, so they
+    // carry a zeroed particle instead of returning early.
+    uint writeIndex = 0;
+    LDSParticle particleA = (LDSParticle)0;
+
+    if (isAlive)
+    {
+        writeIndex = aliveBuffer_CURRENT[DTid.x];
+        Particle particle = particleBuffer[writeIndex];
+
+        float lifeLerp = 1 - particle.life / particle.maxLife;
+
+        particleA.position = particle.position;
+        particleA.size = lerp(particle.sizeBeginEnd.x, particle.sizeBeginEnd.y, lifeLerp);
+        particleA.v = particle.velocity;
+        particleA.m = 1;
+    }
+
+    // Every slot is written (zeros for inactive threads) so no LDS read is uninitialized.
+    LDSParticles[groupIndex] = particleA;
+
+    GroupMemoryBarrierWithGroupSync();
+
+    // Number of valid LDS slots in this group. This was hard-coded to 256, so the loops below
+    // also visited slots of inactive threads. min() before the subtraction avoids uint wrap.
+    const uint groupStart = Gid.x * THREADCOUNT_SIMULATION;
+    const uint LDSParticleCount = min(aliveCount - min(aliveCount, groupStart), (uint)THREADCOUNT_SIMULATION);
+
+    // Compute density field:
+    const float h = 1.0f; // smoothing radius
+    const float h2 = h*h;
+    const float h3 = h2 * h;
+    const float h6 = h3 * h3;
+    const float h9 = h6 * h3; // previously h3 * h3, which is h^6 under the name h9
+    const float poly6Scale = 315.0f / (64.0f * PI * h9); // poly6 kernel normalization
+    const float spikyScale = -45 / (PI * h6); // spiky kernel normalization
+
+    uint i;
+
+    if (isAlive)
+    {
+        for (i = 0; i < LDSParticleCount; ++i)
+        {
+            if (i != groupIndex)
+            {
+                const LDSParticle particleB = LDSParticles[i];
+
+                const float3 diff = particleA.position - particleB.position;
+                const float r2 = dot(diff, diff); // distance squared
+
+                const float range = particleA.size + particleB.size; // range of affection
+                const float range2 = range * range; // range squared
+
+                // The poly6 kernel only has support inside radius h; beyond it (h2 - r2) turns
+                // negative, which previously fed a negative base into pow().
+                if (r2 < range2 && r2 < h2)
+                {
+                    const float d = h2 - r2;
+                    particleA.p += particleB.m * poly6Scale * d * d * d; // poly6 smoothing kernel
+                }
+            }
+        }
+    }
+
+    // Compute particle pressure:
+    const float K = 20; // pressure constant
+    const float p0 = 20; // reference density
+    // NOTE(review): the lower clamp is p0 (a density) rather than 0 - confirm this is intended.
+    particleA.P = max(p0, K * (particleA.p - p0));
+
+    // Store the results:
+    LDSParticles[groupIndex].p = particleA.p;
+    LDSParticles[groupIndex].P = particleA.P;
+
+    // Wait for all particles to compute pressure
+    GroupMemoryBarrierWithGroupSync();
+
+    if (isAlive && particleA.p > 0)
+    {
+        // Compute acceleration:
+        float3 a = 0; // pressure force
+        float3 av = 0; // viscosity force
+        const float e = 0.018f; // viscosity constant
+
+        for (i = 0; i < LDSParticleCount; ++i)
+        {
+            if (i != groupIndex)
+            {
+                const LDSParticle particleB = LDSParticles[i];
+
+                const float3 diff = particleA.position - particleB.position;
+                const float r2 = dot(diff, diff); // distance squared
+                const float r = sqrt(r2);
+
+                const float range = particleA.size + particleB.size; // range of affection
+
+                // r > 0 skips coincident particles (normalize(0) and h / (2 * r) are undefined),
+                // r < h keeps the kernels inside their support, p > 0 guards the divisions below.
+                if (r > 0 && r < range && r < h && particleB.p > 0)
+                {
+                    const float3 rNorm = normalize(diff);
+                    const float hr = h - r;
+                    float W = spikyScale * hr * hr; // spiky kernel smoothing function
+
+                    a += -(particleB.m / particleA.m) * ((particleA.P + particleB.P) / (2 * particleA.p * particleB.p)) * W * rNorm;
+
+                    const float r3 = r2 * r;
+                    W = -(r3 / (2 * h3)) + (r2 / h2) + (h / (2 * r)) - 1;
+                    av += e * (particleB.m / particleA.m) * (1.0f / particleB.p) * (particleB.v - particleA.v) * W * rNorm;
+                }
+            }
+        }
+
+        const float3 force = a + av;
+
+        const float dt = g_xFrame_DeltaTime;
+        particleA.v += dt * force / particleA.p;
+    }
+
+    const float elastic = 0.9;
+
+    if (particleA.position.y - particleA.size < 0)
+    {
+        particleA.position.y = particleA.size;
+        particleA.v.y *= -elastic;
+    }
+
+    //// box collision:
+    //float extent = 4;
+    //if (particleA.position.x + particleA.size > extent)
+    //{
+    //    particleA.position.x = extent - particleA.size;
+    //    particleA.v.x *= -elastic;
+    //}
+    //if (particleA.position.x - particleA.size < -extent)
+    //{
+    //    particleA.position.x = -extent + particleA.size;
+    //    particleA.v.x *= -elastic;
+    //}
+    //if (particleA.position.z + particleA.size > extent)
+    //{
+    //    particleA.position.z = extent - particleA.size;
+    //    particleA.v.z *= -elastic;
+    //}
+    //if (particleA.position.z - particleA.size < -extent)
+    //{
+    //    particleA.position.z = -extent + particleA.size;
+    //    particleA.v.z *= -elastic;
+    //}
+
+    // NOTE(review): damping and gravity below are applied per dispatch and not scaled by dt.
+    particleA.v *= 0.99f;
+
+    particleA.v.y -= 0.8f;
+
+    if (isAlive)
+    {
+        particleBuffer[writeIndex].position = particleA.position;
+        particleBuffer[writeIndex].velocity = particleA.v;
+
+        // Overwrites color_mirror: 0xFF if any density was accumulated, 0x00FFFFFF otherwise.
+        particleBuffer[writeIndex].color_mirror = 0x00FFFFFF;
+        //particleBuffer[writeIndex].color_mirror |= ((uint)particleA.p) & 0xFF;
+        if (particleA.p > 0)
+        {
+            particleBuffer[writeIndex].color_mirror = 0xFF;
+        }
+    }
+}
+
diff --git a/WickedEngine/emittedparticle_sortCS.hlsl b/WickedEngine/emittedparticle_sortCS.hlsl
index f26b6917b..3cbf06be9 100644
--- a/WickedEngine/emittedparticle_sortCS.hlsl
+++ b/WickedEngine/emittedparticle_sortCS.hlsl
@@ -91,7 +91,7 @@ void main(uint3 Gid : SV_GroupID,
float2 a = g_LDS[index];
float2 b = g_LDS[nSwapElem];
- if (a.x > b.x)
+ if (a.x < b.x)
{
g_LDS[index] = b;
g_LDS[nSwapElem] = a;
diff --git a/WickedEngine/emittedparticle_sortInnerCS.hlsl b/WickedEngine/emittedparticle_sortInnerCS.hlsl
index bc9617087..9c6ea65a8 100644
--- a/WickedEngine/emittedparticle_sortInnerCS.hlsl
+++ b/WickedEngine/emittedparticle_sortInnerCS.hlsl
@@ -88,7 +88,7 @@ void main(uint3 Gid : SV_GroupID,
float2 a = g_LDS[index];
float2 b = g_LDS[nSwapElem];
- if (a.x > b.x)
+ if (a.x < b.x)
{
g_LDS[index] = b;
g_LDS[nSwapElem] = a;
diff --git a/WickedEngine/emittedparticle_sortStepCS.hlsl b/WickedEngine/emittedparticle_sortStepCS.hlsl
index 214088ef1..6994f90f7 100644
--- a/WickedEngine/emittedparticle_sortStepCS.hlsl
+++ b/WickedEngine/emittedparticle_sortStepCS.hlsl
@@ -55,7 +55,7 @@ void main(uint3 Gid : SV_GroupID,
float a = distanceBuffer[index];
float b = distanceBuffer[nSwapElem];
- if (a > b)
+ if (a < b)
{
distanceBuffer[index] = b;
distanceBuffer[nSwapElem] = a;
diff --git a/WickedEngine/wiEmittedParticle.cpp b/WickedEngine/wiEmittedParticle.cpp
index 0276190cc..0500afd6a 100644
--- a/WickedEngine/wiEmittedParticle.cpp
+++ b/WickedEngine/wiEmittedParticle.cpp
@@ -14,7 +14,7 @@ using namespace wiGraphicsTypes;
VertexShader *wiEmittedParticle::vertexShader = nullptr;
PixelShader *wiEmittedParticle::pixelShader[PARTICLESHADERTYPE_COUNT] = {};
-ComputeShader *wiEmittedParticle::kickoffUpdateCS, *wiEmittedParticle::emitCS = nullptr, *wiEmittedParticle::simulateCS = nullptr,
+ComputeShader *wiEmittedParticle::kickoffUpdateCS, *wiEmittedParticle::emitCS = nullptr, *wiEmittedParticle::nbodyCS = nullptr, *wiEmittedParticle::simulateCS = nullptr,
*wiEmittedParticle::simulateCS_SORTING = nullptr, *wiEmittedParticle::simulateCS_DEPTHCOLLISIONS = nullptr, *wiEmittedParticle::simulateCS_SORTING_DEPTHCOLLISIONS = nullptr;
ComputeShader *wiEmittedParticle::kickoffSortCS = nullptr, *wiEmittedParticle::sortCS = nullptr, *wiEmittedParticle::sortInnerCS = nullptr, *wiEmittedParticle::sortStepCS = nullptr;
GPUBuffer *wiEmittedParticle::sortCB = nullptr;
@@ -23,7 +23,7 @@ RasterizerState wiEmittedParticle::rasterizerState, wiEmittedParticle::wireFram
DepthStencilState wiEmittedParticle::depthStencilState;
GraphicsPSO wiEmittedParticle::PSO[BLENDMODE_COUNT][PARTICLESHADERTYPE_COUNT];
GraphicsPSO wiEmittedParticle::PSO_wire;
-ComputePSO wiEmittedParticle::CPSO_kickoffUpdate, wiEmittedParticle::CPSO_emit, wiEmittedParticle::CPSO_simulate,
+ComputePSO wiEmittedParticle::CPSO_kickoffUpdate, wiEmittedParticle::CPSO_emit, wiEmittedParticle::CPSO_nbody, wiEmittedParticle::CPSO_simulate,
wiEmittedParticle::CPSO_simulate_SORTING, wiEmittedParticle::CPSO_simulate_DEPTHCOLLISIONS, wiEmittedParticle::CPSO_simulate_SORTING_DEPTHCOLLISIONS;
ComputePSO wiEmittedParticle::CPSO_kickoffSort, wiEmittedParticle::CPSO_sort, wiEmittedParticle::CPSO_sortInner,
wiEmittedParticle::CPSO_sortStep;
@@ -342,6 +342,11 @@ void wiEmittedParticle::UpdateRenderData(GRAPHICSTHREAD threadID)
device->DispatchIndirect(indirectBuffers, ARGUMENTBUFFER_OFFSET_DISPATCHEMIT, threadID);
device->UAVBarrier(uavs, ARRAYSIZE(uavs), threadID);
+ // perform N-body collision response simulation:
+ device->BindComputePSO(&CPSO_nbody, threadID);
+ device->DispatchIndirect(indirectBuffers, ARGUMENTBUFFER_OFFSET_DISPATCHSIMULATION, threadID);
+ device->UAVBarrier(uavs, ARRAYSIZE(uavs), threadID);
+
// update CURRENT alive list, write NEW alive list
if (SORTING)
{
@@ -528,6 +533,7 @@ void wiEmittedParticle::LoadShaders()
kickoffUpdateCS = static_cast(wiResourceManager::GetShaderManager()->add(wiRenderer::SHADERPATH + "emittedparticle_kickoffUpdateCS.cso", wiResourceManager::COMPUTESHADER));
emitCS = static_cast(wiResourceManager::GetShaderManager()->add(wiRenderer::SHADERPATH + "emittedparticle_emitCS.cso", wiResourceManager::COMPUTESHADER));
+ nbodyCS = static_cast(wiResourceManager::GetShaderManager()->add(wiRenderer::SHADERPATH + "emittedparticle_nbodyCS.cso", wiResourceManager::COMPUTESHADER));
simulateCS = static_cast(wiResourceManager::GetShaderManager()->add(wiRenderer::SHADERPATH + "emittedparticle_simulateCS.cso", wiResourceManager::COMPUTESHADER));
simulateCS_SORTING = static_cast(wiResourceManager::GetShaderManager()->add(wiRenderer::SHADERPATH + "emittedparticle_simulateCS_SORTING.cso", wiResourceManager::COMPUTESHADER));
simulateCS_DEPTHCOLLISIONS = static_cast(wiResourceManager::GetShaderManager()->add(wiRenderer::SHADERPATH + "emittedparticle_simulateCS_DEPTHCOLLISIONS.cso", wiResourceManager::COMPUTESHADER));
@@ -581,6 +587,9 @@ void wiEmittedParticle::LoadShaders()
desc.cs = emitCS;
device->CreateComputePSO(&desc, &CPSO_emit);
+ desc.cs = nbodyCS;
+ device->CreateComputePSO(&desc, &CPSO_nbody);
+
desc.cs = simulateCS;
device->CreateComputePSO(&desc, &CPSO_simulate);
diff --git a/WickedEngine/wiEmittedParticle.h b/WickedEngine/wiEmittedParticle.h
index fb45e3c1d..89eae494d 100644
--- a/WickedEngine/wiEmittedParticle.h
+++ b/WickedEngine/wiEmittedParticle.h
@@ -34,7 +34,7 @@ private:
wiGraphicsTypes::GPUBuffer* constantBuffer;
void CreateSelfBuffers();
- static wiGraphicsTypes::ComputeShader *kickoffUpdateCS, *emitCS, *simulateCS, *simulateCS_SORTING, *simulateCS_DEPTHCOLLISIONS, *simulateCS_SORTING_DEPTHCOLLISIONS;
+ static wiGraphicsTypes::ComputeShader *kickoffUpdateCS, *emitCS, *nbodyCS, *simulateCS, *simulateCS_SORTING, *simulateCS_DEPTHCOLLISIONS, *simulateCS_SORTING_DEPTHCOLLISIONS;
static wiGraphicsTypes::ComputeShader *kickoffSortCS, *sortCS, *sortInnerCS, *sortStepCS;
static wiGraphicsTypes::GPUBuffer *sortCB;
static wiGraphicsTypes::VertexShader *vertexShader;
@@ -45,7 +45,7 @@ private:
static wiGraphicsTypes::GraphicsPSO PSO[BLENDMODE_COUNT][PARTICLESHADERTYPE_COUNT];
static wiGraphicsTypes::GraphicsPSO PSO_wire;
- static wiGraphicsTypes::ComputePSO CPSO_kickoffUpdate, CPSO_emit, CPSO_simulate, CPSO_simulate_SORTING, CPSO_simulate_DEPTHCOLLISIONS, CPSO_simulate_SORTING_DEPTHCOLLISIONS;
+ static wiGraphicsTypes::ComputePSO CPSO_kickoffUpdate, CPSO_emit, CPSO_nbody, CPSO_simulate, CPSO_simulate_SORTING, CPSO_simulate_DEPTHCOLLISIONS, CPSO_simulate_SORTING_DEPTHCOLLISIONS;
static wiGraphicsTypes::ComputePSO CPSO_kickoffSort, CPSO_sort, CPSO_sortInner, CPSO_sortStep;
public: