BVH cluster classification fixes

This commit is contained in:
turanszkij
2018-06-16 19:36:26 +01:00
parent 05ac5a0dde
commit 1c1d823ca8
3 changed files with 37 additions and 31 deletions
+31 -25
View File
@@ -25,10 +25,12 @@ RWSTRUCTUREDBUFFER(clusterAABBBuffer, TracedRenderingAABB, 5);
//#define CLUSTER_GROUP
#ifdef CLUSTER_GROUP
static const float MapFloatToUint = 1000000.0f;
groupshared uint3 GroupMin;
groupshared uint3 GroupMax;
groupshared uint ClusterTriangleCount;
static const uint clusterTriangleCapacity = 8;
static const uint bucketCount = TRACEDRENDERING_BVH_CLASSIFICATION_GROUPSIZE / clusterTriangleCapacity;
static const float MapFloatToUint = 100000.0f;
groupshared uint3 GroupMin[bucketCount];
groupshared uint3 GroupMax[bucketCount];
groupshared uint ClusterTriangleCount[bucketCount];
#endif // CLUSTER_GROUP
@@ -60,20 +62,24 @@ inline uint morton3D(in float3 pos)
[numthreads(TRACEDRENDERING_BVH_CLASSIFICATION_GROUPSIZE, 1, 1)]
void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex)
{
const uint tri = DTid.x;
const uint globalTriangleID = xTraceBVHMeshTriangleOffset + tri;
const bool activeThread = tri < xTraceBVHMeshTriangleCount;
#ifdef CLUSTER_GROUP
if (groupIndex == 0)
const bool isClusterUpdateThread = activeThread && (groupIndex % clusterTriangleCapacity == 0);
const uint bucket = groupIndex / clusterTriangleCapacity;
if (isClusterUpdateThread)
{
GroupMin = 0xFFFFFFFF;
GroupMax = 0;
ClusterTriangleCount = 0;
GroupMin[bucket] = 0xFFFFFFFF;
GroupMax[bucket] = 0;
ClusterTriangleCount[bucket] = 0;
}
GroupMemoryBarrierWithGroupSync();
#endif // CLUSTER_GROUP
uint tri = DTid.x;
uint globalTriangleID = xTraceBVHMeshTriangleOffset + tri;
if (tri < xTraceBVHMeshTriangleCount)
if (activeThread)
{
// load indices of triangle from index buffer
uint i0 = meshIndexBuffer[tri * 3 + 0];
@@ -134,23 +140,23 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex)
#ifdef CLUSTER_GROUP
// Count triangles in the cluster (ideally it would be max cluster size, but might be the end of the mesh...)
InterlockedAdd(ClusterTriangleCount, 1);
InterlockedAdd(ClusterTriangleCount[bucket], 1);
// Remap triangle AABB to [0-1]:
minAABB = (minAABB - g_xFrame_WorldBoundsMin) * g_xFrame_WorldBoundsExtents_Inverse;
maxAABB = (maxAABB - g_xFrame_WorldBoundsMin) * g_xFrame_WorldBoundsExtents_Inverse;
// Atomics can be only performed on integers, so convert:
uint3 uMin = (uint)(minAABB * MapFloatToUint);
uint3 uMax = (uint)(maxAABB * MapFloatToUint);
uint3 uMin = (uint3)(minAABB * MapFloatToUint);
uint3 uMax = (uint3)(maxAABB * MapFloatToUint);
// Merge cluster AABB:
InterlockedMin(GroupMin.x, uMin.x);
InterlockedMin(GroupMin.y, uMin.y);
InterlockedMin(GroupMin.z, uMin.z);
InterlockedMax(GroupMax.x, uMax.x);
InterlockedMax(GroupMax.y, uMax.y);
InterlockedMax(GroupMax.z, uMax.z);
InterlockedMin(GroupMin[bucket].x, uMin.x);
InterlockedMin(GroupMin[bucket].y, uMin.y);
InterlockedMin(GroupMin[bucket].z, uMin.z);
InterlockedMax(GroupMax[bucket].x, uMax.x);
InterlockedMax(GroupMax[bucket].y, uMax.y);
InterlockedMax(GroupMax[bucket].z, uMax.z);
#else
// Each triangle is its own cluster:
@@ -177,7 +183,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex)
#ifdef CLUSTER_GROUP
GroupMemoryBarrierWithGroupSync();
if (groupIndex == 0)
if (isClusterUpdateThread)
{
// Store cluster data:
uint clusterID;
@@ -185,14 +191,14 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex)
clusterIndexBuffer[clusterID] = clusterID;
float3 minAABB = ((float)GroupMin / MapFloatToUint) * g_xFrame_WorldBoundsExtents + g_xFrame_WorldBoundsMin;
float3 maxAABB = ((float)GroupMax / MapFloatToUint) * g_xFrame_WorldBoundsExtents + g_xFrame_WorldBoundsMin;
float3 minAABB = ((float3)GroupMin[bucket] / MapFloatToUint) * g_xFrame_WorldBoundsExtents + g_xFrame_WorldBoundsMin;
float3 maxAABB = ((float3)GroupMax[bucket] / MapFloatToUint) * g_xFrame_WorldBoundsExtents + g_xFrame_WorldBoundsMin;
float3 centerAABB = (minAABB + maxAABB) * 0.5f;
float3 remappedCenter = (centerAABB - g_xFrame_WorldBoundsMin) * g_xFrame_WorldBoundsExtents_Inverse;
clusterMortonBuffer[clusterID] = morton3D(remappedCenter);
clusterOffsetBuffer[clusterID] = uint2(globalTriangleID, ClusterTriangleCount);
clusterOffsetBuffer[clusterID] = uint2(globalTriangleID, ClusterTriangleCount[bucket]);
clusterAABBBuffer[clusterID].min = minAABB;
clusterAABBBuffer[clusterID].max = maxAABB;
+2 -2
View File
@@ -105,10 +105,10 @@ inline RayHit TraceScene(Ray ray, uint groupIndex)
// Internal node
if (stackpos < stacksize - 1)
{
//push left node
// push left child
stack[stackpos] = node.LeftChildIndex;
stackpos++;
// push right node
// push right child
stack[stackpos] = node.RightChildIndex;
stackpos++;
}
+4 -4
View File
@@ -6339,7 +6339,7 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
uint _raycount = _width * _height;
static GPUBuffer* materialBuffer = nullptr;
static MaterialCB materialArray[10] = {};
static MaterialCB materialArray[1000] = {};
static GPUBuffer* rayBuffer[2] = {};
static GPUBuffer* indirectBuffer = nullptr;
static GPUBuffer* counterBuffer[2] = {};
@@ -6354,7 +6354,7 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
desc.BindFlags = BIND_SHADER_RESOURCE;
desc.StructureByteStride = sizeof(MaterialCB);
desc.ByteWidth = desc.StructureByteStride * 10;
desc.ByteWidth = desc.StructureByteStride * ARRAYSIZE(materialArray);
desc.CPUAccessFlags = 0;
desc.Format = FORMAT_UNKNOWN;
desc.MiscFlags = RESOURCE_MISC_BUFFER_STRUCTURED;
@@ -6423,8 +6423,8 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
static GPUBuffer* clusterSortedMortonBuffer = nullptr;
static GPUBuffer* clusterOffsetBuffer = nullptr;
static GPUBuffer* clusterAABBBuffer = nullptr;
const uint maxClusterCount = 100000;
const uint maxTriangleCount = 100000;
const uint maxClusterCount = 800000;
const uint maxTriangleCount = 800000;
if (allocateBVH)
{