bvh aabb propagation

This commit is contained in:
turanszkij
2018-06-15 17:25:00 +01:00
parent a32df9531c
commit c037e7bbef
8 changed files with 143 additions and 5 deletions
+2 -1
View File
@@ -5,6 +5,7 @@
#define TRACEDRENDERING_BVH_CLASSIFICATION_GROUPSIZE 64
#define TRACEDRENDERING_BVH_SORTEDMORTON_GROUPSIZE 64
#define TRACEDRENDERING_BVH_HIERARCHY_GROUPSIZE 64
#define TRACEDRENDERING_BVH_PROPAGATEAABB_GROUPSIZE 64
#define TRACEDRENDERING_CLEAR_BLOCKSIZE 8
#define TRACEDRENDERING_LAUNCH_BLOCKSIZE 8
@@ -33,7 +34,7 @@ struct TracedRenderingMeshTriangle
float2 t0, t1, t2;
uint materialIndex;
};
struct TracedRenderingClusterAABB
struct TracedRenderingAABB
{
float3 min;
float3 max;
@@ -531,6 +531,10 @@
<FxCompile Include="hairparticlePS_tiledforward.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Pixel</ShaderType>
</FxCompile>
<FxCompile Include="raytrace_bvh_propagateaabbCS.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
</FxCompile>
<FxCompile Include="raytrace_bvh_hierarchyCS.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
@@ -750,6 +750,9 @@
<FxCompile Include="raytrace_bvh_sortedmortonCS.hlsl">
<Filter>CS</Filter>
</FxCompile>
<FxCompile Include="raytrace_bvh_propagateaabbCS.hlsl">
<Filter>CS</Filter>
</FxCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="PS">
@@ -19,7 +19,7 @@ RWRAWBUFFER(clusterCounterBuffer, 1);
RWSTRUCTUREDBUFFER(clusterIndexBuffer, uint, 2);
RWSTRUCTUREDBUFFER(clusterMortonBuffer, uint, 3);
RWSTRUCTUREDBUFFER(clusterOffsetBuffer, uint2, 4); // offset, count
RWSTRUCTUREDBUFFER(clusterAABBBuffer, TracedRenderingClusterAABB, 5);
RWSTRUCTUREDBUFFER(clusterAABBBuffer, TracedRenderingAABB, 5);
groupshared int3 GroupMin;
groupshared int3 GroupMax;
+5 -1
View File
@@ -3,7 +3,7 @@
#include "tracedRenderingHF.hlsli"
// This shader will construct the BVH from sorted cluster morton codes.
// Output is a list of continuous BVH tree nodes in memory: [parentIndex, leftChildNodeIndex, rightChildNodeIndex]
// Output is a list of contiguous BVH tree nodes in memory: [parentIndex, leftChildNodeIndex, rightChildNodeIndex]. Additionally, we reset the BVH flag buffer (used by the AABB propagation step).
// The output node is a leaf node if: leftChildNodeIndex == rightChildNodeIndex == 0
// Else the output node is an intermediate node
// Also, we know that intermediate nodes start at arrayIndex == 0 (starting with root node)
@@ -17,6 +17,7 @@ RAWBUFFER(clusterCounterBuffer, TEXSLOT_ONDEMAND0);
STRUCTUREDBUFFER(clusterMortonBuffer, uint, TEXSLOT_ONDEMAND1);
RWSTRUCTUREDBUFFER(bvhNodeBuffer, BVHNode, 0);
RWSTRUCTUREDBUFFER(bvhFlagBuffer, uint, 1);
int CountLeadingZeroes(uint num)
{
@@ -137,5 +138,8 @@ void main( uint3 DTid : SV_DispatchThreadID )
WriteParent(idx, childAIndex, childBIndex);
WriteChild(childAIndex, idx);
WriteChild(childBIndex, idx);
// Reset bvh node flag (only internal nodes):
bvhFlagBuffer[idx] = 0;
}
}
@@ -0,0 +1,72 @@
#include "globals.hlsli"
#include "ShaderInterop_TracedRendering.h"
#include "tracedRenderingHF.hlsli"
// This shader traverses the BVH bottom-up and propagates AABBs from the leaves to the internal nodes.
// Cluster AABBs are already computed; they correspond directly to the BVH leaf node AABBs.
// Each thread starts at a cluster (leaf).
// Each thread moves on to the parent node, but only if both children are complete; otherwise it terminates (bvhFlagBuffer tracks this with atomic operations).
// The parent node merges its child AABBs and stores the result.
// Loop until we reach the root...
RAWBUFFER(clusterCounterBuffer, TEXSLOT_ONDEMAND0);
STRUCTUREDBUFFER(clusterIndexBuffer, uint, TEXSLOT_ONDEMAND1);
STRUCTUREDBUFFER(clusterAABBBuffer, TracedRenderingAABB, TEXSLOT_ONDEMAND2);
STRUCTUREDBUFFER(bvhNodeBuffer, BVHNode, TEXSLOT_ONDEMAND3);
RWSTRUCTUREDBUFFER(bvhAABBBuffer, TracedRenderingAABB, 0);
RWSTRUCTUREDBUFFER(bvhFlagBuffer, uint, 1);
// Bottom-up AABB propagation: one thread per cluster (BVH leaf).
//	DTid.x : index into the sorted cluster list (clusterIndexBuffer).
// Each thread copies its cluster AABB into its leaf node, then walks towards the root.
// At every parent, the first child thread to arrive terminates; the second one merges both
// child AABBs into the parent and continues upwards. bvhFlagBuffer is the per-internal-node
// arrival counter and is expected to be zeroed by the hierarchy construction pass beforehand.
[numthreads(TRACEDRENDERING_BVH_PROPAGATEAABB_GROUPSIZE, 1, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
	const uint clusterCount = clusterCounterBuffer.Load(0);

	// Threads past the last cluster have no leaf to process.
	if (DTid.x >= clusterCount)
	{
		return;
	}

	// Leaf nodes are addressed starting at (clusterCount - 1); internal nodes start at index 0 (root).
	const uint leafNodeOffset = clusterCount - 1;
	const uint clusterIndex = clusterIndexBuffer[DTid.x];
	uint nodeIndex = leafNodeOffset + DTid.x;

	// First, we read the current (leaf) node:
	BVHNode node = bvhNodeBuffer[nodeIndex];

	// Leaf node will receive the corresponding cluster AABB:
	bvhAABBBuffer[nodeIndex] = clusterAABBBuffer[clusterIndex];

	// Propagate until we reach the root node.
	// This is a pre-checked loop on purpose: when clusterCount == 1 the single leaf sits at
	// index 0 and IS the root, so there is no parent to visit and no flag slot to touch
	// (bvhFlagBuffer only covers the clusterCount - 1 internal nodes).
	// For clusterCount > 1 every leaf index is >= 1, so the loop is always entered.
	while (nodeIndex != 0)
	{
		// Move up in the tree:
		nodeIndex = node.ParentIndex;

		// Make the AABB this thread has just written (leaf or merged) visible device-wide
		// BEFORE signalling arrival through the atomic, otherwise the sibling thread may
		// pass the flag check and still read a stale child AABB.
		// NOTE(review): for the reading side to be fully safe on all hardware, bvhAABBBuffer
		// should probably also be declared globallycoherent - confirm against the
		// RWSTRUCTUREDBUFFER macro definition.
		DeviceMemoryBarrier();

		// Atomic flag to only allow one thread to write into the parent.
		// If the previous value was 0, this is the first child to arrive: it terminates,
		// because the second child might not have computed its AABB yet.
		// Otherwise this is the second child to arrive, so both child AABBs are ready
		// and this thread may continue with the parent.
		uint flag;
		InterlockedAdd(bvhFlagBuffer[nodeIndex], 1, flag);
		if (flag == 0)
		{
			return;
		}

		// We arrived at the parent node:
		node = bvhNodeBuffer[nodeIndex];

		// Load its two children's AABBs:
		const TracedRenderingAABB leftAABB = bvhAABBBuffer[node.LeftChildIndex];
		const TracedRenderingAABB rightAABB = bvhAABBBuffer[node.RightChildIndex];

		// Merge the child AABBs:
		TracedRenderingAABB mergedAABB;
		mergedAABB.min = min(leftAABB.min, rightAABB.min);
		mergedAABB.max = max(leftAABB.max, rightAABB.max);

		// Write the merged AABB to this node:
		bvhAABBBuffer[nodeIndex] = mergedAABB;
	}
}
+1
View File
@@ -277,6 +277,7 @@ enum CSTYPES
CSTYPE_RAYTRACE_BVH_KICKHIERARCHY,
CSTYPE_RAYTRACE_BVH_SORTEDMORTON,
CSTYPE_RAYTRACE_BVH_HIERARCHY,
CSTYPE_RAYTRACE_BVH_PROPAGATEAABB,
CSTYPE_RAYTRACE_CLEAR,
CSTYPE_RAYTRACE_LAUNCH,
CSTYPE_RAYTRACE_PRIMARY,
+55 -2
View File
@@ -1479,6 +1479,7 @@ void wiRenderer::LoadShaders()
computeShaders[CSTYPE_RAYTRACE_BVH_KICKHIERARCHY] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_bvh_kickhierarchyCS.cso", wiResourceManager::COMPUTESHADER));
computeShaders[CSTYPE_RAYTRACE_BVH_SORTEDMORTON] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_bvh_sortedmortonCS.cso", wiResourceManager::COMPUTESHADER));
computeShaders[CSTYPE_RAYTRACE_BVH_HIERARCHY] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_bvh_hierarchyCS.cso", wiResourceManager::COMPUTESHADER));
computeShaders[CSTYPE_RAYTRACE_BVH_PROPAGATEAABB] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_bvh_propagateaabbCS.cso", wiResourceManager::COMPUTESHADER));
computeShaders[CSTYPE_RAYTRACE_CLEAR] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_clearCS.cso", wiResourceManager::COMPUTESHADER));
computeShaders[CSTYPE_RAYTRACE_LAUNCH] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_launchCS.cso", wiResourceManager::COMPUTESHADER));
computeShaders[CSTYPE_RAYTRACE_PRIMARY] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_primaryCS.cso", wiResourceManager::COMPUTESHADER));
@@ -6413,6 +6414,8 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
static bool allocateBVH = true;
static GPUBuffer* bvhNodeBuffer = nullptr;
static GPUBuffer* bvhAABBBuffer = nullptr;
static GPUBuffer* bvhFlagBuffer = nullptr;
static GPUBuffer* triangleBuffer = nullptr;
static GPUBuffer* clusterCounterBuffer = nullptr;
static GPUBuffer* clusterIndexBuffer = nullptr;
@@ -6430,6 +6433,8 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
HRESULT hr;
SAFE_DELETE(bvhNodeBuffer);
SAFE_DELETE(bvhAABBBuffer);
SAFE_DELETE(bvhFlagBuffer);
SAFE_DELETE(triangleBuffer);
SAFE_DELETE(clusterCounterBuffer);
SAFE_DELETE(clusterIndexBuffer);
@@ -6438,6 +6443,8 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
SAFE_DELETE(clusterOffsetBuffer);
SAFE_DELETE(clusterAABBBuffer);
bvhNodeBuffer = new GPUBuffer;
bvhAABBBuffer = new GPUBuffer;
bvhFlagBuffer = new GPUBuffer;
triangleBuffer = new GPUBuffer;
clusterCounterBuffer = new GPUBuffer;
clusterIndexBuffer = new GPUBuffer;
@@ -6448,7 +6455,7 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
desc.StructureByteStride = sizeof(BVHNode);
desc.ByteWidth = desc.StructureByteStride * maxClusterCount;
desc.ByteWidth = desc.StructureByteStride * maxClusterCount * 2;
desc.CPUAccessFlags = 0;
desc.Format = FORMAT_UNKNOWN;
desc.MiscFlags = RESOURCE_MISC_BUFFER_STRUCTURED;
@@ -6456,6 +6463,26 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
hr = device->CreateBuffer(&desc, nullptr, bvhNodeBuffer);
assert(SUCCEEDED(hr));
desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
desc.StructureByteStride = sizeof(TracedRenderingAABB);
desc.ByteWidth = desc.StructureByteStride * maxClusterCount * 2;
desc.CPUAccessFlags = 0;
desc.Format = FORMAT_UNKNOWN;
desc.MiscFlags = RESOURCE_MISC_BUFFER_STRUCTURED;
desc.Usage = USAGE_DEFAULT;
hr = device->CreateBuffer(&desc, nullptr, bvhAABBBuffer);
assert(SUCCEEDED(hr));
desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
desc.StructureByteStride = sizeof(uint);
desc.ByteWidth = desc.StructureByteStride * (maxClusterCount - 1); // only for internal nodes
desc.CPUAccessFlags = 0;
desc.Format = FORMAT_UNKNOWN;
desc.MiscFlags = RESOURCE_MISC_BUFFER_STRUCTURED;
desc.Usage = USAGE_DEFAULT;
hr = device->CreateBuffer(&desc, nullptr, bvhFlagBuffer);
assert(SUCCEEDED(hr));
desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
desc.StructureByteStride = sizeof(TracedRenderingMeshTriangle);
desc.ByteWidth = desc.StructureByteStride * 10000;
@@ -6508,7 +6535,7 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
assert(SUCCEEDED(hr));
desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
desc.StructureByteStride = sizeof(TracedRenderingClusterAABB);
desc.StructureByteStride = sizeof(TracedRenderingAABB);
desc.ByteWidth = desc.StructureByteStride * maxClusterCount;
desc.CPUAccessFlags = 0;
desc.Format = FORMAT_UNKNOWN;
@@ -6664,6 +6691,7 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
device->BindComputePSO(CPSO[CSTYPE_RAYTRACE_BVH_HIERARCHY], threadID);
GPUResource* uavs[] = {
bvhNodeBuffer,
bvhFlagBuffer,
};
device->BindUnorderedAccessResourcesCS(uavs, 0, ARRAYSIZE(uavs), threadID);
@@ -6681,6 +6709,31 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
}
device->EventEnd(threadID);
device->EventBegin("BVH - Propagate AABB", threadID);
{
device->BindComputePSO(CPSO[CSTYPE_RAYTRACE_BVH_PROPAGATEAABB], threadID);
GPUResource* uavs[] = {
bvhAABBBuffer,
bvhFlagBuffer,
};
device->BindUnorderedAccessResourcesCS(uavs, 0, ARRAYSIZE(uavs), threadID);
GPUResource* res[] = {
clusterCounterBuffer,
clusterIndexBuffer,
clusterAABBBuffer,
bvhNodeBuffer,
};
device->BindResources(CS, res, TEXSLOT_ONDEMAND0, ARRAYSIZE(res), threadID);
device->DispatchIndirect(indirectBuffer, 0, threadID);
device->UAVBarrier(uavs, ARRAYSIZE(uavs), threadID);
device->UnBindUnorderedAccessResources(0, ARRAYSIZE(uavs), threadID);
}
device->EventEnd(threadID);
const XMFLOAT4& halton = wiMath::GetHaltonSequence((int)GetDevice()->GetFrameCount());
TracedRenderingCB cb;