bvh aabb propagation
This commit is contained in:
@@ -5,6 +5,7 @@
|
||||
#define TRACEDRENDERING_BVH_CLASSIFICATION_GROUPSIZE 64
|
||||
#define TRACEDRENDERING_BVH_SORTEDMORTON_GROUPSIZE 64
|
||||
#define TRACEDRENDERING_BVH_HIERARCHY_GROUPSIZE 64
|
||||
#define TRACEDRENDERING_BVH_PROPAGATEAABB_GROUPSIZE 64
|
||||
|
||||
#define TRACEDRENDERING_CLEAR_BLOCKSIZE 8
|
||||
#define TRACEDRENDERING_LAUNCH_BLOCKSIZE 8
|
||||
@@ -33,7 +34,7 @@ struct TracedRenderingMeshTriangle
|
||||
float2 t0, t1, t2;
|
||||
uint materialIndex;
|
||||
};
|
||||
struct TracedRenderingClusterAABB
|
||||
struct TracedRenderingAABB
|
||||
{
|
||||
float3 min;
|
||||
float3 max;
|
||||
|
||||
@@ -531,6 +531,10 @@
|
||||
<FxCompile Include="hairparticlePS_tiledforward.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Pixel</ShaderType>
|
||||
</FxCompile>
|
||||
<FxCompile Include="raytrace_bvh_propagateaabbCS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
|
||||
</FxCompile>
|
||||
<FxCompile Include="raytrace_bvh_hierarchyCS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
|
||||
|
||||
@@ -750,6 +750,9 @@
|
||||
<FxCompile Include="raytrace_bvh_sortedmortonCS.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="raytrace_bvh_propagateaabbCS.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Filter Include="PS">
|
||||
|
||||
@@ -19,7 +19,7 @@ RWRAWBUFFER(clusterCounterBuffer, 1);
|
||||
RWSTRUCTUREDBUFFER(clusterIndexBuffer, uint, 2);
|
||||
RWSTRUCTUREDBUFFER(clusterMortonBuffer, uint, 3);
|
||||
RWSTRUCTUREDBUFFER(clusterOffsetBuffer, uint2, 4); // offset, count
|
||||
RWSTRUCTUREDBUFFER(clusterAABBBuffer, TracedRenderingClusterAABB, 5);
|
||||
RWSTRUCTUREDBUFFER(clusterAABBBuffer, TracedRenderingAABB, 5);
|
||||
|
||||
groupshared int3 GroupMin;
|
||||
groupshared int3 GroupMax;
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
#include "tracedRenderingHF.hlsli"
|
||||
|
||||
// This shader will construct the BVH from sorted cluster morton codes.
|
||||
// Output is a list of continuous BVH tree nodes in memory: [parentIndex, leftChildNodeIndex, rightChildNodeIndex]
|
||||
// Output is a list of contiguous BVH tree nodes in memory: [parentIndex, leftChildNodeIndex, rightChildNodeIndex]. Additionally, we will reset the BVH Flag Buffer (used by the AABB propagation step).
|
||||
// The output node is a leaf node if: leftChildNodeIndex == rightChildNodeIndex == 0
|
||||
// Else the output node is an intermediate node
|
||||
// Also, we know that intermediate nodes start at arrayIndex == 0 (starting with root node)
|
||||
@@ -17,6 +17,7 @@ RAWBUFFER(clusterCounterBuffer, TEXSLOT_ONDEMAND0);
|
||||
STRUCTUREDBUFFER(clusterMortonBuffer, uint, TEXSLOT_ONDEMAND1);
|
||||
|
||||
RWSTRUCTUREDBUFFER(bvhNodeBuffer, BVHNode, 0);
|
||||
RWSTRUCTUREDBUFFER(bvhFlagBuffer, uint, 1);
|
||||
|
||||
int CountLeadingZeroes(uint num)
|
||||
{
|
||||
@@ -137,5 +138,8 @@ void main( uint3 DTid : SV_DispatchThreadID )
|
||||
WriteParent(idx, childAIndex, childBIndex);
|
||||
WriteChild(childAIndex, idx);
|
||||
WriteChild(childBIndex, idx);
|
||||
|
||||
// Reset bvh node flag (only internal nodes):
|
||||
bvhFlagBuffer[idx] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,72 @@
|
||||
#include "globals.hlsli"
|
||||
#include "ShaderInterop_TracedRendering.h"
|
||||
#include "tracedRenderingHF.hlsli"
|
||||
|
||||
// This shader will traverse the BVH from the bottom up, and propagate AABBs from the leaves to the internal nodes
|
||||
// Cluster nodes are already computed, which correspond directly to BVH leaf node AABBs
|
||||
// Each thread starts at a cluster (leaf)
|
||||
// Each thread goes to the parent node, but only if both children are complete, else terminate (bvhFlagBuffer tracks this with atomic operations)
|
||||
// The parent node will merge its two child AABBs and store the result
|
||||
// Loop until we reach the root...
|
||||
|
||||
RAWBUFFER(clusterCounterBuffer, TEXSLOT_ONDEMAND0);
|
||||
STRUCTUREDBUFFER(clusterIndexBuffer, uint, TEXSLOT_ONDEMAND1);
|
||||
STRUCTUREDBUFFER(clusterAABBBuffer, TracedRenderingAABB, TEXSLOT_ONDEMAND2);
|
||||
STRUCTUREDBUFFER(bvhNodeBuffer, BVHNode, TEXSLOT_ONDEMAND3);
|
||||
|
||||
RWSTRUCTUREDBUFFER(bvhAABBBuffer, TracedRenderingAABB, 0);
|
||||
RWSTRUCTUREDBUFFER(bvhFlagBuffer, uint, 1);
|
||||
|
||||
// Bottom-up AABB propagation, one thread per cluster (= one BVH leaf).
//	- The leaf node receives the AABB of its cluster.
//	- The thread then climbs towards the root through BVHNode::ParentIndex.
//	- At every parent, bvhFlagBuffer is incremented atomically: the first child to arrive terminates,
//	  the second one merges both child AABBs, stores the result and keeps climbing.
//	- The climb ends once the root (node index 0) has been written.
[numthreads(TRACEDRENDERING_BVH_PROPAGATEAABB_GROUPSIZE, 1, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
	const uint clusterCount = clusterCounterBuffer.Load(0);

	// Threads past the cluster count have no leaf to process:
	if (DTid.x >= clusterCount)
	{
		return;
	}

	// Leaf nodes are addressed starting at (clusterCount - 1); internal nodes start at index 0 (root):
	const uint leafNodeOffset = clusterCount - 1;
	uint nodeIndex = leafNodeOffset + DTid.x;

	// First, we read the current (leaf) node:
	BVHNode node = bvhNodeBuffer[nodeIndex];

	// Leaf node will receive the corresponding cluster AABB:
	const uint clusterIndex = clusterIndexBuffer[DTid.x];
	bvhAABBBuffer[nodeIndex] = clusterAABBBuffer[clusterIndex];

	// Propagate until we reach the root node.
	//	This is a pre-tested loop on purpose: when there is only a single cluster, the leaf itself is
	//	node 0 (the root), so there is no parent to visit and no flag to touch.
	while (nodeIndex != 0)
	{
		// Order the AABB write of this thread before the flag increment below, because the sibling
		//	thread will read that AABB after it observes the incremented flag.
		// NOTE(review): visibility across thread groups may additionally require bvhAABBBuffer to be
		//	declared globallycoherent - confirm against the buffer declaration macro.
		DeviceMemoryBarrier();

		// Move up in the tree:
		nodeIndex = node.ParentIndex;

		// Atomic flag to only allow one thread to write into the parent:
		//	previous value == 0: this is the first child to arrive. It terminates, because the other
		//		child might not have computed its AABB yet.
		//	previous value != 0: this is the second child to arrive, so both child AABBs are available
		//		and this thread can continue with the parent.
		uint flag;
		InterlockedAdd(bvhFlagBuffer[nodeIndex], 1, flag);
		if (flag == 0)
		{
			return;
		}

		// We arrived at the parent node:
		node = bvhNodeBuffer[nodeIndex];

		// Load the AABBs of its two children:
		const TracedRenderingAABB leftAABB = bvhAABBBuffer[node.LeftChildIndex];
		const TracedRenderingAABB rightAABB = bvhAABBBuffer[node.RightChildIndex];

		// Merge the child AABBs:
		TracedRenderingAABB mergedAABB;
		mergedAABB.min = min(leftAABB.min, rightAABB.min);
		mergedAABB.max = max(leftAABB.max, rightAABB.max);

		// Write the merged AABB to this node:
		bvhAABBBuffer[nodeIndex] = mergedAABB;
	}
}
|
||||
@@ -277,6 +277,7 @@ enum CSTYPES
|
||||
CSTYPE_RAYTRACE_BVH_KICKHIERARCHY,
|
||||
CSTYPE_RAYTRACE_BVH_SORTEDMORTON,
|
||||
CSTYPE_RAYTRACE_BVH_HIERARCHY,
|
||||
CSTYPE_RAYTRACE_BVH_PROPAGATEAABB,
|
||||
CSTYPE_RAYTRACE_CLEAR,
|
||||
CSTYPE_RAYTRACE_LAUNCH,
|
||||
CSTYPE_RAYTRACE_PRIMARY,
|
||||
|
||||
@@ -1479,6 +1479,7 @@ void wiRenderer::LoadShaders()
|
||||
computeShaders[CSTYPE_RAYTRACE_BVH_KICKHIERARCHY] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_bvh_kickhierarchyCS.cso", wiResourceManager::COMPUTESHADER));
|
||||
computeShaders[CSTYPE_RAYTRACE_BVH_SORTEDMORTON] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_bvh_sortedmortonCS.cso", wiResourceManager::COMPUTESHADER));
|
||||
computeShaders[CSTYPE_RAYTRACE_BVH_HIERARCHY] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_bvh_hierarchyCS.cso", wiResourceManager::COMPUTESHADER));
|
||||
computeShaders[CSTYPE_RAYTRACE_BVH_PROPAGATEAABB] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_bvh_propagateaabbCS.cso", wiResourceManager::COMPUTESHADER));
|
||||
computeShaders[CSTYPE_RAYTRACE_CLEAR] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_clearCS.cso", wiResourceManager::COMPUTESHADER));
|
||||
computeShaders[CSTYPE_RAYTRACE_LAUNCH] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_launchCS.cso", wiResourceManager::COMPUTESHADER));
|
||||
computeShaders[CSTYPE_RAYTRACE_PRIMARY] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_primaryCS.cso", wiResourceManager::COMPUTESHADER));
|
||||
@@ -6413,6 +6414,8 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
|
||||
|
||||
static bool allocateBVH = true;
|
||||
static GPUBuffer* bvhNodeBuffer = nullptr;
|
||||
static GPUBuffer* bvhAABBBuffer = nullptr;
|
||||
static GPUBuffer* bvhFlagBuffer = nullptr;
|
||||
static GPUBuffer* triangleBuffer = nullptr;
|
||||
static GPUBuffer* clusterCounterBuffer = nullptr;
|
||||
static GPUBuffer* clusterIndexBuffer = nullptr;
|
||||
@@ -6430,6 +6433,8 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
|
||||
HRESULT hr;
|
||||
|
||||
SAFE_DELETE(bvhNodeBuffer);
|
||||
SAFE_DELETE(bvhAABBBuffer);
|
||||
SAFE_DELETE(bvhFlagBuffer);
|
||||
SAFE_DELETE(triangleBuffer);
|
||||
SAFE_DELETE(clusterCounterBuffer);
|
||||
SAFE_DELETE(clusterIndexBuffer);
|
||||
@@ -6438,6 +6443,8 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
|
||||
SAFE_DELETE(clusterOffsetBuffer);
|
||||
SAFE_DELETE(clusterAABBBuffer);
|
||||
bvhNodeBuffer = new GPUBuffer;
|
||||
bvhAABBBuffer = new GPUBuffer;
|
||||
bvhFlagBuffer = new GPUBuffer;
|
||||
triangleBuffer = new GPUBuffer;
|
||||
clusterCounterBuffer = new GPUBuffer;
|
||||
clusterIndexBuffer = new GPUBuffer;
|
||||
@@ -6448,7 +6455,7 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
|
||||
|
||||
desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
|
||||
desc.StructureByteStride = sizeof(BVHNode);
|
||||
desc.ByteWidth = desc.StructureByteStride * maxClusterCount;
|
||||
desc.ByteWidth = desc.StructureByteStride * maxClusterCount * 2;
|
||||
desc.CPUAccessFlags = 0;
|
||||
desc.Format = FORMAT_UNKNOWN;
|
||||
desc.MiscFlags = RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||
@@ -6456,6 +6463,26 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
|
||||
hr = device->CreateBuffer(&desc, nullptr, bvhNodeBuffer);
|
||||
assert(SUCCEEDED(hr));
|
||||
|
||||
desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
|
||||
desc.StructureByteStride = sizeof(TracedRenderingAABB);
|
||||
desc.ByteWidth = desc.StructureByteStride * maxClusterCount * 2;
|
||||
desc.CPUAccessFlags = 0;
|
||||
desc.Format = FORMAT_UNKNOWN;
|
||||
desc.MiscFlags = RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||
desc.Usage = USAGE_DEFAULT;
|
||||
hr = device->CreateBuffer(&desc, nullptr, bvhAABBBuffer);
|
||||
assert(SUCCEEDED(hr));
|
||||
|
||||
desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
|
||||
desc.StructureByteStride = sizeof(uint);
|
||||
desc.ByteWidth = desc.StructureByteStride * (maxClusterCount - 1); // only for internal nodes
|
||||
desc.CPUAccessFlags = 0;
|
||||
desc.Format = FORMAT_UNKNOWN;
|
||||
desc.MiscFlags = RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||
desc.Usage = USAGE_DEFAULT;
|
||||
hr = device->CreateBuffer(&desc, nullptr, bvhFlagBuffer);
|
||||
assert(SUCCEEDED(hr));
|
||||
|
||||
desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
|
||||
desc.StructureByteStride = sizeof(TracedRenderingMeshTriangle);
|
||||
desc.ByteWidth = desc.StructureByteStride * 10000;
|
||||
@@ -6508,7 +6535,7 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
|
||||
assert(SUCCEEDED(hr));
|
||||
|
||||
desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
|
||||
desc.StructureByteStride = sizeof(TracedRenderingClusterAABB);
|
||||
desc.StructureByteStride = sizeof(TracedRenderingAABB);
|
||||
desc.ByteWidth = desc.StructureByteStride * maxClusterCount;
|
||||
desc.CPUAccessFlags = 0;
|
||||
desc.Format = FORMAT_UNKNOWN;
|
||||
@@ -6664,6 +6691,7 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
|
||||
device->BindComputePSO(CPSO[CSTYPE_RAYTRACE_BVH_HIERARCHY], threadID);
|
||||
GPUResource* uavs[] = {
|
||||
bvhNodeBuffer,
|
||||
bvhFlagBuffer,
|
||||
};
|
||||
device->BindUnorderedAccessResourcesCS(uavs, 0, ARRAYSIZE(uavs), threadID);
|
||||
|
||||
@@ -6681,6 +6709,31 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
|
||||
}
|
||||
device->EventEnd(threadID);
|
||||
|
||||
device->EventBegin("BVH - Propagate AABB", threadID);
|
||||
{
|
||||
device->BindComputePSO(CPSO[CSTYPE_RAYTRACE_BVH_PROPAGATEAABB], threadID);
|
||||
GPUResource* uavs[] = {
|
||||
bvhAABBBuffer,
|
||||
bvhFlagBuffer,
|
||||
};
|
||||
device->BindUnorderedAccessResourcesCS(uavs, 0, ARRAYSIZE(uavs), threadID);
|
||||
|
||||
GPUResource* res[] = {
|
||||
clusterCounterBuffer,
|
||||
clusterIndexBuffer,
|
||||
clusterAABBBuffer,
|
||||
bvhNodeBuffer,
|
||||
};
|
||||
device->BindResources(CS, res, TEXSLOT_ONDEMAND0, ARRAYSIZE(res), threadID);
|
||||
|
||||
device->DispatchIndirect(indirectBuffer, 0, threadID);
|
||||
|
||||
|
||||
device->UAVBarrier(uavs, ARRAYSIZE(uavs), threadID);
|
||||
device->UnBindUnorderedAccessResources(0, ARRAYSIZE(uavs), threadID);
|
||||
}
|
||||
device->EventEnd(threadID);
|
||||
|
||||
|
||||
const XMFLOAT4& halton = wiMath::GetHaltonSequence((int)GetDevice()->GetFrameCount());
|
||||
TracedRenderingCB cb;
|
||||
|
||||
Reference in New Issue
Block a user