bvh tree construction seems good now!

This commit is contained in:
turanszkij
2018-06-15 15:26:47 +01:00
parent 0d75c96f5e
commit a32df9531c
7 changed files with 152 additions and 76 deletions
+4 -5
View File
@@ -3,6 +3,7 @@
#include "ShaderInterop.h"
#define TRACEDRENDERING_BVH_CLASSIFICATION_GROUPSIZE 64
#define TRACEDRENDERING_BVH_SORTEDMORTON_GROUPSIZE 64
#define TRACEDRENDERING_BVH_HIERARCHY_GROUPSIZE 64
#define TRACEDRENDERING_CLEAR_BLOCKSIZE 8
@@ -41,11 +42,9 @@ struct TracedRenderingClusterAABB
struct BVHNode
{
uint parent;
uint childA;
uint childB;
uint ParentIndex;
uint LeftChildIndex;
uint RightChildIndex;
};
// Tags a node ID as a leaf by setting its most significant bit (bit 31).
// The mask is built from an unsigned literal so the shift never touches the
// sign bit of a signed int; the low 31 bits of nodeID are passed through as-is.
inline uint BVH_MakeLeafNode(uint nodeID) { return nodeID | (1u << 31); }
// Returns true when the most significant bit (bit 31) of nodeID is set, i.e. the
// ID carries the leaf tag. The mask uses an unsigned literal and the result is
// compared against 0 explicitly instead of relying on an implicit uint -> bool
// conversion.
inline bool BVH_IsLeafNode(uint nodeID) { return (nodeID & (1u << 31)) != 0; }
#endif // _SHADERINTEROP_TRACEDRENDERING_H_
@@ -543,6 +543,10 @@
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
</FxCompile>
<FxCompile Include="raytrace_bvh_sortedmortonCS.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
</FxCompile>
<FxCompile Include="raytrace_clearCS.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
@@ -747,6 +747,9 @@
<FxCompile Include="raytrace_bvh_kickhierarchyCS.hlsl">
<Filter>CS</Filter>
</FxCompile>
<FxCompile Include="raytrace_bvh_sortedmortonCS.hlsl">
<Filter>CS</Filter>
</FxCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="PS">
+89 -68
View File
@@ -2,53 +2,102 @@
#include "ShaderInterop_TracedRendering.h"
#include "tracedRenderingHF.hlsli"
// This shader will construct the BVH from sorted cluster morton codes.
// Output is a list of BVH tree nodes stored contiguously in memory: [parentIndex, leftChildNodeIndex, rightChildNodeIndex]
// The output node is a leaf node if: leftChildNodeIndex == rightChildNodeIndex == 0
// Else the output node is an intermediate node
// Also, we know that intermediate nodes start at arrayIndex == 0 (starting with root node)
// Also, we know that leaf nodes will start at arrayIndex == clusterCount -1 (and they will correspond to a single cluster, which is indexable by clusterIndexBuffer later)
// Uses Karras's 2012 parallel BVH construction algorithm, outlined
// in "Maximizing Parallelism in the Construction of BVHs, Octrees,
// and k-d Trees".
RAWBUFFER(clusterCounterBuffer, TEXSLOT_ONDEMAND0);
STRUCTUREDBUFFER(clusterIndexBuffer, uint, TEXSLOT_ONDEMAND1);
STRUCTUREDBUFFER(clusterMortonBuffer, uint, TEXSLOT_ONDEMAND2);
STRUCTUREDBUFFER(clusterMortonBuffer, uint, TEXSLOT_ONDEMAND1);
RWSTRUCTUREDBUFFER(bvhNodeBuffer, BVHNode, 0);
#define __clz firstbithigh
inline int2 determineRange(uint count, uint idx)
int CountLeadingZeroes(uint num)
{
//todo
return int2(count, idx);
return 31 - firstbithigh(num);
}
inline int findSplit(int first, int last)
// Records parentIndex as the parent of the node stored at childIndex in
// bvhNodeBuffer. Only the ParentIndex field is written; the node's child
// links are not touched here.
void WriteChild(uint childIndex, uint parentIndex)
{
bvhNodeBuffer[childIndex].ParentIndex = parentIndex;
}
uint firstCode = clusterMortonBuffer[first];
uint lastCode = clusterMortonBuffer[last];
// Stores the two child links of the node at parentIndex in bvhNodeBuffer.
// leftBoxIndex / rightBoxIndex are indices into the same node buffer; the
// caller in this file passes either an internal-node index or a leaf index
// offset by (clusterCount - 1). The signed arguments are implicitly converted
// to the uint fields. The node's ParentIndex field is not modified here.
void WriteParent(uint parentIndex, int leftBoxIndex, int rightBoxIndex)
{
bvhNodeBuffer[parentIndex].LeftChildIndex = leftBoxIndex;
bvhNodeBuffer[parentIndex].RightChildIndex = rightBoxIndex;
}
if (firstCode == lastCode)
return (first + last) >> 1;
// Length of the common bit prefix of the sort keys at indexA and indexB
// (the delta(i, j) function from Karras 2012).
//
//  - Returns -1 when either index is >= elementCount. Both indices are
//    unsigned, so a caller that passes (idx - 1) with idx == 0 wraps around to
//    a very large value and ends up in this branch too.
//  - Distinct Morton codes: the number of leading bits the two codes share,
//    obtained from the leading-zero count of their XOR.
//  - Identical Morton codes: the element indices act as the tie-breaker; the
//    leading-zero count of their XOR is offset by 31 so that it ranks above
//    the results produced for distinct codes.
int GetLongestCommonPrefix(uint indexA, uint indexB, uint elementCount)
{
    if (indexA >= elementCount || indexB >= elementCount)
    {
        return -1;
    }

    // Load each code once and reuse it for both the comparison and the XOR.
    const uint mortonCodeA = clusterMortonBuffer[indexA];
    const uint mortonCodeB = clusterMortonBuffer[indexB];

    if (mortonCodeA != mortonCodeB)
    {
        return CountLeadingZeroes(mortonCodeA ^ mortonCodeB);
    }

    // TODO: Technically this should be primitive ID
    return CountLeadingZeroes(indexA ^ indexB) + 31;
}
// Calculate the number of highest bits that are the same
// for all objects, using the count-leading-zeros intrinsic.
uint2 DetermineRange(uint idx, uint elementCount)
{
int d = GetLongestCommonPrefix(idx, idx + 1, elementCount) - GetLongestCommonPrefix(idx, idx - 1, elementCount);
d = clamp(d, -1, 1);
int minPrefix = GetLongestCommonPrefix(idx, idx - d, elementCount);
int commonPrefix = __clz(firstCode ^ lastCode);
// TODO: Consider starting this at a higher number
int maxLength = 2;
while (GetLongestCommonPrefix(idx, idx + maxLength * d, elementCount) > minPrefix)
{
maxLength *= 4;
}
// Use binary search to find where the next bit differs.
// Specifically, we are looking for the highest object that
// shares more than commonPrefix bits with the first one.
int length = 0;
for (int t = maxLength / 2; t > 0; t /= 2)
{
if (GetLongestCommonPrefix(idx, idx + (length + t) * d, elementCount) > minPrefix)
{
length = length + t;
}
}
int split = first; // initial guess
int j = idx + length * d;
return uint2(min(idx, j), max(idx, j));
}
int FindSplit(int first, uint last, uint elementCount)
{
int commonPrefix = GetLongestCommonPrefix(first, last, elementCount);
int split = first;
int step = last - first;
do
{
step = (step + 1) >> 1; // exponential decrease
int newSplit = split + step; // proposed new position
step = (step + 1) >> 1;
int newSplit = split + step;
if (newSplit < last)
{
uint splitCode = clusterMortonBuffer[newSplit];
int splitPrefix = __clz(firstCode ^ splitCode);
int splitPrefix = GetLongestCommonPrefix(first, newSplit, elementCount);
if (splitPrefix > commonPrefix)
split = newSplit; // accept proposal
split = newSplit;
}
} while (step > 1);
@@ -65,56 +114,28 @@ void main( uint3 DTid : SV_DispatchThreadID )
if (idx < clusterCount - 1)
{
// Find out which range of objects the node corresponds to.
// (This is where the magic happens!)
uint2 range = DetermineRange(idx, clusterCount);
uint first = range.x;
uint last = range.y;
int2 range = determineRange(clusterCount, idx);
int first = range.x;
int last = range.y;
uint split = FindSplit(first, last, clusterCount);
// Determine where to split the range.
int split = findSplit(first, last);
// Select childA.
uint childA = split;
uint internalNodeOffset = 0;
uint leafNodeOffset = clusterCount - 1;
uint childAIndex;
if (split == first)
{
//childA = &leafNodes[split];
childA = BVH_MakeLeafNode(childA);
}
childAIndex = leafNodeOffset + split;
else
{
//childA = &internalNodes[split];
}
childAIndex = internalNodeOffset + split;
// Select childB.
uint childB = split + 1;
uint childBIndex;
if (split + 1 == last)
{
//childB = &leafNodes[split + 1];
childB = BVH_MakeLeafNode(childB);
}
childBIndex = leafNodeOffset + split + 1;
else
{
//childB = &internalNodes[split + 1];
}
childBIndex = internalNodeOffset + split + 1;
// Record parent-child relationships.
bvhNodeBuffer[idx].childA = childA;
bvhNodeBuffer[idx].childB = childB;
//childA->parent = &internalNodes[idx];
//childB->parent = &internalNodes[idx];
if (!BVH_IsLeafNode(childA))
{
bvhNodeBuffer[childA].parent = idx;
}
if (!BVH_IsLeafNode(childB))
{
bvhNodeBuffer[childB].parent = idx;
}
WriteParent(idx, childAIndex, childBIndex);
WriteChild(childAIndex, idx);
WriteChild(childBIndex, idx);
}
}
@@ -0,0 +1,21 @@
#include "globals.hlsli"
#include "ShaderInterop_TracedRendering.h"
#include "tracedRenderingHF.hlsli"
// This shader reads the cluster index buffer (sorted by morton)
// and outputs the direct sorted morton codes
RAWBUFFER(clusterCounterBuffer, TEXSLOT_ONDEMAND0);
STRUCTUREDBUFFER(clusterIndexBuffer, uint, TEXSLOT_ONDEMAND1);
STRUCTUREDBUFFER(clusterMortonBuffer, uint, TEXSLOT_ONDEMAND2);
RWSTRUCTUREDBUFFER(clusterSortedMortonBuffer, uint, 0);
[numthreads(TRACEDRENDERING_BVH_SORTEDMORTON_GROUPSIZE, 1, 1)]
void main( uint3 DTid : SV_DispatchThreadID )
{
    // One thread per output element; the element count is the value stored
    // at byte offset 0 of clusterCounterBuffer.
    const uint elementIndex = DTid.x;
    const uint elementCount = clusterCounterBuffer.Load(0);

    // Threads past the end of the list have nothing to write.
    if (elementIndex >= elementCount)
    {
        return;
    }

    // clusterIndexBuffer gives, for this sorted position, the slot of the
    // corresponding code in clusterMortonBuffer; gather it into the
    // directly-indexable sorted output.
    const uint sourceSlot = clusterIndexBuffer[elementIndex];
    clusterSortedMortonBuffer[elementIndex] = clusterMortonBuffer[sourceSlot];
}
+1
View File
@@ -275,6 +275,7 @@ enum CSTYPES
CSTYPE_RAYTRACE_BVH_RESET,
CSTYPE_RAYTRACE_BVH_CLASSIFICATION,
CSTYPE_RAYTRACE_BVH_KICKHIERARCHY,
CSTYPE_RAYTRACE_BVH_SORTEDMORTON,
CSTYPE_RAYTRACE_BVH_HIERARCHY,
CSTYPE_RAYTRACE_CLEAR,
CSTYPE_RAYTRACE_LAUNCH,
+30 -3
View File
@@ -1477,6 +1477,7 @@ void wiRenderer::LoadShaders()
computeShaders[CSTYPE_RAYTRACE_BVH_RESET] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_bvh_resetCS.cso", wiResourceManager::COMPUTESHADER));
computeShaders[CSTYPE_RAYTRACE_BVH_CLASSIFICATION] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_bvh_classificationCS.cso", wiResourceManager::COMPUTESHADER));
computeShaders[CSTYPE_RAYTRACE_BVH_KICKHIERARCHY] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_bvh_kickhierarchyCS.cso", wiResourceManager::COMPUTESHADER));
computeShaders[CSTYPE_RAYTRACE_BVH_SORTEDMORTON] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_bvh_sortedmortonCS.cso", wiResourceManager::COMPUTESHADER));
computeShaders[CSTYPE_RAYTRACE_BVH_HIERARCHY] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_bvh_hierarchyCS.cso", wiResourceManager::COMPUTESHADER));
computeShaders[CSTYPE_RAYTRACE_CLEAR] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_clearCS.cso", wiResourceManager::COMPUTESHADER));
computeShaders[CSTYPE_RAYTRACE_LAUNCH] = static_cast<ComputeShader*>(wiResourceManager::GetShaderManager()->add(SHADERPATH + "raytrace_launchCS.cso", wiResourceManager::COMPUTESHADER));
@@ -6416,6 +6417,7 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
static GPUBuffer* clusterCounterBuffer = nullptr;
static GPUBuffer* clusterIndexBuffer = nullptr;
static GPUBuffer* clusterMortonBuffer = nullptr;
static GPUBuffer* clusterSortedMortonBuffer = nullptr;
static GPUBuffer* clusterOffsetBuffer = nullptr;
static GPUBuffer* clusterAABBBuffer = nullptr;
const uint maxClusterCount = 1000;
@@ -6432,6 +6434,7 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
SAFE_DELETE(clusterCounterBuffer);
SAFE_DELETE(clusterIndexBuffer);
SAFE_DELETE(clusterMortonBuffer);
SAFE_DELETE(clusterSortedMortonBuffer);
SAFE_DELETE(clusterOffsetBuffer);
SAFE_DELETE(clusterAABBBuffer);
bvhNodeBuffer = new GPUBuffer;
@@ -6439,6 +6442,7 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
clusterCounterBuffer = new GPUBuffer;
clusterIndexBuffer = new GPUBuffer;
clusterMortonBuffer = new GPUBuffer;
clusterSortedMortonBuffer = new GPUBuffer;
clusterOffsetBuffer = new GPUBuffer;
clusterAABBBuffer = new GPUBuffer;
@@ -6490,6 +6494,7 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
desc.MiscFlags = RESOURCE_MISC_BUFFER_STRUCTURED;
desc.Usage = USAGE_DEFAULT;
hr = device->CreateBuffer(&desc, nullptr, clusterMortonBuffer);
hr = device->CreateBuffer(&desc, nullptr, clusterSortedMortonBuffer);
assert(SUCCEEDED(hr));
desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
@@ -6607,7 +6612,7 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
device->EventEnd(threadID);
device->EventBegin("BVH - Sort Clusters", threadID);
device->EventBegin("BVH - Sort Cluster Mortons", threadID);
wiGPUSortLib::Sort(maxClusterCount, clusterMortonBuffer, clusterCounterBuffer, 0, clusterIndexBuffer, threadID);
device->EventEnd(threadID);
@@ -6631,6 +6636,29 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
}
device->EventEnd(threadID);
device->EventBegin("BVH - Assemble Sorted Mortons", threadID);
{
device->BindComputePSO(CPSO[CSTYPE_RAYTRACE_BVH_SORTEDMORTON], threadID);
GPUResource* uavs[] = {
clusterSortedMortonBuffer,
};
device->BindUnorderedAccessResourcesCS(uavs, 0, ARRAYSIZE(uavs), threadID);
GPUResource* res[] = {
clusterCounterBuffer,
clusterIndexBuffer,
clusterMortonBuffer,
};
device->BindResources(CS, res, TEXSLOT_ONDEMAND0, ARRAYSIZE(res), threadID);
device->DispatchIndirect(indirectBuffer, 0, threadID);
device->UAVBarrier(uavs, ARRAYSIZE(uavs), threadID);
device->UnBindUnorderedAccessResources(0, ARRAYSIZE(uavs), threadID);
}
device->EventEnd(threadID);
device->EventBegin("BVH - Build Hierarchy", threadID);
{
device->BindComputePSO(CPSO[CSTYPE_RAYTRACE_BVH_HIERARCHY], threadID);
@@ -6641,8 +6669,7 @@ void wiRenderer::DrawTracedScene(Camera* camera, wiGraphicsTypes::Texture2D* res
GPUResource* res[] = {
clusterCounterBuffer,
clusterIndexBuffer,
clusterMortonBuffer,
clusterSortedMortonBuffer,
};
device->BindResources(CS, res, TEXSLOT_ONDEMAND0, ARRAYSIZE(res), threadID);