Collider BVH rebuilding is done on a background thread

This commit is contained in:
Turanszki Janos
2025-05-02 08:12:53 +02:00
parent 66181dacbd
commit 956ce163d3
5 changed files with 156 additions and 96 deletions
+87 -55
View File
@@ -24,6 +24,7 @@ namespace wi
constexpr bool IsValid() const { return nodes != nullptr; }
// Completely rebuilds tree from scratch
void Build(const wi::primitive::AABB* aabbs, uint32_t aabb_count)
{
node_count = 0;
@@ -50,73 +51,36 @@ namespace wi
Subdivide(0, aabbs);
}
void Subdivide(uint32_t nodeIndex, const wi::primitive::AABB* leaf_aabb_data)
// Updates the AABBs, but doesn't modify the tree structure (fast update mode)
void Update(const wi::primitive::AABB* aabbs, uint32_t aabb_count)
{
Node& node = nodes[nodeIndex];
if (node.count <= 2)
if (node_count == 0)
return;
if (aabb_count == 0)
return;
if (aabb_count != leaf_count)
return;
XMFLOAT3 extent = node.aabb.getHalfWidth();
XMFLOAT3 min = node.aabb.getMin();
int axis = 0;
if (extent.y > extent.x) axis = 1;
if (extent.z > ((float*)&extent)[axis]) axis = 2;
float splitPos = ((float*)&min)[axis] + ((float*)&extent)[axis] * 0.5f;
// in-place partition
int i = node.offset;
int j = i + node.count - 1;
while (i <= j)
for (uint32_t i = node_count - 1; i > 0; --i)
{
XMFLOAT3 center = leaf_aabb_data[leaf_indices[i]].getCenter();
float value = ((float*)&center)[axis];
if (value < splitPos)
Node& node = nodes[i];
node.aabb = wi::primitive::AABB();
if (node.isLeaf())
{
i++;
for (uint32_t j = 0; j < node.count; ++j)
{
node.aabb = wi::primitive::AABB::Merge(node.aabb, aabbs[leaf_indices[node.offset + j]]);
}
}
else
{
std::swap(leaf_indices[i], leaf_indices[j--]);
node.aabb = wi::primitive::AABB::Merge(node.aabb, nodes[node.left].aabb);
node.aabb = wi::primitive::AABB::Merge(node.aabb, nodes[node.left + 1].aabb);
}
}
// abort split if one of the sides is empty
int leftCount = i - node.offset;
if (leftCount == 0 || leftCount == node.count)
return;
// create child nodes
uint32_t left_child_index = node_count++;
uint32_t right_child_index = node_count++;
node.left = left_child_index;
nodes[left_child_index] = {};
nodes[left_child_index].offset = node.offset;
nodes[left_child_index].count = leftCount;
nodes[right_child_index] = {};
nodes[right_child_index].offset = i;
nodes[right_child_index].count = node.count - leftCount;
node.count = 0;
UpdateNodeBounds(left_child_index, leaf_aabb_data);
UpdateNodeBounds(right_child_index, leaf_aabb_data);
// recurse
Subdivide(left_child_index, leaf_aabb_data);
Subdivide(right_child_index, leaf_aabb_data);
}
// Recomputes the bounding box of a single node.
// nodeIndex: index into `nodes` of the node to refresh.
// leaf_aabb_data: AABB array that the entries of `leaf_indices` index into.
// The node's box is first reset to a value-initialized AABB, then every leaf box
// referenced by the node's [offset, offset + count) range is merged into it.
void UpdateNodeBounds(uint32_t nodeIndex, const wi::primitive::AABB* leaf_aabb_data)
{
	Node& target = nodes[nodeIndex];
	target.aabb = {};
	for (uint32_t k = 0; k < target.count; ++k)
	{
		const uint32_t slot = target.offset + k;
		const wi::primitive::AABB& leaf_box = leaf_aabb_data[leaf_indices[slot]];
		target.aabb = wi::primitive::AABB::Merge(target.aabb, leaf_box);
	}
}
// Intersect with a primitive shape and return the closest hit
template <typename T>
void Intersects(
const T& primitive,
@@ -173,5 +137,73 @@ namespace wi
}
return false;
}
private:
// Recomputes the bounding box of a single node.
// nodeIndex: index into `nodes` of the node to refresh.
// leaf_aabb_data: AABB array that the entries of `leaf_indices` index into.
// The node's box is first reset to a value-initialized AABB, then every leaf box
// referenced by the node's [offset, offset + count) range is merged into it.
void UpdateNodeBounds(uint32_t nodeIndex, const wi::primitive::AABB* leaf_aabb_data)
{
	Node& target = nodes[nodeIndex];
	target.aabb = {};
	for (uint32_t k = 0; k < target.count; ++k)
	{
		const uint32_t slot = target.offset + k;
		const wi::primitive::AABB& leaf_box = leaf_aabb_data[leaf_indices[slot]];
		target.aabb = wi::primitive::AABB::Merge(target.aabb, leaf_box);
	}
}
// Recursively splits the node at nodeIndex into two children.
// nodeIndex: index into `nodes` of the node to split.
// leaf_aabb_data: AABB array that the entries of `leaf_indices` index into.
// Nodes referencing two or fewer leaves are left untouched. Otherwise the leaf
// indices of the node are partitioned in place around a split plane on the axis
// with the largest half width; if the partition leaves one side empty the node
// is also left untouched.
void Subdivide(uint32_t nodeIndex, const wi::primitive::AABB* leaf_aabb_data)
{
	Node& parent = nodes[nodeIndex];
	if (parent.count <= 2)
	{
		return;
	}

	XMFLOAT3 halfwidth = parent.aabb.getHalfWidth();
	XMFLOAT3 corner = parent.aabb.getMin();
	const float* halfwidth_xyz = (const float*)&halfwidth;
	const float* corner_xyz = (const float*)&corner;

	// choose the axis with the largest half width
	int split_axis = 0;
	if (halfwidth.y > halfwidth.x)
	{
		split_axis = 1;
	}
	if (halfwidth.z > halfwidth_xyz[split_axis])
	{
		split_axis = 2;
	}

	// NOTE(review): if getHalfWidth() returns half of the box size, this places the
	// split a quarter of the way along the axis rather than at the midpoint - confirm intent.
	const float split_value = corner_xyz[split_axis] + halfwidth_xyz[split_axis] * 0.5f;

	// in-place partition: leaves whose center lies below the split stay at the front,
	// everything else is swapped towards the back of the node's range
	int head = parent.offset;
	int tail = head + parent.count - 1;
	while (head <= tail)
	{
		const XMFLOAT3 centroid = leaf_aabb_data[leaf_indices[head]].getCenter();
		const float centroid_on_axis = ((const float*)&centroid)[split_axis];
		if (centroid_on_axis < split_value)
		{
			++head;
		}
		else
		{
			std::swap(leaf_indices[head], leaf_indices[tail]);
			--tail;
		}
	}

	// abort split if one of the sides is empty
	int left_size = head - parent.offset;
	if (left_size == 0 || left_size == parent.count)
	{
		return;
	}

	// allocate the two children from consecutive slots; only the left index is stored
	const uint32_t left_id = node_count++;
	const uint32_t right_id = node_count++;
	parent.left = left_id;

	Node& left_child = nodes[left_id];
	left_child = {};
	left_child.offset = parent.offset;
	left_child.count = left_size;

	Node& right_child = nodes[right_id];
	right_child = {};
	right_child.offset = head;
	right_child.count = parent.count - left_size;

	// the parent no longer references leaves directly
	parent.count = 0;

	UpdateNodeBounds(left_id, leaf_aabb_data);
	UpdateNodeBounds(right_id, leaf_aabb_data);

	// recurse into both halves
	Subdivide(left_id, leaf_aabb_data);
	Subdivide(right_id, leaf_aabb_data);
}
};
}
+63 -38
View File
@@ -6,7 +6,6 @@
#include "wiJobSystem.h"
#include "wiSpinLock.h"
#include "wiHelper.h"
#include "wiRenderer.h"
#include "wiBacklog.h"
#include "wiTimer.h"
#include "wiUnorderedMap.h"
@@ -62,6 +61,11 @@ namespace wi::scene
}
}
// count colliders in background thread before procedural anim system
wi::jobsystem::Execute(collider_bvh_workload, [this](wi::jobsystem::JobArgs args) {
CountCPUandGPUColliders();
});
ScanSpringDependencies(); // after terrain, because this saves transform ptrs and terrain can add transforms
StartBuildTopDownHierarchy();
@@ -1014,20 +1018,19 @@ namespace wi::scene
matrix_objects_prev.insert(matrix_objects_prev.end(), other.matrix_objects_prev.begin(), other.matrix_objects_prev.end());
// Recount colliders:
collider_allocator_cpu.store(0u);
collider_allocator_gpu.store(0u);
CountCPUandGPUColliders();
const size_t size =
sizeof(wi::primitive::AABB) * colliders.GetCount() +
sizeof(wi::primitive::AABB) * colliders.GetCount() +
sizeof(ColliderComponent) * colliders.GetCount() +
sizeof(ColliderComponent) * colliders.GetCount()
sizeof(wi::primitive::AABB) * collider_count_cpu +
sizeof(wi::primitive::AABB) * collider_count_gpu +
sizeof(ColliderComponent) * collider_count_cpu +
sizeof(ColliderComponent) * collider_count_gpu
;
collider_deinterleaved_data.reserve(size);
ASAN_UNPOISON_MEMORY_REGION(collider_deinterleaved_data.data(), size);
aabb_colliders_cpu = (wi::primitive::AABB*)collider_deinterleaved_data.data();
aabb_colliders_gpu = aabb_colliders_cpu + colliders.GetCount();
colliders_cpu = (ColliderComponent*)(aabb_colliders_gpu + colliders.GetCount());
colliders_gpu = colliders_cpu + colliders.GetCount();
aabb_colliders_gpu = aabb_colliders_cpu + collider_count_cpu;
colliders_cpu = (ColliderComponent*)(aabb_colliders_gpu + collider_count_gpu);
colliders_gpu = colliders_cpu + collider_count_cpu;
for (size_t i = 0; i < colliders.GetCount(); ++i)
{
@@ -1087,19 +1090,15 @@ namespace wi::scene
if (collider.IsCPUEnabled())
{
uint32_t index = collider_allocator_cpu.fetch_add(1u);
colliders_cpu[index] = collider;
aabb_colliders_cpu[index] = aabb;
colliders_cpu[collider.cpu_index] = collider;
aabb_colliders_cpu[collider.cpu_index] = aabb;
}
if (collider.IsGPUEnabled())
{
uint32_t index = collider_allocator_gpu.fetch_add(1u);
colliders_gpu[index] = collider;
aabb_colliders_gpu[index] = aabb;
colliders_gpu[collider.gpu_index] = collider;
aabb_colliders_gpu[collider.gpu_index] = aabb;
}
}
collider_count_cpu = collider_allocator_cpu.load();
collider_count_gpu = collider_allocator_gpu.load();
collider_bvh.Build(aabb_colliders_cpu, collider_count_cpu);
}
Entity Scene::Instantiate(Scene& prefab, bool attached)
@@ -3652,20 +3651,20 @@ namespace wi::scene
}
// Colliders:
collider_allocator_cpu.store(0u);
collider_allocator_gpu.store(0u);
wi::jobsystem::Wait(collider_bvh_workload); // waits for BVH build and collider counts
std::swap(collider_bvh, collider_bvh_next);
const size_t size =
sizeof(wi::primitive::AABB) * colliders.GetCount() +
sizeof(wi::primitive::AABB) * colliders.GetCount() +
sizeof(ColliderComponent) * colliders.GetCount() +
sizeof(ColliderComponent) * colliders.GetCount()
sizeof(wi::primitive::AABB) * collider_count_cpu +
sizeof(wi::primitive::AABB) * collider_count_gpu +
sizeof(ColliderComponent) * collider_count_cpu +
sizeof(ColliderComponent) * collider_count_gpu
;
collider_deinterleaved_data.reserve(size);
ASAN_UNPOISON_MEMORY_REGION(collider_deinterleaved_data.data(), size);
aabb_colliders_cpu = (wi::primitive::AABB*)collider_deinterleaved_data.data();
aabb_colliders_gpu = aabb_colliders_cpu + colliders.GetCount();
colliders_cpu = (ColliderComponent*)(aabb_colliders_gpu + colliders.GetCount());
colliders_gpu = colliders_cpu + colliders.GetCount();
aabb_colliders_gpu = aabb_colliders_cpu + collider_count_cpu;
colliders_cpu = (ColliderComponent*)(aabb_colliders_gpu + collider_count_gpu);
colliders_gpu = colliders_cpu + collider_count_cpu;
wi::jobsystem::Dispatch(ctx, (uint32_t)colliders.GetCount(), small_subtask_groupsize, [&](wi::jobsystem::JobArgs args) {
@@ -3731,23 +3730,19 @@ namespace wi::scene
if (collider.IsCPUEnabled())
{
uint32_t index = collider_allocator_cpu.fetch_add(1u);
colliders_cpu[index] = collider;
aabb_colliders_cpu[index] = aabb;
colliders_cpu[collider.cpu_index] = collider;
aabb_colliders_cpu[collider.cpu_index] = aabb;
}
if (collider.IsGPUEnabled())
{
uint32_t index = collider_allocator_gpu.fetch_add(1u);
colliders_gpu[index] = collider;
aabb_colliders_gpu[index] = aabb;
colliders_gpu[collider.gpu_index] = collider;
aabb_colliders_gpu[collider.gpu_index] = aabb;
}
});
});
wi::jobsystem::Wait(ctx);
collider_count_cpu = collider_allocator_cpu.load();
collider_count_gpu = collider_allocator_gpu.load();
collider_bvh.Build(aabb_colliders_cpu, collider_count_cpu);
collider_bvh.Update(aabb_colliders_cpu, collider_count_cpu);
// Springs:
wi::jobsystem::Wait(spring_dependency_scan_workload);
@@ -3755,10 +3750,19 @@ namespace wi::scene
{
wi::jobsystem::Dispatch(ctx, (uint32_t)spring_queues.size(), 1, [this](wi::jobsystem::JobArgs args) {
UpdateSpringsTopDownRecursive(nullptr, *spring_queues[args.jobIndex]);
});
});
wi::jobsystem::Wait(ctx);
}
if (collider_count_cpu > 0)
{
// Issue the bvh rebuild on a background thread, the result will be used next frame...
collider_bvh_workload.priority = wi::jobsystem::Priority::Low;
wi::jobsystem::Execute(collider_bvh_workload, [this](wi::jobsystem::JobArgs args) {
collider_bvh_next.Build(aabb_colliders_cpu, collider_count_cpu);
});
}
wi::profiler::EndRange(range);
}
void Scene::RunArmatureUpdateSystem(wi::jobsystem::context& ctx)
@@ -7928,6 +7932,27 @@ namespace wi::scene
}
}
void Scene::CountCPUandGPUColliders()
{
// Note: the collider arrays must be consistent across frames, so can't be counted with multiple threads, this is why it's separated from collider updating
collider_count_cpu = 0;
collider_count_gpu = 0;
for (size_t i = 0; i < colliders.GetCount(); ++i)
{
ColliderComponent& collider = colliders[i];
if (collider.IsCPUEnabled())
{
collider.cpu_index = collider_count_cpu;
collider_count_cpu++;
}
if (collider.IsGPUEnabled())
{
collider.gpu_index = collider_count_gpu;
collider_count_gpu++;
}
}
}
void Scene::ScanAnimationDependencies()
{
if (animations.GetCount() == 0)
+3 -2
View File
@@ -270,8 +270,6 @@ namespace wi::scene
wi::vector<TransformComponent> transforms_temp;
// CPU/GPU Colliders:
std::atomic<uint32_t> collider_allocator_cpu{ 0 };
std::atomic<uint32_t> collider_allocator_gpu{ 0 };
wi::vector<uint8_t> collider_deinterleaved_data;
uint32_t collider_count_cpu = 0;
uint32_t collider_count_gpu = 0;
@@ -280,6 +278,9 @@ namespace wi::scene
ColliderComponent* colliders_cpu = nullptr;
ColliderComponent* colliders_gpu = nullptr;
wi::BVH collider_bvh;
wi::BVH collider_bvh_next;
wi::jobsystem::context collider_bvh_workload;
void CountCPUandGPUColliders();
// Ocean GPU state:
wi::Ocean ocean;
+2
View File
@@ -2027,6 +2027,8 @@ namespace wi::scene
wi::primitive::Plane plane;
uint32_t layerMask = ~0u;
float dist = 0;
uint32_t cpu_index = 0;
uint32_t gpu_index = 0;
void Serialize(wi::Archive& archive, wi::ecs::EntitySerializer& seri);
};
+1 -1
View File
@@ -9,7 +9,7 @@ namespace wi::version
// minor features, major updates, breaking compatibility changes
const int minor = 71;
// minor bug fixes, alterations, refactors, updates
const int revision = 757;
const int revision = 758;
const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);