diff --git a/WickedEngine/wiBVH.h b/WickedEngine/wiBVH.h index b2b54e3bb..15ed4f365 100644 --- a/WickedEngine/wiBVH.h +++ b/WickedEngine/wiBVH.h @@ -24,6 +24,7 @@ namespace wi constexpr bool IsValid() const { return nodes != nullptr; } + // Completely rebuilds tree from scratch void Build(const wi::primitive::AABB* aabbs, uint32_t aabb_count) { node_count = 0; @@ -50,73 +51,36 @@ namespace wi Subdivide(0, aabbs); } - void Subdivide(uint32_t nodeIndex, const wi::primitive::AABB* leaf_aabb_data) + // Updates the AABBs, but doesn't modify the tree structure (fast update mode) + void Update(const wi::primitive::AABB* aabbs, uint32_t aabb_count) { - Node& node = nodes[nodeIndex]; - if (node.count <= 2) + if (node_count == 0) + return; + if (aabb_count == 0) + return; + if (aabb_count != leaf_count) return; - XMFLOAT3 extent = node.aabb.getHalfWidth(); - XMFLOAT3 min = node.aabb.getMin(); - int axis = 0; - if (extent.y > extent.x) axis = 1; - if (extent.z > ((float*)&extent)[axis]) axis = 2; - float splitPos = ((float*)&min)[axis] + ((float*)&extent)[axis] * 0.5f; - - // in-place partition - int i = node.offset; - int j = i + node.count - 1; - while (i <= j) + for (uint32_t i = node_count - 1; i > 0; --i) { - XMFLOAT3 center = leaf_aabb_data[leaf_indices[i]].getCenter(); - float value = ((float*)&center)[axis]; - - if (value < splitPos) + Node& node = nodes[i]; + node.aabb = wi::primitive::AABB(); + if (node.isLeaf()) { - i++; + for (uint32_t j = 0; j < node.count; ++j) + { + node.aabb = wi::primitive::AABB::Merge(node.aabb, aabbs[leaf_indices[node.offset + j]]); + } } else { - std::swap(leaf_indices[i], leaf_indices[j--]); + node.aabb = wi::primitive::AABB::Merge(node.aabb, nodes[node.left].aabb); + node.aabb = wi::primitive::AABB::Merge(node.aabb, nodes[node.left + 1].aabb); } } - - // abort split if one of the sides is empty - int leftCount = i - node.offset; - if (leftCount == 0 || leftCount == node.count) - return; - - // create child nodes - uint32_t left_child_index = 
node_count++; - uint32_t right_child_index = node_count++; - node.left = left_child_index; - nodes[left_child_index] = {}; - nodes[left_child_index].offset = node.offset; - nodes[left_child_index].count = leftCount; - nodes[right_child_index] = {}; - nodes[right_child_index].offset = i; - nodes[right_child_index].count = node.count - leftCount; - node.count = 0; - UpdateNodeBounds(left_child_index, leaf_aabb_data); - UpdateNodeBounds(right_child_index, leaf_aabb_data); - - // recurse - Subdivide(left_child_index, leaf_aabb_data); - Subdivide(right_child_index, leaf_aabb_data); - } - - void UpdateNodeBounds(uint32_t nodeIndex, const wi::primitive::AABB* leaf_aabb_data) - { - Node& node = nodes[nodeIndex]; - node.aabb = {}; - for (uint32_t i = 0; i < node.count; ++i) - { - uint32_t offset = node.offset + i; - uint32_t index = leaf_indices[offset]; - node.aabb = wi::primitive::AABB::Merge(node.aabb, leaf_aabb_data[index]); - } } + // Intersect with a primitive shape and return the closest hit template void Intersects( const T& primitive, @@ -173,5 +137,73 @@ namespace wi } return false; } + + private: + void UpdateNodeBounds(uint32_t nodeIndex, const wi::primitive::AABB* leaf_aabb_data) + { + Node& node = nodes[nodeIndex]; + node.aabb = {}; + for (uint32_t i = 0; i < node.count; ++i) + { + uint32_t offset = node.offset + i; + uint32_t index = leaf_indices[offset]; + node.aabb = wi::primitive::AABB::Merge(node.aabb, leaf_aabb_data[index]); + } + } + + void Subdivide(uint32_t nodeIndex, const wi::primitive::AABB* leaf_aabb_data) + { + Node& node = nodes[nodeIndex]; + if (node.count <= 2) + return; + + XMFLOAT3 extent = node.aabb.getHalfWidth(); + XMFLOAT3 min = node.aabb.getMin(); + int axis = 0; + if (extent.y > extent.x) axis = 1; + if (extent.z > ((float*)&extent)[axis]) axis = 2; + float splitPos = ((float*)&min)[axis] + ((float*)&extent)[axis] * 0.5f; + + // in-place partition + int i = node.offset; + int j = i + node.count - 1; + while (i <= j) + { + XMFLOAT3 
center = leaf_aabb_data[leaf_indices[i]].getCenter(); + float value = ((float*)&center)[axis]; + + if (value < splitPos) + { + i++; + } + else + { + std::swap(leaf_indices[i], leaf_indices[j--]); + } + } + + // abort split if one of the sides is empty + int leftCount = i - node.offset; + if (leftCount == 0 || leftCount == node.count) + return; + + // create child nodes + uint32_t left_child_index = node_count++; + uint32_t right_child_index = node_count++; + node.left = left_child_index; + nodes[left_child_index] = {}; + nodes[left_child_index].offset = node.offset; + nodes[left_child_index].count = leftCount; + nodes[right_child_index] = {}; + nodes[right_child_index].offset = i; + nodes[right_child_index].count = node.count - leftCount; + node.count = 0; + UpdateNodeBounds(left_child_index, leaf_aabb_data); + UpdateNodeBounds(right_child_index, leaf_aabb_data); + + // recurse + Subdivide(left_child_index, leaf_aabb_data); + Subdivide(right_child_index, leaf_aabb_data); + } }; } diff --git a/WickedEngine/wiScene.cpp b/WickedEngine/wiScene.cpp index fce1d05ce..6be6162df 100644 --- a/WickedEngine/wiScene.cpp +++ b/WickedEngine/wiScene.cpp @@ -6,7 +6,6 @@ #include "wiJobSystem.h" #include "wiSpinLock.h" #include "wiHelper.h" -#include "wiRenderer.h" #include "wiBacklog.h" #include "wiTimer.h" #include "wiUnorderedMap.h" @@ -62,6 +61,11 @@ namespace wi::scene } } + // count colliders in background thread before procedural anim system + wi::jobsystem::Execute(collider_bvh_workload, [this](wi::jobsystem::JobArgs args) { + CountCPUandGPUColliders(); + }); + ScanSpringDependencies(); // after terrain, because this saves transform ptrs and terrain can add transforms StartBuildTopDownHierarchy(); @@ -1014,20 +1018,19 @@ namespace wi::scene matrix_objects_prev.insert(matrix_objects_prev.end(), other.matrix_objects_prev.begin(), other.matrix_objects_prev.end()); // Recount colliders: - collider_allocator_cpu.store(0u); - collider_allocator_gpu.store(0u); + 
CountCPUandGPUColliders(); const size_t size = - sizeof(wi::primitive::AABB) * colliders.GetCount() + - sizeof(wi::primitive::AABB) * colliders.GetCount() + - sizeof(ColliderComponent) * colliders.GetCount() + - sizeof(ColliderComponent) * colliders.GetCount() + sizeof(wi::primitive::AABB) * collider_count_cpu + + sizeof(wi::primitive::AABB) * collider_count_gpu + + sizeof(ColliderComponent) * collider_count_cpu + + sizeof(ColliderComponent) * collider_count_gpu ; collider_deinterleaved_data.reserve(size); ASAN_UNPOISON_MEMORY_REGION(collider_deinterleaved_data.data(), size); aabb_colliders_cpu = (wi::primitive::AABB*)collider_deinterleaved_data.data(); - aabb_colliders_gpu = aabb_colliders_cpu + colliders.GetCount(); - colliders_cpu = (ColliderComponent*)(aabb_colliders_gpu + colliders.GetCount()); - colliders_gpu = colliders_cpu + colliders.GetCount(); + aabb_colliders_gpu = aabb_colliders_cpu + collider_count_cpu; + colliders_cpu = (ColliderComponent*)(aabb_colliders_gpu + collider_count_gpu); + colliders_gpu = colliders_cpu + collider_count_cpu; for (size_t i = 0; i < colliders.GetCount(); ++i) { @@ -1087,19 +1090,15 @@ namespace wi::scene if (collider.IsCPUEnabled()) { - uint32_t index = collider_allocator_cpu.fetch_add(1u); - colliders_cpu[index] = collider; - aabb_colliders_cpu[index] = aabb; + colliders_cpu[collider.cpu_index] = collider; + aabb_colliders_cpu[collider.cpu_index] = aabb; } if (collider.IsGPUEnabled()) { - uint32_t index = collider_allocator_gpu.fetch_add(1u); - colliders_gpu[index] = collider; - aabb_colliders_gpu[index] = aabb; + colliders_gpu[collider.gpu_index] = collider; + aabb_colliders_gpu[collider.gpu_index] = aabb; } } - collider_count_cpu = collider_allocator_cpu.load(); - collider_count_gpu = collider_allocator_gpu.load(); collider_bvh.Build(aabb_colliders_cpu, collider_count_cpu); } Entity Scene::Instantiate(Scene& prefab, bool attached) @@ -3652,20 +3651,20 @@ namespace wi::scene } // Colliders: - 
collider_allocator_cpu.store(0u); - collider_allocator_gpu.store(0u); + wi::jobsystem::Wait(collider_bvh_workload); // waits for BVH build and collider counts + std::swap(collider_bvh, collider_bvh_next); const size_t size = - sizeof(wi::primitive::AABB) * colliders.GetCount() + - sizeof(wi::primitive::AABB) * colliders.GetCount() + - sizeof(ColliderComponent) * colliders.GetCount() + - sizeof(ColliderComponent) * colliders.GetCount() + sizeof(wi::primitive::AABB) * collider_count_cpu + + sizeof(wi::primitive::AABB) * collider_count_gpu + + sizeof(ColliderComponent) * collider_count_cpu + + sizeof(ColliderComponent) * collider_count_gpu ; collider_deinterleaved_data.reserve(size); ASAN_UNPOISON_MEMORY_REGION(collider_deinterleaved_data.data(), size); aabb_colliders_cpu = (wi::primitive::AABB*)collider_deinterleaved_data.data(); - aabb_colliders_gpu = aabb_colliders_cpu + colliders.GetCount(); - colliders_cpu = (ColliderComponent*)(aabb_colliders_gpu + colliders.GetCount()); - colliders_gpu = colliders_cpu + colliders.GetCount(); + aabb_colliders_gpu = aabb_colliders_cpu + collider_count_cpu; + colliders_cpu = (ColliderComponent*)(aabb_colliders_gpu + collider_count_gpu); + colliders_gpu = colliders_cpu + collider_count_cpu; wi::jobsystem::Dispatch(ctx, (uint32_t)colliders.GetCount(), small_subtask_groupsize, [&](wi::jobsystem::JobArgs args) { @@ -3731,23 +3730,19 @@ namespace wi::scene if (collider.IsCPUEnabled()) { - uint32_t index = collider_allocator_cpu.fetch_add(1u); - colliders_cpu[index] = collider; - aabb_colliders_cpu[index] = aabb; + colliders_cpu[collider.cpu_index] = collider; + aabb_colliders_cpu[collider.cpu_index] = aabb; } if (collider.IsGPUEnabled()) { - uint32_t index = collider_allocator_gpu.fetch_add(1u); - colliders_gpu[index] = collider; - aabb_colliders_gpu[index] = aabb; + colliders_gpu[collider.gpu_index] = collider; + aabb_colliders_gpu[collider.gpu_index] = aabb; } - }); + }); wi::jobsystem::Wait(ctx); - collider_count_cpu = 
collider_allocator_cpu.load(); - collider_count_gpu = collider_allocator_gpu.load(); - collider_bvh.Build(aabb_colliders_cpu, collider_count_cpu); + collider_bvh.Update(aabb_colliders_cpu, collider_count_cpu); // Springs: wi::jobsystem::Wait(spring_dependency_scan_workload); @@ -3755,10 +3750,19 @@ namespace wi::scene { wi::jobsystem::Dispatch(ctx, (uint32_t)spring_queues.size(), 1, [this](wi::jobsystem::JobArgs args) { UpdateSpringsTopDownRecursive(nullptr, *spring_queues[args.jobIndex]); - }); + }); wi::jobsystem::Wait(ctx); } + if (collider_count_cpu > 0) + { + // Issue the bvh rebuild on a background thread, the result will be used next frame... + collider_bvh_workload.priority = wi::jobsystem::Priority::Low; + wi::jobsystem::Execute(collider_bvh_workload, [this](wi::jobsystem::JobArgs args) { + collider_bvh_next.Build(aabb_colliders_cpu, collider_count_cpu); + }); + } + wi::profiler::EndRange(range); } void Scene::RunArmatureUpdateSystem(wi::jobsystem::context& ctx) @@ -7928,6 +7932,27 @@ namespace wi::scene } } + void Scene::CountCPUandGPUColliders() + { + // Note: the collider arrays must be consistent across frames, so can't be counted with multiple threads, this is why it's separated from collider updating + collider_count_cpu = 0; + collider_count_gpu = 0; + for (size_t i = 0; i < colliders.GetCount(); ++i) + { + ColliderComponent& collider = colliders[i]; + if (collider.IsCPUEnabled()) + { + collider.cpu_index = collider_count_cpu; + collider_count_cpu++; + } + if (collider.IsGPUEnabled()) + { + collider.gpu_index = collider_count_gpu; + collider_count_gpu++; + } + } + } + void Scene::ScanAnimationDependencies() { if (animations.GetCount() == 0) diff --git a/WickedEngine/wiScene.h b/WickedEngine/wiScene.h index eac01457d..5239fa586 100644 --- a/WickedEngine/wiScene.h +++ b/WickedEngine/wiScene.h @@ -270,8 +270,6 @@ namespace wi::scene wi::vector transforms_temp; // CPU/GPU Colliders: - std::atomic collider_allocator_cpu{ 0 }; - std::atomic 
collider_allocator_gpu{ 0 }; wi::vector collider_deinterleaved_data; uint32_t collider_count_cpu = 0; uint32_t collider_count_gpu = 0; @@ -280,6 +278,9 @@ namespace wi::scene ColliderComponent* colliders_cpu = nullptr; ColliderComponent* colliders_gpu = nullptr; wi::BVH collider_bvh; + wi::BVH collider_bvh_next; + wi::jobsystem::context collider_bvh_workload; + void CountCPUandGPUColliders(); // Ocean GPU state: wi::Ocean ocean; diff --git a/WickedEngine/wiScene_Components.h b/WickedEngine/wiScene_Components.h index 3f6eab4f9..4f5410c3f 100644 --- a/WickedEngine/wiScene_Components.h +++ b/WickedEngine/wiScene_Components.h @@ -2027,6 +2027,8 @@ namespace wi::scene wi::primitive::Plane plane; uint32_t layerMask = ~0u; float dist = 0; + uint32_t cpu_index = 0; + uint32_t gpu_index = 0; void Serialize(wi::Archive& archive, wi::ecs::EntitySerializer& seri); }; diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index 450839ca4..b943b322a 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wi::version // minor features, major updates, breaking compatibility changes const int minor = 71; // minor bug fixes, alterations, refactors, updates - const int revision = 757; + const int revision = 758; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);