From 39d3ab1e19cd143d09e8d6f6aea0800e04b92ee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tur=C3=A1nszki=20J=C3=A1nos?= Date: Fri, 17 Jun 2022 15:19:42 +0200 Subject: [PATCH] Impostor persistent data (#465) * Impostor persistent gpu data * impostor buffer frustum culling * fixes * impostor ao * impostor wireframe fix --- WickedEngine/offlineshadercompiler.cpp | 1 + WickedEngine/shaders/ShaderInterop_Renderer.h | 83 ++++++++++-- WickedEngine/shaders/Shaders_SOURCE.vcxitems | 4 + .../shaders/Shaders_SOURCE.vcxitems.filters | 3 + WickedEngine/shaders/impostorPS.hlsl | 15 +++ WickedEngine/shaders/impostorPS_prepass.hlsl | 9 +- WickedEngine/shaders/impostorVS.hlsl | 68 ++-------- WickedEngine/shaders/impostor_prepareCS.hlsl | 89 +++++++++++++ .../shaders/visibility_resolveCS.hlsl | 34 ++--- WickedEngine/wiEnums.h | 1 + WickedEngine/wiRenderPath3D.cpp | 11 +- WickedEngine/wiRenderer.cpp | 126 ++++++++---------- WickedEngine/wiScene.cpp | 122 +++++++++++++++-- WickedEngine/wiScene.h | 14 +- WickedEngine/wiVersion.cpp | 2 +- 15 files changed, 407 insertions(+), 175 deletions(-) create mode 100644 WickedEngine/shaders/impostor_prepareCS.hlsl diff --git a/WickedEngine/offlineshadercompiler.cpp b/WickedEngine/offlineshadercompiler.cpp index d73ed8767..59f6141fe 100644 --- a/WickedEngine/offlineshadercompiler.cpp +++ b/WickedEngine/offlineshadercompiler.cpp @@ -192,6 +192,7 @@ wi::vector shaders = { {"ddgi_updateCS_depth", wi::graphics::ShaderStage::CS }, {"terrainVirtualTextureUpdateCS", wi::graphics::ShaderStage::CS }, {"meshlet_prepareCS", wi::graphics::ShaderStage::CS }, + {"impostor_prepareCS", wi::graphics::ShaderStage::CS }, {"emittedparticlePS_soft", wi::graphics::ShaderStage::PS }, diff --git a/WickedEngine/shaders/ShaderInterop_Renderer.h b/WickedEngine/shaders/ShaderInterop_Renderer.h index febfe0ddf..4abcdf670 100644 --- a/WickedEngine/shaders/ShaderInterop_Renderer.h +++ b/WickedEngine/shaders/ShaderInterop_Renderer.h @@ -12,7 +12,7 @@ struct ShaderScene int envmaparray; int globalenvmap; - int padding0; + int impostorInstanceOffset; int padding1; int TLAS; @@ -125,6 +125,70 @@ struct ShaderMaterial int texture_specularmap_index; uint shaderType; + void init() + { + baseColor = float4(1, 1, 1, 1); + subsurfaceScattering = float4(0, 0, 0, 0); + subsurfaceScattering_inv = float4(0, 0, 0, 0); + texMulAdd = float4(1, 1, 0, 0); + + roughness = 0; + reflectance = 0; + metalness = 0; + refraction = 0; + + normalMapStrength = 0; + parallaxOcclusionMapping = 0; + alphaTest = 0; + displacementMapping = 0; + + transmission = 0; + options = 0u; + emissive_r11g11b10 = 0; + specular_r11g11b10 = 0; + + layerMask = ~0u; + uvset_baseColorMap = -1; + uvset_surfaceMap = -1; + uvset_normalMap = -1; + + uvset_displacementMap = -1; + uvset_emissiveMap = -1; + uvset_occlusionMap = -1; + uvset_transmissionMap = -1; + + uvset_sheenColorMap = -1; + uvset_sheenRoughnessMap = -1; + uvset_clearcoatMap = -1; + uvset_clearcoatRoughnessMap = -1; + + uvset_clearcoatNormalMap = -1; + uvset_specularMap = -1; + sheenColor_r11g11b10 = 0; + sheenRoughness = 0; + + clearcoat = 0; + clearcoatRoughness = 0; + texture_basecolormap_index = -1; + texture_surfacemap_index = -1; + + texture_emissivemap_index = -1; + texture_normalmap_index = -1; + texture_displacementmap_index = -1; + texture_occlusionmap_index = -1; + + texture_transmissionmap_index = -1; + texture_sheencolormap_index = -1; + texture_sheenroughnessmap_index = -1; + texture_clearcoatmap_index = -1; + + texture_clearcoatroughnessmap_index = -1; + texture_clearcoatnormalmap_index = -1; + texture_specularmap_index = -1; + shaderType = 0; + + } + #ifndef __cplusplus float3 GetEmissive() { return Unpack_R11G11B10_FLOAT(emissive_r11g11b10); } float3 GetSpecular() { return Unpack_R11G11B10_FLOAT(specular_r11g11b10); } @@ -196,7 +260,7 @@ struct ShaderGeometry uint materialIndex; uint meshletOffset; // offset of this subset in meshlets uint meshletCount; - uint padding; + int impostorSliceOffset; float3 aabb_min; uint flags; @@ -218,6 +282,7 @@ struct ShaderGeometry materialIndex = 0; meshletOffset = 0; meshletCount = 0; + impostorSliceOffset = -1; aabb_min = float3(0, 0, 0); flags = 0; @@ -284,18 +349,17 @@ struct ShaderMeshInstance int lightmap; uint meshletOffset; // offset in the global meshlet buffer for first subset + float fadeDistance; int padding0; int padding1; - int padding2; + + float3 center; + float radius; ShaderTransform transform; ShaderTransform transformInverseTranspose; // This correctly handles non uniform scaling for normals ShaderTransform transformPrev; - // Bounding sphere of the instance: - float3 center; - float radius; - void init() { uid = 0; @@ -307,11 +371,12 @@ struct ShaderMeshInstance geometryOffset = 0; geometryCount = 0; meshletOffset = ~0u; + fadeDistance = 0; + center = float3(0, 0, 0); + radius = 0; transform.init(); transformInverseTranspose.init(); transformPrev.init(); - center = float3(0, 0, 0); - radius = 0; } }; diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems b/WickedEngine/shaders/Shaders_SOURCE.vcxitems index 54db35f82..c64825541 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems @@ -484,6 +484,10 @@ Compute 4.0 + + Compute + 4.0 + Vertex Vertex diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters index e0edf4930..5caa1e51e 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters @@ -1052,6 +1052,9 @@ CS + + CS + diff --git a/WickedEngine/shaders/impostorPS.hlsl b/WickedEngine/shaders/impostorPS.hlsl index c734cc95f..d27ea4de3 100644 --- a/WickedEngine/shaders/impostorPS.hlsl +++ b/WickedEngine/shaders/impostorPS.hlsl @@ -24,6 +24,9 @@ float4 main(VSOut input) : SV_Target float dist = length(V); V /= dist; + const uint2 pixel = input.pos.xy; + const float2 ScreenCoord = pixel * GetCamera().internal_resolution_rcp; + Surface surface; surface.init(); surface.flags |= SURFACE_FLAG_RECEIVE_SHADOW; @@ -37,6 +40,18 @@ float4 main(VSOut input) : SV_Target surface.pixel = input.pos.xy; surface.update(); +#ifndef PREPASS +#ifndef ENVMAPRENDERING +#ifndef TRANSPARENT + [branch] + if (GetCamera().texture_ao_index >= 0) + { + surface.occlusion *= bindless_textures_float[GetCamera().texture_ao_index].SampleLevel(sampler_linear_clamp, ScreenCoord, 0).r; + } +#endif // TRANSPARENT +#endif // ENVMAPRENDERING +#endif // PREPASS + Lighting lighting; lighting.create(0, 0, GetAmbient(surface.N), 0); diff --git a/WickedEngine/shaders/impostorPS_prepass.hlsl b/WickedEngine/shaders/impostorPS_prepass.hlsl index afb47d639..e2cb1c0c8 100644 --- a/WickedEngine/shaders/impostorPS_prepass.hlsl +++ b/WickedEngine/shaders/impostorPS_prepass.hlsl @@ -2,11 +2,16 @@ #include "impostorHF.hlsli" #include "objectHF.hlsli" -uint main(VSOut input, out uint coverage : SV_Coverage) : SV_Target +uint main(VSOut input, in uint primitiveID : SV_PrimitiveID, out uint coverage : SV_Coverage) : SV_Target { clip(dither(input.pos.xy + GetTemporalAASampleRotation()) - input.dither); float3 uv_col = float3(input.uv, input.slice); float alpha = impostorTex.Sample(sampler_linear_clamp, uv_col).a; coverage = AlphaToCoverage(alpha, 0.75, input.pos); - return ~0u; + + PrimitiveID prim; + prim.primitiveIndex = primitiveID; + prim.instanceIndex = GetScene().impostorInstanceOffset; + prim.subsetIndex = 0; + return prim.pack(); } diff --git a/WickedEngine/shaders/impostorVS.hlsl b/WickedEngine/shaders/impostorVS.hlsl index fdfc4d31c..4dcb35c37 100644 --- a/WickedEngine/shaders/impostorVS.hlsl +++ b/WickedEngine/shaders/impostorVS.hlsl @@ -1,66 +1,26 @@ #include "globals.hlsli" #include "impostorHF.hlsli" -struct ImpostorPush -{ - uint instanceOffset; -}; -PUSHCONSTANT(push, ImpostorPush); - -static const float3 BILLBOARD[] = { - float3(-1, -1, 0), - float3(1, -1, 0), - float3(-1, 1, 0), - float3(-1, 1, 0), - float3(1, -1, 0), - float3(1, 1, 0), +static const float2 BILLBOARD[] = { + float2(-1, -1), + float2(1, -1), + float2(-1, 1), + float2(1, 1), }; -ByteAddressBuffer impostorBuffer : register(t0); +ByteAddressBuffer vb_pos_nor : register(t0); +ByteAddressBuffer impostor_data : register(t2); -VSOut main(uint fakeIndex : SV_VERTEXID) +VSOut main(uint vertexID : SV_VertexID) { - const uint vertexID = fakeIndex % 6; - const uint instanceID = fakeIndex / 6; - - ShaderMeshInstancePointer poi = impostorBuffer.Load(push.instanceOffset + instanceID * sizeof(ShaderMeshInstancePointer)); - ShaderMeshInstance instance = load_instance(poi.GetInstanceIndex()); - - float3 pos = BILLBOARD[vertexID]; - float2 uv = float2(pos.xy * float2(0.5f, -0.5f) + 0.5f); - uint slice = poi.GetFrustumIndex() * impostorCaptureAngles * 3; - - // We rotate the billboard to face camera, but unlike emitted particles, - // they don't rotate according to camera rotation, but the camera position relative - // to the impostor (at least for now) - float3 origin = instance.center; - float3 up = float3(0, 1, 0); - float3 face = GetCamera().position - origin; - face.y = 0; // only rotate around Y axis! - face = normalize(face); - float3 right = normalize(cross(face, up)); - pos = mul(pos, float3x3(right, up, face)); - - pos *= instance.radius; - pos += instance.center; - - // Decide which slice to show according to billboard facing direction: - float angle = acos(dot(face.xz, float2(0, 1))) / PI; - if (cross(face, float3(0, 0, 1)).y < 0) - { - angle = 2 - angle; - } - angle *= 0.5f; - angle = saturate(angle - 0.0001); - slice += uint(angle * impostorCaptureAngles) * 3; + uint2 data = impostor_data.Load2((vertexID / 4u) * sizeof(uint2)); VSOut Out; - Out.pos3D = pos; + Out.pos3D = asfloat(vb_pos_nor.Load3(vertexID * sizeof(uint4))); Out.pos = mul(GetCamera().view_projection, float4(Out.pos3D, 1)); - Out.uv = uv; - Out.slice = slice; - Out.dither = poi.GetDither(); - Out.instanceColor = instance.color; - + Out.uv = float2(BILLBOARD[vertexID % 4u] * float2(0.5f, -0.5f) + 0.5f); + Out.slice = data.x & 0xFFFFFF; + Out.dither = float((data.x >> 24u) & 0xFF) / 255.0; + Out.instanceColor = data.y; return Out; } diff --git a/WickedEngine/shaders/impostor_prepareCS.hlsl b/WickedEngine/shaders/impostor_prepareCS.hlsl new file mode 100644 index 000000000..e5f075927 --- /dev/null +++ b/WickedEngine/shaders/impostor_prepareCS.hlsl @@ -0,0 +1,89 @@ +#include "globals.hlsli" + +static const uint THREADCOUNT = 64; + +static const float3 BILLBOARD[] = { + float3(-1, -1, 0), + float3(1, -1, 0), + float3(-1, 1, 0), + float3(1, 1, 0), +}; + +RWBuffer output_indices : register(u0); +RWByteAddressBuffer output_vertices_pos_nor : register(u1); +RWByteAddressBuffer output_impostor_data : register(u2); +RWStructuredBuffer output_indirect : register(u3); + +[numthreads(THREADCOUNT, 1, 1)] +void main(uint3 DTid : SV_DispatchThreadID) +{ + const uint instanceIndex = DTid.x; + ShaderMeshInstance instance = load_instance(instanceIndex); + ShaderGeometry geometry = load_geometry(instance.geometryOffset); + if (geometry.impostorSliceOffset < 0) + return; + float dist = distance(GetCamera().position, instance.center); + if (dist < instance.fadeDistance - instance.radius) + return; + + // Frustum culling: + ShaderSphere sphere; + sphere.center = instance.center; + sphere.radius = instance.radius; + if (!GetCamera().frustum.intersects(sphere)) + return; + + uint slice = uint(geometry.impostorSliceOffset); + + uint indexOffset; + InterlockedAdd(output_indirect[0].IndexCountPerInstance, 6u, indexOffset); + uint impostorOffset = indexOffset / 6u; + uint vertexOffset = impostorOffset * 4u; + + // Write out indices: + output_indices[indexOffset + 0] = vertexOffset + 0; + output_indices[indexOffset + 1] = vertexOffset + 1; + output_indices[indexOffset + 2] = vertexOffset + 2; + output_indices[indexOffset + 3] = vertexOffset + 2; + output_indices[indexOffset + 4] = vertexOffset + 1; + output_indices[indexOffset + 5] = vertexOffset + 3; + + // We rotate the billboard to face camera, but unlike emitted particles, + // they don't rotate according to camera rotation, but the camera position relative + // to the impostor (at least for now) + float3 origin = instance.center; + float3 up = float3(0, 1, 0); + float3 face = GetCamera().position - origin; + face.y = 0; // only rotate around Y axis! + face = normalize(face); + float3 right = normalize(cross(face, up)); + + // Decide which slice to show according to billboard facing direction: + float angle = acos(dot(face.xz, float2(0, 1))) / PI; + if (cross(face, float3(0, 0, 1)).y < 0) + { + angle = 2 - angle; + } + angle *= 0.5f; + angle = saturate(angle - 0.0001); + slice += uint(angle * impostorCaptureAngles) * 3; + + const float dither = max(0, instance.fadeDistance - dist) / instance.radius; + + // Write out per impostor data: + uint2 data = 0; + data.x |= slice & 0xFFFFFF; + data.x |= (uint(dither * 255) & 0xFF) << 24u; + data.y = instance.color; + output_impostor_data.Store2(impostorOffset * sizeof(uint2), data); + + // Write out vertices: + for (uint vertexID = 0; vertexID < 4; ++vertexID) + { + float3 pos = BILLBOARD[vertexID]; + pos = mul(pos, float3x3(right, up, face)); + pos *= instance.radius; + pos += instance.center; + output_vertices_pos_nor.Store4((vertexOffset + vertexID) * sizeof(uint4), uint4(asuint(pos), pack_unitvector(face))); + } +} diff --git a/WickedEngine/shaders/visibility_resolveCS.hlsl b/WickedEngine/shaders/visibility_resolveCS.hlsl index b4980e1f7..21fbe159e 100644 --- a/WickedEngine/shaders/visibility_resolveCS.hlsl +++ b/WickedEngine/shaders/visibility_resolveCS.hlsl @@ -67,32 +67,20 @@ void main(uint2 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex) [branch] if (any(primitiveID)) { + PrimitiveID prim; + prim.unpack(primitiveID); + + Surface surface; + surface.init(); + [branch] - if (primitiveID == ~0u) + if (surface.load(prim, ray.Origin, ray.Direction)) { - // Hack: impostors write ~0u primitiveID because their geometry is temporary and non indexable - // But we don't want to handle them like sky pixels, so force them to foreground - // This solves some issues with sky, cloud rendering when impostors are visible - float depth = 1; // invalid - uint bin = ~0u; // invalid - } - else - { - PrimitiveID prim; - prim.unpack(primitiveID); + float4 tmp = mul(GetCamera().view_projection, float4(surface.P, 1)); + tmp.xyz /= tmp.w; + depth = tmp.z; - Surface surface; - surface.init(); - - [branch] - if (surface.load(prim, ray.Origin, ray.Direction)) - { - float4 tmp = mul(GetCamera().view_projection, float4(surface.P, 1)); - tmp.xyz /= tmp.w; - depth = tmp.z; - - bin = surface.material.shaderType; - } + bin = surface.material.shaderType; } } else diff --git a/WickedEngine/wiEnums.h b/WickedEngine/wiEnums.h index a842f5da3..35b410bde 100644 --- a/WickedEngine/wiEnums.h +++ b/WickedEngine/wiEnums.h @@ -363,6 +363,7 @@ namespace wi::enums CSTYPE_DDGI_UPDATE_DEPTH, CSTYPE_TERRAIN_VIRTUALTEXTURE_UPDATE, CSTYPE_MESHLET_PREPARE, + CSTYPE_IMPOSTOR_PREPARE, // raytracing pipelines: diff --git a/WickedEngine/wiRenderPath3D.cpp b/WickedEngine/wiRenderPath3D.cpp index 74a1c0be5..efc098126 100644 --- a/WickedEngine/wiRenderPath3D.cpp +++ b/WickedEngine/wiRenderPath3D.cpp @@ -753,6 +753,7 @@ void RenderPath3D::Render() const static const uint32_t drawscene_flags = wi::renderer::DRAWSCENE_OPAQUE | + wi::renderer::DRAWSCENE_IMPOSTOR | wi::renderer::DRAWSCENE_HAIRPARTICLE | wi::renderer::DRAWSCENE_TESSELLATION | wi::renderer::DRAWSCENE_OCCLUSIONCULLING @@ -787,7 +788,7 @@ void RenderPath3D::Render() const vp.width = (float)depthBuffer_Main.GetDesc().width; vp.height = (float)depthBuffer_Main.GetDesc().height; device->BindViewports(1, &vp, cmd); - wi::renderer::DrawScene(visibility_main, RENDERPASS_PREPASS, cmd, drawscene_flags | wi::renderer::DRAWSCENE_IMPOSTOR); + wi::renderer::DrawScene(visibility_main, RENDERPASS_PREPASS, cmd, drawscene_flags); wi::profiler::EndRange(range); device->EventEnd(cmd); @@ -1106,7 +1107,12 @@ void RenderPath3D::Render() const device->RenderPassBegin(&renderpass_main, cmd); - if (!visibility_shading_in_compute) + if (visibility_shading_in_compute) + { + // In visibility compute shading, the impostors must still be drawn using rasterization: + wi::renderer::DrawScene(visibility_main, RENDERPASS_MAIN, cmd, wi::renderer::DRAWSCENE_IMPOSTOR); + } + else { auto range = wi::profiler::BeginRangeGPU("Opaque Scene", cmd); wi::renderer::DrawScene(visibility_main, RENDERPASS_MAIN, cmd, drawscene_flags); @@ -1420,7 +1426,6 @@ void RenderPath3D::RenderTransparents(CommandList cmd) const drawscene_flags |= wi::renderer::DRAWSCENE_OCCLUSIONCULLING; drawscene_flags |= wi::renderer::DRAWSCENE_HAIRPARTICLE; drawscene_flags |= wi::renderer::DRAWSCENE_TESSELLATION; - drawscene_flags |= wi::renderer::DRAWSCENE_IMPOSTOR; wi::renderer::DrawScene(visibility_main, RENDERPASS_MAIN, cmd, drawscene_flags); device->EventEnd(cmd); diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp index 206457f5f..556777aa4 100644 --- a/WickedEngine/wiRenderer.cpp +++ b/WickedEngine/wiRenderer.cpp @@ -1076,6 +1076,7 @@ void LoadShaders() wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_DDGI_UPDATE_DEPTH], "ddgi_updateCS_depth.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_TERRAIN_VIRTUALTEXTURE_UPDATE], "terrainVirtualTextureUpdateCS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_MESHLET_PREPARE], "meshlet_prepareCS.cso"); }); + wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_IMPOSTOR_PREPARE], "impostor_prepareCS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::HS, shaders[HSTYPE_OBJECT], "objectHS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::HS, shaders[HSTYPE_OBJECT_PREPASS], "objectHS_prepass.cso"); }); @@ -2625,83 +2626,34 @@ void RenderImpostors( const PipelineState* pso = &PSO_impostor[renderPass]; if (IsWireRender()) { - switch (renderPass) + if (renderPass != RENDERPASS_PREPASS) { - case RENDERPASS_MAIN: pso = &PSO_impostor_wire; - break; - default: + } + else + { return; } } - if (vis.scene->impostors.GetCount() > 0 && pso != nullptr) + if (vis.scene->impostors.GetCount() > 0 && pso != nullptr && vis.scene->impostorBuffer.IsValid()) { - uint32_t instanceCount = 0; - for (size_t impostorID = 0; impostorID < vis.scene->impostors.GetCount(); ++impostorID) - { - const ImpostorComponent& impostor = vis.scene->impostors[impostorID]; - instanceCount += (uint32_t)impostor.instances.size(); - } - - if (instanceCount == 0) - { - return; - } - - // Pre-allocate space for all the instances in GPU-buffer: - const uint32_t instanceDataSize = sizeof(ShaderMeshInstancePointer); - const size_t alloc_size = instanceCount * instanceDataSize; - GraphicsDevice::GPUAllocation instances = device->AllocateGPU(alloc_size, cmd); - - uint32_t drawableInstanceCount = 0; - for (size_t impostorID = 0; impostorID < vis.scene->impostors.GetCount(); ++impostorID) - { - const ImpostorComponent& impostor = vis.scene->impostors[impostorID]; - - for (auto& instanceIndex : impostor.instances) - { - const AABB& aabb = vis.scene->aabb_objects[instanceIndex]; - if (!vis.camera->frustum.CheckBoxFast(aabb)) - { - continue; - } - - const XMFLOAT3 center = aabb.getCenter(); - float distance = wi::math::Distance(vis.camera->Eye, center); - float radius = aabb.getRadius(); - - if (distance < impostor.swapInDistance - radius) - { - continue; - } - - const float dither = std::max(0.0f, impostor.swapInDistance - distance) / radius; - - ShaderMeshInstancePointer poi; - poi.Create(instanceIndex, uint32_t(impostor.textureIndex), dither); - - // memcpy whole structure into mapped pointer to avoid read from uncached memory: - std::memcpy((ShaderMeshInstancePointer*)instances.data + drawableInstanceCount, &poi, sizeof(poi)); - - drawableInstanceCount++; - } - } - - if (drawableInstanceCount == 0) - return; - device->EventBegin("RenderImpostors", cmd); device->BindStencilRef(STENCILREF_DEFAULT, cmd); device->BindPipelineState(pso, cmd); - device->PushConstants(&instances.offset, sizeof(uint), cmd); - - device->BindResource(&instances.buffer, 0, cmd); + device->BindIndexBuffer( + &vis.scene->impostorBuffer, + vis.scene->impostor_ib_format == Format::R32_UINT ? IndexBufferFormat::UINT32 : IndexBufferFormat::UINT16, + vis.scene->impostor_ib.offset, + cmd + ); + device->BindResource(&vis.scene->impostorBuffer, 0, cmd, vis.scene->impostor_vb.subresource_srv); + device->BindResource(&vis.scene->impostorBuffer, 2, cmd, vis.scene->impostor_data.subresource_srv); device->BindResource(&vis.scene->impostorArray, 1, cmd); - device->Draw(drawableInstanceCount * 6, 0, cmd); + device->DrawIndexedInstancedIndirect(&vis.scene->impostorIndirectBuffer, 0, cmd); device->EventEnd(cmd); } @@ -3718,6 +3670,41 @@ void UpdateRenderData( wi::profiler::EndRange(range); } + // Impostor prepare: + if (vis.scene->instanceArraySize > 0 && vis.scene->meshletBuffer.IsValid()) + { + device->EventBegin("Impostor prepare", cmd); + auto range = wi::profiler::BeginRangeGPU("Impostor prepare", cmd); + + barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->impostorIndirectBuffer, ResourceState::INDIRECT_ARGUMENT, ResourceState::COPY_DST)); + barrier_stack_flush(cmd); + IndirectDrawArgsIndexedInstanced clear_indirect = {}; + clear_indirect.index_count_per_instance = 0; + clear_indirect.instance_count = 1; + clear_indirect.start_index_location = 0; + clear_indirect.base_vertex_location = 0; + clear_indirect.start_instance_location = 0; + device->UpdateBuffer(&vis.scene->impostorIndirectBuffer, &clear_indirect, cmd, sizeof(clear_indirect), 0); + barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->impostorIndirectBuffer, ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS)); + barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->impostorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS)); + barrier_stack_flush(cmd); + + device->BindComputeShader(&shaders[CSTYPE_IMPOSTOR_PREPARE], cmd); + device->BindUAV(&vis.scene->impostorBuffer, 0, cmd, vis.scene->impostor_ib.subresource_uav); + device->BindUAV(&vis.scene->impostorBuffer, 1, cmd, vis.scene->impostor_vb.subresource_uav); + device->BindUAV(&vis.scene->impostorBuffer, 2, cmd, vis.scene->impostor_data.subresource_uav); + device->BindUAV(&vis.scene->impostorIndirectBuffer, 3, cmd); + + device->Dispatch(uint32_t(vis.scene->objects.GetCount() + 63u) / 64u, 1, 1, cmd); + + barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->impostorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE)); + barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->impostorIndirectBuffer,ResourceState::UNORDERED_ACCESS, ResourceState::INDIRECT_ARGUMENT)); + barrier_stack_flush(cmd); + + wi::profiler::EndRange(range); + device->EventEnd(cmd); + } + // Meshlets: if(vis.scene->instanceArraySize > 0 && vis.scene->meshletBuffer.IsValid()) { @@ -4968,9 +4955,6 @@ void DrawScene( } } - if (IsWireRender() && !transparent) - return; - uint32_t renderTypeFlags = 0; if (opaque) { @@ -4987,11 +4971,6 @@ void DrawScene( renderTypeFlags = RENDERTYPE_ALL; } - if (impostor) - { - RenderImpostors(vis, renderPass, cmd); - } - if (hairparticle) { if (IsWireRender() || !transparent) @@ -5039,6 +5018,11 @@ void DrawScene( RenderMeshes(vis, renderQueue, renderPass, renderTypeFlags, cmd, tessellation); } + if (impostor) + { + RenderImpostors(vis, renderPass, cmd); + } + device->BindShadingRate(ShadingRate::RATE_1X1, cmd); device->EventEnd(cmd); diff --git a/WickedEngine/wiScene.cpp b/WickedEngine/wiScene.cpp index db43d90dc..761e234b6 100644 --- a/WickedEngine/wiScene.cpp +++ b/WickedEngine/wiScene.cpp @@ -1480,6 +1480,11 @@ namespace wi::scene GraphicsDevice* device = wi::graphics::GetDevice(); instanceArraySize = objects.GetCount() + hairs.GetCount() + emitters.GetCount(); + if (impostors.GetCount() > 0) + { + impostorInstanceOffset = uint32_t(instanceArraySize); + instanceArraySize += 1; + } if (instanceBuffer.desc.size < (instanceArraySize * sizeof(ShaderMeshInstance))) { GPUBufferDesc desc; @@ -1502,6 +1507,11 @@ namespace wi::scene instanceArrayMapped = (ShaderMeshInstance*)instanceUploadBuffer[device->GetBufferIndex()].mapped_data; materialArraySize = materials.GetCount(); + if (impostors.GetCount() > 0) + { + impostorMaterialOffset = uint32_t(materialArraySize); + materialArraySize += 1; + } if (materialBuffer.desc.size < (materialArraySize * sizeof(ShaderMaterial))) { GPUBufferDesc desc; @@ -1619,6 +1629,11 @@ namespace wi::scene geometryArraySize = geometryAllocator.load(); geometryArraySize += hairs.GetCount(); geometryArraySize += emitters.GetCount(); + if (impostors.GetCount() > 0) + { + impostorGeometryOffset = uint32_t(geometryArraySize); + geometryArraySize += 1; + } if (geometryBuffer.desc.size < (geometryArraySize * sizeof(ShaderGeometry))) { GPUBufferDesc desc; @@ -1652,8 +1667,6 @@ namespace wi::scene RunArmatureUpdateSystem(ctx); - RunImpostorUpdateSystem(ctx); - RunWeatherUpdateSystem(ctx); wi::jobsystem::Wait(ctx); // dependencies @@ -1674,6 +1687,8 @@ namespace wi::scene RunSoundUpdateSystem(ctx); + RunImpostorUpdateSystem(ctx); + wi::jobsystem::Wait(ctx); // dependencies // Merge parallel bounds computation (depends on object update system): @@ -1879,6 +1894,54 @@ namespace wi::scene ddgiDepthTexture[1] = {}; } + impostor_ib_format = (((objects.GetCount() * 4) < 655536) ? Format::R16_UINT : Format::R32_UINT); + const size_t impostor_index_stride = impostor_ib_format == Format::R16_UINT ? sizeof(uint16_t) : sizeof(uint32_t); + const uint64_t required_impostor_buffer_size = objects.GetCount() * (sizeof(impostor_index_stride) * 6 + sizeof(uint4) * 4 + sizeof(uint2)); + if (impostorBuffer.desc.size < required_impostor_buffer_size) + { + GPUBufferDesc desc; + desc.usage = Usage::DEFAULT; + desc.size = required_impostor_buffer_size * 2; // *2 to grow fast + desc.bind_flags = BindFlag::VERTEX_BUFFER | BindFlag::INDEX_BUFFER | BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; + desc.misc_flags = ResourceMiscFlag::BUFFER_RAW; + device->CreateBuffer(&desc, nullptr, &impostorBuffer); + device->SetName(&impostorBuffer, "impostorBuffer"); + + const uint64_t alignment = device->GetMinOffsetAlignment(&desc); + uint64_t buffer_offset = 0ull; + + impostor_ib.offset = buffer_offset; + impostor_ib.size = objects.GetCount() * sizeof(impostor_index_stride) * 6; + buffer_offset += AlignTo(impostor_ib.size, alignment); + impostor_ib.subresource_srv = device->CreateSubresource(&impostorBuffer, SubresourceType::SRV, impostor_ib.offset, impostor_ib.size, &impostor_ib_format); + impostor_ib.subresource_uav = device->CreateSubresource(&impostorBuffer, SubresourceType::UAV, impostor_ib.offset, impostor_ib.size, &impostor_ib_format); + impostor_ib.descriptor_srv = device->GetDescriptorIndex(&impostorBuffer, SubresourceType::SRV, impostor_ib.subresource_srv); + impostor_ib.descriptor_uav = device->GetDescriptorIndex(&impostorBuffer, SubresourceType::UAV, impostor_ib.subresource_uav); + + impostor_vb.offset = buffer_offset; + impostor_vb.size = objects.GetCount() * sizeof(uint4) * 4; + buffer_offset += AlignTo(impostor_vb.size, alignment); + impostor_vb.subresource_srv = device->CreateSubresource(&impostorBuffer, SubresourceType::SRV, impostor_vb.offset, impostor_vb.size); + impostor_vb.subresource_uav = device->CreateSubresource(&impostorBuffer, SubresourceType::UAV, impostor_vb.offset, impostor_vb.size); + impostor_vb.descriptor_srv = device->GetDescriptorIndex(&impostorBuffer, SubresourceType::SRV, impostor_vb.subresource_srv); + impostor_vb.descriptor_uav = device->GetDescriptorIndex(&impostorBuffer, SubresourceType::UAV, impostor_vb.subresource_uav); + + impostor_data.offset = buffer_offset; + impostor_data.size = objects.GetCount() * sizeof(uint2); + buffer_offset += AlignTo(impostor_data.size, alignment); + impostor_data.subresource_srv = device->CreateSubresource(&impostorBuffer, SubresourceType::SRV, impostor_data.offset, impostor_data.size); + impostor_data.subresource_uav = device->CreateSubresource(&impostorBuffer, SubresourceType::UAV, impostor_data.offset, impostor_data.size); + impostor_data.descriptor_srv = device->GetDescriptorIndex(&impostorBuffer, SubresourceType::SRV, impostor_data.subresource_srv); + impostor_data.descriptor_uav = device->GetDescriptorIndex(&impostorBuffer, SubresourceType::UAV, impostor_data.subresource_uav); + + desc.stride = sizeof(IndirectDrawArgsIndexedInstanced); + desc.size = desc.stride; + desc.bind_flags = BindFlag::UNORDERED_ACCESS; + desc.misc_flags = ResourceMiscFlag::INDIRECT_ARGS | ResourceMiscFlag::BUFFER_STRUCTURED; + device->CreateBuffer(&desc, nullptr, &impostorIndirectBuffer); + device->SetName(&impostorIndirectBuffer, "impostorIndirectBuffer"); + } + // Shader scene resources: shaderscene.instancebuffer = device->GetDescriptorIndex(&instanceBuffer, SubresourceType::SRV); shaderscene.geometrybuffer = device->GetDescriptorIndex(&geometryBuffer, SubresourceType::SRV); @@ -1893,6 +1956,7 @@ namespace wi::scene { shaderscene.globalenvmap = -1; } + shaderscene.impostorInstanceOffset = impostorInstanceOffset; shaderscene.TLAS = device->GetDescriptorIndex(&TLAS, SubresourceType::SRV); shaderscene.BVH_counter = device->GetDescriptorIndex(&BVH.primitiveCounterBuffer, SubresourceType::SRV); shaderscene.BVH_nodes = device->GetDescriptorIndex(&BVH.bvhNodeBuffer, SubresourceType::SRV); @@ -3091,6 +3155,12 @@ namespace wi::scene geometry.aabb_max = mesh.aabb._max; geometry.tessellation_factor = mesh.tessellationFactor; + const ImpostorComponent* impostor = impostors.GetComponent(entity); + if (impostor != nullptr && impostor->textureIndex >= 0) + { + geometry.impostorSliceOffset = impostor->textureIndex * impostorCaptureAngles * 3; + } + if (mesh.IsDoubleSided()) { geometry.flags |= SHADERMESH_FLAG_DOUBLE_SIDED; @@ -3226,7 +3296,11 @@ namespace wi::scene renderpassdesc.attachments.push_back( RenderPassAttachment::RenderTarget( &impostorArray, - RenderPassAttachment::LoadOp::CLEAR + RenderPassAttachment::LoadOp::CLEAR, + RenderPassAttachment::StoreOp::STORE, + ResourceState::SHADER_RESOURCE, + ResourceState::RENDERTARGET, + ResourceState::SHADER_RESOURCE ) ); renderpassdesc.attachments.back().subresource = i * 3; @@ -3234,7 +3308,11 @@ namespace wi::scene renderpassdesc.attachments.push_back( RenderPassAttachment::RenderTarget( &impostorArray, - RenderPassAttachment::LoadOp::CLEAR + RenderPassAttachment::LoadOp::CLEAR, + RenderPassAttachment::StoreOp::STORE, + ResourceState::SHADER_RESOURCE, + ResourceState::RENDERTARGET, + ResourceState::SHADER_RESOURCE ) ); renderpassdesc.attachments.back().subresource = i * 3 + 1; @@ -3242,7 +3320,11 @@ namespace wi::scene renderpassdesc.attachments.push_back( RenderPassAttachment::RenderTarget( &impostorArray, - RenderPassAttachment::LoadOp::CLEAR + RenderPassAttachment::LoadOp::CLEAR, + RenderPassAttachment::StoreOp::STORE, + ResourceState::SHADER_RESOURCE, + ResourceState::RENDERTARGET, + ResourceState::SHADER_RESOURCE ) ); renderpassdesc.attachments.back().subresource = i * 3 + 2; @@ -3277,7 +3359,6 @@ namespace wi::scene for (size_t i = 0; i < impostors.GetCount(); ++i) { ImpostorComponent& impostor = impostors[i]; - impostor.instances.clear(); if (impostor.IsDirty()) { @@ -3299,6 +3380,30 @@ namespace wi::scene } } } + + if (impostors.GetCount() > 0) + { + ShaderMaterial material; + material.init(); + material.shaderType = ~0u; + std::memcpy(materialArrayMapped + impostorMaterialOffset, &material, sizeof(material)); + + ShaderGeometry geometry; + geometry.init(); + geometry.meshletCount = triangle_count_to_meshlet_count(uint32_t(objects.GetCount()) * 2); + geometry.meshletOffset = 0; // local meshlet offset + geometry.ib = impostor_ib.descriptor_srv; + geometry.vb_pos_nor_wind = impostor_vb.descriptor_srv; + geometry.materialIndex = impostorMaterialOffset; + std::memcpy(geometryArrayMapped + impostorGeometryOffset, &geometry, sizeof(geometry)); + + ShaderMeshInstance instance; + instance.init(); + instance.geometryOffset = impostorGeometryOffset; + instance.geometryCount = 1; + instance.meshletOffset = meshletAllocator.fetch_add(geometry.meshletCount); // global meshlet offset + std::memcpy(instanceArrayMapped + impostorInstanceOffset, &instance, sizeof(instance)); + } } void Scene::RunObjectUpdateSystem(wi::jobsystem::context& ctx) { @@ -3407,10 +3512,6 @@ namespace wi::scene if (impostor != nullptr) { object.fadeDistance = std::min(object.fadeDistance, impostor->swapInDistance); - - locker.lock(); - impostor->instances.push_back(args.jobIndex); - locker.unlock(); } SoftBodyPhysicsComponent* softbody = softbodies.GetComponent(object.meshID); @@ -3463,6 +3564,7 @@ namespace wi::scene inst.geometryOffset = mesh.geometryOffset; inst.geometryCount = (uint)mesh.subsets.size(); inst.meshletOffset = meshletAllocator.fetch_add(mesh.meshletCount); + inst.fadeDistance = object.fadeDistance; inst.center = object.center; inst.radius = object.radius; diff --git a/WickedEngine/wiScene.h b/WickedEngine/wiScene.h index ec071c671..02de73a67 100644 --- a/WickedEngine/wiScene.h +++ b/WickedEngine/wiScene.h @@ -618,7 +618,6 @@ namespace wi::scene float swapInDistance = 100.0f; // Non-serialized attributes: - wi::vector instances; mutable bool render_dirty = false; int textureIndex = -1; @@ -1328,6 +1327,7 @@ namespace wi::scene // 1) objects // 2) hair particles // 3) emitted particles + // 4) impostors wi::graphics::GPUBuffer instanceUploadBuffer[wi::graphics::GraphicsDevice::GetBufferCount()]; ShaderMeshInstance* instanceArrayMapped = nullptr; size_t instanceArraySize = 0; @@ -1338,6 +1338,7 @@ namespace wi::scene // 1) meshes * mesh.subsetCount // 2) hair particles * 1 // 3) emitted particles * 1 + // 4) impostors * 1 wi::graphics::GPUBuffer geometryUploadBuffer[wi::graphics::GraphicsDevice::GetBufferCount()]; ShaderGeometry* geometryArrayMapped = nullptr; size_t geometryArraySize = 0; @@ -1388,12 +1389,21 @@ namespace wi::scene wi::graphics::Texture envmapArray; wi::vector renderpasses_envmap; - // Impostor texture array state: + // Impostor state: static constexpr uint32_t maxImpostorCount = 8; static constexpr uint32_t impostorTextureDim = 128; wi::graphics::Texture impostorDepthStencil; wi::graphics::Texture impostorArray; wi::vector renderpasses_impostor; + wi::graphics::GPUBuffer impostorBuffer; + MeshComponent::BufferView impostor_ib; + MeshComponent::BufferView impostor_vb; + MeshComponent::BufferView impostor_data; + wi::graphics::Format impostor_ib_format = wi::graphics::Format::R32_UINT; + wi::graphics::GPUBuffer impostorIndirectBuffer; + uint32_t impostorInstanceOffset = ~0u; + uint32_t impostorGeometryOffset = ~0u; + uint32_t impostorMaterialOffset = ~0u; mutable std::atomic_bool lightmap_refresh_needed{ false }; diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index cdd2ab207..8b0fc38c4 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wi::version // minor features, major updates, breaking compatibility changes const int minor = 60; // minor bug fixes, alterations, refactors, updates - const int revision = 94; + const int revision = 95; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);