From 0da0f3b57cae3fec68dfdb63282985dbd82c907f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tur=C3=A1nszki=20J=C3=A1nos?= Date: Thu, 20 Mar 2025 08:20:45 +0100 Subject: [PATCH] improved stencil composition for renderpath2D --- .../scripts/fighting_game/fighting_game.lua | 6 +- WickedEngine/offlineshadercompiler.cpp | 5 + WickedEngine/shaders/ShaderInterop_Renderer.h | 7 + WickedEngine/shaders/Shaders_SOURCE.vcxitems | 3 + .../shaders/Shaders_SOURCE.vcxitems.filters | 3 + WickedEngine/shaders/copyStencilBitPS.hlsl | 39 ++- WickedEngine/shaders/extractStencilBitPS.hlsl | 12 + WickedEngine/wiApplication.cpp | 40 +-- WickedEngine/wiEnums.h | 11 +- WickedEngine/wiRenderPath2D.cpp | 216 +++++----------- WickedEngine/wiRenderPath2D.h | 5 +- WickedEngine/wiRenderPath3D.cpp | 5 +- WickedEngine/wiRenderer.cpp | 232 ++++++++++++++++-- WickedEngine/wiRenderer.h | 14 ++ WickedEngine/wiVersion.cpp | 2 +- 15 files changed, 390 insertions(+), 210 deletions(-) create mode 100644 WickedEngine/shaders/extractStencilBitPS.hlsl diff --git a/Content/scripts/fighting_game/fighting_game.lua b/Content/scripts/fighting_game/fighting_game.lua index 33c4149f5..1b6ca3194 100644 --- a/Content/scripts/fighting_game/fighting_game.lua +++ b/Content/scripts/fighting_game/fighting_game.lua @@ -1910,7 +1910,9 @@ runProcess(function() -- Also save the active component that we can restore when ESCAPE is pressed local prevPath = application.GetActivePath() local path = RenderPath3D() - application.SetActivePath(path) + --path.SetResolutionScale(0.1) + --path.SetMSAASampleCount(8) + application.SetActivePath(path, 0.5, 0, 0, 0, FadeType.CrossFade) local help_text = "" help_text = help_text .. "Wicked Engine Fighting game sample script\n" @@ -2026,7 +2028,7 @@ runProcess(function() -- so if you loaded this script from the editor, you can go back to the editor with ESC backlog_post("EXIT") killProcesses() - application.SetActivePath(prevPath) + application.SetActivePath(prevPath, 0.5, 0, 0, 0, FadeType.CrossFade) return end if(not backlog_isactive() and input.Press(string.byte('R'))) then diff --git a/WickedEngine/offlineshadercompiler.cpp b/WickedEngine/offlineshadercompiler.cpp index 21359f545..913cd4011 100644 --- a/WickedEngine/offlineshadercompiler.cpp +++ b/WickedEngine/offlineshadercompiler.cpp @@ -269,6 +269,7 @@ wi::vector shaders = { {"ddgi_debugPS", wi::graphics::ShaderStage::PS }, {"copyDepthPS", wi::graphics::ShaderStage::PS }, {"copyStencilBitPS", wi::graphics::ShaderStage::PS }, + {"extractStencilBitPS", wi::graphics::ShaderStage::PS }, {"trailPS", wi::graphics::ShaderStage::PS }, @@ -480,6 +481,10 @@ int main(int argc, char* argv[]) shaders.push_back({ "ssgi_upsampleCS", wi::graphics::ShaderStage::CS }); shaders.back().permutations.emplace_back().defines = { "WIDE" }; + // permutations for copyStencilBitPS: + shaders.push_back({ "copyStencilBitPS", wi::graphics::ShaderStage::PS }); + shaders.back().permutations.emplace_back().defines = { "MSAA" }; + wi::jobsystem::Initialize(); wi::jobsystem::context ctx; diff --git a/WickedEngine/shaders/ShaderInterop_Renderer.h b/WickedEngine/shaders/ShaderInterop_Renderer.h index cecdb8423..af9cd0ec0 100644 --- a/WickedEngine/shaders/ShaderInterop_Renderer.h +++ b/WickedEngine/shaders/ShaderInterop_Renderer.h @@ -1687,6 +1687,13 @@ struct VirtualTextureTileRequestsPush int padding2; }; +struct StencilBitPush +{ + float2 output_resolution_rcp; + uint input_resolution; + uint bit; +}; + CBUFFER(TrailRendererCB, CBSLOT_TRAILRENDERER) { float4x4 g_xTrailTransform; diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems b/WickedEngine/shaders/Shaders_SOURCE.vcxitems index 2a5c52aa8..85567082d 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems @@ -286,6 +286,9 @@ Compute 4.0 + + Pixel + Pixel diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters index 440a362d3..f82e93841 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters @@ -1127,6 +1127,9 @@ CS + + PS + diff --git a/WickedEngine/shaders/copyStencilBitPS.hlsl b/WickedEngine/shaders/copyStencilBitPS.hlsl index 9b71a467e..b4a3ec81c 100644 --- a/WickedEngine/shaders/copyStencilBitPS.hlsl +++ b/WickedEngine/shaders/copyStencilBitPS.hlsl @@ -7,17 +7,42 @@ // Note: The simpler method is to use CopyTexture from ImageAspect::COLOR to ImageAspect::STENCIL // But Vulkan doesn't support that currently, so this is a workaround for that // Vulkan issue: https://github.com/KhronosGroup/Vulkan-Docs/issues/2079 +// +// Note: this is also used for scaling stencil, that's why it is working with UV coordinates + +PUSHCONSTANT(push, StencilBitPush); + +#ifdef MSAA + +Texture2DMS input_stencil : register(t0); + +void main(float4 pos : SV_Position, out uint coverage : SV_Coverage) +{ + const float2 uv = pos.xy * push.output_resolution_rcp; + const uint2 input_resolution = uint2(push.input_resolution & 0xFFFF, push.input_resolution >> 16u); + const uint2 input_pixel = uint2(uv * input_resolution); + uint2 dim; + uint sampleCount; + input_stencil.GetDimensions(dim.x, dim.y, sampleCount); + coverage = 0; + for(uint sam = 0; sam < sampleCount; ++sam) + { + if ((input_stencil.Load(input_pixel, sam) & push.bit) != 0) + coverage |= 1u << sam; + } +} + +#else Texture2D input_stencil : register(t0); -struct StencilBitPush -{ - uint bit; -}; -PUSHCONSTANT(push, StencilBitPush); - void main(float4 pos : SV_Position) { - if ((input_stencil[uint2(pos.xy)] & push.bit) == 0) + const float2 uv = pos.xy * push.output_resolution_rcp; + const uint2 input_resolution = uint2(push.input_resolution & 0xFFFF, push.input_resolution >> 16u); + const uint2 input_pixel = uint2(uv * input_resolution); + if ((input_stencil[input_pixel] & push.bit) == 0) discard; } + +#endif // MSAA diff --git a/WickedEngine/shaders/extractStencilBitPS.hlsl b/WickedEngine/shaders/extractStencilBitPS.hlsl new file mode 100644 index 000000000..fc19acfa0 --- /dev/null +++ b/WickedEngine/shaders/extractStencilBitPS.hlsl @@ -0,0 +1,12 @@ +#include "globals.hlsli" +// This shader is running 8 full screen passes for each stencil bit to extract a color image +// Note: The simpler method is to use CopyTexture from ImageAspect::COLOR to ImageAspect::STENCIL +// But Vulkan doesn't support that currently, so this is a workaround for that +// Vulkan issue: https://github.com/KhronosGroup/Vulkan-Docs/issues/2079 + +PUSHCONSTANT(push, StencilBitPush); + +uint main() : SV_TARGET +{ + return push.bit; +} diff --git a/WickedEngine/wiApplication.cpp b/WickedEngine/wiApplication.cpp index 1ab8833b9..b5aef6ae5 100644 --- a/WickedEngine/wiApplication.cpp +++ b/WickedEngine/wiApplication.cpp @@ -70,9 +70,10 @@ namespace wi // Fade manager will activate on fadeout fadeManager.Start(fadeSeconds, fadeColor, [this, component]() { - if (GetActivePath() != nullptr) + RenderPath* renderpath = GetActivePath(); + if (renderpath != nullptr) { - GetActivePath()->Stop(); + renderpath->Stop(); } if (component != nullptr) @@ -173,12 +174,13 @@ namespace wi fadeManager.Update(deltaTime); - if (GetActivePath() != nullptr) + RenderPath* renderpath = GetActivePath(); + if (renderpath != nullptr) { ColorSpace colorspace = graphicsDevice->GetSwapChainColorSpace(&swapChain); - GetActivePath()->colorspace = colorspace; - GetActivePath()->init(canvas); - GetActivePath()->PreUpdate(); + renderpath->colorspace = colorspace; + renderpath->init(canvas); + renderpath->PreUpdate(); } // Fixed time update: @@ -270,10 +272,11 @@ namespace wi wi::resourcemanager::UpdateStreamingResources(dt); - if (GetActivePath() != nullptr) + RenderPath* renderpath = GetActivePath(); + if (renderpath != nullptr) { - GetActivePath()->Update(dt); - GetActivePath()->PostUpdate(); + renderpath->Update(dt); + renderpath->PostUpdate(); } wi::profiler::EndRange(range); // Update @@ -283,9 +286,10 @@ namespace wi { wi::lua::FixedUpdate(); - if (GetActivePath() != nullptr) + RenderPath* renderpath = GetActivePath(); + if (renderpath != nullptr) { - GetActivePath()->FixedUpdate(); + renderpath->FixedUpdate(); } } @@ -295,11 +299,12 @@ namespace wi wi::lua::Render(); - if (GetActivePath() != nullptr) + RenderPath* renderpath = GetActivePath(); + if (renderpath != nullptr) { - GetActivePath()->PreRender(); - GetActivePath()->Render(); - GetActivePath()->PostRender(); + renderpath->PreRender(); + renderpath->Render(); + renderpath->PostRender(); } wi::profiler::EndRange(range); // Render @@ -310,9 +315,10 @@ namespace wi auto range = wi::profiler::BeginRangeCPU("Compose"); ColorSpace colorspace = graphicsDevice->GetSwapChainColorSpace(&swapChain); - if (GetActivePath() != nullptr) + RenderPath* renderpath = GetActivePath(); + if (renderpath != nullptr) { - GetActivePath()->Compose(cmd); + renderpath->Compose(cmd); } if (fadeManager.IsActive()) diff --git a/WickedEngine/wiEnums.h b/WickedEngine/wiEnums.h index aef11b060..9a3a97f6d 100644 --- a/WickedEngine/wiEnums.h +++ b/WickedEngine/wiEnums.h @@ -197,6 +197,8 @@ namespace wi::enums PSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_UPSAMPLE, PSTYPE_COPY_DEPTH, PSTYPE_COPY_STENCIL_BIT, + PSTYPE_COPY_STENCIL_BIT_MSAA, + PSTYPE_EXTRACT_STENCIL_BIT, // geometry shaders @@ -460,7 +462,14 @@ namespace wi::enums DSSTYPE_COPY_STENCIL_BIT_5, DSSTYPE_COPY_STENCIL_BIT_6, DSSTYPE_COPY_STENCIL_BIT_7, - DSSTYPE_COPY_STENCIL_BIT_8, + DSSTYPE_EXTRACT_STENCIL_BIT_0, + DSSTYPE_EXTRACT_STENCIL_BIT_1, + DSSTYPE_EXTRACT_STENCIL_BIT_2, + DSSTYPE_EXTRACT_STENCIL_BIT_3, + DSSTYPE_EXTRACT_STENCIL_BIT_4, + DSSTYPE_EXTRACT_STENCIL_BIT_5, + DSSTYPE_EXTRACT_STENCIL_BIT_6, + DSSTYPE_EXTRACT_STENCIL_BIT_7, DSSTYPE_COUNT }; // blend states diff --git a/WickedEngine/wiRenderPath2D.cpp b/WickedEngine/wiRenderPath2D.cpp index 430b88205..9297daa5a 100644 --- a/WickedEngine/wiRenderPath2D.cpp +++ b/WickedEngine/wiRenderPath2D.cpp @@ -15,8 +15,8 @@ namespace wi rtFinal = {}; rtFinal_MSAA = {}; - rtStenciled = {}; - rtStenciled_resolved = {}; + rtStencilExtracted = {}; + stencilScaled = {}; } void RenderPath2D::ResizeBuffers() @@ -34,20 +34,24 @@ namespace wi TextureDesc desc = GetDepthStencil()->GetDesc(); desc.layout = ResourceState::SHADER_RESOURCE; desc.bind_flags = BindFlag::RENDER_TARGET | BindFlag::SHADER_RESOURCE; - desc.format = Format::R8G8B8A8_UNORM; - device->CreateTexture(&desc, nullptr, &rtStenciled); - device->SetName(&rtStenciled, "rtStenciled"); + desc.format = Format::R8_UINT; + desc.layout = ResourceState::SHADER_RESOURCE; + device->CreateTexture(&desc, nullptr, &rtStencilExtracted); + device->SetName(&rtStencilExtracted, "rtStencilExtracted"); - if (desc.sample_count > 1) - { - desc.sample_count = 1; - device->CreateTexture(&desc, nullptr, &rtStenciled_resolved); - device->SetName(&rtStenciled_resolved, "rtStenciled_resolved"); - } + desc.width = GetPhysicalWidth(); + desc.height = GetPhysicalHeight(); + desc.sample_count = sampleCount; + desc.bind_flags = BindFlag::DEPTH_STENCIL; + desc.format = Format::D24_UNORM_S8_UINT; + desc.layout = ResourceState::DEPTHSTENCIL; + device->CreateTexture(&desc, nullptr, &stencilScaled); + device->SetName(&stencilScaled, "stencilScaled"); } else { - rtStenciled = {}; // this will be deleted here + rtStencilExtracted = {}; + stencilScaled = {}; } { @@ -146,6 +150,7 @@ namespace wi { GraphicsDevice* device = wi::graphics::GetDevice(); CommandList cmd = device->BeginCommandList(); + device->EventBegin("RenderPath2D::Render", cmd); wi::image::SetCanvas(*this); wi::font::SetCanvas(*this); @@ -158,164 +163,60 @@ namespace wi const Texture* dsv = GetDepthStencil(); - // Special care for internal resolution, because stencil buffer is of internal resolution, - // so we might need to render stencil sprites to separate render target that matches internal resolution! - if (rtStenciled.IsValid()) + if (rtStencilExtracted.IsValid()) { - if (rtStenciled.GetDesc().sample_count > 1) - { - RenderPassImage rp[] = { - RenderPassImage::RenderTarget(&rtStenciled, RenderPassImage::LoadOp::CLEAR), - RenderPassImage::Resolve(&rtStenciled_resolved), - RenderPassImage::DepthStencil( - dsv, - RenderPassImage::LoadOp::LOAD, - RenderPassImage::StoreOp::STORE - ), - }; - device->RenderPassBegin(rp, arraysize(rp), cmd); - } - else - { - RenderPassImage rp[] = { - RenderPassImage::RenderTarget(&rtStenciled, RenderPassImage::LoadOp::CLEAR), - RenderPassImage::DepthStencil( - dsv, - RenderPassImage::LoadOp::LOAD, - RenderPassImage::StoreOp::STORE - ), - }; - device->RenderPassBegin(rp, arraysize(rp), cmd); - } - dsv = nullptr; - - Viewport vp; - vp.width = (float)rtStenciled.GetDesc().width; - vp.height = (float)rtStenciled.GetDesc().height; - device->BindViewports(1, &vp, cmd); - - device->EventBegin("STENCIL Sprite Layers", cmd); - for (auto& x : layers) - { - for (auto& y : x.items) - { - if (y.type == RenderItem2D::TYPE::SPRITE && - y.sprite != nullptr && - y.sprite->params.stencilComp != wi::image::STENCILMODE_DISABLED) - { - y.sprite->Draw(cmd); - } - } - } - device->EventEnd(cmd); - - device->RenderPassEnd(cmd); + wi::renderer::ExtractStencil(*dsv, rtStencilExtracted, cmd); } - if (dsv != nullptr && !rtStenciled.IsValid()) + RenderPassImage rp[4]; + uint32_t rp_count = 0; + if (rtFinal_MSAA.IsValid()) { - if (rtFinal_MSAA.IsValid()) - { - RenderPassImage rp[] = { - RenderPassImage::RenderTarget( - &rtFinal_MSAA, - RenderPassImage::LoadOp::CLEAR, - RenderPassImage::StoreOp::DONTCARE, - ResourceState::RENDERTARGET, - ResourceState::RENDERTARGET - ), - RenderPassImage::Resolve(&rtFinal), - RenderPassImage::DepthStencil( - dsv, - RenderPassImage::LoadOp::LOAD, - RenderPassImage::StoreOp::STORE - ), - }; - device->RenderPassBegin(rp, arraysize(rp), cmd); - } - else - { - RenderPassImage rp[] = { - RenderPassImage::RenderTarget(&rtFinal, RenderPassImage::LoadOp::CLEAR), - RenderPassImage::DepthStencil( - dsv, - RenderPassImage::LoadOp::LOAD, - RenderPassImage::StoreOp::STORE - ), - }; - device->RenderPassBegin(rp, arraysize(rp), cmd); - } + // MSAA: + rp[rp_count++] = RenderPassImage::RenderTarget( + &rtFinal_MSAA, + RenderPassImage::LoadOp::CLEAR, + RenderPassImage::StoreOp::DONTCARE, + ResourceState::RENDERTARGET, + ResourceState::RENDERTARGET + ); + rp[rp_count++] = RenderPassImage::Resolve(&rtFinal); } else { - if (rtFinal_MSAA.IsValid()) - { - RenderPassImage rp[] = { - RenderPassImage::RenderTarget( - &rtFinal_MSAA, - RenderPassImage::LoadOp::CLEAR, - RenderPassImage::StoreOp::DONTCARE, - ResourceState::RENDERTARGET, - ResourceState::RENDERTARGET - ), - RenderPassImage::Resolve(&rtFinal), - }; - device->RenderPassBegin(rp, arraysize(rp), cmd); - } - else - { - RenderPassImage rp[] = { - RenderPassImage::RenderTarget( - &rtFinal, - RenderPassImage::LoadOp::CLEAR - ), - }; - device->RenderPassBegin(rp, arraysize(rp), cmd); - } + // Single sample: + rp[rp_count++] = RenderPassImage::RenderTarget(&rtFinal, RenderPassImage::LoadOp::CLEAR); } + if (stencilScaled.IsValid()) + { + // Scaled stencil: + rp[rp_count++] = RenderPassImage::DepthStencil(&stencilScaled, RenderPassImage::LoadOp::CLEAR, RenderPassImage::StoreOp::DONTCARE); + } + else if (dsv != nullptr) + { + // Native stencil: + rp[rp_count++] = RenderPassImage::DepthStencil(dsv, RenderPassImage::LoadOp::LOAD, RenderPassImage::StoreOp::STORE); + } + device->RenderPassBegin(rp, rp_count, cmd); Viewport vp; vp.width = (float)rtFinal.GetDesc().width; vp.height = (float)rtFinal.GetDesc().height; device->BindViewports(1, &vp, cmd); - if (GetDepthStencil() != nullptr) + Rect rect; + rect.left = 0; + rect.right = (int32_t)rtFinal.GetDesc().width; + rect.top = 0; + rect.bottom = (int32_t)rtFinal.GetDesc().height; + device->BindScissorRects(1, &rect, cmd); + + if (stencilScaled.IsValid()) { - if (rtStenciled.IsValid()) - { - device->EventBegin("Copy STENCIL Sprite Layers", cmd); - wi::image::Params fx; - fx.enableFullScreen(); - if (rtStenciled.GetDesc().sample_count > 1) - { - wi::image::Draw(&rtStenciled_resolved, fx, cmd); - } - else - { - wi::image::Draw(&rtStenciled, fx, cmd); - } - device->EventEnd(cmd); - } - else - { - device->EventBegin("STENCIL Sprite Layers", cmd); - for (auto& x : layers) - { - for (auto& y : x.items) - { - if (y.type == RenderItem2D::TYPE::SPRITE && - y.sprite != nullptr && - y.sprite->params.stencilComp != wi::image::STENCILMODE_DISABLED) - { - y.sprite->Draw(cmd); - } - } - } - device->EventEnd(cmd); - } + wi::renderer::ScaleStencilMask(vp, rtStencilExtracted, cmd); } - device->EventBegin("Sprite Layers", cmd); + device->EventBegin("Layers", cmd); for (auto& x : layers) { for (auto& y : x.items) @@ -324,7 +225,7 @@ namespace wi { default: case RenderItem2D::TYPE::SPRITE: - if (y.sprite != nullptr && y.sprite->params.stencilComp == wi::image::STENCILMODE_DISABLED) + if (y.sprite != nullptr) { y.sprite->Draw(cmd); } @@ -344,10 +245,15 @@ namespace wi device->RenderPassEnd(cmd); + device->EventEnd(cmd); + RenderPath::Render(); } void RenderPath2D::Compose(CommandList cmd) const { + GraphicsDevice* device = wi::graphics::GetDevice(); + device->EventBegin("RenderPath2D::Compose", cmd); + wi::image::Params fx; fx.enableFullScreen(); fx.blendFlag = wi::enums::BLENDMODE_PREMULTIPLIED; @@ -358,6 +264,8 @@ namespace wi } wi::image::Draw(&GetRenderResult(), fx, cmd); + device->EventEnd(cmd); + RenderPath::Compose(cmd); } diff --git a/WickedEngine/wiRenderPath2D.h b/WickedEngine/wiRenderPath2D.h index 9cdf2bdf0..b00ab6bd4 100644 --- a/WickedEngine/wiRenderPath2D.h +++ b/WickedEngine/wiRenderPath2D.h @@ -14,8 +14,9 @@ namespace wi public RenderPath { protected: - wi::graphics::Texture rtStenciled; - wi::graphics::Texture rtStenciled_resolved; + wi::graphics::Texture rtStencilExtracted; + wi::graphics::Texture stencilScaled; + wi::graphics::Texture rtFinal; wi::graphics::Texture rtFinal_MSAA; diff --git a/WickedEngine/wiRenderPath3D.cpp b/WickedEngine/wiRenderPath3D.cpp index 21a6d8399..23e102351 100644 --- a/WickedEngine/wiRenderPath3D.cpp +++ b/WickedEngine/wiRenderPath3D.cpp @@ -1786,15 +1786,14 @@ namespace wi void RenderPath3D::Compose(CommandList cmd) const { GraphicsDevice* device = wi::graphics::GetDevice(); + device->EventBegin("RenderPath3D::Compose", cmd); wi::image::Params fx; fx.blendFlag = BLENDMODE_OPAQUE; fx.quality = wi::image::QUALITY_LINEAR; fx.enableFullScreen(); - device->EventBegin("Composition", cmd); wi::image::Draw(GetLastPostprocessRT(), fx, cmd); - device->EventEnd(cmd); if ( wi::renderer::GetDebugLightCulling() || @@ -1807,6 +1806,8 @@ namespace wi wi::image::Draw(&debugUAV, fx, cmd); } + device->EventEnd(cmd); + RenderPath2D::Compose(cmd); } diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp index b6dad47de..daca671e6 100644 --- a/WickedEngine/wiRenderer.cpp +++ b/WickedEngine/wiRenderer.cpp @@ -673,6 +673,8 @@ PipelineState PSO_volumetricclouds_upsample; PipelineState PSO_outline; PipelineState PSO_copyDepth; PipelineState PSO_copyStencilBit[8]; +PipelineState PSO_copyStencilBit_MSAA[8]; +PipelineState PSO_extractStencilBit[8]; RaytracingPipelineState RTPSO_reflection; @@ -937,6 +939,8 @@ void LoadShaders() wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::PS, shaders[PSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_UPSAMPLE], "volumetricCloud_upsamplePS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::PS, shaders[PSTYPE_COPY_DEPTH], "copyDepthPS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::PS, shaders[PSTYPE_COPY_STENCIL_BIT], "copyStencilBitPS.cso"); }); + wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::PS, shaders[PSTYPE_COPY_STENCIL_BIT_MSAA], "copyStencilBitPS.cso", ShaderModel::SM_6_0, {"MSAA"}); }); + wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::PS, shaders[PSTYPE_EXTRACT_STENCIL_BIT], "extractStencilBitPS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::PS, shaders[PSTYPE_PAINTDECAL], "paintdecalPS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::GS, shaders[GSTYPE_VOXELIZER], "objectGS_voxelizer.cso"); }); @@ -1493,6 +1497,20 @@ void LoadShaders() desc.dss = &depthStencils[DSSTYPE_COPY_STENCIL_BIT_0 + i]; device->CreatePipelineState(&desc, &PSO_copyStencilBit[i]); } + + desc.ps = &shaders[PSTYPE_COPY_STENCIL_BIT_MSAA]; + for (int i = 0; i < 8; ++i) + { + desc.dss = &depthStencils[DSSTYPE_COPY_STENCIL_BIT_0 + i]; + device->CreatePipelineState(&desc, &PSO_copyStencilBit_MSAA[i]); + } + + desc.ps = &shaders[PSTYPE_EXTRACT_STENCIL_BIT]; + for (int i = 0; i < 8; ++i) + { + desc.dss = &depthStencils[DSSTYPE_EXTRACT_STENCIL_BIT_0 + i]; + device->CreatePipelineState(&desc, &PSO_extractStencilBit[i]); + } }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { PipelineStateDesc desc; @@ -2342,6 +2360,16 @@ void SetUpStates() depthStencils[DSSTYPE_COPY_STENCIL_BIT_0 + i] = dsd; } + dsd.stencil_write_mask = 0; + dsd.front_face.stencil_func = ComparisonFunc::EQUAL; + dsd.front_face.stencil_pass_op = StencilOp::KEEP; + dsd.back_face = dsd.front_face; + for (int i = 0; i < 8; ++i) + { + dsd.stencil_read_mask = uint8_t(1 << i); + depthStencils[DSSTYPE_EXTRACT_STENCIL_BIT_0 + i] = dsd; + } + BlendState bd; bd.render_target[0].blend_enable = false; @@ -17805,6 +17833,7 @@ void CopyDepthStencil( if (manual_depthstencil_copy_required) { + // Vulkan workaround: PushBarrier(GPUBarrier::Image(input_depth, input_depth->desc.layout, ResourceState::SHADER_RESOURCE)); PushBarrier(GPUBarrier::Image(input_stencil, input_stencil->desc.layout, ResourceState::SHADER_RESOURCE)); FlushBarriers(cmd); @@ -17844,15 +17873,27 @@ void CopyDepthStencil( device->EventBegin("CopyStencilBits", cmd); device->BindResource(input_stencil, 0, cmd); + StencilBitPush push = {}; + push.output_resolution_rcp.x = 1.0f / vp.width; + push.output_resolution_rcp.y = 1.0f / vp.height; + push.input_resolution = (input_stencil->desc.width & 0xFFFF) | (input_stencil->desc.height << 16u); + uint32_t bit_index = 0; while (stencil_bits_to_copy != 0) { if (stencil_bits_to_copy & 0x1) { - device->BindPipelineState(&PSO_copyStencilBit[bit_index], cmd); - const uint bit = 1u << bit_index; - device->PushConstants(&bit, sizeof(bit), cmd); - device->BindStencilRef(bit, cmd); + if (input_stencil->desc.sample_count > 1) + { + device->BindPipelineState(&PSO_copyStencilBit_MSAA[bit_index], cmd); + } + else + { + device->BindPipelineState(&PSO_copyStencilBit[bit_index], cmd); + } + push.bit = 1u << bit_index; + device->PushConstants(&push, sizeof(push), cmd); + device->BindStencilRef(push.bit, cmd); device->Draw(3, 0, cmd); } bit_index++; @@ -17869,30 +17910,49 @@ void CopyDepthStencil( } else { - PushBarrier(GPUBarrier::Image(input_depth, input_depth->desc.layout, ResourceState::COPY_SRC)); - PushBarrier(GPUBarrier::Image(input_stencil, input_stencil->desc.layout, ResourceState::COPY_SRC)); + // Normal copy from color to depth/stencil aspects: + if (input_depth != nullptr) + { + PushBarrier(GPUBarrier::Image(input_depth, input_depth->desc.layout, ResourceState::COPY_SRC)); + } + if (input_stencil != nullptr) + { + PushBarrier(GPUBarrier::Image(input_stencil, input_stencil->desc.layout, ResourceState::COPY_SRC)); + } PushBarrier(GPUBarrier::Image(&output_depth_stencil, output_depth_stencil.desc.layout, ResourceState::COPY_DST)); FlushBarriers(cmd); - device->CopyTexture( - &output_depth_stencil, 0, 0, 0, 0, 0, - input_depth, 0, 0, - cmd, - nullptr, - ImageAspect::DEPTH, - ImageAspect::COLOR - ); - device->CopyTexture( - &output_depth_stencil, 0, 0, 0, 0, 0, - input_stencil, 0, 0, - cmd, - nullptr, - ImageAspect::STENCIL, - ImageAspect::COLOR - ); + if (input_depth != nullptr) + { + device->CopyTexture( + &output_depth_stencil, 0, 0, 0, 0, 0, + input_depth, 0, 0, + cmd, + nullptr, + ImageAspect::DEPTH, + ImageAspect::COLOR + ); + } + if (input_stencil != nullptr) + { + device->CopyTexture( + &output_depth_stencil, 0, 0, 0, 0, 0, + input_stencil, 0, 0, + cmd, + nullptr, + ImageAspect::STENCIL, + ImageAspect::COLOR + ); + } - PushBarrier(GPUBarrier::Image(input_depth, ResourceState::COPY_SRC, input_depth->desc.layout)); - PushBarrier(GPUBarrier::Image(input_stencil, ResourceState::COPY_SRC, input_stencil->desc.layout)); + if (input_depth != nullptr) + { + PushBarrier(GPUBarrier::Image(input_depth, ResourceState::COPY_SRC, input_depth->desc.layout)); + } + if (input_stencil != nullptr) + { + PushBarrier(GPUBarrier::Image(input_stencil, ResourceState::COPY_SRC, input_stencil->desc.layout)); + } PushBarrier(GPUBarrier::Image(&output_depth_stencil, ResourceState::COPY_DST, output_depth_stencil.desc.layout)); FlushBarriers(cmd); } @@ -17900,6 +17960,130 @@ void CopyDepthStencil( device->EventEnd(cmd); } +void ScaleStencilMask( + const Viewport& vp, + const Texture& input, + CommandList cmd +) +{ + device->EventBegin("ScaleStencilMask", cmd); + + device->BindResource(&input, 0, cmd); + + RenderPassInfo info = device->GetRenderPassInfo(cmd); + assert(IsFormatStencilSupport(info.ds_format)); // the current render pass must have stencil + + StencilBitPush push = {}; + push.output_resolution_rcp.x = 1.0f / vp.width; + push.output_resolution_rcp.y = 1.0f / vp.height; + push.input_resolution = (input.desc.width & 0xFFFF) | (input.desc.height << 16u); + + uint8_t stencil_bits_to_copy = 0xFF; + uint32_t bit_index = 0; + while (stencil_bits_to_copy != 0) + { + if (stencil_bits_to_copy & 0x1) + { + if (input.desc.sample_count > 1) + { + device->BindPipelineState(&PSO_copyStencilBit_MSAA[bit_index], cmd); + } + else + { + device->BindPipelineState(&PSO_copyStencilBit[bit_index], cmd); + } + push.bit = 1u << bit_index; + device->PushConstants(&push, sizeof(push), cmd); + device->BindStencilRef(push.bit, cmd); + device->Draw(3, 0, cmd); + } + bit_index++; + stencil_bits_to_copy >>= 1; + } + + device->EventEnd(cmd); +} + +void ExtractStencil( + const Texture& input_depthstencil, + const Texture& output, + CommandList cmd +) +{ + device->EventBegin("ExtractStencil", cmd); + + if (device->CheckCapability(GraphicsDeviceCapability::COPY_BETWEEN_DIFFERENT_IMAGE_ASPECTS_NOT_SUPPORTED)) + { + // Vulkan workaround: + device->EventBegin("ExtractStencilBits", cmd); + + RenderPassImage rp[] = { + RenderPassImage::RenderTarget(&output,RenderPassImage::LoadOp::CLEAR), + RenderPassImage::DepthStencil(&input_depthstencil), + }; + device->RenderPassBegin(rp, arraysize(rp), cmd); + + Viewport vp; + vp.width = (float)output.desc.width; + vp.height = (float)output.desc.height; + device->BindViewports(1, &vp, cmd); + + Rect rect; + rect.left = 0; + rect.right = output.desc.width; + rect.top = 0; + rect.bottom = output.desc.height; + device->BindScissorRects(1, &rect, cmd); + + StencilBitPush push = {}; + push.output_resolution_rcp.x = 1.0f / vp.width; + push.output_resolution_rcp.y = 1.0f / vp.height; + push.input_resolution = (input_depthstencil.desc.width & 0xFFFF) | (input_depthstencil.desc.height << 16u); + + device->BindStencilRef(0xFFFFFFFF, cmd); + + uint8_t stencil_bits_to_extract = 0xFF; + uint32_t bit_index = 0; + while (stencil_bits_to_extract != 0) + { + if (stencil_bits_to_extract & 0x1) + { + device->BindPipelineState(&PSO_extractStencilBit[bit_index], cmd); + push.bit = 1u << bit_index; + device->PushConstants(&push, sizeof(push), cmd); + device->Draw(3, 0, cmd); + } + bit_index++; + stencil_bits_to_extract >>= 1; + } + + device->RenderPassEnd(cmd); + + device->EventEnd(cmd); + } + else + { + // Normal copy from stencil aspect to color: + PushBarrier(GPUBarrier::Image(&input_depthstencil, input_depthstencil.desc.layout, ResourceState::COPY_SRC)); + PushBarrier(GPUBarrier::Image(&output, output.desc.layout, ResourceState::COPY_DST)); + FlushBarriers(cmd); + + device->CopyTexture( + &output, 0, 0, 0, 0, 0, + &input_depthstencil, 0, 0, + cmd, + nullptr, + ImageAspect::COLOR, + ImageAspect::STENCIL + ); + + PushBarrier(GPUBarrier::Image(&input_depthstencil, ResourceState::COPY_SRC, input_depthstencil.desc.layout)); + PushBarrier(GPUBarrier::Image(&output, ResourceState::COPY_DST, output.desc.layout)); + FlushBarriers(cmd); + } + + device->EventEnd(cmd); +} void ComputeReprojectedDepthPyramid( const Texture& input_depth, diff --git a/WickedEngine/wiRenderer.h b/WickedEngine/wiRenderer.h index c4dbaaeee..6488f3497 100644 --- a/WickedEngine/wiRenderer.h +++ b/WickedEngine/wiRenderer.h @@ -980,6 +980,20 @@ namespace wi::renderer bool depthstencil_already_cleared = false ); + // The input texture mask is scaled into the stencil of the current render pass with the specified viewport + void ScaleStencilMask( + const wi::graphics::Viewport& vp, + const wi::graphics::Texture& input, + wi::graphics::CommandList cmd + ); + + // Extract stencil from a depth stencil texture into a R8_UINT format texture + void ExtractStencil( + const wi::graphics::Texture& input_depthstencil, + const wi::graphics::Texture& output, + wi::graphics::CommandList cmd + ); + // Render the scene with ray tracing void RayTraceScene( const wi::scene::Scene& scene, diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index 25fff7e9e..895b22e78 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wi::version // minor features, major updates, breaking compatibility changes const int minor = 71; // minor bug fixes, alterations, refactors, updates - const int revision = 714; + const int revision = 715; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);