diff --git a/.github/workflows/build-nightly.yml b/.github/workflows/build-nightly.yml index 484cdfdaa..037c1c0bc 100644 --- a/.github/workflows/build-nightly.yml +++ b/.github/workflows/build-nightly.yml @@ -26,10 +26,8 @@ jobs: shell: cmd run: | move BUILD\x64\Release\Editor_Windows\Editor_Windows.exe Editor\ - xcopy WickedEngine\*.dll Editor\ move BUILD\x64\Release\Tests\Tests.exe Tests\ - xcopy WickedEngine\*.dll Tests\ - name: Package Editor uses: actions/upload-artifact@v2 @@ -47,7 +45,6 @@ jobs: Editor/*.ico Editor/*.lua Editor/*.exe - Editor/*.dll - name: Package Tests uses: actions/upload-artifact@v2 @@ -66,7 +63,6 @@ jobs: Tests/*.lua Tests/*.ttf Tests/*.exe - Tests/*.dll linux: diff --git a/.github/workflows/build-pr.yml b/.github/workflows/build-pr.yml index 06511dce3..116c3d49c 100644 --- a/.github/workflows/build-pr.yml +++ b/.github/workflows/build-pr.yml @@ -25,10 +25,8 @@ jobs: shell: cmd run: | move BUILD\x64\Release\Editor_Windows\Editor_Windows.exe Editor\ - xcopy WickedEngine\*.dll Editor\ move BUILD\x64\Release\Tests\Tests.exe Tests\ - xcopy WickedEngine\*.dll Tests\ - name: Package Editor uses: actions/upload-artifact@v2 @@ -46,7 +44,6 @@ jobs: Editor/*.ico Editor/*.lua Editor/*.exe - Editor/*.dll - name: Package Tests uses: actions/upload-artifact@v2 @@ -65,7 +62,6 @@ jobs: Tests/*.lua Tests/*.ttf Tests/*.exe - Tests/*.dll linux: diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index de4e01111..914da43ea 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -26,10 +26,8 @@ jobs: shell: cmd run: | move BUILD\x64\Release\Editor_Windows\Editor_Windows.exe Editor\ - xcopy WickedEngine\*.dll Editor\ move BUILD\x64\Release\Tests\Tests.exe Tests\ - xcopy WickedEngine\*.dll Tests\ - name: Package Editor uses: actions/upload-artifact@v2 @@ -47,7 +45,6 @@ jobs: Editor/*.ico Editor/*.lua Editor/*.exe - Editor/*.dll - name: Package Tests uses: actions/upload-artifact@v2 @@ -66,7 +63,6 @@ jobs: Tests/*.lua 
Tests/*.ttf Tests/*.exe - Tests/*.dll linux: diff --git a/Content/Documentation/WickedEngine-Documentation.md b/Content/Documentation/WickedEngine-Documentation.md index ff3b984f3..08e27ef7c 100644 --- a/Content/Documentation/WickedEngine-Documentation.md +++ b/Content/Documentation/WickedEngine-Documentation.md @@ -546,7 +546,7 @@ Wicked Engine supports bindless resource management, this can greatly improve pe Related functions to this feature: - `GetDescriptorIndex()` : returns an `int` that identifies the resource in bindless space. The queried resource can be a `Sampler` or a `GPUResource`. If the resource is not usable (for example if it was not created), then the function returns `-1`. **In this case, the shaders must not use the resource, but instead rely on dynamic branching to avoid it, because this would be undefined behaviour and could result in a GPU hang**. Otherwise, the index can be used by shaders to index into a descriptor heap. -- `PushConstants()` : This is an easy way to set a small amount of 32-bit values on the GPU, usable by shaders that declared a `PUSHCONSTANT(name, type)` block. There can be one push constant block per pipeline (graphics, compute or raytracing). +- `PushConstants()` : This is an easy way to set a small number of 32-bit values on the GPU, usable by shaders that declare a `PUSHCONSTANT(name, type)` block. There can be one push constant block per pipeline (graphics, compute or raytracing). The push constants are bound to the most recently set pipeline, so call this only after binding a graphics pipeline state or a compute shader. 
The shaders can use bindless descriptors with the following syntax example: diff --git a/Editor/Editor_SOURCE.vcxitems b/Editor/Editor_SOURCE.vcxitems index cf9f74db1..2057bf38f 100644 --- a/Editor/Editor_SOURCE.vcxitems +++ b/Editor/Editor_SOURCE.vcxitems @@ -153,16 +153,6 @@ - - true - true - Document - - - true - true - Document - true true diff --git a/Editor/Editor_SOURCE.vcxitems.filters b/Editor/Editor_SOURCE.vcxitems.filters index 00dc20113..5cd26018f 100644 --- a/Editor/Editor_SOURCE.vcxitems.filters +++ b/Editor/Editor_SOURCE.vcxitems.filters @@ -166,8 +166,4 @@ images - - - - \ No newline at end of file diff --git a/Editor/Editor_Windows.vcxproj b/Editor/Editor_Windows.vcxproj index 5dfa26291..4493943df 100644 --- a/Editor/Editor_Windows.vcxproj +++ b/Editor/Editor_Windows.vcxproj @@ -133,6 +133,12 @@ + + Document + + + Document + Discomap true diff --git a/Editor/PaintToolWindow.cpp b/Editor/PaintToolWindow.cpp index 103e5e922..9c3105bd1 100644 --- a/Editor/PaintToolWindow.cpp +++ b/Editor/PaintToolWindow.cpp @@ -307,7 +307,6 @@ void PaintToolWindow::Update(float dt) device->BindComputeShader(wi::renderer::GetShader(wi::enums::CSTYPE_PAINT_TEXTURE), cmd); - wi::renderer::BindCommonResources(cmd); device->BindResource(wi::texturehelper::getWhite(), 0, cmd); device->BindUAV(&editTexture, 0, cmd); diff --git a/Example_ImGui/Example_ImGui.cpp b/Example_ImGui/Example_ImGui.cpp index 57992015d..e684d5fff 100644 --- a/Example_ImGui/Example_ImGui.cpp +++ b/Example_ImGui/Example_ImGui.cpp @@ -20,6 +20,7 @@ using namespace wi::graphics; Shader imguiVS; Shader imguiPS; Texture fontTexture; +Sampler sampler; InputLayout imguiInputLayout; PipelineState imguiPSO; @@ -59,6 +60,13 @@ bool ImGui_Impl_CreateDeviceObjects() wi::graphics::GetDevice()->CreateTexture(&textureDesc, &textureData, &fontTexture); + SamplerDesc samplerDesc; + samplerDesc.address_u = TextureAddressMode::WRAP; + samplerDesc.address_v = TextureAddressMode::WRAP; + samplerDesc.address_w = 
TextureAddressMode::WRAP; + samplerDesc.filter = Filter::MAXIMUM_MIN_MAG_MIP_LINEAR; + wi::graphics::GetDevice()->CreateSampler(&samplerDesc, &sampler); + // Store our identifier io.Fonts->SetTexID((ImTextureID)&fontTexture); @@ -237,6 +245,8 @@ void Example_ImGui::Compose(wi::graphics::CommandList cmd) device->BindPipelineState(&imguiPSO, cmd); + device->BindSampler(&sampler, 0, cmd); + // Will project scissor/clipping rectangles into framebuffer space ImVec2 clip_off = drawData->DisplayPos; // (0,0) unless using multi-viewports ImVec2 clip_scale = drawData->FramebufferScale; // (1,1) unless using retina display which are often (2,2) diff --git a/Example_ImGui/ImGuiPS.hlsl b/Example_ImGui/ImGuiPS.hlsl index acbe4fb87..a01501e24 100644 --- a/Example_ImGui/ImGuiPS.hlsl +++ b/Example_ImGui/ImGuiPS.hlsl @@ -8,6 +8,7 @@ struct VertexOutput Texture2D texture0 : register(t0); SamplerState sampler0 : register(s0); +[RootSignature("RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0)), DescriptorTable(Sampler(s0))")] float4 main(VertexOutput input) : SV_TARGET { return input.col * texture0.Sample(sampler0, input.uv); diff --git a/Example_ImGui/ImGuiVS.hlsl b/Example_ImGui/ImGuiVS.hlsl index 79ba15e11..76822d220 100644 --- a/Example_ImGui/ImGuiVS.hlsl +++ b/Example_ImGui/ImGuiVS.hlsl @@ -17,6 +17,7 @@ cbuffer vertexBuffer : register(b0) float4x4 ProjectionMatrix; }; +[RootSignature("RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0)), DescriptorTable(Sampler(s0))")] VertexOutput main(VertexInput input) { VertexOutput output; diff --git a/README.md b/README.md index 477c2f78c..5b0ff40d3 100644 --- a/README.md +++ b/README.md @@ -214,7 +214,11 @@ You can specify command line arguments (without any prefix) to switch between re gpuvalidation - Use GPU Based Validation for graphics. This must be used together with `debugdevice` argument. + Use GPU Based Validation for graphics. 
This must be used together with the debugdevice argument. Currently DX12 only. + + + alwaysactive + The application will not be paused when the window is in the background. diff --git a/Template_UWP/Template_UWP.vcxproj b/Template_UWP/Template_UWP.vcxproj index c41a87a29..6e5304edf 100644 --- a/Template_UWP/Template_UWP.vcxproj +++ b/Template_UWP/Template_UWP.vcxproj @@ -159,16 +159,6 @@ - - true - Document - true - - - true - Document - true - diff --git a/Template_UWP/Template_UWP.vcxproj.filters b/Template_UWP/Template_UWP.vcxproj.filters index 724c704f0..b20708c84 100644 --- a/Template_UWP/Template_UWP.vcxproj.filters +++ b/Template_UWP/Template_UWP.vcxproj.filters @@ -43,8 +43,4 @@ - - - - \ No newline at end of file diff --git a/Template_Windows/Template_Windows.vcxproj b/Template_Windows/Template_Windows.vcxproj index 9daa99cad..8330ab19a 100644 --- a/Template_Windows/Template_Windows.vcxproj +++ b/Template_Windows/Template_Windows.vcxproj @@ -130,14 +130,10 @@ - true Document - true - true Document - true diff --git a/Template_Windows/Template_Windows.vcxproj.filters b/Template_Windows/Template_Windows.vcxproj.filters index 87ab8dae9..d35db9e48 100644 --- a/Template_Windows/Template_Windows.vcxproj.filters +++ b/Template_Windows/Template_Windows.vcxproj.filters @@ -48,7 +48,7 @@ - + \ No newline at end of file diff --git a/Tests/Tests.vcxproj b/Tests/Tests.vcxproj index c34ba14c0..b5e87af37 100644 --- a/Tests/Tests.vcxproj +++ b/Tests/Tests.vcxproj @@ -134,14 +134,10 @@ - true Document - true - true Document - true true diff --git a/WickedEngine/offlineshadercompiler.cpp b/WickedEngine/offlineshadercompiler.cpp index 008932ad0..d27b5b4bb 100644 --- a/WickedEngine/offlineshadercompiler.cpp +++ b/WickedEngine/offlineshadercompiler.cpp @@ -108,11 +108,12 @@ int main(int argc, char* argv[]) "tonemapCS.hlsl" , "fsr_upscalingCS.hlsl" , "fsr_sharpenCS.hlsl" , + "ssaoCS.hlsl" , + "rtreflectionCS.hlsl" , + "ssr_raytraceCS.hlsl" , "ssr_resolveCS.hlsl" , 
"ssr_temporalCS.hlsl" , - "ssaoCS.hlsl" , "ssr_medianCS.hlsl" , - "ssr_raytraceCS.hlsl" , "sharpenCS.hlsl" , "skinningCS.hlsl" , "resolveMSAADepthStencilCS.hlsl" , @@ -237,7 +238,6 @@ int main(int argc, char* argv[]) shaders[static_cast(ShaderStage::PS)] = { "emittedparticlePS_soft.hlsl" , - "screenPS.hlsl" , "imagePS.hlsl" , "emittedparticlePS_soft_lighting.hlsl" , "oceanSurfacePS.hlsl" , @@ -328,7 +328,7 @@ int main(int argc, char* argv[]) "shadowVS_transparent.hlsl" , "shadowVS.hlsl" , "shadowVS_alphatest.hlsl" , - "screenVS.hlsl" , + "postprocessVS.hlsl" , "renderlightmapVS.hlsl" , "raytrace_screenVS.hlsl" , "oceanSurfaceVS.hlsl" , @@ -340,7 +340,7 @@ int main(int argc, char* argv[]) "objectVS_prepass_alphatest.hlsl" , "objectVS_prepass_tessellation.hlsl" , "objectVS_prepass_alphatest_tessellation.hlsl" , - "objectVS_simple_tessellation.hlsl" , + "objectVS_simple_tessellation.hlsl" , "objectVS_debug.hlsl" , "lensFlareVS.hlsl" , "impostorVS.hlsl" , @@ -356,7 +356,7 @@ int main(int argc, char* argv[]) "cubeShadowVS_alphatest_emulation.hlsl" , "cubeShadowVS_transparent.hlsl" , "cubeShadowVS_transparent_emulation.hlsl" , - "cubeVS.hlsl", + "occludeeVS.hlsl", }; shaders[static_cast(ShaderStage::GS)] = { @@ -400,6 +400,7 @@ int main(int argc, char* argv[]) minshadermodels["rtshadowCS.hlsl"] = ShaderModel::SM_6_5; minshadermodels["rtaoCS.hlsl"] = ShaderModel::SM_6_5; minshadermodels["surfel_raytraceCS_rtapi.hlsl"] = ShaderModel::SM_6_5; + minshadermodels["rtreflectionCS.hlsl"] = ShaderModel::SM_6_5; wi::jobsystem::context ctx; diff --git a/WickedEngine/shaders/ShaderInterop.h b/WickedEngine/shaders/ShaderInterop.h index 61188a0a3..7f14044d4 100644 --- a/WickedEngine/shaders/ShaderInterop.h +++ b/WickedEngine/shaders/ShaderInterop.h @@ -30,8 +30,10 @@ using int4 = XMINT4; // Shader - side types: -#define CBUFFER(name, slot) cbuffer name : register(b ## slot) -#define CONSTANTBUFFER(name, type, slot) ConstantBuffer< type > name : register(b ## slot) +#define PASTE1(a, 
b) a##b +#define PASTE(a, b) PASTE1(a, b) +#define CBUFFER(name, slot) cbuffer name : register(PASTE(b, slot)) +#define CONSTANTBUFFER(name, type, slot) ConstantBuffer< type > name : register(PASTE(b, slot)) #ifdef SPIRV #define PUSHCONSTANT(name, type) [[vk::push_constant]] type name; @@ -68,28 +70,31 @@ struct IndirectDispatchArgs // Common buffers: // These are usable by all shaders +#define CBSLOT_IMAGE 0 +#define CBSLOT_FONT 0 #define CBSLOT_RENDERER_FRAME 0 #define CBSLOT_RENDERER_CAMERA 1 // On demand buffers: // These are bound on demand and alive until another is bound at the same slot -#define CBSLOT_RENDERER_MISC 5 -#define CBSLOT_RENDERER_FORWARD_LIGHTMASK 7 -#define CBSLOT_RENDERER_VOLUMELIGHT 7 -#define CBSLOT_RENDERER_VOXELIZER 7 -#define CBSLOT_RENDERER_TRACED 7 -#define CBSLOT_RENDERER_BVH 7 -#define CBSLOT_RENDERER_CUBEMAPRENDER 8 +#define CBSLOT_RENDERER_FORWARD_LIGHTMASK 2 +#define CBSLOT_RENDERER_CUBEMAPRENDER 3 +#define CBSLOT_RENDERER_VOLUMELIGHT 3 +#define CBSLOT_RENDERER_VOXELIZER 3 +#define CBSLOT_RENDERER_TRACED 2 +#define CBSLOT_RENDERER_MISC 3 -#define CBSLOT_OTHER_EMITTEDPARTICLE 7 -#define CBSLOT_OTHER_HAIRPARTICLE 7 -#define CBSLOT_OTHER_FFTGENERATOR 7 -#define CBSLOT_OTHER_OCEAN_SIMULATION_IMMUTABLE 7 -#define CBSLOT_OTHER_OCEAN_SIMULATION_PERFRAME 8 -#define CBSLOT_OTHER_OCEAN_RENDER 7 -#define CBSLOT_OTHER_CLOUDGENERATOR 7 -#define CBSLOT_OTHER_GPUSORTLIB 8 +#define CBSLOT_OTHER_EMITTEDPARTICLE 4 +#define CBSLOT_OTHER_HAIRPARTICLE 4 +#define CBSLOT_OTHER_FFTGENERATOR 3 +#define CBSLOT_OTHER_OCEAN_SIMULATION_IMMUTABLE 3 +#define CBSLOT_OTHER_OCEAN_SIMULATION_PERFRAME 4 +#define CBSLOT_OTHER_OCEAN_RENDER 3 +#define CBSLOT_OTHER_CLOUDGENERATOR 3 +#define CBSLOT_OTHER_GPUSORTLIB 4 +#define CBSLOT_MSAO 4 +#define CBSLOT_FSR 4 #endif // WI_SHADERINTEROP_H diff --git a/WickedEngine/shaders/ShaderInterop_Font.h b/WickedEngine/shaders/ShaderInterop_Font.h index 5f6c94a49..d743dd151 100644 --- a/WickedEngine/shaders/ShaderInterop_Font.h +++ 
b/WickedEngine/shaders/ShaderInterop_Font.h @@ -2,15 +2,25 @@ #define WI_SHADERINTEROP_FONT_H #include "ShaderInterop.h" -struct PushConstantsFont +struct FontVertex +{ + float2 pos; + float2 uv; +}; + +struct FontConstants { float4x4 transform; +}; +CONSTANTBUFFER(font, FontConstants, CBSLOT_FONT); + +struct FontPushConstants +{ uint color; int buffer_index; uint buffer_offset; int texture_index; }; -PUSHCONSTANT(push, PushConstantsFont); - +PUSHCONSTANT(font_push, FontPushConstants); #endif // WI_SHADERINTEROP_FONT_H diff --git a/WickedEngine/shaders/ShaderInterop_GPUSortLib.h b/WickedEngine/shaders/ShaderInterop_GPUSortLib.h index 63ad213c6..ea6fdb307 100644 --- a/WickedEngine/shaders/ShaderInterop_GPUSortLib.h +++ b/WickedEngine/shaders/ShaderInterop_GPUSortLib.h @@ -3,10 +3,11 @@ #include "ShaderInterop.h" -CBUFFER(SortConstants, CBSLOT_OTHER_GPUSORTLIB) +struct SortConstants { int3 job_params; uint counterReadOffset; }; +PUSHCONSTANT(sort, SortConstants); #endif // WI_SHADERINTEROP_GPUSORTLIB_H diff --git a/WickedEngine/shaders/ShaderInterop_Image.h b/WickedEngine/shaders/ShaderInterop_Image.h index de4f36261..dec366ab7 100644 --- a/WickedEngine/shaders/ShaderInterop_Image.h +++ b/WickedEngine/shaders/ShaderInterop_Image.h @@ -5,8 +5,9 @@ static const uint IMAGE_FLAG_EXTRACT_NORMALMAP = 1 << 0; static const uint IMAGE_FLAG_OUTPUT_COLOR_SPACE_HDR10_ST2084 = 1 << 1; static const uint IMAGE_FLAG_OUTPUT_COLOR_SPACE_LINEAR = 1 << 2; +static const uint IMAGE_FLAG_FULLSCREEN = 1 << 3; -struct PushConstantsImage +struct ImageConstants { float4 corners0; float4 corners1; @@ -16,15 +17,22 @@ struct PushConstantsImage uint2 texMulAdd; // packed half4 uint2 texMulAdd2; // packed half4 + uint2 output_resolution; + float2 output_resolution_rcp; +}; +CONSTANTBUFFER(image, ImageConstants, CBSLOT_IMAGE); + +struct ImagePushConstants +{ uint2 packed_color; // packed half4 uint flags; - int sampler_index; + float hdr_scaling; + int sampler_index; int texture_base_index; int 
texture_mask_index; int texture_background_index; }; -PUSHCONSTANT(push, PushConstantsImage); - +PUSHCONSTANT(image_push, ImagePushConstants); #endif // WI_SHADERINTEROP_IMAGE_H diff --git a/WickedEngine/shaders/ShaderInterop_Postprocess.h b/WickedEngine/shaders/ShaderInterop_Postprocess.h index 03387e5ae..363686ddf 100644 --- a/WickedEngine/shaders/ShaderInterop_Postprocess.h +++ b/WickedEngine/shaders/ShaderInterop_Postprocess.h @@ -71,6 +71,8 @@ struct MSAO float xRejectFadeoff; float xRcpAccentuation; }; +CONSTANTBUFFER(msao, MSAO, CBSLOT_MSAO); + //#define MSAO_SAMPLE_EXHAUSTIVELY struct MSAO_UPSAMPLE { @@ -102,6 +104,7 @@ struct FSR uint4 Const2; uint4 Const3; }; +CONSTANTBUFFER(fsr, FSR, CBSLOT_FSR); static const uint MOTIONBLUR_TILESIZE = 32; #define motionblur_strength postprocess.params0.x diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems b/WickedEngine/shaders/Shaders_SOURCE.vcxitems index 379b713d4..75aed4a38 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems @@ -481,7 +481,7 @@ Vertex Vertex - + Vertex Vertex Vertex @@ -909,6 +909,9 @@ Vertex + + Vertex + Compute 4.0 @@ -932,6 +935,10 @@ Compute 4.0 + + Compute + 4.0 + Compute 4.0 @@ -2398,26 +2405,6 @@ Compute Compute - - Pixel - Pixel - Pixel - Pixel - Pixel - Pixel - Pixel - Pixel - - - Vertex - Vertex - Vertex - Vertex - Vertex - Vertex - Vertex - Vertex - Pixel Pixel diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters index 763ae7859..d2767b3a5 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters @@ -494,9 +494,6 @@ PS - - PS - PS @@ -668,9 +665,6 @@ VS - - VS - VS @@ -719,7 +713,7 @@ VS - + VS @@ -1004,6 +998,12 @@ CS + + VS + + + CS + diff --git a/WickedEngine/shaders/circlePS.hlsl b/WickedEngine/shaders/circlePS.hlsl index e10e515b2..3d616d93a 100644 --- a/WickedEngine/shaders/circlePS.hlsl 
+++ b/WickedEngine/shaders/circlePS.hlsl @@ -1,10 +1,10 @@ struct GSOutput { - float4 pos : SV_POSITION; + float4 pos : SV_Position; float4 col : COLOR; }; -float4 main(GSOutput PSIn) : SV_TARGET +float4 main(GSOutput PSIn) : SV_Target { - return float4(PSIn.col.rgb,1); -} \ No newline at end of file + return float4(PSIn.col.rgb, 1); +} diff --git a/WickedEngine/shaders/cubeShadowGS_alphatest_emulation.hlsl b/WickedEngine/shaders/cubeShadowGS_alphatest_emulation.hlsl index 29c6a5c29..c60e354c1 100644 --- a/WickedEngine/shaders/cubeShadowGS_alphatest_emulation.hlsl +++ b/WickedEngine/shaders/cubeShadowGS_alphatest_emulation.hlsl @@ -4,14 +4,14 @@ struct GSInput { float4 pos : SV_POSITION; - float2 uv : UV; + float4 uvsets : UVSETS; uint RTIndex : RTINDEX; }; struct GSOutput { float4 pos : SV_POSITION; - float2 uv : UV; + float4 uvsets : UVSETS; uint RTIndex : SV_RenderTargetArrayIndex; }; @@ -25,7 +25,7 @@ void main( { GSOutput element; element.pos = input[i].pos; - element.uv = input[i].uv; + element.uvsets = input[i].uvsets; element.RTIndex = input[i].RTIndex; output.Append(element); } diff --git a/WickedEngine/shaders/cubeVS.hlsl b/WickedEngine/shaders/cubeVS.hlsl deleted file mode 100644 index 159154ecb..000000000 --- a/WickedEngine/shaders/cubeVS.hlsl +++ /dev/null @@ -1,17 +0,0 @@ -#include "globals.hlsli" -#include "cube.hlsli" - -struct CubePushConstants -{ - float4x4 transform; -}; -PUSHCONSTANT(push, CubePushConstants); - -float4 main(uint vID : SV_VERTEXID) : SV_Position -{ - // This is a 36 vertexcount variant: - //return mul(g_xTransform, CUBE[vID]); - - // This is a 14 vertex count trianglestrip variant: - return mul(push.transform, float4(vertexID_create_cube(vID) * 2 - 1,1)); -} diff --git a/WickedEngine/shaders/emittedparticleMS.hlsl b/WickedEngine/shaders/emittedparticleMS.hlsl index e8fe55f42..9241765aa 100644 --- a/WickedEngine/shaders/emittedparticleMS.hlsl +++ b/WickedEngine/shaders/emittedparticleMS.hlsl @@ -10,10 +10,10 @@ static const float3 
BILLBOARD[] = { }; static const uint BILLBOARD_VERTEXCOUNT = 4; -ByteAddressBuffer counterBuffer : register(t20); -StructuredBuffer particleBuffer : register(t21); -StructuredBuffer culledIndirectionBuffer : register(t22); -StructuredBuffer culledIndirectionBuffer2 : register(t23); +ByteAddressBuffer counterBuffer : register(t0); +StructuredBuffer particleBuffer : register(t1); +StructuredBuffer culledIndirectionBuffer : register(t2); +StructuredBuffer culledIndirectionBuffer2 : register(t3); static const uint VERTEXCOUNT = THREADCOUNT_MESH_SHADER * BILLBOARD_VERTEXCOUNT; static const uint PRIMITIVECOUNT = THREADCOUNT_MESH_SHADER * 2; diff --git a/WickedEngine/shaders/emittedparticlePS_simple.hlsl b/WickedEngine/shaders/emittedparticlePS_simple.hlsl index 17ad96eb9..366fabc03 100644 --- a/WickedEngine/shaders/emittedparticlePS_simple.hlsl +++ b/WickedEngine/shaders/emittedparticlePS_simple.hlsl @@ -1,4 +1,6 @@ +#include "globals.hlsli" + float4 main() : SV_TARGET { return float4(0.8f, 0.8f, 0.8f, 1.0f); -} \ No newline at end of file +} diff --git a/WickedEngine/shaders/emittedparticleVS.hlsl b/WickedEngine/shaders/emittedparticleVS.hlsl index ec9b47817..23d81eda8 100644 --- a/WickedEngine/shaders/emittedparticleVS.hlsl +++ b/WickedEngine/shaders/emittedparticleVS.hlsl @@ -9,9 +9,9 @@ static const float3 BILLBOARD[] = { float3(1, 1, 0), // 4 }; -StructuredBuffer particleBuffer : register(t21); -StructuredBuffer culledIndirectionBuffer : register(t22); -StructuredBuffer culledIndirectionBuffer2 : register(t23); +StructuredBuffer particleBuffer : register(t1); +StructuredBuffer culledIndirectionBuffer : register(t2); +StructuredBuffer culledIndirectionBuffer2 : register(t3); VertextoPixel main(uint vid : SV_VertexID, uint instanceID : SV_InstanceID) { diff --git a/WickedEngine/shaders/envMapGS_emulation.hlsl b/WickedEngine/shaders/envMapGS_emulation.hlsl index 6faf9553a..eb37d1240 100644 --- a/WickedEngine/shaders/envMapGS_emulation.hlsl +++ 
b/WickedEngine/shaders/envMapGS_emulation.hlsl @@ -4,6 +4,8 @@ struct GSInput { float4 pos : SV_POSITION; + uint instanceID : INSTANCEID; + uint emissiveColor : EMISSIVECOLOR; float4 color : COLOR; float4 uvsets : UVSETS; float2 atl : ATLAS; @@ -16,6 +18,8 @@ struct GSInput struct GSOutput { float4 pos : SV_POSITION; + uint instanceID : INSTANCEID; + uint emissiveColor : EMISSIVECOLOR; float4 color : COLOR; float4 uvsets : UVSETS; float2 atl : ATLAS; @@ -35,6 +39,8 @@ void main( { GSOutput element; element.pos = input[i].pos; + element.instanceID = input[i].instanceID; + element.emissiveColor = input[i].emissiveColor; element.color = input[i].color; element.uvsets = input[i].uvsets; element.atl = input[i].atl; @@ -44,4 +50,4 @@ void main( element.RTIndex = input[i].RTIndex; output.Append(element); } -} \ No newline at end of file +} diff --git a/WickedEngine/shaders/fft_512x512_c2c_CS.hlsl b/WickedEngine/shaders/fft_512x512_c2c_CS.hlsl index 9faccab3a..a9d6a566f 100644 --- a/WickedEngine/shaders/fft_512x512_c2c_CS.hlsl +++ b/WickedEngine/shaders/fft_512x512_c2c_CS.hlsl @@ -1,3 +1,4 @@ +#include "globals.hlsli" #include "ShaderInterop_FFTGenerator.h" #define COS_PI_4_16 0.70710678118654752440084436210485f diff --git a/WickedEngine/shaders/fft_512x512_c2c_v2_CS.hlsl b/WickedEngine/shaders/fft_512x512_c2c_v2_CS.hlsl index c2ad805cf..8b7783a43 100644 --- a/WickedEngine/shaders/fft_512x512_c2c_v2_CS.hlsl +++ b/WickedEngine/shaders/fft_512x512_c2c_v2_CS.hlsl @@ -1,3 +1,3 @@ #define FFT_V2 - +#include "globals.hlsli" #include "fft_512x512_c2c_CS.hlsl" diff --git a/WickedEngine/shaders/fontPS.hlsl b/WickedEngine/shaders/fontPS.hlsl index 39f37ecd9..21f959c15 100644 --- a/WickedEngine/shaders/fontPS.hlsl +++ b/WickedEngine/shaders/fontPS.hlsl @@ -1,15 +1,13 @@ #include "globals.hlsli" #include "ShaderInterop_Font.h" -SamplerState sampler_font : register(s0); - struct VertextoPixel { - float4 pos : SV_POSITION; - float2 tex : TEXCOORD0; + float4 pos : SV_Position; + float2 
uv : TEXCOORD0; }; -float4 main(VertextoPixel PSIn) : SV_TARGET +float4 main(VertextoPixel input) : SV_TARGET { - return bindless_textures[push.texture_index].SampleLevel(sampler_font, PSIn.tex, 0).rrrr * unpack_rgba(push.color); + return bindless_textures[font_push.texture_index].SampleLevel(sampler_linear_clamp, input.uv, 0).rrrr * unpack_rgba(font_push.color); } diff --git a/WickedEngine/shaders/fontVS.hlsl b/WickedEngine/shaders/fontVS.hlsl index 65ca56a1e..c46de918e 100644 --- a/WickedEngine/shaders/fontVS.hlsl +++ b/WickedEngine/shaders/fontVS.hlsl @@ -3,18 +3,17 @@ struct VertextoPixel { - float4 pos : SV_POSITION; - float2 tex : TEXCOORD0; + float4 pos : SV_Position; + float2 uv : TEXCOORD0; }; -VertextoPixel main(uint vertexID : SV_VERTEXID, uint instanceID : SV_InstanceID) +VertextoPixel main(uint vertexID : SV_VertexID, uint instanceID : SV_InstanceID) { - VertextoPixel Out; - uint vID = instanceID * 4 + vertexID; - uint3 raw = bindless_buffers[push.buffer_index].Load3(push.buffer_offset + vID * 12); - Out.pos = mul(push.transform, float4(asfloat(raw.xy), 0, 1)); - Out.tex = unpack_half2(raw.z); + FontVertex vertex = bindless_buffers[font_push.buffer_index].Load(font_push.buffer_offset + vID * sizeof(FontVertex)); + VertextoPixel Out; + Out.pos = mul(font.transform, float4(asfloat(vertex.pos), 0, 1)); + Out.uv = vertex.uv; return Out; } diff --git a/WickedEngine/shaders/fsr_sharpenCS.hlsl b/WickedEngine/shaders/fsr_sharpenCS.hlsl index 8c1575e56..3aa1f6186 100644 --- a/WickedEngine/shaders/fsr_sharpenCS.hlsl +++ b/WickedEngine/shaders/fsr_sharpenCS.hlsl @@ -1,8 +1,6 @@ #include "globals.hlsli" #include "ShaderInterop_Postprocess.h" -PUSHCONSTANT(fsr, FSR); - #define A_GPU 1 #define A_HLSL 1 #include "ffx-fsr/ffx_a.h" diff --git a/WickedEngine/shaders/fsr_upscalingCS.hlsl b/WickedEngine/shaders/fsr_upscalingCS.hlsl index dd0c534b3..4a32cf68a 100644 --- a/WickedEngine/shaders/fsr_upscalingCS.hlsl +++ b/WickedEngine/shaders/fsr_upscalingCS.hlsl @@ -1,8 +1,6 
@@ #include "globals.hlsli" #include "ShaderInterop_Postprocess.h" -PUSHCONSTANT(fsr, FSR); - #define A_GPU 1 #define A_HLSL 1 #include "ffx-fsr/ffx_a.h" diff --git a/WickedEngine/shaders/globals.hlsli b/WickedEngine/shaders/globals.hlsli index 24f11278e..ec3cafca8 100644 --- a/WickedEngine/shaders/globals.hlsli +++ b/WickedEngine/shaders/globals.hlsli @@ -5,6 +5,56 @@ #include "ShaderInterop.h" #include "ShaderInterop_Renderer.h" +// The root signature will affect shader compilation for DX12. +// The shader compiler will take the defined name: WICKED_ENGINE_DEFAULT_ROOTSIGNATURE and use it as root signature +// If you wish to specify custom root signature, make sure that this define is not available +// (for example: not including this file, or using #undef WICKED_ENGINE_DEFAULT_ROOTSIGNATURE) +#define WICKED_ENGINE_DEFAULT_ROOTSIGNATURE \ + "RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), " \ + "RootConstants(num32BitConstants=12, b999), " \ + "CBV(b0), " \ + "CBV(b1), " \ + "DescriptorTable( " \ + "CBV(b2, numDescriptors = 12, flags = DATA_STATIC_WHILE_SET_AT_EXECUTE)," \ + "SRV(t0, numDescriptors = 16, flags = DESCRIPTORS_VOLATILE | DATA_STATIC_WHILE_SET_AT_EXECUTE)," \ + "UAV(u0, numDescriptors = 16, flags = DESCRIPTORS_VOLATILE | DATA_STATIC_WHILE_SET_AT_EXECUTE)" \ + ")," \ + "DescriptorTable( " \ + "Sampler(s0, offset = 0, numDescriptors = 8, flags = DESCRIPTORS_VOLATILE)" \ + ")," \ + "DescriptorTable( " \ + "Sampler(s0, space = 1, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE)" \ + ")," \ + "DescriptorTable( " \ + "SRV(t0, space = 2, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 3, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 4, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 5, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | 
DATA_VOLATILE)," \ + "SRV(t0, space = 6, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 7, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 8, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 9, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 10, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 11, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 12, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 13, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "UAV(u0, space = 14, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "UAV(u0, space = 15, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "UAV(u0, space = 16, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "UAV(u0, space = 17, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)" \ + "), " \ + "StaticSampler(s100, addressU = TEXTURE_ADDRESS_CLAMP, addressV = TEXTURE_ADDRESS_CLAMP, addressW = TEXTURE_ADDRESS_CLAMP, filter = FILTER_MIN_MAG_MIP_LINEAR)," \ + "StaticSampler(s101, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, addressW = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_MIP_LINEAR)," \ + "StaticSampler(s102, addressU = TEXTURE_ADDRESS_MIRROR, addressV = TEXTURE_ADDRESS_MIRROR, addressW = TEXTURE_ADDRESS_MIRROR, filter = FILTER_MIN_MAG_MIP_LINEAR)," \ + "StaticSampler(s103, addressU = TEXTURE_ADDRESS_CLAMP, addressV = TEXTURE_ADDRESS_CLAMP, addressW = TEXTURE_ADDRESS_CLAMP, 
filter = FILTER_MIN_MAG_MIP_POINT)," \ + "StaticSampler(s104, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, addressW = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_MIP_POINT)," \ + "StaticSampler(s105, addressU = TEXTURE_ADDRESS_MIRROR, addressV = TEXTURE_ADDRESS_MIRROR, addressW = TEXTURE_ADDRESS_MIRROR, filter = FILTER_MIN_MAG_MIP_POINT)," \ + "StaticSampler(s106, addressU = TEXTURE_ADDRESS_CLAMP, addressV = TEXTURE_ADDRESS_CLAMP, addressW = TEXTURE_ADDRESS_CLAMP, filter = FILTER_ANISOTROPIC)," \ + "StaticSampler(s107, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, addressW = TEXTURE_ADDRESS_WRAP, filter = FILTER_ANISOTROPIC)," \ + "StaticSampler(s108, addressU = TEXTURE_ADDRESS_MIRROR, addressV = TEXTURE_ADDRESS_MIRROR, addressW = TEXTURE_ADDRESS_MIRROR, filter = FILTER_ANISOTROPIC)," \ + "StaticSampler(s109, addressU = TEXTURE_ADDRESS_CLAMP, addressV = TEXTURE_ADDRESS_CLAMP, addressW = TEXTURE_ADDRESS_CLAMP, filter = FILTER_COMPARISON_MIN_MAG_LINEAR_MIP_POINT, comparisonFunc = COMPARISON_GREATER_EQUAL)," + + // These are static samplers, they don't need to be bound: // They are also on slots that are not bindable as sampler bind slots must be in [0,15] range! 
SamplerState sampler_linear_clamp : register(s100); @@ -18,27 +68,26 @@ SamplerState sampler_aniso_wrap : register(s107); SamplerState sampler_aniso_mirror : register(s108); SamplerComparisonState sampler_cmp_depth : register(s109); -Texture2D bindless_textures[] : register(space1); -ByteAddressBuffer bindless_buffers[] : register(space2); -SamplerState bindless_samplers[] : register(space3); +SamplerState bindless_samplers[] : register(space1); +Texture2D bindless_textures[] : register(space2); +ByteAddressBuffer bindless_buffers[] : register(space3); Buffer bindless_ib[] : register(space4); #ifdef RTAPI RaytracingAccelerationStructure bindless_accelerationstructures[] : register(space5); #endif // RTAPI - Texture2DArray bindless_textures2DArray[] : register(space6); TextureCube bindless_cubemaps[] : register(space7); TextureCubeArray bindless_cubearrays[] : register(space8); Texture3D bindless_textures3D[] : register(space9); -RWTexture2D bindless_rwtextures[] : register(space10); -RWByteAddressBuffer bindless_rwbuffers[] : register(space11); -RWTexture2DArray bindless_rwtextures2DArray[] : register(space12); -RWTexture3D bindless_rwtextures3D[] : register(space13); +Texture2D bindless_textures_float[] : register(space10); +Texture2D bindless_textures_float2[] : register(space11); +Texture2D bindless_textures_uint2[] : register(space12); +Texture2D bindless_textures_uint4[] : register(space13); -Texture2D bindless_textures_float[] : register(space14); -Texture2D bindless_textures_float2[] : register(space15); -Texture2D bindless_textures_uint2[] : register(space16); -Texture2D bindless_textures_uint4[] : register(space17); +RWTexture2D bindless_rwtextures[] : register(space14); +RWByteAddressBuffer bindless_rwbuffers[] : register(space15); +RWTexture2DArray bindless_rwtextures2DArray[] : register(space16); +RWTexture3D bindless_rwtextures3D[] : register(space17); inline FrameCB GetFrame() { diff --git a/WickedEngine/shaders/gpusortlib_kickoffSortCS.hlsl 
b/WickedEngine/shaders/gpusortlib_kickoffSortCS.hlsl index e2f374ec5..27e1b4ccd 100644 --- a/WickedEngine/shaders/gpusortlib_kickoffSortCS.hlsl +++ b/WickedEngine/shaders/gpusortlib_kickoffSortCS.hlsl @@ -9,7 +9,7 @@ RWByteAddressBuffer indirectBuffers : register(u0); void main( uint3 DTid : SV_DispatchThreadID ) { // retrieve GPU itemcount: - uint itemCount = counterBuffer.Load(counterReadOffset); + uint itemCount = counterBuffer.Load(sort.counterReadOffset); if (itemCount > 0) { diff --git a/WickedEngine/shaders/gpusortlib_sortCS.hlsl b/WickedEngine/shaders/gpusortlib_sortCS.hlsl index 558ac73c3..a03e7b722 100644 --- a/WickedEngine/shaders/gpusortlib_sortCS.hlsl +++ b/WickedEngine/shaders/gpusortlib_sortCS.hlsl @@ -20,6 +20,7 @@ // THE SOFTWARE. // +#include "globals.hlsli" #include "ShaderInterop_GPUSortLib.h" #define SORT_SIZE 512 @@ -57,7 +58,7 @@ void main(uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex) { - uint NumElements = counterBuffer.Load(counterReadOffset); + uint NumElements = counterBuffer.Load(sort.counterReadOffset); uint GlobalBaseIndex = (Gid.x * SORT_SIZE) + GTid.x; uint LocalBaseIndex = GI; diff --git a/WickedEngine/shaders/gpusortlib_sortInnerCS.hlsl b/WickedEngine/shaders/gpusortlib_sortInnerCS.hlsl index 002ad2b01..a4b876dbf 100644 --- a/WickedEngine/shaders/gpusortlib_sortInnerCS.hlsl +++ b/WickedEngine/shaders/gpusortlib_sortInnerCS.hlsl @@ -20,6 +20,7 @@ // THE SOFTWARE. 
// +#include "globals.hlsli" #include "ShaderInterop_GPUSortLib.h" #define SORT_SIZE 512 @@ -52,7 +53,7 @@ void main(uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex) { - uint NumElements = counterBuffer.Load(counterReadOffset); + uint NumElements = counterBuffer.Load(sort.counterReadOffset); uint4 tgp; diff --git a/WickedEngine/shaders/gpusortlib_sortStepCS.hlsl b/WickedEngine/shaders/gpusortlib_sortStepCS.hlsl index 629ca514f..ffae6c127 100644 --- a/WickedEngine/shaders/gpusortlib_sortStepCS.hlsl +++ b/WickedEngine/shaders/gpusortlib_sortStepCS.hlsl @@ -20,6 +20,7 @@ // THE SOFTWARE. // +#include "globals.hlsli" #include "ShaderInterop_GPUSortLib.h" //-------------------------------------------------------------------------------------- @@ -34,7 +35,7 @@ RWStructuredBuffer indexBuffer : register(u0); void main(uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID) { - uint NumElements = counterBuffer.Load(counterReadOffset); + uint NumElements = counterBuffer.Load(sort.counterReadOffset); uint4 tgp; @@ -45,11 +46,11 @@ void main(uint3 Gid : SV_GroupID, uint localID = tgp.x + GTid.x; // calculate threadID within this sortable-array - uint index_low = localID & (job_params.x - 1); + uint index_low = localID & (sort.job_params.x - 1); uint index_high = 2 * (localID - index_low); uint index = tgp.y + index_high + index_low; - uint nSwapElem = tgp.y + index_high + job_params.y + job_params.z*index_low; + uint nSwapElem = tgp.y + index_high + sort.job_params.y + sort.job_params.z*index_low; if (nSwapElem < tgp.y + tgp.z) { diff --git a/WickedEngine/shaders/hairparticle_simulateCS.hlsl b/WickedEngine/shaders/hairparticle_simulateCS.hlsl index a4296d284..d6e320190 100644 --- a/WickedEngine/shaders/hairparticle_simulateCS.hlsl +++ b/WickedEngine/shaders/hairparticle_simulateCS.hlsl @@ -155,7 +155,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint groupIn // Apply forces: velocity += force; - normal += velocity * 
GetFrame().delta_time; + normal += velocity * clamp(GetFrame().delta_time, 0, 0.1); // clamp delta time to avoid simulation blowing up // Drag: velocity *= 0.98f; diff --git a/WickedEngine/shaders/imageHF.hlsli b/WickedEngine/shaders/imageHF.hlsli index 40f726a4b..bfaacb263 100644 --- a/WickedEngine/shaders/imageHF.hlsli +++ b/WickedEngine/shaders/imageHF.hlsli @@ -15,43 +15,59 @@ struct VertextoPixel float2 b1 : TEXCOORD4; float2 b2 : TEXCOORD5; float2 b3 : TEXCOORD6; - float4 uv_screen : TEXCOORD2; + float2 uv_screen() + { + return pos.xy * image.output_resolution_rcp; + } float4 compute_uvs() { - // Quad interpolation: http://reedbeta.com/blog/quadrilateral-interpolation-part-2/ + float2 uv0; + float2 uv1; - // Set up quadratic formula - float A = Wedge2D(b2, b3); - float B = Wedge2D(b3, q) - Wedge2D(b1, b2); - float C = Wedge2D(b1, q); - - // Solve for v - float2 uv; - if (abs(A) < 0.001) + [branch] + if (image_push.flags & IMAGE_FLAG_FULLSCREEN) { - // Linear form - uv.y = -C / B; + uv0 = uv_screen(); + uv1 = uv0; } else { - // Quadratic form. Take positive root for CCW winding with V-up - float discrim = B * B - 4 * A * C; - uv.y = 0.5 * (-B + sqrt(discrim)) / A; + // Quad interpolation: http://reedbeta.com/blog/quadrilateral-interpolation-part-2/ + + // Set up quadratic formula + float A = Wedge2D(b2, b3); + float B = Wedge2D(b3, q) - Wedge2D(b1, b2); + float C = Wedge2D(b1, q); + + // Solve for v + float2 uv; + if (abs(A) < 0.001) + { + // Linear form + uv.y = -C / B; + } + else + { + // Quadratic form. 
Take positive root for CCW winding with V-up + float discrim = B * B - 4 * A * C; + uv.y = 0.5 * (-B + sqrt(discrim)) / A; + } + + // Solve for u, using largest-magnitude component + float2 denom = b1 + uv.y * b3; + if (abs(denom.x) > abs(denom.y)) + uv.x = (q.x - b2.x * uv.y) / denom.x; + else + uv.x = (q.y - b2.y * uv.y) / denom.y; + + float4 texMulAdd = unpack_half4(image.texMulAdd); + float4 texMulAdd2 = unpack_half4(image.texMulAdd2); + + uv0 = mad(uv, texMulAdd.xy, texMulAdd.zw); + uv1 = mad(uv, texMulAdd2.xy, texMulAdd2.zw); } - // Solve for u, using largest-magnitude component - float2 denom = b1 + uv.y * b3; - if (abs(denom.x) > abs(denom.y)) - uv.x = (q.x - b2.x * uv.y) / denom.x; - else - uv.x = (q.y - b2.y * uv.y) / denom.y; - - float4 texMulAdd = unpack_half4(push.texMulAdd); - float4 texMulAdd2 = unpack_half4(push.texMulAdd2); - - float2 uv0 = mad(uv, texMulAdd.xy, texMulAdd.zw); - float2 uv1 = mad(uv, texMulAdd2.xy, texMulAdd2.zw); return float4(uv0, uv1); } }; diff --git a/WickedEngine/shaders/imagePS.hlsl b/WickedEngine/shaders/imagePS.hlsl index fbe4b8770..4869a62c1 100644 --- a/WickedEngine/shaders/imagePS.hlsl +++ b/WickedEngine/shaders/imagePS.hlsl @@ -2,17 +2,17 @@ float4 main(VertextoPixel input) : SV_TARGET { - SamplerState sam = bindless_samplers[push.sampler_index]; + SamplerState sam = bindless_samplers[image_push.sampler_index]; float4 uvsets = input.compute_uvs(); - float4 color = unpack_half4(push.packed_color); + float4 color = unpack_half4(image_push.packed_color); [branch] - if (push.texture_base_index >= 0) + if (image_push.texture_base_index >= 0) { - float4 tex = bindless_textures[push.texture_base_index].Sample(sam, uvsets.xy); + float4 tex = bindless_textures[image_push.texture_base_index].Sample(sam, uvsets.xy); - if (push.flags & IMAGE_FLAG_EXTRACT_NORMALMAP) + if (image_push.flags & IMAGE_FLAG_EXTRACT_NORMALMAP) { tex.rgb = tex.rgb * 2 - 1; } @@ -22,18 +22,36 @@ float4 main(VertextoPixel input) : SV_TARGET float4 mask = 1; 
[branch] - if (push.texture_mask_index >= 0) + if (image_push.texture_mask_index >= 0) { - mask = bindless_textures[push.texture_mask_index].Sample(sam, uvsets.zw); + mask = bindless_textures[image_push.texture_mask_index].Sample(sam, uvsets.zw); } color *= mask; [branch] - if (push.texture_background_index >= 0) + if (image_push.texture_background_index >= 0) { - float3 background = bindless_textures[push.texture_background_index].Sample(sam, (input.uv_screen.xy * float2(0.5f, -0.5f) + 0.5f) / input.uv_screen.w).rgb; + float3 background = bindless_textures[image_push.texture_background_index].Sample(sam, input.uv_screen()).rgb; color = float4(lerp(background, color.rgb, color.a), mask.a); } + [branch] + if (image_push.flags & IMAGE_FLAG_OUTPUT_COLOR_SPACE_HDR10_ST2084) + { + // https://github.com/microsoft/DirectX-Graphics-Samples/blob/master/Samples/Desktop/D3D12HDR/src/presentPS.hlsl + const float referenceWhiteNits = 80.0; + const float st2084max = 10000.0; + const float hdrScalar = referenceWhiteNits / st2084max; + // The input is in Rec.709, but the display is Rec.2020 + color.rgb = REC709toREC2020(color.rgb); + // Apply the ST.2084 curve to the result. 
+ color.rgb = ApplyREC2084Curve(color.rgb * hdrScalar); + } + else if (image_push.flags & IMAGE_FLAG_OUTPUT_COLOR_SPACE_LINEAR) + { + color.rgb = DEGAMMA(color.rgb); + color.rgb *= image_push.hdr_scaling; + } + return color; } diff --git a/WickedEngine/shaders/imageVS.hlsl b/WickedEngine/shaders/imageVS.hlsl index fc960c2ce..e38aa20d6 100644 --- a/WickedEngine/shaders/imageVS.hlsl +++ b/WickedEngine/shaders/imageVS.hlsl @@ -1,41 +1,48 @@ #include "globals.hlsli" #include "imageHF.hlsli" -VertextoPixel main(uint vI : SV_VERTEXID) +VertextoPixel main(uint vI : SV_VertexID) { VertextoPixel Out; - // This vertex shader generates a trianglestrip like this: - // 1--2 - // / - // / - // 3--4 - - // Since the corners are push constants, they cannot be indexed dynamically - // (This was only a problem on AMD in practice) - switch (vI) + [branch] + if (image_push.flags & IMAGE_FLAG_FULLSCREEN) { - default: - case 0: - Out.pos = push.corners0; - break; - case 1: - Out.pos = push.corners1; - break; - case 2: - Out.pos = push.corners2; - break; - case 3: - Out.pos = push.corners3; - break; + vertexID_create_fullscreen_triangle(vI, Out.pos); } - Out.uv_screen = Out.pos; + else + { + // This vertex shader generates a trianglestrip like this: + // 1--2 + // / + // / + // 3--4 - // Set up inverse bilinear interpolation - Out.q = Out.pos.xy - push.corners0.xy; - Out.b1 = push.corners1.xy - push.corners0.xy; - Out.b2 = push.corners2.xy - push.corners0.xy; - Out.b3 = push.corners0.xy - push.corners1.xy - push.corners2.xy + push.corners3.xy; + // If the corners are push constants, they cannot be indexed dynamically + // (This was only a problem on AMD in practice) + switch (vI) + { + default: + case 0: + Out.pos = image.corners0; + break; + case 1: + Out.pos = image.corners1; + break; + case 2: + Out.pos = image.corners2; + break; + case 3: + Out.pos = image.corners3; + break; + } + + // Set up inverse bilinear interpolation + Out.q = Out.pos.xy - image.corners0.xy; + Out.b1 = 
image.corners1.xy - image.corners0.xy; + Out.b2 = image.corners2.xy - image.corners0.xy; + Out.b3 = image.corners0.xy - image.corners1.xy - image.corners2.xy + image.corners3.xy; + } return Out; } diff --git a/WickedEngine/shaders/impostorHF.hlsli b/WickedEngine/shaders/impostorHF.hlsli index aea1c4448..7f7992ee3 100644 --- a/WickedEngine/shaders/impostorHF.hlsli +++ b/WickedEngine/shaders/impostorHF.hlsli @@ -11,6 +11,6 @@ struct VSOut uint instanceID : INSTANCEID; }; -Texture2DArray impostorTex : register(t0); +Texture2DArray impostorTex : register(t1); #endif // WI_IMPOSTOR_HF diff --git a/WickedEngine/shaders/impostorPS_wire.hlsl b/WickedEngine/shaders/impostorPS_wire.hlsl index be3a9ae1b..5458a7b2d 100644 --- a/WickedEngine/shaders/impostorPS_wire.hlsl +++ b/WickedEngine/shaders/impostorPS_wire.hlsl @@ -1,3 +1,5 @@ +#include "globals.hlsli" + float4 main() : SV_TARGET { return float4(1,1,1,1); diff --git a/WickedEngine/shaders/impostorVS.hlsl b/WickedEngine/shaders/impostorVS.hlsl index c83dfb4c1..122fbd441 100644 --- a/WickedEngine/shaders/impostorVS.hlsl +++ b/WickedEngine/shaders/impostorVS.hlsl @@ -16,7 +16,7 @@ static const float3 BILLBOARD[] = { float3(1, 1, 0), }; -ByteAddressBuffer impostorBuffer : register(t21); +ByteAddressBuffer impostorBuffer : register(t0); VSOut main(uint fakeIndex : SV_VERTEXID) { diff --git a/WickedEngine/shaders/msaoCS.hlsl b/WickedEngine/shaders/msaoCS.hlsl index 3a160794f..b70dcdc8b 100644 --- a/WickedEngine/shaders/msaoCS.hlsl +++ b/WickedEngine/shaders/msaoCS.hlsl @@ -3,8 +3,6 @@ #include "globals.hlsli" #include "ShaderInterop_Postprocess.h" -PUSHCONSTANT(msao, MSAO); - #ifdef INTERLEAVE_RESULT Texture2DArray texture_lineardepth_deinterleaved : register(t0); #else diff --git a/WickedEngine/shaders/objectGS_voxelizer.hlsl b/WickedEngine/shaders/objectGS_voxelizer.hlsl index 8c5a5ecba..d3948e38b 100644 --- a/WickedEngine/shaders/objectGS_voxelizer.hlsl +++ b/WickedEngine/shaders/objectGS_voxelizer.hlsl @@ -1,4 +1,5 @@ 
#include "globals.hlsli" +#include "objectHF.hlsli" struct GSInput { diff --git a/WickedEngine/shaders/objectHF.hlsli b/WickedEngine/shaders/objectHF.hlsli index 1800c0cbb..4f42f1772 100644 --- a/WickedEngine/shaders/objectHF.hlsli +++ b/WickedEngine/shaders/objectHF.hlsli @@ -127,7 +127,6 @@ uint load_entitytile(uint tileIndex) #ifdef OBJECTSHADER_LAYOUT_SHADOW_TEX #define OBJECTSHADER_USE_WIND #define OBJECTSHADER_USE_UVSETS -#define OBJECTSHADER_USE_COLOR #endif // OBJECTSHADER_LAYOUT_SHADOW_TEX #ifdef OBJECTSHADER_LAYOUT_PREPASS diff --git a/WickedEngine/shaders/objectVS_simple.hlsl b/WickedEngine/shaders/objectVS_simple.hlsl index ba84a94e2..8d7a436b3 100644 --- a/WickedEngine/shaders/objectVS_simple.hlsl +++ b/WickedEngine/shaders/objectVS_simple.hlsl @@ -1,4 +1,3 @@ #define OBJECTSHADER_COMPILE_VS #define OBJECTSHADER_LAYOUT_SHADOW_TEX -#define OBJECTSHADER_USE_COLOR #include "objectHF.hlsli" diff --git a/WickedEngine/shaders/occludeeVS.hlsl b/WickedEngine/shaders/occludeeVS.hlsl new file mode 100644 index 000000000..60a654ab3 --- /dev/null +++ b/WickedEngine/shaders/occludeeVS.hlsl @@ -0,0 +1,15 @@ +#include "globals.hlsli" + +struct CubeConstants +{ + float4x4 transform; +}; +PUSHCONSTANT(cube, CubeConstants); + +#undef WICKED_ENGINE_DEFAULT_ROOTSIGNATURE // don't use auto root signature! 
+[RootSignature("RootConstants(num32BitConstants=16, b999, visibility = SHADER_VISIBILITY_VERTEX)")] +float4 main(uint vID : SV_VertexID) : SV_Position +{ + // This is a 14 vertex count trianglestrip cube: + return mul(cube.transform, float4(vertexID_create_cube(vID) * 2 - 1, 1)); +} diff --git a/WickedEngine/shaders/oceanUpdateDisplacementMapCS.hlsl b/WickedEngine/shaders/oceanUpdateDisplacementMapCS.hlsl index ad804b621..1a1880ffd 100644 --- a/WickedEngine/shaders/oceanUpdateDisplacementMapCS.hlsl +++ b/WickedEngine/shaders/oceanUpdateDisplacementMapCS.hlsl @@ -1,3 +1,4 @@ +#include "globals.hlsli" #include "ShaderInterop_Ocean.h" StructuredBuffer g_InputDxyz : register(t0); diff --git a/WickedEngine/shaders/screenVS.hlsl b/WickedEngine/shaders/postprocessVS.hlsl similarity index 85% rename from WickedEngine/shaders/screenVS.hlsl rename to WickedEngine/shaders/postprocessVS.hlsl index 7db279f37..28d98ac62 100644 --- a/WickedEngine/shaders/screenVS.hlsl +++ b/WickedEngine/shaders/postprocessVS.hlsl @@ -1,4 +1,5 @@ #include "globals.hlsli" +#include "ShaderInterop_Postprocess.h" struct Output { diff --git a/WickedEngine/shaders/raytraceCS.hlsl b/WickedEngine/shaders/raytraceCS.hlsl index d61fc82ed..fc59dc82d 100644 --- a/WickedEngine/shaders/raytraceCS.hlsl +++ b/WickedEngine/shaders/raytraceCS.hlsl @@ -68,7 +68,8 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex) prim.subsetIndex = q.CandidateGeometryIndex(); Surface surface; - surface.load(prim, q.CandidateTriangleBarycentrics()); + if (!surface.load(prim, q.CandidateTriangleBarycentrics())) + break; [branch] if (surface.opacity - rand(seed, uv) >= 0) @@ -114,13 +115,15 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex) prim.subsetIndex = q.CommittedGeometryIndex(); surface.is_frontface = q.CommittedTriangleFrontFace(); - surface.load(prim, q.CommittedTriangleBarycentrics()); + if (!surface.load(prim, q.CommittedTriangleBarycentrics())) + return; #else // 
ray origin updated for next bounce: ray.Origin = ray.Origin + ray.Direction * hit.distance; - surface.load(hit.primitiveID, hit.bary); + if (!surface.load(hit.primitiveID, hit.bary)) + return; #endif // RTAPI @@ -257,7 +260,8 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex) prim.subsetIndex = q.CandidateGeometryIndex(); Surface surface; - surface.load(prim, q.CandidateTriangleBarycentrics()); + if (!surface.load(prim, q.CandidateTriangleBarycentrics())) + break; shadow *= lerp(1, surface.albedo * surface.transmission, surface.opacity); diff --git a/WickedEngine/shaders/renderlightmapPS.hlsl b/WickedEngine/shaders/renderlightmapPS.hlsl index cec4e65cf..ecb8b03b4 100644 --- a/WickedEngine/shaders/renderlightmapPS.hlsl +++ b/WickedEngine/shaders/renderlightmapPS.hlsl @@ -169,7 +169,8 @@ float4 main(Input input) : SV_TARGET prim.subsetIndex = q.CandidateGeometryIndex(); Surface surface; - surface.load(prim, q.CandidateTriangleBarycentrics()); + if (!surface.load(prim, q.CandidateTriangleBarycentrics())) + break; shadow *= lerp(1, surface.albedo * surface.transmission, surface.opacity); @@ -243,13 +244,15 @@ float4 main(Input input) : SV_TARGET prim.instanceIndex = q.CommittedInstanceID(); prim.subsetIndex = q.CommittedGeometryIndex(); - surface.load(prim, q.CommittedTriangleBarycentrics()); + if (!surface.load(prim, q.CommittedTriangleBarycentrics())) + return 0; #else // ray origin updated for next bounce: ray.Origin = ray.Origin + ray.Direction * hit.distance; - surface.load(hit.primitiveID, hit.bary); + if (!surface.load(hit.primitiveID, hit.bary)) + return 0; #endif // RTAPI diff --git a/WickedEngine/shaders/rtaoCS.hlsl b/WickedEngine/shaders/rtaoCS.hlsl index cdc91db7b..f2852ffd6 100644 --- a/WickedEngine/shaders/rtaoCS.hlsl +++ b/WickedEngine/shaders/rtaoCS.hlsl @@ -74,7 +74,8 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid : prim.subsetIndex = q.CandidateGeometryIndex(); Surface surface; - 
surface.load(prim, q.CandidateTriangleBarycentrics()); + if (!surface.load(prim, q.CandidateTriangleBarycentrics())) + break; float alphatest = clamp(blue_noise(DTid.xy, q.CandidateTriangleRayT()).r, 0, 0.99); diff --git a/WickedEngine/shaders/rtreflectionCS.hlsl b/WickedEngine/shaders/rtreflectionCS.hlsl new file mode 100644 index 000000000..b5814433c --- /dev/null +++ b/WickedEngine/shaders/rtreflectionCS.hlsl @@ -0,0 +1,209 @@ +#define RTAPI +#define DISABLE_SOFT_SHADOWMAP +#define DISABLE_TRANSPARENT_SHADOWMAP + +#include "globals.hlsli" +#include "ShaderInterop_Postprocess.h" +#include "raytracingHF.hlsli" +#include "stochasticSSRHF.hlsli" +#include "lightingHF.hlsli" + +PUSHCONSTANT(postprocess, PostProcess); + +RWTexture2D output : register(u0); +RWTexture2D output_rayLengths : register(u1); + +struct RayPayload +{ + float4 data; +}; + +[numthreads(8, 4, 1)] +void main(uint2 DTid : SV_DispatchThreadID) +{ + const float2 uv = ((float2)DTid.xy + 0.5) * postprocess.resolution_rcp; + const float depth = texture_depth.SampleLevel(sampler_linear_clamp, uv, 0); + if (depth == 0) + return; + + const float3 P = reconstruct_position(uv, depth); + const float3 V = normalize(GetCamera().position - P); + + PrimitiveID prim; + prim.unpack(texture_gbuffer0[DTid.xy * 2]); + + //output[DTid] = float4(saturate(P * 0.1), 1); + //return; + + Surface surface; + if (!surface.load(prim, P)) + { + return; + } + if (surface.roughness > 0.6) + { + output[DTid.xy] = float4(max(0, EnvironmentReflection_Global(surface)), 1); + output_rayLengths[DTid.xy] = FLT_MAX; + return; + } + + float3 N = surface.N; + float roughness = surface.roughness; + + // The ray direction selection part is the same as in ssr_raytraceCS.hlsl: + float4 H; + float3 L; + if (roughness > 0.05f) + { + float3x3 tangentBasis = GetTangentBasis(N); + float3 tangentV = mul(tangentBasis, V); + + const float2 bluenoise = blue_noise(DTid.xy).xy; + + float2 Xi = bluenoise.xy; + + Xi.y = lerp(Xi.y, 0.0f, 
GGX_IMPORTANCE_SAMPLE_BIAS); + + H = ImportanceSampleVisibleGGX(SampleDisk(Xi), roughness, tangentV); + + // Tangent to world + H.xyz = mul(H.xyz, tangentBasis); + + + L = reflect(-V, H.xyz); + } + else + { + H = float4(N.xyz, 1.0f); + L = reflect(-V, H.xyz); + } + + + const float3 R = L; + + float seed = GetFrame().time; + + RayDesc ray; + ray.TMin = 0.01; + ray.TMax = rtreflection_range; + ray.Origin = P; + ray.Direction = normalize(R); + + RayPayload payload; + payload.data = 0; + + +#ifdef RTAPI + RayQuery< + RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES + > q; + q.TraceRayInline( + scene_acceleration_structure, // RaytracingAccelerationStructure AccelerationStructure + 0, // uint RayFlags + asuint(postprocess.params1.x), // uint InstanceInclusionMask + ray // RayDesc Ray + ); + while (q.Proceed()) + { + PrimitiveID prim; + prim.primitiveIndex = q.CandidatePrimitiveIndex(); + prim.instanceIndex = q.CandidateInstanceID(); + prim.subsetIndex = q.CandidateGeometryIndex(); + + Surface surface; + if (!surface.load(prim, q.CandidateTriangleBarycentrics())) + break; + + float alphatest = clamp(blue_noise(DTid.xy, q.CandidateTriangleRayT()).r, 0, 0.99); + + [branch] + if (surface.opacity - alphatest >= 0) + { + q.CommitNonOpaqueTriangleHit(); + break; + } + } + if (q.CommittedStatus() != COMMITTED_TRIANGLE_HIT) +#else + RayHit hit = TraceRay_Closest(ray, asuint(postprocess.params1.x), seed, uv, groupIndex); + + if (hit.distance >= FLT_MAX - 1) +#endif // RTAPI + { + // miss: + payload.data.xyz += GetDynamicSkyColor(q.WorldRayDirection()); + payload.data.w = FLT_MAX; + } + else + { + // closest hit: + PrimitiveID prim; + prim.primitiveIndex = q.CommittedPrimitiveIndex(); + prim.instanceIndex = q.CommittedInstanceID(); + prim.subsetIndex = q.CommittedGeometryIndex(); + + Surface surface; + surface.is_frontface = q.CommittedTriangleFrontFace(); + if (!surface.load(prim, q.CommittedTriangleBarycentrics())) + return; + + surface.pixel = DTid.xy; + surface.screenUV = surface.pixel * 
postprocess.resolution_rcp.xy; + + if (surface.material.IsUnlit()) + { + payload.data.xyz += surface.albedo + surface.emissiveColor; + } + else + { + // Light sampling: + surface.P = q.WorldRayOrigin() + q.WorldRayDirection() * q.CommittedRayT(); + surface.V = -q.WorldRayDirection(); + surface.update(); + + Lighting lighting; + lighting.create(0, 0, GetAmbient(surface.N), 0); + + [loop] + for (uint iterator = 0; iterator < GetFrame().lightarray_count; iterator++) + { + ShaderEntity light = load_entity(GetFrame().lightarray_offset + iterator); + if ((light.layerMask & surface.material.layerMask) == 0) + continue; + + if (light.GetFlags() & ENTITY_FLAG_LIGHT_STATIC) + { + continue; // static lights will be skipped (they are used in lightmap baking) + } + + switch (light.GetType()) + { + case ENTITY_TYPE_DIRECTIONALLIGHT: + { + light_directional(light, surface, lighting); + } + break; + case ENTITY_TYPE_POINTLIGHT: + { + light_point(light, surface, lighting); + } + break; + case ENTITY_TYPE_SPOTLIGHT: + { + light_spot(light, surface, lighting); + } + break; + } + } + + lighting.indirect.specular += max(0, EnvironmentReflection_Global(surface)); + + LightingPart combined_lighting = CombineLighting(surface, lighting); + payload.data.xyz += surface.albedo * combined_lighting.diffuse + combined_lighting.specular + surface.emissiveColor; + } + payload.data.w = q.CommittedRayT(); + } + + output[DTid.xy] = float4(payload.data.xyz, 1); + output_rayLengths[DTid.xy] = payload.data.w; +} diff --git a/WickedEngine/shaders/rtreflectionLIB.hlsl b/WickedEngine/shaders/rtreflectionLIB.hlsl index 36985064c..056a2359d 100644 --- a/WickedEngine/shaders/rtreflectionLIB.hlsl +++ b/WickedEngine/shaders/rtreflectionLIB.hlsl @@ -18,6 +18,13 @@ struct RayPayload float4 data; }; +#ifndef SPIRV +GlobalRootSignature MyGlobalRootSignature = +{ + WICKED_ENGINE_DEFAULT_ROOTSIGNATURE +}; +#endif // SPIRV + [shader("raygeneration")] void RTReflection_Raygen() { @@ -33,8 +40,14 @@ void 
RTReflection_Raygen() PrimitiveID prim; prim.unpack(texture_gbuffer0[DTid.xy * 2]); + //output[DTid] = float4(saturate(P * 0.1), 1); + //return; + Surface surface; - surface.load(prim, P); + if (!surface.load(prim, P)) + { + return; + } if (surface.roughness > 0.6) { output[DTid.xy] = float4(max(0, EnvironmentReflection_Global(surface)), 1); @@ -112,7 +125,8 @@ void RTReflection_ClosestHit(inout RayPayload payload, in BuiltInTriangleInterse Surface surface; surface.is_frontface = (HitKind() == HIT_KIND_TRIANGLE_FRONT_FACE); - surface.load(prim, attr.barycentrics); + if (!surface.load(prim, attr.barycentrics)) + return; surface.pixel = DispatchRaysIndex().xy; surface.screenUV = surface.pixel / (float2)DispatchRaysDimensions().xy; @@ -180,7 +194,8 @@ void RTReflection_AnyHit(inout RayPayload payload, in BuiltInTriangleIntersectio prim.subsetIndex = GeometryIndex(); Surface surface; - surface.load(prim, attr.barycentrics); + if (!surface.load(prim, attr.barycentrics)) + return; float alphatest = clamp(blue_noise(DispatchRaysIndex().xy, RayTCurrent()).r, 0, 0.99); diff --git a/WickedEngine/shaders/screenPS.hlsl b/WickedEngine/shaders/screenPS.hlsl deleted file mode 100644 index 329282860..000000000 --- a/WickedEngine/shaders/screenPS.hlsl +++ /dev/null @@ -1,33 +0,0 @@ -#include "imageHF.hlsli" - -float4 main(float4 pos : SV_Position, float2 uv : TEXCOORD) : SV_TARGET -{ - SamplerState sam = bindless_samplers[push.sampler_index]; - - float4 color = unpack_half4(push.packed_color); - [branch] - if (push.texture_base_index >= 0) - { - color *= bindless_textures[push.texture_base_index].Sample(sam, uv); - } - - [branch] - if (push.flags & IMAGE_FLAG_OUTPUT_COLOR_SPACE_HDR10_ST2084) - { - // https://github.com/microsoft/DirectX-Graphics-Samples/blob/master/Samples/Desktop/D3D12HDR/src/presentPS.hlsl - const float referenceWhiteNits = 80.0; - const float st2084max = 10000.0; - const float hdrScalar = referenceWhiteNits / st2084max; - // The input is in Rec.709, but the 
display is Rec.2020 - color.rgb = REC709toREC2020(color.rgb); - // Apply the ST.2084 curve to the result. - color.rgb = ApplyREC2084Curve(color.rgb * hdrScalar); - } - else if (push.flags & IMAGE_FLAG_OUTPUT_COLOR_SPACE_LINEAR) - { - color.rgb = DEGAMMA(color.rgb); - color.rgb *= push.corners0.x; // hdr_scaling (corners0 is not used for anything else in full screen image rendering) - } - - return color; -} diff --git a/WickedEngine/shaders/screenspaceshadowCS.hlsl b/WickedEngine/shaders/screenspaceshadowCS.hlsl index d73afbcb4..dd8b99066 100644 --- a/WickedEngine/shaders/screenspaceshadowCS.hlsl +++ b/WickedEngine/shaders/screenspaceshadowCS.hlsl @@ -234,7 +234,8 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid : prim.subsetIndex = q.CandidateGeometryIndex(); Surface surface; - surface.load(prim, q.CandidateTriangleBarycentrics()); + if (!surface.load(prim, q.CandidateTriangleBarycentrics())) + break; float alphatest = clamp(blue_noise(DTid.xy, q.CandidateTriangleRayT()).r, 0, 0.99); diff --git a/WickedEngine/shaders/ssr_raytraceCS.hlsl b/WickedEngine/shaders/ssr_raytraceCS.hlsl index cdbea1c26..a8e5dec78 100644 --- a/WickedEngine/shaders/ssr_raytraceCS.hlsl +++ b/WickedEngine/shaders/ssr_raytraceCS.hlsl @@ -232,7 +232,10 @@ void main(uint3 DTid : SV_DispatchThreadID) prim.unpack(texture_gbuffer0[DTid.xy * 2]); Surface surface; - surface.load(prim, reconstruct_position(uv, depth)); + if (!surface.load(prim, reconstruct_position(uv, depth))) + { + return; + } if (surface.roughness > 0.6) { texture_raytrace[DTid.xy] = 0; diff --git a/WickedEngine/shaders/ssr_resolveCS.hlsl b/WickedEngine/shaders/ssr_resolveCS.hlsl index 1640d4182..7e5f8e7c5 100644 --- a/WickedEngine/shaders/ssr_resolveCS.hlsl +++ b/WickedEngine/shaders/ssr_resolveCS.hlsl @@ -105,7 +105,10 @@ void main(uint3 DTid : SV_DispatchThreadID) prim.unpack(texture_gbuffer0[DTid.xy * 2]); Surface surface; - surface.load(prim, P); + if (!surface.load(prim, P)) + { + return; + } 
const float3 N = normalize(mul((float3x3)GetCamera().view, surface.N)); const float roughness = GetRoughness(surface.roughness); diff --git a/WickedEngine/shaders/ssr_temporalCS.hlsl b/WickedEngine/shaders/ssr_temporalCS.hlsl index 38fd0e5f8..4d35fad53 100644 --- a/WickedEngine/shaders/ssr_temporalCS.hlsl +++ b/WickedEngine/shaders/ssr_temporalCS.hlsl @@ -121,7 +121,8 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 prim.unpack(texture_gbuffer0[DTid.xy * 2]); Surface surface; - surface.load(prim, P); + if (!surface.load(prim, P)) + return; const float roughness = surface.roughness; diff --git a/WickedEngine/shaders/surfel_raytraceCS.hlsl b/WickedEngine/shaders/surfel_raytraceCS.hlsl index c9c72dd42..e7b76e154 100644 --- a/WickedEngine/shaders/surfel_raytraceCS.hlsl +++ b/WickedEngine/shaders/surfel_raytraceCS.hlsl @@ -108,7 +108,8 @@ void main(uint3 DTid : SV_DispatchThreadID) prim.instanceIndex = q.CommittedInstanceID(); prim.subsetIndex = q.CommittedGeometryIndex(); - surface.load(prim, q.CommittedTriangleBarycentrics()); + if(!surface.load(prim, q.CommittedTriangleBarycentrics())) + break; #else @@ -116,7 +117,8 @@ void main(uint3 DTid : SV_DispatchThreadID) ray.Origin = ray.Origin + ray.Direction * hit.distance; hit_depth = hit.distance; - surface.load(hit.primitiveID, hit.bary); + if (!surface.load(hit.primitiveID, hit.bary)) + break; #endif // RTAPI diff --git a/WickedEngine/shaders/voxelPS.hlsl b/WickedEngine/shaders/voxelPS.hlsl index a5b00844a..8035a6eb2 100644 --- a/WickedEngine/shaders/voxelPS.hlsl +++ b/WickedEngine/shaders/voxelPS.hlsl @@ -1,4 +1,4 @@ -float4 main(float4 pos : SV_POSITION, float4 col : TEXCOORD) : SV_TARGET +float4 main(float4 pos : SV_Position, float4 col : TEXCOORD) : SV_Target { return col; -} \ No newline at end of file +} diff --git a/WickedEngine/wiApplication.cpp b/WickedEngine/wiApplication.cpp index 00a749171..3c4b027c3 100644 --- a/WickedEngine/wiApplication.cpp +++ 
b/WickedEngine/wiApplication.cpp @@ -117,7 +117,7 @@ namespace wi wi::lua::RunFile("startup.lua"); } - if (!is_window_active) + if (!is_window_active && !wi::arguments::HasArgument("alwaysactive")) { // If the application is not active, disable Update loops: deltaTimeAccumulator = 0; @@ -455,7 +455,6 @@ namespace wi } } wi::graphics::GetDevice() = graphicsDevice.get(); - wi::renderer::InitializeCommonSamplers(); canvas.init(window); diff --git a/WickedEngine/wiEmittedParticle.cpp b/WickedEngine/wiEmittedParticle.cpp index 9bfb88f54..b44532079 100644 --- a/WickedEngine/wiEmittedParticle.cpp +++ b/WickedEngine/wiEmittedParticle.cpp @@ -635,6 +635,7 @@ namespace wi GPUBarrier::Buffer(&counterBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE), GPUBarrier::Buffer(&distanceBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE_COMPUTE), }; + device->Barrier(barriers, arraysize(barriers), cmd); } device->EventEnd(cmd); @@ -731,7 +732,7 @@ namespace wi &culledIndirectionBuffer, &culledIndirectionBuffer2, }; - device->BindResources(res, 20, arraysize(res), cmd); + device->BindResources(res, 0, arraysize(res), cmd); if (ALLOW_MESH_SHADER && device->CheckCapability(GraphicsDeviceCapability::MESH_SHADER)) { diff --git a/WickedEngine/wiEnums.h b/WickedEngine/wiEnums.h index 0e087b0b9..47d78a452 100644 --- a/WickedEngine/wiEnums.h +++ b/WickedEngine/wiEnums.h @@ -113,14 +113,14 @@ namespace wi::enums VSTYPE_ENVMAP, VSTYPE_ENVMAP_SKY, VSTYPE_SPHERE, - VSTYPE_CUBE, + VSTYPE_OCCLUDEE, VSTYPE_VOXELIZER, VSTYPE_VOXEL, VSTYPE_FORCEFIELDVISUALIZER_POINT, VSTYPE_FORCEFIELDVISUALIZER_PLANE, VSTYPE_RENDERLIGHTMAP, VSTYPE_RAYTRACE_SCREEN, - VSTYPE_SCREEN, + VSTYPE_POSTPROCESS, VSTYPE_LENSFLARE, @@ -281,6 +281,7 @@ namespace wi::enums CSTYPE_POSTPROCESS_MSAO_BLURUPSAMPLE_BLENDOUT, CSTYPE_POSTPROCESS_MSAO_BLURUPSAMPLE_PREMIN, CSTYPE_POSTPROCESS_MSAO_BLURUPSAMPLE_PREMIN_BLENDOUT, + CSTYPE_POSTPROCESS_RTREFLECTION, CSTYPE_POSTPROCESS_SSR_RAYTRACE, 
CSTYPE_POSTPROCESS_SSR_RESOLVE, CSTYPE_POSTPROCESS_SSR_TEMPORAL, diff --git a/WickedEngine/wiFont.cpp b/WickedEngine/wiFont.cpp index 3f0731ac1..bd5902705 100644 --- a/WickedEngine/wiFont.cpp +++ b/WickedEngine/wiFont.cpp @@ -33,7 +33,6 @@ namespace wi::font BlendState blendState; RasterizerState rasterizerState; DepthStencilState depthStencilState; - Sampler sampler; Shader vertexShader; Shader pixelShader; @@ -49,10 +48,10 @@ namespace wi::font float y; float width; float height; - uint16_t tc_left; - uint16_t tc_right; - uint16_t tc_top; - uint16_t tc_bottom; + float tc_left; + float tc_right; + float tc_top; + float tc_bottom; }; wi::unordered_map glyph_lookup; wi::unordered_map rect_lookup; @@ -99,14 +98,8 @@ namespace wi::font }; wi::vector fontStyles; - struct FontVertex - { - XMFLOAT2 Pos; - XMHALF2 Tex; - }; - template - uint32_t WriteVertices(volatile FontVertex* vertexList, const T* text, Params params) + uint32_t WriteVertices(FontVertex* vertexList, const T* text, Params params) { const FontStyle& fontStyle = fontStyles[params.style]; const float fontScale = stbtt_ScaleForPixelHeight(&fontStyle.fontInfo, (float)params.size); @@ -123,11 +116,11 @@ namespace wi::font if (last_word_begin > 0 && params.h_wrap >= 0 && pos >= params.h_wrap - 1) { // Word ended and wrap detected, push down last word by one line: - float word_offset = vertexList[last_word_begin].Pos.x; + float word_offset = vertexList[last_word_begin].pos.x; // possibly uncached memory read? for (size_t i = last_word_begin; i < quadCount * 4; ++i) { - vertexList[i].Pos.x -= word_offset; - vertexList[i].Pos.y += LINEBREAK_SIZE; + vertexList[i].pos.x -= word_offset; // possibly uncached memory read? + vertexList[i].pos.y += LINEBREAK_SIZE; // possibly uncached memory read? 
} line += LINEBREAK_SIZE; pos -= word_offset; @@ -198,23 +191,15 @@ namespace wi::font const float top = line + glyphOffsetY; const float bottom = top + glyphHeight; - vertexList[vertexID + 0].Pos.x = left; - vertexList[vertexID + 0].Pos.y = top; - vertexList[vertexID + 1].Pos.x = right; - vertexList[vertexID + 1].Pos.y = top; - vertexList[vertexID + 2].Pos.x = left; - vertexList[vertexID + 2].Pos.y = bottom; - vertexList[vertexID + 3].Pos.x = right; - vertexList[vertexID + 3].Pos.y = bottom; + vertexList[vertexID + 0].pos = float2(left, top); + vertexList[vertexID + 1].pos = float2(right, top); + vertexList[vertexID + 2].pos = float2(left, bottom); + vertexList[vertexID + 3].pos = float2(right, bottom); - vertexList[vertexID + 0].Tex.x = glyph.tc_left; - vertexList[vertexID + 0].Tex.y = glyph.tc_top; - vertexList[vertexID + 1].Tex.x = glyph.tc_right; - vertexList[vertexID + 1].Tex.y = glyph.tc_top; - vertexList[vertexID + 2].Tex.x = glyph.tc_left; - vertexList[vertexID + 2].Tex.y = glyph.tc_bottom; - vertexList[vertexID + 3].Tex.x = glyph.tc_right; - vertexList[vertexID + 3].Tex.y = glyph.tc_bottom; + vertexList[vertexID + 0].uv = float2(glyph.tc_left, glyph.tc_top); + vertexList[vertexID + 1].uv = float2(glyph.tc_right, glyph.tc_top); + vertexList[vertexID + 2].uv = float2(glyph.tc_left, glyph.tc_bottom); + vertexList[vertexID + 3].uv = float2(glyph.tc_right, glyph.tc_bottom); pos += glyph.width * params.scaling + params.spacingX; pos_last_letter = pos; @@ -235,15 +220,8 @@ namespace wi::font void LoadShaders() { wi::renderer::LoadShader(ShaderStage::VS, vertexShader, "fontVS.cso"); - - - pixelShader.auto_samplers.emplace_back(); - pixelShader.auto_samplers.back().sampler = sampler; - pixelShader.auto_samplers.back().slot = 0; - wi::renderer::LoadShader(ShaderStage::PS, pixelShader, "fontPS.cso"); - PipelineStateDesc desc; desc.vs = &vertexShader; desc.ps = &pixelShader; @@ -294,19 +272,6 @@ namespace wi::font dsd.stencil_enable = false; depthStencilState = dsd; 
- SamplerDesc samplerDesc; - samplerDesc.filter = Filter::MIN_MAG_LINEAR_MIP_POINT; - samplerDesc.address_u = TextureAddressMode::BORDER; - samplerDesc.address_v = TextureAddressMode::BORDER; - samplerDesc.address_w = TextureAddressMode::BORDER; - samplerDesc.mip_lod_bias = 0.0f; - samplerDesc.max_anisotropy = 0; - samplerDesc.comparison_func = ComparisonFunc::NEVER; - samplerDesc.border_color = SamplerBorderColor::TRANSPARENT_BLACK; - samplerDesc.min_lod = 0; - samplerDesc.max_lod = 0; - device->CreateSampler(&samplerDesc, &sampler); - static wi::eventhandler::Handle handle1 = wi::eventhandler::Subscribe(wi::eventhandler::EVENT_RELOAD_SHADERS, [](uint64_t userdata) { LoadShaders(); }); LoadShaders(); @@ -407,20 +372,15 @@ namespace wi::font stbtt_MakeCodepointBitmap(&fontStyle.fontInfo, bitmap.data() + byteOffset, rect.w, rect.h, bitmapWidth, fontScaling, fontScaling, code); // Compute texture coordinates for the glyph: - float tc_left = float(rect.x); - float tc_right = tc_left + float(rect.w); - float tc_top = float(rect.y); - float tc_bottom = tc_top + float(rect.h); + glyph.tc_left = float(rect.x); + glyph.tc_right = glyph.tc_left + float(rect.w); + glyph.tc_top = float(rect.y); + glyph.tc_bottom = glyph.tc_top + float(rect.h); - tc_left *= inv_width; - tc_right *= inv_width; - tc_top *= inv_height; - tc_bottom *= inv_height; - - glyph.tc_left = XMConvertFloatToHalf(tc_left); - glyph.tc_right = XMConvertFloatToHalf(tc_right); - glyph.tc_top = XMConvertFloatToHalf(tc_top); - glyph.tc_bottom = XMConvertFloatToHalf(tc_bottom); + glyph.tc_left *= inv_width; + glyph.tc_right *= inv_width; + glyph.tc_top *= inv_height; + glyph.tc_bottom *= inv_height; } // Upload the CPU-side texture atlas bitmap to the GPU: @@ -559,7 +519,7 @@ namespace wi::font { return; } - volatile FontVertex* textBuffer = (volatile FontVertex*)mem.data; + FontVertex* textBuffer = (FontVertex*)mem.data; const uint32_t quadCount = WriteVertices(textBuffer, text, newProps); if (quadCount > 0) @@ 
-568,10 +528,11 @@ namespace wi::font device->BindPipelineState(&PSO, cmd); - PushConstantsFont push; - push.buffer_index = device->GetDescriptorIndex(&mem.buffer, SubresourceType::SRV); - push.buffer_offset = (uint32_t)mem.offset; - push.texture_index = device->GetDescriptorIndex(&texture, SubresourceType::SRV); + FontConstants font; + FontPushConstants font_push; + font_push.buffer_index = device->GetDescriptorIndex(&mem.buffer, SubresourceType::SRV); + font_push.buffer_offset = (uint32_t)mem.offset; + font_push.texture_index = device->GetDescriptorIndex(&texture, SubresourceType::SRV); const wi::Canvas& canvas = canvases[cmd]; // Asserts will check that a proper canvas was set for this cmd with wi::image::SetCanvas() @@ -584,24 +545,28 @@ namespace wi::font if (newProps.shadowColor.getA() > 0) { // font shadow render: - XMStoreFloat4x4(&push.transform, + XMStoreFloat4x4(&font.transform, XMMatrixTranslation((float)newProps.posX + 1, (float)newProps.posY + 1, 0) * Projection ); - push.color = newProps.shadowColor.rgba; + device->BindDynamicConstantBuffer(font, CBSLOT_FONT, cmd); + + font_push.color = newProps.shadowColor.rgba; + device->PushConstants(&font_push, sizeof(font_push), cmd); - device->PushConstants(&push, sizeof(push), cmd); device->DrawInstanced(4, quadCount, 0, 0, cmd); } // font base render: - XMStoreFloat4x4(&push.transform, + XMStoreFloat4x4(&font.transform, XMMatrixTranslation((float)newProps.posX, (float)newProps.posY, 0) * Projection ); - push.color = newProps.color.rgba; + device->BindDynamicConstantBuffer(font, CBSLOT_FONT, cmd); + + font_push.color = newProps.color.rgba; + device->PushConstants(&font_push, sizeof(font_push), cmd); - device->PushConstants(&push, sizeof(push), cmd); device->DrawInstanced(4, quadCount, 0, 0, cmd); device->EventEnd(cmd); diff --git a/WickedEngine/wiGPUSortLib.cpp b/WickedEngine/wiGPUSortLib.cpp index 9ee4f76f0..2b0fa8907 100644 --- a/WickedEngine/wiGPUSortLib.cpp +++ b/WickedEngine/wiGPUSortLib.cpp @@ -57,9 
+57,8 @@ namespace wi::gpusortlib device->EventBegin("GPUSortLib", cmd); - SortConstants sc; - sc.counterReadOffset = counterReadOffset; - device->BindDynamicConstantBuffer(sc, CB_GETBINDSLOT(SortConstants), cmd); + SortConstants sort; + sort.counterReadOffset = counterReadOffset; // initialize sorting arguments: { @@ -82,6 +81,7 @@ namespace wi::gpusortlib device->Barrier(barriers, arraysize(barriers), cmd); } + device->PushConstants(&sort, sizeof(sort), cmd); device->Dispatch(1, 1, 1, cmd); { @@ -125,6 +125,7 @@ namespace wi::gpusortlib // sort all buffers of size 512 (and presort bigger ones) device->BindComputeShader(&sortCS, cmd); + device->PushConstants(&sort, sizeof(sort), cmd); device->DispatchIndirect(&indirectBuffer, 0, cmd); GPUBarrier barriers[] = { @@ -158,22 +159,21 @@ namespace wi::gpusortlib uint32_t nMergeSize = presorted * 2; for (uint32_t nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 256; nMergeSubSize = nMergeSubSize >> 1) { - SortConstants sc; - sc.job_params.x = nMergeSubSize; + SortConstants sort; + sort.job_params.x = nMergeSubSize; if (nMergeSubSize == nMergeSize >> 1) { - sc.job_params.y = (2 * nMergeSubSize - 1); - sc.job_params.z = -1; + sort.job_params.y = (2 * nMergeSubSize - 1); + sort.job_params.z = -1; } else { - sc.job_params.y = nMergeSubSize; - sc.job_params.z = 1; + sort.job_params.y = nMergeSubSize; + sort.job_params.z = 1; } - sc.counterReadOffset = counterReadOffset; - - device->BindDynamicConstantBuffer(sc, CB_GETBINDSLOT(SortConstants), cmd); + sort.counterReadOffset = counterReadOffset; + device->PushConstants(&sort, sizeof(sort), cmd); device->Dispatch(numThreadGroups, 1, 1, cmd); GPUBarrier barriers[] = { @@ -183,6 +183,7 @@ namespace wi::gpusortlib } device->BindComputeShader(&sortInnerCS, cmd); + device->PushConstants(&sort, sizeof(sort), cmd); device->Dispatch(numThreadGroups, 1, 1, cmd); GPUBarrier barriers[] = { diff --git a/WickedEngine/wiGUI.cpp b/WickedEngine/wiGUI.cpp index ffc17d236..2465422e3 100644 --- 
a/WickedEngine/wiGUI.cpp +++ b/WickedEngine/wiGUI.cpp @@ -1422,8 +1422,9 @@ namespace wi::gui } } } - font.params.posY = translation.y + sprites[state].params.siz.y * 0.5f; + + selected = std::min((int)items.size(), selected); } void ComboBox::Render(const wi::Canvas& canvas, CommandList cmd) const { diff --git a/WickedEngine/wiGraphics.h b/WickedEngine/wiGraphics.h index 4486de090..102976036 100644 --- a/WickedEngine/wiGraphics.h +++ b/WickedEngine/wiGraphics.h @@ -791,16 +791,9 @@ namespace wi::graphics const SamplerDesc& GetDesc() const { return desc; } }; - struct StaticSampler - { - Sampler sampler; - uint32_t slot = 0; - }; - struct Shader : public GraphicsDeviceChild { ShaderStage stage = ShaderStage::Count; - wi::vector auto_samplers; // ability to set static samplers without explicit root signature }; struct GPUResource : public GraphicsDeviceChild diff --git a/WickedEngine/wiGraphicsDevice.h b/WickedEngine/wiGraphicsDevice.h index fe542e843..4ff62152c 100644 --- a/WickedEngine/wiGraphicsDevice.h +++ b/WickedEngine/wiGraphicsDevice.h @@ -25,10 +25,10 @@ namespace wi::graphics // Descriptor binding counts: // It's OK increase these limits if not enough // But it's better to refactor shaders to use bindless descriptors if they require more resources - static constexpr uint32_t DESCRIPTORBINDER_CBV_COUNT = 15; - static constexpr uint32_t DESCRIPTORBINDER_SRV_COUNT = 64; + static constexpr uint32_t DESCRIPTORBINDER_CBV_COUNT = 14; + static constexpr uint32_t DESCRIPTORBINDER_SRV_COUNT = 16; static constexpr uint32_t DESCRIPTORBINDER_UAV_COUNT = 16; - static constexpr uint32_t DESCRIPTORBINDER_SAMPLER_COUNT = 16; + static constexpr uint32_t DESCRIPTORBINDER_SAMPLER_COUNT = 8; struct DescriptorBindingTable { GPUBuffer CBV[DESCRIPTORBINDER_CBV_COUNT]; @@ -95,9 +95,6 @@ namespace wi::graphics virtual void WriteTopLevelAccelerationStructureInstance(const RaytracingAccelerationStructureDesc::TopLevel::Instance* instance, void* dest) const {} virtual void 
WriteShaderIdentifier(const RaytracingPipelineState* rtpso, uint32_t group_index, void* dest) const {} - // Set a sampler that can be used by any shaders that will be created after this call, without needing to bind that sampler - virtual void SetCommonSampler(const StaticSampler* sam) = 0; - // Set a debug name for the GPUResource, which will be visible in graphics debuggers virtual void SetName(GPUResource* pResource, const char* name) = 0; @@ -196,7 +193,7 @@ namespace wi::graphics virtual void BuildRaytracingAccelerationStructure(const RaytracingAccelerationStructure* dst, CommandList cmd, const RaytracingAccelerationStructure* src = nullptr) {} virtual void BindRaytracingPipelineState(const RaytracingPipelineState* rtpso, CommandList cmd) {} virtual void DispatchRays(const DispatchRaysDesc* desc, CommandList cmd) {} - virtual void PushConstants(const void* data, uint32_t size, CommandList cmd) = 0; + virtual void PushConstants(const void* data, uint32_t size, CommandList cmd, uint32_t offset = 0) = 0; virtual void PredicationBegin(const GPUBuffer* buffer, uint64_t offset, PredicationOp op, CommandList cmd) {} virtual void PredicationEnd(CommandList cmd) {} @@ -204,6 +201,7 @@ namespace wi::graphics virtual void EventEnd(CommandList cmd) = 0; virtual void SetMarker(const char* name, CommandList cmd) = 0; + virtual const RenderPass* GetCurrentRenderPass(CommandList cmd) const = 0; // Some useful helpers: diff --git a/WickedEngine/wiGraphicsDevice_DX12.cpp b/WickedEngine/wiGraphicsDevice_DX12.cpp index 0eb1b158f..033a91409 100644 --- a/WickedEngine/wiGraphicsDevice_DX12.cpp +++ b/WickedEngine/wiGraphicsDevice_DX12.cpp @@ -9,9 +9,6 @@ #include "Utility/dx12/d3dx12.h" #include "Utility/D3D12MemAlloc.h" -#include "Utility/dxcapi.h" -#include "Utility/dx12/d3d12shader.h" - #include #include @@ -23,6 +20,7 @@ #include #include +#include // _BitScanReverse64 using namespace Microsoft::WRL; @@ -32,12 +30,8 @@ namespace wi::graphics namespace dx12_internal { // Bindless 
allocation limits: -#define BINDLESS_RESOURCE_CAPACITY 500000 -#define BINDLESS_SAMPLER_CAPACITY 256 - -// Choose how many constant buffers will be placed in root in auto root signature: -#define CONSTANT_BUFFER_AUTO_PLACEMENT_IN_ROOT 4 - static_assert(DESCRIPTORBINDER_CBV_COUNT < 32, "cbv root mask must fit into uint32_t!"); + static constexpr int BINDLESS_RESOURCE_CAPACITY = 500000; + static constexpr int BINDLESS_SAMPLER_CAPACITY = 256; #ifdef PLATFORM_UWP @@ -54,11 +48,9 @@ namespace dx12_internal #endif static PFN_D3D12_CREATE_DEVICE D3D12CreateDevice = nullptr; - static PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE D3D12SerializeVersionedRootSignature = nullptr; + static PFN_D3D12_CREATE_VERSIONED_ROOT_SIGNATURE_DESERIALIZER D3D12CreateVersionedRootSignatureDeserializer = nullptr; #endif // PLATFORM_UWP - ComPtr dxcUtils; - // Engine -> Native converters constexpr uint32_t _ParseColorWriteMask(ColorWrite value) { @@ -762,23 +754,6 @@ namespace dx12_internal } return D3D12_SHADING_RATE_1X1; } - constexpr D3D12_STATIC_SAMPLER_DESC _ConvertStaticSampler(const StaticSampler& x) - { - D3D12_STATIC_SAMPLER_DESC desc = {}; - desc.ShaderRegister = x.slot; - desc.Filter = _ConvertFilter(x.sampler.desc.filter); - desc.AddressU = _ConvertTextureAddressMode(x.sampler.desc.address_u); - desc.AddressV = _ConvertTextureAddressMode(x.sampler.desc.address_v); - desc.AddressW = _ConvertTextureAddressMode(x.sampler.desc.address_w); - desc.MipLODBias = x.sampler.desc.mip_lod_bias; - desc.MaxAnisotropy = x.sampler.desc.max_anisotropy; - desc.ComparisonFunc = _ConvertComparisonFunc(x.sampler.desc.comparison_func); - desc.BorderColor = _ConvertSamplerBorderColor(x.sampler.desc.border_color); - desc.MinLOD = x.sampler.desc.min_lod; - desc.MaxLOD = x.sampler.desc.max_lod; - desc.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - return desc; - } // Native -> Engine converters constexpr Format _ConvertFormat_Inv(DXGI_FORMAT value) @@ -1014,31 +989,140 @@ namespace dx12_internal return 
retVal; } - - enum RESOURCEBINDING + struct RootSignatureOptimizer { - CONSTANTBUFFER, - RAWBUFFER, - STRUCTUREDBUFFER, - TYPEDBUFFER, - TEXTURE1D, - TEXTURE1DARRAY, - TEXTURE2D, - TEXTURE2DARRAY, - TEXTURECUBE, - TEXTURECUBEARRAY, - TEXTURE3D, - ACCELERATIONSTRUCTURE, - RWRAWBUFFER, - RWSTRUCTUREDBUFFER, - RWTYPEDBUFFER, - RWTEXTURE1D, - RWTEXTURE1DARRAY, - RWTEXTURE2D, - RWTEXTURE2DARRAY, - RWTEXTURE3D, + static constexpr uint8_t INVALID_ROOT_PARAMETER = 0xFF; + // These map shader registers in the binding space (space=0) to root parameters + uint8_t CBV[DESCRIPTORBINDER_CBV_COUNT]; + uint8_t SRV[DESCRIPTORBINDER_SRV_COUNT]; + uint8_t UAV[DESCRIPTORBINDER_UAV_COUNT]; + uint8_t SAM[DESCRIPTORBINDER_SAMPLER_COUNT]; + uint8_t PUSH; + // This is the bitflag of root all parameters: + uint64_t root_mask = 0ull; + // For each root parameter, store some statistics: + struct RootParameterStatistics + { + uint32_t descriptorCopyCount = 0u; + bool sampler_table = false; + }; + wi::vector root_stats; + const D3D12_VERSIONED_ROOT_SIGNATURE_DESC* rootsig_desc = nullptr; - RESOURCEBINDING_COUNT + void init(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC& desc) + { + rootsig_desc = &desc; + + // First, initialize all to point to invalid root parameter: + for (int i = 0; i < arraysize(CBV); ++i) + { + CBV[i] = INVALID_ROOT_PARAMETER; + } + for (int i = 0; i < arraysize(SRV); ++i) + { + SRV[i] = INVALID_ROOT_PARAMETER; + } + for (int i = 0; i < arraysize(UAV); ++i) + { + UAV[i] = INVALID_ROOT_PARAMETER; + } + for (int i = 0; i < arraysize(SAM); ++i) + { + SAM[i] = INVALID_ROOT_PARAMETER; + } + PUSH = INVALID_ROOT_PARAMETER; + + assert(desc.Desc_1_1.NumParameters < 64u); // root parameter indices should fit into 64-bit root_mask + root_stats.resize(desc.Desc_1_1.NumParameters); // one stat for each root parameter + for (UINT root_parameter_index = 0; root_parameter_index < desc.Desc_1_1.NumParameters; ++root_parameter_index) + { + const D3D12_ROOT_PARAMETER1& param = 
desc.Desc_1_1.pParameters[root_parameter_index]; + RootParameterStatistics& stats = root_stats[root_parameter_index]; + + if (param.ParameterType != D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS) // root constant dirtyness is not tracked, because those are set immediately + { + root_mask |= 1ull << root_parameter_index; + } + + switch (param.ParameterType) + { + case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: + for (UINT range_index = 0; range_index < param.DescriptorTable.NumDescriptorRanges; ++range_index) + { + const D3D12_DESCRIPTOR_RANGE1& range = param.DescriptorTable.pDescriptorRanges[range_index]; + stats.descriptorCopyCount += range.NumDescriptors == UINT_MAX ? 0 : range.NumDescriptors; + stats.sampler_table = range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + + if (range.RegisterSpace != 0) + continue; // we only care for the binding space (space=0) + + switch (range.RangeType) + { + case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: + for (UINT i = 0; i < range.NumDescriptors; ++i) + { + CBV[range.BaseShaderRegister + i] = (uint8_t)root_parameter_index; + } + break; + case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: + for (UINT i = 0; i < range.NumDescriptors; ++i) + { + SRV[range.BaseShaderRegister + i] = (uint8_t)root_parameter_index; + } + break; + case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: + for (UINT i = 0; i < range.NumDescriptors; ++i) + { + UAV[range.BaseShaderRegister + i] = (uint8_t)root_parameter_index; + } + break; + case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: + for (UINT i = 0; i < range.NumDescriptors; ++i) + { + SAM[range.BaseShaderRegister + i] = (uint8_t)root_parameter_index; + } + break; + default: + assert(0); + break; + } + } + break; + case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + if (param.Constants.RegisterSpace == 0) + { + assert(PUSH == INVALID_ROOT_PARAMETER); // check that push constant block was not already set, because only one is supported currently + PUSH = (uint8_t)root_parameter_index; + } + break; + case D3D12_ROOT_PARAMETER_TYPE_CBV: + if 
(param.Descriptor.RegisterSpace == 0) + { + assert(param.Descriptor.ShaderRegister < arraysize(CBV)); + CBV[param.Descriptor.ShaderRegister] = (uint8_t)root_parameter_index; + } + break; + case D3D12_ROOT_PARAMETER_TYPE_SRV: + if (param.Descriptor.RegisterSpace == 0) + { + assert(param.Descriptor.ShaderRegister < arraysize(CBV)); + SRV[param.Descriptor.ShaderRegister] = (uint8_t)root_parameter_index; + } + break; + case D3D12_ROOT_PARAMETER_TYPE_UAV: + if (param.Descriptor.RegisterSpace == 0) + { + assert(param.Descriptor.ShaderRegister < arraysize(CBV)); + UAV[param.Descriptor.ShaderRegister] = (uint8_t)root_parameter_index; + } + break; + default: + assert(0); + break; + } + } + + } }; struct SingleDescriptor @@ -1075,6 +1159,7 @@ namespace dx12_internal allocationhandler->destroylocker.unlock(); if (index >= 0) { + assert(index < BINDLESS_RESOURCE_CAPACITY); D3D12_CPU_DESCRIPTOR_HANDLE dst_bindless = device->descriptorheap_res.start_cpu; dst_bindless.ptr += index * allocationhandler->device->GetDescriptorHandleIncrementSize(type); allocationhandler->device->CopyDescriptorsSimple(1, dst_bindless, handle, type); @@ -1098,6 +1183,7 @@ namespace dx12_internal allocationhandler->destroylocker.unlock(); if (index >= 0) { + assert(index < BINDLESS_RESOURCE_CAPACITY); D3D12_CPU_DESCRIPTOR_HANDLE dst_bindless = device->descriptorheap_res.start_cpu; dst_bindless.ptr += index * allocationhandler->device->GetDescriptorHandleIncrementSize(type); allocationhandler->device->CopyDescriptorsSimple(1, dst_bindless, handle, type); @@ -1121,6 +1207,7 @@ namespace dx12_internal allocationhandler->destroylocker.unlock(); if (index >= 0) { + assert(index < BINDLESS_RESOURCE_CAPACITY); D3D12_CPU_DESCRIPTOR_HANDLE dst_bindless = device->descriptorheap_res.start_cpu; dst_bindless.ptr += index * allocationhandler->device->GetDescriptorHandleIncrementSize(type); allocationhandler->device->CopyDescriptorsSimple(1, dst_bindless, handle, type); @@ -1144,6 +1231,7 @@ namespace dx12_internal 
allocationhandler->destroylocker.unlock(); if (index >= 0) { + assert(index < BINDLESS_SAMPLER_CAPACITY); D3D12_CPU_DESCRIPTOR_HANDLE dst_bindless = device->descriptorheap_sam.start_cpu; dst_bindless.ptr += index * allocationhandler->device->GetDescriptorHandleIncrementSize(type); allocationhandler->device->CopyDescriptorsSimple(1, dst_bindless, handle, type); @@ -1222,9 +1310,6 @@ namespace dx12_internal D3D12_GPU_VIRTUAL_ADDRESS gpu_address = 0; - uint64_t cbv_mask_frame[COMMANDLIST_COUNT] = {}; - uint32_t cbv_mask[COMMANDLIST_COUNT] = {}; - virtual ~Resource_DX12() { allocationhandler->destroylocker.lock(); @@ -1303,37 +1388,18 @@ namespace dx12_internal ComPtr resource; ComPtr rootSignature; - wi::vector root_cbvs; - wi::vector resources; - wi::vector samplers; - - uint32_t resource_binding_count_unrolled = 0; - uint32_t sampler_binding_count_unrolled = 0; - - wi::vector resource_bindings; - - wi::vector bindless_res; - wi::vector bindless_sam; - - D3D12_ROOT_PARAMETER1 rootconstants; - - wi::vector staticsamplers; - - uint32_t bindpoint_rootconstant = 0; - uint32_t bindpoint_rootdescriptor = 0; - uint32_t bindpoint_res = 0; - uint32_t bindpoint_sam = 0; - uint32_t bindpoint_bindless = 0; - wi::vector shadercode; wi::vector input_elements; D3D_PRIMITIVE_TOPOLOGY primitiveTopology; + ComPtr rootsig_deserializer; + const D3D12_VERSIONED_ROOT_SIGNATURE_DESC* rootsig_desc = nullptr; + RootSignatureOptimizer rootsig_optimizer; + struct PSO_STREAM { struct PSO_STREAM1 { - CD3DX12_PIPELINE_STATE_STREAM_ROOT_SIGNATURE pRootSignature; CD3DX12_PIPELINE_STATE_STREAM_VS VS; CD3DX12_PIPELINE_STATE_STREAM_HS HS; CD3DX12_PIPELINE_STATE_STREAM_DS DS; @@ -1379,6 +1445,8 @@ namespace dx12_internal std::shared_ptr allocationhandler; ComPtr resource; + ComPtr stateObjectProperties; + wi::vector export_strings; wi::vector exports; wi::vector library_descs; @@ -1598,340 +1666,291 @@ using namespace dx12_internal; void GraphicsDevice_DX12::DescriptorBinder::reset() { table = {}; - 
dirty_res = true; - dirty_sam = true; - ringOffset_res = 0; - ringOffset_sam = 0; + optimizer_graphics = nullptr; + dirty_graphics = 0ull; + optimizer_compute = nullptr; + dirty_compute = 0ull; } void GraphicsDevice_DX12::DescriptorBinder::flush(bool graphics, CommandList cmd) { + uint64_t& dirty = graphics ? dirty_graphics : dirty_compute; + if (dirty == 0ull) + return; + + ID3D12GraphicsCommandList6* commandlist = device->GetCommandList(cmd); auto pso_internal = graphics ? to_internal(device->active_pso[cmd]) : to_internal(device->active_cs[cmd]); + const RootSignatureOptimizer& optimizer = pso_internal->rootsig_optimizer; - // Bind root descriptors: - if (dirty_root_cbvs != 0) + DWORD index; + while (_BitScanReverse64(&index, dirty)) // This will make sure that only the dirty root params are iterated, bit-by-bit { - uint32_t root_param = pso_internal->bindpoint_rootdescriptor; - for (auto& x : pso_internal->root_cbvs) + const UINT root_parameter_index = (UINT)index; + const uint64_t parameter_mask = 1ull << root_parameter_index; + dirty &= ~parameter_mask; // remove dirty bit of this root parameter + const D3D12_ROOT_PARAMETER1& param = pso_internal->rootsig_desc->Desc_1_1.pParameters[root_parameter_index]; + const RootSignatureOptimizer::RootParameterStatistics& stats = optimizer.root_stats[root_parameter_index]; + + switch (param.ParameterType) { - bool dirty = dirty_root_cbvs & (1 << x.ShaderRegister); - if (!dirty) - { - root_param++; - continue; - } - const GPUBuffer& buffer = table.CBV[x.ShaderRegister]; - uint64_t offset = table.CBV_offset[x.ShaderRegister]; + case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: + { + DescriptorHeapGPU& heap = stats.sampler_table ? 
device->descriptorheap_sam : device->descriptorheap_res; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle = heap.start_gpu; - D3D12_GPU_VIRTUAL_ADDRESS address; + if (stats.descriptorCopyCount > 0) + { + D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle = heap.start_cpu; + const uint32_t descriptorSize = stats.sampler_table ? device->sampler_descriptor_size : device->resource_descriptor_size; + const uint32_t bindless_capacity = stats.sampler_table ? BINDLESS_SAMPLER_CAPACITY : BINDLESS_RESOURCE_CAPACITY; - if (!buffer.IsValid()) - { - // this must not happen, root descriptor must be always valid! - // this happens when constant buffer was not bound by engine - // TODO: use a null initialized GPU VA here for safety? - address = 0; - assert(0); - } - else - { - auto internal_state = to_internal(&buffer); - address = internal_state->gpu_address; - address += offset; + // Remarks: + // This is allocating from the global shader visible descriptor heaps in a simple incrementing + // lockless ring buffer fashion. + // In this lockless method, a descriptor array that is to be allocated might not fit without + // completely wrapping the beginning of the allocation. + // But completely wrapping after the fact we discovered that the array couldn't fit, + // it wouldn't be thread safe any more without introducing locks + // For that reason, we are reserving an excess amount of descriptors at the end which can't be normally + // allocated, but any out of bounds descriptors can still be safely written into it + // + // This method wastes a number of descriptors essentially at the end of the heap, but it is simple + // and safe to implement + // + // The excess amount is essentially equal to the maximum number of descriptors that can be allocated at once. 
+ + // The reservation is the maximum amount of descriptors that can be allocated once + static constexpr uint32_t wrap_reservation_cbv_srv_uav = DESCRIPTORBINDER_CBV_COUNT + DESCRIPTORBINDER_SRV_COUNT + DESCRIPTORBINDER_UAV_COUNT; + static constexpr uint32_t wrap_reservation_sampler = DESCRIPTORBINDER_SAMPLER_COUNT; + const uint32_t wrap_reservation = stats.sampler_table ? wrap_reservation_sampler : wrap_reservation_cbv_srv_uav; + const uint32_t wrap_effective_size = heap.heapDesc.NumDescriptors - bindless_capacity - wrap_reservation; + assert(wrap_reservation >= stats.descriptorCopyCount); // for correct lockless wrap behaviour + + const uint64_t offset = heap.allocationOffset.fetch_add(stats.descriptorCopyCount); + const uint64_t wrapped_offset = offset % wrap_effective_size; + const uint32_t ringoffset = (bindless_capacity + (uint32_t)wrapped_offset) * descriptorSize; + const uint64_t wrapped_offset_end = wrapped_offset + stats.descriptorCopyCount; + + // Check that gpu offset doesn't intersect with our newly allocated range, if it does, we need to wait until gpu finishes with it: + uint64_t wrapped_gpu_offset = heap.cached_completedValue % wrap_effective_size; + int loop_cnt = 0; // safety + while (wrapped_offset < wrapped_gpu_offset && wrapped_gpu_offset <= wrapped_offset_end) + { + wrapped_gpu_offset = device->descriptorheap_res.fence->GetCompletedValue() % wrap_effective_size; + + // Check that the GPU has even a chance of freeing up the requested descriptors: + const uint64_t wrapped_signaled_offset = heap.fenceValue % wrap_effective_size; + if (wrapped_signaled_offset <= wrapped_offset_end || loop_cnt > 10) + { + assert(0); + break; // break out from waiting for GPU, because it might cause infinite loop + } + loop_cnt++; + } + + gpu_handle.ptr += (size_t)ringoffset; + cpu_handle.ptr += (size_t)ringoffset; + + for (UINT i = 0; i < param.DescriptorTable.NumDescriptorRanges; ++i) + { + const D3D12_DESCRIPTOR_RANGE1& range = 
param.DescriptorTable.pDescriptorRanges[i]; + switch (range.RangeType) + { + case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: + assert(range.NumDescriptors <= DESCRIPTORBINDER_SRV_COUNT); + device->device->CopyDescriptorsSimple(range.NumDescriptors, cpu_handle, device->nullSRV, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + for (UINT idx = 0; idx < range.NumDescriptors; ++idx) + { + const UINT reg = range.BaseShaderRegister + idx; + const GPUResource& resource = table.SRV[reg]; + if (resource.IsValid()) + { + int subresource = table.SRV_index[reg]; + auto internal_state = to_internal(&resource); + D3D12_CPU_DESCRIPTOR_HANDLE src_handle = subresource < 0 ? internal_state->srv.handle : internal_state->subresources_srv[subresource].handle; + device->device->CopyDescriptorsSimple(1, cpu_handle, src_handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } + cpu_handle.ptr += descriptorSize; + } + break; + case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: + assert(range.NumDescriptors <= DESCRIPTORBINDER_UAV_COUNT); + device->device->CopyDescriptorsSimple(range.NumDescriptors, cpu_handle, device->nullUAV, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + for (UINT idx = 0; idx < range.NumDescriptors; ++idx) + { + const UINT reg = range.BaseShaderRegister + idx; + const GPUResource& resource = table.UAV[reg]; + if (resource.IsValid()) + { + int subresource = table.UAV_index[reg]; + auto internal_state = to_internal(&resource); + D3D12_CPU_DESCRIPTOR_HANDLE src_handle = subresource < 0 ? 
internal_state->uav.handle : internal_state->subresources_uav[subresource].handle; + device->device->CopyDescriptorsSimple(1, cpu_handle, src_handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } + cpu_handle.ptr += descriptorSize; + } + break; + case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: + assert(range.NumDescriptors <= DESCRIPTORBINDER_CBV_COUNT); + device->device->CopyDescriptorsSimple(range.NumDescriptors, cpu_handle, device->nullCBV, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + for (UINT idx = 0; idx < range.NumDescriptors; ++idx) + { + const UINT reg = range.BaseShaderRegister + idx; + const GPUBuffer& buffer = table.CBV[reg]; + if (buffer.IsValid()) + { + uint64_t offset = table.CBV_offset[reg]; + auto internal_state = to_internal(&buffer); + + D3D12_CONSTANT_BUFFER_VIEW_DESC cbv; + cbv.BufferLocation = internal_state->gpu_address; + cbv.BufferLocation += offset; + cbv.SizeInBytes = (UINT)AlignTo(buffer.desc.size - offset, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + cbv.SizeInBytes = std::min(cbv.SizeInBytes, 65536u); + + device->device->CreateConstantBufferView(&cbv, cpu_handle); + } + cpu_handle.ptr += descriptorSize; + } + break; + case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: + assert(range.NumDescriptors <= DESCRIPTORBINDER_SAMPLER_COUNT); + device->device->CopyDescriptorsSimple(range.NumDescriptors, cpu_handle, device->nullSAM, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + for (UINT idx = 0; idx < range.NumDescriptors; ++idx) + { + const UINT reg = range.BaseShaderRegister + idx; + const Sampler& sam = table.SAM[reg]; + if (sam.IsValid()) + { + auto internal_state = to_internal(&sam); + D3D12_CPU_DESCRIPTOR_HANDLE src_handle = internal_state->descriptor.handle; + device->device->CopyDescriptorsSimple(1, cpu_handle, src_handle, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + } + cpu_handle.ptr += descriptorSize; + } + break; + default: + assert(0); + break; + } + } } if (graphics) { - device->GetCommandList(cmd)->SetGraphicsRootConstantBufferView(root_param, address); + 
commandlist->SetGraphicsRootDescriptorTable( + root_parameter_index, + gpu_handle + ); } else { - device->GetCommandList(cmd)->SetComputeRootConstantBufferView(root_param, address); - } - root_param++; - } - - dirty_root_cbvs = 0; - } - - - // Remarks: - // This is allocating from the global shader visible descriptor heaps in a simple incrementing - // lockless ring buffer fashion. - // In this lockless method, a descriptor array that is to be allocated might not fit without - // completely wrapping the beginning of the allocation. - // But completely wrapping after the fact we discovered that the array couldn't fit, - // it wouldn't be thread safe any more without introducing locks - // For that reason, we are reserving an excess amount of descriptors at the end which can't be normally - // allocated, but any out of bounds descriptors can still be safely written into it - // - // This method wastes a number of descriptors essentially at the end of the heap, but it is simple - // and safe to implement - // - // The excess amount is essentially equal to the maximum number of descriptors that can be allocated at once. 
- - - // Resources: - if (pso_internal->resource_binding_count_unrolled > 0 && dirty_res) - { - uint32_t resources = pso_internal->resource_binding_count_unrolled; - if (resources > 0) - { - // The reservation is the maximum amount of descriptors that can be allocated once - static constexpr uint32_t wrap_reservation = DESCRIPTORBINDER_CBV_COUNT + DESCRIPTORBINDER_SRV_COUNT + DESCRIPTORBINDER_UAV_COUNT; - const uint32_t wrap_effective_size = device->descriptorheap_res.heapDesc.NumDescriptors - BINDLESS_RESOURCE_CAPACITY - wrap_reservation; - assert(wrap_reservation > resources); // for correct lockless wrap behaviour - - const uint64_t offset = device->descriptorheap_res.allocationOffset.fetch_add(resources); - const uint64_t wrapped_offset = BINDLESS_RESOURCE_CAPACITY + offset % wrap_effective_size; - ringOffset_res = (uint32_t)wrapped_offset; - const uint64_t wrapped_offset_end = wrapped_offset + resources; - - uint64_t gpu_offset = device->descriptorheap_res.cached_completedValue; - uint64_t wrapped_gpu_offset = gpu_offset % wrap_effective_size; - while (wrapped_offset < wrapped_gpu_offset && wrapped_offset_end > wrapped_gpu_offset) - { - assert(device->descriptorheap_res.fenceValue > wrapped_offset_end); // simply not enough space, even with GPU drain - gpu_offset = device->descriptorheap_res.fence->GetCompletedValue(); - wrapped_gpu_offset = gpu_offset % wrap_effective_size; + commandlist->SetComputeRootDescriptorTable( + root_parameter_index, + gpu_handle + ); } } + break; - dirty_res = false; - auto& heap = device->descriptorheap_res; - D3D12_GPU_DESCRIPTOR_HANDLE binding_table = heap.start_gpu; - binding_table.ptr += (UINT64)ringOffset_res * (UINT64)device->resource_descriptor_size; - - int i = 0; - for (auto& x : pso_internal->resources) - { - RESOURCEBINDING binding = pso_internal->resource_bindings[i++]; - - for (UINT descriptor_index = 0; descriptor_index < x.NumDescriptors; ++descriptor_index) + case D3D12_ROOT_PARAMETER_TYPE_CBV: { - 
D3D12_CPU_DESCRIPTOR_HANDLE dst = heap.start_cpu; - uint32_t ringOffset = ringOffset_res++; - dst.ptr += ringOffset * device->resource_descriptor_size; + assert(param.Descriptor.ShaderRegister < DESCRIPTORBINDER_CBV_COUNT); + const GPUBuffer& buffer = table.CBV[param.Descriptor.ShaderRegister]; - UINT ShaderRegister = x.BaseShaderRegister + descriptor_index; - - switch (x.RangeType) + D3D12_GPU_VIRTUAL_ADDRESS address = {}; + if (buffer.IsValid()) { - default: - case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: - { - const GPUResource& resource = table.SRV[ShaderRegister]; - const int subresource = table.SRV_index[ShaderRegister]; - if (!resource.IsValid()) - { - switch (binding) - { - case RAWBUFFER: - case STRUCTUREDBUFFER: - case TYPEDBUFFER: - device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_buffer, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - break; - case TEXTURE1D: - device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_texture1d, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - break; - case TEXTURE1DARRAY: - device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_texture1darray, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - break; - case TEXTURE2D: - device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_texture2d, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - break; - case TEXTURE2DARRAY: - device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_texture2darray, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - break; - case TEXTURECUBE: - device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_texturecube, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - break; - case TEXTURECUBEARRAY: - device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_texturecubearray, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - break; - case TEXTURE3D: - device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_texture3d, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - break; - case ACCELERATIONSTRUCTURE: - device->device->CopyDescriptorsSimple(1, dst, 
device->nullSRV_accelerationstructure, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - break; - default: - assert(0); - break; - } - } - else - { - auto internal_state = to_internal(&resource); - - if (resource.IsAccelerationStructure()) - { - device->device->CopyDescriptorsSimple(1, dst, internal_state->srv.handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - } - else - { - if (subresource < 0) - { - device->device->CopyDescriptorsSimple(1, dst, internal_state->srv.handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - } - else - { - device->device->CopyDescriptorsSimple(1, dst, internal_state->subresources_srv[subresource].handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - } - } - } + uint64_t offset = table.CBV_offset[param.Descriptor.ShaderRegister]; + auto internal_state = to_internal(&buffer); + address = internal_state->gpu_address; + address += offset; } - break; - case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: + if (graphics) { - const GPUResource& resource = table.UAV[ShaderRegister]; - const int subresource = table.UAV_index[ShaderRegister]; - if (!resource.IsValid()) - { - switch (binding) - { - case RWRAWBUFFER: - case RWSTRUCTUREDBUFFER: - case RWTYPEDBUFFER: - device->device->CopyDescriptorsSimple(1, dst, device->nullUAV_buffer, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - break; - case RWTEXTURE1D: - device->device->CopyDescriptorsSimple(1, dst, device->nullUAV_texture1d, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - break; - case RWTEXTURE1DARRAY: - device->device->CopyDescriptorsSimple(1, dst, device->nullUAV_texture1darray, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - break; - case RWTEXTURE2D: - device->device->CopyDescriptorsSimple(1, dst, device->nullUAV_texture2d, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - break; - case RWTEXTURE2DARRAY: - device->device->CopyDescriptorsSimple(1, dst, device->nullUAV_texture2darray, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - break; - case RWTEXTURE3D: - device->device->CopyDescriptorsSimple(1, dst, device->nullUAV_texture3d, 
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - break; - default: - assert(0); - break; - } - } - else - { - auto internal_state = to_internal(&resource); - - if (subresource < 0) - { - device->device->CopyDescriptorsSimple(1, dst, internal_state->uav.handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - } - else - { - device->device->CopyDescriptorsSimple(1, dst, internal_state->subresources_uav[subresource].handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - } - } - } - break; - - case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: - { - const GPUBuffer& buffer = table.CBV[ShaderRegister]; - uint64_t offset = table.CBV_offset[ShaderRegister]; - - if (!buffer.IsValid()) - { - device->device->CopyDescriptorsSimple(1, dst, device->nullCBV, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - } - else - { - auto internal_state = to_internal(&buffer); - - D3D12_CONSTANT_BUFFER_VIEW_DESC cbv; - cbv.BufferLocation = internal_state->gpu_address; - cbv.BufferLocation += offset; - cbv.SizeInBytes = AlignTo((UINT)buffer.desc.size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); - - device->device->CreateConstantBufferView(&cbv, dst); - } - } - break; - } - } - } - - if (graphics) - { - device->GetCommandList(cmd)->SetGraphicsRootDescriptorTable(pso_internal->bindpoint_res, binding_table); - } - else - { - device->GetCommandList(cmd)->SetComputeRootDescriptorTable(pso_internal->bindpoint_res, binding_table); - } - } - - // Samplers: - if (pso_internal->sampler_binding_count_unrolled > 0 && dirty_sam) - { - uint32_t samplers = pso_internal->sampler_binding_count_unrolled; - if (samplers > 0) - { - // The reservation is the maximum amount of descriptors that can be allocated once - static constexpr uint32_t wrap_reservation = DESCRIPTORBINDER_SAMPLER_COUNT; - const uint32_t wrap_effective_size = device->descriptorheap_sam.heapDesc.NumDescriptors - BINDLESS_SAMPLER_CAPACITY - wrap_reservation; - assert(wrap_reservation > samplers); // for correct lockless wrap behaviour - - const uint64_t offset = 
device->descriptorheap_sam.allocationOffset.fetch_add(samplers); - const uint64_t wrapped_offset = BINDLESS_SAMPLER_CAPACITY + offset % wrap_effective_size; - ringOffset_sam = (uint32_t)wrapped_offset; - const uint64_t wrapped_offset_end = wrapped_offset + samplers; - - uint64_t gpu_offset = device->descriptorheap_sam.cached_completedValue; - uint64_t wrapped_gpu_offset = gpu_offset % wrap_effective_size; - while (wrapped_offset < wrapped_gpu_offset && wrapped_offset_end > wrapped_gpu_offset) - { - assert(device->descriptorheap_sam.fenceValue > wrapped_offset_end); // simply not enough space, even with GPU drain - gpu_offset = device->descriptorheap_sam.fence->GetCompletedValue(); - wrapped_gpu_offset = gpu_offset % wrap_effective_size; - } - } - - dirty_sam = false; - auto& heap = device->descriptorheap_sam; - D3D12_GPU_DESCRIPTOR_HANDLE binding_table = heap.start_gpu; - binding_table.ptr += (UINT64)ringOffset_sam * (UINT64)device->sampler_descriptor_size; - - for (auto& x : pso_internal->samplers) - { - for (UINT descriptor_index = 0; descriptor_index < x.NumDescriptors; ++descriptor_index) - { - D3D12_CPU_DESCRIPTOR_HANDLE dst = heap.start_cpu; - uint32_t ringOffset = ringOffset_sam++; - dst.ptr += ringOffset * device->sampler_descriptor_size; - - UINT ShaderRegister = x.BaseShaderRegister + descriptor_index; - - const Sampler& sampler = table.SAM[ShaderRegister]; - if (!sampler.IsValid()) - { - device->device->CopyDescriptorsSimple(1, dst, device->nullSAM, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + commandlist->SetGraphicsRootConstantBufferView( + root_parameter_index, + address + ); } else { - auto internal_state = to_internal(&sampler); - device->device->CopyDescriptorsSimple(1, dst, internal_state->descriptor.handle, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + commandlist->SetComputeRootConstantBufferView( + root_parameter_index, + address + ); } } - } + break; - if (graphics) - { - 
device->GetCommandList(cmd)->SetGraphicsRootDescriptorTable(pso_internal->bindpoint_sam, binding_table); - } - else - { - device->GetCommandList(cmd)->SetComputeRootDescriptorTable(pso_internal->bindpoint_sam, binding_table); + case D3D12_ROOT_PARAMETER_TYPE_SRV: + { + assert(param.Descriptor.ShaderRegister < DESCRIPTORBINDER_SRV_COUNT); + const GPUResource& resource = table.SRV[param.Descriptor.ShaderRegister]; + int subresource = table.SRV_index[param.Descriptor.ShaderRegister]; + + D3D12_GPU_VIRTUAL_ADDRESS address = {}; + if (resource.IsValid()) + { + auto internal_state = to_internal(&resource); + address = internal_state->gpu_address; + } + + if (graphics) + { + commandlist->SetGraphicsRootShaderResourceView( + root_parameter_index, + address + ); + } + else + { + commandlist->SetComputeRootShaderResourceView( + root_parameter_index, + address + ); + } + } + break; + + case D3D12_ROOT_PARAMETER_TYPE_UAV: + { + assert(param.Descriptor.ShaderRegister < DESCRIPTORBINDER_UAV_COUNT); + const GPUResource& resource = table.UAV[param.Descriptor.ShaderRegister]; + int subresource = table.UAV_index[param.Descriptor.ShaderRegister]; + + D3D12_GPU_VIRTUAL_ADDRESS address = {}; + if (resource.IsValid()) + { + auto internal_state = to_internal(&resource); + address = internal_state->gpu_address; + } + + if (graphics) + { + commandlist->SetGraphicsRootUnorderedAccessView( + root_parameter_index, + address + ); + } + else + { + commandlist->SetComputeRootUnorderedAccessView( + root_parameter_index, + address + ); + } + } + break; + + default: + assert(0); + break; } } + assert(dirty == 0ull); // check that all dirty root parameters were handled } @@ -2069,32 +2088,10 @@ using namespace dx12_internal; pso_validate(cmd); binders[cmd].flush(true, cmd); - - auto pso_internal = to_internal(active_pso[cmd]); - if (pso_internal->rootconstants.Constants.Num32BitValues > 0) - { - GetCommandList(cmd)->SetGraphicsRoot32BitConstants( - pso_internal->bindpoint_rootconstant, - 
pso_internal->rootconstants.Constants.Num32BitValues, - pushconstants[cmd].data, - 0 - ); - } } void GraphicsDevice_DX12::predispatch(CommandList cmd) { binders[cmd].flush(false, cmd); - - auto cs_internal = to_internal(active_cs[cmd]); - if (cs_internal->rootconstants.Constants.Num32BitValues > 0) - { - GetCommandList(cmd)->SetComputeRoot32BitConstants( - cs_internal->bindpoint_rootconstant, - cs_internal->rootconstants.Constants.Num32BitValues, - pushconstants[cmd].data, - 0 - ); - } } @@ -2109,17 +2106,6 @@ using namespace dx12_internal; DEBUGDEVICE = debuglayer; - HMODULE dxcompiler = wiLoadLibrary("dxcompiler.dll"); - if (dxcompiler == nullptr) - { - std::stringstream ss(""); - ss << "Failed to load dxcompiler.dll! ERROR: 0x" << std::hex << GetLastError() << std::endl; - ss << "Ensure that dxcompiler.dll is found near the exe" << std::endl; - ss << "You might also need to install the latest Visual C++ Redistributable package from Microsoft" << std::endl; - wi::helper::messageBox(ss.str(), "Error!"); - wi::platform::Exit(); - } - #ifndef PLATFORM_UWP HMODULE dxgi = LoadLibraryEx(L"dxgi.dll", nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32); if (dxgi == nullptr) @@ -2167,39 +2153,17 @@ using namespace dx12_internal; wi::platform::Exit(); } - D3D12SerializeVersionedRootSignature = (PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE)wiGetProcAddress(dx12, "D3D12SerializeVersionedRootSignature"); - assert(D3D12SerializeVersionedRootSignature != nullptr); - if (D3D12SerializeVersionedRootSignature == nullptr) + D3D12CreateVersionedRootSignatureDeserializer = (PFN_D3D12_CREATE_VERSIONED_ROOT_SIGNATURE_DESERIALIZER)wiGetProcAddress(dx12, "D3D12CreateVersionedRootSignatureDeserializer"); + assert(D3D12CreateVersionedRootSignatureDeserializer != nullptr); + if (D3D12CreateVersionedRootSignatureDeserializer == nullptr) { std::stringstream ss(""); - ss << "Failed to load D3D12SerializeVersionedRootSignature! 
ERROR: 0x" << std::hex << GetLastError(); + ss << "Failed to load D3D12CreateVersionedRootSignatureDeserializer! ERROR: 0x" << std::hex << GetLastError(); wi::helper::messageBox(ss.str(), "Error!"); wi::platform::Exit(); } #endif // PLATFORM_UWP - DxcCreateInstanceProc DxcCreateInstance = (DxcCreateInstanceProc)wiGetProcAddress(dxcompiler, "DxcCreateInstance"); - assert(DxcCreateInstance != nullptr); - if (DxcCreateInstance == nullptr) - { - std::stringstream ss(""); - ss << "Failed to load DxcCreateInstance! ERROR: 0x" << std::hex << GetLastError(); - wi::helper::messageBox(ss.str(), "Error!"); - wi::platform::Exit(); - } - - HRESULT hr; - - hr = DxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(&dxcUtils)); - assert(SUCCEEDED(hr)); - if (FAILED(hr)) - { - std::stringstream ss(""); - ss << "DxcCreateInstance failed! ERROR: 0x" << std::hex << hr; - wi::helper::messageBox(ss.str(), "Error!"); - wi::platform::Exit(); - } - #if !defined(PLATFORM_UWP) if (debuglayer) { @@ -2226,8 +2190,9 @@ using namespace dx12_internal; ComPtr dxgiInfoQueue; if (DXGIGetDebugInterface1 != nullptr && SUCCEEDED(DXGIGetDebugInterface1(0, IID_PPV_ARGS(dxgiInfoQueue.GetAddressOf())))) { - dxgiInfoQueue->SetBreakOnSeverity(DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, true); dxgiInfoQueue->SetBreakOnSeverity(DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, true); + dxgiInfoQueue->SetBreakOnSeverity(DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, true); + //dxgiInfoQueue->SetBreakOnSeverity(DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_WARNING, true); DXGI_INFO_QUEUE_MESSAGE_ID hide[] = { @@ -2242,6 +2207,8 @@ using namespace dx12_internal; } #endif + HRESULT hr; + hr = CreateDXGIFactory2(debuglayer ? 
DXGI_CREATE_FACTORY_DEBUG : 0u, IID_PPV_ARGS(&dxgiFactory)); if (FAILED(hr)) { @@ -2341,6 +2308,7 @@ using namespace dx12_internal; #ifdef _DEBUG d3dInfoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE); d3dInfoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE); + //d3dInfoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE); #endif D3D12_MESSAGE_ID hide[] = @@ -2667,16 +2635,70 @@ using namespace dx12_internal; } } - allocationhandler->descriptors_res.init(this, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - allocationhandler->descriptors_sam.init(this, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - allocationhandler->descriptors_rtv.init(this, D3D12_DESCRIPTOR_HEAP_TYPE_RTV); - allocationhandler->descriptors_dsv.init(this, D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + allocationhandler->descriptors_res.init(this, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 4096); + allocationhandler->descriptors_sam.init(this, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 256); + allocationhandler->descriptors_rtv.init(this, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 512); + allocationhandler->descriptors_dsv.init(this, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, 128); + D3D12_DESCRIPTOR_HEAP_DESC nullHeapDesc = {}; + nullHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + nullHeapDesc.NumDescriptors = DESCRIPTORBINDER_CBV_COUNT + DESCRIPTORBINDER_SRV_COUNT + DESCRIPTORBINDER_UAV_COUNT; + hr = device->CreateDescriptorHeap(&nullHeapDesc, IID_PPV_ARGS(&nulldescriptorheap_cbv_srv_uav)); + assert(SUCCEEDED(hr)); + if (FAILED(hr)) + { + std::stringstream ss(""); + ss << "ID3D12Device::CreateDescriptorHeap[nulldescriptorheap_cbv_srv_uav] failed! 
ERROR: 0x" << std::hex << hr; + wi::helper::messageBox(ss.str(), "Error!"); + } + + nullHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + nullHeapDesc.NumDescriptors = DESCRIPTORBINDER_SAMPLER_COUNT; + hr = device->CreateDescriptorHeap(&nullHeapDesc, IID_PPV_ARGS(&nulldescriptorheap_sampler)); /* keep the HRESULT: the assert and FAILED check below must test this call, not the previous heap creation */ + assert(SUCCEEDED(hr)); + if (FAILED(hr)) + { + std::stringstream ss(""); + ss << "ID3D12Device::CreateDescriptorHeap[nulldescriptorheap_sampler] failed! ERROR: 0x" << std::hex << hr; + wi::helper::messageBox(ss.str(), "Error!"); + } + + nullCBV = nulldescriptorheap_cbv_srv_uav->GetCPUDescriptorHandleForHeapStart(); + for (uint32_t i = 0; i < DESCRIPTORBINDER_CBV_COUNT; ++i) { D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = {}; - nullCBV = allocationhandler->descriptors_res.allocate(); - device->CreateConstantBufferView(&cbv_desc, nullCBV); + D3D12_CPU_DESCRIPTOR_HANDLE handle = nullCBV; + handle.ptr += i * resource_descriptor_size; + device->CreateConstantBufferView(&cbv_desc, handle); } + + nullSRV = nullCBV; + nullSRV.ptr += DESCRIPTORBINDER_CBV_COUNT * resource_descriptor_size; + for (uint32_t i = 0; i < DESCRIPTORBINDER_SRV_COUNT; ++i) + { + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {}; + srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv_desc.Format = DXGI_FORMAT_R32_UINT; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + D3D12_CPU_DESCRIPTOR_HANDLE handle = nullSRV; + handle.ptr += i * resource_descriptor_size; + device->CreateShaderResourceView(nullptr, &srv_desc, handle); + } + + nullUAV = nullSRV; + nullUAV.ptr += DESCRIPTORBINDER_SRV_COUNT * resource_descriptor_size; + for (uint32_t i = 0; i < DESCRIPTORBINDER_UAV_COUNT; ++i) + { + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {}; + uav_desc.Format = DXGI_FORMAT_R32_UINT; + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + D3D12_CPU_DESCRIPTOR_HANDLE handle = nullUAV; + handle.ptr += i * resource_descriptor_size; + device->CreateUnorderedAccessView(nullptr, nullptr, &uav_desc, 
handle); /* write null UAV into slot i; passing nullUAV only ever rewrote slot 0 */ + } + + nullSAM = nulldescriptorheap_sampler->GetCPUDescriptorHandleForHeapStart(); + for (uint32_t i = 0; i < DESCRIPTORBINDER_SAMPLER_COUNT; ++i) { D3D12_SAMPLER_DESC sampler_desc = {}; sampler_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; @@ -2684,125 +2706,10 @@ using namespace dx12_internal; sampler_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; sampler_desc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; sampler_desc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; - nullSAM = allocationhandler->descriptors_sam.allocate(); + D3D12_CPU_DESCRIPTOR_HANDLE handle = nullSAM; + handle.ptr += i * sampler_descriptor_size; - device->CreateSampler(&sampler_desc, nullSAM); + device->CreateSampler(&sampler_desc, handle); /* write null sampler into slot i; passing nullSAM only ever rewrote slot 0 */ } - { - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {}; - srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv_desc.Format = DXGI_FORMAT_R32_UINT; - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - nullSRV_buffer = allocationhandler->descriptors_res.allocate(); - device->CreateShaderResourceView(nullptr, &srv_desc, nullSRV_buffer); - } - { - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {}; - srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; - nullSRV_texture1d = allocationhandler->descriptors_res.allocate(); - device->CreateShaderResourceView(nullptr, &srv_desc, nullSRV_texture1d); - } - { - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {}; - srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY; - nullSRV_texture1darray = allocationhandler->descriptors_res.allocate(); - device->CreateShaderResourceView(nullptr, &srv_desc, nullSRV_texture1darray); - } - { - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {}; - srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv_desc.Format = 
DXGI_FORMAT_R8G8B8A8_UNORM; - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - nullSRV_texture2d = allocationhandler->descriptors_res.allocate(); - device->CreateShaderResourceView(nullptr, &srv_desc, nullSRV_texture2d); - } - { - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {}; - srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; - nullSRV_texture2darray = allocationhandler->descriptors_res.allocate(); - device->CreateShaderResourceView(nullptr, &srv_desc, nullSRV_texture2darray); - } - { - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {}; - srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; - nullSRV_texturecube = allocationhandler->descriptors_res.allocate(); - device->CreateShaderResourceView(nullptr, &srv_desc, nullSRV_texturecube); - } - { - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {}; - srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; - nullSRV_texturecubearray = allocationhandler->descriptors_res.allocate(); - device->CreateShaderResourceView(nullptr, &srv_desc, nullSRV_texturecubearray); - } - { - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {}; - srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; - nullSRV_texture3d = allocationhandler->descriptors_res.allocate(); - device->CreateShaderResourceView(nullptr, &srv_desc, nullSRV_texture3d); - } - if(CheckCapability(GraphicsDeviceCapability::RAYTRACING)) - { - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {}; - srv_desc.Shader4ComponentMapping = 
D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv_desc.Format = DXGI_FORMAT_UNKNOWN; - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE; - nullSRV_accelerationstructure = allocationhandler->descriptors_res.allocate(); - device->CreateShaderResourceView(nullptr, &srv_desc, nullSRV_accelerationstructure); - } - { - D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {}; - uav_desc.Format = DXGI_FORMAT_R32_UINT; - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; - nullUAV_buffer = allocationhandler->descriptors_res.allocate(); - device->CreateUnorderedAccessView(nullptr, nullptr, &uav_desc, nullUAV_buffer); - } - { - D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {}; - uav_desc.Format = DXGI_FORMAT_R32_UINT; - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; - nullUAV_texture1d = allocationhandler->descriptors_res.allocate(); - device->CreateUnorderedAccessView(nullptr, nullptr, &uav_desc, nullUAV_texture1d); - } - { - D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {}; - uav_desc.Format = DXGI_FORMAT_R32_UINT; - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1DARRAY; - nullUAV_texture1darray = allocationhandler->descriptors_res.allocate(); - device->CreateUnorderedAccessView(nullptr, nullptr, &uav_desc, nullUAV_texture1darray); - } - { - D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {}; - uav_desc.Format = DXGI_FORMAT_R32_UINT; - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; - nullUAV_texture2d = allocationhandler->descriptors_res.allocate(); - device->CreateUnorderedAccessView(nullptr, nullptr, &uav_desc, nullUAV_texture2d); - } - { - D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {}; - uav_desc.Format = DXGI_FORMAT_R32_UINT; - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; - nullUAV_texture2darray = allocationhandler->descriptors_res.allocate(); - device->CreateUnorderedAccessView(nullptr, nullptr, &uav_desc, nullUAV_texture2darray); - } - { - D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {}; - uav_desc.Format = 
DXGI_FORMAT_R32_UINT; - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; - nullUAV_texture3d = allocationhandler->descriptors_res.allocate(); - device->CreateUnorderedAccessView(nullptr, nullptr, &uav_desc, nullUAV_texture3d); - } - hr = queues[QUEUE_GRAPHICS].queue->GetTimestampFrequency(&TIMESTAMP_FREQUENCY); assert(SUCCEEDED(hr)); @@ -2985,7 +2892,9 @@ using namespace dx12_internal; } internal_state->dummyTexture.desc.format = pDesc->format; - internal_state->renderpass = RenderPass(); + internal_state->dummyTexture.desc.width = pDesc->width; + internal_state->dummyTexture.desc.height = pDesc->height; + internal_state->renderpass = {}; wi::helper::hash_combine(internal_state->renderpass.hash, pDesc->format); internal_state->renderpass.desc.attachments.push_back(RenderPassAttachment::RenderTarget(&internal_state->dummyTexture)); @@ -3082,7 +2991,7 @@ using namespace dx12_internal; { auto cmd = copyAllocator.allocate(pDesc->size); - memcpy(cmd.uploadbuffer.mapped_data, pInitialData, pDesc->size); + std::memcpy(cmd.uploadbuffer.mapped_data, pInitialData, pDesc->size); cmd.commandList->CopyBufferRegion( internal_state->resource.Get(), @@ -3332,561 +3241,42 @@ using namespace dx12_internal; HRESULT hr = (internal_state->shadercode.empty() ? 
E_FAIL : S_OK); assert(SUCCEEDED(hr)); - wi::unordered_set library_binding_resolver; - + hr = D3D12CreateVersionedRootSignatureDeserializer( + internal_state->shadercode.data(), + internal_state->shadercode.size(), + IID_PPV_ARGS(&internal_state->rootsig_deserializer) + ); + if (SUCCEEDED(hr)) { - auto insert_descriptor = [&](const D3D12_SHADER_INPUT_BIND_DESC& desc, const D3D12_SHADER_BUFFER_DESC& bufferdesc) + hr = internal_state->rootsig_deserializer->GetRootSignatureDescAtVersion(D3D_ROOT_SIGNATURE_VERSION_1_1, &internal_state->rootsig_desc); + if (SUCCEEDED(hr)) { - if (library_binding_resolver.count(desc.Name) != 0) - { - return; - } - library_binding_resolver.insert(desc.Name); + assert(internal_state->rootsig_desc->Version == D3D_ROOT_SIGNATURE_VERSION_1_1); - if (desc.Type == D3D_SIT_CBUFFER && desc.BindPoint >= 999) - { - internal_state->rootconstants.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; - internal_state->rootconstants.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - internal_state->rootconstants.Constants.ShaderRegister = desc.BindPoint; - internal_state->rootconstants.Constants.RegisterSpace = desc.Space; - internal_state->rootconstants.Constants.Num32BitValues = bufferdesc.Size / sizeof(uint32_t); - return; - } - - const bool bindless = desc.Space > 0; - - if (desc.Type == D3D_SIT_SAMPLER) - { - if (!bindless && desc.Space == 0) - { - for (auto& sam : pShader->auto_samplers) - { - if (desc.BindPoint == sam.slot) - { - internal_state->staticsamplers.push_back(_ConvertStaticSampler(sam)); - return; // static sampler will be used instead - } - } - for (auto& sam : common_samplers) - { - if (desc.BindPoint == sam.ShaderRegister) - { - internal_state->staticsamplers.push_back(sam); - return; // static sampler will be used instead - } - } - } - - D3D12_DESCRIPTOR_RANGE1& descriptor = bindless ? 
internal_state->bindless_sam.emplace_back() : internal_state->samplers.emplace_back(); - - descriptor.Flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE; - - if (bindless) - { - descriptor.Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE; - } - - descriptor.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; - descriptor.BaseShaderRegister = desc.BindPoint; - descriptor.NumDescriptors = desc.BindCount == 0 ? ~0 : desc.BindCount; - descriptor.RegisterSpace = desc.Space; - descriptor.OffsetInDescriptorsFromTableStart = bindless ? 0 : D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; - } - else - { - D3D12_DESCRIPTOR_RANGE1& descriptor = bindless ? internal_state->bindless_res.emplace_back() : internal_state->resources.emplace_back(); - - descriptor.Flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE; - - switch (desc.Type) - { - default: - case D3D_SIT_CBUFFER: - descriptor.Flags |= D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE; - descriptor.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - break; - case D3D_SIT_TBUFFER: - case D3D_SIT_STRUCTURED: - case D3D_SIT_BYTEADDRESS: - descriptor.Flags |= D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE; - descriptor.Flags |= D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE; - descriptor.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - break; - case D3D_SIT_TEXTURE: - case D3D_SIT_RTACCELERATIONSTRUCTURE: - descriptor.Flags |= D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE; - descriptor.Flags |= D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE; - descriptor.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - break; - case D3D_SIT_UAV_RWSTRUCTURED: - case D3D_SIT_UAV_RWBYTEADDRESS: - case D3D_SIT_UAV_APPEND_STRUCTURED: - case D3D_SIT_UAV_CONSUME_STRUCTURED: - case D3D_SIT_UAV_RWSTRUCTURED_WITH_COUNTER: - case D3D_SIT_UAV_RWTYPED: - case D3D_SIT_UAV_FEEDBACKTEXTURE: - descriptor.Flags |= D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE; - descriptor.Flags |= D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE; - 
descriptor.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; - break; - } - - if (bindless) - { - // bindless is always volatile - descriptor.Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE | D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE; - } - - descriptor.BaseShaderRegister = desc.BindPoint; - descriptor.NumDescriptors = desc.BindCount == 0 ? ~0 : desc.BindCount; - descriptor.RegisterSpace = desc.Space; - descriptor.OffsetInDescriptorsFromTableStart = bindless ? 0 : D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; - - if (bindless) - { - return; - } - - RESOURCEBINDING& binding = internal_state->resource_bindings.emplace_back(); - - switch (desc.Type) - { - default: - case D3D_SIT_CBUFFER: - binding = CONSTANTBUFFER; - break; - case D3D_SIT_TBUFFER: - binding = TYPEDBUFFER; - break; - case D3D_SIT_STRUCTURED: - binding = STRUCTUREDBUFFER; - break; - case D3D_SIT_BYTEADDRESS: - binding = RAWBUFFER; - break; - case D3D_SIT_TEXTURE: - switch (desc.Dimension) - { - case D3D_SRV_DIMENSION_BUFFER: - binding = TYPEDBUFFER; - break; - case D3D_SRV_DIMENSION_TEXTURE1D: - binding = TEXTURE1D; - break; - case D3D_SRV_DIMENSION_TEXTURE1DARRAY: - binding = TEXTURE1DARRAY; - break; - case D3D_SRV_DIMENSION_TEXTURE2D: - case D3D_SRV_DIMENSION_TEXTURE2DMS: - binding = TEXTURE2D; - break; - case D3D_SRV_DIMENSION_TEXTURE2DARRAY: - case D3D_SRV_DIMENSION_TEXTURE2DMSARRAY: - binding = TEXTURE2DARRAY; - break; - case D3D_SRV_DIMENSION_TEXTURE3D: - binding = TEXTURE3D; - break; - case D3D_SRV_DIMENSION_TEXTURECUBE: - binding = TEXTURECUBE; - break; - case D3D_SRV_DIMENSION_TEXTURECUBEARRAY: - binding = TEXTURECUBEARRAY; - break; - default: - assert(0); - break; - } - break; - case D3D_SIT_RTACCELERATIONSTRUCTURE: - binding = ACCELERATIONSTRUCTURE; - break; - case D3D_SIT_UAV_RWSTRUCTURED: - case D3D_SIT_UAV_APPEND_STRUCTURED: - case D3D_SIT_UAV_CONSUME_STRUCTURED: - case D3D_SIT_UAV_RWSTRUCTURED_WITH_COUNTER: - binding = RWSTRUCTUREDBUFFER; - break; - case D3D_SIT_UAV_RWBYTEADDRESS: - binding = 
RWRAWBUFFER; - break; - case D3D_SIT_UAV_RWTYPED: - switch (desc.Dimension) - { - case D3D_SRV_DIMENSION_BUFFER: - binding = RWTYPEDBUFFER; - break; - case D3D_SRV_DIMENSION_TEXTURE1D: - binding = RWTEXTURE1D; - break; - case D3D_SRV_DIMENSION_TEXTURE1DARRAY: - binding = RWTEXTURE1DARRAY; - break; - case D3D_SRV_DIMENSION_TEXTURE2D: - binding = RWTEXTURE2D; - break; - case D3D_SRV_DIMENSION_TEXTURE2DARRAY: - binding = RWTEXTURE2DARRAY; - break; - case D3D_SRV_DIMENSION_TEXTURE3D: - binding = RWTEXTURE3D; - break; - default: - assert(0); - break; - } - break; - case D3D_SIT_UAV_FEEDBACKTEXTURE: - switch (desc.Dimension) - { - case D3D_SRV_DIMENSION_BUFFER: - binding = RWTYPEDBUFFER; - break; - case D3D_SRV_DIMENSION_TEXTURE1D: - binding = RWTEXTURE1D; - break; - case D3D_SRV_DIMENSION_TEXTURE1DARRAY: - binding = RWTEXTURE1DARRAY; - break; - case D3D_SRV_DIMENSION_TEXTURE2D: - binding = RWTEXTURE2D; - break; - case D3D_SRV_DIMENSION_TEXTURE2DARRAY: - binding = RWTEXTURE2DARRAY; - break; - case D3D_SRV_DIMENSION_TEXTURE3D: - binding = RWTEXTURE3D; - break; - default: - assert(0); - break; - } - break; - } - } - }; - - DxcBuffer ReflectionData; - ReflectionData.Encoding = DXC_CP_ACP; - ReflectionData.Ptr = pShaderBytecode; - ReflectionData.Size = (SIZE_T)BytecodeLength; - - if (stage == ShaderStage::LIB) - { - ComPtr reflection; - hr = dxcUtils->CreateReflection(&ReflectionData, IID_PPV_ARGS(&reflection)); + hr = device->CreateRootSignature( + 0, + internal_state->shadercode.data(), + internal_state->shadercode.size(), + IID_PPV_ARGS(&internal_state->rootSignature) + ); assert(SUCCEEDED(hr)); - - D3D12_LIBRARY_DESC library_desc; - hr = reflection->GetDesc(&library_desc); - assert(SUCCEEDED(hr)); - - for (UINT i = 0; i < library_desc.FunctionCount; ++i) - { - ID3D12FunctionReflection* function_reflection = reflection->GetFunctionByIndex((INT)i); - assert(function_reflection != nullptr); - D3D12_FUNCTION_DESC function_desc; - hr = 
function_reflection->GetDesc(&function_desc); - assert(SUCCEEDED(hr)); - - for (UINT i = 0; i < function_desc.BoundResources; ++i) - { - D3D12_SHADER_INPUT_BIND_DESC desc; - hr = function_reflection->GetResourceBindingDesc(i, &desc); - assert(SUCCEEDED(hr)); - D3D12_SHADER_BUFFER_DESC bufferdesc = {}; - if (desc.Type == D3D_SIT_CBUFFER) - { - auto constantbuffer = function_reflection->GetConstantBufferByIndex(i); - if (constantbuffer != nullptr) - { - hr = constantbuffer->GetDesc(&bufferdesc); - assert(SUCCEEDED(hr)); - } - } - insert_descriptor(desc, bufferdesc); - } - } } - else // Shader reflection - { - ComPtr reflection; - hr = dxcUtils->CreateReflection(&ReflectionData, IID_PPV_ARGS(&reflection)); - assert(SUCCEEDED(hr)); + } - D3D12_SHADER_DESC shader_desc; - hr = reflection->GetDesc(&shader_desc); - assert(SUCCEEDED(hr)); - - for (UINT i = 0; i < shader_desc.BoundResources; ++i) - { - D3D12_SHADER_INPUT_BIND_DESC desc; - hr = reflection->GetResourceBindingDesc(i, &desc); - assert(SUCCEEDED(hr)); - D3D12_SHADER_BUFFER_DESC bufferdesc = {}; - if (desc.Type == D3D_SIT_CBUFFER) - { - auto constantbuffer = reflection->GetConstantBufferByIndex(i); - if (constantbuffer != nullptr) - { - hr = constantbuffer->GetDesc(&bufferdesc); - assert(SUCCEEDED(hr)); - } - } - insert_descriptor(desc, bufferdesc); - } - } - - for (auto& sam : internal_state->staticsamplers) - { - switch (stage) - { - case ShaderStage::MS: - sam.ShaderVisibility = D3D12_SHADER_VISIBILITY_MESH; - break; - case ShaderStage::AS: - sam.ShaderVisibility = D3D12_SHADER_VISIBILITY_AMPLIFICATION; - break; - case ShaderStage::VS: - sam.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; - break; - case ShaderStage::HS: - sam.ShaderVisibility = D3D12_SHADER_VISIBILITY_HULL; - break; - case ShaderStage::DS: - sam.ShaderVisibility = D3D12_SHADER_VISIBILITY_DOMAIN; - break; - case ShaderStage::GS: - sam.ShaderVisibility = D3D12_SHADER_VISIBILITY_GEOMETRY; - break; - case ShaderStage::PS: - sam.ShaderVisibility 
= D3D12_SHADER_VISIBILITY_PIXEL; - break; - default: - sam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - break; - } - } - - - if (stage == ShaderStage::CS || stage == ShaderStage::LIB) - { - wi::vector params; - - internal_state->bindpoint_rootconstant = (uint32_t)params.size(); - if (internal_state->rootconstants.Constants.Num32BitValues > 0) - { - auto& param = params.emplace_back(); - param = internal_state->rootconstants; - } - - // Split resources into root descriptors and tables: - { - wi::vector resources; - wi::vector bindings; - int i = 0; - for (auto& x : internal_state->resources) - { - RESOURCEBINDING binding = internal_state->resource_bindings[i++]; - if (x.NumDescriptors == 1 && x.RegisterSpace == 0 && binding == CONSTANTBUFFER && internal_state->root_cbvs.size() < CONSTANT_BUFFER_AUTO_PLACEMENT_IN_ROOT) - { - D3D12_ROOT_DESCRIPTOR1& descriptor = internal_state->root_cbvs.emplace_back(); - descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE; - descriptor.ShaderRegister = x.BaseShaderRegister; - descriptor.RegisterSpace = x.RegisterSpace; - } - else - { - resources.push_back(x); - bindings.push_back(binding); - } - } - internal_state->resources = resources; - internal_state->resource_bindings = bindings; - } - - for (auto& x : internal_state->resources) - { - internal_state->resource_binding_count_unrolled += x.NumDescriptors; - } - for (auto& x : internal_state->samplers) - { - internal_state->sampler_binding_count_unrolled += x.NumDescriptors; - } - - internal_state->bindpoint_rootdescriptor = (uint32_t)params.size(); - for (auto& x : internal_state->root_cbvs) - { - D3D12_ROOT_PARAMETER1& param = params.emplace_back(); - param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; - param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - param.Descriptor = x; - } - - internal_state->bindpoint_res = (uint32_t)params.size(); - if (!internal_state->resources.empty()) - { - D3D12_ROOT_PARAMETER1& param = params.emplace_back(); - param.ParameterType = 
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - param.DescriptorTable.NumDescriptorRanges = (UINT)internal_state->resources.size(); - param.DescriptorTable.pDescriptorRanges = internal_state->resources.data(); - } - - internal_state->bindpoint_sam = (uint32_t)params.size(); - if (!internal_state->samplers.empty()) - { - D3D12_ROOT_PARAMETER1& param = params.emplace_back(); - param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - param.DescriptorTable.NumDescriptorRanges = (UINT)internal_state->samplers.size(); - param.DescriptorTable.pDescriptorRanges = internal_state->samplers.data(); - } - - internal_state->bindpoint_bindless = (uint32_t)params.size(); - if(!internal_state->bindless_res.empty()) - { - D3D12_ROOT_PARAMETER1& param = params.emplace_back(); - param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - param.DescriptorTable.NumDescriptorRanges = (UINT)internal_state->bindless_res.size(); - param.DescriptorTable.pDescriptorRanges = internal_state->bindless_res.data(); - } - if (!internal_state->bindless_sam.empty()) - { - D3D12_ROOT_PARAMETER1& param = params.emplace_back(); - param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - param.DescriptorTable.NumDescriptorRanges = (UINT)internal_state->bindless_sam.size(); - param.DescriptorTable.pDescriptorRanges = internal_state->bindless_sam.data(); - } - - size_t rootconstant_hash = 0; - wi::helper::hash_combine(rootconstant_hash, internal_state->rootconstants.ShaderVisibility); - wi::helper::hash_combine(rootconstant_hash, internal_state->rootconstants.ParameterType); - wi::helper::hash_combine(rootconstant_hash, internal_state->rootconstants.Constants.Num32BitValues); - wi::helper::hash_combine(rootconstant_hash, 
internal_state->rootconstants.Constants.RegisterSpace); - wi::helper::hash_combine(rootconstant_hash, internal_state->rootconstants.Constants.ShaderRegister); - - size_t root_binding_hash = 0; - for (auto& x : internal_state->root_cbvs) - { - wi::helper::hash_combine(root_binding_hash, x.Flags); - wi::helper::hash_combine(root_binding_hash, x.ShaderRegister); - wi::helper::hash_combine(root_binding_hash, x.RegisterSpace); - } - - size_t resource_binding_hash = 0; - for (auto& x : internal_state->resources) - { - wi::helper::hash_combine(resource_binding_hash, x.BaseShaderRegister); - wi::helper::hash_combine(resource_binding_hash, x.NumDescriptors); - wi::helper::hash_combine(resource_binding_hash, x.Flags); - wi::helper::hash_combine(resource_binding_hash, x.OffsetInDescriptorsFromTableStart); - wi::helper::hash_combine(resource_binding_hash, x.RangeType); - wi::helper::hash_combine(resource_binding_hash, x.RegisterSpace); - } - - size_t sampler_binding_hash = 0; - for (auto& x : internal_state->samplers) - { - wi::helper::hash_combine(sampler_binding_hash, x.BaseShaderRegister); - wi::helper::hash_combine(sampler_binding_hash, x.NumDescriptors); - wi::helper::hash_combine(sampler_binding_hash, x.Flags); - wi::helper::hash_combine(sampler_binding_hash, x.OffsetInDescriptorsFromTableStart); - wi::helper::hash_combine(sampler_binding_hash, x.RangeType); - wi::helper::hash_combine(sampler_binding_hash, x.RegisterSpace); - } - - size_t bindless_hash = 0; - for (auto& x : internal_state->bindless_res) - { - wi::helper::hash_combine(bindless_hash, x.BaseShaderRegister); - wi::helper::hash_combine(bindless_hash, x.NumDescriptors); - wi::helper::hash_combine(bindless_hash, x.Flags); - wi::helper::hash_combine(bindless_hash, x.OffsetInDescriptorsFromTableStart); - wi::helper::hash_combine(bindless_hash, x.RangeType); - wi::helper::hash_combine(bindless_hash, x.RegisterSpace); - } - for (auto& x : internal_state->bindless_sam) - { - wi::helper::hash_combine(bindless_hash, 
x.BaseShaderRegister); - wi::helper::hash_combine(bindless_hash, x.NumDescriptors); - wi::helper::hash_combine(bindless_hash, x.Flags); - wi::helper::hash_combine(bindless_hash, x.OffsetInDescriptorsFromTableStart); - wi::helper::hash_combine(bindless_hash, x.RangeType); - wi::helper::hash_combine(bindless_hash, x.RegisterSpace); - } - - size_t rootsig_hash = 0; - wi::helper::hash_combine(rootsig_hash, rootconstant_hash); - wi::helper::hash_combine(rootsig_hash, root_binding_hash); - wi::helper::hash_combine(rootsig_hash, resource_binding_hash); - wi::helper::hash_combine(rootsig_hash, sampler_binding_hash); - wi::helper::hash_combine(rootsig_hash, bindless_hash); - for (auto& x : internal_state->staticsamplers) - { - wi::helper::hash_combine(rootsig_hash, x.AddressU); - wi::helper::hash_combine(rootsig_hash, x.AddressV); - wi::helper::hash_combine(rootsig_hash, x.AddressW); - wi::helper::hash_combine(rootsig_hash, x.BorderColor); - wi::helper::hash_combine(rootsig_hash, x.ComparisonFunc); - wi::helper::hash_combine(rootsig_hash, x.Filter); - wi::helper::hash_combine(rootsig_hash, x.MaxAnisotropy); - wi::helper::hash_combine(rootsig_hash, x.MaxLOD); - wi::helper::hash_combine(rootsig_hash, x.MinLOD); - wi::helper::hash_combine(rootsig_hash, x.MipLODBias); - wi::helper::hash_combine(rootsig_hash, x.RegisterSpace); - wi::helper::hash_combine(rootsig_hash, x.ShaderRegister); - wi::helper::hash_combine(rootsig_hash, x.ShaderVisibility); - } - - rootsignature_cache_mutex.lock(); - if (rootsignature_cache[rootsig_hash]) - { - internal_state->rootSignature = rootsignature_cache[rootsig_hash]; - } - else - { - D3D12_ROOT_SIGNATURE_DESC1 rootSigDesc = {}; - rootSigDesc.NumStaticSamplers = (UINT)internal_state->staticsamplers.size(); - rootSigDesc.pStaticSamplers = internal_state->staticsamplers.data(); - rootSigDesc.NumParameters = (UINT)params.size(); - rootSigDesc.pParameters = params.data(); - rootSigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; - - 
D3D12_VERSIONED_ROOT_SIGNATURE_DESC versioned_rs = {}; - versioned_rs.Version = D3D_ROOT_SIGNATURE_VERSION_1_1; - versioned_rs.Desc_1_1 = rootSigDesc; - - ID3DBlob* rootSigBlob; - ID3DBlob* rootSigError; - hr = D3D12SerializeVersionedRootSignature(&versioned_rs, &rootSigBlob, &rootSigError); - if (FAILED(hr)) - { - OutputDebugStringA((char*)rootSigError->GetBufferPointer()); - assert(0); - } - hr = device->CreateRootSignature(0, rootSigBlob->GetBufferPointer(), rootSigBlob->GetBufferSize(), IID_PPV_ARGS(&internal_state->rootSignature)); - assert(SUCCEEDED(hr)); - if (SUCCEEDED(hr)) - { - rootsignature_cache[rootsig_hash] = internal_state->rootSignature; - } - } - rootsignature_cache_mutex.unlock(); - } + if (stage == ShaderStage::CS || stage == ShaderStage::LIB) + { + assert(internal_state->rootSignature != nullptr); + assert(internal_state->rootsig_desc != nullptr); + internal_state->rootsig_optimizer.init(*internal_state->rootsig_desc); } if (stage == ShaderStage::CS) { struct PSO_STREAM { - CD3DX12_PIPELINE_STATE_STREAM_ROOT_SIGNATURE pRootSignature; CD3DX12_PIPELINE_STATE_STREAM_CS CS; } stream; - stream.pRootSignature = internal_state->rootSignature.Get(); stream.CS = { internal_state->shadercode.data(), internal_state->shadercode.size() }; D3D12_PIPELINE_STATE_STREAM_DESC streamDesc = {}; @@ -3996,383 +3386,83 @@ using namespace dx12_internal; wi::helper::hash_combine(pso->hash, pDesc->pt); wi::helper::hash_combine(pso->hash, pDesc->sample_mask); - HRESULT hr = S_OK; - - { - // Root signature comes from reflection data when there is no root signature specified: - - auto insert_shader = [&](const Shader* shader) - { - if (shader == nullptr) - return; - - auto shader_internal = to_internal(shader); - - size_t check_max = internal_state->resources.size(); // dont't check for duplicates within self table - int b = 0; - for (auto& x : shader_internal->resources) - { - RESOURCEBINDING binding = shader_internal->resource_bindings[b++]; - - bool found = false; - 
size_t i = 0; - for (auto& y : internal_state->resources) - { - if (x.BaseShaderRegister == y.BaseShaderRegister && x.RangeType == y.RangeType) - { - found = true; - break; - } - if (i++ >= check_max) - break; - } - - if (!found) - { - internal_state->resources.push_back(x); - internal_state->resource_bindings.push_back(binding); - } - } - - check_max = internal_state->samplers.size(); // dont't check for duplicates within self table - for (auto& x : shader_internal->samplers) - { - bool found = false; - size_t i = 0; - for (auto& y : internal_state->samplers) - { - if (x.BaseShaderRegister == y.BaseShaderRegister && x.RangeType == y.RangeType) - { - found = true; - break; - } - if (i++ >= check_max) - break; - } - - if (!found) - { - internal_state->samplers.push_back(x); - } - } - - for (auto& x : shader_internal->staticsamplers) - { - internal_state->staticsamplers.push_back(x); - } - - if (shader_internal->rootconstants.Constants.Num32BitValues > 0) - { - internal_state->rootconstants = shader_internal->rootconstants; - } - }; - - insert_shader(pDesc->ps); // prioritize ps root descriptor assignment - insert_shader(pDesc->ms); - insert_shader(pDesc->as); - insert_shader(pDesc->vs); - insert_shader(pDesc->hs); - insert_shader(pDesc->ds); - insert_shader(pDesc->gs); - - wi::vector params; - - internal_state->bindpoint_rootconstant = (uint32_t)params.size(); - if (internal_state->rootconstants.Constants.Num32BitValues > 0) - { - auto& param = params.emplace_back(); - param = internal_state->rootconstants; - } - - // Split resources into root descriptors and tables: - { - wi::vector resources; - wi::vector bindings; - int i = 0; - for (auto& x : internal_state->resources) - { - RESOURCEBINDING binding = internal_state->resource_bindings[i++]; - if (x.NumDescriptors == 1 && binding == CONSTANTBUFFER && internal_state->root_cbvs.size() < CONSTANT_BUFFER_AUTO_PLACEMENT_IN_ROOT) - { - D3D12_ROOT_DESCRIPTOR1& descriptor = internal_state->root_cbvs.emplace_back(); - 
descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE; - descriptor.ShaderRegister = x.BaseShaderRegister; - descriptor.RegisterSpace = x.RegisterSpace; - } - else - { - resources.push_back(x); - bindings.push_back(binding); - } - } - internal_state->resources = resources; - internal_state->resource_bindings = bindings; - } - - for (auto& x : internal_state->resources) - { - internal_state->resource_binding_count_unrolled += x.NumDescriptors; - } - for (auto& x : internal_state->samplers) - { - internal_state->sampler_binding_count_unrolled += x.NumDescriptors; - } - - internal_state->bindpoint_rootdescriptor = (uint32_t)params.size(); - for (auto& x : internal_state->root_cbvs) - { - D3D12_ROOT_PARAMETER1& param = params.emplace_back(); - param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; - param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - param.Descriptor = x; - } - - internal_state->bindpoint_res = (uint32_t)params.size(); - if (!internal_state->resources.empty()) - { - D3D12_ROOT_PARAMETER1& param = params.emplace_back(); - param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - param.DescriptorTable.NumDescriptorRanges = (UINT)internal_state->resources.size(); - param.DescriptorTable.pDescriptorRanges = internal_state->resources.data(); - } - - internal_state->bindpoint_sam = (uint32_t)params.size(); - if (!internal_state->samplers.empty()) - { - D3D12_ROOT_PARAMETER1& param = params.emplace_back(); - param = {}; - param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - param.DescriptorTable.NumDescriptorRanges = (UINT)internal_state->samplers.size(); - param.DescriptorTable.pDescriptorRanges = internal_state->samplers.data(); - } - - internal_state->bindpoint_bindless = (uint32_t)params.size(); - - auto insert_shader_bindless = [&](const Shader* shader, D3D12_SHADER_VISIBILITY stage) { - if (shader == nullptr) - return; 
- - auto shader_internal = to_internal(shader); - - for (auto& x : shader_internal->bindless_res) - { - bool found = false; - for (auto& y : internal_state->bindless_res) - { - if (x.RegisterSpace == y.RegisterSpace) - { - found = true; - break; - } - } - if (!found) - internal_state->bindless_res.push_back(x); - } - - for (auto& x : shader_internal->bindless_sam) - { - bool found = false; - for (auto& y : internal_state->bindless_sam) - { - if (x.RegisterSpace == y.RegisterSpace) - { - found = true; - break; - } - } - if (!found) - internal_state->bindless_sam.push_back(x); - } - }; - - insert_shader_bindless(pDesc->ms, D3D12_SHADER_VISIBILITY_MESH); - insert_shader_bindless(pDesc->as, D3D12_SHADER_VISIBILITY_AMPLIFICATION); - insert_shader_bindless(pDesc->vs, D3D12_SHADER_VISIBILITY_VERTEX); - insert_shader_bindless(pDesc->hs, D3D12_SHADER_VISIBILITY_HULL); - insert_shader_bindless(pDesc->ds, D3D12_SHADER_VISIBILITY_DOMAIN); - insert_shader_bindless(pDesc->gs, D3D12_SHADER_VISIBILITY_GEOMETRY); - insert_shader_bindless(pDesc->ps, D3D12_SHADER_VISIBILITY_PIXEL); - - internal_state->bindpoint_bindless = (uint32_t)params.size(); - if (!internal_state->bindless_res.empty()) - { - D3D12_ROOT_PARAMETER1& param = params.emplace_back(); - param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - param.DescriptorTable.NumDescriptorRanges = (UINT)internal_state->bindless_res.size(); - param.DescriptorTable.pDescriptorRanges = internal_state->bindless_res.data(); - } - if (!internal_state->bindless_sam.empty()) - { - D3D12_ROOT_PARAMETER1& param = params.emplace_back(); - param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - param.DescriptorTable.NumDescriptorRanges = (UINT)internal_state->bindless_sam.size(); - param.DescriptorTable.pDescriptorRanges = internal_state->bindless_sam.data(); - } - - size_t rootconstant_hash = 0; - 
wi::helper::hash_combine(rootconstant_hash, internal_state->rootconstants.ShaderVisibility); - wi::helper::hash_combine(rootconstant_hash, internal_state->rootconstants.ParameterType); - wi::helper::hash_combine(rootconstant_hash, internal_state->rootconstants.Constants.Num32BitValues); - wi::helper::hash_combine(rootconstant_hash, internal_state->rootconstants.Constants.RegisterSpace); - wi::helper::hash_combine(rootconstant_hash, internal_state->rootconstants.Constants.ShaderRegister); - - size_t root_binding_hash = 0; - for (auto& x : internal_state->root_cbvs) - { - wi::helper::hash_combine(root_binding_hash, x.Flags); - wi::helper::hash_combine(root_binding_hash, x.ShaderRegister); - wi::helper::hash_combine(root_binding_hash, x.RegisterSpace); - } - - size_t resource_binding_hash = 0; - for (auto& x : internal_state->resources) - { - wi::helper::hash_combine(resource_binding_hash, x.BaseShaderRegister); - wi::helper::hash_combine(resource_binding_hash, x.NumDescriptors); - wi::helper::hash_combine(resource_binding_hash, x.Flags); - wi::helper::hash_combine(resource_binding_hash, x.OffsetInDescriptorsFromTableStart); - wi::helper::hash_combine(resource_binding_hash, x.RangeType); - wi::helper::hash_combine(resource_binding_hash, x.RegisterSpace); - } - - size_t sampler_binding_hash = 0; - for (auto& x : internal_state->samplers) - { - wi::helper::hash_combine(sampler_binding_hash, x.BaseShaderRegister); - wi::helper::hash_combine(sampler_binding_hash, x.NumDescriptors); - wi::helper::hash_combine(sampler_binding_hash, x.Flags); - wi::helper::hash_combine(sampler_binding_hash, x.OffsetInDescriptorsFromTableStart); - wi::helper::hash_combine(sampler_binding_hash, x.RangeType); - wi::helper::hash_combine(sampler_binding_hash, x.RegisterSpace); - } - - size_t bindless_hash = 0; - for (auto& x : internal_state->bindless_res) - { - wi::helper::hash_combine(bindless_hash, x.BaseShaderRegister); - wi::helper::hash_combine(bindless_hash, x.NumDescriptors); - 
wi::helper::hash_combine(bindless_hash, x.Flags); - wi::helper::hash_combine(bindless_hash, x.OffsetInDescriptorsFromTableStart); - wi::helper::hash_combine(bindless_hash, x.RangeType); - wi::helper::hash_combine(bindless_hash, x.RegisterSpace); - } - for (auto& x : internal_state->bindless_sam) - { - wi::helper::hash_combine(bindless_hash, x.BaseShaderRegister); - wi::helper::hash_combine(bindless_hash, x.NumDescriptors); - wi::helper::hash_combine(bindless_hash, x.Flags); - wi::helper::hash_combine(bindless_hash, x.OffsetInDescriptorsFromTableStart); - wi::helper::hash_combine(bindless_hash, x.RangeType); - wi::helper::hash_combine(bindless_hash, x.RegisterSpace); - } - - size_t rootsig_hash = 0; - wi::helper::hash_combine(rootsig_hash, pDesc->il); - wi::helper::hash_combine(rootsig_hash, rootconstant_hash); - wi::helper::hash_combine(rootsig_hash, root_binding_hash); - wi::helper::hash_combine(rootsig_hash, resource_binding_hash); - wi::helper::hash_combine(rootsig_hash, sampler_binding_hash); - wi::helper::hash_combine(rootsig_hash, bindless_hash); - for (auto& x : internal_state->staticsamplers) - { - wi::helper::hash_combine(rootsig_hash, x.AddressU); - wi::helper::hash_combine(rootsig_hash, x.AddressV); - wi::helper::hash_combine(rootsig_hash, x.AddressW); - wi::helper::hash_combine(rootsig_hash, x.BorderColor); - wi::helper::hash_combine(rootsig_hash, x.ComparisonFunc); - wi::helper::hash_combine(rootsig_hash, x.Filter); - wi::helper::hash_combine(rootsig_hash, x.MaxAnisotropy); - wi::helper::hash_combine(rootsig_hash, x.MaxLOD); - wi::helper::hash_combine(rootsig_hash, x.MinLOD); - wi::helper::hash_combine(rootsig_hash, x.MipLODBias); - wi::helper::hash_combine(rootsig_hash, x.RegisterSpace); - wi::helper::hash_combine(rootsig_hash, x.ShaderRegister); - wi::helper::hash_combine(rootsig_hash, x.ShaderVisibility); - } - - rootsignature_cache_mutex.lock(); - if (rootsignature_cache[rootsig_hash]) - { - internal_state->rootSignature = 
rootsignature_cache[rootsig_hash]; - } - else - { - D3D12_ROOT_SIGNATURE_DESC1 rootSigDesc = {}; - rootSigDesc.NumStaticSamplers = (UINT)internal_state->staticsamplers.size(); - rootSigDesc.pStaticSamplers = internal_state->staticsamplers.data(); - rootSigDesc.NumParameters = (UINT)params.size(); - rootSigDesc.pParameters = params.data(); - if (pDesc->il != nullptr) - { - rootSigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; - } - - D3D12_VERSIONED_ROOT_SIGNATURE_DESC versioned_rs = {}; - versioned_rs.Version = D3D_ROOT_SIGNATURE_VERSION_1_1; - versioned_rs.Desc_1_1 = rootSigDesc; - - ID3DBlob* rootSigBlob; - ID3DBlob* rootSigError; - hr = D3D12SerializeVersionedRootSignature(&versioned_rs, &rootSigBlob, &rootSigError); - if (FAILED(hr)) - { - OutputDebugStringA((char*)rootSigError->GetBufferPointer()); - assert(0); - rootsignature_cache_mutex.unlock(); - return false; - } - hr = device->CreateRootSignature(0, rootSigBlob->GetBufferPointer(), rootSigBlob->GetBufferSize(), IID_PPV_ARGS(&internal_state->rootSignature)); - assert(SUCCEEDED(hr)); - if (SUCCEEDED(hr)) - { - rootsignature_cache[rootsig_hash] = internal_state->rootSignature; - } - } - rootsignature_cache_mutex.unlock(); - } - auto& stream = internal_state->stream; if (pso->desc.vs != nullptr) { auto shader_internal = to_internal(pso->desc.vs); stream.stream1.VS = { shader_internal->shadercode.data(), shader_internal->shadercode.size() }; + if (internal_state->rootSignature == nullptr) + { + internal_state->rootSignature = shader_internal->rootSignature; + internal_state->rootsig_desc = shader_internal->rootsig_desc; + } } if (pso->desc.hs != nullptr) { auto shader_internal = to_internal(pso->desc.hs); stream.stream1.HS = { shader_internal->shadercode.data(), shader_internal->shadercode.size() }; + if (internal_state->rootSignature == nullptr) + { + internal_state->rootSignature = shader_internal->rootSignature; + internal_state->rootsig_desc = shader_internal->rootsig_desc; + } 
} if (pso->desc.ds != nullptr) { auto shader_internal = to_internal(pso->desc.ds); stream.stream1.DS = { shader_internal->shadercode.data(),shader_internal->shadercode.size() }; + if (internal_state->rootSignature == nullptr) + { + internal_state->rootSignature = shader_internal->rootSignature; + internal_state->rootsig_desc = shader_internal->rootsig_desc; + } } if (pso->desc.gs != nullptr) { auto shader_internal = to_internal(pso->desc.gs); stream.stream1.GS = { shader_internal->shadercode.data(), shader_internal->shadercode.size() }; + if (internal_state->rootSignature == nullptr) + { + internal_state->rootSignature = shader_internal->rootSignature; + internal_state->rootsig_desc = shader_internal->rootsig_desc; + } } if (pso->desc.ps != nullptr) { auto shader_internal = to_internal(pso->desc.ps); stream.stream1.PS = { shader_internal->shadercode.data(), shader_internal->shadercode.size() }; + if (internal_state->rootSignature == nullptr) + { + internal_state->rootSignature = shader_internal->rootSignature; + internal_state->rootsig_desc = shader_internal->rootsig_desc; + } } if (pso->desc.ms != nullptr) { auto shader_internal = to_internal(pso->desc.ms); stream.stream2.MS = { shader_internal->shadercode.data(), shader_internal->shadercode.size() }; + if (internal_state->rootSignature == nullptr) + { + internal_state->rootSignature = shader_internal->rootSignature; + internal_state->rootsig_desc = shader_internal->rootsig_desc; + } } if (pso->desc.as != nullptr) { auto shader_internal = to_internal(pso->desc.as); stream.stream2.AS = { shader_internal->shadercode.data(), shader_internal->shadercode.size() }; + if (internal_state->rootSignature == nullptr) + { + internal_state->rootSignature = shader_internal->rootSignature; + internal_state->rootsig_desc = shader_internal->rootsig_desc; + } } + assert(internal_state->rootSignature != nullptr); + assert(internal_state->rootsig_desc != nullptr); + 
internal_state->rootsig_optimizer.init(*internal_state->rootsig_desc); + RasterizerState pRasterizerStateDesc = pso->desc.rs != nullptr ? *pso->desc.rs : RasterizerState(); CD3DX12_RASTERIZER_DESC rs = {}; rs.FillMode = _ConvertFillMode(pRasterizerStateDesc.fill_mode); @@ -4483,9 +3573,7 @@ using namespace dx12_internal; stream.stream1.STRIP = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED; - stream.stream1.pRootSignature = internal_state->rootSignature.Get(); - - return SUCCEEDED(hr); + return true; } bool GraphicsDevice_DX12::CreateRenderPass(const RenderPassDesc* pDesc, RenderPass* renderpass) const { @@ -4887,7 +3975,7 @@ using namespace dx12_internal; auto& subobject = subobjects.emplace_back(); subobject = {}; subobject.Type = D3D12_STATE_SUBOBJECT_TYPE_GLOBAL_ROOT_SIGNATURE; - auto shader_internal = to_internal(pDesc->shader_libraries.front().shader); // think better way + auto shader_internal = to_internal(pDesc->shader_libraries.front().shader); global_rootsig.pGlobalRootSignature = shader_internal->rootSignature.Get(); subobject.pDesc = &global_rootsig; } @@ -4961,6 +4049,9 @@ using namespace dx12_internal; HRESULT hr = device->CreateStateObject(&desc, IID_PPV_ARGS(&internal_state->resource)); assert(SUCCEEDED(hr)); + hr = internal_state->resource.As(&internal_state->stateObjectProperties); + assert(SUCCEEDED(hr)); + return SUCCEEDED(hr); } @@ -5497,7 +4588,7 @@ using namespace dx12_internal; { D3D12_RAYTRACING_INSTANCE_DESC* desc = (D3D12_RAYTRACING_INSTANCE_DESC*)dest; desc->AccelerationStructure = to_internal(&instance->bottom_level)->gpu_address; - memcpy(desc->Transform, &instance->transform, sizeof(desc->Transform)); + std::memcpy(desc->Transform, &instance->transform, sizeof(desc->Transform)); desc->InstanceID = instance->instance_id; desc->InstanceMask = instance->instance_mask; desc->InstanceContributionToHitGroupIndex = instance->instance_contribution_to_hit_group_index; @@ -5507,19 +4598,10 @@ using namespace dx12_internal; { auto internal_state = 
to_internal(rtpso); - ComPtr stateObjectProperties; - HRESULT hr = internal_state->resource.As(&stateObjectProperties); - assert(SUCCEEDED(hr)); - - void* identifier = stateObjectProperties->GetShaderIdentifier(internal_state->group_strings[group_index].c_str()); - memcpy(dest, identifier, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); + void* identifier = internal_state->stateObjectProperties->GetShaderIdentifier(internal_state->group_strings[group_index].c_str()); + std::memcpy(dest, identifier, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); } - void GraphicsDevice_DX12::SetCommonSampler(const StaticSampler* sam) - { - common_samplers.push_back(_ConvertStaticSampler(*sam)); - } - void GraphicsDevice_DX12::SetName(GPUResource* pResource, const char* name) { wchar_t text[256]; @@ -5550,7 +4632,6 @@ using namespace dx12_internal; { hr = device->CreateCommandAllocator(queues[queue].desc.Type, IID_PPV_ARGS(&frames[fr].commandAllocators[cmd][queue])); assert(SUCCEEDED(hr)); - } hr = device->CreateCommandList1(0, queues[queue].desc.Type, D3D12_COMMAND_LIST_FLAG_NONE, IID_PPV_ARGS(&commandLists[cmd][queue])); @@ -5574,8 +4655,6 @@ using namespace dx12_internal; }; GetCommandList(cmd)->SetDescriptorHeaps(arraysize(heaps), heaps); - binders[cmd].reset(); - if (queue == QUEUE_GRAPHICS) { D3D12_RECT pRects[D3D12_VIEWPORT_AND_SCISSORRECT_MAX_INDEX + 1]; @@ -5589,6 +4668,7 @@ using namespace dx12_internal; GetCommandList(cmd)->RSSetScissorRects(arraysize(pRects), pRects); } + binders[cmd].reset(); prev_pt[cmd] = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; prev_pipeline_hash[cmd] = 0; active_pso[cmd] = nullptr; @@ -5599,7 +4679,6 @@ using namespace dx12_internal; active_renderpass[cmd] = nullptr; prev_shadingrate[cmd] = ShadingRate::RATE_INVALID; dirty_pso[cmd] = false; - pushconstants[cmd] = {}; swapchains[cmd].clear(); active_backbuffer[cmd] = nullptr; @@ -5724,6 +4803,9 @@ using namespace dx12_internal; } } + descriptorheap_res.SignalGPU(queues[QUEUE_GRAPHICS].queue.Get()); + 
descriptorheap_sam.SignalGPU(queues[QUEUE_GRAPHICS].queue.Get()); + // From here, we begin a new frame, this affects GetFrameResources()! FRAMECOUNT++; @@ -5745,16 +4827,6 @@ using namespace dx12_internal; } assert(SUCCEEDED(hr)); - // Descriptor heaps' progress is recorded by the GPU: - descriptorheap_res.fenceValue = descriptorheap_res.allocationOffset.load(); - hr = queues[QUEUE_GRAPHICS].queue->Signal(descriptorheap_res.fence.Get(), descriptorheap_res.fenceValue); - assert(SUCCEEDED(hr)); - descriptorheap_res.cached_completedValue = descriptorheap_res.fence->GetCompletedValue(); - descriptorheap_sam.fenceValue = descriptorheap_sam.allocationOffset.load(); - hr = queues[QUEUE_GRAPHICS].queue->Signal(descriptorheap_sam.fence.Get(), descriptorheap_sam.fenceValue); - assert(SUCCEEDED(hr)); - descriptorheap_sam.cached_completedValue = descriptorheap_sam.fence->GetCompletedValue(); - allocationhandler->Update(FRAMECOUNT, BUFFERCOUNT); } } @@ -5891,7 +4963,10 @@ using namespace dx12_internal; auto internal_state = to_internal(active_renderpass[cmd]); - GetCommandList(cmd)->ResourceBarrier(internal_state->num_barriers_begin, internal_state->barrierdescs_begin); + if (internal_state->num_barriers_begin > 0) + { + GetCommandList(cmd)->ResourceBarrier(internal_state->num_barriers_begin, internal_state->barrierdescs_begin); + } if (internal_state->shading_rate_image != nullptr) { @@ -5919,7 +4994,10 @@ using namespace dx12_internal; GetCommandList(cmd)->RSSetShadingRateImage(nullptr); } - GetCommandList(cmd)->ResourceBarrier(internal_state->num_barriers_end, internal_state->barrierdescs_end); + if (internal_state->num_barriers_end > 0) + { + GetCommandList(cmd)->ResourceBarrier(internal_state->num_barriers_end, internal_state->barrierdescs_end); + } } active_renderpass[cmd] = nullptr; @@ -5976,7 +5054,23 @@ using namespace dx12_internal; { binder.table.SRV[slot] = *resource; binder.table.SRV_index[slot] = subresource; - binder.dirty_res = true; + + if 
(binder.optimizer_graphics != nullptr) + { + const RootSignatureOptimizer& optimizer = *(RootSignatureOptimizer*)binder.optimizer_graphics; + if (optimizer.SRV[slot] != RootSignatureOptimizer::INVALID_ROOT_PARAMETER) + { + binder.dirty_graphics |= 1ull << optimizer.SRV[slot]; + } + } + if (binder.optimizer_compute != nullptr) + { + const RootSignatureOptimizer& optimizer = *(RootSignatureOptimizer*)binder.optimizer_compute; + if (optimizer.SRV[slot] != RootSignatureOptimizer::INVALID_ROOT_PARAMETER) + { + binder.dirty_compute |= 1ull << optimizer.SRV[slot]; + } + } } } void GraphicsDevice_DX12::BindResources(const GPUResource* const* resources, uint32_t slot, uint32_t count, CommandList cmd) @@ -5997,7 +5091,23 @@ using namespace dx12_internal; { binder.table.UAV[slot] = *resource; binder.table.UAV_index[slot] = subresource; - binder.dirty_res = true; + + if (binder.optimizer_graphics != nullptr) + { + const RootSignatureOptimizer& optimizer = *(RootSignatureOptimizer*)binder.optimizer_graphics; + if (optimizer.UAV[slot] != RootSignatureOptimizer::INVALID_ROOT_PARAMETER) + { + binder.dirty_graphics |= 1ull << optimizer.UAV[slot]; + } + } + if (binder.optimizer_compute != nullptr) + { + const RootSignatureOptimizer& optimizer = *(RootSignatureOptimizer*)binder.optimizer_compute; + if (optimizer.UAV[slot] != RootSignatureOptimizer::INVALID_ROOT_PARAMETER) + { + binder.dirty_compute |= 1ull << optimizer.UAV[slot]; + } + } } } void GraphicsDevice_DX12::BindUAVs(const GPUResource* const* resources, uint32_t slot, uint32_t count, CommandList cmd) @@ -6017,7 +5127,23 @@ using namespace dx12_internal; if (binder.table.SAM[slot].internal_state != sampler->internal_state) { binder.table.SAM[slot] = *sampler; - binder.dirty_sam = true; + + if (binder.optimizer_graphics != nullptr) + { + const RootSignatureOptimizer& optimizer = *(RootSignatureOptimizer*)binder.optimizer_graphics; + if (optimizer.SAM[slot] != RootSignatureOptimizer::INVALID_ROOT_PARAMETER) + { + 
binder.dirty_graphics |= 1ull << optimizer.SAM[slot]; + } + } + if (binder.optimizer_compute != nullptr) + { + const RootSignatureOptimizer& optimizer = *(RootSignatureOptimizer*)binder.optimizer_compute; + if (optimizer.SAM[slot] != RootSignatureOptimizer::INVALID_ROOT_PARAMETER) + { + binder.dirty_compute |= 1ull << optimizer.SAM[slot]; + } + } } } void GraphicsDevice_DX12::BindConstantBuffer(const GPUBuffer* buffer, uint32_t slot, CommandList cmd, uint64_t offset) @@ -6028,22 +5154,23 @@ using namespace dx12_internal; { binder.table.CBV[slot] = *buffer; binder.table.CBV_offset[slot] = offset; - binder.dirty_res = true; - // Root constant buffer root signature state tracking: - auto internal_state = to_internal(buffer); - if (internal_state->cbv_mask_frame[cmd] != FRAMECOUNT) + if (binder.optimizer_graphics != nullptr) { - // This is the first binding as constant buffer in this frame for this resource, - // so clear the cbv flags completely - internal_state->cbv_mask[cmd] = 0; - internal_state->cbv_mask_frame[cmd] = FRAMECOUNT; + const RootSignatureOptimizer& optimizer = *(RootSignatureOptimizer*)binder.optimizer_graphics; + if (optimizer.CBV[slot] != RootSignatureOptimizer::INVALID_ROOT_PARAMETER) + { + binder.dirty_graphics |= 1ull << optimizer.CBV[slot]; + } + } + if (binder.optimizer_compute != nullptr) + { + const RootSignatureOptimizer& optimizer = *(RootSignatureOptimizer*)binder.optimizer_compute; + if (optimizer.CBV[slot] != RootSignatureOptimizer::INVALID_ROOT_PARAMETER) + { + binder.dirty_compute |= 1ull << optimizer.CBV[slot]; + } } - - // CBV flag marked as bound for this slot: - // Also, the corresponding slot is marked dirty - internal_state->cbv_mask[cmd] |= 1 << slot; - binder.dirty_root_cbvs |= 1 << slot; } } void GraphicsDevice_DX12::BindVertexBuffers(const GPUBuffer* const* vertexBuffers, uint32_t slot, uint32_t count, const uint32_t* strides, const uint64_t* offsets, CommandList cmd) @@ -6129,21 +5256,9 @@ using namespace dx12_internal; 
active_rootsig_graphics[cmd] = internal_state->rootSignature.Get(); GetCommandList(cmd)->SetGraphicsRootSignature(internal_state->rootSignature.Get()); - // Invalidate graphics root bindings: - binders[cmd].dirty_res = true; - binders[cmd].dirty_sam = true; - binders[cmd].dirty_root_cbvs = ~0; - - // Set the bindless tables: - uint32_t bindpoint = internal_state->bindpoint_bindless; - if (!internal_state->bindless_res.empty()) - { - GetCommandList(cmd)->SetGraphicsRootDescriptorTable(bindpoint++, descriptorheap_res.start_gpu); - } - if (!internal_state->bindless_sam.empty()) - { - GetCommandList(cmd)->SetGraphicsRootDescriptorTable(bindpoint++, descriptorheap_sam.start_gpu); - } + auto& binder = binders[cmd]; + binder.optimizer_graphics = &internal_state->rootsig_optimizer; + binder.dirty_graphics = internal_state->rootsig_optimizer.root_mask; // invalidates all root bindings } active_pso[cmd] = pso; @@ -6173,21 +5288,9 @@ using namespace dx12_internal; active_rootsig_compute[cmd] = internal_state->rootSignature.Get(); GetCommandList(cmd)->SetComputeRootSignature(internal_state->rootSignature.Get()); - // Invalidate compute root bindings: - binders[cmd].dirty_res = true; - binders[cmd].dirty_sam = true; - binders[cmd].dirty_root_cbvs = ~0; - - // Set the bindless tables: - uint32_t bindpoint = internal_state->bindpoint_bindless; - if (!internal_state->bindless_res.empty()) - { - GetCommandList(cmd)->SetComputeRootDescriptorTable(bindpoint++, descriptorheap_res.start_gpu); - } - if (!internal_state->bindless_sam.empty()) - { - GetCommandList(cmd)->SetComputeRootDescriptorTable(bindpoint++, descriptorheap_sam.start_gpu); - } + auto& binder = binders[cmd]; + binder.optimizer_compute = &internal_state->rootsig_optimizer; + binder.dirty_compute = internal_state->rootsig_optimizer.root_mask; // invalidates all root bindings } } @@ -6600,10 +5703,41 @@ using namespace dx12_internal; GetCommandList(cmd)->DispatchRays(&dispatchrays_desc); } - void 
GraphicsDevice_DX12::PushConstants(const void* data, uint32_t size, CommandList cmd) + void GraphicsDevice_DX12::PushConstants(const void* data, uint32_t size, CommandList cmd, uint32_t offset) { - std::memcpy(pushconstants[cmd].data, data, size); - pushconstants[cmd].size = size; + assert(size % sizeof(uint32_t) == 0); + assert(offset % sizeof(uint32_t) == 0); + + auto& binder = binders[cmd]; + if (active_pso[cmd] != nullptr) + { + const RootSignatureOptimizer* optimizer = (const RootSignatureOptimizer*)binder.optimizer_graphics; + const D3D12_ROOT_PARAMETER1& param = optimizer->rootsig_desc->Desc_1_1.pParameters[optimizer->PUSH]; + assert(param.ParameterType == D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS); + assert((uint64_t)offset + size <= param.Constants.Num32BitValues * sizeof(uint32_t)); // if this fires, the range [offset, offset + size) does not fit in the root constants declared in root signature! + GetCommandList(cmd)->SetGraphicsRoot32BitConstants( + optimizer->PUSH, + size / sizeof(uint32_t), + data, + offset / sizeof(uint32_t) + ); + return; + } + if (active_cs[cmd] != nullptr) + { + const RootSignatureOptimizer* optimizer = (const RootSignatureOptimizer*)binder.optimizer_compute; + const D3D12_ROOT_PARAMETER1& param = optimizer->rootsig_desc->Desc_1_1.pParameters[optimizer->PUSH]; + assert(param.ParameterType == D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS); + assert((uint64_t)offset + size <= param.Constants.Num32BitValues * sizeof(uint32_t)); // if this fires, the range [offset, offset + size) does not fit in the root constants declared in root signature! + GetCommandList(cmd)->SetComputeRoot32BitConstants( + optimizer->PUSH, + size / sizeof(uint32_t), + data, + offset / sizeof(uint32_t) + ); + return; + } + assert(0); // there was no active pipeline! 
} void GraphicsDevice_DX12::PredicationBegin(const GPUBuffer* buffer, uint64_t offset, PredicationOp op, CommandList cmd) { @@ -6647,6 +5781,10 @@ using namespace dx12_internal; } } + const RenderPass* GraphicsDevice_DX12::GetCurrentRenderPass(CommandList cmd) const + { + return active_renderpass[cmd]; + } } diff --git a/WickedEngine/wiGraphicsDevice_DX12.h b/WickedEngine/wiGraphicsDevice_DX12.h index 2012231af..edc855b7d 100644 --- a/WickedEngine/wiGraphicsDevice_DX12.h +++ b/WickedEngine/wiGraphicsDevice_DX12.h @@ -29,7 +29,6 @@ namespace wi::graphics protected: Microsoft::WRL::ComPtr dxgiFactory; Microsoft::WRL::ComPtr dxgiAdapter; - bool tearingSupported = false; Microsoft::WRL::ComPtr device; Microsoft::WRL::ComPtr dispatchIndirectCommandSignature; @@ -37,6 +36,7 @@ namespace wi::graphics Microsoft::WRL::ComPtr drawIndexedInstancedIndirectCommandSignature; Microsoft::WRL::ComPtr dispatchMeshIndirectCommandSignature; + bool tearingSupported = false; bool additionalShadingRatesSupported = false; uint32_t rtv_descriptor_size = 0; @@ -44,25 +44,12 @@ namespace wi::graphics uint32_t resource_descriptor_size = 0; uint32_t sampler_descriptor_size = 0; + Microsoft::WRL::ComPtr nulldescriptorheap_cbv_srv_uav; + Microsoft::WRL::ComPtr nulldescriptorheap_sampler; D3D12_CPU_DESCRIPTOR_HANDLE nullCBV = {}; + D3D12_CPU_DESCRIPTOR_HANDLE nullSRV = {}; + D3D12_CPU_DESCRIPTOR_HANDLE nullUAV = {}; D3D12_CPU_DESCRIPTOR_HANDLE nullSAM = {}; - D3D12_CPU_DESCRIPTOR_HANDLE nullSRV_buffer = {}; - D3D12_CPU_DESCRIPTOR_HANDLE nullSRV_texture1d = {}; - D3D12_CPU_DESCRIPTOR_HANDLE nullSRV_texture1darray = {}; - D3D12_CPU_DESCRIPTOR_HANDLE nullSRV_texture2d = {}; - D3D12_CPU_DESCRIPTOR_HANDLE nullSRV_texture2darray = {}; - D3D12_CPU_DESCRIPTOR_HANDLE nullSRV_texturecube = {}; - D3D12_CPU_DESCRIPTOR_HANDLE nullSRV_texturecubearray = {}; - D3D12_CPU_DESCRIPTOR_HANDLE nullSRV_texture3d = {}; - D3D12_CPU_DESCRIPTOR_HANDLE nullSRV_accelerationstructure = {}; - D3D12_CPU_DESCRIPTOR_HANDLE 
nullUAV_buffer = {}; - D3D12_CPU_DESCRIPTOR_HANDLE nullUAV_texture1d = {}; - D3D12_CPU_DESCRIPTOR_HANDLE nullUAV_texture1darray = {}; - D3D12_CPU_DESCRIPTOR_HANDLE nullUAV_texture2d = {}; - D3D12_CPU_DESCRIPTOR_HANDLE nullUAV_texture2darray = {}; - D3D12_CPU_DESCRIPTOR_HANDLE nullUAV_texture3d = {}; - - wi::vector common_samplers; struct CommandQueue { @@ -119,17 +106,11 @@ namespace wi::graphics { DescriptorBindingTable table; GraphicsDevice_DX12* device = nullptr; - uint32_t ringOffset_res = 0; - uint32_t ringOffset_sam = 0; - bool dirty_res = false; - bool dirty_sam = false; - uint32_t dirty_root_cbvs = 0; // bitmask - struct DescriptorHandles - { - D3D12_GPU_DESCRIPTOR_HANDLE sampler_handle = {}; - D3D12_GPU_DESCRIPTOR_HANDLE resource_handle = {}; - }; + const void* optimizer_graphics = nullptr; + uint64_t dirty_graphics = 0ull; // 1 dirty bit flag per root parameter + const void* optimizer_compute = nullptr; + uint64_t dirty_compute = 0ull; // 1 dirty bit flag per root parameter void init(GraphicsDevice_DX12* device); void reset(); @@ -157,13 +138,6 @@ namespace wi::graphics wi::vector swapchains[COMMANDLIST_COUNT]; Microsoft::WRL::ComPtr active_backbuffer[COMMANDLIST_COUNT]; - struct DeferredPushConstantData - { - uint8_t data[128]; - uint32_t size; - }; - DeferredPushConstantData pushconstants[COMMANDLIST_COUNT] = {}; - bool dirty_pso[COMMANDLIST_COUNT] = {}; void pso_validate(CommandList cmd); @@ -196,8 +170,6 @@ namespace wi::graphics void WriteShadingRateValue(ShadingRate rate, void* dest) const override; void WriteTopLevelAccelerationStructureInstance(const RaytracingAccelerationStructureDesc::TopLevel::Instance* instance, void* dest) const override; void WriteShaderIdentifier(const RaytracingPipelineState* rtpso, uint32_t group_index, void* dest) const override; - - void SetCommonSampler(const StaticSampler* sam) override; void SetName(GPUResource* pResource, const char* name) override; @@ -257,7 +229,7 @@ namespace wi::graphics void 
BuildRaytracingAccelerationStructure(const RaytracingAccelerationStructure* dst, CommandList cmd, const RaytracingAccelerationStructure* src = nullptr) override; void BindRaytracingPipelineState(const RaytracingPipelineState* rtpso, CommandList cmd) override; void DispatchRays(const DispatchRaysDesc* desc, CommandList cmd) override; - void PushConstants(const void* data, uint32_t size, CommandList cmd) override; + void PushConstants(const void* data, uint32_t size, CommandList cmd, uint32_t offset = 0) override; void PredicationBegin(const GPUBuffer* buffer, uint64_t offset, PredicationOp op, CommandList cmd) override; void PredicationEnd(CommandList cmd) override; @@ -265,9 +237,10 @@ namespace wi::graphics void EventEnd(CommandList cmd) override; void SetMarker(const char* name, CommandList cmd) override; + const RenderPass* GetCurrentRenderPass(CommandList cmd) const override; - struct DescriptorHeap + struct DescriptorHeapGPU { D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; Microsoft::WRL::ComPtr heap_GPU; @@ -281,9 +254,18 @@ namespace wi::graphics Microsoft::WRL::ComPtr fence; uint64_t fenceValue = 0; uint64_t cached_completedValue = 0; + + void SignalGPU(ID3D12CommandQueue* queue) + { + // Descriptor heaps' progress is recorded by the GPU: + fenceValue = allocationOffset.load(); + HRESULT hr = queue->Signal(fence.Get(), fenceValue); + assert(SUCCEEDED(hr)); + cached_completedValue = fence->GetCompletedValue(); + } }; - DescriptorHeap descriptorheap_res; - DescriptorHeap descriptorheap_sam; + DescriptorHeapGPU descriptorheap_res; + DescriptorHeapGPU descriptorheap_sam; struct AllocationHandler { @@ -301,11 +283,11 @@ namespace wi::graphics uint32_t descriptor_size = 0; wi::vector freelist; - void init(GraphicsDevice_DX12* device, D3D12_DESCRIPTOR_HEAP_TYPE type) + void init(GraphicsDevice_DX12* device, D3D12_DESCRIPTOR_HEAP_TYPE type, UINT numDescriptorsPerBlock) { this->device = device; desc.Type = type; - desc.NumDescriptors = 1024; + desc.NumDescriptors = 
numDescriptorsPerBlock; descriptor_size = device->device->GetDescriptorHandleIncrementSize(type); } void block_allocate() diff --git a/WickedEngine/wiGraphicsDevice_Vulkan.cpp b/WickedEngine/wiGraphicsDevice_Vulkan.cpp index a225791f7..0461beb77 100644 --- a/WickedEngine/wiGraphicsDevice_Vulkan.cpp +++ b/WickedEngine/wiGraphicsDevice_Vulkan.cpp @@ -870,6 +870,8 @@ namespace vulkan_internal wi::vector swapChainImages; wi::vector swapChainImageViews; wi::vector swapChainFramebuffers; + + Texture dummyTexture; RenderPass renderpass; VkSurfaceKHR surface = VK_NULL_HANDLE; @@ -1147,12 +1149,15 @@ namespace vulkan_internal renderPassInfo.dependencyCount = 1; renderPassInfo.pDependencies = &dependency; - internal_state->renderpass = RenderPass(); + internal_state->dummyTexture.desc.format = internal_state->desc.format; + internal_state->dummyTexture.desc.width = internal_state->desc.width; + internal_state->dummyTexture.desc.height = internal_state->desc.height; + internal_state->renderpass = {}; wi::helper::hash_combine(internal_state->renderpass.hash, internal_state->swapChainImageFormat); auto renderpass_internal = std::make_shared(); renderpass_internal->allocationhandler = allocationhandler; internal_state->renderpass.internal_state = renderpass_internal; - internal_state->renderpass.desc.attachments.push_back(RenderPassAttachment::RenderTarget()); + internal_state->renderpass.desc.attachments.push_back(RenderPassAttachment::RenderTarget(&internal_state->dummyTexture)); res = vkCreateRenderPass(device, &renderPassInfo, nullptr, &renderpass_internal->renderpass); assert(res == VK_SUCCESS); @@ -1562,396 +1567,401 @@ using namespace vulkan_internal; } void GraphicsDevice_Vulkan::DescriptorBinder::flush(bool graphics, CommandList cmd) { - if (!dirty) + if (dirty == DIRTY_NONE) return; - dirty = false; - auto& binder_pool = device->GetFrameResources().binder_pools[cmd]; auto pso_internal = graphics ? 
to_internal(device->active_pso[cmd]) : nullptr; auto cs_internal = graphics ? nullptr : to_internal(device->active_cs[cmd]); + VkCommandBuffer commandBuffer = device->GetCommandList(cmd); - VkPipelineLayout pipelineLayout = VK_NULL_HANDLE; - VkDescriptorSetLayout descriptorSetLayout = VK_NULL_HANDLE; - if (graphics) + if (dirty & DIRTY_DESCRIPTOR) { - pipelineLayout = pso_internal->pipelineLayout; - descriptorSetLayout = pso_internal->descriptorSetLayout; - } - else - { - pipelineLayout = cs_internal->pipelineLayout_cs; - descriptorSetLayout = cs_internal->descriptorSetLayout; - } - - VkDescriptorSetAllocateInfo allocInfo = {}; - allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - allocInfo.descriptorPool = binder_pool.descriptorPool; - allocInfo.descriptorSetCount = 1; - allocInfo.pSetLayouts = &descriptorSetLayout; - - VkDescriptorSet descriptorSet = VK_NULL_HANDLE; - VkResult res = vkAllocateDescriptorSets(device->device, &allocInfo, &descriptorSet); - while (res == VK_ERROR_OUT_OF_POOL_MEMORY) - { - binder_pool.poolSize *= 2; - binder_pool.destroy(); - binder_pool.init(device); - allocInfo.descriptorPool = binder_pool.descriptorPool; - res = vkAllocateDescriptorSets(device->device, &allocInfo, &descriptorSet); - } - assert(res == VK_SUCCESS); - - descriptorWrites.clear(); - bufferInfos.clear(); - imageInfos.clear(); - texelBufferViews.clear(); - accelerationStructureViews.clear(); - - const auto& layoutBindings = graphics ? pso_internal->layoutBindings : cs_internal->layoutBindings; - const auto& imageViewTypes = graphics ? 
pso_internal->imageViewTypes : cs_internal->imageViewTypes; - - - int i = 0; - for (auto& x : layoutBindings) - { - if (x.pImmutableSamplers != nullptr) + auto& binder_pool = device->GetFrameResources().binder_pools[cmd]; + VkPipelineLayout pipelineLayout = VK_NULL_HANDLE; + VkDescriptorSetLayout descriptorSetLayout = VK_NULL_HANDLE; + if (graphics) { - i++; - continue; + pipelineLayout = pso_internal->pipelineLayout; + descriptorSetLayout = pso_internal->descriptorSetLayout; + } + else + { + pipelineLayout = cs_internal->pipelineLayout_cs; + descriptorSetLayout = cs_internal->descriptorSetLayout; } - VkImageViewType viewtype = imageViewTypes[i++]; + VkDescriptorSetAllocateInfo allocInfo = {}; + allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + allocInfo.descriptorPool = binder_pool.descriptorPool; + allocInfo.descriptorSetCount = 1; + allocInfo.pSetLayouts = &descriptorSetLayout; - for (uint32_t descriptor_index = 0; descriptor_index < x.descriptorCount; ++descriptor_index) + VkDescriptorSet descriptorSet = VK_NULL_HANDLE; + VkResult res = vkAllocateDescriptorSets(device->device, &allocInfo, &descriptorSet); + while (res == VK_ERROR_OUT_OF_POOL_MEMORY) { - uint32_t unrolled_binding = x.binding + descriptor_index; + binder_pool.poolSize *= 2; + binder_pool.destroy(); + binder_pool.init(device); + allocInfo.descriptorPool = binder_pool.descriptorPool; + res = vkAllocateDescriptorSets(device->device, &allocInfo, &descriptorSet); + } + assert(res == VK_SUCCESS); - descriptorWrites.emplace_back(); - auto& write = descriptorWrites.back(); - write = {}; - write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - write.dstSet = descriptorSet; - write.dstArrayElement = descriptor_index; - write.descriptorType = x.descriptorType; - write.dstBinding = x.binding; - write.descriptorCount = 1; + descriptorWrites.clear(); + bufferInfos.clear(); + imageInfos.clear(); + texelBufferViews.clear(); + accelerationStructureViews.clear(); - switch (x.descriptorType) + 
const auto& layoutBindings = graphics ? pso_internal->layoutBindings : cs_internal->layoutBindings; + const auto& imageViewTypes = graphics ? pso_internal->imageViewTypes : cs_internal->imageViewTypes; + + + int i = 0; + for (auto& x : layoutBindings) + { + if (x.pImmutableSamplers != nullptr) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - { - imageInfos.emplace_back(); - write.pImageInfo = &imageInfos.back(); - imageInfos.back() = {}; - - const uint32_t original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_S; - const Sampler& sampler = table.SAM[original_binding]; - if (!sampler.IsValid()) - { - imageInfos.back().sampler = device->nullSampler; - } - else - { - imageInfos.back().sampler = to_internal(&sampler)->resource; - } + i++; + continue; } - break; - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + VkImageViewType viewtype = imageViewTypes[i++]; + + for (uint32_t descriptor_index = 0; descriptor_index < x.descriptorCount; ++descriptor_index) { - imageInfos.emplace_back(); - write.pImageInfo = &imageInfos.back(); - imageInfos.back() = {}; + uint32_t unrolled_binding = x.binding + descriptor_index; - const uint32_t original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_T; - const GPUResource& resource = table.SRV[original_binding]; - if (!resource.IsValid() || !resource.IsTexture()) + descriptorWrites.emplace_back(); + auto& write = descriptorWrites.back(); + write = {}; + write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write.dstSet = descriptorSet; + write.dstArrayElement = descriptor_index; + write.descriptorType = x.descriptorType; + write.dstBinding = x.binding; + write.descriptorCount = 1; + + switch (x.descriptorType) { - switch (viewtype) + case VK_DESCRIPTOR_TYPE_SAMPLER: + { + imageInfos.emplace_back(); + write.pImageInfo = &imageInfos.back(); + imageInfos.back() = {}; + + const uint32_t original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_S; + const Sampler& sampler = table.SAM[original_binding]; + if (!sampler.IsValid()) { - case 
VK_IMAGE_VIEW_TYPE_1D: - imageInfos.back().imageView = device->nullImageView1D; - break; - case VK_IMAGE_VIEW_TYPE_2D: - imageInfos.back().imageView = device->nullImageView2D; - break; - case VK_IMAGE_VIEW_TYPE_3D: - imageInfos.back().imageView = device->nullImageView3D; - break; - case VK_IMAGE_VIEW_TYPE_CUBE: - imageInfos.back().imageView = device->nullImageViewCube; - break; - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: - imageInfos.back().imageView = device->nullImageView1DArray; - break; - case VK_IMAGE_VIEW_TYPE_2D_ARRAY: - imageInfos.back().imageView = device->nullImageView2DArray; - break; - case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - imageInfos.back().imageView = device->nullImageViewCubeArray; - break; - case VK_IMAGE_VIEW_TYPE_MAX_ENUM: - break; - default: - break; + imageInfos.back().sampler = device->nullSampler; } + else + { + imageInfos.back().sampler = to_internal(&sampler)->resource; + } + } + break; + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + { + imageInfos.emplace_back(); + write.pImageInfo = &imageInfos.back(); + imageInfos.back() = {}; + + const uint32_t original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_T; + const GPUResource& resource = table.SRV[original_binding]; + if (!resource.IsValid() || !resource.IsTexture()) + { + switch (viewtype) + { + case VK_IMAGE_VIEW_TYPE_1D: + imageInfos.back().imageView = device->nullImageView1D; + break; + case VK_IMAGE_VIEW_TYPE_2D: + imageInfos.back().imageView = device->nullImageView2D; + break; + case VK_IMAGE_VIEW_TYPE_3D: + imageInfos.back().imageView = device->nullImageView3D; + break; + case VK_IMAGE_VIEW_TYPE_CUBE: + imageInfos.back().imageView = device->nullImageViewCube; + break; + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + imageInfos.back().imageView = device->nullImageView1DArray; + break; + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + imageInfos.back().imageView = device->nullImageView2DArray; + break; + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + imageInfos.back().imageView = device->nullImageViewCubeArray; + break; + case 
VK_IMAGE_VIEW_TYPE_MAX_ENUM: + break; + default: + break; + } + imageInfos.back().imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } + else + { + int subresource = table.SRV_index[original_binding]; + auto texture_internal = to_internal((const Texture*)&resource); + if (subresource >= 0) + { + imageInfos.back().imageView = texture_internal->subresources_srv[subresource]; + } + else + { + imageInfos.back().imageView = texture_internal->srv; + } + + imageInfos.back().imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + } + } + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + { + imageInfos.emplace_back(); + write.pImageInfo = &imageInfos.back(); + imageInfos.back() = {}; imageInfos.back().imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - else - { - int subresource = table.SRV_index[original_binding]; - auto texture_internal = to_internal((const Texture*)&resource); - if (subresource >= 0) + + const uint32_t original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_U; + const GPUResource& resource = table.UAV[original_binding]; + if (!resource.IsValid() || !resource.IsTexture()) { - imageInfos.back().imageView = texture_internal->subresources_srv[subresource]; + switch (viewtype) + { + case VK_IMAGE_VIEW_TYPE_1D: + imageInfos.back().imageView = device->nullImageView1D; + break; + case VK_IMAGE_VIEW_TYPE_2D: + imageInfos.back().imageView = device->nullImageView2D; + break; + case VK_IMAGE_VIEW_TYPE_3D: + imageInfos.back().imageView = device->nullImageView3D; + break; + case VK_IMAGE_VIEW_TYPE_CUBE: + imageInfos.back().imageView = device->nullImageViewCube; + break; + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + imageInfos.back().imageView = device->nullImageView1DArray; + break; + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + imageInfos.back().imageView = device->nullImageView2DArray; + break; + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + imageInfos.back().imageView = device->nullImageViewCubeArray; + break; + case VK_IMAGE_VIEW_TYPE_MAX_ENUM: + break; + default: + break; + } } else { - 
imageInfos.back().imageView = texture_internal->srv; - } - - imageInfos.back().imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - } - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - { - imageInfos.emplace_back(); - write.pImageInfo = &imageInfos.back(); - imageInfos.back() = {}; - imageInfos.back().imageLayout = VK_IMAGE_LAYOUT_GENERAL; - - const uint32_t original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_U; - const GPUResource& resource = table.UAV[original_binding]; - if (!resource.IsValid() || !resource.IsTexture()) - { - switch (viewtype) - { - case VK_IMAGE_VIEW_TYPE_1D: - imageInfos.back().imageView = device->nullImageView1D; - break; - case VK_IMAGE_VIEW_TYPE_2D: - imageInfos.back().imageView = device->nullImageView2D; - break; - case VK_IMAGE_VIEW_TYPE_3D: - imageInfos.back().imageView = device->nullImageView3D; - break; - case VK_IMAGE_VIEW_TYPE_CUBE: - imageInfos.back().imageView = device->nullImageViewCube; - break; - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: - imageInfos.back().imageView = device->nullImageView1DArray; - break; - case VK_IMAGE_VIEW_TYPE_2D_ARRAY: - imageInfos.back().imageView = device->nullImageView2DArray; - break; - case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - imageInfos.back().imageView = device->nullImageViewCubeArray; - break; - case VK_IMAGE_VIEW_TYPE_MAX_ENUM: - break; - default: - break; + int subresource = table.UAV_index[original_binding]; + auto texture_internal = to_internal((const Texture*)&resource); + if (subresource >= 0) + { + imageInfos.back().imageView = texture_internal->subresources_uav[subresource]; + } + else + { + imageInfos.back().imageView = texture_internal->uav; + } } } - else + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { - int subresource = table.UAV_index[original_binding]; - auto texture_internal = to_internal((const Texture*)&resource); - if (subresource >= 0) + bufferInfos.emplace_back(); + write.pBufferInfo = &bufferInfos.back(); + bufferInfos.back() = {}; + + const uint32_t 
original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_B; + const GPUBuffer& buffer = table.CBV[original_binding]; + uint64_t offset = table.CBV_offset[original_binding]; + + if (!buffer.IsValid()) { - imageInfos.back().imageView = texture_internal->subresources_uav[subresource]; + bufferInfos.back().buffer = device->nullBuffer; + bufferInfos.back().range = VK_WHOLE_SIZE; } else { - imageInfos.back().imageView = texture_internal->uav; + auto internal_state = to_internal(&buffer); + bufferInfos.back().buffer = internal_state->resource; + bufferInfos.back().offset = offset; + bufferInfos.back().range = std::min(buffer.desc.size - offset, (uint64_t)device->properties2.properties.limits.maxUniformBufferRange); } } - } - break; + break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - { - bufferInfos.emplace_back(); - write.pBufferInfo = &bufferInfos.back(); - bufferInfos.back() = {}; - - const uint32_t original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_B; - const GPUBuffer& buffer = table.CBV[original_binding]; - uint64_t offset = table.CBV_offset[original_binding]; - - if (!buffer.IsValid()) + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: { - bufferInfos.back().buffer = device->nullBuffer; - bufferInfos.back().range = VK_WHOLE_SIZE; - } - else - { - auto internal_state = to_internal(&buffer); - bufferInfos.back().buffer = internal_state->resource; - bufferInfos.back().offset = offset; - bufferInfos.back().range = std::min(buffer.desc.size - offset, (uint64_t)device->properties2.properties.limits.maxUniformBufferRange); - } - } - break; + texelBufferViews.emplace_back(); + write.pTexelBufferView = &texelBufferViews.back(); + texelBufferViews.back() = {}; - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - { - texelBufferViews.emplace_back(); - write.pTexelBufferView = &texelBufferViews.back(); - texelBufferViews.back() = {}; - - const uint32_t original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_T; - const GPUResource& resource = table.SRV[original_binding]; 
- if (!resource.IsValid() || !resource.IsBuffer()) - { - texelBufferViews.back() = device->nullBufferView; - } - else - { - int subresource = table.SRV_index[original_binding]; - auto buffer_internal = to_internal((const GPUBuffer*)&resource); - if (subresource >= 0) - { - texelBufferViews.back() = buffer_internal->subresources_srv[subresource]; - } - else - { - texelBufferViews.back() = buffer_internal->srv; - } - } - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - { - texelBufferViews.emplace_back(); - write.pTexelBufferView = &texelBufferViews.back(); - texelBufferViews.back() = {}; - - const uint32_t original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_U; - const GPUResource& resource = table.UAV[original_binding]; - if (!resource.IsValid() || !resource.IsBuffer()) - { - texelBufferViews.back() = device->nullBufferView; - } - else - { - int subresource = table.UAV_index[original_binding]; - auto buffer_internal = to_internal((const GPUBuffer*)&resource); - if (subresource >= 0) - { - texelBufferViews.back() = buffer_internal->subresources_uav[subresource]; - } - else - { - texelBufferViews.back() = buffer_internal->uav; - } - } - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - { - bufferInfos.emplace_back(); - write.pBufferInfo = &bufferInfos.back(); - bufferInfos.back() = {}; - - if (x.binding < VULKAN_BINDING_SHIFT_U) - { - // SRV const uint32_t original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_T; const GPUResource& resource = table.SRV[original_binding]; if (!resource.IsValid() || !resource.IsBuffer()) { - bufferInfos.back().buffer = device->nullBuffer; - bufferInfos.back().range = VK_WHOLE_SIZE; + texelBufferViews.back() = device->nullBufferView; } else { int subresource = table.SRV_index[original_binding]; auto buffer_internal = to_internal((const GPUBuffer*)&resource); - bufferInfos.back().buffer = buffer_internal->resource; - bufferInfos.back().range = VK_WHOLE_SIZE; + if (subresource >= 0) + { + 
texelBufferViews.back() = buffer_internal->subresources_srv[subresource]; + } + else + { + texelBufferViews.back() = buffer_internal->srv; + } } } - else + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: { - // UAV + texelBufferViews.emplace_back(); + write.pTexelBufferView = &texelBufferViews.back(); + texelBufferViews.back() = {}; + const uint32_t original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_U; const GPUResource& resource = table.UAV[original_binding]; if (!resource.IsValid() || !resource.IsBuffer()) { - bufferInfos.back().buffer = device->nullBuffer; - bufferInfos.back().range = VK_WHOLE_SIZE; + texelBufferViews.back() = device->nullBufferView; } else { int subresource = table.UAV_index[original_binding]; auto buffer_internal = to_internal((const GPUBuffer*)&resource); - bufferInfos.back().buffer = buffer_internal->resource; - bufferInfos.back().range = VK_WHOLE_SIZE; + if (subresource >= 0) + { + texelBufferViews.back() = buffer_internal->subresources_uav[subresource]; + } + else + { + texelBufferViews.back() = buffer_internal->uav; + } } } - } - break; + break; - case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: - { - accelerationStructureViews.emplace_back(); - write.pNext = &accelerationStructureViews.back(); - accelerationStructureViews.back() = {}; - accelerationStructureViews.back().sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR; - accelerationStructureViews.back().accelerationStructureCount = 1; - - const uint32_t original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_T; - const GPUResource& resource = table.SRV[original_binding]; - if (!resource.IsValid() || !resource.IsAccelerationStructure()) + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: { - assert(0); // invalid acceleration structure! 
- } - else - { - auto as_internal = to_internal((const RaytracingAccelerationStructure*)&resource); - accelerationStructureViews.back().pAccelerationStructures = &as_internal->resource; - } - } - break; + bufferInfos.emplace_back(); + write.pBufferInfo = &bufferInfos.back(); + bufferInfos.back() = {}; + if (x.binding < VULKAN_BINDING_SHIFT_U) + { + // SRV + const uint32_t original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_T; + const GPUResource& resource = table.SRV[original_binding]; + if (!resource.IsValid() || !resource.IsBuffer()) + { + bufferInfos.back().buffer = device->nullBuffer; + bufferInfos.back().range = VK_WHOLE_SIZE; + } + else + { + int subresource = table.SRV_index[original_binding]; + auto buffer_internal = to_internal((const GPUBuffer*)&resource); + bufferInfos.back().buffer = buffer_internal->resource; + bufferInfos.back().range = VK_WHOLE_SIZE; + } + } + else + { + // UAV + const uint32_t original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_U; + const GPUResource& resource = table.UAV[original_binding]; + if (!resource.IsValid() || !resource.IsBuffer()) + { + bufferInfos.back().buffer = device->nullBuffer; + bufferInfos.back().range = VK_WHOLE_SIZE; + } + else + { + int subresource = table.UAV_index[original_binding]; + auto buffer_internal = to_internal((const GPUBuffer*)&resource); + bufferInfos.back().buffer = buffer_internal->resource; + bufferInfos.back().range = VK_WHOLE_SIZE; + } + } + } + break; + + case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: + { + accelerationStructureViews.emplace_back(); + write.pNext = &accelerationStructureViews.back(); + accelerationStructureViews.back() = {}; + accelerationStructureViews.back().sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR; + accelerationStructureViews.back().accelerationStructureCount = 1; + + const uint32_t original_binding = unrolled_binding - VULKAN_BINDING_SHIFT_T; + const GPUResource& resource = table.SRV[original_binding]; + if 
(!resource.IsValid() || !resource.IsAccelerationStructure()) + { + assert(0); // invalid acceleration structure! + } + else + { + auto as_internal = to_internal((const RaytracingAccelerationStructure*)&resource); + accelerationStructureViews.back().pAccelerationStructures = &as_internal->resource; + } + } + break; + + } } } - } - vkUpdateDescriptorSets( - device->device, - (uint32_t)descriptorWrites.size(), - descriptorWrites.data(), - 0, - nullptr - ); + vkUpdateDescriptorSets( + device->device, + (uint32_t)descriptorWrites.size(), + descriptorWrites.data(), + 0, + nullptr + ); - VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - if (!graphics) - { - bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE; - - if (device->active_cs[cmd]->stage == ShaderStage::LIB) + VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + if (!graphics) { - bindPoint = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR; + bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE; + + if (device->active_cs[cmd]->stage == ShaderStage::LIB) + { + bindPoint = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR; + } } + + vkCmdBindDescriptorSets( + commandBuffer, + bindPoint, + pipelineLayout, + 0, + 1, + &descriptorSet, + 0, + nullptr + ); } - vkCmdBindDescriptorSets( - device->GetCommandList(cmd), - bindPoint, - pipelineLayout, - 0, - 1, - &descriptorSet, - 0, - nullptr - ); + dirty = DIRTY_NONE; } void GraphicsDevice_Vulkan::pso_validate(CommandList cmd) @@ -2151,36 +2161,10 @@ using namespace vulkan_internal; pso_validate(cmd); binders[cmd].flush(true, cmd); - - auto pso_internal = to_internal(active_pso[cmd]); - if (pso_internal->pushconstants.size > 0) - { - vkCmdPushConstants( - GetCommandList(cmd), - pso_internal->pipelineLayout, - pso_internal->pushconstants.stageFlags, - pso_internal->pushconstants.offset, - pso_internal->pushconstants.size, - pushconstants[cmd].data - ); - } } void GraphicsDevice_Vulkan::predispatch(CommandList cmd) { binders[cmd].flush(false, cmd); - - auto cs_internal = 
to_internal(active_cs[cmd]); - if (cs_internal->pushconstants.size > 0) - { - vkCmdPushConstants( - GetCommandList(cmd), - cs_internal->pipelineLayout_cs, - cs_internal->pushconstants.stageFlags, - cs_internal->pushconstants.offset, - cs_internal->pushconstants.size, - pushconstants[cmd].data - ); - } } // Engine functions @@ -3022,11 +3006,11 @@ using namespace vulkan_internal; uint32_t deviceID = 0; uint8_t pipelineCacheUUID[VK_UUID_SIZE] = {}; - memcpy(&headerLength, (uint8_t*)pipelineData.data() + 0, 4); - memcpy(&cacheHeaderVersion, (uint8_t*)pipelineData.data() + 4, 4); - memcpy(&vendorID, (uint8_t*)pipelineData.data() + 8, 4); - memcpy(&deviceID, (uint8_t*)pipelineData.data() + 12, 4); - memcpy(pipelineCacheUUID, (uint8_t*)pipelineData.data() + 16, VK_UUID_SIZE); + std::memcpy(&headerLength, (uint8_t*)pipelineData.data() + 0, 4); + std::memcpy(&cacheHeaderVersion, (uint8_t*)pipelineData.data() + 4, 4); + std::memcpy(&vendorID, (uint8_t*)pipelineData.data() + 8, 4); + std::memcpy(&deviceID, (uint8_t*)pipelineData.data() + 12, 4); + std::memcpy(pipelineCacheUUID, (uint8_t*)pipelineData.data() + 16, VK_UUID_SIZE); bool badCache = false; @@ -3071,6 +3055,133 @@ using namespace vulkan_internal; assert(res == VK_SUCCESS); } + // Static samplers: + { + VkSamplerCreateInfo createInfo = {}; + createInfo.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + createInfo.pNext = nullptr; + createInfo.flags = 0; + createInfo.compareEnable = false; + createInfo.compareOp = VK_COMPARE_OP_NEVER; + createInfo.minLod = 0; + createInfo.maxLod = FLT_MAX; + createInfo.mipLodBias = 0; + createInfo.anisotropyEnable = false; + createInfo.maxAnisotropy = 0; + + // sampler_linear_clamp: + createInfo.minFilter = VK_FILTER_LINEAR; + createInfo.magFilter = VK_FILTER_LINEAR; + createInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + createInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + createInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + createInfo.addressModeW = 
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + res = vkCreateSampler(device, &createInfo, nullptr, &immutable_samplers.emplace_back()); + assert(res == VK_SUCCESS); + + // sampler_linear_wrap: + createInfo.minFilter = VK_FILTER_LINEAR; + createInfo.magFilter = VK_FILTER_LINEAR; + createInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + createInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + createInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + createInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + res = vkCreateSampler(device, &createInfo, nullptr, &immutable_samplers.emplace_back()); + assert(res == VK_SUCCESS); + + //sampler_linear_mirror: + createInfo.minFilter = VK_FILTER_LINEAR; + createInfo.magFilter = VK_FILTER_LINEAR; + createInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + createInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + createInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + createInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + res = vkCreateSampler(device, &createInfo, nullptr, &immutable_samplers.emplace_back()); + assert(res == VK_SUCCESS); + + // sampler_point_clamp: + createInfo.minFilter = VK_FILTER_NEAREST; + createInfo.magFilter = VK_FILTER_NEAREST; + createInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + createInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + createInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + createInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + res = vkCreateSampler(device, &createInfo, nullptr, &immutable_samplers.emplace_back()); + assert(res == VK_SUCCESS); + + // sampler_point_wrap: + createInfo.minFilter = VK_FILTER_NEAREST; + createInfo.magFilter = VK_FILTER_NEAREST; + createInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + createInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + createInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + createInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + res = 
vkCreateSampler(device, &createInfo, nullptr, &immutable_samplers.emplace_back()); + assert(res == VK_SUCCESS); + + // sampler_point_mirror: + createInfo.minFilter = VK_FILTER_NEAREST; + createInfo.magFilter = VK_FILTER_NEAREST; + createInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + createInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + createInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + createInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + res = vkCreateSampler(device, &createInfo, nullptr, &immutable_samplers.emplace_back()); + assert(res == VK_SUCCESS); + + // sampler_aniso_clamp: + createInfo.minFilter = VK_FILTER_LINEAR; + createInfo.magFilter = VK_FILTER_LINEAR; + createInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + createInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + createInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + createInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + createInfo.anisotropyEnable = true; + createInfo.maxAnisotropy = 16; + res = vkCreateSampler(device, &createInfo, nullptr, &immutable_samplers.emplace_back()); + assert(res == VK_SUCCESS); + + // sampler_aniso_wrap: + createInfo.minFilter = VK_FILTER_LINEAR; + createInfo.magFilter = VK_FILTER_LINEAR; + createInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + createInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + createInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + createInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + createInfo.anisotropyEnable = true; + createInfo.maxAnisotropy = 16; + res = vkCreateSampler(device, &createInfo, nullptr, &immutable_samplers.emplace_back()); + assert(res == VK_SUCCESS); + + // sampler_aniso_mirror: + createInfo.minFilter = VK_FILTER_LINEAR; + createInfo.magFilter = VK_FILTER_LINEAR; + createInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + createInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + createInfo.addressModeV = 
VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + createInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + createInfo.anisotropyEnable = true; + createInfo.maxAnisotropy = 16; + res = vkCreateSampler(device, &createInfo, nullptr, &immutable_samplers.emplace_back()); + assert(res == VK_SUCCESS); + + // sampler_cmp_depth: + createInfo.minFilter = VK_FILTER_LINEAR; + createInfo.magFilter = VK_FILTER_LINEAR; + createInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + createInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + createInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + createInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + createInfo.anisotropyEnable = false; + createInfo.maxAnisotropy = 0; + createInfo.compareEnable = true; + createInfo.compareOp = VK_COMPARE_OP_GREATER_OR_EQUAL; + createInfo.minLod = 0; + createInfo.maxLod = 0; + res = vkCreateSampler(device, &createInfo, nullptr, &immutable_samplers.emplace_back()); + assert(res == VK_SUCCESS); + } + wi::backlog::post("Created GraphicsDevice_Vulkan (" + std::to_string((int)std::round(timer.elapsed())) + " ms)"); } GraphicsDevice_Vulkan::~GraphicsDevice_Vulkan() @@ -3351,7 +3462,7 @@ using namespace vulkan_internal; { auto cmd = copyAllocator.allocate(pDesc->size); - memcpy(cmd.uploadbuffer.mapped_data, pInitialData, pBuffer->desc.size); + std::memcpy(cmd.uploadbuffer.mapped_data, pInitialData, pBuffer->desc.size); VkBufferMemoryBarrier barrier = {}; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; @@ -3611,7 +3722,7 @@ using namespace vulkan_internal; const SubresourceData& subresourceData = pInitialData[initDataIdx++]; VkDeviceSize copySize = subresourceData.row_pitch * height * depth / GetFormatBlockSize(pDesc->format); uint8_t* cpyaddr = (uint8_t*)cmd.uploadbuffer.mapped_data + copyOffset; - memcpy(cpyaddr, subresourceData.data_ptr, copySize); + std::memcpy(cpyaddr, subresourceData.data_ptr, copySize); VkBufferImageCopy copyRegion = {}; 
copyRegion.bufferOffset = copyOffset; @@ -3827,8 +3938,6 @@ using namespace vulkan_internal; result = spvReflectEnumeratePushConstantBlocks(&module, &push_count, pushconstants.data()); assert(result == SPV_REFLECT_RESULT_SUCCESS); - wi::vector staticsamplers; - for (auto& x : pushconstants) { auto& push = internal_state->pushconstants; @@ -3861,34 +3970,10 @@ using namespace vulkan_internal; auto& imageViewType = internal_state->imageViewTypes.emplace_back(); imageViewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM; - if (x->descriptor_type == SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLER) + if (x->descriptor_type == SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLER && x->binding >= VULKAN_BINDING_SHIFT_S + immutable_sampler_slot_begin) { - bool staticsampler = false; - for (auto& sam : pShader->auto_samplers) - { - if (x->binding == sam.slot + VULKAN_BINDING_SHIFT_S) - { - descriptor.pImmutableSamplers = &to_internal(&sam.sampler)->resource; - staticsampler = true; - break; // static sampler will be used instead - } - } - if (!staticsampler) - { - for (auto& sam : common_samplers) - { - if (x->binding == sam.slot + VULKAN_BINDING_SHIFT_S) - { - descriptor.pImmutableSamplers = &to_internal(&sam.sampler)->resource; - staticsampler = true; - break; // static sampler will be used instead - } - } - } - if (staticsampler) - { - continue; - } + descriptor.pImmutableSamplers = immutable_samplers.data() + x->binding - VULKAN_BINDING_SHIFT_S - immutable_sampler_slot_begin; + continue; } switch (x->descriptor_type) @@ -5359,7 +5444,7 @@ using namespace vulkan_internal; info.maxPipelineRayRecursionDepth = pDesc->max_trace_recursion_depth; - info.layout = to_internal(pDesc->shader_libraries.front().shader)->pipelineLayout_cs; // think better way + info.layout = to_internal(pDesc->shader_libraries.front().shader)->pipelineLayout_cs; //VkRayTracingPipelineInterfaceCreateInfoKHR library_interface = {}; //library_interface.sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_INTERFACE_CREATE_INFO_KHR; @@ -5871,7 +5956,7 @@ 
using namespace vulkan_internal; void GraphicsDevice_Vulkan::WriteTopLevelAccelerationStructureInstance(const RaytracingAccelerationStructureDesc::TopLevel::Instance* instance, void* dest) const { VkAccelerationStructureInstanceKHR* desc = (VkAccelerationStructureInstanceKHR*)dest; - memcpy(&desc->transform, &instance->transform, sizeof(desc->transform)); + std::memcpy(&desc->transform, &instance->transform, sizeof(desc->transform)); desc->instanceCustomIndex = instance->instance_id; desc->mask = instance->instance_mask; desc->instanceShaderBindingTableRecordOffset = instance->instance_contribution_to_hit_group_index; @@ -5887,11 +5972,6 @@ using namespace vulkan_internal; assert(res == VK_SUCCESS); } - void GraphicsDevice_Vulkan::SetCommonSampler(const StaticSampler* sam) - { - common_samplers.push_back(*sam); - } - void GraphicsDevice_Vulkan::SetName(GPUResource* pResource, const char* name) { if (debugUtils) @@ -6012,7 +6092,6 @@ using namespace vulkan_internal; active_renderpass[cmd] = nullptr; dirty_pso[cmd] = false; prev_shadingrate[cmd] = ShadingRate::RATE_INVALID; - pushconstants[cmd] = {}; vb_hash[cmd] = 0; for (int i = 0; i < arraysize(vb_strides[cmd]); ++i) { @@ -6367,7 +6446,7 @@ using namespace vulkan_internal; { binder.table.SRV[slot] = *resource; binder.table.SRV_index[slot] = subresource; - binder.dirty = true; + binders[cmd].dirty |= DescriptorBinder::DIRTY_DESCRIPTOR; } } void GraphicsDevice_Vulkan::BindResources(const GPUResource *const* resources, uint32_t slot, uint32_t count, CommandList cmd) @@ -6388,7 +6467,7 @@ using namespace vulkan_internal; { binder.table.UAV[slot] = *resource; binder.table.UAV_index[slot] = subresource; - binder.dirty = true; + binders[cmd].dirty |= DescriptorBinder::DIRTY_DESCRIPTOR; } } void GraphicsDevice_Vulkan::BindUAVs(const GPUResource *const* resources, uint32_t slot, uint32_t count, CommandList cmd) @@ -6408,7 +6487,7 @@ using namespace vulkan_internal; if (binder.table.SAM[slot].internal_state != 
sampler->internal_state) { binder.table.SAM[slot] = *sampler; - binder.dirty = true; + binders[cmd].dirty |= DescriptorBinder::DIRTY_DESCRIPTOR; } } void GraphicsDevice_Vulkan::BindConstantBuffer(const GPUBuffer* buffer, uint32_t slot, CommandList cmd, uint64_t offset) @@ -6419,7 +6498,7 @@ using namespace vulkan_internal; { binder.table.CBV[slot] = *buffer; binder.table.CBV_offset[slot] = offset; - binder.dirty = true; + binders[cmd].dirty |= DescriptorBinder::DIRTY_DESCRIPTOR; } } void GraphicsDevice_Vulkan::BindVertexBuffers(const GPUBuffer *const* vertexBuffers, uint32_t slot, uint32_t count, const uint32_t* strides, const uint64_t* offsets, CommandList cmd) @@ -6556,6 +6635,9 @@ using namespace vulkan_internal; } void GraphicsDevice_Vulkan::BindPipelineState(const PipelineState* pso, CommandList cmd) { + active_cs[cmd] = nullptr; + active_rt[cmd] = nullptr; + size_t pipeline_hash = 0; wi::helper::hash_combine(pipeline_hash, pso->hash); if (active_renderpass[cmd] != nullptr) @@ -6572,14 +6654,14 @@ using namespace vulkan_internal; if (active_pso[cmd] == nullptr) { - binders[cmd].dirty = true; + binders[cmd].dirty |= DescriptorBinder::DIRTY_ALL; } else { auto active_internal = to_internal(active_pso[cmd]); if (internal_state->binding_hash != active_internal->binding_hash) { - binders[cmd].dirty = true; + binders[cmd].dirty |= DescriptorBinder::DIRTY_ALL; } } @@ -6602,12 +6684,15 @@ using namespace vulkan_internal; } void GraphicsDevice_Vulkan::BindComputeShader(const Shader* cs, CommandList cmd) { + active_pso[cmd] = nullptr; + active_rt[cmd] = nullptr; + assert(cs->stage == ShaderStage::CS || cs->stage == ShaderStage::LIB); if (active_cs[cmd] != cs) { if (active_cs[cmd] == nullptr) { - binders[cmd].dirty = true; + binders[cmd].dirty |= DescriptorBinder::DIRTY_ALL; } else { @@ -6615,7 +6700,7 @@ using namespace vulkan_internal; auto active_internal = to_internal(active_cs[cmd]); if (internal_state->binding_hash != active_internal->binding_hash) { - 
binders[cmd].dirty = true; + binders[cmd].dirty |= DescriptorBinder::DIRTY_ALL; } } @@ -7230,10 +7315,43 @@ using namespace vulkan_internal; desc->depth ); } - void GraphicsDevice_Vulkan::PushConstants(const void* data, uint32_t size, CommandList cmd) + void GraphicsDevice_Vulkan::PushConstants(const void* data, uint32_t size, CommandList cmd, uint32_t offset) { - std::memcpy(pushconstants[cmd].data, data, size); - pushconstants[cmd].size = size; + auto& binder = binders[cmd]; + + if (active_pso[cmd] != nullptr) + { + auto pso_internal = to_internal(active_pso[cmd]); + if (pso_internal->pushconstants.size > 0) + { + vkCmdPushConstants( + GetCommandList(cmd), + pso_internal->pipelineLayout, + pso_internal->pushconstants.stageFlags, + offset, + size, + data + ); + return; + } + } + if(active_cs[cmd] != nullptr) + { + auto cs_internal = to_internal(active_cs[cmd]); + if (cs_internal->pushconstants.size > 0) + { + vkCmdPushConstants( + GetCommandList(cmd), + cs_internal->pipelineLayout_cs, + cs_internal->pushconstants.stageFlags, + offset, + size, + data + ); + return; + } + } + assert(0); // there was no active pipeline! 
} void GraphicsDevice_Vulkan::PredicationBegin(const GPUBuffer* buffer, uint64_t offset, PredicationOp op, CommandList cmd) { @@ -7293,6 +7411,11 @@ using namespace vulkan_internal; label.color[3] = 1.0f; vkCmdInsertDebugUtilsLabelEXT(GetCommandList(cmd), &label); } + + const RenderPass* GraphicsDevice_Vulkan::GetCurrentRenderPass(CommandList cmd) const + { + return active_renderpass[cmd]; + } } #endif // WICKEDENGINE_BUILD_VULKAN diff --git a/WickedEngine/wiGraphicsDevice_Vulkan.h b/WickedEngine/wiGraphicsDevice_Vulkan.h index 04d347c72..adc0ee429 100644 --- a/WickedEngine/wiGraphicsDevice_Vulkan.h +++ b/WickedEngine/wiGraphicsDevice_Vulkan.h @@ -179,7 +179,15 @@ namespace wi::graphics wi::vector imageInfos; wi::vector texelBufferViews; wi::vector accelerationStructureViews; - bool dirty = false; + + enum DIRTY_FLAGS + { + DIRTY_NONE = 0, + DIRTY_DESCRIPTOR = 1 << 1, + + DIRTY_ALL = ~0, + }; + uint32_t dirty = DIRTY_NONE; void init(GraphicsDevice_Vulkan* device); void reset(); @@ -215,13 +223,6 @@ namespace wi::graphics uint32_t vb_strides[COMMANDLIST_COUNT][8] = {}; size_t vb_hash[COMMANDLIST_COUNT] = {}; - struct DeferredPushConstantData - { - uint8_t data[128]; - uint32_t size; - }; - DeferredPushConstantData pushconstants[COMMANDLIST_COUNT] = {}; - bool dirty_pso[COMMANDLIST_COUNT] = {}; void pso_validate(CommandList cmd); @@ -231,7 +232,8 @@ namespace wi::graphics std::atomic cmd_count{ 0 }; - wi::vector common_samplers; + static constexpr uint32_t immutable_sampler_slot_begin = 100; + wi::vector immutable_samplers; public: GraphicsDevice_Vulkan(wi::platform::window_type window, bool debuglayer = false); @@ -257,8 +259,6 @@ namespace wi::graphics void WriteShadingRateValue(ShadingRate rate, void* dest) const override; void WriteTopLevelAccelerationStructureInstance(const RaytracingAccelerationStructureDesc::TopLevel::Instance* instance, void* dest) const override; void WriteShaderIdentifier(const RaytracingPipelineState* rtpso, uint32_t group_index, void* 
dest) const override; - - void SetCommonSampler(const StaticSampler* sam) override; void SetName(GPUResource* pResource, const char* name) override; @@ -318,7 +318,7 @@ namespace wi::graphics void BuildRaytracingAccelerationStructure(const RaytracingAccelerationStructure* dst, CommandList cmd, const RaytracingAccelerationStructure* src = nullptr) override; void BindRaytracingPipelineState(const RaytracingPipelineState* rtpso, CommandList cmd) override; void DispatchRays(const DispatchRaysDesc* desc, CommandList cmd) override; - void PushConstants(const void* data, uint32_t size, CommandList cmd) override; + void PushConstants(const void* data, uint32_t size, CommandList cmd, uint32_t offset = 0) override; void PredicationBegin(const GPUBuffer* buffer, uint64_t offset, PredicationOp op, CommandList cmd) override; void PredicationEnd(CommandList cmd) override; @@ -326,6 +326,8 @@ namespace wi::graphics void EventEnd(CommandList cmd) override; void SetMarker(const char* name, CommandList cmd) override; + const RenderPass* GetCurrentRenderPass(CommandList cmd) const override; + struct AllocationHandler { VmaAllocator allocator = VK_NULL_HANDLE; diff --git a/WickedEngine/wiImage.cpp b/WickedEngine/wiImage.cpp index f0c53d05c..007ea7d6a 100644 --- a/WickedEngine/wiImage.cpp +++ b/WickedEngine/wiImage.cpp @@ -12,20 +12,13 @@ using namespace wi::enums; namespace wi::image { - enum IMAGE_SHADER - { - IMAGE_SHADER_STANDARD, - IMAGE_SHADER_FULLSCREEN, - IMAGE_SHADER_COUNT - }; - + Sampler samplers[SAMPLER_COUNT]; Shader vertexShader; - Shader screenVS; - Shader imagePS[IMAGE_SHADER_COUNT]; + Shader pixelShader; BlendState blendStates[BLENDMODE_COUNT]; RasterizerState rasterizerState; DepthStencilState depthStencilStates[STENCILMODE_COUNT][STENCILREFMODE_COUNT]; - PipelineState imagePSO[IMAGE_SHADER_COUNT][BLENDMODE_COUNT][STENCILMODE_COUNT][STENCILREFMODE_COUNT]; + PipelineState imagePSO[BLENDMODE_COUNT][STENCILMODE_COUNT][STENCILREFMODE_COUNT]; Texture 
backgroundTextures[COMMANDLIST_COUNT]; wi::Canvas canvases[COMMANDLIST_COUNT]; @@ -51,48 +44,58 @@ namespace wi::image } device->BindStencilRef(stencilRef, cmd); - const Sampler* sampler = wi::renderer::GetSampler(SAMPLER_LINEAR_CLAMP); + const Sampler* sampler = &samplers[SAMPLER_LINEAR_CLAMP]; if (params.quality == QUALITY_NEAREST) { if (params.sampleFlag == SAMPLEMODE_MIRROR) - sampler = wi::renderer::GetSampler(SAMPLER_POINT_MIRROR); + sampler = &samplers[SAMPLER_POINT_MIRROR]; else if (params.sampleFlag == SAMPLEMODE_WRAP) - sampler = wi::renderer::GetSampler(SAMPLER_POINT_WRAP); + sampler = &samplers[SAMPLER_POINT_WRAP]; else if (params.sampleFlag == SAMPLEMODE_CLAMP) - sampler = wi::renderer::GetSampler(SAMPLER_POINT_CLAMP); + sampler = &samplers[SAMPLER_POINT_CLAMP]; } else if (params.quality == QUALITY_LINEAR) { if (params.sampleFlag == SAMPLEMODE_MIRROR) - sampler = wi::renderer::GetSampler(SAMPLER_LINEAR_MIRROR); + sampler = &samplers[SAMPLER_LINEAR_MIRROR]; else if (params.sampleFlag == SAMPLEMODE_WRAP) - sampler = wi::renderer::GetSampler(SAMPLER_LINEAR_WRAP); + sampler = &samplers[SAMPLER_LINEAR_WRAP]; else if (params.sampleFlag == SAMPLEMODE_CLAMP) - sampler = wi::renderer::GetSampler(SAMPLER_LINEAR_CLAMP); + sampler = &samplers[SAMPLER_LINEAR_CLAMP]; } else if (params.quality == QUALITY_ANISOTROPIC) { if (params.sampleFlag == SAMPLEMODE_MIRROR) - sampler = wi::renderer::GetSampler(SAMPLER_ANISO_MIRROR); + sampler = &samplers[SAMPLER_ANISO_MIRROR]; else if (params.sampleFlag == SAMPLEMODE_WRAP) - sampler = wi::renderer::GetSampler(SAMPLER_ANISO_WRAP); + sampler = &samplers[SAMPLER_ANISO_WRAP]; else if (params.sampleFlag == SAMPLEMODE_CLAMP) - sampler = wi::renderer::GetSampler(SAMPLER_ANISO_CLAMP); + sampler = &samplers[SAMPLER_ANISO_CLAMP]; } - PushConstantsImage push; - push.texture_base_index = device->GetDescriptorIndex(texture, SubresourceType::SRV); - push.texture_mask_index = device->GetDescriptorIndex(params.maskMap, SubresourceType::SRV); + 
ImageConstants image; + ImagePushConstants image_push; + image_push.texture_base_index = device->GetDescriptorIndex(texture, SubresourceType::SRV); + image_push.texture_mask_index = device->GetDescriptorIndex(params.maskMap, SubresourceType::SRV); if (params.isBackgroundEnabled()) { - push.texture_background_index = device->GetDescriptorIndex(&backgroundTextures[cmd], SubresourceType::SRV); + image_push.texture_background_index = device->GetDescriptorIndex(&backgroundTextures[cmd], SubresourceType::SRV); } else { - push.texture_background_index = -1; + image_push.texture_background_index = -1; } - push.sampler_index = device->GetDescriptorIndex(sampler); + image_push.sampler_index = device->GetDescriptorIndex(sampler); + + const RenderPass* renderpass = device->GetCurrentRenderPass(cmd); + assert(renderpass != nullptr); // image renderer must draw inside render pass! + assert(!renderpass->GetDesc().attachments.empty()); + assert(renderpass->GetDesc().attachments.front().texture != nullptr); + image.output_resolution.x = renderpass->GetDesc().attachments.front().texture->GetDesc().width; + image.output_resolution.y = renderpass->GetDesc().attachments.front().texture->GetDesc().height; + image.output_resolution_rcp.x = 1.0f / image.output_resolution.x; + image.output_resolution_rcp.y = 1.0f / image.output_resolution.y; XMFLOAT4 color = params.color; const float darken = 1 - params.fade; @@ -107,34 +110,26 @@ namespace wi::image packed_color.z = XMConvertFloatToHalf(color.z); packed_color.w = XMConvertFloatToHalf(color.w); - push.packed_color.x = uint(packed_color.v); - push.packed_color.y = uint(packed_color.v >> 32ull); + image_push.packed_color.x = uint(packed_color.v); + image_push.packed_color.y = uint(packed_color.v >> 32ull); - push.flags = 0; + image_push.flags = 0; if (params.isExtractNormalMapEnabled()) { - push.flags |= IMAGE_FLAG_EXTRACT_NORMALMAP; + image_push.flags |= IMAGE_FLAG_EXTRACT_NORMALMAP; } if (params.isHDR10OutputMappingEnabled()) { - 
assert(params.isFullScreenEnabled()); // for now, this effect is only usable in full screen rendering - push.flags |= IMAGE_FLAG_OUTPUT_COLOR_SPACE_HDR10_ST2084; + image_push.flags |= IMAGE_FLAG_OUTPUT_COLOR_SPACE_HDR10_ST2084; } if (params.isLinearOutputMappingEnabled()) { - assert(params.isFullScreenEnabled()); // for now, this effect is only usable in full screen rendering - push.flags |= IMAGE_FLAG_OUTPUT_COLOR_SPACE_LINEAR; - push.corners0.x = params.hdr_scaling; + image_push.flags |= IMAGE_FLAG_OUTPUT_COLOR_SPACE_LINEAR; + image_push.hdr_scaling = params.hdr_scaling; } - if (params.isFullScreenEnabled()) { - // Full screen image uses a fast path with full screen triangle and no effects - device->BindPipelineState(&imagePSO[IMAGE_SHADER_FULLSCREEN][params.blendFlag][params.stencilComp][params.stencilRefMode], cmd); - device->PushConstants(&push, sizeof(push), cmd); - device->Draw(3, 0, cmd); - device->EventEnd(cmd); - return; + image_push.flags |= IMAGE_FLAG_FULLSCREEN; } XMMATRIX M = XMMatrixScaling(params.scale.x * params.siz.x, params.scale.y * params.siz.y, 1); @@ -147,42 +142,45 @@ namespace wi::image M = M * XMMatrixTranslation(params.pos.x, params.pos.y, params.pos.z); - if (params.customProjection != nullptr) + if (!params.isFullScreenEnabled()) { - M = XMMatrixScaling(1, -1, 1) * M; // reason: screen projection is Y down (like UV-space) and that is the common case for image rendering. 
But custom projections will use the "world space" - M = M * (*params.customProjection); - } - else - { - const wi::Canvas& canvas = canvases[cmd]; - // Asserts will check that a proper canvas was set for this cmd with wi::image::SetCanvas() - // The canvas must be set to have dpi aware rendering - assert(canvas.width > 0); - assert(canvas.height > 0); - assert(canvas.dpi > 0); - M = M * canvas.GetProjection(); + if (params.customProjection != nullptr) + { + M = XMMatrixScaling(1, -1, 1) * M; // reason: screen projection is Y down (like UV-space) and that is the common case for image rendering. But custom projections will use the "world space" + M = M * (*params.customProjection); + } + else + { + const wi::Canvas& canvas = canvases[cmd]; + // Asserts will check that a proper canvas was set for this cmd with wi::image::SetCanvas() + // The canvas must be set to have dpi aware rendering + assert(canvas.width > 0); + assert(canvas.height > 0); + assert(canvas.dpi > 0); + M = M * canvas.GetProjection(); + } } XMVECTOR V = XMVectorSet(params.corners[0].x - params.pivot.x, params.corners[0].y - params.pivot.y, 0, 1); V = XMVector2Transform(V, M); // division by w will happen on GPU - XMStoreFloat4(&push.corners0, V); + XMStoreFloat4(&image.corners0, V); V = XMVectorSet(params.corners[1].x - params.pivot.x, params.corners[1].y - params.pivot.y, 0, 1); V = XMVector2Transform(V, M); // division by w will happen on GPU - XMStoreFloat4(&push.corners1, V); + XMStoreFloat4(&image.corners1, V); V = XMVectorSet(params.corners[2].x - params.pivot.x, params.corners[2].y - params.pivot.y, 0, 1); V = XMVector2Transform(V, M); // division by w will happen on GPU - XMStoreFloat4(&push.corners2, V); + XMStoreFloat4(&image.corners2, V); V = XMVectorSet(params.corners[3].x - params.pivot.x, params.corners[3].y - params.pivot.y, 0, 1); V = XMVector2Transform(V, M); // division by w will happen on GPU - XMStoreFloat4(&push.corners3, V); + XMStoreFloat4(&image.corners3, V); if 
(params.isMirrorEnabled()) { - std::swap(push.corners0, push.corners1); - std::swap(push.corners2, push.corners3); + std::swap(image.corners0, image.corners1); + std::swap(image.corners2, image.corners3); } const TextureDesc& desc = texture->GetDesc(); @@ -208,8 +206,8 @@ namespace wi::image half_texMulAdd.y = XMConvertFloatToHalf(texMulAdd.y); half_texMulAdd.z = XMConvertFloatToHalf(texMulAdd.z); half_texMulAdd.w = XMConvertFloatToHalf(texMulAdd.w); - push.texMulAdd.x = uint(half_texMulAdd.v); - push.texMulAdd.y = uint(half_texMulAdd.v >> 32ull); + image.texMulAdd.x = uint(half_texMulAdd.v); + image.texMulAdd.y = uint(half_texMulAdd.v >> 32ull); XMFLOAT4 texMulAdd2; if (params.isDrawRect2Enabled()) @@ -230,14 +228,22 @@ namespace wi::image half_texMulAdd2.y = XMConvertFloatToHalf(texMulAdd2.y); half_texMulAdd2.z = XMConvertFloatToHalf(texMulAdd2.z); half_texMulAdd2.w = XMConvertFloatToHalf(texMulAdd2.w); - push.texMulAdd2.x = uint(half_texMulAdd2.v); - push.texMulAdd2.y = uint(half_texMulAdd2.v >> 32ull); + image.texMulAdd2.x = uint(half_texMulAdd2.v); + image.texMulAdd2.y = uint(half_texMulAdd2.v >> 32ull); - device->BindPipelineState(&imagePSO[IMAGE_SHADER_STANDARD][params.blendFlag][params.stencilComp][params.stencilRefMode], cmd); + device->BindPipelineState(&imagePSO[params.blendFlag][params.stencilComp][params.stencilRefMode], cmd); - device->PushConstants(&push, sizeof(push), cmd); + device->BindDynamicConstantBuffer(image, CBSLOT_IMAGE, cmd); + device->PushConstants(&image_push, sizeof(image_push), cmd); - device->Draw(4, 0, cmd); + if (params.isFullScreenEnabled()) + { + device->Draw(3, 0, cmd); + } + else + { + device->Draw(4, 0, cmd); + } device->EventEnd(cmd); } @@ -246,44 +252,30 @@ namespace wi::image void LoadShaders() { wi::renderer::LoadShader(ShaderStage::VS, vertexShader, "imageVS.cso"); - wi::renderer::LoadShader(ShaderStage::VS, screenVS, "screenVS.cso"); - - wi::renderer::LoadShader(ShaderStage::PS, imagePS[IMAGE_SHADER_STANDARD], 
"imagePS.cso"); - wi::renderer::LoadShader(ShaderStage::PS, imagePS[IMAGE_SHADER_FULLSCREEN], "screenPS.cso"); - + wi::renderer::LoadShader(ShaderStage::PS, pixelShader, "imagePS.cso"); GraphicsDevice* device = wi::graphics::GetDevice(); - for (int i = 0; i < IMAGE_SHADER_COUNT; ++i) + PipelineStateDesc desc; + desc.vs = &vertexShader; + desc.ps = &pixelShader; + desc.rs = &rasterizerState; + desc.pt = PrimitiveTopology::TRIANGLESTRIP; + + for (int j = 0; j < BLENDMODE_COUNT; ++j) { - PipelineStateDesc desc; - desc.vs = &vertexShader; - if (i == IMAGE_SHADER_FULLSCREEN) + desc.bs = &blendStates[j]; + for (int k = 0; k < STENCILMODE_COUNT; ++k) { - desc.vs = &screenVS; - } - desc.rs = &rasterizerState; - desc.pt = PrimitiveTopology::TRIANGLESTRIP; - - desc.ps = &imagePS[i]; - - for (int j = 0; j < BLENDMODE_COUNT; ++j) - { - desc.bs = &blendStates[j]; - for (int k = 0; k < STENCILMODE_COUNT; ++k) + for (int m = 0; m < STENCILREFMODE_COUNT; ++m) { - for (int m = 0; m < STENCILREFMODE_COUNT; ++m) - { - desc.dss = &depthStencilStates[k][m]; + desc.dss = &depthStencilStates[k][m]; - device->CreatePipelineState(&desc, &imagePSO[i][j][k][m]); + device->CreatePipelineState(&desc, &imagePSO[j][k][m]); - } } } } - - } void Initialize() @@ -415,6 +407,71 @@ namespace wi::image bd.independent_blend_enable = false; blendStates[BLENDMODE_MULTIPLY] = bd; + SamplerDesc samplerDesc; + samplerDesc.filter = Filter::MIN_MAG_MIP_LINEAR; + samplerDesc.address_u = TextureAddressMode::MIRROR; + samplerDesc.address_v = TextureAddressMode::MIRROR; + samplerDesc.address_w = TextureAddressMode::MIRROR; + samplerDesc.mip_lod_bias = 0.0f; + samplerDesc.max_anisotropy = 0; + samplerDesc.comparison_func = ComparisonFunc::NEVER; + samplerDesc.border_color = SamplerBorderColor::TRANSPARENT_BLACK; + samplerDesc.min_lod = 0; + samplerDesc.max_lod = std::numeric_limits::max(); + device->CreateSampler(&samplerDesc, &samplers[SAMPLER_LINEAR_MIRROR]); + + samplerDesc.filter = Filter::MIN_MAG_MIP_LINEAR; 
+ samplerDesc.address_u = TextureAddressMode::CLAMP; + samplerDesc.address_v = TextureAddressMode::CLAMP; + samplerDesc.address_w = TextureAddressMode::CLAMP; + device->CreateSampler(&samplerDesc, &samplers[SAMPLER_LINEAR_CLAMP]); + + samplerDesc.filter = Filter::MIN_MAG_MIP_LINEAR; + samplerDesc.address_u = TextureAddressMode::WRAP; + samplerDesc.address_v = TextureAddressMode::WRAP; + samplerDesc.address_w = TextureAddressMode::WRAP; + device->CreateSampler(&samplerDesc, &samplers[SAMPLER_LINEAR_WRAP]); + + samplerDesc.filter = Filter::MIN_MAG_MIP_POINT; + samplerDesc.address_u = TextureAddressMode::MIRROR; + samplerDesc.address_v = TextureAddressMode::MIRROR; + samplerDesc.address_w = TextureAddressMode::MIRROR; + device->CreateSampler(&samplerDesc, &samplers[SAMPLER_POINT_MIRROR]); + + samplerDesc.filter = Filter::MIN_MAG_MIP_POINT; + samplerDesc.address_u = TextureAddressMode::WRAP; + samplerDesc.address_v = TextureAddressMode::WRAP; + samplerDesc.address_w = TextureAddressMode::WRAP; + device->CreateSampler(&samplerDesc, &samplers[SAMPLER_POINT_WRAP]); + + + samplerDesc.filter = Filter::MIN_MAG_MIP_POINT; + samplerDesc.address_u = TextureAddressMode::CLAMP; + samplerDesc.address_v = TextureAddressMode::CLAMP; + samplerDesc.address_w = TextureAddressMode::CLAMP; + device->CreateSampler(&samplerDesc, &samplers[SAMPLER_POINT_CLAMP]); + + samplerDesc.filter = Filter::ANISOTROPIC; + samplerDesc.address_u = TextureAddressMode::CLAMP; + samplerDesc.address_v = TextureAddressMode::CLAMP; + samplerDesc.address_w = TextureAddressMode::CLAMP; + samplerDesc.max_anisotropy = 16; + device->CreateSampler(&samplerDesc, &samplers[SAMPLER_ANISO_CLAMP]); + + samplerDesc.filter = Filter::ANISOTROPIC; + samplerDesc.address_u = TextureAddressMode::WRAP; + samplerDesc.address_v = TextureAddressMode::WRAP; + samplerDesc.address_w = TextureAddressMode::WRAP; + samplerDesc.max_anisotropy = 16; + device->CreateSampler(&samplerDesc, &samplers[SAMPLER_ANISO_WRAP]); + + samplerDesc.filter 
= Filter::ANISOTROPIC; + samplerDesc.address_u = TextureAddressMode::MIRROR; + samplerDesc.address_v = TextureAddressMode::MIRROR; + samplerDesc.address_w = TextureAddressMode::MIRROR; + samplerDesc.max_anisotropy = 16; + device->CreateSampler(&samplerDesc, &samplers[SAMPLER_ANISO_MIRROR]); + static wi::eventhandler::Handle handle = wi::eventhandler::Subscribe(wi::eventhandler::EVENT_RELOAD_SHADERS, [](uint64_t userdata) { LoadShaders(); }); LoadShaders(); diff --git a/WickedEngine/wiProfiler.cpp b/WickedEngine/wiProfiler.cpp index b482003cd..308fd7dc2 100644 --- a/WickedEngine/wiProfiler.cpp +++ b/WickedEngine/wiProfiler.cpp @@ -6,6 +6,12 @@ #include "wiTextureHelper.h" #include "wiHelper.h" #include "wiUnorderedMap.h" +#include "wiBacklog.h" + +#if __has_include("Superluminal/PerformanceAPI_capi.h") +#include "Superluminal/PerformanceAPI_capi.h" +#include "Superluminal/PerformanceAPI_loader.h" +#endif // superluminal #include #include @@ -27,6 +33,11 @@ namespace wi::profiler std::atomic nextQuery{ 0 }; int queryheap_idx = 0; +#if PERFORMANCEAPI_ENABLED + PerformanceAPI_ModuleHandle superluminal_handle = {}; + PerformanceAPI_Functions superluminal_functions = {}; +#endif // PERFORMANCEAPI_ENABLED + struct Range { bool in_use = false; @@ -73,6 +84,14 @@ namespace wi::profiler success = device->CreateBuffer(&bd, nullptr, &queryResultBuffer[i]); assert(success); } + +#if PERFORMANCEAPI_ENABLED + superluminal_handle = PerformanceAPI_LoadFrom(L"PerformanceAPI.dll", &superluminal_functions); + if (superluminal_handle) + { + wi::backlog::post("[wi::profiler] Superluminal Performance API loaded"); + } +#endif // PERFORMANCEAPI_ENABLED } cpu_frame = BeginRangeCPU("CPU Frame"); @@ -156,6 +175,13 @@ namespace wi::profiler if (!ENABLED || !initialized) return 0; +#if PERFORMANCEAPI_ENABLED + if (superluminal_handle) + { + superluminal_functions.BeginEvent(name, nullptr, 0xFF0000FF); + } +#endif // PERFORMANCEAPI_ENABLED + range_id id = wi::helper::string_hash(name); 
lock.lock(); @@ -213,6 +239,13 @@ namespace wi::profiler if (it->second.IsCPURange()) { it->second.time = (float)it->second.cpuTimer.elapsed(); + +#if PERFORMANCEAPI_ENABLED + if (superluminal_handle) + { + superluminal_functions.EndEvent(); + } +#endif // PERFORMANCEAPI_ENABLED } else { diff --git a/WickedEngine/wiRenderPath3D.cpp b/WickedEngine/wiRenderPath3D.cpp index 9eb2eb24d..b7f58ae67 100644 --- a/WickedEngine/wiRenderPath3D.cpp +++ b/WickedEngine/wiRenderPath3D.cpp @@ -668,6 +668,7 @@ void RenderPath3D::Render() const // async compute parallel with depth prepass cmd = device->BeginCommandList(QUEUE_COMPUTE); + CommandList cmd_prepareframe_async = cmd; device->WaitCommandList(cmd, cmd_prepareframe); wi::jobsystem::Execute(ctx, [this, cmd](wi::jobsystem::JobArgs args) { @@ -853,6 +854,7 @@ void RenderPath3D::Render() const // Updating textures: cmd = device->BeginCommandList(); + device->WaitCommandList(cmd, cmd_prepareframe_async); wi::jobsystem::Execute(ctx, [cmd, this](wi::jobsystem::JobArgs args) { wi::renderer::BindCommonResources(cmd); wi::renderer::BindCameraCB( @@ -1592,6 +1594,7 @@ void RenderPath3D::setSSREnabled(bool value) desc.format = Format::R16G16B16A16_FLOAT; desc.width = internalResolution.x / 2; desc.height = internalResolution.y / 2; + desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE; device->CreateTexture(&desc, nullptr, &rtSSR); device->SetName(&rtSSR, "rtSSR"); diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp index 0b3f5199f..ccd71778b 100644 --- a/WickedEngine/wiRenderer.cpp +++ b/WickedEngine/wiRenderer.cpp @@ -53,6 +53,10 @@ Sampler samplers[SAMPLER_COUNT]; std::string SHADERPATH = "shaders/"; std::string SHADERSOURCEPATH = "../WickedEngine/shaders/"; +// define this to use raytracing pipeline for raytraced reflections: +// Currently the DX12 device could crash for unknown reasons with the global root signature export +//#define RTREFLECTION_WITH_RAYTRACING_PIPELINE + // Simple and efficient allocator that 
reserves a linear memory buffer and can: // - allocate bottom-up until there is space // - free from the last allocation top-down, for temporary allocations @@ -60,20 +64,17 @@ std::string SHADERSOURCEPATH = "../WickedEngine/shaders/"; class LinearAllocator { public: - constexpr size_t get_capacity() const + inline size_t get_capacity() const { - return capacity; + return buffer.size(); } inline void reserve(size_t newCapacity) { - capacity = newCapacity; - - std::free(buffer); - buffer = (uint8_t*)std::malloc(capacity); + buffer.resize(newCapacity); } - constexpr uint8_t* allocate(size_t size) + inline uint8_t* allocate(size_t size) { - if (offset + size <= capacity) + if (offset + size <= buffer.size()) { uint8_t* ret = &buffer[offset]; offset += size; @@ -81,25 +82,23 @@ public: } return nullptr; } - constexpr void free(size_t size) + inline void free(size_t size) { assert(offset >= size); offset -= size; } - constexpr void reset() + inline void reset() { offset = 0; } - constexpr uint8_t* top() + inline uint8_t* top() { - return buffer + offset; + return buffer.data() + offset; } private: - uint8_t* buffer = nullptr; - size_t capacity = 0; + wi::vector buffer; size_t offset = 0; - size_t alignment = 1; }; LinearAllocator renderFrameAllocators[COMMANDLIST_COUNT]; // can be used by graphics threads inline LinearAllocator& GetRenderFrameAllocator(CommandList cmd) @@ -788,14 +787,14 @@ void LoadShaders() wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::VS, shaders[VSTYPE_LIGHTVISUALIZER_SPOTLIGHT], "vSpotLightVS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::VS, shaders[VSTYPE_LIGHTVISUALIZER_POINTLIGHT], "vPointLightVS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::VS, shaders[VSTYPE_SPHERE], "sphereVS.cso"); }); - wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::VS, shaders[VSTYPE_CUBE], 
"cubeVS.cso"); }); + wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::VS, shaders[VSTYPE_OCCLUDEE], "occludeeVS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::VS, shaders[VSTYPE_SKY], "skyVS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::VS, shaders[VSTYPE_VOXELIZER], "objectVS_voxelizer.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::VS, shaders[VSTYPE_VOXEL], "voxelVS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::VS, shaders[VSTYPE_FORCEFIELDVISUALIZER_POINT], "forceFieldPointVisualizerVS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::VS, shaders[VSTYPE_FORCEFIELDVISUALIZER_PLANE], "forceFieldPlaneVisualizerVS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::VS, shaders[VSTYPE_RAYTRACE_SCREEN], "raytrace_screenVS.cso"); }); - wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::VS, shaders[VSTYPE_SCREEN], "screenVS.cso"); }); + wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::VS, shaders[VSTYPE_POSTPROCESS], "postprocessVS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::VS, shaders[VSTYPE_LENSFLARE], "lensFlareVS.cso"); }); if (device->CheckCapability(GraphicsDeviceCapability::RENDERTARGET_AND_VIEWPORT_ARRAYINDEX_WITHOUT_GS)) @@ -1016,6 +1015,8 @@ void LoadShaders() if (device->CheckCapability(GraphicsDeviceCapability::RAYTRACING)) { + wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_RTREFLECTION], "rtreflectionCS.cso", ShaderModel::SM_6_5); }); + wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, 
shaders[CSTYPE_POSTPROCESS_RTSHADOW], "rtshadowCS.cso", ShaderModel::SM_6_5); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_TILECLASSIFICATION], "rtshadow_denoise_tileclassificationCS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_FILTER], "rtshadow_denoise_filterCS.cso"); }); @@ -1286,7 +1287,7 @@ void LoadShaders() }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { PipelineStateDesc desc; - desc.vs = &shaders[VSTYPE_CUBE]; + desc.vs = &shaders[VSTYPE_OCCLUDEE]; desc.rs = &rasterizers[RSTYPE_OCCLUDEE]; desc.bs = &blendStates[BSTYPE_COLORWRITEDISABLE]; desc.dss = &depthStencils[DSSTYPE_DEPTHREAD]; @@ -1432,7 +1433,7 @@ void LoadShaders() }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { PipelineStateDesc desc; - desc.vs = &shaders[VSTYPE_SCREEN]; + desc.vs = &shaders[VSTYPE_POSTPROCESS]; desc.ps = &shaders[PSTYPE_DOWNSAMPLEDEPTHBUFFER]; desc.rs = &rasterizers[RSTYPE_DOUBLESIDED]; desc.bs = &blendStates[BSTYPE_OPAQUE]; @@ -1442,7 +1443,7 @@ void LoadShaders() }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { PipelineStateDesc desc; - desc.vs = &shaders[VSTYPE_SCREEN]; + desc.vs = &shaders[VSTYPE_POSTPROCESS]; desc.ps = &shaders[PSTYPE_POSTPROCESS_UPSAMPLE_BILATERAL]; desc.rs = &rasterizers[RSTYPE_DOUBLESIDED]; desc.bs = &blendStates[BSTYPE_PREMULTIPLIED]; @@ -1452,7 +1453,7 @@ void LoadShaders() }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { PipelineStateDesc desc; - desc.vs = &shaders[VSTYPE_SCREEN]; + desc.vs = &shaders[VSTYPE_POSTPROCESS]; desc.ps = &shaders[PSTYPE_POSTPROCESS_OUTLINE]; desc.rs = &rasterizers[RSTYPE_DOUBLESIDED]; desc.bs = &blendStates[BSTYPE_TRANSPARENT]; @@ -1628,6 +1629,7 @@ void LoadShaders() device->CreatePipelineState(&desc, &PSO_debug[args.jobIndex]); }); +#ifdef 
RTREFLECTION_WITH_RAYTRACING_PIPELINE if(device->CheckCapability(GraphicsDeviceCapability::RAYTRACING)) { wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { @@ -1678,8 +1680,9 @@ void LoadShaders() success = device->CreateRaytracingPipelineState(&rtdesc, &RTPSO_reflection); - }); + }); }; +#endif // RTREFLECTION_WITH_RAYTRACING_PIPELINE wi::jobsystem::Wait(ctx); @@ -1694,7 +1697,7 @@ void LoadBuffers() bd.size = sizeof(FrameCB); bd.bind_flags = BindFlag::CONSTANT_BUFFER; device->CreateBuffer(&bd, nullptr, &constantBuffers[CBTYPE_FRAME]); - device->SetName(&constantBuffers[CBTYPE_FRAME], "FrameCB"); + device->SetName(&constantBuffers[CBTYPE_FRAME], "constantBuffers[CBTYPE_FRAME]"); bd.size = sizeof(ShaderEntity) * SHADER_ENTITY_COUNT; @@ -1702,14 +1705,14 @@ void LoadBuffers() bd.misc_flags = ResourceMiscFlag::BUFFER_RAW; bd.stride = sizeof(ShaderEntity); device->CreateBuffer(&bd, nullptr, &resourceBuffers[RBTYPE_ENTITYARRAY]); - device->SetName(&resourceBuffers[RBTYPE_ENTITYARRAY], "EntityArray"); + device->SetName(&resourceBuffers[RBTYPE_ENTITYARRAY], "resourceBuffers[RBTYPE_ENTITYARRAY]"); bd.size = sizeof(XMMATRIX) * MATRIXARRAY_COUNT; bd.bind_flags = BindFlag::SHADER_RESOURCE; bd.misc_flags = ResourceMiscFlag::BUFFER_RAW; bd.stride = sizeof(XMMATRIX); device->CreateBuffer(&bd, nullptr, &resourceBuffers[RBTYPE_MATRIXARRAY]); - device->SetName(&resourceBuffers[RBTYPE_MATRIXARRAY], "MatrixArray"); + device->SetName(&resourceBuffers[RBTYPE_MATRIXARRAY], "resourceBuffers[RBTYPE_MATRIXARRAY]"); { TextureDesc desc; @@ -1721,6 +1724,7 @@ void LoadBuffers() InitData.data_ptr = sheenLUTdata; InitData.row_pitch = desc.width; device->CreateTexture(&desc, &InitData, &textures[TEXTYPE_2D_SHEENLUT]); + device->SetName(&textures[TEXTYPE_2D_SHEENLUT], "textures[TEXTYPE_2D_SHEENLUT]"); } { @@ -1730,7 +1734,9 @@ void LoadBuffers() desc.height = 64; desc.format = Format::R16G16B16A16_FLOAT; desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; + 
desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE; device->CreateTexture(&desc, nullptr, &textures[TEXTYPE_2D_SKYATMOSPHERE_TRANSMITTANCELUT]); + device->SetName(&textures[TEXTYPE_2D_SKYATMOSPHERE_TRANSMITTANCELUT], "textures[TEXTYPE_2D_SKYATMOSPHERE_TRANSMITTANCELUT]"); } { TextureDesc desc; @@ -1739,7 +1745,9 @@ void LoadBuffers() desc.height = 32; desc.format = Format::R16G16B16A16_FLOAT; desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; + desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE; device->CreateTexture(&desc, nullptr, &textures[TEXTYPE_2D_SKYATMOSPHERE_MULTISCATTEREDLUMINANCELUT]); + device->SetName(&textures[TEXTYPE_2D_SKYATMOSPHERE_MULTISCATTEREDLUMINANCELUT], "textures[TEXTYPE_2D_SKYATMOSPHERE_MULTISCATTEREDLUMINANCELUT]"); } { TextureDesc desc; @@ -1748,7 +1756,9 @@ void LoadBuffers() desc.height = 104; desc.format = Format::R16G16B16A16_FLOAT; desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; + desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE; device->CreateTexture(&desc, nullptr, &textures[TEXTYPE_2D_SKYATMOSPHERE_SKYVIEWLUT]); + device->SetName(&textures[TEXTYPE_2D_SKYATMOSPHERE_SKYVIEWLUT], "textures[TEXTYPE_2D_SKYATMOSPHERE_SKYVIEWLUT]"); } { TextureDesc desc; @@ -1757,7 +1767,9 @@ void LoadBuffers() desc.height = 1; desc.format = Format::R16G16B16A16_FLOAT; desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; + desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE; device->CreateTexture(&desc, nullptr, &textures[TEXTYPE_2D_SKYATMOSPHERE_SKYLUMINANCELUT]); + device->SetName(&textures[TEXTYPE_2D_SKYATMOSPHERE_SKYLUMINANCELUT], "textures[TEXTYPE_2D_SKYATMOSPHERE_SKYLUMINANCELUT]"); } } void SetUpStates() @@ -2025,38 +2037,11 @@ void SetUpStates() bd.alpha_to_coverage_enable = false; bd.independent_blend_enable = false; blendStates[BSTYPE_TRANSPARENTSHADOW] = bd; -} -void ModifyObjectSampler(const SamplerDesc& desc) -{ - device->CreateSampler(&desc, 
&samplers[SAMPLER_OBJECTSHADER]); -} -const std::string& GetShaderPath() -{ - return SHADERPATH; -} -void SetShaderPath(const std::string& path) -{ - SHADERPATH = path; -} -const std::string& GetShaderSourcePath() -{ - return SHADERSOURCEPATH; -} -void SetShaderSourcePath(const std::string& path) -{ - SHADERSOURCEPATH = path; -} -void ReloadShaders() -{ - device->ClearPipelineStateCache(); - wi::eventhandler::FireEvent(wi::eventhandler::EVENT_RELOAD_SHADERS, 0); -} -void InitializeCommonSamplers() -{ + SamplerDesc samplerDesc; samplerDesc.filter = Filter::MIN_MAG_MIP_LINEAR; samplerDesc.address_u = TextureAddressMode::MIRROR; @@ -2137,51 +2122,36 @@ void InitializeCommonSamplers() samplerDesc.max_anisotropy = 0; samplerDesc.comparison_func = ComparisonFunc::GREATER_EQUAL; device->CreateSampler(&samplerDesc, &samplers[SAMPLER_CMP_DEPTH]); - - - // Static sampler bindings must match with the static sampler declarations in shaders/globals.hlsli - StaticSampler sam; - - sam.sampler = samplers[SAMPLER_LINEAR_CLAMP]; - sam.slot = 100; - device->SetCommonSampler(&sam); - - sam.sampler = samplers[SAMPLER_LINEAR_WRAP]; - sam.slot = 101; - device->SetCommonSampler(&sam); - - sam.sampler = samplers[SAMPLER_LINEAR_MIRROR]; - sam.slot = 102; - device->SetCommonSampler(&sam); - - sam.sampler = samplers[SAMPLER_POINT_CLAMP]; - sam.slot = 103; - device->SetCommonSampler(&sam); - - sam.sampler = samplers[SAMPLER_POINT_WRAP]; - sam.slot = 104; - device->SetCommonSampler(&sam); - - sam.sampler = samplers[SAMPLER_POINT_MIRROR]; - sam.slot = 105; - device->SetCommonSampler(&sam); - - sam.sampler = samplers[SAMPLER_ANISO_CLAMP]; - sam.slot = 106; - device->SetCommonSampler(&sam); - - sam.sampler = samplers[SAMPLER_ANISO_WRAP]; - sam.slot = 107; - device->SetCommonSampler(&sam); - - sam.sampler = samplers[SAMPLER_ANISO_MIRROR]; - sam.slot = 108; - device->SetCommonSampler(&sam); - - sam.sampler = samplers[SAMPLER_CMP_DEPTH]; - sam.slot = 109; - device->SetCommonSampler(&sam); } + +void 
ModifyObjectSampler(const SamplerDesc& desc) +{ + device->CreateSampler(&desc, &samplers[SAMPLER_OBJECTSHADER]); +} + +const std::string& GetShaderPath() +{ + return SHADERPATH; +} +void SetShaderPath(const std::string& path) +{ + SHADERPATH = path; +} +const std::string& GetShaderSourcePath() +{ + return SHADERSOURCEPATH; +} +void SetShaderSourcePath(const std::string& path) +{ + SHADERSOURCEPATH = path; +} +void ReloadShaders() +{ + device->ClearPipelineStateCache(); + + wi::eventhandler::FireEvent(wi::eventhandler::EVENT_RELOAD_SHADERS, 0); +} + void Initialize() { wi::Timer timer; @@ -2545,15 +2515,16 @@ void RenderMeshes( device->GetDescriptorIndex(&instances.buffer, SubresourceType::SRV), instancedBatch.dataOffset ); - device->PushConstants(&push, sizeof(push), cmd); if (pso_backside != nullptr) { device->BindPipelineState(pso_backside, cmd); + device->PushConstants(&push, sizeof(push), cmd); device->DrawIndexedInstanced(subset.indexCount, instancedBatch.instanceCount, subset.indexOffset, 0, 0, cmd); } device->BindPipelineState(pso, cmd); + device->PushConstants(&push, sizeof(push), cmd); device->DrawIndexedInstanced(subset.indexCount, instancedBatch.instanceCount, subset.indexOffset, 0, 0, cmd); } @@ -2709,8 +2680,8 @@ void RenderImpostors( device->PushConstants(&instances.offset, sizeof(uint), cmd); - device->BindResource(&instances.buffer, 21, cmd); - device->BindResource(&vis.scene->impostorArray, 0, cmd); + device->BindResource(&instances.buffer, 0, cmd); + device->BindResource(&vis.scene->impostorArray, 1, cmd); device->Draw(drawableInstanceCount * 6, 0, cmd); @@ -3701,6 +3672,20 @@ void UpdateRenderData( wi::profiler::EndRange(range); } + device->EventEnd(cmd); +} + + +void UpdateRenderDataAsync( + const Visibility& vis, + const FrameCB& frameCB, + CommandList cmd +) +{ + device->EventBegin("UpdateRenderDataAsync", cmd); + + BindCommonResources(cmd); + if (vis.scene->weather.IsRealisticSky()) { // Render Atmospheric Scattering textures for lighting and 
sky @@ -3841,20 +3826,6 @@ void UpdateRenderData( volumetric_clouds_precomputed = true; } - device->EventEnd(cmd); -} - - -void UpdateRenderDataAsync( - const Visibility& vis, - const FrameCB& frameCB, - CommandList cmd -) -{ - device->EventBegin("UpdateRenderDataAsync", cmd); - - BindCommonResources(cmd); - // GPU Particle systems simulation/sorting/culling: if (!vis.visibleEmitters.empty() && frameCB.delta_time > 0) { @@ -4044,9 +4015,7 @@ void OcclusionCulling_Render(const CameraComponent& camera, const Visibility& vi if (queryIndex >= 0) { const AABB& aabb = vis.scene->aabb_objects[instanceIndex]; - const XMMATRIX transform = aabb.getAsBoxMatrix() * VP; - device->PushConstants(&transform, sizeof(transform), cmd); // render bounding box to later read the occlusion status @@ -4071,9 +4040,7 @@ void OcclusionCulling_Render(const CameraComponent& camera, const Visibility& vi { uint32_t queryIndex = (uint32_t)light.occlusionquery; const AABB& aabb = vis.scene->aabb_lights[lightIndex]; - const XMMATRIX transform = aabb.getAsBoxMatrix() * VP; - device->PushConstants(&transform, sizeof(transform), cmd); device->QueryBegin(&queryHeap, queryIndex, cmd); @@ -4173,6 +4140,8 @@ void DrawSoftParticles( wi::profiler::BeginRangeGPU("EmittedParticles - Render (Distortion)", cmd) : wi::profiler::BeginRangeGPU("EmittedParticles - Render", cmd); + BindCommonResources(cmd); + // Sort emitters based on distance: assert(emitterCount < 0x0000FFFF); // watch out for sorting hash truncation! 
uint32_t* emitterSortingHashes = (uint32_t*)GetRenderFrameAllocator(cmd).allocate(sizeof(uint32_t) * emitterCount); @@ -4219,6 +4188,8 @@ void DrawLightVisualizers( { device->EventBegin("Light Visualizer Render", cmd); + BindCommonResources(cmd); + XMMATRIX camrot = XMLoadFloat3x3(&vis.camera->rotationMatrix); XMMATRIX VP = vis.camera->GetViewProjection(); @@ -4367,6 +4338,8 @@ void DrawLensFlares( device->EventBegin("Lens Flares", cmd); + BindCommonResources(cmd); + for (auto visibleLight : vis.visibleLights) { uint16_t lightIndex = visibleLight.index; @@ -6757,9 +6730,16 @@ void ComputeTiledLightCulling( device->Barrier(barriers, arraysize(barriers), cmd); } - device->EventEnd(cmd); } + + // Unbind from UAV slots: + GPUResource empty; + const GPUResource* uavs[] = { + &empty, + &empty + }; + device->BindUAVs(uavs, 0, arraysize(uavs), cmd); } @@ -7443,7 +7423,6 @@ void ComputeLuminance( luminance_log_range_rcp = 1.0f / luminance_log_range; luminance_pixelcount = float(postprocess.resolution.x * postprocess.resolution.y); luminance_eyeadaptionkey = eyeadaptionkey; - device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->BindUAV(&res.luminance, 0, cmd); @@ -7459,6 +7438,7 @@ void ComputeLuminance( device->BindComputeShader(&shaders[CSTYPE_LUMINANCE_PASS1], cmd); device->BindResource(&sourceImage, 0, cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->Dispatch( (postprocess.resolution.x + LUMINANCE_BLOCKSIZE - 1) / LUMINANCE_BLOCKSIZE, (postprocess.resolution.y + LUMINANCE_BLOCKSIZE - 1) / LUMINANCE_BLOCKSIZE, @@ -7478,6 +7458,7 @@ void ComputeLuminance( { device->BindComputeShader(&shaders[CSTYPE_LUMINANCE_PASS2], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->Dispatch(1, 1, 1, cmd); } @@ -7537,6 +7518,8 @@ void ComputeBloom( const TextureDesc& desc = res.texture_bloom.GetDesc(); + device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_BLOOMSEPARATE], cmd); + Bloom bloom; 
bloom.resolution_rcp.x = 1.0f / desc.width; bloom.resolution_rcp.y = 1.0f / desc.height; @@ -7547,8 +7530,6 @@ void ComputeBloom( bloom.buffer_input_luminance = device->GetDescriptorIndex(buffer_luminance, SubresourceType::SRV); device->PushConstants(&bloom, sizeof(bloom), cmd); - device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_BLOOMSEPARATE], cmd); - { GPUBarrier barriers[] = { GPUBarrier::Image(&res.texture_bloom, res.texture_bloom.desc.layout, ResourceState::UNORDERED_ACCESS), @@ -8774,7 +8755,7 @@ void Postprocess_MSAO( msao.xRejectFadeoff = 1.0f / -RejectionFalloff; msao.xRcpAccentuation = 1.0f / (1.0f + Accentuation); - device->PushConstants(&msao, sizeof(msao), cmd); + device->BindDynamicConstantBuffer(msao, CBSLOT_MSAO, cmd); device->BindResource(&read_depth, 0, cmd); @@ -9049,8 +9030,6 @@ void Postprocess_RTAO( device->EventEnd(cmd); - device->PushConstants(&postprocess, sizeof(postprocess), cmd); - int temporal_output = res.frame % 2; int temporal_history = 1 - temporal_output; @@ -9059,6 +9038,8 @@ void Postprocess_RTAO( device->EventBegin("Denoise - Tile Classification", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_RTAO_DENOISE_TILECLASSIFICATION], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); + device->BindResource(&res.normals, 0, cmd); device->BindResource(&res.tiles, 1, cmd); device->BindResource(&res.moments[temporal_history], 2, cmd); @@ -9105,6 +9086,8 @@ void Postprocess_RTAO( device->EventBegin("Denoise - Filter", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_RTAO_DENOISE_FILTER], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); + device->BindResource(&res.normals, 0, cmd); device->BindResource(&res.metadata, 1, cmd); @@ -9249,7 +9232,11 @@ void Postprocess_RTReflection( const TextureDesc& desc = output.desc; +#ifdef RTREFLECTION_WITH_RAYTRACING_PIPELINE device->BindRaytracingPipelineState(&RTPSO_reflection, cmd); +#else + 
device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_RTREFLECTION], cmd); +#endif // RTREFLECTION_WITH_RAYTRACING_PIPELINE BindCommonResources(cmd); @@ -9263,6 +9250,21 @@ void Postprocess_RTReflection( std::memcpy(&postprocess.params1.x, &instanceInclusionMask, sizeof(instanceInclusionMask)); device->PushConstants(&postprocess, sizeof(postprocess), cmd); + const GPUResource* uavs[] = { + &output, + &res.rayLengths + }; + device->BindUAVs(uavs, 0, arraysize(uavs), cmd); + + { + GPUBarrier barriers[] = { + GPUBarrier::Image(&output, output.desc.layout, ResourceState::UNORDERED_ACCESS), + GPUBarrier::Image(&res.rayLengths, res.rayLengths.desc.layout, ResourceState::UNORDERED_ACCESS), + }; + device->Barrier(barriers, arraysize(barriers), cmd); + } + +#ifdef RTREFLECTION_WITH_RAYTRACING_PIPELINE size_t shaderIdentifierSize = device->GetShaderIdentifierSize(); GraphicsDevice::GPUAllocation shadertable_raygen = device->AllocateGPU(shaderIdentifierSize, cmd); GraphicsDevice::GPUAllocation shadertable_miss = device->AllocateGPU(shaderIdentifierSize, cmd); @@ -9290,22 +9292,19 @@ void Postprocess_RTReflection( dispatchraysdesc.width = desc.width; dispatchraysdesc.height = desc.height; - const GPUResource* uavs[] = { - &output, - &res.rayLengths - }; - device->BindUAVs(uavs, 0, arraysize(uavs), cmd); - - { - GPUBarrier barriers[] = { - GPUBarrier::Image(&output, output.desc.layout, ResourceState::UNORDERED_ACCESS), - GPUBarrier::Image(&res.rayLengths, res.rayLengths.desc.layout, ResourceState::UNORDERED_ACCESS), - }; - device->Barrier(barriers, arraysize(barriers), cmd); - } - device->DispatchRays(&dispatchraysdesc, cmd); +#else + + device->Dispatch( + (desc.width + 7) / 8, + (desc.height + 3) / 4, + 1, + cmd + ); + +#endif // RTREFLECTION_WITH_RAYTRACING_PIPELINE + { GPUBarrier barriers[] = { GPUBarrier::Memory(), @@ -9315,8 +9314,6 @@ void Postprocess_RTReflection( device->Barrier(barriers, arraysize(barriers), cmd); } - device->PushConstants(&postprocess, 
sizeof(postprocess), cmd); - int temporal_output = device->GetFrameCount() % 2; int temporal_history = 1 - temporal_output; @@ -9325,6 +9322,8 @@ void Postprocess_RTReflection( device->EventBegin("Temporal pass", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_TEMPORAL], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); + device->BindResource(&output, 0, cmd); device->BindResource(&res.temporal[temporal_history], 1, cmd); device->BindResource(&res.rayLengths, 3, cmd); @@ -9364,6 +9363,8 @@ void Postprocess_RTReflection( device->EventBegin("Median blur pass", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_MEDIAN], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); + device->BindResource(&res.temporal[temporal_output], 0, cmd); const GPUResource* uavs[] = { @@ -9444,12 +9445,12 @@ void Postprocess_SSR( ssr_input_maxmip = float(input_desc.mip_levels - 1); ssr_input_resolution_max = (float)std::max(input_desc.width, input_desc.height); ssr_frame = (float)res.frame; - device->PushConstants(&postprocess, sizeof(postprocess), cmd); // Raytrace pass: { device->EventBegin("Stochastic Raytrace pass", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_RAYTRACE], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->BindResource(&input, 0, cmd); @@ -9488,6 +9489,7 @@ void Postprocess_SSR( { device->EventBegin("Resolve pass", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_RESOLVE], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->BindResource(&res.texture_raytrace, 0, cmd); device->BindResource(&input, 1, cmd); @@ -9529,6 +9531,7 @@ void Postprocess_SSR( { device->EventBegin("Temporal pass", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_TEMPORAL], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->BindResource(&output, 0, cmd); 
device->BindResource(&res.texture_temporal[temporal_history], 1, cmd); @@ -9568,6 +9571,7 @@ void Postprocess_SSR( { device->EventBegin("Median blur pass", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_MEDIAN], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->BindResource(&res.texture_temporal[temporal_output], 0, cmd); @@ -9681,6 +9685,8 @@ void Postprocess_RTShadow( device->EventBegin("Raytrace", cmd); + device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_RTSHADOW], cmd); + PostProcess postprocess; postprocess.resolution.x = desc.width; postprocess.resolution.y = desc.height; @@ -9690,8 +9696,6 @@ void Postprocess_RTShadow( std::memcpy(&postprocess.params1.x, &instanceInclusionMask, sizeof(instanceInclusionMask)); device->PushConstants(&postprocess, sizeof(postprocess), cmd); - device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_RTSHADOW], cmd); - const GPUResource* uavs[] = { &res.temp, &res.normals, @@ -9733,6 +9737,7 @@ void Postprocess_RTShadow( { device->EventBegin("Denoise - Tile Classification", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_TILECLASSIFICATION], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->BindResource(&res.normals, 0, cmd); device->BindResource(&res.tiles, 2, cmd); @@ -9946,6 +9951,7 @@ void Postprocess_RTShadow( { device->EventBegin("Temporal Denoise", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_TEMPORAL], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->BindResource(&res.temp, 0, cmd); device->BindResource(&res.temporal[temporal_history], 1, cmd); @@ -10005,6 +10011,8 @@ void Postprocess_ScreenSpaceShadow( const TextureDesc& desc = output.GetDesc(); + device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SCREENSPACESHADOW], cmd); + PostProcess postprocess; postprocess.resolution.x = desc.width; postprocess.resolution.y = desc.height; @@ -10014,8 +10022,6 @@ 
void Postprocess_ScreenSpaceShadow( postprocess.params0.y = (float)samplecount; device->PushConstants(&postprocess, sizeof(postprocess), cmd); - device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SCREENSPACESHADOW], cmd); - const GPUResource* uavs[] = { &output, }; @@ -10176,12 +10182,12 @@ void Postprocess_DepthOfField( postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y; dof_cocscale = coc_scale; dof_maxcoc = max_coc; - device->PushConstants(&postprocess, sizeof(postprocess), cmd); // Compute tile max COC (horizontal): { device->EventBegin("TileMax - Horizontal", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_HORIZONTAL], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); const GPUResource* uavs[] = { &res.texture_tilemax_horizontal, @@ -10220,6 +10226,7 @@ void Postprocess_DepthOfField( { device->EventBegin("TileMax - Vertical", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_VERTICAL], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); const GPUResource* resarray[] = { &res.texture_tilemax_horizontal, @@ -10264,6 +10271,7 @@ void Postprocess_DepthOfField( { device->EventBegin("NeighborhoodMax", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_NEIGHBORHOODMAXCOC], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); const GPUResource* resarray[] = { &res.texture_tilemax, @@ -10309,6 +10317,7 @@ void Postprocess_DepthOfField( { device->EventBegin("Kickjobs", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_KICKJOBS], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->BindResource(&res.texture_tilemax, 0, cmd); @@ -10336,7 +10345,6 @@ void Postprocess_DepthOfField( postprocess.resolution.y = desc.height / 2; postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x; postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y; - 
device->PushConstants(&postprocess, sizeof(postprocess), cmd); // Prepass: { @@ -10367,14 +10375,17 @@ void Postprocess_DepthOfField( device->BindResource(&res.buffer_tiles_earlyexit, 2, cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_PREPASS_EARLYEXIT], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->DispatchIndirect(&res.buffer_tile_statistics, INDIRECT_OFFSET_EARLYEXIT, cmd); device->BindResource(&res.buffer_tiles_cheap, 2, cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_PREPASS], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->DispatchIndirect(&res.buffer_tile_statistics, INDIRECT_OFFSET_CHEAP, cmd); device->BindResource(&res.buffer_tiles_expensive, 2, cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_PREPASS], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->DispatchIndirect(&res.buffer_tile_statistics, INDIRECT_OFFSET_EXPENSIVE, cmd); { @@ -10418,12 +10429,15 @@ void Postprocess_DepthOfField( } device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_MAIN_EARLYEXIT], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->DispatchIndirect(&res.buffer_tile_statistics, INDIRECT_OFFSET_EARLYEXIT, cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_MAIN_CHEAP], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->DispatchIndirect(&res.buffer_tile_statistics, INDIRECT_OFFSET_CHEAP, cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_MAIN], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->DispatchIndirect(&res.buffer_tile_statistics, INDIRECT_OFFSET_EXPENSIVE, cmd); { @@ -10487,12 +10501,12 @@ void Postprocess_DepthOfField( postprocess.resolution.y = desc.height; postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x; postprocess.resolution_rcp.y = 1.0f / 
postprocess.resolution.y; - device->PushConstants(&postprocess, sizeof(postprocess), cmd); // Upsample pass: { device->EventBegin("Upsample pass", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_UPSAMPLE], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); const GPUResource* resarray[] = { &input, @@ -10618,12 +10632,12 @@ void Postprocess_MotionBlur( postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x; postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y; motionblur_strength = strength; - device->PushConstants(&postprocess, sizeof(postprocess), cmd); // Compute tile max velocities (horizontal): { device->EventBegin("TileMax - Horizontal", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_MOTIONBLUR_TILEMAXVELOCITY_HORIZONTAL], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); const GPUResource* uavs[] = { &res.texture_tilemax_horizontal, @@ -10662,6 +10676,7 @@ void Postprocess_MotionBlur( { device->EventBegin("TileMax - Vertical", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_MOTIONBLUR_TILEMAXVELOCITY_VERTICAL], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->BindResource(&res.texture_tilemax_horizontal, 0, cmd); @@ -10702,6 +10717,7 @@ void Postprocess_MotionBlur( { device->EventBegin("NeighborhoodMax", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_MOTIONBLUR_NEIGHBORHOODMAXVELOCITY], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); const GPUResource* resarray[] = { &res.texture_tilemax, @@ -10747,6 +10763,7 @@ void Postprocess_MotionBlur( { device->EventBegin("Kickjobs", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_MOTIONBLUR_KICKJOBS], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->BindResource(&res.texture_tilemax, 0, cmd); @@ -10798,12 +10815,15 @@ void Postprocess_MotionBlur( } 
device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_MOTIONBLUR_EARLYEXIT], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->DispatchIndirect(&res.buffer_tile_statistics, INDIRECT_OFFSET_EARLYEXIT, cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_MOTIONBLUR_CHEAP], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->DispatchIndirect(&res.buffer_tile_statistics, INDIRECT_OFFSET_CHEAP, cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_MOTIONBLUR], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->DispatchIndirect(&res.buffer_tile_statistics, INDIRECT_OFFSET_EXPENSIVE, cmd); { @@ -10885,12 +10905,12 @@ void Postprocess_VolumetricClouds( postprocess.params0.y = (float)res.texture_reproject[0].GetDesc().height; postprocess.params0.z = 1.0f / postprocess.params0.x; postprocess.params0.w = 1.0f / postprocess.params0.y; - device->PushConstants(&postprocess, sizeof(postprocess), cmd); // Cloud pass: { device->EventBegin("Volumetric Cloud Rendering", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_RENDER], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->BindResource(&texture_shapeNoise, 1, cmd); device->BindResource(&texture_detailNoise, 2, cmd); @@ -10936,7 +10956,6 @@ void Postprocess_VolumetricClouds( postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x; postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y; volumetricclouds_frame = (float)res.frame; - device->PushConstants(&postprocess, sizeof(postprocess), cmd); int temporal_output = device->GetFrameCount() % 2; int temporal_history = 1 - temporal_output; @@ -10945,6 +10964,7 @@ void Postprocess_VolumetricClouds( { device->EventBegin("Volumetric Cloud Reproject", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_REPROJECT], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); 
device->BindResource(&res.texture_cloudRender, 0, cmd); device->BindResource(&res.texture_cloudDepth, 1, cmd); @@ -10988,6 +11008,7 @@ void Postprocess_VolumetricClouds( { device->EventBegin("Volumetric Cloud Temporal", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_TEMPORAL], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->BindResource(&res.texture_reproject[temporal_output], 0, cmd); device->BindResource(&res.texture_reproject_depth[temporal_output], 1, cmd); @@ -11300,7 +11321,7 @@ void Postprocess_FSR( static_cast(temp.desc.height) ); - device->PushConstants(&fsr, sizeof(fsr), cmd); + device->BindDynamicConstantBuffer(fsr, CBSLOT_FSR, cmd); device->BindResource(&input, 0, cmd); @@ -11333,7 +11354,7 @@ void Postprocess_FSR( device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_FSR_SHARPEN], cmd); FsrRcasCon(fsr.const0, sharpness); - device->PushConstants(&fsr, sizeof(fsr), cmd); + device->BindDynamicConstantBuffer(fsr, CBSLOT_FSR, cmd); device->BindResource(&temp, 0, cmd); @@ -11444,11 +11465,11 @@ void Postprocess_Upsample_Bilateral( postprocess.params1.y = (float)input.GetDesc().height; postprocess.params1.z = 1.0f / postprocess.params1.x; postprocess.params1.w = 1.0f / postprocess.params1.y; - device->PushConstants(&postprocess, sizeof(postprocess), cmd); if (pixelshader) { device->BindPipelineState(&PSO_upsample_bilateral, cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->BindResource(&input, 0, cmd); @@ -11486,6 +11507,7 @@ void Postprocess_Upsample_Bilateral( break; } device->BindComputeShader(&shaders[cs], cmd); + device->PushConstants(&postprocess, sizeof(postprocess), cmd); device->BindResource(&input, 0, cmd); @@ -11528,6 +11550,8 @@ void Postprocess_Downsample4x( { device->EventBegin("Postprocess_Downsample4x", cmd); + device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_DOWNSAMPLE4X], cmd); + const TextureDesc& desc = output.GetDesc(); PostProcess postprocess; 
@@ -11537,8 +11561,6 @@ void Postprocess_Downsample4x( postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y; device->PushConstants(&postprocess, sizeof(postprocess), cmd); - device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_DOWNSAMPLE4X], cmd); - device->BindResource(&input, 0, cmd); const GPUResource* uavs[] = { @@ -11578,6 +11600,8 @@ void Postprocess_NormalsFromDepth( { device->EventBegin("Postprocess_NormalsFromDepth", cmd); + device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_NORMALSFROMDEPTH], cmd); + const TextureDesc& desc = output.GetDesc(); PostProcess postprocess; @@ -11588,7 +11612,6 @@ void Postprocess_NormalsFromDepth( postprocess.params0.x = std::floor(std::max(1.0f, log2f(std::max((float)desc.width / (float)depthbuffer.GetDesc().width, (float)desc.height / (float)depthbuffer.GetDesc().height)))); device->PushConstants(&postprocess, sizeof(postprocess), cmd); - device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_NORMALSFROMDEPTH], cmd); device->BindResource(&depthbuffer, 0, cmd); const GPUResource* uavs[] = { diff --git a/WickedEngine/wiRenderer.h b/WickedEngine/wiRenderer.h index a45cce778..6496292be 100644 --- a/WickedEngine/wiRenderer.h +++ b/WickedEngine/wiRenderer.h @@ -47,9 +47,6 @@ namespace wi::renderer void ModifyObjectSampler(const wi::graphics::SamplerDesc& desc); - // Initializes the samplers, including the global static samplers for the device - void InitializeCommonSamplers(); - // Initializes the renderer void Initialize(); @@ -112,9 +109,7 @@ namespace wi::renderer { uint16_t index; uint16_t distance; - bool operator<(const VisibleLight& other) { - return uint32_t(index | (uint32_t(distance) << 16)) < uint32_t(other.index | (uint32_t(other.distance) << 16)); - } + constexpr operator uint32_t() const { return index | (uint32_t(distance) << 16u); } }; wi::vector visibleLights; diff --git a/WickedEngine/wiShaderCompiler.cpp b/WickedEngine/wiShaderCompiler.cpp index c62f22be4..492cab399 100644 --- 
a/WickedEngine/wiShaderCompiler.cpp +++ b/WickedEngine/wiShaderCompiler.cpp @@ -11,7 +11,7 @@ #ifdef PLATFORM_WINDOWS_DESKTOP #define SHADERCOMPILER_ENABLED #define SHADERCOMPILER_ENABLED_DXCOMPILER -//#define SHADERCOMPILER_ENABLED_D3DCOMPILER +#define SHADERCOMPILER_ENABLED_D3DCOMPILER #include // ComPtr #endif // _WIN32 @@ -33,28 +33,14 @@ using namespace wi::graphics; namespace wi::shadercompiler { - struct InternalState + +#ifdef SHADERCOMPILER_ENABLED_DXCOMPILER + struct InternalState_DXC { -#ifdef SHADERCOMPILER_ENABLED_DXCOMPILER - CComPtr dxcUtils; - CComPtr dxcCompiler; - CComPtr dxcIncludeHandler; DxcCreateInstanceProc DxcCreateInstance = nullptr; -#endif // SHADERCOMPILER_ENABLED_DXCOMPILER -#ifdef SHADERCOMPILER_ENABLED_D3DCOMPILER - using PFN_D3DCOMPILE = decltype(&D3DCompile); - PFN_D3DCOMPILE D3DCompile = nullptr; -#endif // SHADERCOMPILER_ENABLED_D3DCOMPILER - - InternalState() + InternalState_DXC() { -#ifdef SHADERCOMPILER_ENABLED_DXCOMPILER - if (dxcCompiler != nullptr) - { - return; // already initialized - } - #ifdef _WIN32 #define LIBDXCOMPILER "dxcompiler.dll" HMODULE dxcompiler = wiLoadLibrary(LIBDXCOMPILER); @@ -67,12 +53,6 @@ namespace wi::shadercompiler DxcCreateInstance = (DxcCreateInstanceProc)wiGetProcAddress(dxcompiler, "DxcCreateInstance"); if (DxcCreateInstance != nullptr) { - HRESULT hr = DxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(&dxcUtils)); - assert(SUCCEEDED(hr)); - hr = DxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&dxcCompiler)); - assert(SUCCEEDED(hr)); - hr = dxcUtils->CreateDefaultIncludeHandler(&dxcIncludeHandler); - assert(SUCCEEDED(hr)); wi::backlog::post("wi::shadercompiler: loaded " LIBDXCOMPILER); } } @@ -80,38 +60,31 @@ namespace wi::shadercompiler { wi::backlog::post("wi::shadercompiler: could not load library " LIBDXCOMPILER); } -#endif // SHADERCOMPILER_ENABLED_DXCOMPILER - - -#ifdef SHADERCOMPILER_ENABLED_D3DCOMPILER - if (D3DCompile != nullptr) - { - return; // already initialized - } - - HMODULE 
d3dcompiler = wiLoadLibrary("d3dcompiler_47.dll"); - if (d3dcompiler != nullptr) - { - D3DCompile = (PFN_D3DCOMPILE)wiGetProcAddress(d3dcompiler, "D3DCompile"); - if (D3DCompile != nullptr) - { - wi::backlog::post("wi::shadercompiler: loaded d3dcompiler_47.dll"); - } - } -#endif // SHADERCOMPILER_ENABLED_D3DCOMPILER } }; - inline InternalState& compiler_internal() + inline InternalState_DXC& dxc_compiler() { - static InternalState internal_state; + static InternalState_DXC internal_state; return internal_state; } -#ifdef SHADERCOMPILER_ENABLED_DXCOMPILER void Compile_DXCompiler(const CompilerInput& input, CompilerOutput& output) { - if (compiler_internal().dxcCompiler == nullptr) + if (dxc_compiler().DxcCreateInstance == nullptr) + { + return; + } + + CComPtr dxcUtils; + CComPtr dxcCompiler; + + HRESULT hr = dxc_compiler().DxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(&dxcUtils)); + assert(SUCCEEDED(hr)); + hr = dxc_compiler().DxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&dxcCompiler)); + assert(SUCCEEDED(hr)); + + if (dxcCompiler == nullptr) { return; } @@ -125,10 +98,10 @@ namespace wi::shadercompiler // https://github.com/microsoft/DirectXShaderCompiler/wiki/Using-dxc.exe-and-dxcompiler.dll#dxcompiler-dll-interface wi::vector args = { - L"-res-may-alias", - L"-flegacy-macro-expansion", + //L"-res-may-alias", + //L"-flegacy-macro-expansion", //L"-no-legacy-cbuf-layout", - //L"-pack-optimized", + //L"-pack-optimized", // this has problem with tessellation shaders: https://github.com/microsoft/DirectXShaderCompiler/issues/3362 //L"-all-resources-bound", //L"-Gis", // Force IEEE strictness //L"-Gec", // Enable backward compatibility mode @@ -145,6 +118,7 @@ namespace wi::shadercompiler { case ShaderFormat::HLSL6: args.push_back(L"-D"); args.push_back(L"HLSL6"); + args.push_back(L"-rootsig-define"); args.push_back(L"WICKED_ENGINE_DEFAULT_ROOTSIGNATURE"); break; case ShaderFormat::SPIRV: args.push_back(L"-D"); args.push_back(L"SPIRV"); @@ -425,13 +399,14 @@ 
namespace wi::shadercompiler { const CompilerInput* input = nullptr; CompilerOutput* output = nullptr; + CComPtr dxcIncludeHandler; HRESULT STDMETHODCALLTYPE LoadSource( _In_z_ LPCWSTR pFilename, // Candidate filename. _COM_Outptr_result_maybenull_ IDxcBlob** ppIncludeSource // Resultant source object for included file, nullptr if not found. ) override { - HRESULT hr = compiler_internal().dxcIncludeHandler->LoadSource(pFilename, ppIncludeSource); + HRESULT hr = dxcIncludeHandler->LoadSource(pFilename, ppIncludeSource); if (SUCCEEDED(hr)) { std::string& filename = output->dependencies.emplace_back(); @@ -443,7 +418,7 @@ namespace wi::shadercompiler /* [in] */ REFIID riid, /* [iid_is][out] */ _COM_Outptr_ void __RPC_FAR* __RPC_FAR* ppvObject) override { - return compiler_internal().dxcIncludeHandler->QueryInterface(riid, ppvObject); + return dxcIncludeHandler->QueryInterface(riid, ppvObject); } ULONG STDMETHODCALLTYPE AddRef(void) override @@ -458,8 +433,11 @@ namespace wi::shadercompiler includehandler.input = &input; includehandler.output = &output; + hr = dxcUtils->CreateDefaultIncludeHandler(&includehandler.dxcIncludeHandler); + assert(SUCCEEDED(hr)); + CComPtr pResults; - HRESULT hr = compiler_internal().dxcCompiler->Compile( + hr = dxcCompiler->Compile( &Source, // Source buffer. args.data(), // Array of pointers to arguments. (uint32_t)args.size(), // Number of arguments. 
@@ -517,9 +495,38 @@ namespace wi::shadercompiler #endif // SHADERCOMPILER_ENABLED_DXCOMPILER #ifdef SHADERCOMPILER_ENABLED_D3DCOMPILER + struct InternalState_D3DCompiler + { + using PFN_D3DCOMPILE = decltype(&D3DCompile); + PFN_D3DCOMPILE D3DCompile = nullptr; + + InternalState_D3DCompiler() + { + if (D3DCompile != nullptr) + { + return; // already initialized + } + + HMODULE d3dcompiler = wiLoadLibrary("d3dcompiler_47.dll"); + if (d3dcompiler != nullptr) + { + D3DCompile = (PFN_D3DCOMPILE)wiGetProcAddress(d3dcompiler, "D3DCompile"); + if (D3DCompile != nullptr) + { + wi::backlog::post("wi::shadercompiler: loaded d3dcompiler_47.dll"); + } + } + } + }; + inline InternalState_D3DCompiler& d3d_compiler() + { + static InternalState_D3DCompiler internal_state; + return internal_state; + } + void Compile_D3DCompiler(const CompilerInput& input, CompilerOutput& output) { - if (compiler_internal().D3DCompile == nullptr) + if (d3d_compiler().D3DCompile == nullptr) { return; } @@ -614,7 +621,7 @@ namespace wi::shadercompiler CComPtr code; CComPtr errors; - HRESULT hr = compiler_internal().D3DCompile( + HRESULT hr = d3d_compiler().D3DCompile( shadersourcedata.data(), shadersourcedata.size(), input.shadersourcefilename.c_str(), diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index 08bf19e30..3b6bc44e1 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wi::version // minor features, major updates, breaking compatibility changes const int minor = 60; // minor bug fixes, alterations, refactors, updates - const int revision = 2; + const int revision = 3; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." 
+ std::to_string(revision); diff --git a/appveyor.yml b/appveyor.yml index ecc6a78a7..db228cbf6 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -23,11 +23,11 @@ after_build: #Editor: - cmd: move %APPVEYOR_BUILD_FOLDER%\BUILD\%PLATFORM%\Release\Editor_Windows\Editor_Windows.exe %APPVEYOR_BUILD_FOLDER%\Editor - cmd: xcopy C:\projects\wickedengine\WickedEngine\*.dll %APPVEYOR_BUILD_FOLDER%\Editor - - cmd: 7z a WickedEngineEditor.zip Content\ features.txt other_licenses.txt *.md Editor\*.exe Editor\images\ Editor\sound\ Editor\*.ini Editor\*.ico Editor\*.lua Editor\*.dll + - cmd: 7z a WickedEngineEditor.zip Content\ features.txt other_licenses.txt *.md Editor\*.exe Editor\images\ Editor\sound\ Editor\*.ini Editor\*.ico Editor\*.lua #Tests: - cmd: move %APPVEYOR_BUILD_FOLDER%\BUILD\%PLATFORM%\Release\Tests\Tests.exe %APPVEYOR_BUILD_FOLDER%\Tests - cmd: xcopy C:\projects\wickedengine\WickedEngine\*.dll %APPVEYOR_BUILD_FOLDER%\Tests - - cmd: 7z a WickedEngineTests.zip Content\ features.txt other_licenses.txt *.md Tests\*.exe Tests\images\ Tests\sound\ Tests\*.ini Tests\*.ico Tests\*.lua Tests\*.dll Tests\*.ttf + - cmd: 7z a WickedEngineTests.zip Content\ features.txt other_licenses.txt *.md Tests\*.exe Tests\images\ Tests\sound\ Tests\*.ini Tests\*.ico Tests\*.lua Tests\*.ttf artifacts: - path: WickedEngineEditor.zip