From f45f08e8ee487fecad716e5c3d26db519b2f8cdf Mon Sep 17 00:00:00 2001 From: Turanszki Janos Date: Mon, 6 Aug 2018 23:37:03 +0100 Subject: [PATCH] mipgen updates --- Editor/RendererWindow.cpp | 3 +- .../generateMIPChain2D_float4_GaussianCS.hlsl | 79 +++++++++++-------- ...erateMIPChain2D_float4_SimpleFilterCS.hlsl | 2 +- .../generateMIPChain3D_float4_GaussianCS.hlsl | 2 +- ...erateMIPChain3D_float4_SimpleFilterCS.hlsl | 2 +- ...PChainCubeArray_float4_SimpleFilterCS.hlsl | 2 +- ...ateMIPChainCube_float4_SimpleFilterCS.hlsl | 2 +- WickedEngine/generateMIPChainHF.hlsli | 2 +- WickedEngine/wiVersion.cpp | 2 +- 9 files changed, 56 insertions(+), 40 deletions(-) diff --git a/Editor/RendererWindow.cpp b/Editor/RendererWindow.cpp index 043131418..5a3f1cb0b 100644 --- a/Editor/RendererWindow.cpp +++ b/Editor/RendererWindow.cpp @@ -175,14 +175,13 @@ RendererWindow::RendererWindow(wiGUI* gui, Renderable3DComponent* component) : G tessellationCheckBox->SetEnabled(wiRenderer::GetDevice()->CheckCapability(wiGraphicsTypes::GraphicsDevice::GRAPHICSDEVICE_CAPABILITY_TESSELLATION)); advancedRefractionsCheckBox = new wiCheckBox("Advanced Refractions: "); - advancedRefractionsCheckBox->SetTooltip("Enable advanced refraction rendering: rough transparent materials will be more matte. This needs additional support from the graphics driver."); + advancedRefractionsCheckBox->SetTooltip("Enable advanced refraction rendering: rough transparent materials will be more matte."); advancedRefractionsCheckBox->SetPos(XMFLOAT2(x, y += step)); advancedRefractionsCheckBox->OnClick([=](wiEventArgs args) { wiRenderer::SetAdvancedRefractionsEnabled(args.bValue); }); advancedRefractionsCheckBox->SetCheck(wiRenderer::GetAdvancedRefractionsEnabled()); rendererWindow->AddWidget(advancedRefractionsCheckBox); - advancedRefractionsCheckBox->SetEnabled(wiRenderer::GetDevice()->CheckCapability(wiGraphicsTypes::GraphicsDevice::GRAPHICSDEVICE_CAPABILITY_UNORDEREDACCESSTEXTURE_LOAD_FORMAT_EXT)); alphaCompositionCheckBox = new wiCheckBox("Alpha Composition: "); alphaCompositionCheckBox->SetTooltip("Enable Alpha Composition. Enables softer alpha blending on partly solid geometry (eg. vegetation) but rendering performance will be slower."); diff --git a/WickedEngine/generateMIPChain2D_float4_GaussianCS.hlsl b/WickedEngine/generateMIPChain2D_float4_GaussianCS.hlsl index 97af175b9..8eeb77109 100644 --- a/WickedEngine/generateMIPChain2D_float4_GaussianCS.hlsl +++ b/WickedEngine/generateMIPChain2D_float4_GaussianCS.hlsl @@ -8,44 +8,61 @@ TEXTURE2D(input, float4, TEXSLOT_UNIQUE0); RWTEXTURE2D(input_output, MIP_OUTPUT_FORMAT, 0); -// Shader requires feature: Typed UAV additional format loads! +static const uint TILE_BORDER = 4; +static const uint TILE_SIZE = TILE_BORDER + GENERATEMIPCHAIN_2D_BLOCK_SIZE + TILE_BORDER; +groupshared float4 tile[TILE_SIZE][TILE_SIZE]; + [numthreads(GENERATEMIPCHAIN_2D_BLOCK_SIZE, GENERATEMIPCHAIN_2D_BLOCK_SIZE, 1)] -void main(uint3 DTid : SV_DispatchThreadID) +void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID) { -#ifndef SHADERCOMPILER_SPIRV + uint i; - // Determine if the thread is alive (it is alive when the dispatchthreadID can directly index a pixel) - if (DTid.x < outputResolution.x && DTid.y < outputResolution.y) + // First, we prewarm the tile cache, including border region: + const uint2 tile_upperleft = Gid.xy * GENERATEMIPCHAIN_2D_BLOCK_SIZE - TILE_BORDER; + const uint2 co[] = { + uint2(0, 0), uint2(1, 0), + uint2(0, 1), uint2(1, 1) + }; + for (i = 0; i < 4; ++i) { - // Do a bilinear sample first and write it out: - input_output[DTid.xy] = input.SampleLevel(sampler_linear_clamp, (DTid.xy + 0.5f) / (float2)outputResolution.xy, 0); - DeviceMemoryBarrier(); + const uint2 coord = GTid.xy * 2 + co[i]; + tile[coord.x][coord.y] = input.SampleLevel(sampler_linear_clamp, (tile_upperleft + coord + 1.0f) / (float2)outputResolution.xy, 0); + } + GroupMemoryBarrierWithGroupSync(); - uint i = 0; - float4 sum = 0; + const int2 thread_to_cache = GTid.xy + TILE_BORDER; - // Gather samples in the X (horizontal) direction: - [unroll] - for (i = 0; i < 9; ++i) - { - sum += input_output[DTid.xy + uint2(gaussianOffsets[i], 0)] * gaussianWeightsNormalized[i]; - } - // Write out the result of the horizontal blur: - DeviceMemoryBarrier(); - input_output[DTid.xy] = sum; - DeviceMemoryBarrier(); - sum = 0; + float4 sum = 0; - // Gather samples in the Y (vertical) direction: - [unroll] - for (i = 0; i < 9; ++i) - { - sum += input_output[DTid.xy + uint2(0, gaussianOffsets[i])] * gaussianWeightsNormalized[i]; - } - // Write out the result of the vertical blur: - DeviceMemoryBarrier(); - input_output[DTid.xy] = sum; + // Then each thread processes just one pixel within tile, excluding border: + + // Horizontal accumulation for each tile pixel, with help of the border region + [unroll] + for (i = 0; i < 9; ++i) + { + const uint2 coord = thread_to_cache + int2(gaussianOffsets[i], 0); + sum += tile[coord.x][coord.y] * gaussianWeightsNormalized[i]; } -#endif + // write out into cache (excluding border region): + tile[thread_to_cache.x][thread_to_cache.y] = sum; + + GroupMemoryBarrierWithGroupSync(); + + sum = 0; + + // Vertical accumulation for each tile pixel, with help of the border region + [unroll] + for (i = 0; i < 9; ++i) + { + const uint2 coord = thread_to_cache + int2(0, gaussianOffsets[i]); + sum += tile[coord.x][coord.y] * gaussianWeightsNormalized[i]; + } + + + if (DTid.x < outputResolution.x && DTid.y < outputResolution.y) + { + // Each valid thread writes out one pixel: + input_output[DTid.xy] = sum; + } } \ No newline at end of file diff --git a/WickedEngine/generateMIPChain2D_float4_SimpleFilterCS.hlsl b/WickedEngine/generateMIPChain2D_float4_SimpleFilterCS.hlsl index 89a65b86d..2d99c856e 100644 --- a/WickedEngine/generateMIPChain2D_float4_SimpleFilterCS.hlsl +++ b/WickedEngine/generateMIPChain2D_float4_SimpleFilterCS.hlsl @@ -15,6 +15,6 @@ void main(uint3 DTid : SV_DispatchThreadID) { if (DTid.x < outputResolution.x && DTid.y < outputResolution.y) { - output[DTid.xy] = input.SampleLevel(customsampler, (DTid.xy + 0.5f) / (float2)outputResolution.xy, 0); + output[DTid.xy] = input.SampleLevel(customsampler, (DTid.xy + 1.0f) / (float2)outputResolution.xy, 0); } } \ No newline at end of file diff --git a/WickedEngine/generateMIPChain3D_float4_GaussianCS.hlsl b/WickedEngine/generateMIPChain3D_float4_GaussianCS.hlsl index a74ebc538..722409cfb 100644 --- a/WickedEngine/generateMIPChain3D_float4_GaussianCS.hlsl +++ b/WickedEngine/generateMIPChain3D_float4_GaussianCS.hlsl @@ -18,7 +18,7 @@ void main(uint3 DTid : SV_DispatchThreadID) if (DTid.x < outputResolution.x && DTid.y < outputResolution.y && DTid.z < outputResolution.z) { // Do a bilinear sample first and write it out: - input_output[DTid] = input.SampleLevel(sampler_linear_clamp, (DTid + 0.5f) / (float3)outputResolution, 0); + input_output[DTid] = input.SampleLevel(sampler_linear_clamp, (DTid + 1.0f) / (float3)outputResolution, 0); DeviceMemoryBarrier(); uint i = 0; diff --git a/WickedEngine/generateMIPChain3D_float4_SimpleFilterCS.hlsl b/WickedEngine/generateMIPChain3D_float4_SimpleFilterCS.hlsl index 7342c13b4..f199bb544 100644 --- a/WickedEngine/generateMIPChain3D_float4_SimpleFilterCS.hlsl +++ b/WickedEngine/generateMIPChain3D_float4_SimpleFilterCS.hlsl @@ -15,6 +15,6 @@ void main( uint3 DTid : SV_DispatchThreadID ) { if (DTid.x < outputResolution.x && DTid.y < outputResolution.y && DTid.z < outputResolution.z) { - output[DTid] = input.SampleLevel(customsampler, (DTid + 0.5f) / (float3)outputResolution, 0); + output[DTid] = input.SampleLevel(customsampler, (DTid + 1.0f) / (float3)outputResolution, 0); } } \ No newline at end of file diff --git a/WickedEngine/generateMIPChainCubeArray_float4_SimpleFilterCS.hlsl b/WickedEngine/generateMIPChainCubeArray_float4_SimpleFilterCS.hlsl index 06d562bae..ce75a0fe3 100644 --- a/WickedEngine/generateMIPChainCubeArray_float4_SimpleFilterCS.hlsl +++ b/WickedEngine/generateMIPChainCubeArray_float4_SimpleFilterCS.hlsl @@ -15,7 +15,7 @@ void main(uint3 DTid : SV_DispatchThreadID) { if (DTid.x < outputResolution.x && DTid.y < outputResolution.y) { - float2 uv = (DTid.xy + 0.5f) / outputResolution.xy; + float2 uv = (DTid.xy + 1.0f) / outputResolution.xy; float3 N = UV_to_CubeMap(uv, DTid.z); output[uint3(DTid.xy, DTid.z + arrayIndex * 6)] = input.SampleLevel(customsampler, float4(N, arrayIndex), 0); diff --git a/WickedEngine/generateMIPChainCube_float4_SimpleFilterCS.hlsl b/WickedEngine/generateMIPChainCube_float4_SimpleFilterCS.hlsl index 5c79af1f6..826acc741 100644 --- a/WickedEngine/generateMIPChainCube_float4_SimpleFilterCS.hlsl +++ b/WickedEngine/generateMIPChainCube_float4_SimpleFilterCS.hlsl @@ -15,7 +15,7 @@ void main(uint3 DTid : SV_DispatchThreadID) { if (DTid.x < outputResolution.x && DTid.y < outputResolution.y) { - float2 uv = (DTid.xy + 0.5f) / outputResolution.xy; + float2 uv = (DTid.xy + 1.0f) / outputResolution.xy; float3 N = UV_to_CubeMap(uv, DTid.z); output[DTid.xyz] = input.SampleLevel(customsampler, N, 0); diff --git a/WickedEngine/generateMIPChainHF.hlsli b/WickedEngine/generateMIPChainHF.hlsli index 9316a7dd6..7aff330db 100644 --- a/WickedEngine/generateMIPChainHF.hlsli +++ b/WickedEngine/generateMIPChainHF.hlsli @@ -19,7 +19,7 @@ static const float gaussianWeightsNormalized[9] = { gaussWeight3 * gaussNormalization, gaussWeight4 * gaussNormalization, }; -static const uint gaussianOffsets[9] = { +static const int gaussianOffsets[9] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 }; diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index 4420f3801..66ca49eca 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wiVersion // minor features, major updates const int minor = 19; // minor bug fixes, alterations, refactors, updates - const int revision = 2; + const int revision = 3; long GetVersion()