diff --git a/WickedEngine/RenderPath3D.cpp b/WickedEngine/RenderPath3D.cpp index 506ca449b..decb83b3e 100644 --- a/WickedEngine/RenderPath3D.cpp +++ b/WickedEngine/RenderPath3D.cpp @@ -870,7 +870,7 @@ void RenderPath3D::Render() const device->EventBegin("Volumetric Clouds Reflection Blend", cmd); wiImageParams fx; fx.enableFullScreen(); - wiImage::Draw(&volumetriccloudResources_reflection.texture_reproject[device->GetFrameCount() % 2], fx, cmd); + wiImage::Draw(&volumetriccloudResources_reflection.texture_temporal[device->GetFrameCount() % 2], fx, cmd); device->EventEnd(cmd); } @@ -947,7 +947,7 @@ void RenderPath3D::Render() const { device->EventBegin("Volumetric Clouds Upsample + Blend", cmd); wiRenderer::Postprocess_Upsample_Bilateral( - volumetriccloudResources.texture_reproject[device->GetFrameCount() % 2], + volumetriccloudResources.texture_temporal[device->GetFrameCount() % 2], rtLinearDepth, *GetGbuffer_Read(GBUFFER_COLOR), // only desc is taken if pixel shader upsampling is used cmd, diff --git a/WickedEngine/offlineshadercompiler.cpp b/WickedEngine/offlineshadercompiler.cpp index 9b13d4cb7..79fd87148 100644 --- a/WickedEngine/offlineshadercompiler.cpp +++ b/WickedEngine/offlineshadercompiler.cpp @@ -208,6 +208,7 @@ int main(int argc, char* argv[]) "volumetricCloud_weathermapCS.hlsl" , "volumetricCloud_renderCS.hlsl" , "volumetricCloud_reprojectCS.hlsl" , + "volumetricCloud_temporalCS.hlsl" , "shadingRateClassificationCS.hlsl" , "shadingRateClassificationCS_DEBUG.hlsl" , "skyAtmosphere_transmittanceLutCS.hlsl" , diff --git a/WickedEngine/shaders/CMakeLists.txt b/WickedEngine/shaders/CMakeLists.txt index a8247ce83..105e89d66 100644 --- a/WickedEngine/shaders/CMakeLists.txt +++ b/WickedEngine/shaders/CMakeLists.txt @@ -134,6 +134,7 @@ set(SHADERS_CS "volumetricCloud_weathermapCS.hlsl" "volumetricCloud_renderCS.hlsl" "volumetricCloud_reprojectCS.hlsl" + "volumetricCloud_temporalCS.hlsl" "shadingRateClassificationCS.hlsl" 
"shadingRateClassificationCS_DEBUG.hlsl" "skyAtmosphere_transmittanceLutCS.hlsl" diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems b/WickedEngine/shaders/Shaders_SOURCE.vcxitems index 0fe2b6c56..65d1836da 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems @@ -1028,6 +1028,7 @@ Compute Compute + Vertex Vertex diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters index 973bd7ea1..694147d47 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters @@ -992,6 +992,9 @@ CS + + CS + diff --git a/WickedEngine/shaders/volumetricCloud_renderCS.hlsl b/WickedEngine/shaders/volumetricCloud_renderCS.hlsl index 8e280b745..374247d07 100644 --- a/WickedEngine/shaders/volumetricCloud_renderCS.hlsl +++ b/WickedEngine/shaders/volumetricCloud_renderCS.hlsl @@ -29,7 +29,7 @@ TEXTURE2D(texture_curlNoise, float4, TEXSLOT_ONDEMAND3); TEXTURE2D(texture_weatherMap, float4, TEXSLOT_ONDEMAND4); RWTEXTURE2D(texture_render, float4, 0); -RWTEXTURE2D(texture_cloudDepth, float, 1); +RWTEXTURE2D(texture_cloudDepth, float2, 1); // Octaves for multiple-scattering approximation. 1 means single-scattering only. 
@@ -545,29 +545,6 @@ void RenderClouds(float3 rayOrigin, float3 rayDirection, float t, float steps, f } } -bool TraceSphereIntersections(float3 rayOrigin, float3 rayDirection, float3 sphereCenter, float sphereRadius, inout float2 solutions) -{ - float3 localPosition = rayOrigin - sphereCenter; - float localPositionSqr = dot(localPosition, localPosition); - - // Quadratic Coefficients - float a = dot(rayDirection, rayDirection); - float b = 2 * dot(rayDirection, localPosition); - float c = localPositionSqr - sphereRadius * sphereRadius; - - float discriminant = b * b - 4 * a * c; - - // Only continue if the ray intersects with the sphere - if (discriminant >= 0.0) - { - float sqrtDiscriminant = sqrt(discriminant); - solutions = (-b + float2(-1, 1) * sqrtDiscriminant) / (2 * a); - return true; - } - - return false; -} - float CalculateAtmosphereBlend(float tDepth) { // Progressively increase alpha as clouds reaches the desired distance. @@ -582,15 +559,29 @@ float CalculateAtmosphereBlend(float tDepth) return fade; } +static const uint2 g_HalfResIndexToCoordinateOffset[4] = { uint2(0, 0), uint2(1, 0), uint2(0, 1), uint2(1, 1) }; + +// Calculates checkerboard undersampling position: returns the index (0..3, into g_HalfResIndexToCoordinateOffset) of the sub-pixel traced for this render texel in the current frame. The (x + y) parity term staggers neighbouring texels; each '&' is parenthesised because '+' binds tighter than '&'. Must stay identical to the copy in volumetricCloud_reprojectCS.hlsl. +int ComputeCheckerBoardIndex(int2 renderCoord, int subPixelIndex) +{ + const int localOffset = ((renderCoord.x & 1) + (renderCoord.y & 1)) & 1; + const int checkerBoardLocation = (subPixelIndex + localOffset) & 0x3; + return checkerBoardLocation; +} + [numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)] void main(uint3 DTid : SV_DispatchThreadID) { - const float2 uv = (DTid.xy + 0.5) * xPPResolution_rcp; + int subPixelIndex = g_xFrame_FrameCount % 4; + int checkerBoardIndex = ComputeCheckerBoardIndex(DTid.xy, subPixelIndex); + uint2 halfResCoord = DTid.xy * 2 + g_HalfResIndexToCoordinateOffset[checkerBoardIndex]; + + const float2 uv = (halfResCoord + 0.5) * xPPParams0.zw; float x = uv.x * 2 - 1; float y = (1 - uv.y) * 2 - 1; float2 screenPosition = float2(x, y); - + float4 unprojected 
= mul(g_xCamera_InvVP, float4(screenPosition, 0, 1)); unprojected.xyz /= unprojected.w; @@ -601,6 +592,7 @@ void main(uint3 DTid : SV_DispatchThreadID) float tMin = -FLT_MAX; float tMax = -FLT_MAX; float t; + float tToDepthBuffer; float steps; float stepSize; { @@ -612,11 +604,11 @@ void main(uint3 DTid : SV_DispatchThreadID) const float cloudBottomRadius = planetRadius + g_xFrame_VolumetricClouds.CloudStartHeight; const float cloudTopRadius = planetRadius + g_xFrame_VolumetricClouds.CloudStartHeight + g_xFrame_VolumetricClouds.CloudThickness; - float2 tTopSolutions = 0.0; - if (TraceSphereIntersections(rayOrigin, rayDirection, planetCenterWorld, cloudTopRadius, tTopSolutions)) + float2 tTopSolutions = RaySphereIntersect(rayOrigin, rayDirection, planetCenterWorld, cloudTopRadius); + if (tTopSolutions.x > 0.0 || tTopSolutions.y > 0.0) { - float2 tBottomSolutions = 0.0; - if (TraceSphereIntersections(rayOrigin, rayDirection, planetCenterWorld, cloudBottomRadius, tBottomSolutions)) + float2 tBottomSolutions = RaySphereIntersect(rayOrigin, rayDirection, planetCenterWorld, cloudBottomRadius); + if (tBottomSolutions.x > 0.0 || tBottomSolutions.y > 0.0) { // If we see both intersections on the screen, keep the min closest, otherwise the max furthest float tempTop = all(tTopSolutions > 0.0f) ? 
min(tTopSolutions.x, tTopSolutions.y) : max(tTopSolutions.x, tTopSolutions.y); @@ -642,23 +634,23 @@ void main(uint3 DTid : SV_DispatchThreadID) } else { - texture_render[DTid.xy] = float4(0.0, 0.0, 0.0, 0.0); - texture_cloudDepth[DTid.xy] = 0.0; + texture_render[DTid.xy] = float4(0.0, 0.0, 0.0, 0.0); // Inverted alpha + texture_cloudDepth[DTid.xy] = FLT_MAX; return; } if (tMax <= tMin || tMin > g_xFrame_VolumetricClouds.RenderDistance) { - texture_render[DTid.xy] = float4(0.0, 0.0, 0.0, 0.0); - texture_cloudDepth[DTid.xy] = 0.0; + texture_render[DTid.xy] = float4(0.0, 0.0, 0.0, 0.0); // Inverted alpha + texture_cloudDepth[DTid.xy] = FLT_MAX; return; } // Depth buffer intersection - float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 0).r; + float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 1).r; float3 depthWorldPosition = reconstructPosition(uv, depth); - float tToDepthBuffer = length(depthWorldPosition - rayOrigin); + tToDepthBuffer = length(depthWorldPosition - rayOrigin); tMax = depth == 0.0 ? tMax : min(tMax, tToDepthBuffer); // Exclude skybox const float marchingDistance = min(g_xFrame_VolumetricClouds.MaxMarchingDistance, tMax - tMin); @@ -692,8 +684,8 @@ void main(uint3 DTid : SV_DispatchThreadID) float grayScaleTransmittance = approxTransmittance < g_xFrame_VolumetricClouds.TransmittanceThreshold ? 0.0 : approxTransmittance; float4 color = float4(luminance, grayScaleTransmittance); - - color.a = 1.0 - color.a; // Invert to match reprojection. Early returns has to be inverted too. + + color.a = 1.0 - color.a; // Invert to match reprojection. Early color returns has to be inverted too. 
// Blend clouds with horizon if (depthWeightsSum > 0.0) @@ -707,5 +699,5 @@ void main(uint3 DTid : SV_DispatchThreadID) // Output texture_render[DTid.xy] = color; - texture_cloudDepth[DTid.xy] = tDepth; // Linear depth + texture_cloudDepth[DTid.xy] = float2(tDepth, tToDepthBuffer); // Linear depth } diff --git a/WickedEngine/shaders/volumetricCloud_reprojectCS.hlsl b/WickedEngine/shaders/volumetricCloud_reprojectCS.hlsl index 89fcdafc3..7c49f38d0 100644 --- a/WickedEngine/shaders/volumetricCloud_reprojectCS.hlsl +++ b/WickedEngine/shaders/volumetricCloud_reprojectCS.hlsl @@ -2,150 +2,21 @@ #include "ShaderInterop_Postprocess.h" TEXTURE2D(cloud_current, float4, TEXSLOT_ONDEMAND0); -TEXTURE2D(cloud_depth, float, TEXSLOT_ONDEMAND1); +TEXTURE2D(cloud_depth_current, float2, TEXSLOT_ONDEMAND1); TEXTURE2D(cloud_history, float4, TEXSLOT_ONDEMAND2); +TEXTURE2D(cloud_depth_history, float2, TEXSLOT_ONDEMAND3); RWTEXTURE2D(output, float4, 0); -RWTEXTURE2D(output_cloudMask, unorm float4, 1); +RWTEXTURE2D(output_depth, float2, 1); - -// The rendering uses a temporal upsampling pass similar to Frostbite. See https://odr.chalmers.se/handle/20.500.12380/241770 - -// If the clouds are moving fast, the upsampling will most likely not be able to keep up. You can modify these values to relax the effect: -static const float temporalResponse = 0.05; -static const float temporalScale = 3.0; -static const float temporalExposure = 10.0; - -inline float Luma4(float3 color) +// This function computes the checkerboard undersampling position (sub-pixel index 0..3 for the given render texel and frame). Must stay identical to the copy in volumetricCloud_renderCS.hlsl, otherwise this pass refreshes a different sub-pixel than the one the render pass traced. Each '&' is parenthesised because '+' binds tighter than '&'. +int ComputeCheckerBoardIndex(int2 renderCoord, int subPixelIndex) { - return (color.g * 2) + (color.r + color.b); + const int localOffset = ((renderCoord.x & 1) + (renderCoord.y & 1)) & 1; + const int checkerBoardLocation = (subPixelIndex + localOffset) & 0x3; + return checkerBoardLocation; } -inline float HdrWeight4(float3 color, float exposure) -{ - return rcp(Luma4(color) * exposure + 4.0f); -} - -// Different aabb clipping method from eg. 
SSR temporal, suitable for clouds in this case -float4 clip_aabb(float4 aabb_min, float4 aabb_max, float4 prev_sample) -{ - float4 p_clip = 0.5 * (aabb_max + aabb_min); - float4 e_clip = 0.5 * (aabb_max - aabb_min) + 0.00000001f; - - float4 v_clip = prev_sample - p_clip; - float4 v_unit = v_clip / e_clip; - float4 a_unit = abs(v_unit); - float ma_unit = max(max(a_unit.x, max(a_unit.y, a_unit.z)), a_unit.w); - - if (ma_unit > 1.0) - return p_clip + v_clip / ma_unit; - else - return prev_sample; // point inside aabb -} - -inline void ResolverAABB(Texture2D currentColor, SamplerState currentSampler, float sharpness, float exposureScale, float AABBScale, float2 uv, float2 texelSize, inout float4 currentMin, inout float4 currentMax, inout float4 currentAverage, inout float4 currentOutput) -{ - const int2 SampleOffset[9] = { int2(-1.0, -1.0), int2(0.0, -1.0), int2(1.0, -1.0), int2(-1.0, 0.0), int2(0.0, 0.0), int2(1.0, 0.0), int2(-1.0, 1.0), int2(0.0, 1.0), int2(1.0, 1.0) }; - - // Modulate Luma HDR - - float4 sampleColors[9]; - [unroll] - for (uint i = 0; i < 9; i++) - { - sampleColors[i] = currentColor.SampleLevel(currentSampler, uv + (SampleOffset[i] / texelSize), 0.0f); - } - - -#if 0 // Exaggerates outline between clouds and geometry - float sampleWeights[9]; - [unroll] - for (uint j = 0; j < 9; j++) - { - sampleWeights[j] = HdrWeight4(sampleColors[j].rgb, exposureScale); - } - - float totalWeight = 0; - [unroll] - for (uint k = 0; k < 9; k++) - { - totalWeight += sampleWeights[k]; - } - sampleColors[4] = (sampleColors[0] * sampleWeights[0] + sampleColors[1] * sampleWeights[1] + sampleColors[2] * sampleWeights[2] + sampleColors[3] * sampleWeights[3] + sampleColors[4] * sampleWeights[4] + - sampleColors[5] * sampleWeights[5] + sampleColors[6] * sampleWeights[6] + sampleColors[7] * sampleWeights[7] + sampleColors[8] * sampleWeights[8]) / totalWeight; -#endif - - -#if 0 // Standard clipping - - // Variance Clipping (AABB) - - float4 m1 = 0.0; - float4 m2 = 0.0; - 
[unroll] - for (uint x = 0; x < 9; x++) - { - m1 += sampleColors[x]; - m2 += sampleColors[x] * sampleColors[x]; - } - - float4 mean = m1 / 9.0; - float4 stddev = sqrt((m2 / 9.0) - sqr(mean)); - -#else // Depth check - - float originalLinearDepth = getLinearDepth(texture_depth.SampleLevel(sampler_point_clamp, uv, 0).r); - float validSampleCount = 1.0; - - float4 m1 = 0.0; - float4 m2 = 0.0; - [unroll] - for (uint x = 0; x < 9; x++) - { - if (x == 4) - { - m1 += sampleColors[x]; - m2 += sampleColors[x] * sampleColors[x]; - } - else - { - float depth = getLinearDepth(texture_depth.SampleLevel(sampler_point_clamp, uv + (SampleOffset[x] / texelSize), 0).r); - if (abs(originalLinearDepth - depth) < 1.5) - { - m1 += sampleColors[x]; - m2 += sampleColors[x] * sampleColors[x]; - validSampleCount += 1.0; - } - } - } - - float4 mean = m1 / validSampleCount; - float4 stddev = sqrt((m2 / validSampleCount) - sqr(mean)); - -#endif - - currentMin = mean - AABBScale * stddev; - currentMax = mean + AABBScale * stddev; - - currentOutput = sampleColors[4]; - currentMin = min(currentMin, currentOutput); - currentMax = max(currentMax, currentOutput); - currentAverage = mean; -} - -/*float2 CalculateCustomMotion(float4 worldPosition) -{ - float4 thisClip = mul(g_xCamera_VP, worldPosition); - float4 prevClip = mul(g_xCamera_PrevVP, worldPosition); - - float2 thisScreen = thisClip.xy * rcp(thisClip.w); - float2 prevScreen = prevClip.xy * rcp(prevClip.w); - thisScreen = (thisScreen.xy * float2(0.5, -0.5) + 0.5); - prevScreen = (prevScreen.xy * float2(0.5, -0.5) + 0.5); - - return thisScreen - prevScreen; -}*/ - // Computes post-projection depth from linear depth float getInverseLinearDepth(float lin, float near, float far) { @@ -157,33 +28,34 @@ float getInverseLinearDepth(float lin, float near, float far) [numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)] void main(uint3 DTid : SV_DispatchThreadID) { + uint2 renderCoord = DTid.xy / 2; const float2 uv = (DTid.xy + 0.5f) * 
xPPResolution_rcp; - + #if 0 - + // Calculate screen dependant motion vector float4 prevPos = float4(uv * 2.0 - 1.0, 1.0, 1.0); prevPos = mul(g_xCamera_InvP, prevPos); prevPos = prevPos / prevPos.w; - + prevPos.xyz = mul((float3x3)g_xCamera_InvV, prevPos.xyz); prevPos.xyz = mul((float3x3)g_xCamera_PrevV, prevPos.xyz); - + float4 reproj = mul(g_xCamera_Proj, prevPos); reproj /= reproj.w; - + float2 prevUV = reproj.xy * 0.5 + 0.5; - + #else - + float x = uv.x * 2 - 1; float y = (1 - uv.y) * 2 - 1; float2 screenPosition = float2(x, y); - float cloudLinearDepth = cloud_depth.SampleLevel(sampler_linear_clamp, uv, 0).r; - float cloudDepth = getInverseLinearDepth(cloudLinearDepth, g_xCamera_ZNearP, g_xCamera_ZFarP); + float currentCloudLinearDepth = cloud_depth_current.SampleLevel(sampler_point_clamp, uv, 0).x; + float currentCloudDepth = getInverseLinearDepth(currentCloudLinearDepth, g_xCamera_ZNearP, g_xCamera_ZFarP); - float4 thisClip = float4(screenPosition, cloudDepth, 1.0); + float4 thisClip = float4(screenPosition, currentCloudDepth, 1.0); float4 prevClip = mul(g_xCamera_InvVP, thisClip); prevClip = mul(g_xCamera_PrevVP, prevClip); @@ -198,26 +70,103 @@ void main(uint3 DTid : SV_DispatchThreadID) float2 prevUV = prevScreenPosition * float2(0.5, -0.5) + 0.5; #endif - - float4 previous = cloud_history.SampleLevel(sampler_linear_clamp, prevUV, 0); - - float4 current = 0; - float4 currentMin, currentMax, currentAverage; - ResolverAABB(cloud_current, sampler_point_clamp, 0, temporalExposure, temporalScale, uv, xPPResolution, currentMin, currentMax, currentAverage, current); + + bool validHistory = is_saturated(prevUV); - //previous = clip_aabb(currentMin.xyz, currentMax.xyz, clamp(currentAverage, currentMin, currentMax), previous); - previous = clip_aabb(currentMin, currentMax, previous); + int subPixelIndex = g_xFrame_FrameCount % 4; + int localIndex = (DTid.x & 1) + (DTid.y & 1) * 2; + int currentIndex = ComputeCheckerBoardIndex(renderCoord, subPixelIndex); + + bool 
shouldUpdatePixel = (localIndex == currentIndex); + + float4 result = 0.0; + float2 depthResult = 0.0; - float4 result = lerp(previous, current, temporalResponse); - - result = is_saturated(prevUV) ? result : current; + +#if 0 // Simple reprojection version + if (shouldUpdatePixel) + { + result = cloud_current[renderCoord]; + depthResult = cloud_depth_current[renderCoord]; + } + else + { + result = cloud_history.SampleLevel(sampler_linear_clamp, uv, 0); + depthResult = cloud_depth_history.SampleLevel(sampler_linear_clamp, uv, 0); + } + output[DTid.xy] = result; + output_depth[DTid.xy] = depthResult; + return; +#endif + + + if (validHistory) + { + float4 newResult = cloud_current[renderCoord]; + float2 newDepthResult = cloud_depth_current[renderCoord]; + + if (shouldUpdatePixel) + { + result = newResult; + depthResult = newDepthResult; + } + else + { + float4 previousResult = cloud_history.SampleLevel(sampler_linear_clamp, prevUV, 0); + float2 previousDepthResult = cloud_depth_history.SampleLevel(sampler_linear_clamp, prevUV, 0); + + result = previousResult; + depthResult = previousDepthResult; + + float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 1).r; // Half res + float3 depthWorldPosition = reconstructPosition(uv, depth); + float tToDepthBuffer = length(depthWorldPosition - g_xCamera_CamPos); + + if (abs(tToDepthBuffer - previousDepthResult.y) > tToDepthBuffer * 0.1) + { + float closestDepth = FLT_MAX; + for (int y = -1; y <= 1; y++) + { + for (int x = -1; x <= 1; x++) + { + // If it's middle then skip. 
We only evaluate neighbor samples + if ((abs(x) + abs(y)) == 0) + continue; + + int2 neighborCoord = renderCoord + int2(x, y); + + float2 neighboorDepthResult = cloud_depth_current[neighborCoord]; + float neighborClosestDepth = abs(tToDepthBuffer - neighboorDepthResult.y); + + if (neighborClosestDepth < closestDepth) + { + closestDepth = neighborClosestDepth; + float4 neighborResult = cloud_current[neighborCoord]; + + result = neighborResult; + depthResult = neighboorDepthResult; + } + } + } + + if (abs(tToDepthBuffer - newDepthResult.y) < closestDepth) + { + result = newResult; + depthResult = newDepthResult; + } + } + else + { + + } + } + } + else + { + result = cloud_current.SampleLevel(sampler_linear_clamp, uv, 0); + depthResult = cloud_depth_current.SampleLevel(sampler_linear_clamp, uv, 0); + } output[DTid.xy] = result; - - [branch] - if (DTid.x % 2 == 0 && DTid.y % 2 == 0) - { - // the mask is half the resolution of the clouds - output_cloudMask[DTid.xy / 2] = pow(saturate(1 - result.a), 64); - } + output_depth[DTid.xy] = depthResult; } diff --git a/WickedEngine/shaders/volumetricCloud_temporalCS.hlsl b/WickedEngine/shaders/volumetricCloud_temporalCS.hlsl new file mode 100644 index 000000000..790473e11 --- /dev/null +++ b/WickedEngine/shaders/volumetricCloud_temporalCS.hlsl @@ -0,0 +1,203 @@ +#include "globals.hlsli" +#include "ShaderInterop_Postprocess.h" + +TEXTURE2D(cloud_reproject, float4, TEXSLOT_ONDEMAND0); +TEXTURE2D(cloud_reproject_depth, float2, TEXSLOT_ONDEMAND1); +TEXTURE2D(cloud_history, float4, TEXSLOT_ONDEMAND2); + +RWTEXTURE2D(output, float4, 0); +RWTEXTURE2D(output_cloudMask, unorm float4, 1); + + +// If the clouds are moving fast, the upsampling will most likely not be able to keep up. You can modify these values to relax the effect: +static const float temporalResponse = 0.05; +static const float temporalScale = 2.0; +static const float temporalExposure = 10.0; + +// Different aabb clipping method from eg. 
SSR temporal, suitable for clouds in this case +float4 clip_aabb(float4 aabb_min, float4 aabb_max, float4 prev_sample) +{ + float4 p_clip = 0.5 * (aabb_max + aabb_min); + float4 e_clip = 0.5 * (aabb_max - aabb_min) + 0.00000001f; + + float4 v_clip = prev_sample - p_clip; + float4 v_unit = v_clip / e_clip; + float4 a_unit = abs(v_unit); + float ma_unit = max(max(a_unit.x, max(a_unit.y, a_unit.z)), a_unit.w); + + if (ma_unit > 1.0) + return p_clip + v_clip / ma_unit; + else + return prev_sample; // point inside aabb +} + +inline void ResolverAABB(Texture2D currentColor, SamplerState currentSampler, float sharpness, float exposureScale, float AABBScale, float2 uv, float2 texelSize, inout float4 currentMin, inout float4 currentMax, inout float4 currentAverage, inout float4 currentOutput) +{ + const int2 SampleOffset[9] = { int2(-1.0, -1.0), int2(0.0, -1.0), int2(1.0, -1.0), int2(-1.0, 0.0), int2(0.0, 0.0), int2(1.0, 0.0), int2(-1.0, 1.0), int2(0.0, 1.0), int2(1.0, 1.0) }; + + // Modulate Luma HDR + + float4 sampleColors[9]; + [unroll] + for (uint i = 0; i < 9; i++) + { + sampleColors[i] = currentColor.SampleLevel(currentSampler, uv + (SampleOffset[i] / texelSize), 0.0f); + } + + +#if 0 // Exaggerates outline between clouds and geometry + float sampleWeights[9]; + [unroll] + for (uint j = 0; j < 9; j++) + { + sampleWeights[j] = HdrWeight4(sampleColors[j].rgb, exposureScale); + } + + float totalWeight = 0; + [unroll] + for (uint k = 0; k < 9; k++) + { + totalWeight += sampleWeights[k]; + } + sampleColors[4] = (sampleColors[0] * sampleWeights[0] + sampleColors[1] * sampleWeights[1] + sampleColors[2] * sampleWeights[2] + sampleColors[3] * sampleWeights[3] + sampleColors[4] * sampleWeights[4] + + sampleColors[5] * sampleWeights[5] + sampleColors[6] * sampleWeights[6] + sampleColors[7] * sampleWeights[7] + sampleColors[8] * sampleWeights[8]) / totalWeight; +#endif + + +#if 0 // Standard clipping + + // Variance Clipping (AABB) + + float4 m1 = 0.0; + float4 m2 = 0.0; + 
[unroll] + for (uint x = 0; x < 9; x++) + { + m1 += sampleColors[x]; + m2 += sampleColors[x] * sampleColors[x]; + } + + float4 mean = m1 / 9.0; + float4 stddev = sqrt((m2 / 9.0) - sqr(mean)); + +#else // Depth check + + float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 1).r; // Half res + float3 depthWorldPosition = reconstructPosition(uv, depth); + float tToDepthBuffer = length(depthWorldPosition - g_xCamera_CamPos); + + float validSampleCount = 1.0; + + float4 m1 = 0.0; + float4 m2 = 0.0; + [unroll] + for (uint x = 0; x < 9; x++) + { + if (x == 4) + { + m1 += sampleColors[x]; + m2 += sampleColors[x] * sampleColors[x]; + } + else + { + float2 reprojectionDepthResults = cloud_reproject_depth.SampleLevel(sampler_point_clamp, uv + (SampleOffset[x] / texelSize), 1); + if (abs(tToDepthBuffer - reprojectionDepthResults.y) < tToDepthBuffer * 0.1) + { + m1 += sampleColors[x]; + m2 += sampleColors[x] * sampleColors[x]; + validSampleCount += 1.0; + } + } + } + + float4 mean = m1 / validSampleCount; + float4 stddev = sqrt((m2 / validSampleCount) - sqr(mean)); + +#endif + + currentMin = mean - AABBScale * stddev; + currentMax = mean + AABBScale * stddev; + + currentOutput = sampleColors[4]; + currentMin = min(currentMin, currentOutput); + currentMax = max(currentMax, currentOutput); + currentAverage = mean; +} + +// Computes post-projection depth from linear depth +float getInverseLinearDepth(float lin, float near, float far) +{ + float z_n = ((lin - 2 * far) * near + far * lin) / (lin * near - far * lin); + float z = (z_n + 1) / 2; + return z; +} + +[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)] +void main(uint3 DTid : SV_DispatchThreadID) +{ + const float2 uv = (DTid.xy + 0.5f) * xPPResolution_rcp; + +#if 0 + + // Calculate screen dependant motion vector + float4 prevPos = float4(uv * 2.0 - 1.0, 1.0, 1.0); + prevPos = mul(g_xCamera_InvP, prevPos); + prevPos = prevPos / prevPos.w; + + prevPos.xyz = mul((float3x3)g_xCamera_InvV, prevPos.xyz); + 
prevPos.xyz = mul((float3x3)g_xCamera_PrevV, prevPos.xyz); + + float4 reproj = mul(g_xCamera_Proj, prevPos); + reproj /= reproj.w; + + float2 prevUV = reproj.xy * 0.5 + 0.5; + +#else + + // We must recalculate motion with new upscaled cloud depths: + + float x = uv.x * 2 - 1; + float y = (1 - uv.y) * 2 - 1; + float2 screenPosition = float2(x, y); + + float currentCloudLinearDepth = cloud_reproject_depth[DTid.xy].x; + float currentCloudDepth = getInverseLinearDepth(currentCloudLinearDepth, g_xCamera_ZNearP, g_xCamera_ZFarP); + + float4 thisClip = float4(screenPosition, currentCloudDepth, 1.0); + + float4 prevClip = mul(g_xCamera_InvVP, thisClip); + prevClip = mul(g_xCamera_PrevVP, prevClip); + + //float4 prevClip = mul(g_xCamera_PrevVP, worldPosition); + float2 prevScreen = prevClip.xy / prevClip.w; + + float2 screenVelocity = screenPosition - prevScreen; + float2 prevScreenPosition = screenPosition - screenVelocity; + + // Transform from screen position to uv + float2 prevUV = prevScreenPosition * float2(0.5, -0.5) + 0.5; + +#endif + + float4 previous = cloud_history.SampleLevel(sampler_linear_clamp, prevUV, 0); + + float4 current = 0; + float4 currentMin, currentMax, currentAverage; + ResolverAABB(cloud_reproject, sampler_point_clamp, 0, temporalExposure, temporalScale, uv, xPPResolution, currentMin, currentMax, currentAverage, current); + + //previous = clip_aabb(currentMin.xyz, currentMax.xyz, clamp(currentAverage, currentMin, currentMax), previous); + previous = clip_aabb(currentMin, currentMax, previous); + + float4 result = lerp(previous, current, temporalResponse); + + result = is_saturated(prevUV) ? 
result : current; + + output[DTid.xy] = result; + + [branch] + if (DTid.x % 2 == 0 && DTid.y % 2 == 0) + { + // the mask is half the resolution of the clouds + output_cloudMask[DTid.xy / 2] = pow(saturate(1 - result.a), 64); + } +} diff --git a/WickedEngine/wiEnums.h b/WickedEngine/wiEnums.h index bc0ff6707..e0cf1cebd 100644 --- a/WickedEngine/wiEnums.h +++ b/WickedEngine/wiEnums.h @@ -365,6 +365,7 @@ enum SHADERTYPE CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_WEATHERMAP, CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_RENDER, CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_REPROJECT, + CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_TEMPORAL, CSTYPE_POSTPROCESS_FXAA, CSTYPE_POSTPROCESS_TEMPORALAA, CSTYPE_POSTPROCESS_LINEARDEPTH, diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp index a017eb70b..d44e1df23 100644 --- a/WickedEngine/wiRenderer.cpp +++ b/WickedEngine/wiRenderer.cpp @@ -1296,6 +1296,7 @@ void LoadShaders() wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_WEATHERMAP], "volumetricCloud_weathermapCS.cso"); }); wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_RENDER], "volumetricCloud_renderCS.cso"); }); wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_REPROJECT], "volumetricCloud_reprojectCS.cso"); }); + wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_TEMPORAL], "volumetricCloud_temporalCS.cso"); }); wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_FXAA], "fxaaCS.cso"); }); wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_TEMPORALAA], "temporalaaCS.cso"); }); wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_LINEARDEPTH], "lineardepthCS.cso"); }); @@ -11406,25 +11407,43 @@ void Postprocess_Bloom( } void 
CreateVolumetricCloudResources(VolumetricCloudResources& res, XMUINT2 resolution) { + XMUINT2 renderResolution = XMUINT2(resolution.x / 4, resolution.y / 4); + XMUINT2 reprojectionResolution = XMUINT2(resolution.x / 2, resolution.y / 2); + XMUINT2 maskResolution = XMUINT2(resolution.x / 4, resolution.y / 4); // Needs to be half of final cloud output + TextureDesc desc; desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS; - desc.Width = resolution.x / 4; - desc.Height = resolution.y / 4; + desc.Width = renderResolution.x; + desc.Height = renderResolution.y; desc.Format = FORMAT_R16G16B16A16_FLOAT; desc.layout = IMAGE_LAYOUT_SHADER_RESOURCE_COMPUTE; device->CreateTexture(&desc, nullptr, &res.texture_cloudRender); device->SetName(&res.texture_cloudRender, "texture_cloudRender"); + desc.Format = FORMAT_R16G16_FLOAT; + device->CreateTexture(&desc, nullptr, &res.texture_cloudDepth); + device->SetName(&res.texture_cloudDepth, "texture_cloudDepth"); + + desc.Width = reprojectionResolution.x; + desc.Height = reprojectionResolution.y; + desc.Format = FORMAT_R16G16B16A16_FLOAT; device->CreateTexture(&desc, nullptr, &res.texture_reproject[0]); device->SetName(&res.texture_reproject[0], "texture_reproject[0]"); device->CreateTexture(&desc, nullptr, &res.texture_reproject[1]); device->SetName(&res.texture_reproject[1], "texture_reproject[1]"); + desc.Format = FORMAT_R16G16_FLOAT; + device->CreateTexture(&desc, nullptr, &res.texture_reproject_depth[0]); + device->SetName(&res.texture_reproject_depth[0], "texture_reproject_depth[0]"); + device->CreateTexture(&desc, nullptr, &res.texture_reproject_depth[1]); + device->SetName(&res.texture_reproject_depth[1], "texture_reproject_depth[1]"); - desc.Format = FORMAT_R16_FLOAT; - device->CreateTexture(&desc, nullptr, &res.texture_cloudDepth); - device->SetName(&res.texture_cloudDepth, "texture_cloudDepth"); + desc.Format = FORMAT_R16G16B16A16_FLOAT; + device->CreateTexture(&desc, nullptr, &res.texture_temporal[0]); + 
device->SetName(&res.texture_temporal[0], "texture_temporal[0]"); + device->CreateTexture(&desc, nullptr, &res.texture_temporal[1]); + device->SetName(&res.texture_temporal[1], "texture_temporal[1]"); - desc.Width /= 2; - desc.Height /= 2; + desc.Width = maskResolution.x; + desc.Height = maskResolution.y; desc.Format = FORMAT_R8G8B8A8_UNORM; device->CreateTexture(&desc, nullptr, &res.texture_cloudMask); device->SetName(&res.texture_cloudMask, "texture_cloudMask"); @@ -11446,8 +11465,10 @@ void Postprocess_VolumetricClouds( cb.xPPResolution.y = desc.Height; cb.xPPResolution_rcp.x = 1.0f / cb.xPPResolution.x; cb.xPPResolution_rcp.y = 1.0f / cb.xPPResolution.y; - //const XMFLOAT4& halton = wiMath::GetHaltonSequence((int)device->GetFrameCount()); - //cb.xPPParams0 = halton; + cb.xPPParams0.x = (float)res.texture_reproject[0].GetDesc().Width; + cb.xPPParams0.y = (float)res.texture_reproject[0].GetDesc().Height; + cb.xPPParams0.z = 1.0f / cb.xPPParams0.x; + cb.xPPParams0.w = 1.0f / cb.xPPParams0.y; device->UpdateBuffer(&constantBuffers[CBTYPE_POSTPROCESS], &cb, cmd); device->BindConstantBuffer(CS, &constantBuffers[CBTYPE_POSTPROCESS], CB_GETBINDSLOT(PostProcessCB), cmd); @@ -11498,6 +11519,14 @@ void Postprocess_VolumetricClouds( device->EventEnd(cmd); } + const TextureDesc& reprojection_desc = res.texture_reproject[0].GetDesc(); + cb.xPPResolution.x = reprojection_desc.Width; + cb.xPPResolution.y = reprojection_desc.Height; + cb.xPPResolution_rcp.x = 1.0f / cb.xPPResolution.x; + cb.xPPResolution_rcp.y = 1.0f / cb.xPPResolution.y; + device->UpdateBuffer(&constantBuffers[CBTYPE_POSTPROCESS], &cb, cmd); + device->BindConstantBuffer(CS, &constantBuffers[CBTYPE_POSTPROCESS], CB_GETBINDSLOT(PostProcessCB), cmd); + int temporal_output = device->GetFrameCount() % 2; int temporal_history = 1 - temporal_output; @@ -11510,17 +11539,18 @@ void Postprocess_VolumetricClouds( device->BindResource(CS, &res.texture_cloudRender, TEXSLOT_ONDEMAND0, cmd); device->BindResource(CS, 
&res.texture_cloudDepth, TEXSLOT_ONDEMAND1, cmd); device->BindResource(CS, &res.texture_reproject[temporal_history], TEXSLOT_ONDEMAND2, cmd); + device->BindResource(CS, &res.texture_reproject_depth[temporal_history], TEXSLOT_ONDEMAND3, cmd); const GPUResource* uavs[] = { &res.texture_reproject[temporal_output], - &res.texture_cloudMask, + &res.texture_reproject_depth[temporal_output], }; device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd); { GPUBarrier barriers[] = { GPUBarrier::Image(&res.texture_reproject[temporal_output], res.texture_reproject[temporal_output].desc.layout, IMAGE_LAYOUT_UNORDERED_ACCESS), - GPUBarrier::Image(&res.texture_cloudMask, res.texture_cloudMask.desc.layout, IMAGE_LAYOUT_UNORDERED_ACCESS), + GPUBarrier::Image(&res.texture_reproject_depth[temporal_output], res.texture_reproject_depth[temporal_output].desc.layout, IMAGE_LAYOUT_UNORDERED_ACCESS), }; device->Barrier(barriers, arraysize(barriers), cmd); } @@ -11536,6 +11566,50 @@ void Postprocess_VolumetricClouds( GPUBarrier barriers[] = { GPUBarrier::Memory(), GPUBarrier::Image(&res.texture_reproject[temporal_output], IMAGE_LAYOUT_UNORDERED_ACCESS, res.texture_reproject[temporal_output].desc.layout), + GPUBarrier::Image(&res.texture_reproject_depth[temporal_output], IMAGE_LAYOUT_UNORDERED_ACCESS, res.texture_reproject_depth[temporal_output].desc.layout), + }; + device->Barrier(barriers, arraysize(barriers), cmd); + } + + device->UnbindUAVs(0, arraysize(uavs), cmd); + device->EventEnd(cmd); + } + + // Temporal pass: + { + device->EventBegin("Volumetric Cloud Temporal", cmd); + device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_TEMPORAL], cmd); + + device->BindResource(CS, &depthbuffer, TEXSLOT_DEPTH, cmd); + device->BindResource(CS, &res.texture_reproject[temporal_output], TEXSLOT_ONDEMAND0, cmd); + device->BindResource(CS, &res.texture_reproject_depth[temporal_output], TEXSLOT_ONDEMAND1, cmd); + device->BindResource(CS, &res.texture_temporal[temporal_history], 
TEXSLOT_ONDEMAND2, cmd); + + const GPUResource* uavs[] = { + &res.texture_temporal[temporal_output], + &res.texture_cloudMask, + }; + device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd); + + { + GPUBarrier barriers[] = { + GPUBarrier::Image(&res.texture_temporal[temporal_output], res.texture_temporal[temporal_output].desc.layout, IMAGE_LAYOUT_UNORDERED_ACCESS), + GPUBarrier::Image(&res.texture_cloudMask, res.texture_cloudMask.desc.layout, IMAGE_LAYOUT_UNORDERED_ACCESS), + }; + device->Barrier(barriers, arraysize(barriers), cmd); + } + + device->Dispatch( + (res.texture_temporal[temporal_output].GetDesc().Width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, + (res.texture_temporal[temporal_output].GetDesc().Height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, + 1, + cmd + ); + + { + GPUBarrier barriers[] = { + GPUBarrier::Memory(), + GPUBarrier::Image(&res.texture_temporal[temporal_output], IMAGE_LAYOUT_UNORDERED_ACCESS, res.texture_temporal[temporal_output].desc.layout), GPUBarrier::Image(&res.texture_cloudMask, IMAGE_LAYOUT_UNORDERED_ACCESS, res.texture_cloudMask.desc.layout), }; device->Barrier(barriers, arraysize(barriers), cmd); diff --git a/WickedEngine/wiRenderer.h b/WickedEngine/wiRenderer.h index f5e2c1d90..f8f6d6409 100644 --- a/WickedEngine/wiRenderer.h +++ b/WickedEngine/wiRenderer.h @@ -551,8 +551,10 @@ namespace wiRenderer { wiGraphics::Texture texture_cloudRender; wiGraphics::Texture texture_cloudDepth; - wiGraphics::Texture texture_cloudMask; wiGraphics::Texture texture_reproject[2]; + wiGraphics::Texture texture_reproject_depth[2]; + wiGraphics::Texture texture_temporal[2]; + wiGraphics::Texture texture_cloudMask; }; void CreateVolumetricCloudResources(VolumetricCloudResources& res, XMUINT2 resolution); void Postprocess_VolumetricClouds(