diff --git a/WickedEngine/RenderPath3D.cpp b/WickedEngine/RenderPath3D.cpp
index 506ca449b..decb83b3e 100644
--- a/WickedEngine/RenderPath3D.cpp
+++ b/WickedEngine/RenderPath3D.cpp
@@ -870,7 +870,7 @@ void RenderPath3D::Render() const
device->EventBegin("Volumetric Clouds Reflection Blend", cmd);
wiImageParams fx;
fx.enableFullScreen();
- wiImage::Draw(&volumetriccloudResources_reflection.texture_reproject[device->GetFrameCount() % 2], fx, cmd);
+ wiImage::Draw(&volumetriccloudResources_reflection.texture_temporal[device->GetFrameCount() % 2], fx, cmd);
device->EventEnd(cmd);
}
@@ -947,7 +947,7 @@ void RenderPath3D::Render() const
{
device->EventBegin("Volumetric Clouds Upsample + Blend", cmd);
wiRenderer::Postprocess_Upsample_Bilateral(
- volumetriccloudResources.texture_reproject[device->GetFrameCount() % 2],
+ volumetriccloudResources.texture_temporal[device->GetFrameCount() % 2],
rtLinearDepth,
*GetGbuffer_Read(GBUFFER_COLOR), // only desc is taken if pixel shader upsampling is used
cmd,
diff --git a/WickedEngine/offlineshadercompiler.cpp b/WickedEngine/offlineshadercompiler.cpp
index 9b13d4cb7..79fd87148 100644
--- a/WickedEngine/offlineshadercompiler.cpp
+++ b/WickedEngine/offlineshadercompiler.cpp
@@ -208,6 +208,7 @@ int main(int argc, char* argv[])
"volumetricCloud_weathermapCS.hlsl" ,
"volumetricCloud_renderCS.hlsl" ,
"volumetricCloud_reprojectCS.hlsl" ,
+ "volumetricCloud_temporalCS.hlsl" ,
"shadingRateClassificationCS.hlsl" ,
"shadingRateClassificationCS_DEBUG.hlsl" ,
"skyAtmosphere_transmittanceLutCS.hlsl" ,
diff --git a/WickedEngine/shaders/CMakeLists.txt b/WickedEngine/shaders/CMakeLists.txt
index a8247ce83..105e89d66 100644
--- a/WickedEngine/shaders/CMakeLists.txt
+++ b/WickedEngine/shaders/CMakeLists.txt
@@ -134,6 +134,7 @@ set(SHADERS_CS
"volumetricCloud_weathermapCS.hlsl"
"volumetricCloud_renderCS.hlsl"
"volumetricCloud_reprojectCS.hlsl"
+ "volumetricCloud_temporalCS.hlsl"
"shadingRateClassificationCS.hlsl"
"shadingRateClassificationCS_DEBUG.hlsl"
"skyAtmosphere_transmittanceLutCS.hlsl"
diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems b/WickedEngine/shaders/Shaders_SOURCE.vcxitems
index 0fe2b6c56..65d1836da 100644
--- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems
+++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems
@@ -1028,6 +1028,7 @@
Compute
Compute
+
Vertex
Vertex
diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters
index 973bd7ea1..694147d47 100644
--- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters
+++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters
@@ -992,6 +992,9 @@
CS
+
+ CS
+
diff --git a/WickedEngine/shaders/volumetricCloud_renderCS.hlsl b/WickedEngine/shaders/volumetricCloud_renderCS.hlsl
index 8e280b745..374247d07 100644
--- a/WickedEngine/shaders/volumetricCloud_renderCS.hlsl
+++ b/WickedEngine/shaders/volumetricCloud_renderCS.hlsl
@@ -29,7 +29,7 @@ TEXTURE2D(texture_curlNoise, float4, TEXSLOT_ONDEMAND3);
TEXTURE2D(texture_weatherMap, float4, TEXSLOT_ONDEMAND4);
RWTEXTURE2D(texture_render, float4, 0);
-RWTEXTURE2D(texture_cloudDepth, float, 1);
+RWTEXTURE2D(texture_cloudDepth, float2, 1);
// Octaves for multiple-scattering approximation. 1 means single-scattering only.
@@ -545,29 +545,6 @@ void RenderClouds(float3 rayOrigin, float3 rayDirection, float t, float steps, f
}
}
-bool TraceSphereIntersections(float3 rayOrigin, float3 rayDirection, float3 sphereCenter, float sphereRadius, inout float2 solutions)
-{
- float3 localPosition = rayOrigin - sphereCenter;
- float localPositionSqr = dot(localPosition, localPosition);
-
- // Quadratic Coefficients
- float a = dot(rayDirection, rayDirection);
- float b = 2 * dot(rayDirection, localPosition);
- float c = localPositionSqr - sphereRadius * sphereRadius;
-
- float discriminant = b * b - 4 * a * c;
-
- // Only continue if the ray intersects with the sphere
- if (discriminant >= 0.0)
- {
- float sqrtDiscriminant = sqrt(discriminant);
- solutions = (-b + float2(-1, 1) * sqrtDiscriminant) / (2 * a);
- return true;
- }
-
- return false;
-}
-
float CalculateAtmosphereBlend(float tDepth)
{
// Progressively increase alpha as clouds reaches the desired distance.
@@ -582,15 +559,29 @@ float CalculateAtmosphereBlend(float tDepth)
return fade;
}
+static const uint2 g_HalfResIndexToCoordinateOffset[4] = { uint2(0, 0), uint2(1, 0), uint2(0, 1), uint2(1, 1) };
+
+// Picks which of the 4 sub-pixels (an index into g_HalfResIndexToCoordinateOffset) of a 2x2 block this render texel samples for the given frame sub-index
+int ComputeCheckerBoardIndex(int2 renderCoord, int subPixelIndex)
+{
+	const int localOffset = (renderCoord.x & 1 + renderCoord.y & 1) & 1; // NOTE(review): '+' binds tighter than '&', so this evaluates as x & (1 + y) & 1 (1 only for odd x and even y), not ((x & 1) + (y & 1)) & 1 - confirm intent; volumetricCloud_reprojectCS.hlsl uses the same expression and both must stay identical
+	const int checkerBoardLocation = (subPixelIndex + localOffset) & 0x3; // wrap into the 0..3 range
+	return checkerBoardLocation;
+}
+
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
- const float2 uv = (DTid.xy + 0.5) * xPPResolution_rcp;
+ int subPixelIndex = g_xFrame_FrameCount % 4;
+ int checkerBoardIndex = ComputeCheckerBoardIndex(DTid.xy, subPixelIndex);
+ uint2 halfResCoord = DTid.xy * 2 + g_HalfResIndexToCoordinateOffset[checkerBoardIndex];
+
+ const float2 uv = (halfResCoord + 0.5) * xPPParams0.zw;
float x = uv.x * 2 - 1;
float y = (1 - uv.y) * 2 - 1;
float2 screenPosition = float2(x, y);
-
+
float4 unprojected = mul(g_xCamera_InvVP, float4(screenPosition, 0, 1));
unprojected.xyz /= unprojected.w;
@@ -601,6 +592,7 @@ void main(uint3 DTid : SV_DispatchThreadID)
float tMin = -FLT_MAX;
float tMax = -FLT_MAX;
float t;
+ float tToDepthBuffer;
float steps;
float stepSize;
{
@@ -612,11 +604,11 @@ void main(uint3 DTid : SV_DispatchThreadID)
const float cloudBottomRadius = planetRadius + g_xFrame_VolumetricClouds.CloudStartHeight;
const float cloudTopRadius = planetRadius + g_xFrame_VolumetricClouds.CloudStartHeight + g_xFrame_VolumetricClouds.CloudThickness;
- float2 tTopSolutions = 0.0;
- if (TraceSphereIntersections(rayOrigin, rayDirection, planetCenterWorld, cloudTopRadius, tTopSolutions))
+ float2 tTopSolutions = RaySphereIntersect(rayOrigin, rayDirection, planetCenterWorld, cloudTopRadius);
+ if (tTopSolutions.x > 0.0 || tTopSolutions.y > 0.0)
{
- float2 tBottomSolutions = 0.0;
- if (TraceSphereIntersections(rayOrigin, rayDirection, planetCenterWorld, cloudBottomRadius, tBottomSolutions))
+ float2 tBottomSolutions = RaySphereIntersect(rayOrigin, rayDirection, planetCenterWorld, cloudBottomRadius);
+ if (tBottomSolutions.x > 0.0 || tBottomSolutions.y > 0.0)
{
// If we see both intersections on the screen, keep the min closest, otherwise the max furthest
float tempTop = all(tTopSolutions > 0.0f) ? min(tTopSolutions.x, tTopSolutions.y) : max(tTopSolutions.x, tTopSolutions.y);
@@ -642,23 +634,23 @@ void main(uint3 DTid : SV_DispatchThreadID)
}
else
{
- texture_render[DTid.xy] = float4(0.0, 0.0, 0.0, 0.0);
- texture_cloudDepth[DTid.xy] = 0.0;
+ texture_render[DTid.xy] = float4(0.0, 0.0, 0.0, 0.0); // Inverted alpha
+ texture_cloudDepth[DTid.xy] = FLT_MAX;
return;
}
if (tMax <= tMin || tMin > g_xFrame_VolumetricClouds.RenderDistance)
{
- texture_render[DTid.xy] = float4(0.0, 0.0, 0.0, 0.0);
- texture_cloudDepth[DTid.xy] = 0.0;
+ texture_render[DTid.xy] = float4(0.0, 0.0, 0.0, 0.0); // Inverted alpha
+ texture_cloudDepth[DTid.xy] = FLT_MAX;
return;
}
// Depth buffer intersection
- float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 0).r;
+ float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 1).r;
float3 depthWorldPosition = reconstructPosition(uv, depth);
- float tToDepthBuffer = length(depthWorldPosition - rayOrigin);
+ tToDepthBuffer = length(depthWorldPosition - rayOrigin);
tMax = depth == 0.0 ? tMax : min(tMax, tToDepthBuffer); // Exclude skybox
const float marchingDistance = min(g_xFrame_VolumetricClouds.MaxMarchingDistance, tMax - tMin);
@@ -692,8 +684,8 @@ void main(uint3 DTid : SV_DispatchThreadID)
float grayScaleTransmittance = approxTransmittance < g_xFrame_VolumetricClouds.TransmittanceThreshold ? 0.0 : approxTransmittance;
float4 color = float4(luminance, grayScaleTransmittance);
-
- color.a = 1.0 - color.a; // Invert to match reprojection. Early returns has to be inverted too.
+
+ color.a = 1.0 - color.a; // Invert to match reprojection. Early color returns has to be inverted too.
// Blend clouds with horizon
if (depthWeightsSum > 0.0)
@@ -707,5 +699,5 @@ void main(uint3 DTid : SV_DispatchThreadID)
// Output
texture_render[DTid.xy] = color;
- texture_cloudDepth[DTid.xy] = tDepth; // Linear depth
+ texture_cloudDepth[DTid.xy] = float2(tDepth, tToDepthBuffer); // Linear depth
}
diff --git a/WickedEngine/shaders/volumetricCloud_reprojectCS.hlsl b/WickedEngine/shaders/volumetricCloud_reprojectCS.hlsl
index 89fcdafc3..7c49f38d0 100644
--- a/WickedEngine/shaders/volumetricCloud_reprojectCS.hlsl
+++ b/WickedEngine/shaders/volumetricCloud_reprojectCS.hlsl
@@ -2,150 +2,21 @@
#include "ShaderInterop_Postprocess.h"
TEXTURE2D(cloud_current, float4, TEXSLOT_ONDEMAND0);
-TEXTURE2D(cloud_depth, float, TEXSLOT_ONDEMAND1);
+TEXTURE2D(cloud_depth_current, float2, TEXSLOT_ONDEMAND1);
TEXTURE2D(cloud_history, float4, TEXSLOT_ONDEMAND2);
+TEXTURE2D(cloud_depth_history, float2, TEXSLOT_ONDEMAND3);
RWTEXTURE2D(output, float4, 0);
-RWTEXTURE2D(output_cloudMask, unorm float4, 1);
+RWTEXTURE2D(output_depth, float2, 1);
-
-// The rendering uses a temporal upsampling pass similar to Frostbite. See https://odr.chalmers.se/handle/20.500.12380/241770
-
-// If the clouds are moving fast, the upsampling will most likely not be able to keep up. You can modify these values to relax the effect:
-static const float temporalResponse = 0.05;
-static const float temporalScale = 3.0;
-static const float temporalExposure = 10.0;
-
-inline float Luma4(float3 color)
+// Computes the checkerboard undersampling position (keep identical to ComputeCheckerBoardIndex in volumetricCloud_renderCS.hlsl)
+int ComputeCheckerBoardIndex(int2 renderCoord, int subPixelIndex)
{
- return (color.g * 2) + (color.r + color.b);
+ const int localOffset = (renderCoord.x & 1 + renderCoord.y & 1) & 1;
+ const int checkerBoardLocation = (subPixelIndex + localOffset) & 0x3;
+ return checkerBoardLocation;
}
-inline float HdrWeight4(float3 color, float exposure)
-{
- return rcp(Luma4(color) * exposure + 4.0f);
-}
-
-// Different aabb clipping method from eg. SSR temporal, suitable for clouds in this case
-float4 clip_aabb(float4 aabb_min, float4 aabb_max, float4 prev_sample)
-{
- float4 p_clip = 0.5 * (aabb_max + aabb_min);
- float4 e_clip = 0.5 * (aabb_max - aabb_min) + 0.00000001f;
-
- float4 v_clip = prev_sample - p_clip;
- float4 v_unit = v_clip / e_clip;
- float4 a_unit = abs(v_unit);
- float ma_unit = max(max(a_unit.x, max(a_unit.y, a_unit.z)), a_unit.w);
-
- if (ma_unit > 1.0)
- return p_clip + v_clip / ma_unit;
- else
- return prev_sample; // point inside aabb
-}
-
-inline void ResolverAABB(Texture2D currentColor, SamplerState currentSampler, float sharpness, float exposureScale, float AABBScale, float2 uv, float2 texelSize, inout float4 currentMin, inout float4 currentMax, inout float4 currentAverage, inout float4 currentOutput)
-{
- const int2 SampleOffset[9] = { int2(-1.0, -1.0), int2(0.0, -1.0), int2(1.0, -1.0), int2(-1.0, 0.0), int2(0.0, 0.0), int2(1.0, 0.0), int2(-1.0, 1.0), int2(0.0, 1.0), int2(1.0, 1.0) };
-
- // Modulate Luma HDR
-
- float4 sampleColors[9];
- [unroll]
- for (uint i = 0; i < 9; i++)
- {
- sampleColors[i] = currentColor.SampleLevel(currentSampler, uv + (SampleOffset[i] / texelSize), 0.0f);
- }
-
-
-#if 0 // Exaggerates outline between clouds and geometry
- float sampleWeights[9];
- [unroll]
- for (uint j = 0; j < 9; j++)
- {
- sampleWeights[j] = HdrWeight4(sampleColors[j].rgb, exposureScale);
- }
-
- float totalWeight = 0;
- [unroll]
- for (uint k = 0; k < 9; k++)
- {
- totalWeight += sampleWeights[k];
- }
- sampleColors[4] = (sampleColors[0] * sampleWeights[0] + sampleColors[1] * sampleWeights[1] + sampleColors[2] * sampleWeights[2] + sampleColors[3] * sampleWeights[3] + sampleColors[4] * sampleWeights[4] +
- sampleColors[5] * sampleWeights[5] + sampleColors[6] * sampleWeights[6] + sampleColors[7] * sampleWeights[7] + sampleColors[8] * sampleWeights[8]) / totalWeight;
-#endif
-
-
-#if 0 // Standard clipping
-
- // Variance Clipping (AABB)
-
- float4 m1 = 0.0;
- float4 m2 = 0.0;
- [unroll]
- for (uint x = 0; x < 9; x++)
- {
- m1 += sampleColors[x];
- m2 += sampleColors[x] * sampleColors[x];
- }
-
- float4 mean = m1 / 9.0;
- float4 stddev = sqrt((m2 / 9.0) - sqr(mean));
-
-#else // Depth check
-
- float originalLinearDepth = getLinearDepth(texture_depth.SampleLevel(sampler_point_clamp, uv, 0).r);
- float validSampleCount = 1.0;
-
- float4 m1 = 0.0;
- float4 m2 = 0.0;
- [unroll]
- for (uint x = 0; x < 9; x++)
- {
- if (x == 4)
- {
- m1 += sampleColors[x];
- m2 += sampleColors[x] * sampleColors[x];
- }
- else
- {
- float depth = getLinearDepth(texture_depth.SampleLevel(sampler_point_clamp, uv + (SampleOffset[x] / texelSize), 0).r);
- if (abs(originalLinearDepth - depth) < 1.5)
- {
- m1 += sampleColors[x];
- m2 += sampleColors[x] * sampleColors[x];
- validSampleCount += 1.0;
- }
- }
- }
-
- float4 mean = m1 / validSampleCount;
- float4 stddev = sqrt((m2 / validSampleCount) - sqr(mean));
-
-#endif
-
- currentMin = mean - AABBScale * stddev;
- currentMax = mean + AABBScale * stddev;
-
- currentOutput = sampleColors[4];
- currentMin = min(currentMin, currentOutput);
- currentMax = max(currentMax, currentOutput);
- currentAverage = mean;
-}
-
-/*float2 CalculateCustomMotion(float4 worldPosition)
-{
- float4 thisClip = mul(g_xCamera_VP, worldPosition);
- float4 prevClip = mul(g_xCamera_PrevVP, worldPosition);
-
- float2 thisScreen = thisClip.xy * rcp(thisClip.w);
- float2 prevScreen = prevClip.xy * rcp(prevClip.w);
- thisScreen = (thisScreen.xy * float2(0.5, -0.5) + 0.5);
- prevScreen = (prevScreen.xy * float2(0.5, -0.5) + 0.5);
-
- return thisScreen - prevScreen;
-}*/
-
// Computes post-projection depth from linear depth
float getInverseLinearDepth(float lin, float near, float far)
{
@@ -157,33 +28,34 @@ float getInverseLinearDepth(float lin, float near, float far)
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
+ uint2 renderCoord = DTid.xy / 2;
const float2 uv = (DTid.xy + 0.5f) * xPPResolution_rcp;
-
+
#if 0
-
+
// Calculate screen dependant motion vector
float4 prevPos = float4(uv * 2.0 - 1.0, 1.0, 1.0);
prevPos = mul(g_xCamera_InvP, prevPos);
prevPos = prevPos / prevPos.w;
-
+
prevPos.xyz = mul((float3x3)g_xCamera_InvV, prevPos.xyz);
prevPos.xyz = mul((float3x3)g_xCamera_PrevV, prevPos.xyz);
-
+
float4 reproj = mul(g_xCamera_Proj, prevPos);
reproj /= reproj.w;
-
+
float2 prevUV = reproj.xy * 0.5 + 0.5;
-
+
#else
-
+
float x = uv.x * 2 - 1;
float y = (1 - uv.y) * 2 - 1;
float2 screenPosition = float2(x, y);
- float cloudLinearDepth = cloud_depth.SampleLevel(sampler_linear_clamp, uv, 0).r;
- float cloudDepth = getInverseLinearDepth(cloudLinearDepth, g_xCamera_ZNearP, g_xCamera_ZFarP);
+ float currentCloudLinearDepth = cloud_depth_current.SampleLevel(sampler_point_clamp, uv, 0).x;
+ float currentCloudDepth = getInverseLinearDepth(currentCloudLinearDepth, g_xCamera_ZNearP, g_xCamera_ZFarP);
- float4 thisClip = float4(screenPosition, cloudDepth, 1.0);
+ float4 thisClip = float4(screenPosition, currentCloudDepth, 1.0);
float4 prevClip = mul(g_xCamera_InvVP, thisClip);
prevClip = mul(g_xCamera_PrevVP, prevClip);
@@ -198,26 +70,103 @@ void main(uint3 DTid : SV_DispatchThreadID)
float2 prevUV = prevScreenPosition * float2(0.5, -0.5) + 0.5;
#endif
-
- float4 previous = cloud_history.SampleLevel(sampler_linear_clamp, prevUV, 0);
-
- float4 current = 0;
- float4 currentMin, currentMax, currentAverage;
- ResolverAABB(cloud_current, sampler_point_clamp, 0, temporalExposure, temporalScale, uv, xPPResolution, currentMin, currentMax, currentAverage, current);
+
+ bool validHistory = is_saturated(prevUV);
- //previous = clip_aabb(currentMin.xyz, currentMax.xyz, clamp(currentAverage, currentMin, currentMax), previous);
- previous = clip_aabb(currentMin, currentMax, previous);
+ int subPixelIndex = g_xFrame_FrameCount % 4;
+ int localIndex = (DTid.x & 1) + (DTid.y & 1) * 2;
+ int currentIndex = ComputeCheckerBoardIndex(renderCoord, subPixelIndex);
+
+ bool shouldUpdatePixel = (localIndex == currentIndex);
+
+ float4 result = 0.0;
+ float2 depthResult = 0.0;
- float4 result = lerp(previous, current, temporalResponse);
-
- result = is_saturated(prevUV) ? result : current;
+
+#if 0 // Simple reprojection version
+ if (shouldUpdatePixel)
+ {
+ result = cloud_current[renderCoord];
+ depthResult = cloud_depth_current[renderCoord];
+ }
+ else
+ {
+ result = cloud_history.SampleLevel(sampler_linear_clamp, uv, 0);
+ depthResult = cloud_depth_history.SampleLevel(sampler_linear_clamp, uv, 0);
+ }
+ output[DTid.xy] = result;
+ output_depth[DTid.xy] = depthResult;
+ return;
+#endif
+
+
+ if (validHistory)
+ {
+ float4 newResult = cloud_current[renderCoord];
+ float2 newDepthResult = cloud_depth_current[renderCoord];
+
+ if (shouldUpdatePixel)
+ {
+ result = newResult;
+ depthResult = newDepthResult;
+ }
+ else
+ {
+ float4 previousResult = cloud_history.SampleLevel(sampler_linear_clamp, prevUV, 0);
+ float2 previousDepthResult = cloud_depth_history.SampleLevel(sampler_linear_clamp, prevUV, 0);
+
+ result = previousResult;
+ depthResult = previousDepthResult;
+
+ float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 1).r; // Half res
+ float3 depthWorldPosition = reconstructPosition(uv, depth);
+ float tToDepthBuffer = length(depthWorldPosition - g_xCamera_CamPos);
+
+ if (abs(tToDepthBuffer - previousDepthResult.y) > tToDepthBuffer * 0.1)
+ {
+ float closestDepth = FLT_MAX;
+ for (int y = -1; y <= 1; y++)
+ {
+ for (int x = -1; x <= 1; x++)
+ {
+ // If it's middle then skip. We only evaluate neighbor samples
+ if ((abs(x) + abs(y)) == 0)
+ continue;
+
+ int2 neighborCoord = renderCoord + int2(x, y);
+
+ float2 neighboorDepthResult = cloud_depth_current[neighborCoord];
+ float neighborClosestDepth = abs(tToDepthBuffer - neighboorDepthResult.y);
+
+ if (neighborClosestDepth < closestDepth)
+ {
+ closestDepth = neighborClosestDepth;
+ float4 neighborResult = cloud_current[neighborCoord];
+
+ result = neighborResult;
+ depthResult = neighboorDepthResult;
+ }
+ }
+ }
+
+ if (abs(tToDepthBuffer - newDepthResult.y) < closestDepth)
+ {
+ result = newResult;
+ depthResult = newDepthResult;
+ }
+ }
+ else
+ {
+
+ }
+ }
+ }
+ else
+ {
+ result = cloud_current.SampleLevel(sampler_linear_clamp, uv, 0);
+ depthResult = cloud_depth_current.SampleLevel(sampler_linear_clamp, uv, 0);
+ }
output[DTid.xy] = result;
-
- [branch]
- if (DTid.x % 2 == 0 && DTid.y % 2 == 0)
- {
- // the mask is half the resolution of the clouds
- output_cloudMask[DTid.xy / 2] = pow(saturate(1 - result.a), 64);
- }
+ output_depth[DTid.xy] = depthResult;
}
diff --git a/WickedEngine/shaders/volumetricCloud_temporalCS.hlsl b/WickedEngine/shaders/volumetricCloud_temporalCS.hlsl
new file mode 100644
index 000000000..790473e11
--- /dev/null
+++ b/WickedEngine/shaders/volumetricCloud_temporalCS.hlsl
@@ -0,0 +1,203 @@
+#include "globals.hlsli"
+#include "ShaderInterop_Postprocess.h"
+
+TEXTURE2D(cloud_reproject, float4, TEXSLOT_ONDEMAND0);
+TEXTURE2D(cloud_reproject_depth, float2, TEXSLOT_ONDEMAND1);
+TEXTURE2D(cloud_history, float4, TEXSLOT_ONDEMAND2);
+
+RWTEXTURE2D(output, float4, 0);
+RWTEXTURE2D(output_cloudMask, unorm float4, 1);
+
+
+// If the clouds are moving fast, the upsampling will most likely not be able to keep up. You can modify these values to relax the effect:
+static const float temporalResponse = 0.05;
+static const float temporalScale = 2.0;
+static const float temporalExposure = 10.0;
+
+// AABB clip used for the cloud history (a different method from the one used by e.g. SSR temporal)
+float4 clip_aabb(float4 aabb_min, float4 aabb_max, float4 prev_sample)
+{
+	const float4 boxCenter = 0.5 * (aabb_max + aabb_min);
+	const float4 boxHalfSize = 0.5 * (aabb_max - aabb_min) + 0.00000001f; // small bias keeps the divisor non-zero when min == max
+
+	// Offset of the history sample from the box center, and its per-channel magnitude measured in half-sizes
+	const float4 fromCenter = prev_sample - boxCenter;
+	const float4 relative = abs(fromCenter / boxHalfSize);
+	const float furthest = max(max(relative.x, max(relative.y, relative.z)), relative.w);
+
+	// Outside the box: shrink the offset uniformly so the furthest channel lands on the box surface
+	if (furthest > 1.0)
+		return boxCenter + fromCenter / furthest;
+	return prev_sample; // point inside aabb
+}
+
+// Builds the clipping box for the temporal history from the 3x3 neighborhood of currentColor around uv: mean +/- AABBScale * stddev, widened to contain the center sample.
+// Outputs: currentMin/currentMax (box), currentAverage (mean), currentOutput (center sample). texelSize divides the integer offsets (the caller passes the texture resolution); sharpness is unused and exposureScale is only read by the disabled HDR weighting.
+inline void ResolverAABB(Texture2D currentColor, SamplerState currentSampler, float sharpness, float exposureScale, float AABBScale, float2 uv, float2 texelSize, inout float4 currentMin, inout float4 currentMax, inout float4 currentAverage, inout float4 currentOutput)
+{
+	const int2 SampleOffset[9] = { int2(-1.0, -1.0), int2(0.0, -1.0), int2(1.0, -1.0), int2(-1.0, 0.0), int2(0.0, 0.0), int2(1.0, 0.0), int2(-1.0, 1.0), int2(0.0, 1.0), int2(1.0, 1.0) };
+
+	// Gather the 3x3 neighborhood (offset / texelSize converts a texel offset into a uv offset); index 4 is the center
+	float4 sampleColors[9];
+	[unroll]
+	for (uint i = 0; i < 9; i++)
+	{
+		sampleColors[i] = currentColor.SampleLevel(currentSampler, uv + (SampleOffset[i] / texelSize), 0.0f);
+	}
+
+#if 0 // Modulate Luma HDR: exaggerates outline between clouds and geometry. NOTE(review): HdrWeight4 is not defined in this file - restore it before enabling
+	float sampleWeights[9];
+	[unroll]
+	for (uint j = 0; j < 9; j++)
+	{
+		sampleWeights[j] = HdrWeight4(sampleColors[j].rgb, exposureScale);
+	}
+
+	float totalWeight = 0;
+	[unroll]
+	for (uint k = 0; k < 9; k++)
+	{
+		totalWeight += sampleWeights[k];
+	}
+	sampleColors[4] = (sampleColors[0] * sampleWeights[0] + sampleColors[1] * sampleWeights[1] + sampleColors[2] * sampleWeights[2] + sampleColors[3] * sampleWeights[3] + sampleColors[4] * sampleWeights[4] +
+		sampleColors[5] * sampleWeights[5] + sampleColors[6] * sampleWeights[6] + sampleColors[7] * sampleWeights[7] + sampleColors[8] * sampleWeights[8]) / totalWeight;
+#endif
+
+
+#if 0 // Standard clipping
+
+	// Variance Clipping (AABB)
+
+	float4 m1 = 0.0;
+	float4 m2 = 0.0;
+	[unroll]
+	for (uint x = 0; x < 9; x++)
+	{
+		m1 += sampleColors[x];
+		m2 += sampleColors[x] * sampleColors[x];
+	}
+
+	float4 mean = m1 / 9.0;
+	float4 stddev = sqrt(max((m2 / 9.0) - sqr(mean), 0.0));
+
+#else // Depth check
+	// Neighbors only contribute when the scene distance stored with them (.y) is within 10% of this pixel's scene distance
+	float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 1).r; // Half res
+	float3 depthWorldPosition = reconstructPosition(uv, depth);
+	float tToDepthBuffer = length(depthWorldPosition - g_xCamera_CamPos);
+
+	float validSampleCount = 1.0; // the center sample always counts
+
+	float4 m1 = 0.0;
+	float4 m2 = 0.0;
+	[unroll]
+	for (uint x = 0; x < 9; x++)
+	{
+		if (x == 4)
+		{
+			m1 += sampleColors[x];
+			m2 += sampleColors[x] * sampleColors[x];
+		}
+		else
+		{
+			float2 reprojectionDepthResults = cloud_reproject_depth.SampleLevel(sampler_point_clamp, uv + (SampleOffset[x] / texelSize), 0); // mip 0: this texture matches currentColor's resolution, unlike the half-res scene depth above
+			if (abs(tToDepthBuffer - reprojectionDepthResults.y) < tToDepthBuffer * 0.1)
+			{
+				m1 += sampleColors[x];
+				m2 += sampleColors[x] * sampleColors[x];
+				validSampleCount += 1.0;
+			}
+		}
+	}
+	// Clamp the variance at zero: rounding can make E[x^2] - E[x]^2 slightly negative, and sqrt of a negative value is NaN
+	float4 mean = m1 / validSampleCount;
+	float4 stddev = sqrt(max((m2 / validSampleCount) - sqr(mean), 0.0));
+
+#endif
+
+	currentMin = mean - AABBScale * stddev;
+	currentMax = mean + AABBScale * stddev;
+
+	currentOutput = sampleColors[4];
+	currentMin = min(currentMin, currentOutput);
+	currentMax = max(currentMax, currentOutput);
+	currentAverage = mean;
+}
+
+// Maps a linear depth value back to post-projection depth, given the near and far values of the projection
+float getInverseLinearDepth(float lin, float near, float far)
+{
+	const float numerator = (lin - 2 * far) * near + far * lin;
+	const float denominator = lin * near - far * lin;
+	return (numerator / denominator + 1) / 2; // remap the quotient with (v + 1) / 2
+}
+// Temporal accumulation pass: blends the reprojected cloud color with the box-clipped history and writes the half-resolution cloud mask
+[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
+void main(uint3 DTid : SV_DispatchThreadID)
+{
+	const float2 uv = (DTid.xy + 0.5f) * xPPResolution_rcp;
+
+#if 0
+
+	// Calculate screen dependent motion vector
+	float4 prevPos = float4(uv * 2.0 - 1.0, 1.0, 1.0);
+	prevPos = mul(g_xCamera_InvP, prevPos);
+	prevPos = prevPos / prevPos.w;
+
+	prevPos.xyz = mul((float3x3)g_xCamera_InvV, prevPos.xyz);
+	prevPos.xyz = mul((float3x3)g_xCamera_PrevV, prevPos.xyz);
+
+	float4 reproj = mul(g_xCamera_Proj, prevPos);
+	reproj /= reproj.w;
+
+	float2 prevUV = reproj.xy * 0.5 + 0.5;
+
+#else
+
+	// We must recalculate motion with new upscaled cloud depths:
+
+	float x = uv.x * 2 - 1;
+	float y = (1 - uv.y) * 2 - 1;
+	float2 screenPosition = float2(x, y);
+	// x channel of the reprojection pass depth output is the linear cloud depth; convert it to post-projection depth
+	float currentCloudLinearDepth = cloud_reproject_depth[DTid.xy].x;
+	float currentCloudDepth = getInverseLinearDepth(currentCloudLinearDepth, g_xCamera_ZNearP, g_xCamera_ZFarP);
+
+	float4 thisClip = float4(screenPosition, currentCloudDepth, 1.0);
+	// Transform by the current inverse view-projection, then by the previous frame's view-projection
+	float4 prevClip = mul(g_xCamera_InvVP, thisClip);
+	prevClip = mul(g_xCamera_PrevVP, prevClip);
+
+	//float4 prevClip = mul(g_xCamera_PrevVP, worldPosition);
+	float2 prevScreen = prevClip.xy / prevClip.w;
+	// Note: screenPosition - screenVelocity below simplifies to prevScreen
+	float2 screenVelocity = screenPosition - prevScreen;
+	float2 prevScreenPosition = screenPosition - screenVelocity;
+
+	// Transform from screen position to uv
+	float2 prevUV = prevScreenPosition * float2(0.5, -0.5) + 0.5;
+
+#endif
+	// Fetch the history at the reprojected position
+	float4 previous = cloud_history.SampleLevel(sampler_linear_clamp, prevUV, 0);
+
+	float4 current = 0;
+	float4 currentMin, currentMax, currentAverage;
+	ResolverAABB(cloud_reproject, sampler_point_clamp, 0, temporalExposure, temporalScale, uv, xPPResolution, currentMin, currentMax, currentAverage, current);
+	// Limit the history to the box computed from the current neighborhood
+	//previous = clip_aabb(currentMin.xyz, currentMax.xyz, clamp(currentAverage, currentMin, currentMax), previous);
+	previous = clip_aabb(currentMin, currentMax, previous);
+	// temporalResponse is the weight given to the current frame in the blend
+	float4 result = lerp(previous, current, temporalResponse);
+	// When is_saturated(prevUV) fails (presumably prevUV lies outside the 0..1 range) the history is ignored
+	result = is_saturated(prevUV) ? result : current;
+
+	output[DTid.xy] = result;
+	// Only the thread with even x and even y in each 2x2 group writes the mask texel
+	[branch]
+	if (DTid.x % 2 == 0 && DTid.y % 2 == 0)
+	{
+		// the mask is half the resolution of the clouds
+		output_cloudMask[DTid.xy / 2] = pow(saturate(1 - result.a), 64);
+	}
+}
diff --git a/WickedEngine/wiEnums.h b/WickedEngine/wiEnums.h
index bc0ff6707..e0cf1cebd 100644
--- a/WickedEngine/wiEnums.h
+++ b/WickedEngine/wiEnums.h
@@ -365,6 +365,7 @@ enum SHADERTYPE
CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_WEATHERMAP,
CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_RENDER,
CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_REPROJECT,
+ CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_TEMPORAL,
CSTYPE_POSTPROCESS_FXAA,
CSTYPE_POSTPROCESS_TEMPORALAA,
CSTYPE_POSTPROCESS_LINEARDEPTH,
diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp
index a017eb70b..d44e1df23 100644
--- a/WickedEngine/wiRenderer.cpp
+++ b/WickedEngine/wiRenderer.cpp
@@ -1296,6 +1296,7 @@ void LoadShaders()
wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_WEATHERMAP], "volumetricCloud_weathermapCS.cso"); });
wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_RENDER], "volumetricCloud_renderCS.cso"); });
wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_REPROJECT], "volumetricCloud_reprojectCS.cso"); });
+ wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_TEMPORAL], "volumetricCloud_temporalCS.cso"); });
wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_FXAA], "fxaaCS.cso"); });
wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_TEMPORALAA], "temporalaaCS.cso"); });
wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_LINEARDEPTH], "lineardepthCS.cso"); });
@@ -11406,25 +11407,43 @@ void Postprocess_Bloom(
}
void CreateVolumetricCloudResources(VolumetricCloudResources& res, XMUINT2 resolution)
{
+ XMUINT2 renderResolution = XMUINT2(resolution.x / 4, resolution.y / 4);
+ XMUINT2 reprojectionResolution = XMUINT2(resolution.x / 2, resolution.y / 2);
+ XMUINT2 maskResolution = XMUINT2(resolution.x / 4, resolution.y / 4); // Needs to be half of final cloud output
+
TextureDesc desc;
desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
- desc.Width = resolution.x / 4;
- desc.Height = resolution.y / 4;
+ desc.Width = renderResolution.x;
+ desc.Height = renderResolution.y;
desc.Format = FORMAT_R16G16B16A16_FLOAT;
desc.layout = IMAGE_LAYOUT_SHADER_RESOURCE_COMPUTE;
device->CreateTexture(&desc, nullptr, &res.texture_cloudRender);
device->SetName(&res.texture_cloudRender, "texture_cloudRender");
+ desc.Format = FORMAT_R16G16_FLOAT;
+ device->CreateTexture(&desc, nullptr, &res.texture_cloudDepth);
+ device->SetName(&res.texture_cloudDepth, "texture_cloudDepth");
+
+ desc.Width = reprojectionResolution.x;
+ desc.Height = reprojectionResolution.y;
+ desc.Format = FORMAT_R16G16B16A16_FLOAT;
device->CreateTexture(&desc, nullptr, &res.texture_reproject[0]);
device->SetName(&res.texture_reproject[0], "texture_reproject[0]");
device->CreateTexture(&desc, nullptr, &res.texture_reproject[1]);
device->SetName(&res.texture_reproject[1], "texture_reproject[1]");
+ desc.Format = FORMAT_R16G16_FLOAT;
+ device->CreateTexture(&desc, nullptr, &res.texture_reproject_depth[0]);
+ device->SetName(&res.texture_reproject_depth[0], "texture_reproject_depth[0]");
+ device->CreateTexture(&desc, nullptr, &res.texture_reproject_depth[1]);
+ device->SetName(&res.texture_reproject_depth[1], "texture_reproject_depth[1]");
- desc.Format = FORMAT_R16_FLOAT;
- device->CreateTexture(&desc, nullptr, &res.texture_cloudDepth);
- device->SetName(&res.texture_cloudDepth, "texture_cloudDepth");
+ desc.Format = FORMAT_R16G16B16A16_FLOAT;
+ device->CreateTexture(&desc, nullptr, &res.texture_temporal[0]);
+ device->SetName(&res.texture_temporal[0], "texture_temporal[0]");
+ device->CreateTexture(&desc, nullptr, &res.texture_temporal[1]);
+ device->SetName(&res.texture_temporal[1], "texture_temporal[1]");
- desc.Width /= 2;
- desc.Height /= 2;
+ desc.Width = maskResolution.x;
+ desc.Height = maskResolution.y;
desc.Format = FORMAT_R8G8B8A8_UNORM;
device->CreateTexture(&desc, nullptr, &res.texture_cloudMask);
device->SetName(&res.texture_cloudMask, "texture_cloudMask");
@@ -11446,8 +11465,10 @@ void Postprocess_VolumetricClouds(
cb.xPPResolution.y = desc.Height;
cb.xPPResolution_rcp.x = 1.0f / cb.xPPResolution.x;
cb.xPPResolution_rcp.y = 1.0f / cb.xPPResolution.y;
- //const XMFLOAT4& halton = wiMath::GetHaltonSequence((int)device->GetFrameCount());
- //cb.xPPParams0 = halton;
+ cb.xPPParams0.x = (float)res.texture_reproject[0].GetDesc().Width;
+ cb.xPPParams0.y = (float)res.texture_reproject[0].GetDesc().Height;
+ cb.xPPParams0.z = 1.0f / cb.xPPParams0.x;
+ cb.xPPParams0.w = 1.0f / cb.xPPParams0.y;
device->UpdateBuffer(&constantBuffers[CBTYPE_POSTPROCESS], &cb, cmd);
device->BindConstantBuffer(CS, &constantBuffers[CBTYPE_POSTPROCESS], CB_GETBINDSLOT(PostProcessCB), cmd);
@@ -11498,6 +11519,14 @@ void Postprocess_VolumetricClouds(
device->EventEnd(cmd);
}
+ const TextureDesc& reprojection_desc = res.texture_reproject[0].GetDesc();
+ cb.xPPResolution.x = reprojection_desc.Width;
+ cb.xPPResolution.y = reprojection_desc.Height;
+ cb.xPPResolution_rcp.x = 1.0f / cb.xPPResolution.x;
+ cb.xPPResolution_rcp.y = 1.0f / cb.xPPResolution.y;
+ device->UpdateBuffer(&constantBuffers[CBTYPE_POSTPROCESS], &cb, cmd);
+ device->BindConstantBuffer(CS, &constantBuffers[CBTYPE_POSTPROCESS], CB_GETBINDSLOT(PostProcessCB), cmd);
+
int temporal_output = device->GetFrameCount() % 2;
int temporal_history = 1 - temporal_output;
@@ -11510,17 +11539,18 @@ void Postprocess_VolumetricClouds(
device->BindResource(CS, &res.texture_cloudRender, TEXSLOT_ONDEMAND0, cmd);
device->BindResource(CS, &res.texture_cloudDepth, TEXSLOT_ONDEMAND1, cmd);
device->BindResource(CS, &res.texture_reproject[temporal_history], TEXSLOT_ONDEMAND2, cmd);
+ device->BindResource(CS, &res.texture_reproject_depth[temporal_history], TEXSLOT_ONDEMAND3, cmd);
const GPUResource* uavs[] = {
&res.texture_reproject[temporal_output],
- &res.texture_cloudMask,
+ &res.texture_reproject_depth[temporal_output],
};
device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd);
{
GPUBarrier barriers[] = {
GPUBarrier::Image(&res.texture_reproject[temporal_output], res.texture_reproject[temporal_output].desc.layout, IMAGE_LAYOUT_UNORDERED_ACCESS),
- GPUBarrier::Image(&res.texture_cloudMask, res.texture_cloudMask.desc.layout, IMAGE_LAYOUT_UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_reproject_depth[temporal_output], res.texture_reproject_depth[temporal_output].desc.layout, IMAGE_LAYOUT_UNORDERED_ACCESS),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
@@ -11536,6 +11566,50 @@ void Postprocess_VolumetricClouds(
GPUBarrier barriers[] = {
GPUBarrier::Memory(),
GPUBarrier::Image(&res.texture_reproject[temporal_output], IMAGE_LAYOUT_UNORDERED_ACCESS, res.texture_reproject[temporal_output].desc.layout),
+ GPUBarrier::Image(&res.texture_reproject_depth[temporal_output], IMAGE_LAYOUT_UNORDERED_ACCESS, res.texture_reproject_depth[temporal_output].desc.layout),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->UnbindUAVs(0, arraysize(uavs), cmd);
+ device->EventEnd(cmd);
+ }
+
+ // Temporal pass:
+ {
+ device->EventBegin("Volumetric Cloud Temporal", cmd);
+ device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_TEMPORAL], cmd);
+
+ device->BindResource(CS, &depthbuffer, TEXSLOT_DEPTH, cmd);
+ device->BindResource(CS, &res.texture_reproject[temporal_output], TEXSLOT_ONDEMAND0, cmd);
+ device->BindResource(CS, &res.texture_reproject_depth[temporal_output], TEXSLOT_ONDEMAND1, cmd);
+ device->BindResource(CS, &res.texture_temporal[temporal_history], TEXSLOT_ONDEMAND2, cmd);
+
+ const GPUResource* uavs[] = {
+ &res.texture_temporal[temporal_output],
+ &res.texture_cloudMask,
+ };
+ device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd);
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Image(&res.texture_temporal[temporal_output], res.texture_temporal[temporal_output].desc.layout, IMAGE_LAYOUT_UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_cloudMask, res.texture_cloudMask.desc.layout, IMAGE_LAYOUT_UNORDERED_ACCESS),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->Dispatch(
+ (res.texture_temporal[temporal_output].GetDesc().Width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (res.texture_temporal[temporal_output].GetDesc().Height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ 1,
+ cmd
+ );
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Memory(),
+ GPUBarrier::Image(&res.texture_temporal[temporal_output], IMAGE_LAYOUT_UNORDERED_ACCESS, res.texture_temporal[temporal_output].desc.layout),
GPUBarrier::Image(&res.texture_cloudMask, IMAGE_LAYOUT_UNORDERED_ACCESS, res.texture_cloudMask.desc.layout),
};
device->Barrier(barriers, arraysize(barriers), cmd);
diff --git a/WickedEngine/wiRenderer.h b/WickedEngine/wiRenderer.h
index f5e2c1d90..f8f6d6409 100644
--- a/WickedEngine/wiRenderer.h
+++ b/WickedEngine/wiRenderer.h
@@ -551,8 +551,10 @@ namespace wiRenderer
{
wiGraphics::Texture texture_cloudRender;
wiGraphics::Texture texture_cloudDepth;
- wiGraphics::Texture texture_cloudMask;
wiGraphics::Texture texture_reproject[2];
+ wiGraphics::Texture texture_reproject_depth[2];
+ wiGraphics::Texture texture_temporal[2];
+ wiGraphics::Texture texture_cloudMask;
};
void CreateVolumetricCloudResources(VolumetricCloudResources& res, XMUINT2 resolution);
void Postprocess_VolumetricClouds(