Volumetric clouds reprojection update (#290)

This commit is contained in:
Silas Oler
2021-07-21 10:39:40 +02:00
committed by GitHub
parent 7948b9b944
commit 6dda0bb2aa
11 changed files with 445 additions and 218 deletions
+2 -2
View File
@@ -870,7 +870,7 @@ void RenderPath3D::Render() const
device->EventBegin("Volumetric Clouds Reflection Blend", cmd);
wiImageParams fx;
fx.enableFullScreen();
wiImage::Draw(&volumetriccloudResources_reflection.texture_reproject[device->GetFrameCount() % 2], fx, cmd);
wiImage::Draw(&volumetriccloudResources_reflection.texture_temporal[device->GetFrameCount() % 2], fx, cmd);
device->EventEnd(cmd);
}
@@ -947,7 +947,7 @@ void RenderPath3D::Render() const
{
device->EventBegin("Volumetric Clouds Upsample + Blend", cmd);
wiRenderer::Postprocess_Upsample_Bilateral(
volumetriccloudResources.texture_reproject[device->GetFrameCount() % 2],
volumetriccloudResources.texture_temporal[device->GetFrameCount() % 2],
rtLinearDepth,
*GetGbuffer_Read(GBUFFER_COLOR), // only desc is taken if pixel shader upsampling is used
cmd,
+1
View File
@@ -208,6 +208,7 @@ int main(int argc, char* argv[])
"volumetricCloud_weathermapCS.hlsl" ,
"volumetricCloud_renderCS.hlsl" ,
"volumetricCloud_reprojectCS.hlsl" ,
"volumetricCloud_temporalCS.hlsl" ,
"shadingRateClassificationCS.hlsl" ,
"shadingRateClassificationCS_DEBUG.hlsl" ,
"skyAtmosphere_transmittanceLutCS.hlsl" ,
+1
View File
@@ -134,6 +134,7 @@ set(SHADERS_CS
"volumetricCloud_weathermapCS.hlsl"
"volumetricCloud_renderCS.hlsl"
"volumetricCloud_reprojectCS.hlsl"
"volumetricCloud_temporalCS.hlsl"
"shadingRateClassificationCS.hlsl"
"shadingRateClassificationCS_DEBUG.hlsl"
"skyAtmosphere_transmittanceLutCS.hlsl"
@@ -1028,6 +1028,7 @@
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">Compute</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">Compute</ShaderType>
</FxCompile>
<FxCompile Include="$(MSBuildThisFileDirectory)volumetricCloud_temporalCS.hlsl" />
<FxCompile Include="$(MSBuildThisFileDirectory)volumetriclight_directionalVS.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Vertex</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Vertex</ShaderType>
@@ -992,6 +992,9 @@
<FxCompile Include="$(MSBuildThisFileDirectory)fsr_sharpenCS.hlsl">
<Filter>CS</Filter>
</FxCompile>
<FxCompile Include="$(MSBuildThisFileDirectory)volumetricCloud_temporalCS.hlsl">
<Filter>CS</Filter>
</FxCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(MSBuildThisFileDirectory)ConstantBufferMapping.h">
@@ -29,7 +29,7 @@ TEXTURE2D(texture_curlNoise, float4, TEXSLOT_ONDEMAND3);
TEXTURE2D(texture_weatherMap, float4, TEXSLOT_ONDEMAND4);
RWTEXTURE2D(texture_render, float4, 0);
RWTEXTURE2D(texture_cloudDepth, float, 1);
RWTEXTURE2D(texture_cloudDepth, float2, 1);
// Octaves for multiple-scattering approximation. 1 means single-scattering only.
@@ -545,29 +545,6 @@ void RenderClouds(float3 rayOrigin, float3 rayDirection, float t, float steps, f
}
}
// Intersects the ray (rayOrigin + t * rayDirection) with a sphere by solving a*t^2 + b*t + c = 0.
//	Returns true when the discriminant is non-negative; solutions then holds the two roots:
//	solutions.x uses -sqrt(discriminant), solutions.y uses +sqrt(discriminant).
//	Returns false otherwise and leaves solutions untouched.
bool TraceSphereIntersections(float3 rayOrigin, float3 rayDirection, float3 sphereCenter, float sphereRadius, inout float2 solutions)
{
	const float3 originToCenter = rayOrigin - sphereCenter;

	// Quadratic coefficients
	const float a = dot(rayDirection, rayDirection);
	const float b = 2 * dot(rayDirection, originToCenter);
	const float c = dot(originToCenter, originToCenter) - sphereRadius * sphereRadius;

	const float discriminant = b * b - 4 * a * c;

	// No real roots: the ray misses the sphere
	if (!(discriminant >= 0.0))
	{
		return false;
	}

	const float root = sqrt(discriminant);
	solutions = (-b + float2(-1, 1) * root) / (2 * a);
	return true;
}
float CalculateAtmosphereBlend(float tDepth)
{
// Progressively increase alpha as clouds reaches the desired distance.
@@ -582,15 +559,29 @@ float CalculateAtmosphereBlend(float tDepth)
return fade;
}
static const uint2 g_HalfResIndexToCoordinateOffset[4] = { uint2(0, 0), uint2(1, 0), uint2(0, 1), uint2(1, 1) };
// Calculates the checkerboard undersampling position for a low resolution pixel.
//	renderCoord   : coordinate of the low resolution pixel
//	subPixelIndex : base sub-pixel index for the frame
//	Returns an index in [0, 3].
// NOTE(review): '+' binds tighter than '&', so the expression "x & 1 + y & 1" is evaluated as
//	"x & (1 + y) & 1". The offset is therefore 1 only when x is odd and y is even, which is not an
//	(x + y) parity checkerboard. If that was the intent, change every copy of this function in the
//	cloud shaders at the same time, because the passes must agree on the pattern.
int ComputeCheckerBoardIndex(int2 renderCoord, int subPixelIndex)
{
	// Written with explicit parentheses; evaluates exactly like the unparenthesized form
	const int localOffset = (renderCoord.x & (renderCoord.y + 1)) & 1;
	return (subPixelIndex + localOffset) & 0x3;
}
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
const float2 uv = (DTid.xy + 0.5) * xPPResolution_rcp;
int subPixelIndex = g_xFrame_FrameCount % 4;
int checkerBoardIndex = ComputeCheckerBoardIndex(DTid.xy, subPixelIndex);
uint2 halfResCoord = DTid.xy * 2 + g_HalfResIndexToCoordinateOffset[checkerBoardIndex];
const float2 uv = (halfResCoord + 0.5) * xPPParams0.zw;
float x = uv.x * 2 - 1;
float y = (1 - uv.y) * 2 - 1;
float2 screenPosition = float2(x, y);
float4 unprojected = mul(g_xCamera_InvVP, float4(screenPosition, 0, 1));
unprojected.xyz /= unprojected.w;
@@ -601,6 +592,7 @@ void main(uint3 DTid : SV_DispatchThreadID)
float tMin = -FLT_MAX;
float tMax = -FLT_MAX;
float t;
float tToDepthBuffer;
float steps;
float stepSize;
{
@@ -612,11 +604,11 @@ void main(uint3 DTid : SV_DispatchThreadID)
const float cloudBottomRadius = planetRadius + g_xFrame_VolumetricClouds.CloudStartHeight;
const float cloudTopRadius = planetRadius + g_xFrame_VolumetricClouds.CloudStartHeight + g_xFrame_VolumetricClouds.CloudThickness;
float2 tTopSolutions = 0.0;
if (TraceSphereIntersections(rayOrigin, rayDirection, planetCenterWorld, cloudTopRadius, tTopSolutions))
float2 tTopSolutions = RaySphereIntersect(rayOrigin, rayDirection, planetCenterWorld, cloudTopRadius);
if (tTopSolutions.x > 0.0 || tTopSolutions.y > 0.0)
{
float2 tBottomSolutions = 0.0;
if (TraceSphereIntersections(rayOrigin, rayDirection, planetCenterWorld, cloudBottomRadius, tBottomSolutions))
float2 tBottomSolutions = RaySphereIntersect(rayOrigin, rayDirection, planetCenterWorld, cloudBottomRadius);
if (tBottomSolutions.x > 0.0 || tBottomSolutions.y > 0.0)
{
// If we see both intersections on the screen, keep the min closest, otherwise the max furthest
float tempTop = all(tTopSolutions > 0.0f) ? min(tTopSolutions.x, tTopSolutions.y) : max(tTopSolutions.x, tTopSolutions.y);
@@ -642,23 +634,23 @@ void main(uint3 DTid : SV_DispatchThreadID)
}
else
{
texture_render[DTid.xy] = float4(0.0, 0.0, 0.0, 0.0);
texture_cloudDepth[DTid.xy] = 0.0;
texture_render[DTid.xy] = float4(0.0, 0.0, 0.0, 0.0); // Inverted alpha
texture_cloudDepth[DTid.xy] = FLT_MAX;
return;
}
if (tMax <= tMin || tMin > g_xFrame_VolumetricClouds.RenderDistance)
{
texture_render[DTid.xy] = float4(0.0, 0.0, 0.0, 0.0);
texture_cloudDepth[DTid.xy] = 0.0;
texture_render[DTid.xy] = float4(0.0, 0.0, 0.0, 0.0); // Inverted alpha
texture_cloudDepth[DTid.xy] = FLT_MAX;
return;
}
// Depth buffer intersection
float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 0).r;
float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 1).r;
float3 depthWorldPosition = reconstructPosition(uv, depth);
float tToDepthBuffer = length(depthWorldPosition - rayOrigin);
tToDepthBuffer = length(depthWorldPosition - rayOrigin);
tMax = depth == 0.0 ? tMax : min(tMax, tToDepthBuffer); // Exclude skybox
const float marchingDistance = min(g_xFrame_VolumetricClouds.MaxMarchingDistance, tMax - tMin);
@@ -692,8 +684,8 @@ void main(uint3 DTid : SV_DispatchThreadID)
float grayScaleTransmittance = approxTransmittance < g_xFrame_VolumetricClouds.TransmittanceThreshold ? 0.0 : approxTransmittance;
float4 color = float4(luminance, grayScaleTransmittance);
color.a = 1.0 - color.a; // Invert to match reprojection. Early returns has to be inverted too.
color.a = 1.0 - color.a; // Invert to match reprojection. Early color returns has to be inverted too.
// Blend clouds with horizon
if (depthWeightsSum > 0.0)
@@ -707,5 +699,5 @@ void main(uint3 DTid : SV_DispatchThreadID)
// Output
texture_render[DTid.xy] = color;
texture_cloudDepth[DTid.xy] = tDepth; // Linear depth
texture_cloudDepth[DTid.xy] = float2(tDepth, tToDepthBuffer); // Linear depth
}
@@ -2,150 +2,21 @@
#include "ShaderInterop_Postprocess.h"
TEXTURE2D(cloud_current, float4, TEXSLOT_ONDEMAND0);
TEXTURE2D(cloud_depth, float, TEXSLOT_ONDEMAND1);
TEXTURE2D(cloud_depth_current, float2, TEXSLOT_ONDEMAND1);
TEXTURE2D(cloud_history, float4, TEXSLOT_ONDEMAND2);
TEXTURE2D(cloud_depth_history, float2, TEXSLOT_ONDEMAND3);
RWTEXTURE2D(output, float4, 0);
RWTEXTURE2D(output_cloudMask, unorm float4, 1);
RWTEXTURE2D(output_depth, float2, 1);
// The rendering uses a temporal upsampling pass similar to Frostbite. See https://odr.chalmers.se/handle/20.500.12380/241770
// If the clouds are moving fast, the upsampling will most likely not be able to keep up. You can modify these values to relax the effect:
static const float temporalResponse = 0.05;
static const float temporalScale = 3.0;
static const float temporalExposure = 10.0;
inline float Luma4(float3 color)
// This function computes the checkerboard undersampling position
int ComputeCheckerBoardIndex(int2 renderCoord, int subPixelIndex)
{
return (color.g * 2) + (color.r + color.b);
const int localOffset = (renderCoord.x & 1 + renderCoord.y & 1) & 1;
const int checkerBoardLocation = (subPixelIndex + localOffset) & 0x3;
return checkerBoardLocation;
}
// Returns 1 / (Luma4(color) * exposure + 4), so brighter colors get a smaller weight.
inline float HdrWeight4(float3 color, float exposure)
{
	const float scaledLuma = Luma4(color) * exposure;
	return rcp(scaledLuma + 4.0f);
}
// Different aabb clipping method from eg. SSR temporal, suitable for clouds in this case.
//	Pulls prev_sample towards the center of the box [aabb_min, aabb_max] until it lies on the box
//	surface. Samples that are already inside the box are returned unchanged.
float4 clip_aabb(float4 aabb_min, float4 aabb_max, float4 prev_sample)
{
	const float4 boxCenter = 0.5 * (aabb_max + aabb_min);
	const float4 boxHalfExtent = 0.5 * (aabb_max - aabb_min) + 0.00000001f; // small bias avoids dividing by zero for a degenerate box

	const float4 centerToSample = prev_sample - boxCenter;
	const float4 ratio = abs(centerToSample / boxHalfExtent);

	// Largest per-channel overshoot, in units of the half extent
	const float maxRatio = max(max(ratio.x, max(ratio.y, ratio.z)), ratio.w);

	if (maxRatio > 1.0)
	{
		return boxCenter + centerToSample / maxRatio;
	}
	return prev_sample; // point inside aabb
}
// Fetches the 3x3 neighborhood of currentColor around uv and builds a variance based color box from it.
//	currentColor, currentSampler : texture and sampler used for the 9 taps
//	sharpness                    : unused
//	exposureScale                : only referenced by the disabled HDR weighting path
//	AABBScale                    : half size of the box, in standard deviations
//	uv                           : center of the neighborhood
//	texelSize                    : tap offsets are DIVIDED by this value, so it has to be the resolution in pixels
//	currentMin, currentMax       : receive mean -/+ AABBScale * stddev, extended to always contain the center tap
//	currentAverage               : receives the mean of the accepted taps
//	currentOutput                : receives the unmodified center tap
inline void ResolverAABB(Texture2D<float4> currentColor, SamplerState currentSampler, float sharpness, float exposureScale, float AABBScale, float2 uv, float2 texelSize, inout float4 currentMin, inout float4 currentMax, inout float4 currentAverage, inout float4 currentOutput)
{
	const int2 SampleOffset[9] = { int2(-1.0, -1.0), int2(0.0, -1.0), int2(1.0, -1.0), int2(-1.0, 0.0), int2(0.0, 0.0), int2(1.0, 0.0), int2(-1.0, 1.0), int2(0.0, 1.0), int2(1.0, 1.0) };

	float4 sampleColors[9];

	[unroll]
	for (uint i = 0; i < 9; i++)
	{
		sampleColors[i] = currentColor.SampleLevel(currentSampler, uv + (SampleOffset[i] / texelSize), 0.0f);
	}

#if 0 // Exaggerates outline between clouds and geometry
	// Modulate Luma HDR
	float sampleWeights[9];

	[unroll]
	for (uint j = 0; j < 9; j++)
	{
		sampleWeights[j] = HdrWeight4(sampleColors[j].rgb, exposureScale);
	}

	float totalWeight = 0;

	[unroll]
	for (uint k = 0; k < 9; k++)
	{
		totalWeight += sampleWeights[k];
	}

	sampleColors[4] = (sampleColors[0] * sampleWeights[0] + sampleColors[1] * sampleWeights[1] + sampleColors[2] * sampleWeights[2] + sampleColors[3] * sampleWeights[3] + sampleColors[4] * sampleWeights[4] +
		sampleColors[5] * sampleWeights[5] + sampleColors[6] * sampleWeights[6] + sampleColors[7] * sampleWeights[7] + sampleColors[8] * sampleWeights[8]) / totalWeight;
#endif

#if 0 // Standard clipping
	// Variance Clipping (AABB)
	float4 m1 = 0.0;
	float4 m2 = 0.0;

	[unroll]
	for (uint x = 0; x < 9; x++)
	{
		m1 += sampleColors[x];
		m2 += sampleColors[x] * sampleColors[x];
	}

	float4 mean = m1 / 9.0;
	// Rounding can push the variance slightly below zero, clamp it so that sqrt never returns NaN
	float4 stddev = sqrt(max(0.0, (m2 / 9.0) - sqr(mean)));
#else // Depth check
	// Only taps whose linear scene depth is close to the center depth contribute to the statistics
	float originalLinearDepth = getLinearDepth(texture_depth.SampleLevel(sampler_point_clamp, uv, 0).r);

	float validSampleCount = 0.0;

	float4 m1 = 0.0;
	float4 m2 = 0.0;

	[unroll]
	for (uint x = 0; x < 9; x++)
	{
		bool accepted = true; // the center tap (index 4) is always accepted

		if (x != 4)
		{
			float depth = getLinearDepth(texture_depth.SampleLevel(sampler_point_clamp, uv + (SampleOffset[x] / texelSize), 0).r);
			accepted = abs(originalLinearDepth - depth) < 1.5;
		}

		if (accepted)
		{
			m1 += sampleColors[x];
			m2 += sampleColors[x] * sampleColors[x];
			validSampleCount += 1.0;
		}
	}

	float4 mean = m1 / validSampleCount;
	// Rounding can push the variance slightly below zero, clamp it so that sqrt never returns NaN
	float4 stddev = sqrt(max(0.0, (m2 / validSampleCount) - sqr(mean)));
#endif

	currentMin = mean - AABBScale * stddev;
	currentMax = mean + AABBScale * stddev;
	currentOutput = sampleColors[4];

	// The center tap must never be clipped away
	currentMin = min(currentMin, currentOutput);
	currentMax = max(currentMax, currentOutput);
	currentAverage = mean;
}
/*float2 CalculateCustomMotion(float4 worldPosition)
{
float4 thisClip = mul(g_xCamera_VP, worldPosition);
float4 prevClip = mul(g_xCamera_PrevVP, worldPosition);
float2 thisScreen = thisClip.xy * rcp(thisClip.w);
float2 prevScreen = prevClip.xy * rcp(prevClip.w);
thisScreen = (thisScreen.xy * float2(0.5, -0.5) + 0.5);
prevScreen = (prevScreen.xy * float2(0.5, -0.5) + 0.5);
return thisScreen - prevScreen;
}*/
// Computes post-projection depth from linear depth
float getInverseLinearDepth(float lin, float near, float far)
{
@@ -157,33 +28,34 @@ float getInverseLinearDepth(float lin, float near, float far)
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
uint2 renderCoord = DTid.xy / 2;
const float2 uv = (DTid.xy + 0.5f) * xPPResolution_rcp;
#if 0
// Calculate screen dependant motion vector
float4 prevPos = float4(uv * 2.0 - 1.0, 1.0, 1.0);
prevPos = mul(g_xCamera_InvP, prevPos);
prevPos = prevPos / prevPos.w;
prevPos.xyz = mul((float3x3)g_xCamera_InvV, prevPos.xyz);
prevPos.xyz = mul((float3x3)g_xCamera_PrevV, prevPos.xyz);
float4 reproj = mul(g_xCamera_Proj, prevPos);
reproj /= reproj.w;
float2 prevUV = reproj.xy * 0.5 + 0.5;
#else
float x = uv.x * 2 - 1;
float y = (1 - uv.y) * 2 - 1;
float2 screenPosition = float2(x, y);
float cloudLinearDepth = cloud_depth.SampleLevel(sampler_linear_clamp, uv, 0).r;
float cloudDepth = getInverseLinearDepth(cloudLinearDepth, g_xCamera_ZNearP, g_xCamera_ZFarP);
float currentCloudLinearDepth = cloud_depth_current.SampleLevel(sampler_point_clamp, uv, 0).x;
float currentCloudDepth = getInverseLinearDepth(currentCloudLinearDepth, g_xCamera_ZNearP, g_xCamera_ZFarP);
float4 thisClip = float4(screenPosition, cloudDepth, 1.0);
float4 thisClip = float4(screenPosition, currentCloudDepth, 1.0);
float4 prevClip = mul(g_xCamera_InvVP, thisClip);
prevClip = mul(g_xCamera_PrevVP, prevClip);
@@ -198,26 +70,103 @@ void main(uint3 DTid : SV_DispatchThreadID)
float2 prevUV = prevScreenPosition * float2(0.5, -0.5) + 0.5;
#endif
float4 previous = cloud_history.SampleLevel(sampler_linear_clamp, prevUV, 0);
float4 current = 0;
float4 currentMin, currentMax, currentAverage;
ResolverAABB(cloud_current, sampler_point_clamp, 0, temporalExposure, temporalScale, uv, xPPResolution, currentMin, currentMax, currentAverage, current);
bool validHistory = is_saturated(prevUV);
//previous = clip_aabb(currentMin.xyz, currentMax.xyz, clamp(currentAverage, currentMin, currentMax), previous);
previous = clip_aabb(currentMin, currentMax, previous);
int subPixelIndex = g_xFrame_FrameCount % 4;
int localIndex = (DTid.x & 1) + (DTid.y & 1) * 2;
int currentIndex = ComputeCheckerBoardIndex(renderCoord, subPixelIndex);
bool shouldUpdatePixel = (localIndex == currentIndex);
float4 result = 0.0;
float2 depthResult = 0.0;
float4 result = lerp(previous, current, temporalResponse);
result = is_saturated(prevUV) ? result : current;
#if 0 // Simple reprojection version
if (shouldUpdatePixel)
{
result = cloud_current[renderCoord];
depthResult = cloud_depth_current[renderCoord];
}
else
{
result = cloud_history.SampleLevel(sampler_linear_clamp, uv, 0);
depthResult = cloud_depth_history.SampleLevel(sampler_linear_clamp, uv, 0);
}
output[DTid.xy] = result;
output_depth[DTid.xy] = depthResult;
return;
#endif
if (validHistory)
{
float4 newResult = cloud_current[renderCoord];
float2 newDepthResult = cloud_depth_current[renderCoord];
if (shouldUpdatePixel)
{
result = newResult;
depthResult = newDepthResult;
}
else
{
float4 previousResult = cloud_history.SampleLevel(sampler_linear_clamp, prevUV, 0);
float2 previousDepthResult = cloud_depth_history.SampleLevel(sampler_linear_clamp, prevUV, 0);
result = previousResult;
depthResult = previousDepthResult;
float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 1).r; // Half res
float3 depthWorldPosition = reconstructPosition(uv, depth);
float tToDepthBuffer = length(depthWorldPosition - g_xCamera_CamPos);
if (abs(tToDepthBuffer - previousDepthResult.y) > tToDepthBuffer * 0.1)
{
float closestDepth = FLT_MAX;
for (int y = -1; y <= 1; y++)
{
for (int x = -1; x <= 1; x++)
{
// If it's middle then skip. We only evaluate neighbor samples
if ((abs(x) + abs(y)) == 0)
continue;
int2 neighborCoord = renderCoord + int2(x, y);
float2 neighboorDepthResult = cloud_depth_current[neighborCoord];
float neighborClosestDepth = abs(tToDepthBuffer - neighboorDepthResult.y);
if (neighborClosestDepth < closestDepth)
{
closestDepth = neighborClosestDepth;
float4 neighborResult = cloud_current[neighborCoord];
result = neighborResult;
depthResult = neighboorDepthResult;
}
}
}
if (abs(tToDepthBuffer - newDepthResult.y) < closestDepth)
{
result = newResult;
depthResult = newDepthResult;
}
}
else
{
}
}
}
else
{
result = cloud_current.SampleLevel(sampler_linear_clamp, uv, 0);
depthResult = cloud_depth_current.SampleLevel(sampler_linear_clamp, uv, 0);
}
output[DTid.xy] = result;
[branch]
if (DTid.x % 2 == 0 && DTid.y % 2 == 0)
{
// the mask is half the resolution of the clouds
output_cloudMask[DTid.xy / 2] = pow(saturate(1 - result.a), 64);
}
output_depth[DTid.xy] = depthResult;
}
@@ -0,0 +1,203 @@
#include "globals.hlsli"
#include "ShaderInterop_Postprocess.h"
TEXTURE2D(cloud_reproject, float4, TEXSLOT_ONDEMAND0);
TEXTURE2D(cloud_reproject_depth, float2, TEXSLOT_ONDEMAND1);
TEXTURE2D(cloud_history, float4, TEXSLOT_ONDEMAND2);
RWTEXTURE2D(output, float4, 0);
RWTEXTURE2D(output_cloudMask, unorm float4, 1);
// If the clouds are moving fast, the upsampling will most likely not be able to keep up. You can modify these values to relax the effect:
static const float temporalResponse = 0.05;
static const float temporalScale = 2.0;
static const float temporalExposure = 10.0;
// Different aabb clipping method from eg. SSR temporal, suitable for clouds in this case.
//	Moves prev_sample along the line towards the box center until it touches the box
//	[aabb_min, aabb_max]. A sample that is already inside the box is returned as is.
float4 clip_aabb(float4 aabb_min, float4 aabb_max, float4 prev_sample)
{
	const float4 center = 0.5 * (aabb_max + aabb_min);
	const float4 halfExtent = 0.5 * (aabb_max - aabb_min) + 0.00000001f; // bias keeps the division below finite for an empty box

	const float4 offset = prev_sample - center;
	const float4 normalized = abs(offset / halfExtent);

	// How far outside the box the worst channel is (1 means exactly on the surface)
	const float overshoot = max(max(normalized.x, max(normalized.y, normalized.z)), normalized.w);

	if (overshoot > 1.0)
	{
		return center + offset / overshoot;
	}
	return prev_sample; // point inside aabb
}
// Fetches the 3x3 neighborhood of currentColor around uv and builds a variance based color box from it.
//	currentColor, currentSampler : texture and sampler used for the 9 taps
//	sharpness                    : unused
//	exposureScale                : only referenced by the disabled HDR weighting path
//	AABBScale                    : half size of the box, in standard deviations
//	uv                           : center of the neighborhood
//	texelSize                    : tap offsets are DIVIDED by this value, so it has to be the resolution in pixels (main() passes xPPResolution)
//	currentMin, currentMax       : receive mean -/+ AABBScale * stddev, extended to always contain the center tap
//	currentAverage               : receives the mean of the accepted taps
//	currentOutput                : receives the unmodified center tap
inline void ResolverAABB(Texture2D<float4> currentColor, SamplerState currentSampler, float sharpness, float exposureScale, float AABBScale, float2 uv, float2 texelSize, inout float4 currentMin, inout float4 currentMax, inout float4 currentAverage, inout float4 currentOutput)
{
	const int2 SampleOffset[9] = { int2(-1.0, -1.0), int2(0.0, -1.0), int2(1.0, -1.0), int2(-1.0, 0.0), int2(0.0, 0.0), int2(1.0, 0.0), int2(-1.0, 1.0), int2(0.0, 1.0), int2(1.0, 1.0) };

	float4 sampleColors[9];

	[unroll]
	for (uint i = 0; i < 9; i++)
	{
		sampleColors[i] = currentColor.SampleLevel(currentSampler, uv + (SampleOffset[i] / texelSize), 0.0f);
	}

#if 0 // Exaggerates outline between clouds and geometry
	// Modulate Luma HDR
	float sampleWeights[9];

	[unroll]
	for (uint j = 0; j < 9; j++)
	{
		sampleWeights[j] = HdrWeight4(sampleColors[j].rgb, exposureScale);
	}

	float totalWeight = 0;

	[unroll]
	for (uint k = 0; k < 9; k++)
	{
		totalWeight += sampleWeights[k];
	}

	sampleColors[4] = (sampleColors[0] * sampleWeights[0] + sampleColors[1] * sampleWeights[1] + sampleColors[2] * sampleWeights[2] + sampleColors[3] * sampleWeights[3] + sampleColors[4] * sampleWeights[4] +
		sampleColors[5] * sampleWeights[5] + sampleColors[6] * sampleWeights[6] + sampleColors[7] * sampleWeights[7] + sampleColors[8] * sampleWeights[8]) / totalWeight;
#endif

#if 0 // Standard clipping
	// Variance Clipping (AABB)
	float4 m1 = 0.0;
	float4 m2 = 0.0;

	[unroll]
	for (uint x = 0; x < 9; x++)
	{
		m1 += sampleColors[x];
		m2 += sampleColors[x] * sampleColors[x];
	}

	float4 mean = m1 / 9.0;
	// Rounding can push the variance slightly below zero, clamp it so that sqrt never returns NaN
	float4 stddev = sqrt(max(0.0, (m2 / 9.0) - sqr(mean)));
#else // Depth check
	// Distance from the camera to the scene surface at the center of the neighborhood
	float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 1).r; // Half res
	float3 depthWorldPosition = reconstructPosition(uv, depth);
	float tToDepthBuffer = length(depthWorldPosition - g_xCamera_CamPos);

	float validSampleCount = 0.0;

	float4 m1 = 0.0;
	float4 m2 = 0.0;

	[unroll]
	for (uint x = 0; x < 9; x++)
	{
		bool accepted = true; // the center tap (index 4) is always accepted

		if (x != 4)
		{
			// Read mip 0, the same level main() reads through cloud_reproject_depth[DTid.xy].
			// Unlike texture_depth, this texture is not sampled at a lower mip anywhere else.
			float2 reprojectionDepthResults = cloud_reproject_depth.SampleLevel(sampler_point_clamp, uv + (SampleOffset[x] / texelSize), 0);

			// .y is compared against the scene distance with a 10% relative tolerance
			accepted = abs(tToDepthBuffer - reprojectionDepthResults.y) < tToDepthBuffer * 0.1;
		}

		if (accepted)
		{
			m1 += sampleColors[x];
			m2 += sampleColors[x] * sampleColors[x];
			validSampleCount += 1.0;
		}
	}

	float4 mean = m1 / validSampleCount;
	// Rounding can push the variance slightly below zero, clamp it so that sqrt never returns NaN
	float4 stddev = sqrt(max(0.0, (m2 / validSampleCount) - sqr(mean)));
#endif

	currentMin = mean - AABBScale * stddev;
	currentMax = mean + AABBScale * stddev;
	currentOutput = sampleColors[4];

	// The center tap must never be clipped away
	currentMin = min(currentMin, currentOutput);
	currentMax = max(currentMax, currentOutput);
	currentAverage = mean;
}
// Computes post-projection depth from linear depth.
//	lin  : linear depth
//	near : near plane distance
//	far  : far plane distance
//	With this formula lin == near maps to 1 and lin == far maps to 0.
//	The result is not finite for lin == 0 (the denominator becomes zero).
float getInverseLinearDepth(float lin, float near, float far)
{
	const float numerator = (lin - 2 * far) * near + far * lin;
	const float denominator = lin * near - far * lin;
	const float z_ndc = numerator / denominator; // in [-1, 1] between far and near

	// Remap [-1, 1] to [0, 1]
	return (z_ndc + 1) / 2;
}
// Temporal accumulation of the reprojected clouds: one thread per pixel of the output texture.
// Blends the color box clipped history with the current reprojection result and writes the cloud mask.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
	const uint2 pixel = DTid.xy;
	const float2 uv = (pixel + 0.5f) * xPPResolution_rcp;

#if 0
	// Calculate screen dependent motion vector
	float4 prevPos = float4(uv * 2.0 - 1.0, 1.0, 1.0);
	prevPos = mul(g_xCamera_InvP, prevPos);
	prevPos = prevPos / prevPos.w;

	prevPos.xyz = mul((float3x3)g_xCamera_InvV, prevPos.xyz);
	prevPos.xyz = mul((float3x3)g_xCamera_PrevV, prevPos.xyz);

	float4 reproj = mul(g_xCamera_Proj, prevPos);
	reproj /= reproj.w;

	float2 prevUV = reproj.xy * 0.5 + 0.5;
#else
	// We must recalculate motion with new upscaled cloud depths:
	const float2 screenPosition = float2(uv.x * 2 - 1, (1 - uv.y) * 2 - 1);

	const float cloudLinearDepth = cloud_reproject_depth[pixel].x;
	const float cloudDepth = getInverseLinearDepth(cloudLinearDepth, g_xCamera_ZNearP, g_xCamera_ZFarP);

	// Unproject with the current inverse view-projection, then project with the previous view-projection
	const float4 thisClip = float4(screenPosition, cloudDepth, 1.0);
	const float4 prevClip = mul(g_xCamera_PrevVP, mul(g_xCamera_InvVP, thisClip));
	const float2 prevScreen = prevClip.xy / prevClip.w;

	const float2 screenVelocity = screenPosition - prevScreen;
	const float2 prevScreenPosition = screenPosition - screenVelocity;

	// Transform from screen position to uv
	const float2 prevUV = prevScreenPosition * float2(0.5, -0.5) + 0.5;
#endif

	float4 history = cloud_history.SampleLevel(sampler_linear_clamp, prevUV, 0);

	// Color box of the current 3x3 neighborhood, "current" receives the center tap
	float4 current = 0;
	float4 neighborhoodMin, neighborhoodMax, neighborhoodAverage;
	ResolverAABB(cloud_reproject, sampler_point_clamp, 0, temporalExposure, temporalScale, uv, xPPResolution, neighborhoodMin, neighborhoodMax, neighborhoodAverage, current);

	history = clip_aabb(neighborhoodMin, neighborhoodMax, history);

	// History is only used when the reprojected uv is inside the [0, 1] range
	float4 result = current;
	if (is_saturated(prevUV))
	{
		result = lerp(history, current, temporalResponse);
	}

	output[pixel] = result;

	[branch]
	if ((pixel.x % 2 == 0) && (pixel.y % 2 == 0))
	{
		// the mask is half the resolution of the clouds
		output_cloudMask[pixel / 2] = pow(saturate(1 - result.a), 64);
	}
}
+1
View File
@@ -365,6 +365,7 @@ enum SHADERTYPE
CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_WEATHERMAP,
CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_RENDER,
CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_REPROJECT,
CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_TEMPORAL,
CSTYPE_POSTPROCESS_FXAA,
CSTYPE_POSTPROCESS_TEMPORALAA,
CSTYPE_POSTPROCESS_LINEARDEPTH,
+85 -11
View File
@@ -1296,6 +1296,7 @@ void LoadShaders()
wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_WEATHERMAP], "volumetricCloud_weathermapCS.cso"); });
wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_RENDER], "volumetricCloud_renderCS.cso"); });
wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_REPROJECT], "volumetricCloud_reprojectCS.cso"); });
wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_TEMPORAL], "volumetricCloud_temporalCS.cso"); });
wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_FXAA], "fxaaCS.cso"); });
wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_TEMPORALAA], "temporalaaCS.cso"); });
wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_POSTPROCESS_LINEARDEPTH], "lineardepthCS.cso"); });
@@ -11406,25 +11407,43 @@ void Postprocess_Bloom(
}
void CreateVolumetricCloudResources(VolumetricCloudResources& res, XMUINT2 resolution)
{
XMUINT2 renderResolution = XMUINT2(resolution.x / 4, resolution.y / 4);
XMUINT2 reprojectionResolution = XMUINT2(resolution.x / 2, resolution.y / 2);
XMUINT2 maskResolution = XMUINT2(resolution.x / 4, resolution.y / 4); // Needs to be half of final cloud output
TextureDesc desc;
desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
desc.Width = resolution.x / 4;
desc.Height = resolution.y / 4;
desc.Width = renderResolution.x;
desc.Height = renderResolution.y;
desc.Format = FORMAT_R16G16B16A16_FLOAT;
desc.layout = IMAGE_LAYOUT_SHADER_RESOURCE_COMPUTE;
device->CreateTexture(&desc, nullptr, &res.texture_cloudRender);
device->SetName(&res.texture_cloudRender, "texture_cloudRender");
desc.Format = FORMAT_R16G16_FLOAT;
device->CreateTexture(&desc, nullptr, &res.texture_cloudDepth);
device->SetName(&res.texture_cloudDepth, "texture_cloudDepth");
desc.Width = reprojectionResolution.x;
desc.Height = reprojectionResolution.y;
desc.Format = FORMAT_R16G16B16A16_FLOAT;
device->CreateTexture(&desc, nullptr, &res.texture_reproject[0]);
device->SetName(&res.texture_reproject[0], "texture_reproject[0]");
device->CreateTexture(&desc, nullptr, &res.texture_reproject[1]);
device->SetName(&res.texture_reproject[1], "texture_reproject[1]");
desc.Format = FORMAT_R16G16_FLOAT;
device->CreateTexture(&desc, nullptr, &res.texture_reproject_depth[0]);
device->SetName(&res.texture_reproject_depth[0], "texture_reproject_depth[0]");
device->CreateTexture(&desc, nullptr, &res.texture_reproject_depth[1]);
device->SetName(&res.texture_reproject_depth[1], "texture_reproject_depth[1]");
desc.Format = FORMAT_R16_FLOAT;
device->CreateTexture(&desc, nullptr, &res.texture_cloudDepth);
device->SetName(&res.texture_cloudDepth, "texture_cloudDepth");
desc.Format = FORMAT_R16G16B16A16_FLOAT;
device->CreateTexture(&desc, nullptr, &res.texture_temporal[0]);
device->SetName(&res.texture_temporal[0], "texture_temporal[0]");
device->CreateTexture(&desc, nullptr, &res.texture_temporal[1]);
device->SetName(&res.texture_temporal[1], "texture_temporal[1]");
desc.Width /= 2;
desc.Height /= 2;
desc.Width = maskResolution.x;
desc.Height = maskResolution.y;
desc.Format = FORMAT_R8G8B8A8_UNORM;
device->CreateTexture(&desc, nullptr, &res.texture_cloudMask);
device->SetName(&res.texture_cloudMask, "texture_cloudMask");
@@ -11446,8 +11465,10 @@ void Postprocess_VolumetricClouds(
cb.xPPResolution.y = desc.Height;
cb.xPPResolution_rcp.x = 1.0f / cb.xPPResolution.x;
cb.xPPResolution_rcp.y = 1.0f / cb.xPPResolution.y;
//const XMFLOAT4& halton = wiMath::GetHaltonSequence((int)device->GetFrameCount());
//cb.xPPParams0 = halton;
cb.xPPParams0.x = (float)res.texture_reproject[0].GetDesc().Width;
cb.xPPParams0.y = (float)res.texture_reproject[0].GetDesc().Height;
cb.xPPParams0.z = 1.0f / cb.xPPParams0.x;
cb.xPPParams0.w = 1.0f / cb.xPPParams0.y;
device->UpdateBuffer(&constantBuffers[CBTYPE_POSTPROCESS], &cb, cmd);
device->BindConstantBuffer(CS, &constantBuffers[CBTYPE_POSTPROCESS], CB_GETBINDSLOT(PostProcessCB), cmd);
@@ -11498,6 +11519,14 @@ void Postprocess_VolumetricClouds(
device->EventEnd(cmd);
}
const TextureDesc& reprojection_desc = res.texture_reproject[0].GetDesc();
cb.xPPResolution.x = reprojection_desc.Width;
cb.xPPResolution.y = reprojection_desc.Height;
cb.xPPResolution_rcp.x = 1.0f / cb.xPPResolution.x;
cb.xPPResolution_rcp.y = 1.0f / cb.xPPResolution.y;
device->UpdateBuffer(&constantBuffers[CBTYPE_POSTPROCESS], &cb, cmd);
device->BindConstantBuffer(CS, &constantBuffers[CBTYPE_POSTPROCESS], CB_GETBINDSLOT(PostProcessCB), cmd);
int temporal_output = device->GetFrameCount() % 2;
int temporal_history = 1 - temporal_output;
@@ -11510,17 +11539,18 @@ void Postprocess_VolumetricClouds(
device->BindResource(CS, &res.texture_cloudRender, TEXSLOT_ONDEMAND0, cmd);
device->BindResource(CS, &res.texture_cloudDepth, TEXSLOT_ONDEMAND1, cmd);
device->BindResource(CS, &res.texture_reproject[temporal_history], TEXSLOT_ONDEMAND2, cmd);
device->BindResource(CS, &res.texture_reproject_depth[temporal_history], TEXSLOT_ONDEMAND3, cmd);
const GPUResource* uavs[] = {
&res.texture_reproject[temporal_output],
&res.texture_cloudMask,
&res.texture_reproject_depth[temporal_output],
};
device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd);
{
GPUBarrier barriers[] = {
GPUBarrier::Image(&res.texture_reproject[temporal_output], res.texture_reproject[temporal_output].desc.layout, IMAGE_LAYOUT_UNORDERED_ACCESS),
GPUBarrier::Image(&res.texture_cloudMask, res.texture_cloudMask.desc.layout, IMAGE_LAYOUT_UNORDERED_ACCESS),
GPUBarrier::Image(&res.texture_reproject_depth[temporal_output], res.texture_reproject_depth[temporal_output].desc.layout, IMAGE_LAYOUT_UNORDERED_ACCESS),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
@@ -11536,6 +11566,50 @@ void Postprocess_VolumetricClouds(
GPUBarrier barriers[] = {
GPUBarrier::Memory(),
GPUBarrier::Image(&res.texture_reproject[temporal_output], IMAGE_LAYOUT_UNORDERED_ACCESS, res.texture_reproject[temporal_output].desc.layout),
GPUBarrier::Image(&res.texture_reproject_depth[temporal_output], IMAGE_LAYOUT_UNORDERED_ACCESS, res.texture_reproject_depth[temporal_output].desc.layout),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
device->UnbindUAVs(0, arraysize(uavs), cmd);
device->EventEnd(cmd);
}
// Temporal pass:
{
device->EventBegin("Volumetric Cloud Temporal", cmd);
device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_VOLUMETRICCLOUDS_TEMPORAL], cmd);
device->BindResource(CS, &depthbuffer, TEXSLOT_DEPTH, cmd);
device->BindResource(CS, &res.texture_reproject[temporal_output], TEXSLOT_ONDEMAND0, cmd);
device->BindResource(CS, &res.texture_reproject_depth[temporal_output], TEXSLOT_ONDEMAND1, cmd);
device->BindResource(CS, &res.texture_temporal[temporal_history], TEXSLOT_ONDEMAND2, cmd);
const GPUResource* uavs[] = {
&res.texture_temporal[temporal_output],
&res.texture_cloudMask,
};
device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd);
{
GPUBarrier barriers[] = {
GPUBarrier::Image(&res.texture_temporal[temporal_output], res.texture_temporal[temporal_output].desc.layout, IMAGE_LAYOUT_UNORDERED_ACCESS),
GPUBarrier::Image(&res.texture_cloudMask, res.texture_cloudMask.desc.layout, IMAGE_LAYOUT_UNORDERED_ACCESS),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
device->Dispatch(
(res.texture_temporal[temporal_output].GetDesc().Width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(res.texture_temporal[temporal_output].GetDesc().Height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
1,
cmd
);
{
GPUBarrier barriers[] = {
GPUBarrier::Memory(),
GPUBarrier::Image(&res.texture_temporal[temporal_output], IMAGE_LAYOUT_UNORDERED_ACCESS, res.texture_temporal[temporal_output].desc.layout),
GPUBarrier::Image(&res.texture_cloudMask, IMAGE_LAYOUT_UNORDERED_ACCESS, res.texture_cloudMask.desc.layout),
};
device->Barrier(barriers, arraysize(barriers), cmd);
+3 -1
View File
@@ -551,8 +551,10 @@ namespace wiRenderer
{
wiGraphics::Texture texture_cloudRender;
wiGraphics::Texture texture_cloudDepth;
wiGraphics::Texture texture_cloudMask;
wiGraphics::Texture texture_reproject[2];
wiGraphics::Texture texture_reproject_depth[2];
wiGraphics::Texture texture_temporal[2];
wiGraphics::Texture texture_cloudMask;
};
void CreateVolumetricCloudResources(VolumetricCloudResources& res, XMUINT2 resolution);
void Postprocess_VolumetricClouds(