Traced Reflection Improvements (#395)
* Traced Reflection Improvements * bilateral shader compiler fix * hierarchy shader compiler fix * Use texture_depth in hierarchy * Bilateral pass output fix & surface pass early exit * rtreflection fix: missing push constants after shader change * normals signedness fix * version bump Co-authored-by: Turánszki János <turanszkij@users.noreply.github.com>
This commit is contained in:
@@ -110,10 +110,17 @@ int main(int argc, char* argv[])
|
||||
"fsr_sharpenCS.hlsl" ,
|
||||
"ssaoCS.hlsl" ,
|
||||
"rtreflectionCS.hlsl" ,
|
||||
"ssr_raytraceCS.hlsl" ,
|
||||
"ssr_surfaceCS.hlsl" ,
|
||||
"ssr_tileMaxRoughness_horizontalCS.hlsl" ,
|
||||
"ssr_tileMaxRoughness_verticalCS.hlsl" ,
|
||||
"ssr_kickjobsCS.hlsl" ,
|
||||
"ssr_depthHierarchyCS.hlsl" ,
|
||||
"ssr_resolveCS.hlsl" ,
|
||||
"ssr_temporalCS.hlsl" ,
|
||||
"ssr_medianCS.hlsl" ,
|
||||
"ssr_bilateralCS.hlsl" ,
|
||||
"ssr_raytraceCS.hlsl" ,
|
||||
"ssr_raytraceCS_cheap.hlsl" ,
|
||||
"ssr_raytraceCS_earlyexit.hlsl" ,
|
||||
"sharpenCS.hlsl" ,
|
||||
"skinningCS.hlsl" ,
|
||||
"resolveMSAADepthStencilCS.hlsl" ,
|
||||
|
||||
@@ -40,8 +40,7 @@ struct Bloom
|
||||
#define lineardepth_inputresolution postprocess.params0.xy
|
||||
#define lineardepth_inputresolution_rcp postprocess.params0.zw
|
||||
|
||||
#define ssr_input_maxmip postprocess.params0.x
|
||||
#define ssr_input_resolution_max postprocess.params0.y
|
||||
static const uint SSR_TILESIZE = 32;
|
||||
#define ssr_frame postprocess.params0.w
|
||||
|
||||
#define ssao_range postprocess.params0.x
|
||||
|
||||
@@ -998,6 +998,38 @@
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Compute</ShaderType>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_bilateralCS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4.0</ShaderModel>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_depthHierarchyCS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4.0</ShaderModel>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_kickjobsCS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4.0</ShaderModel>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_raytraceCS_cheap.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4.0</ShaderModel>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_raytraceCS_earlyexit.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4.0</ShaderModel>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_surfaceCS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4.0</ShaderModel>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_tileMaxRoughness_horizontalCS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4.0</ShaderModel>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_tileMaxRoughness_verticalCS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4.0</ShaderModel>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)surfel_binningCS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4.0</ShaderModel>
|
||||
@@ -2561,16 +2593,6 @@
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">Compute</ShaderType>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_medianCS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">Compute</ShaderType>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_raytraceCS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
|
||||
|
||||
@@ -230,9 +230,6 @@
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssaoCS.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_medianCS.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_raytraceCS.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
@@ -1025,6 +1022,30 @@
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)surfel_integrateCS.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_bilateralCS.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_depthHierarchyCS.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_kickjobsCS.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_raytraceCS_cheap.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_raytraceCS_earlyexit.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_surfaceCS.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_tileMaxRoughness_horizontalCS.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="$(MSBuildThisFileDirectory)ssr_tileMaxRoughness_verticalCS.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="$(MSBuildThisFileDirectory)ShaderInterop.h">
|
||||
|
||||
@@ -11,8 +11,13 @@
|
||||
|
||||
PUSHCONSTANT(postprocess, PostProcess);
|
||||
|
||||
RWTexture2D<float4> output : register(u0);
|
||||
RWTexture2D<float> output_rayLengths : register(u1);
|
||||
Texture2D<float3> texture_surface_normal : register(t0);
|
||||
Texture2D<float> texture_surface_roughness : register(t1);
|
||||
Texture2D<float3> texture_surface_environment : register(t2);
|
||||
|
||||
RWTexture2D<float4> output_rayIndirectSpecular : register(u0);
|
||||
RWTexture2D<float4> output_rayDirectionPDF : register(u1);
|
||||
RWTexture2D<float> output_rayLengths : register(u2);
|
||||
|
||||
struct RayPayload
|
||||
{
|
||||
@@ -23,34 +28,30 @@ struct RayPayload
|
||||
void main(uint2 DTid : SV_DispatchThreadID)
|
||||
{
|
||||
const float2 uv = ((float2)DTid.xy + 0.5) * postprocess.resolution_rcp;
|
||||
const float depth = texture_depth.SampleLevel(sampler_linear_clamp, uv, 0);
|
||||
if (depth == 0)
|
||||
return;
|
||||
|
||||
const float3 P = reconstruct_position(uv, depth);
|
||||
const float3 V = normalize(GetCamera().position - P);
|
||||
const uint downsampleFactor = 2;
|
||||
|
||||
PrimitiveID prim;
|
||||
prim.unpack(texture_gbuffer0[DTid.xy * 2]);
|
||||
// This is necessary for accurate upscaling. This is so we don't reuse the same half-res pixels
|
||||
uint2 screenJitter = floor(blue_noise(uint2(0, 0)).xy * downsampleFactor);
|
||||
uint2 jitterPixel = screenJitter + DTid.xy * downsampleFactor;
|
||||
float2 jitterUV = (screenJitter + DTid.xy + 0.5f) * postprocess.resolution_rcp;
|
||||
|
||||
//output[DTid] = float4(saturate(P * 0.1), 1);
|
||||
//return;
|
||||
const float depth = texture_depth.SampleLevel(sampler_linear_clamp, jitterUV, 0);
|
||||
const float roughness = texture_surface_roughness[jitterPixel];
|
||||
|
||||
Surface surface;
|
||||
surface.init();
|
||||
if (!surface.load(prim, P))
|
||||
if (!NeedReflection(roughness, depth))
|
||||
{
|
||||
return;
|
||||
}
|
||||
if (surface.roughness > 0.6)
|
||||
{
|
||||
output[DTid.xy] = float4(max(0, EnvironmentReflection_Global(surface)), 1);
|
||||
float3 environmentReflection = texture_surface_environment[DTid.xy * downsampleFactor];
|
||||
|
||||
output_rayIndirectSpecular[DTid.xy] = float4(environmentReflection, 1);
|
||||
output_rayDirectionPDF[DTid.xy] = 0.0;
|
||||
output_rayLengths[DTid.xy] = FLT_MAX;
|
||||
return;
|
||||
}
|
||||
|
||||
float3 N = surface.N;
|
||||
float roughness = surface.roughness;
|
||||
const float3 N = texture_surface_normal[jitterPixel];
|
||||
const float3 P = reconstruct_position(jitterUV, depth);
|
||||
const float3 V = normalize(GetCamera().position - P);
|
||||
|
||||
// The ray direction selection part is the same as in ssr_raytraceCS.hlsl:
|
||||
float4 H;
|
||||
@@ -217,6 +218,7 @@ void main(uint2 DTid : SV_DispatchThreadID)
|
||||
payload.data.w = q.CommittedRayT();
|
||||
}
|
||||
|
||||
output[DTid.xy] = float4(payload.data.xyz, 1);
|
||||
output_rayIndirectSpecular[DTid.xy] = float4(payload.data.xyz, 1);
|
||||
output_rayDirectionPDF[DTid.xy] = float4(L, H.w);
|
||||
output_rayLengths[DTid.xy] = payload.data.w;
|
||||
}
|
||||
|
||||
@@ -10,8 +10,13 @@
|
||||
|
||||
PUSHCONSTANT(postprocess, PostProcess);
|
||||
|
||||
RWTexture2D<float4> output : register(u0);
|
||||
RWTexture2D<float> output_rayLengths : register(u1);
|
||||
Texture2D<float3> texture_surface_normal : register(t0);
|
||||
Texture2D<float> texture_surface_roughness : register(t1);
|
||||
Texture2D<float3> texture_surface_environment : register(t2);
|
||||
|
||||
RWTexture2D<float4> output_rayIndirectSpecular : register(u0);
|
||||
RWTexture2D<float4> output_rayDirectionPDF : register(u1);
|
||||
RWTexture2D<float> output_rayLengths : register(u2);
|
||||
|
||||
struct RayPayload
|
||||
{
|
||||
@@ -30,34 +35,30 @@ void RTReflection_Raygen()
|
||||
{
|
||||
uint2 DTid = DispatchRaysIndex().xy;
|
||||
const float2 uv = ((float2)DTid.xy + 0.5) / (float2)DispatchRaysDimensions();
|
||||
const float depth = texture_depth.SampleLevel(sampler_linear_clamp, uv, 0);
|
||||
if (depth == 0)
|
||||
return;
|
||||
|
||||
const float3 P = reconstruct_position(uv, depth);
|
||||
const float3 V = normalize(GetCamera().position - P);
|
||||
const uint downsampleFactor = 2;
|
||||
|
||||
PrimitiveID prim;
|
||||
prim.unpack(texture_gbuffer0[DTid.xy * 2]);
|
||||
// This is necessary for accurate upscaling. This is so we don't reuse the same half-res pixels
|
||||
uint2 screenJitter = floor(blue_noise(uint2(0, 0)).xy * downsampleFactor);
|
||||
uint2 jitterPixel = screenJitter + DTid.xy * downsampleFactor;
|
||||
float2 jitterUV = (screenJitter + DTid.xy + 0.5f) / (float2)DispatchRaysDimensions();
|
||||
|
||||
//output[DTid] = float4(saturate(P * 0.1), 1);
|
||||
//return;
|
||||
const float depth = texture_depth.SampleLevel(sampler_linear_clamp, jitterUV, 0);
|
||||
const float roughness = texture_surface_roughness[jitterPixel];
|
||||
|
||||
Surface surface;
|
||||
surface.init();
|
||||
if (!surface.load(prim, P))
|
||||
if (!NeedReflection(roughness, depth))
|
||||
{
|
||||
return;
|
||||
}
|
||||
if (surface.roughness > 0.6)
|
||||
{
|
||||
output[DTid.xy] = float4(max(0, EnvironmentReflection_Global(surface)), 1);
|
||||
float3 environmentReflection = texture_surface_environment[DTid.xy * downsampleFactor];
|
||||
|
||||
output_rayIndirectSpecular[DTid.xy] = float4(environmentReflection, 1);
|
||||
output_rayDirectionPDF[DTid.xy] = 0.0;
|
||||
output_rayLengths[DTid.xy] = FLT_MAX;
|
||||
return;
|
||||
}
|
||||
|
||||
float3 N = surface.N;
|
||||
float roughness = surface.roughness;
|
||||
const float3 N = texture_surface_normal[jitterPixel];
|
||||
const float3 P = reconstruct_position(jitterUV, depth);
|
||||
const float3 V = normalize(GetCamera().position - P);
|
||||
|
||||
// The ray direction selection part is the same as in ssr_raytraceCS.hlsl:
|
||||
float4 H;
|
||||
@@ -78,7 +79,6 @@ void RTReflection_Raygen()
|
||||
// Tangent to world
|
||||
H.xyz = mul(H.xyz, tangentBasis);
|
||||
|
||||
|
||||
L = reflect(-V, H.xyz);
|
||||
}
|
||||
else
|
||||
@@ -87,7 +87,6 @@ void RTReflection_Raygen()
|
||||
L = reflect(-V, H.xyz);
|
||||
}
|
||||
|
||||
|
||||
const float3 R = L;
|
||||
|
||||
float seed = GetFrame().time;
|
||||
@@ -112,7 +111,8 @@ void RTReflection_Raygen()
|
||||
payload // Payload
|
||||
);
|
||||
|
||||
output[DTid.xy] = float4(payload.data.xyz, 1);
|
||||
output_rayIndirectSpecular[DTid.xy] = float4(L, 1);
|
||||
output_rayDirectionPDF[DTid.xy] = float4(L, H.w);
|
||||
output_rayLengths[DTid.xy] = payload.data.w;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
#include "globals.hlsli"
|
||||
#include "stochasticSSRHF.hlsli"
|
||||
#include "ShaderInterop_Postprocess.h"
|
||||
|
||||
PUSHCONSTANT(postprocess, PostProcess);
|
||||
|
||||
Texture2D<float4> texture_temporal : register(t0);
|
||||
Texture2D<float> texture_resolve_variance : register(t1);
|
||||
Texture2D<float3> texture_surface_normal : register(t2);
|
||||
Texture2D<float> texture_surface_roughness : register(t3);
|
||||
|
||||
RWTexture2D<float4> output : register(u0);
|
||||
|
||||
static const float depthThreshold = 10000.0;
|
||||
static const float normalThreshold = 1.0;
|
||||
static const float varianceEstimateThreshold = 0.015; // Larger variance values use stronger blur
|
||||
static const float varianceExitThreshold = 0.005; // Variance needs to be higher than this value to accept blur
|
||||
static const uint2 bilateralMinMaxRadius = uint2(0, 2); // Chosen by variance
|
||||
|
||||
#define BILATERAL_SIGMA 0.9
|
||||
|
||||
// One-directional bilateral blur of the temporally accumulated reflection buffer.
// Every tap is taken at DTid.xy + postprocess.params0.xy * r, so the filter axis is
// chosen by the caller through params0.xy. The kernel radius is selected from the
// resolve variance and scaled by roughness; taps are weighted by a gaussian and by how
// well their reconstructed position and normal agree with the centre pixel.
// Pixels that need no reflection, or whose variance/radius is too small, copy
// texture_temporal through unchanged.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
#if 0 // Debug
	output[DTid.xy] = float4((texture_resolve_variance[DTid.xy] > varianceEstimateThreshold).rrr, 1.0);
	return;
#endif

	const float depth = texture_depth[DTid.xy];
	const float roughness = texture_surface_roughness[DTid.xy];

	// Unfiltered value; also the fallback whenever no blur is applied.
	float4 outputColor = texture_temporal[DTid.xy];

	if (!NeedReflection(roughness, depth))
	{
		output[DTid.xy] = outputColor;
		return;
	}

	const float2 direction = postprocess.params0.xy;

	const float linearDepth = texture_lineardepth[DTid.xy];
	const float3 N = texture_surface_normal[DTid.xy];

	const float variance = texture_resolve_variance[DTid.xy];
	const bool strongBlur = variance > varianceEstimateThreshold;

	float radius = strongBlur ? bilateralMinMaxRadius.y : bilateralMinMaxRadius.x;
	radius = lerp(0.0, radius, saturate(roughness * 8.0)); // full radius is reached at roughness 0.125

	const float sigma = radius * BILATERAL_SIGMA;
	const int effectiveRadius = min(sigma * 2.0, radius);

	if (variance > varianceExitThreshold && effectiveRadius > 0)
	{
		const float2 uv = (DTid.xy + 0.5f) * postprocess.resolution_rcp;
		const float3 P = reconstruct_position(uv, depth);

		// Loop invariants, computed once instead of once per tap.
		const int2 resolution = (int2) postprocess.resolution;
		const float depthScale = linearDepth * GetCamera().z_far;

		float4 result = 0;
		float weightSum = 0.0f;

		for (int r = -effectiveRadius; r <= effectiveRadius; r++)
		{
			const int2 sampleCoord = DTid.xy + (direction * r); // Left to right diameter directionally

			// Skip taps that fall outside the render target.
			if (!all(sampleCoord >= int2(0, 0) && sampleCoord < resolution))
			{
				continue;
			}

			const float sampleDepth = texture_depth[sampleCoord];
			const float sampleRoughness = texture_surface_roughness[sampleCoord];

			// Don't let invalid roughness samples interfere
			if (!NeedReflection(sampleRoughness, sampleDepth))
			{
				continue;
			}

			// The remaining fetches are only needed for accepted taps.
			const float4 sampleColor = texture_temporal[sampleCoord];
			const float3 sampleN = texture_surface_normal[sampleCoord];

			const float2 sampleUV = (sampleCoord + 0.5) * postprocess.resolution_rcp;
			const float3 sampleP = reconstruct_position(sampleUV, sampleDepth);

			// Depth term: distance of each point from the other point's tangent plane,
			// taken relative to the centre pixel's depth.
			const float3 dq = P - sampleP;
			const float planeError = max(abs(dot(dq, sampleN)), abs(dot(dq, N)));
			const float relativeDepthDifference = planeError / depthScale;
			const float bilateralDepthWeight = exp(-sqr(relativeDepthDifference) * depthThreshold);

			// Normal term: falls off as the two normals diverge.
			const float normalError = pow(saturate(dot(sampleN, N)), 4.0);
			const float bilateralNormalWeight = saturate(1.0 - (1.0 - normalError) * normalThreshold);

			const float bilateralWeight = bilateralDepthWeight * bilateralNormalWeight;

			const float gaussian = exp(-sqr(r / sigma));
			const float weight = (r == 0) ? 1.0 : gaussian * bilateralWeight; // Skip center gaussian peak

			result += sampleColor * weight;
			weightSum += weight;
		}

		// The centre tap normally contributes weight 1. If it was rejected by the bounds
		// check above, weightSum is zero and the unfiltered value is kept instead of
		// dividing by zero.
		if (weightSum > 0.0f)
		{
			outputColor = result / weightSum;
		}
	}

	output[DTid.xy] = outputColor;
}
|
||||
@@ -0,0 +1,42 @@
|
||||
#include "globals.hlsli"
|
||||
#include "ShaderInterop_Postprocess.h"
|
||||
|
||||
PUSHCONSTANT(postprocess, PostProcess);
|
||||
|
||||
Texture2D<float2> input : register(t0);
|
||||
|
||||
RWTexture2D<float2> output : register(u0);
|
||||
|
||||
// Writes one level of the min/max depth hierarchy.
// output.x receives the maximum and output.y the minimum of a 2x2 gather footprint.
// When postprocess.params0.z == 1 the footprint is gathered from texture_depth;
// otherwise it is gathered from `input`, taking the maximum from its red channel and
// the minimum from its green channel.
// Threads at or beyond postprocess.params0.xy write nothing.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
	if (!all(DTid.xy < postprocess.params0.xy))
	{
		return;
	}

	float4 maxCandidates;
	float4 minCandidates;

	if (postprocess.params0.z == 1)
	{
		uint2 depthDim;
		texture_depth.GetDimensions(depthDim.x, depthDim.y);

		const float2 gatherUV = (DTid.xy + 0.5) / depthDim * 2; // Account for half-res

		// A single channel feeds both the min and the max reduction here.
		maxCandidates = texture_depth.GatherRed(sampler_point_clamp, gatherUV);
		minCandidates = maxCandidates;
	}
	else
	{
		const float2 gatherUV = (DTid.xy + 0.5) / postprocess.params0.xy;

		maxCandidates = input.GatherRed(sampler_point_clamp, gatherUV);
		minCandidates = input.GatherGreen(sampler_point_clamp, gatherUV);
	}

	const float depthMax = max(max(maxCandidates.x, maxCandidates.y), max(maxCandidates.z, maxCandidates.w));
	const float depthMin = min(min(minCandidates.x, minCandidates.y), min(minCandidates.z, minCandidates.w));

	output[DTid.xy] = float2(depthMax, depthMin);
}
|
||||
@@ -0,0 +1,27 @@
|
||||
#include "globals.hlsli"
|
||||
#include "ShaderInterop_Postprocess.h"
|
||||
|
||||
RWByteAddressBuffer tile_tracing_statistics : register(u0);
|
||||
RWStructuredBuffer<uint> tiles_tracing_earlyexit : register(u1);
|
||||
RWStructuredBuffer<uint> tiles_tracing_cheap : register(u2);
|
||||
RWStructuredBuffer<uint> tiles_tracing_expensive : register(u3);
|
||||
|
||||
// Single-thread pass that converts the three tile counters stored in
// tile_tracing_statistics (early exit / cheap / expensive) into indirect dispatch
// arguments and clears the counters.
[numthreads(1, 1, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
	// Snapshot the counters first; they are zeroed right below.
	const uint3 tileCounts = uint3(
		tile_tracing_statistics.Load(TILE_STATISTICS_OFFSET_EARLYEXIT),
		tile_tracing_statistics.Load(TILE_STATISTICS_OFFSET_CHEAP),
		tile_tracing_statistics.Load(TILE_STATISTICS_OFFSET_EXPENSIVE)
	);

	// Clear the counters.
	tile_tracing_statistics.Store(TILE_STATISTICS_OFFSET_EARLYEXIT, 0);
	tile_tracing_statistics.Store(TILE_STATISTICS_OFFSET_CHEAP, 0);
	tile_tracing_statistics.Store(TILE_STATISTICS_OFFSET_EXPENSIVE, 0);

	// Each counted tile is expanded into this many thread groups along X.
	const uint groupsPerTile = sqr(SSR_TILESIZE / 2 / POSTPROCESS_BLOCKSIZE);
	const uint3 groupCounts = tileCounts * groupsPerTile;

	// Emit (x, 1, 1) dispatch arguments for every category.
	tile_tracing_statistics.Store3(INDIRECT_OFFSET_EARLYEXIT, uint3(groupCounts.x, 1, 1));
	tile_tracing_statistics.Store3(INDIRECT_OFFSET_CHEAP, uint3(groupCounts.y, 1, 1));
	tile_tracing_statistics.Store3(INDIRECT_OFFSET_EXPENSIVE, uint3(groupCounts.z, 1, 1));
}
|
||||
@@ -1,66 +0,0 @@
|
||||
#include "globals.hlsli"
|
||||
#include "ShaderInterop_Postprocess.h"
|
||||
|
||||
PUSHCONSTANT(postprocess, PostProcess);
|
||||
|
||||
Texture2D<float4> texture_temporal : register(t0);
|
||||
|
||||
RWTexture2D<float4> output : register(u0);
|
||||
|
||||
// A Fast, Small-Radius GPU Median Filter by Morgan McGuire
|
||||
// https://casual-effects.com/research/McGuire2008Median/index.html
|
||||
|
||||
#define s2(a, b) temp = a; a = min(a, b); b = max(temp, b);
|
||||
#define t2(a, b) s2(v[a], v[b]);
|
||||
#define t24(a, b, c, d, e, f, g, h) t2(a, b); t2(c, d); t2(e, f); t2(g, h);
|
||||
#define t25(a, b, c, d, e, f, g, h, i, j) t24(a, b, c, d, e, f, g, h); t2(i, j);
|
||||
|
||||
// 5x5 median filter over texture_temporal, using the min/max exchange network built
// from the s2/t2/t24/t25 macros above. Threads whose texture_depth mip 1 value is 0
// return without writing.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
	if (texture_depth.Load(uint3(DTid.xy, 1)) == 0)
	{
		return;
	}

	const float2 uv = (DTid.xy + 0.5f) * postprocess.resolution_rcp;

	// Must stay named `v`: the t2 macro indexes it directly.
	half4 v[25];

	// Fill the window: the tap at offset (col - 2, row - 2) lands in v[col * 5 + row].
	[unroll]
	for (int col = 0; col < 5; ++col)
	{
		[unroll]
		for (int row = 0; row < 5; ++row)
		{
			const float2 offset = float2(float(col - 2), float(row - 2));
			const float2 tapUV = uv + offset * postprocess.resolution_rcp;
			v[col * 5 + row] = texture_temporal.SampleLevel(sampler_linear_clamp, tapUV, 0);
		}
	}

	// Must stay named `temp`: scratch value used inside the s2 macro.
	half4 temp;

	// Exchange network; after it has run, v[12] holds the median.
	t25(0, 1, 3, 4, 2, 4, 2, 3, 6, 7);
	t25(5, 7, 5, 6, 9, 7, 1, 7, 1, 4);
	t25(12, 13, 11, 13, 11, 12, 15, 16, 14, 16);
	t25(14, 15, 18, 19, 17, 19, 17, 18, 21, 22);
	t25(20, 22, 20, 21, 23, 24, 2, 5, 3, 6);
	t25(0, 6, 0, 3, 4, 7, 1, 7, 1, 4);
	t25(11, 14, 8, 14, 8, 11, 12, 15, 9, 15);
	t25(9, 12, 13, 16, 10, 16, 10, 13, 20, 23);
	t25(17, 23, 17, 20, 21, 24, 18, 24, 18, 21);
	t25(19, 22, 8, 17, 9, 18, 0, 18, 0, 9);
	t25(10, 19, 1, 19, 1, 10, 11, 20, 2, 20);
	t25(2, 11, 12, 21, 3, 21, 3, 12, 13, 22);
	t25(4, 22, 4, 13, 14, 23, 5, 23, 5, 14);
	t25(15, 24, 6, 24, 6, 15, 7, 16, 7, 19);
	t25(3, 11, 5, 17, 11, 17, 9, 17, 4, 10);
	t25(6, 12, 7, 14, 4, 6, 4, 7, 12, 14);
	t25(10, 14, 6, 7, 10, 12, 6, 10, 6, 17);
	t25(12, 17, 7, 17, 7, 10, 12, 18, 7, 12);
	t24(10, 18, 12, 20, 10, 20, 10, 12);

	output[DTid.xy] = v[12];
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,2 @@
|
||||
#define SSR_CHEAP
|
||||
#include "ssr_raytraceCS.hlsl"
|
||||
@@ -0,0 +1,2 @@
|
||||
#define SSR_EARLYEXIT
|
||||
#include "ssr_raytraceCS.hlsl"
|
||||
@@ -5,221 +5,170 @@
|
||||
|
||||
PUSHCONSTANT(postprocess, PostProcess);
|
||||
|
||||
Texture2D<float4> texture_raytrace : register(t0);
|
||||
Texture2D<float4> texture_main : register(t1);
|
||||
Texture2D<float3> texture_surface_normal : register(t0);
|
||||
Texture2D<float> texture_surface_roughness : register(t1);
|
||||
Texture2D<float4> texture_rayIndirectSpecular : register(t2);
|
||||
Texture2D<float4> texture_rayDirectionPDF : register(t3);
|
||||
Texture2D<float> texture_rayLength : register(t4);
|
||||
|
||||
RWTexture2D<float4> texture_resolve : register(u0);
|
||||
RWTexture2D<float> texture_resolve_variance : register(u1);
|
||||
RWTexture2D<float> texture_reprojectionDepth : register(u2);
|
||||
|
||||
static const float2 resolveSpatialSizeMinMax = float2(2.0, 8.0); // Good to have a min size as downsample scale (2x in this case)
|
||||
static const uint resolveSpatialReconstructionCount = 4.0f;
|
||||
|
||||
static const float2 spatialReuseOffsets3x3[9] =
|
||||
float GetWeight(int2 neighborTracingCoord, float3 V, float3 N, float roughness, float NdotV)
|
||||
{
|
||||
float2(0.0, 0.0),
|
||||
float2(0.0, 1.0),
|
||||
float2(1.0, -1.0),
|
||||
float2(-1.0, -1.0),
|
||||
float2(-1.0, 0.0),
|
||||
float2(0.0, -1.0),
|
||||
float2(1.0, 0.0),
|
||||
float2(-1.0, 1.0),
|
||||
float2(1.0, 1.0)
|
||||
};
|
||||
// Sample local pixel information
|
||||
float4 rayDirectionPDF = texture_rayDirectionPDF[neighborTracingCoord];
|
||||
float3 rayDirection = rayDirectionPDF.rgb;
|
||||
float PDF = rayDirectionPDF.a;
|
||||
|
||||
// Not in use, but could perhaps be useful in the future.
|
||||
/*float2 CalculateTailDirection(float3 viewNormal)
|
||||
{
|
||||
float3 upVector = abs(viewNormal.z) < 0.999 ? float3(0.0, 0.0, 1.0) : float3(1.0, 0.0, 0.0);
|
||||
float3 T = normalize(cross(upVector, viewNormal));
|
||||
float3 sampleL = normalize(rayDirection);
|
||||
float3 sampleH = normalize(sampleL + V);
|
||||
|
||||
float tailDirection = T.x * -viewNormal.y;
|
||||
|
||||
return lerp(float2(1.0, 0.1), float2(0.1, 1.0), tailDirection);
|
||||
}*/
|
||||
float sampleNdotH = saturate(dot(N, sampleH));
|
||||
float sampleNdotL = saturate(dot(N, sampleL));
|
||||
|
||||
float CalculateEdgeFade(float2 hitPixel)
|
||||
{
|
||||
float2 hitPixelNDC = hitPixel * 2.0 - 1.0;
|
||||
|
||||
//float maxDimension = min(1.0, max(abs(hitPixelNDC.x), abs(hitPixelNDC.y)));
|
||||
//float attenuation = 1.0 - max(0.0, maxDimension - blendScreenEdgeFade) / (1.0 - blendScreenEdgeFade);
|
||||
float roughnessBRDF = roughness * roughness;
|
||||
|
||||
float2 vignette = saturate(abs(hitPixelNDC) * SSRBlendScreenEdgeFade - (SSRBlendScreenEdgeFade - 1.0f));
|
||||
float attenuation = saturate(1.0 - dot(vignette, vignette));
|
||||
|
||||
return attenuation;
|
||||
float Vis = V_SmithGGXCorrelated(roughnessBRDF, NdotV, sampleNdotL);
|
||||
float D = D_GGX(roughnessBRDF, sampleNdotH, sampleH);
|
||||
float localBRDF = Vis * D * sampleNdotL;
|
||||
|
||||
float weight = localBRDF / max(PDF, 0.00001f);
|
||||
|
||||
return weight;
|
||||
}
|
||||
|
||||
void GetSampleInfo(float2 velocity, float2 neighborUV, float2 uv, float3 P, float3 V, float3 N, float NdotV, float specularConeTangent, float roughness, out float4 sampleColor, out float weight)
|
||||
// Weighted incremental variance
|
||||
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
|
||||
void GetWeightedVariance(float4 sampleColor, float weight, float weightSum, inout float mean, inout float S)
|
||||
{
|
||||
// Sample local pixel information
|
||||
float4 raytraceSource = texture_raytrace.SampleLevel(sampler_point_clamp, neighborUV, 0);
|
||||
|
||||
float2 hitPixel = raytraceSource.xy + velocity;
|
||||
float hitDepth = raytraceSource.z;
|
||||
float hitPDF = raytraceSource.w;
|
||||
float luminance = Luminance(sampleColor.rgb);
|
||||
float oldMean = mean;
|
||||
mean += weight / weightSum * (luminance - oldMean);
|
||||
S += weight * (luminance - oldMean) * (luminance - mean);
|
||||
}
|
||||
|
||||
float intersectionCircleRadius = specularConeTangent * length(hitPixel - uv);
|
||||
float sourceMip = clamp(log2(intersectionCircleRadius * ssr_input_resolution_max), 0.0, ssr_input_maxmip) * SSRResolveConeMip;
|
||||
|
||||
sampleColor.rgb = texture_main.SampleLevel(sampler_linear_clamp, hitPixel, sourceMip).rgb; // Scene color
|
||||
sampleColor.a = CalculateEdgeFade(raytraceSource.xy); // Opacity - Since this is used for masking, we can ignore velocity
|
||||
|
||||
// BRDF Weight
|
||||
|
||||
float3 hitViewPosition = reconstruct_position(hitPixel, hitDepth, GetCamera().inverse_projection);
|
||||
|
||||
float3 L = normalize(hitViewPosition - P);
|
||||
float3 H = normalize(L + V);
|
||||
// modified from 'globals.hlsli' with random shift
|
||||
// idx : iteration index
|
||||
// num : number of iterations in total
|
||||
// random : 16 bit random sequence
|
||||
inline float2 hammersley2d_random(uint idx, uint num, uint2 random)
|
||||
{
|
||||
uint bits = idx;
|
||||
bits = (bits << 16u) | (bits >> 16u);
|
||||
bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
|
||||
bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
|
||||
bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
|
||||
bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
|
||||
const float radicalInverse_VdC = float(bits ^ random.y) * 2.3283064365386963e-10; // / 0x100000000
|
||||
|
||||
float NdotH = saturate(dot(N, H));
|
||||
float NdotL = saturate(dot(N, L));
|
||||
|
||||
Surface surface;
|
||||
surface.init();
|
||||
surface.roughnessBRDF = roughness * roughness;
|
||||
surface.NdotV = NdotV;
|
||||
|
||||
SurfaceToLight surfaceToLight;
|
||||
surfaceToLight.NdotH = NdotH;
|
||||
surfaceToLight.NdotL = NdotL;
|
||||
|
||||
// Calculate BRDF where Fresnel = 1
|
||||
float Vis = V_SmithGGXCorrelated(surface.roughnessBRDF, surface.NdotV, surfaceToLight.NdotL);
|
||||
float D = D_GGX(surface.roughnessBRDF, surfaceToLight.NdotH, surfaceToLight.H);
|
||||
float specularLight = Vis * D * PI / 4.0;
|
||||
// ... & 0xffff) / (1 << 16): limit to 65536 then range 0 - 1
|
||||
return float2(frac(float(idx) / float(num) + float(random.x & 0xffff) / (1 << 16)), radicalInverse_VdC); // frac since we only want range [0; 1[
|
||||
}
|
||||
|
||||
weight = specularLight / max(hitPDF, 0.00001f);
|
||||
// Mixes a 3-component unsigned integer key into a single 32-bit hash value.
uint baseHash(uint3 p)
{
	const uint multiplier = 1103515245u;

	// First round: combine each lane with its rotated neighbour.
	const uint3 mixed = multiplier * ((p.xyz >> 1u) ^ p.yzx);

	// Second round: collapse the three lanes into one word.
	const uint folded = multiplier * ((mixed.x ^ mixed.z) ^ (mixed.y >> 3u));

	// Fold the upper half into the lower half.
	return folded ^ (folded >> 16);
}
|
||||
|
||||
// Great quality hash with 3D input
// based on: https://www.shadertoy.com/view/Xt3cDn
// Returns three values derived from a single baseHash() result: the hash itself and
// the hash scaled by two different multipliers.
uint3 hash33(uint3 x)
{
	const uint seed = baseHash(x);

	// Multipliers, see: http://random.mat.sbg.ac.at/results/karl/server/node4.html
	return seed * uint3(1u, 16807u, 48271u);
}
|
||||
|
||||
// Computes post-projection depth from linear depth.
// lin  : linear depth to convert
// near : near plane distance
// far  : far plane distance
// With this formula lin == near maps to 1 and lin == far maps to 0.
float getInverseLinearDepth(float lin, float near, float far)
{
	const float numerator = (lin - 2 * far) * near + far * lin;
	const float denominator = lin * near - far * lin;

	// numerator / denominator lies in [-1, 1]; remap it to [0, 1].
	return (numerator / denominator + 1) / 2;
}
|
||||
|
||||
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
|
||||
void main(uint3 DTid : SV_DispatchThreadID)
|
||||
{
|
||||
const float2 uv = (DTid.xy + 0.5f) * postprocess.resolution_rcp;
|
||||
const float depth = texture_depth.SampleLevel(sampler_linear_clamp, uv, 0);
|
||||
if (depth == 0.0f)
|
||||
return;
|
||||
const uint2 tracingCoord = DTid.xy / 2;
|
||||
|
||||
// Everything in view space:
|
||||
const float3 P = reconstruct_position(uv, depth, GetCamera().inverse_projection);
|
||||
const float3 V = normalize(-P);
|
||||
const float depth = texture_depth[DTid.xy];
|
||||
const float roughness = texture_surface_roughness[DTid.xy];
|
||||
|
||||
PrimitiveID prim;
|
||||
prim.unpack(texture_gbuffer0[DTid.xy * 2]);
|
||||
|
||||
Surface surface;
|
||||
surface.init();
|
||||
if (!surface.load(prim, P))
|
||||
if (!NeedReflection(roughness, depth))
|
||||
{
|
||||
texture_resolve[DTid.xy] = texture_rayIndirectSpecular[tracingCoord];
|
||||
texture_resolve_variance[DTid.xy] = 0.0;
|
||||
texture_reprojectionDepth[DTid.xy] = 0.0;
|
||||
return;
|
||||
}
|
||||
|
||||
const float3 N = normalize(mul((float3x3)GetCamera().view, surface.N));
|
||||
const float roughness = GetRoughness(surface.roughness);
|
||||
|
||||
// Everything in world space:
|
||||
const float3 P = reconstruct_position(uv, depth);
|
||||
const float3 N = texture_surface_normal[DTid.xy];
|
||||
const float3 V = normalize(GetCamera().position - P);
|
||||
const float NdotV = saturate(dot(N, V));
|
||||
|
||||
const float2 velocity = texture_gbuffer1.SampleLevel(sampler_point_clamp, uv, 0).xy;
|
||||
const float2 prevUV = uv + velocity;
|
||||
const float resolveSpatialScale = saturate(roughness * 5.0); // roughness 0.2 is destination
|
||||
const float2 resolveSpatialSize = lerp(resolveSpatialSizeMinMax.x, resolveSpatialSizeMinMax.y, resolveSpatialScale);
|
||||
|
||||
// Early out, useless if the roughness is out of range
|
||||
float roughnessFade = GetRoughnessFade(roughness, SSRMaxRoughness);
|
||||
if (roughnessFade <= 0.0f)
|
||||
{
|
||||
texture_resolve[DTid.xy] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// Since we aren't importance sampling in this range, no need to resolve
|
||||
if (roughness < 0.05f)
|
||||
{
|
||||
float4 raytraceSource = texture_raytrace.SampleLevel(sampler_point_clamp, uv, 0);
|
||||
float2 hitPixel = raytraceSource.xy + velocity;
|
||||
|
||||
float4 sampleColor;
|
||||
sampleColor.rgb = texture_main.SampleLevel(sampler_linear_clamp, hitPixel, 0).rgb; // Scene color
|
||||
sampleColor.a = CalculateEdgeFade(raytraceSource.xy); // Opacity
|
||||
|
||||
texture_resolve[DTid.xy] = sampleColor;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Cone mip sampling
|
||||
float specularConeTangent = lerp(0.0, roughness * (1.0 - GGX_IMPORTANCE_SAMPLE_BIAS), NdotV * sqrt(roughness));
|
||||
specularConeTangent *= lerp(saturate(NdotV * 2), 1.0f, sqrt(roughness));
|
||||
|
||||
|
||||
#if 1 // EAW spatial resolve
|
||||
|
||||
|
||||
float4 result = 0.0f;
|
||||
float weightSum = 0.0f;
|
||||
|
||||
#define BLOCK_SAMPLE_RADIUS 1
|
||||
|
||||
[unroll]
|
||||
for (int y = -BLOCK_SAMPLE_RADIUS; y <= BLOCK_SAMPLE_RADIUS; y++)
|
||||
|
||||
float mean = 0.0f;
|
||||
float S = 0.0f;
|
||||
|
||||
float closestRayLength = 0.0f;
|
||||
|
||||
const uint sampleCount = resolveSpatialReconstructionCount;
|
||||
const uint2 random = hash33(uint3(DTid.xy, GetFrame().frame_count)).xy;
|
||||
|
||||
for (int i = 0; i < sampleCount; i++)
|
||||
{
|
||||
[loop]
|
||||
for (int x = -BLOCK_SAMPLE_RADIUS; x <= BLOCK_SAMPLE_RADIUS; x++)
|
||||
float2 offset = (hammersley2d_random(i, sampleCount, random) - 0.5) * resolveSpatialSize;
|
||||
|
||||
int2 neighborTracingCoord = tracingCoord + offset;
|
||||
int2 neighborCoord = DTid.xy + offset;
|
||||
|
||||
float neighborDepth = texture_depth[neighborCoord];
|
||||
if (neighborDepth > 0.0)
|
||||
{
|
||||
if (uint(abs(x) + abs(y)) % 2 == 0)
|
||||
continue;
|
||||
|
||||
float2 offsetUV = float2(x, y) * postprocess.resolution_rcp * SSRResolveSpatialSize;
|
||||
float2 neighborUV = uv + offsetUV;
|
||||
|
||||
float4 sampleColor;
|
||||
float weight;
|
||||
GetSampleInfo(velocity, neighborUV, uv, P, V, N, NdotV, specularConeTangent, roughness, sampleColor, weight);
|
||||
|
||||
float weight = GetWeight(neighborTracingCoord, V, N, roughness, NdotV);
|
||||
|
||||
float4 sampleColor = texture_rayIndirectSpecular[neighborTracingCoord];
|
||||
sampleColor.rgb *= rcp(1 + Luminance(sampleColor.rgb));
|
||||
|
||||
|
||||
result += sampleColor * weight;
|
||||
weightSum += weight;
|
||||
|
||||
GetWeightedVariance(sampleColor, weight, weightSum, mean, S);
|
||||
|
||||
if (weight > 0.001)
|
||||
{
|
||||
float neighborRayLength = texture_rayLength[neighborTracingCoord];
|
||||
closestRayLength = max(closestRayLength, neighborRayLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
result /= weightSum;
|
||||
|
||||
result.rgb *= rcp(1 - Luminance(result.rgb));
|
||||
|
||||
#undef BLOCK_SAMPLE_RADIUS
|
||||
|
||||
|
||||
#else // Frostbite presentation, spatial resolve
|
||||
|
||||
|
||||
float4 result = 0.0f;
|
||||
float weightSum = 0.0f;
|
||||
|
||||
#define NUM_RESOLVE 4 // Four samples to achieve effective ray reuse patterns
|
||||
|
||||
[unroll]
|
||||
for (uint i = 0; i < NUM_RESOLVE; i++)
|
||||
{
|
||||
float2 offsetUV = spatialReuseOffsets3x3[i] * postprocess.resolution_rcp * SSRResolveSpatialSize;
|
||||
float2 neighborUV = uv + offsetUV;
|
||||
|
||||
float4 sampleColor;
|
||||
float weight;
|
||||
GetSampleInfo(velocity, neighborUV, uv, P, V, N, NdotV, specularConeTangent, roughness, sampleColor, weight);
|
||||
|
||||
sampleColor.rgb *= rcp( 1 + Luminance(sampleColor.rgb) );
|
||||
|
||||
result += sampleColor * weight;
|
||||
weightSum += weight;
|
||||
}
|
||||
result /= weightSum;
|
||||
|
||||
result.rgb *= rcp( 1 - Luminance(result.rgb) );
|
||||
|
||||
#undef NUM_RESOLVE
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
result *= roughnessFade;
|
||||
result *= SSRIntensity;
|
||||
|
||||
result.rgb *= rcp(1 - Luminance(result.rgb));
|
||||
|
||||
// Population variance
|
||||
float resolveVariance = S / weightSum;
|
||||
|
||||
// Convert to post-projection depth so we can construct dual source reprojection buffers later
|
||||
const float lineardepth = texture_lineardepth[DTid.xy] * GetCamera().z_far;
|
||||
float reprojectionDepth = getInverseLinearDepth(lineardepth + closestRayLength, GetCamera().z_near, GetCamera().z_far);
|
||||
|
||||
texture_resolve[DTid.xy] = max(result, 0.00001f);
|
||||
texture_resolve_variance[DTid.xy] = resolveVariance;
|
||||
texture_reprojectionDepth[DTid.xy] = reprojectionDepth;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
#include "globals.hlsli"
|
||||
#include "brdf.hlsli"
|
||||
#include "lightingHF.hlsli"
|
||||
#include "ShaderInterop_Postprocess.h"
|
||||
|
||||
PUSHCONSTANT(postprocess, PostProcess);
|
||||
|
||||
// Per-pixel outputs of this pass. All three are written as 0 when the pixel has
// depth == 0 or when the surface could not be loaded.
// Receives surface.N
RWTexture2D<float3> output_surface_normal : register(u0);
|
||||
// Receives surface.roughness
RWTexture2D<float> output_surface_roughness : register(u1);
|
||||
// Receives the result of EnvironmentReflection_Global(surface)
RWTexture2D<float3> output_surface_environment : register(u2);
|
||||
|
||||
// Surface pass: one thread per pixel.
// Loads the surface behind the primitive ID stored in texture_gbuffer0 and writes its
// normal, roughness and global environment reflection to the three output textures.
// Pixels with depth == 0 and pixels whose surface fails to load get 0 in all outputs.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
	const uint2 pixel = DTid.xy;

	// Values written when there is nothing valid to output
	float3 normalOut = 0.0;
	float roughnessOut = 0.0;
	float3 environmentOut = 0.0;

	const float depth = texture_depth[pixel];
	if (depth != 0.0)
	{
		uint2 resolution;
		texture_depth.GetDimensions(resolution.x, resolution.y);

		// Pixel center in normalized [0; 1] coordinates
		const float2 uv = (pixel + 0.5f) / resolution;

		const uint2 packedPrimitive = texture_gbuffer0[pixel]; // Map to resolution

		PrimitiveID prim;
		prim.unpack(packedPrimitive);

		Surface surface;
		surface.init();
		if (surface.load(prim, reconstruct_position(uv, depth)))
		{
			normalOut = surface.N;
			roughnessOut = surface.roughness;
			environmentOut = EnvironmentReflection_Global(surface);
		}
	}

	output_surface_normal[pixel] = normalOut;
	output_surface_roughness[pixel] = roughnessOut;
	output_surface_environment[pixel] = environmentOut;
}
|
||||
@@ -4,177 +4,236 @@
|
||||
|
||||
PUSHCONSTANT(postprocess, PostProcess);
|
||||
|
||||
Texture2D<float4> resolve_current : register(t0);
|
||||
Texture2D<float4> resolve_history : register(t1);
|
||||
Texture2D<float> rayLengths : register(t3);
|
||||
Texture2D<float> texture_surface_roughness : register(t0);
|
||||
Texture2D<float4> texture_color_current : register(t1);
|
||||
Texture2D<float4> texture_color_history : register(t2);
|
||||
Texture2D<float> texture_variance_current : register(t3);
|
||||
Texture2D<float> texture_variance_history : register(t4);
|
||||
Texture2D<float> texture_reprojectionDepth : register(t5);
|
||||
|
||||
RWTexture2D<float4> output : register(u0);
|
||||
RWTexture2D<float4> output_color : register(u0);
|
||||
RWTexture2D<float> output_variance : register(u1);
|
||||
|
||||
static const float temporalResponseMin = 0.75;
|
||||
static const float temporalResponseMax = 0.95f;
|
||||
static const float temporalScale = 3.0;
|
||||
static const float temporalExposure = 10.0f;
|
||||
static const float temporalResponse = 0.95;
|
||||
static const float temporalScale = 2.0;
|
||||
static const float disocclusionDepthWeight = 1.0f;
|
||||
static const float disocclusionThreshold = 0.9f;
|
||||
static const float varianceTemporalResponse = 0.9f;
|
||||
|
||||
inline float Luma4(float3 color)
|
||||
float2 CalculateReprojectionBuffer(float2 uv, float depth)
|
||||
{
|
||||
return (color.g * 2) + (color.r + color.b);
|
||||
float x = uv.x * 2 - 1;
|
||||
float y = (1 - uv.y) * 2 - 1;
|
||||
float2 screenPosition = float2(x, y);
|
||||
|
||||
float4 thisClip = float4(screenPosition, depth, 1);
|
||||
|
||||
float4 prevClip = mul(GetCamera().inverse_view_projection, thisClip);
|
||||
prevClip = mul(GetCamera().previous_view_projection, prevClip);
|
||||
|
||||
float2 prevScreen = prevClip.xy / prevClip.w;
|
||||
|
||||
float2 screenVelocity = screenPosition - prevScreen;
|
||||
float2 prevScreenPosition = screenPosition - screenVelocity;
|
||||
|
||||
return prevScreenPosition * float2(0.5, -0.5) + 0.5;
|
||||
}
|
||||
|
||||
inline float HdrWeight4(float3 color, float exposure)
|
||||
float GetDisocclusion(float depth, float depthHistory)
|
||||
{
|
||||
return rcp(Luma4(color) * exposure + 4.0f);
|
||||
float lineardepthCurrent = compute_lineardepth(depth);
|
||||
float lineardepthHistory = compute_lineardepth(depthHistory);
|
||||
|
||||
float disocclusion = 1.0
|
||||
//* exp(-abs(1.0 - max(0.0, dot(normal, normalHistory))) * disocclusionNormalWeight) // Potential normal check if necessary
|
||||
* exp(-abs(lineardepthHistory - lineardepthCurrent) / lineardepthCurrent * disocclusionDepthWeight);
|
||||
|
||||
return disocclusion;
|
||||
}
|
||||
|
||||
float4 clip_aabb(float3 aabb_min, float3 aabb_max, float4 p, float4 q)
|
||||
float4 SamplePreviousColor(float2 prevUV, float2 size, float depth, out float disocclusion, out float2 prevUVSample)
|
||||
{
|
||||
float3 p_clip = 0.5 * (aabb_max + aabb_min);
|
||||
float3 e_clip = 0.5 * (aabb_max - aabb_min) + 0.00000001f;
|
||||
prevUVSample = prevUV;
|
||||
|
||||
float4 v_clip = q - float4(p_clip, p.w);
|
||||
float3 v_unit = v_clip.xyz / e_clip;
|
||||
float3 a_unit = abs(v_unit);
|
||||
float ma_unit = max(a_unit.x, max(a_unit.y, a_unit.z));
|
||||
float4 previousColor = texture_color_history.SampleLevel(sampler_linear_clamp, prevUVSample, 0);
|
||||
float previousDepth = texture_depth_history.SampleLevel(sampler_point_clamp, prevUVSample, 0);
|
||||
|
||||
if (ma_unit > 1.0)
|
||||
return float4(p_clip, p.w) + v_clip / ma_unit;
|
||||
else
|
||||
return q; // point inside aabb
|
||||
}
|
||||
disocclusion = GetDisocclusion(depth, previousDepth);
|
||||
if (disocclusion > disocclusionThreshold) // Good enough
|
||||
{
|
||||
return previousColor;
|
||||
}
|
||||
|
||||
inline void ResolverAABB(Texture2D<float4> currentColor, SamplerState currentSampler, float sharpness, float exposureScale, float AABBScale, float2 uv, float2 texelSize, inout float4 currentMin, inout float4 currentMax, inout float4 currentAverage, inout float4 currentOutput)
|
||||
{
|
||||
const int2 SampleOffset[9] = { int2(-1.0, -1.0), int2(0.0, -1.0), int2(1.0, -1.0), int2(-1.0, 0.0), int2(0.0, 0.0), int2(1.0, 0.0), int2(-1.0, 1.0), int2(0.0, 1.0), int2(1.0, 1.0) };
|
||||
|
||||
// Modulate Luma HDR
|
||||
|
||||
float4 sampleColors[9];
|
||||
[unroll]
|
||||
for (uint i = 0; i < 9; i++)
|
||||
{
|
||||
sampleColors[i] = currentColor.SampleLevel(currentSampler, uv + (SampleOffset[i] / texelSize), 0.0f);
|
||||
}
|
||||
// Try to find the closest sample in the vicinity if we are not convinced of a disocclusion
|
||||
if (disocclusion < disocclusionThreshold)
|
||||
{
|
||||
float2 closestUV = prevUVSample;
|
||||
float2 dudv = rcp(size);
|
||||
|
||||
float sampleWeights[9];
|
||||
[unroll]
|
||||
for (uint j = 0; j < 9; j++)
|
||||
{
|
||||
sampleWeights[j] = HdrWeight4(sampleColors[j].rgb, exposureScale);
|
||||
}
|
||||
const int searchRadius = 1;
|
||||
for (int y = -searchRadius; y <= searchRadius; y++)
|
||||
{
|
||||
for (int x = -searchRadius; x <= searchRadius; x++)
|
||||
{
|
||||
int2 offset = int2(x, y);
|
||||
float2 sampleUV = prevUVSample + offset * dudv;
|
||||
|
||||
float totalWeight = 0;
|
||||
[unroll]
|
||||
for (uint k = 0; k < 9; k++)
|
||||
{
|
||||
totalWeight += sampleWeights[k];
|
||||
}
|
||||
sampleColors[4] = (sampleColors[0] * sampleWeights[0] + sampleColors[1] * sampleWeights[1] + sampleColors[2] * sampleWeights[2] + sampleColors[3] * sampleWeights[3] + sampleColors[4] * sampleWeights[4] +
|
||||
sampleColors[5] * sampleWeights[5] + sampleColors[6] * sampleWeights[6] + sampleColors[7] * sampleWeights[7] + sampleColors[8] * sampleWeights[8]) / totalWeight;
|
||||
float samplePreviousDepth = texture_depth_history.SampleLevel(sampler_point_clamp, sampleUV, 0);
|
||||
|
||||
// Variance Clipping (AABB)
|
||||
|
||||
float4 m1 = 0.0;
|
||||
float4 m2 = 0.0;
|
||||
[unroll]
|
||||
for (uint x = 0; x < 9; x++)
|
||||
{
|
||||
m1 += sampleColors[x];
|
||||
m2 += sampleColors[x] * sampleColors[x];
|
||||
}
|
||||
float weight = GetDisocclusion(depth, samplePreviousDepth);
|
||||
if (weight > disocclusion)
|
||||
{
|
||||
disocclusion = weight;
|
||||
closestUV = sampleUV;
|
||||
prevUVSample = closestUV;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
float4 mean = m1 / 9.0;
|
||||
float4 stddev = sqrt((m2 / 9.0) - sqr(mean));
|
||||
|
||||
currentMin = mean - AABBScale * stddev;
|
||||
currentMax = mean + AABBScale * stddev;
|
||||
previousColor = texture_color_history.SampleLevel(sampler_linear_clamp, prevUVSample, 0);
|
||||
}
|
||||
|
||||
currentOutput = sampleColors[4];
|
||||
currentMin = min(currentMin, currentOutput);
|
||||
currentMax = max(currentMax, currentOutput);
|
||||
currentAverage = mean;
|
||||
// Bilinear interpolation on fallback - near edges
|
||||
if (disocclusion < disocclusionThreshold)
|
||||
{
|
||||
float2 weight = frac(prevUVSample * size + 0.5);
|
||||
|
||||
// Bilinear weights
|
||||
float weights[4] =
|
||||
{
|
||||
(1 - weight.x) * (1 - weight.y),
|
||||
weight.x * (1 - weight.y),
|
||||
(1 - weight.x) * weight.y,
|
||||
weight.x * weight.y
|
||||
};
|
||||
|
||||
float4 previousColorResult = 0;
|
||||
float previousDepthResult = 0;
|
||||
float weightSum = 0;
|
||||
|
||||
uint2 prevCoord = uint2(size * prevUVSample - 0.5);
|
||||
uint2 offsets[4] = { uint2(0, 0), uint2(1, 0), uint2(0, 1), uint2(1, 1) };
|
||||
|
||||
for (uint i = 0; i < 4; i++)
|
||||
{
|
||||
uint2 sampleCoord = prevCoord + offsets[i];
|
||||
|
||||
previousColorResult += weights[i] * texture_color_history[sampleCoord];
|
||||
previousDepthResult += weights[i] * texture_depth_history[sampleCoord];
|
||||
|
||||
weightSum += weights[i];
|
||||
}
|
||||
|
||||
previousColorResult /= max(weightSum, 0.00001);
|
||||
previousDepthResult /= max(weightSum, 0.00001);
|
||||
|
||||
previousColor = previousColorResult;
|
||||
disocclusion = GetDisocclusion(depth, previousDepthResult);
|
||||
}
|
||||
|
||||
disocclusion = disocclusion < disocclusionThreshold ? 0.0 : disocclusion;
|
||||
return previousColor;
|
||||
}
|
||||
|
||||
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
|
||||
void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex)
|
||||
void main(uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint3 DTid : SV_DispatchThreadID)
|
||||
{
|
||||
if ((uint)ssr_frame == 0)
|
||||
if ((uint) ssr_frame == 0)
|
||||
{
|
||||
output[DTid.xy] = resolve_current[DTid.xy];
|
||||
output_color[DTid.xy] = texture_color_current[DTid.xy];
|
||||
return;
|
||||
}
|
||||
|
||||
const float2 uv = (DTid.xy + 0.5f) * postprocess.resolution_rcp;
|
||||
const float depth = texture_depth.SampleLevel(sampler_linear_clamp, uv, 0);
|
||||
if (depth == 0)
|
||||
return;
|
||||
const float depth = texture_depth[DTid.xy];
|
||||
const float roughness = texture_surface_roughness[DTid.xy];
|
||||
|
||||
const float2 velocity = texture_gbuffer1.SampleLevel(sampler_point_clamp, uv, 0).xy;
|
||||
float2 prevUV = uv + velocity;
|
||||
if (!is_saturated(prevUV))
|
||||
if (!NeedReflection(roughness, depth))
|
||||
{
|
||||
output[DTid.xy] = resolve_current[DTid.xy];
|
||||
output_color[DTid.xy] = texture_color_current[DTid.xy];
|
||||
output_variance[DTid.xy] = 0.0;
|
||||
return;
|
||||
}
|
||||
|
||||
const float3 P = reconstruct_position(uv, depth, GetCamera().inverse_projection);
|
||||
// Welford's online algorithm:
|
||||
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
|
||||
|
||||
PrimitiveID prim;
|
||||
prim.unpack(texture_gbuffer0[DTid.xy * 2]);
|
||||
|
||||
Surface surface;
|
||||
surface.init();
|
||||
if (!surface.load(prim, P))
|
||||
return;
|
||||
|
||||
const float roughness = surface.roughness;
|
||||
|
||||
if (roughness < 0.01)
|
||||
float4 m1 = 0.0;
|
||||
float4 m2 = 0.0;
|
||||
for (int x = -1; x <= 1; x++)
|
||||
{
|
||||
output[DTid.xy] = resolve_current[DTid.xy];
|
||||
//return;
|
||||
}
|
||||
|
||||
// Secondary reprojection based on ray lengths:
|
||||
// https://www.ea.com/seed/news/seed-dd18-presentation-slides-raytracing (Slide 45)
|
||||
if (roughness < 0.5)
|
||||
{
|
||||
float rayLength = rayLengths[DTid.xy];
|
||||
if (rayLength > 0)
|
||||
for (int y = -1; y <= 1; y++)
|
||||
{
|
||||
const float3 P = reconstruct_position(uv, depth);
|
||||
const float3 V = normalize(GetCamera().position - P);
|
||||
const float3 rayEnd = P - V * rayLength;
|
||||
float4 rayEndPrev = mul(GetCamera().previous_view_projection, float4(rayEnd, 1));
|
||||
rayEndPrev.xy /= rayEndPrev.w;
|
||||
prevUV = rayEndPrev.xy * float2(0.5, -0.5) + 0.5;
|
||||
int2 offset = int2(x, y);
|
||||
int2 coord = DTid.xy + offset;
|
||||
|
||||
float4 sampleColor = texture_color_current[coord];
|
||||
|
||||
m1 += sampleColor;
|
||||
m2 += sampleColor * sampleColor;
|
||||
}
|
||||
}
|
||||
|
||||
// Disocclusion fallback:
|
||||
float depth_current = compute_lineardepth(depth);
|
||||
float depth_history = compute_lineardepth(texture_depth_history.SampleLevel(sampler_point_clamp, prevUV, 1));
|
||||
if (abs(depth_current - depth_history) > 1)
|
||||
float4 mean = m1 / 9.0;
|
||||
float4 variance = (m2 / 9.0) - (mean * mean);
|
||||
float4 stddev = sqrt(max(variance, 0.0f));
|
||||
|
||||
// Secondary reprojection based on ray lengths:
|
||||
// https://www.ea.com/seed/news/seed-dd18-presentation-slides-raytracing (Slide 45)
|
||||
|
||||
float2 velocity = texture_gbuffer1[DTid.xy];
|
||||
float reprojectionDepth = texture_reprojectionDepth[DTid.xy];
|
||||
|
||||
float2 uv = (DTid.xy + 0.5f) * postprocess.resolution_rcp;
|
||||
|
||||
float2 prevUVVelocity = uv + velocity;
|
||||
float2 prevUVReflectionHit = CalculateReprojectionBuffer(uv, reprojectionDepth);
|
||||
|
||||
float4 previousColorVelocity = texture_color_history.SampleLevel(sampler_linear_clamp, prevUVVelocity, 0);
|
||||
float4 previousColorReflectionHit = texture_color_history.SampleLevel(sampler_linear_clamp, prevUVReflectionHit, 0);
|
||||
|
||||
float previousDistanceVelocity = abs(Luminance(previousColorVelocity.rgb) - Luminance(mean.rgb));
|
||||
float previousDistanceReflectionHit = abs(Luminance(previousColorReflectionHit.rgb) - Luminance(mean.rgb));
|
||||
|
||||
float2 prevUV = previousDistanceVelocity < previousDistanceReflectionHit ? prevUVVelocity : prevUVReflectionHit;
|
||||
|
||||
float disocclusion = 0.0;
|
||||
float2 prevUVSample = 0.0;
|
||||
float4 previousColor = SamplePreviousColor(prevUV, postprocess.resolution, depth, disocclusion, prevUVSample);
|
||||
|
||||
float4 currentColor = texture_color_current[DTid.xy];
|
||||
float4 resultColor = currentColor;
|
||||
|
||||
// Disocclusion fallback: color
|
||||
if (disocclusion > disocclusionThreshold && is_saturated(prevUVSample))
|
||||
{
|
||||
output[DTid.xy] = resolve_current[DTid.xy];
|
||||
//output[DTid.xy] = float4(1, 0, 0, 1);
|
||||
return;
|
||||
// Color box clamp
|
||||
float4 colorMin = mean - temporalScale * stddev;
|
||||
float4 colorMax = mean + temporalScale * stddev;
|
||||
previousColor = clamp(previousColor, colorMin, colorMax);
|
||||
|
||||
resultColor = lerp(currentColor, previousColor, temporalResponse);
|
||||
}
|
||||
|
||||
float4 previous = resolve_history.SampleLevel(sampler_linear_clamp, prevUV, 0);
|
||||
#if 0 // Debug
|
||||
else
|
||||
{
|
||||
resultColor = float4(1, 0, 0, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Luma HDR and AABB minmax
|
||||
|
||||
float4 current = 0;
|
||||
float4 currentMin, currentMax, currentAverage;
|
||||
ResolverAABB(resolve_current, sampler_linear_clamp, 0, temporalExposure, temporalScale, uv, postprocess.resolution, currentMin, currentMax, currentAverage, current);
|
||||
float currentVariance = texture_variance_current[DTid.xy];
|
||||
float varianceResponse = varianceTemporalResponse;
|
||||
|
||||
previous.xyz = clip_aabb(currentMin.xyz, currentMax.xyz, clamp(currentAverage, currentMin, currentMax), previous).xyz;
|
||||
previous.a = clamp(previous.a, currentMin.a, currentMax.a);
|
||||
|
||||
// Blend color & history
|
||||
|
||||
float blendFinal = lerp(temporalResponseMin, temporalResponseMax, saturate(1.0 - length(velocity) * 100));
|
||||
|
||||
float4 result = lerp(current, previous, blendFinal);
|
||||
|
||||
output[DTid.xy] = max(0, result);
|
||||
// Disocclusion fallback: variance
|
||||
if (disocclusion < disocclusionThreshold || !is_saturated(prevUVSample))
|
||||
{
|
||||
// Apply white for variance on occlusion. This helps to hide artifacts from temporal
|
||||
varianceResponse = 0.0f;
|
||||
currentVariance = 1.0f;
|
||||
}
|
||||
|
||||
float previousVariance = texture_variance_history.SampleLevel(sampler_linear_clamp, prevUVSample, 0);
|
||||
float resultVariance = lerp(currentVariance, previousVariance, varianceResponse);
|
||||
|
||||
output_color[DTid.xy] = max(0, resultColor);
|
||||
output_variance[DTid.xy] = max(0, resultVariance);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
#include "globals.hlsli"
|
||||
#include "brdf.hlsli"
|
||||
#include "ShaderInterop_Postprocess.h"
|
||||
|
||||
PUSHCONSTANT(postprocess, PostProcess);
|
||||
|
||||
// Input: per-pixel roughness, read for every pixel whose depth is not 0
Texture2D<float> texture_surface_roughness : register(t0);
|
||||
|
||||
// Output: x = minimum, y = maximum roughness of a horizontal run of SSR_TILESIZE pixels
RWTexture2D<float2> tile_minmax_roughness_horizontal : register(u0);
|
||||
|
||||
// Horizontal pass of the tile roughness min/max reduction.
// Each thread walks SSR_TILESIZE horizontally adjacent pixels of one row, starting at
// x = DTid.x * SSR_TILESIZE, and stores float2(min, max) of their roughness at DTid.xy.
// Pixels with depth == 0 are counted as roughness 1.0.
// Pixels outside of the depth texture are ignored; if every pixel of the run is outside,
// the initial values float2(1.0, 0.0) are written.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
	const uint2 tile_upperleft = uint2(DTid.x * SSR_TILESIZE, DTid.y);
	float minRoughness = 1.0;
	float maxRoughness = 0.0;

	uint2 dim;
	texture_depth.GetDimensions(dim.x, dim.y);

	[loop]
	for (uint i = 0; i < SSR_TILESIZE; ++i)
	{
		const uint2 pixel = uint2(tile_upperleft.x + i, tile_upperleft.y);

		// Coordinates are unsigned, so only the upper bounds need to be checked
		if (pixel.x < dim.x && pixel.y < dim.y)
		{
			const float depth = texture_depth[pixel];
			if (depth == 0.0)
			{
				maxRoughness = max(maxRoughness, 1.0);
				minRoughness = min(minRoughness, 1.0);
			}
			else
			{
				const float roughness = texture_surface_roughness[pixel];
				maxRoughness = max(maxRoughness, roughness);
				minRoughness = min(minRoughness, roughness);
			}
		}
	}

	tile_minmax_roughness_horizontal[DTid.xy] = float2(minRoughness, maxRoughness);
}
|
||||
@@ -0,0 +1,58 @@
|
||||
#include "globals.hlsli"
|
||||
#include "brdf.hlsli"
|
||||
#include "stochasticSSRHF.hlsli"
|
||||
#include "ShaderInterop_Postprocess.h"
|
||||
|
||||
// Input: x = minimum, y = maximum roughness per texel, reduced vertically by this pass
Texture2D<float2> tile_minmax_roughness_horizontal : register(t0);
|
||||
|
||||
// Counters incremented atomically, one per tile list (TILE_STATISTICS_OFFSET_* byte offsets)
RWByteAddressBuffer tile_tracing_statistics : register(u0);
|
||||
// Packed tile coordinates (x in the low 16 bits, y in the high 16 bits), one list per category
RWStructuredBuffer<uint> tiles_tracing_earlyexit : register(u1);
|
||||
RWStructuredBuffer<uint> tiles_tracing_cheap : register(u2);
|
||||
RWStructuredBuffer<uint> tiles_tracing_expensive : register(u3);
|
||||
// Output: final float2(min, max) roughness per tile
RWTexture2D<float2> tile_minmax_roughness : register(u4);
|
||||
|
||||
// Tiles whose minimum roughness is below this value are put into the expensive list
static const float SSRRoughnessCheap = 0.35;
|
||||
|
||||
// Vertical pass of the tile roughness min/max reduction and tile classification.
// Each thread reduces SSR_TILESIZE vertically adjacent texels of the horizontal result,
// starting at y = DTid.y * SSR_TILESIZE, then appends the tile to exactly one of the
// three tile lists and writes float2(min, max) to tile_minmax_roughness at DTid.xy.
//
// Classification:
//	expensive : minimum roughness is below SSRRoughnessCheap
//	cheap     : otherwise, minimum roughness is below ReflectionMaxRoughness
//	earlyexit : everything else (including tiles with no texel inside the input)
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
	const uint2 tile_upperleft = uint2(DTid.x, DTid.y * SSR_TILESIZE);
	float minRoughness = 1.0;
	float maxRoughness = 0.0;

	// Unsigned, to match the unsigned texel coordinates it is compared against
	uint2 dim;
	tile_minmax_roughness_horizontal.GetDimensions(dim.x, dim.y);

	[loop]
	for (uint i = 0; i < SSR_TILESIZE; ++i)
	{
		const uint2 pixel = uint2(tile_upperleft.x, tile_upperleft.y + i);

		// Coordinates are unsigned, so only the upper bounds need to be checked
		if (pixel.x < dim.x && pixel.y < dim.y)
		{
			const float2 minmax_roughness = tile_minmax_roughness_horizontal[pixel];
			minRoughness = min(minRoughness, minmax_roughness.r);
			maxRoughness = max(maxRoughness, minmax_roughness.g);
		}
	}

	// Tile coordinate packed into 16 + 16 bits
	const uint tile = (DTid.x & 0xFFFF) | ((DTid.y & 0xFFFF) << 16);

	uint prevCount;
	if (minRoughness < SSRRoughnessCheap)
	{
		tile_tracing_statistics.InterlockedAdd(TILE_STATISTICS_OFFSET_EXPENSIVE, 1, prevCount);
		tiles_tracing_expensive[prevCount] = tile;
	}
	else if (minRoughness < ReflectionMaxRoughness)
	{
		// minRoughness >= SSRRoughnessCheap is already known here. No additional test on
		// maxRoughness is made, otherwise a tile with a uniform roughness of exactly
		// SSRRoughnessCheap would be sent to the early exit list even though its
		// roughness is below ReflectionMaxRoughness.
		tile_tracing_statistics.InterlockedAdd(TILE_STATISTICS_OFFSET_CHEAP, 1, prevCount);
		tiles_tracing_cheap[prevCount] = tile;
	}
	else
	{
		tile_tracing_statistics.InterlockedAdd(TILE_STATISTICS_OFFSET_EARLYEXIT, 1, prevCount);
		tiles_tracing_earlyexit[prevCount] = tile;
	}

	tile_minmax_roughness[DTid.xy] = float2(minRoughness, maxRoughness);
}
|
||||
@@ -1,162 +1,70 @@
|
||||
#ifndef WI_STOCHASTICSSR_HF
|
||||
#define WI_STOCHASTICSSR_HF
|
||||
#include "brdf.hlsli"
|
||||
|
||||
// Stochastic Screen Space Reflections reference:
|
||||
// https://www.ea.com/frostbite/news/stochastic-screen-space-reflections
|
||||
|
||||
#include "ShaderInterop_Postprocess.h"
|
||||
|
||||
#define GGX_SAMPLE_VISIBLE
|
||||
|
||||
// Bias used on GGX importance sampling when denoising, to remove the part of the tail that creates a lot more noise.
|
||||
#define GGX_IMPORTANCE_SAMPLE_BIAS 0.1
|
||||
|
||||
// Shared SSR settings:
|
||||
static const float SSRMaxRoughness = 1.0f; // Specify max roughness, this can improve performance in complex scenes.
|
||||
static const float SSRIntensity = 1.0f;
|
||||
static const float SSRResolveConeMip = 1.0f; // Control overall filtering of the importance sampling.
|
||||
static const float SSRResolveSpatialSize = 3.0f; // Seems to work best with the temporal pass in the [-3;3] range
|
||||
static const float SSRBlendScreenEdgeFade = 5.0f;
|
||||
// Shared Reflection settings:
|
||||
static const float ReflectionMaxRoughness = 0.6f;
|
||||
|
||||
// Temporary
|
||||
static const float BRDFBias = 0.7f;
|
||||
|
||||
|
||||
float ComputeRoughnessMaskScale(in float maxRoughness)
|
||||
uint2 GetReflectionIndirectDispatchCoord(uint3 Gid, uint3 GTid, StructuredBuffer<uint> tiles, uint downsample)
|
||||
{
|
||||
float MaxRoughness = clamp(maxRoughness, 0.01f, 1.0f);
|
||||
|
||||
float roughnessMaskScale = -2.0f / MaxRoughness;
|
||||
return roughnessMaskScale * 1.0f; // 2.0f & 1.0f
|
||||
uint tile_replicate = sqr(SSR_TILESIZE / downsample / POSTPROCESS_BLOCKSIZE);
|
||||
uint tile_idx = Gid.x / tile_replicate;
|
||||
uint tile_packed = tiles[tile_idx];
|
||||
uint2 tile = uint2(tile_packed & 0xFFFF, (tile_packed >> 16) & 0xFFFF);
|
||||
uint subtile_idx = Gid.x % tile_replicate;
|
||||
uint2 subtile = unflatten2D(subtile_idx, SSR_TILESIZE / downsample / POSTPROCESS_BLOCKSIZE);
|
||||
uint2 subtile_upperleft = tile * SSR_TILESIZE / downsample + subtile * POSTPROCESS_BLOCKSIZE;
|
||||
return subtile_upperleft + unflatten2D(GTid.x, POSTPROCESS_BLOCKSIZE);
|
||||
}
|
||||
|
||||
float GetRoughnessFade(in float roughness, in float maxRoughness)
|
||||
bool NeedReflection(float roughness, float depth)
|
||||
{
|
||||
float roughnessMaskScale = ComputeRoughnessMaskScale(maxRoughness);
|
||||
return min(roughness * roughnessMaskScale + 2, 1.0f);
|
||||
}
|
||||
|
||||
// Returns the roughness with a lower limit of 0.02 applied.
float GetRoughness(float roughness)
{
	const float roughnessFloor = 0.02f;
	return max(roughness, roughnessFloor);
}
|
||||
|
||||
// Weighted sum of the color channels (Rec. 709 luma coefficients).
float Luminance(float3 color)
{
	const float3 channelWeights = float3(0.2126, 0.7152, 0.0722);
	return dot(color, channelWeights);
}
|
||||
|
||||
// Fast RNG inspired by PCG (Permuted Congruential Generator) - Based on Epic Games (Unreal Engine)
// Returns three elements with 16 random bits each (0-0xffff (65535)).
uint3 Rand_PCG16(int3 i)
{
	// The signed input is simply reinterpreted as unsigned
	uint3 state = uint3(i);

	// Linear congruential generator step, constants from 'Numerical Recipes'
	// see: https://en.wikipedia.org/wiki/Linear_congruential_generator
	state = state * 1664525u + 1013904223u;

	// Final shuffle: two rounds of simple Feistel-like steps in which every component
	// is mixed with the product of the other two (used in place of the xorshift of
	// the original PCG code)
	[unroll]
	for (uint round = 0; round < 2; ++round)
	{
		state.x += state.y * state.z;
		state.y += state.z * state.x;
		state.z += state.x * state.y;
	}

	// Only the top 16 bits of every component are returned
	return state >> 16u;
}
|
||||
|
||||
// Hammersley sequence manipulated by a random value and returns top 16 bits
//	idx    : index of the sample
//	num    : total number of samples
//	random : random offsets, scaled by 1 / 65536 below
//	         (presumably 16 bit values such as the ones returned by Rand_PCG16 - verify against caller)
// Returns float2(E1, E2):
//	E1 : idx / num offset by random.x, wrapped into [0; 1[
//	E2 : bit-reversed idx (top 16 bits) xor random.y, scaled by 1 / 65536
float2 HammersleyRandom16(uint idx, uint num, uint2 random)
{
	// Reverse Bits 32
	uint bits = idx;
	bits = (bits << 16u) | (bits >> 16u);
	bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
	bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
	bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
	bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);

	const float rcp65536 = 1.52587890625e-5; // 1 / 65536

	// The division must be done in floating point: the unsigned integer division idx / num
	// is 0 for every idx < num, which would remove the per-sample offset completely.
	float E1 = frac(float(idx) / float(num) + float(random.x) * rcp65536);
	float E2 = float((bits >> 16) ^ random.y) * rcp65536; // Shift reverse bits by 16 and xor with random
	return float2(E1, E2);
}
|
||||
|
||||
// Variant of HammersleyRandom16 without a sample count: E1 comes from random.x alone,
// E2 is the bit-reversed idx scrambled with random.y.
//  idx    : index of the sample
//  random : scramble values; E2 stays below 1 only while random.y fits in 16 bits
// Returns float2(E1, E2):
//  E1 = frac(random.x / 65536)
//  E2 = ((top 16 bits of the bit-reversed idx) XOR random.y) / 65536
float2 HammersleyRandom16(uint idx, uint2 random)
{
	// Reverse all 32 bits of idx: swap the two halves first, then swap ever larger bit groups
	uint bits = idx;
	bits = (bits << 16u) | (bits >> 16u);
	bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
	bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
	bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
	bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);

	// 1.52587890625e-5 == 1.0 / 65536.0
	const float rcp65536 = 1.52587890625e-5;

	float E1 = frac(float(random.x) * rcp65536);

	// Keep the top 16 bits of the reversed index and scramble them with random.y
	float E2 = float((bits >> 16) ^ random.y) * rcp65536;

	return float2(E1, E2);
}
|
||||
|
||||
// Brian Karis, Epic Games "Real Shading in Unreal Engine 4"
// Draws a half vector from the GGX distribution.
//  Xi        : two random numbers; Xi.x picks the azimuth, Xi.y the polar angle
//  Roughness : squared once to get m, and once more to get m2
// Returns float4(H, pdf): H is a direction around +Z (H.z = cos(theta)),
// pdf = D * cos(theta) with D the GGX distribution term evaluated for H.
float4 ImportanceSampleGGX(float2 Xi, float Roughness)
{
	float m = Roughness * Roughness;
	float m2 = m * m;

	float Phi = 2 * PI * Xi.x;

	float CosTheta = sqrt((1.0 - Xi.y) / (1.0 + (m2 - 1.0) * Xi.y));
	// The max() keeps the sqrt argument strictly positive when CosTheta reaches 1
	float SinTheta = sqrt(max(1e-5, 1.0 - CosTheta * CosTheta));

	float3 H;
	H.x = SinTheta * cos(Phi);
	H.y = SinTheta * sin(Phi);
	H.z = CosTheta;

	// d = cos^2(theta) * (m2 - 1) + 1
	float d = (CosTheta * m2 - CosTheta) * CosTheta + 1;
	float D = m2 / (PI * d * d);
	float pdf = D * CosTheta;

	return float4(H, pdf);
}
|
||||
|
||||
// [ Duff et al. 2017, "Building an Orthonormal Basis, Revisited" ]
// http://jcgt.org/published/0006/01/01/
// Builds a basis around TangentZ without branching on a helper axis.
//  TangentZ : third axis of the basis; the formulas assume it is unit length
// Returns a float3x3 whose rows are TangentX, TangentY, TangentZ.
float3x3 GetTangentBasis(float3 TangentZ)
{
	// Sign follows TangentZ.z, so (Sign + TangentZ.z) has magnitude >= 1 and rcp() is safe
	const float Sign = TangentZ.z >= 0 ? 1 : -1;
	const float a = -rcp(Sign + TangentZ.z);
	const float b = TangentZ.x * TangentZ.y * a;

	// Squares are written as plain products: pow() with a negative base is NaN in HLSL,
	// and TangentZ.x / TangentZ.y can be negative.
	const float xx = TangentZ.x * TangentZ.x;
	const float yy = TangentZ.y * TangentZ.y;

	float3 TangentX = { 1 + Sign * a * xx, Sign * b, -Sign * TangentZ.x };
	float3 TangentY = { b, Sign + a * yy, -TangentZ.y };

	return float3x3(TangentX, TangentY, TangentZ);
}
|
||||
|
||||
// Expresses `vec` in the basis built around tangentZ:
// result = vec.x * TangentX + vec.y * TangentY + vec.z * TangentZ (row vector times matrix).
float3 TangentToWorld(float3 vec, float3 tangentZ)
{
	const float3x3 basis = GetTangentBasis(tangentZ);
	return mul(vec, basis);
}
|
||||
|
||||
// float4 flavour: rotates H.xyz through the basis built around tangentZ
// and carries H.w over untouched.
float4 TangentToWorld(float4 H, float3 tangentZ)
{
	const float3x3 basis = GetTangentBasis(tangentZ);
	const float3 rotated = mul(H.xyz, basis);
	return float4(rotated, H.w);
}
|
||||
|
||||
// Projects `vec` onto the axes of the basis built around tangentZ:
// result = (dot(TangentX, vec), dot(TangentY, vec), dot(TangentZ, vec)) (matrix times column vector).
float3 WorldToTangent(float3 vec, float3 tangentZ)
{
	const float3x3 basis = GetTangentBasis(tangentZ);
	return mul(basis, vec);
}
|
||||
|
||||
|
||||
float2 SampleDisk(float2 Xi)
|
||||
{
|
||||
float theta = 2 * PI * Xi.x;
|
||||
@@ -209,5 +117,9 @@ float4 ImportanceSampleVisibleGGX(float2 diskXi, float roughness, float3 V)
|
||||
return float4(H, PDF);
|
||||
}
|
||||
|
||||
// Weighted sum of the colour channels; the weights match the Rec. 709 luma coefficients.
float Luminance(float3 color)
{
	const float3 weights = float3(0.2126, 0.7152, 0.0722);
	return dot(color, weights);
}
|
||||
|
||||
#endif // WI_STOCHASTICSSR_HF
|
||||
|
||||
@@ -284,10 +284,17 @@ namespace wi::enums
|
||||
CSTYPE_POSTPROCESS_MSAO_BLURUPSAMPLE_PREMIN,
|
||||
CSTYPE_POSTPROCESS_MSAO_BLURUPSAMPLE_PREMIN_BLENDOUT,
|
||||
CSTYPE_POSTPROCESS_RTREFLECTION,
|
||||
CSTYPE_POSTPROCESS_SSR_SURFACE,
|
||||
CSTYPE_POSTPROCESS_SSR_TILEMAXROUGHNESS_HORIZONTAL,
|
||||
CSTYPE_POSTPROCESS_SSR_TILEMAXROUGHNESS_VERTICAL,
|
||||
CSTYPE_POSTPROCESS_SSR_KICKJOBS,
|
||||
CSTYPE_POSTPROCESS_SSR_DEPTHHIERARCHY,
|
||||
CSTYPE_POSTPROCESS_SSR_RAYTRACE,
|
||||
CSTYPE_POSTPROCESS_SSR_RAYTRACE_EARLYEXIT,
|
||||
CSTYPE_POSTPROCESS_SSR_RAYTRACE_CHEAP,
|
||||
CSTYPE_POSTPROCESS_SSR_RESOLVE,
|
||||
CSTYPE_POSTPROCESS_SSR_TEMPORAL,
|
||||
CSTYPE_POSTPROCESS_SSR_MEDIAN,
|
||||
CSTYPE_POSTPROCESS_SSR_BILATERAL,
|
||||
CSTYPE_POSTPROCESS_LIGHTSHAFTS,
|
||||
CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_HORIZONTAL,
|
||||
CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_VERTICAL,
|
||||
|
||||
@@ -1599,8 +1599,8 @@ void RenderPath3D::setSSREnabled(bool value)
|
||||
TextureDesc desc;
|
||||
desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
|
||||
desc.format = Format::R16G16B16A16_FLOAT;
|
||||
desc.width = internalResolution.x / 2;
|
||||
desc.height = internalResolution.y / 2;
|
||||
desc.width = internalResolution.x;
|
||||
desc.height = internalResolution.y;
|
||||
desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE;
|
||||
device->CreateTexture(&desc, nullptr, &rtSSR);
|
||||
device->SetName(&rtSSR, "rtSSR");
|
||||
@@ -1625,8 +1625,8 @@ void RenderPath3D::setRaytracedReflectionsEnabled(bool value)
|
||||
TextureDesc desc;
|
||||
desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
|
||||
desc.format = Format::R11G11B10_FLOAT;
|
||||
desc.width = internalResolution.x / 2;
|
||||
desc.height = internalResolution.y / 2;
|
||||
desc.width = internalResolution.x;
|
||||
desc.height = internalResolution.y;
|
||||
device->CreateTexture(&desc, nullptr, &rtSSR);
|
||||
device->SetName(&rtSSR, "rtSSR");
|
||||
|
||||
|
||||
+772
-176
File diff suppressed because it is too large
Load Diff
@@ -444,8 +444,18 @@ namespace wi::renderer
|
||||
// Textures used by the ray traced reflection post process. An instance is passed by
// non-const reference to CreateRTReflectionResources (declared right after this struct).
// NOTE(review): this listing comes from a diff view; `temporal` / `rayLengths` look like the
// pre-change members shown next to their `texture_`-prefixed replacements - confirm against
// the real header before relying on either set.
struct RTReflectionResources
|
||||
{
|
||||
// mutable: can be modified even through a const reference to the struct
mutable int frame = 0;
|
||||
wi::graphics::Texture temporal[2];
|
||||
wi::graphics::Texture rayLengths;
|
||||
wi::graphics::Texture texture_surface_normal;
|
||||
wi::graphics::Texture texture_surface_roughness;
|
||||
wi::graphics::Texture texture_surface_environment;
|
||||
wi::graphics::Texture texture_rayIndirectSpecular;
|
||||
wi::graphics::Texture texture_rayDirectionPDF;
|
||||
wi::graphics::Texture texture_rayLengths;
|
||||
wi::graphics::Texture texture_resolve;
|
||||
wi::graphics::Texture texture_resolve_variance;
|
||||
wi::graphics::Texture texture_resolve_reprojectionDepth;
|
||||
// two-element arrays - presumably ping-ponged between frames via `frame`; TODO confirm
wi::graphics::Texture texture_temporal[2];
|
||||
wi::graphics::Texture texture_temporal_variance[2];
|
||||
wi::graphics::Texture texture_bilateral_temp;
|
||||
};
|
||||
void CreateRTReflectionResources(RTReflectionResources& res, XMUINT2 resolution);
|
||||
void Postprocess_RTReflection(
|
||||
@@ -459,9 +469,24 @@ namespace wi::renderer
|
||||
// Textures and buffers used by the screen space reflection post process. An instance is
// passed by non-const reference to CreateSSRResources (declared right after this struct).
// NOTE(review): this listing comes from a diff view, so members removed by the change and
// members added by it are shown together (e.g. `rayLengths` next to `texture_rayLengths`) -
// confirm the actual member set against the real header.
struct SSRResources
|
||||
{
|
||||
// mutable: can be modified even through a const reference to the struct
mutable int frame = 0;
|
||||
wi::graphics::Texture texture_raytrace;
|
||||
wi::graphics::Texture rayLengths;
|
||||
wi::graphics::Texture texture_surface_normal;
|
||||
wi::graphics::Texture texture_surface_roughness;
|
||||
wi::graphics::Texture texture_tile_minmax_roughness_horizontal;
|
||||
wi::graphics::Texture texture_tile_minmax_roughness;
|
||||
wi::graphics::Texture texture_depth_hierarchy;
|
||||
wi::graphics::Texture texture_rayIndirectSpecular;
|
||||
wi::graphics::Texture texture_rayDirectionPDF;
|
||||
wi::graphics::Texture texture_rayLengths;
|
||||
wi::graphics::Texture texture_resolve;
|
||||
wi::graphics::Texture texture_resolve_variance;
|
||||
wi::graphics::Texture texture_resolve_reprojectionDepth;
|
||||
// two-element arrays - presumably ping-ponged between frames via `frame`; TODO confirm
wi::graphics::Texture texture_temporal[2];
|
||||
wi::graphics::Texture texture_temporal_variance[2];
|
||||
wi::graphics::Texture texture_bilateral_temp;
|
||||
// tile buffers - names suggest per-tile classification into early-exit / cheap / expensive
// tracing work; TODO confirm against the shaders that fill them
wi::graphics::GPUBuffer buffer_tile_tracing_statistics;
|
||||
wi::graphics::GPUBuffer buffer_tiles_tracing_earlyexit;
|
||||
wi::graphics::GPUBuffer buffer_tiles_tracing_cheap;
|
||||
wi::graphics::GPUBuffer buffer_tiles_tracing_expensive;
|
||||
};
|
||||
void CreateSSRResources(SSRResources& res, XMUINT2 resolution);
|
||||
void Postprocess_SSR(
|
||||
|
||||
@@ -9,7 +9,7 @@ namespace wi::version
|
||||
// minor features, major updates, breaking compatibility changes
|
||||
const int minor = 60;
|
||||
// minor bug fixes, alterations, refactors, updates
|
||||
const int revision = 27;
|
||||
const int revision = 28;
|
||||
|
||||
const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user