diff --git a/WickedEngine/offlineshadercompiler.cpp b/WickedEngine/offlineshadercompiler.cpp
index af45ddcc2..87630abc6 100644
--- a/WickedEngine/offlineshadercompiler.cpp
+++ b/WickedEngine/offlineshadercompiler.cpp
@@ -110,10 +110,17 @@ int main(int argc, char* argv[])
"fsr_sharpenCS.hlsl" ,
"ssaoCS.hlsl" ,
"rtreflectionCS.hlsl" ,
- "ssr_raytraceCS.hlsl" ,
+ "ssr_surfaceCS.hlsl" ,
+ "ssr_tileMaxRoughness_horizontalCS.hlsl" ,
+ "ssr_tileMaxRoughness_verticalCS.hlsl" ,
+ "ssr_kickjobsCS.hlsl" ,
+ "ssr_depthHierarchyCS.hlsl" ,
"ssr_resolveCS.hlsl" ,
"ssr_temporalCS.hlsl" ,
- "ssr_medianCS.hlsl" ,
+ "ssr_bilateralCS.hlsl" ,
+ "ssr_raytraceCS.hlsl" ,
+ "ssr_raytraceCS_cheap.hlsl" ,
+ "ssr_raytraceCS_earlyexit.hlsl" ,
"sharpenCS.hlsl" ,
"skinningCS.hlsl" ,
"resolveMSAADepthStencilCS.hlsl" ,
diff --git a/WickedEngine/shaders/ShaderInterop_Postprocess.h b/WickedEngine/shaders/ShaderInterop_Postprocess.h
index 363686ddf..0ff67e899 100644
--- a/WickedEngine/shaders/ShaderInterop_Postprocess.h
+++ b/WickedEngine/shaders/ShaderInterop_Postprocess.h
@@ -40,8 +40,7 @@ struct Bloom
#define lineardepth_inputresolution postprocess.params0.xy
#define lineardepth_inputresolution_rcp postprocess.params0.zw
-#define ssr_input_maxmip postprocess.params0.x
-#define ssr_input_resolution_max postprocess.params0.y
+static const uint SSR_TILESIZE = 32; // SSR tile size; ssr_kickjobsCS.hlsl derives its per-tile dispatch multiplier from it
#define ssr_frame postprocess.params0.w
#define ssao_range postprocess.params0.x
diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems b/WickedEngine/shaders/Shaders_SOURCE.vcxitems
index 2cdbc092a..b19d32856 100644
--- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems
+++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems
@@ -998,6 +998,38 @@
Compute
Compute
+
+ Compute
+ 4.0
+
+
+ Compute
+ 4.0
+
+
+ Compute
+ 4.0
+
+
+ Compute
+ 4.0
+
+
+ Compute
+ 4.0
+
+
+ Compute
+ 4.0
+
+
+ Compute
+ 4.0
+
+
+ Compute
+ 4.0
+
Compute
4.0
@@ -2561,16 +2593,6 @@
Compute
Compute
-
- Compute
- Compute
- Compute
- Compute
- Compute
- Compute
- Compute
- Compute
-
Compute
Compute
diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters
index c9b613559..30faa01cc 100644
--- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters
+++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters
@@ -230,9 +230,6 @@
CS
-
- CS
-
CS
@@ -1025,6 +1022,30 @@
CS
+
+ CS
+
+
+ CS
+
+
+ CS
+
+
+ CS
+
+
+ CS
+
+
+ CS
+
+
+ CS
+
+
+ CS
+
diff --git a/WickedEngine/shaders/rtreflectionCS.hlsl b/WickedEngine/shaders/rtreflectionCS.hlsl
index 4892a1e36..a6a6bd1f3 100644
--- a/WickedEngine/shaders/rtreflectionCS.hlsl
+++ b/WickedEngine/shaders/rtreflectionCS.hlsl
@@ -11,8 +11,13 @@
PUSHCONSTANT(postprocess, PostProcess);
-RWTexture2D output : register(u0);
-RWTexture2D output_rayLengths : register(u1);
+Texture2D texture_surface_normal : register(t0);
+Texture2D texture_surface_roughness : register(t1);
+Texture2D texture_surface_environment : register(t2);
+
+RWTexture2D output_rayIndirectSpecular : register(u0);
+RWTexture2D output_rayDirectionPDF : register(u1);
+RWTexture2D output_rayLengths : register(u2);
struct RayPayload
{
@@ -23,34 +28,30 @@ struct RayPayload
void main(uint2 DTid : SV_DispatchThreadID)
{
const float2 uv = ((float2)DTid.xy + 0.5) * postprocess.resolution_rcp;
- const float depth = texture_depth.SampleLevel(sampler_linear_clamp, uv, 0);
- if (depth == 0)
- return;
- const float3 P = reconstruct_position(uv, depth);
- const float3 V = normalize(GetCamera().position - P);
+ const uint downsampleFactor = 2;
- PrimitiveID prim;
- prim.unpack(texture_gbuffer0[DTid.xy * 2]);
+ // This is necessary for accurate upscaling. This is so we don't reuse the same half-res pixels
+ uint2 screenJitter = floor(blue_noise(uint2(0, 0)).xy * downsampleFactor);
+ uint2 jitterPixel = screenJitter + DTid.xy * downsampleFactor;
+ float2 jitterUV = (screenJitter + DTid.xy + 0.5f) * postprocess.resolution_rcp;
- //output[DTid] = float4(saturate(P * 0.1), 1);
- //return;
+ const float depth = texture_depth.SampleLevel(sampler_linear_clamp, jitterUV, 0);
+ const float roughness = texture_surface_roughness[jitterPixel];
- Surface surface;
- surface.init();
- if (!surface.load(prim, P))
+ if (!NeedReflection(roughness, depth))
{
- return;
- }
- if (surface.roughness > 0.6)
- {
- output[DTid.xy] = float4(max(0, EnvironmentReflection_Global(surface)), 1);
+ float3 environmentReflection = texture_surface_environment[DTid.xy * downsampleFactor];
+
+ output_rayIndirectSpecular[DTid.xy] = float4(environmentReflection, 1);
+ output_rayDirectionPDF[DTid.xy] = 0.0;
output_rayLengths[DTid.xy] = FLT_MAX;
return;
}
- float3 N = surface.N;
- float roughness = surface.roughness;
+ const float3 N = texture_surface_normal[jitterPixel];
+ const float3 P = reconstruct_position(jitterUV, depth);
+ const float3 V = normalize(GetCamera().position - P);
// The ray direction selection part is the same as in from ssr_raytraceCS.hlsl:
float4 H;
@@ -217,6 +218,7 @@ void main(uint2 DTid : SV_DispatchThreadID)
payload.data.w = q.CommittedRayT();
}
- output[DTid.xy] = float4(payload.data.xyz, 1);
+ output_rayIndirectSpecular[DTid.xy] = float4(payload.data.xyz, 1);
+ output_rayDirectionPDF[DTid.xy] = float4(L, H.w);
output_rayLengths[DTid.xy] = payload.data.w;
}
diff --git a/WickedEngine/shaders/rtreflectionLIB.hlsl b/WickedEngine/shaders/rtreflectionLIB.hlsl
index 3a7f72cb6..742982a2b 100644
--- a/WickedEngine/shaders/rtreflectionLIB.hlsl
+++ b/WickedEngine/shaders/rtreflectionLIB.hlsl
@@ -10,8 +10,13 @@
PUSHCONSTANT(postprocess, PostProcess);
-RWTexture2D output : register(u0);
-RWTexture2D output_rayLengths : register(u1);
+Texture2D texture_surface_normal : register(t0);
+Texture2D texture_surface_roughness : register(t1);
+Texture2D texture_surface_environment : register(t2);
+
+RWTexture2D output_rayIndirectSpecular : register(u0);
+RWTexture2D output_rayDirectionPDF : register(u1);
+RWTexture2D output_rayLengths : register(u2);
struct RayPayload
{
@@ -30,34 +35,30 @@ void RTReflection_Raygen()
{
uint2 DTid = DispatchRaysIndex().xy;
const float2 uv = ((float2)DTid.xy + 0.5) / (float2)DispatchRaysDimensions();
- const float depth = texture_depth.SampleLevel(sampler_linear_clamp, uv, 0);
- if (depth == 0)
- return;
- const float3 P = reconstruct_position(uv, depth);
- const float3 V = normalize(GetCamera().position - P);
+ const uint downsampleFactor = 2;
- PrimitiveID prim;
- prim.unpack(texture_gbuffer0[DTid.xy * 2]);
+ // This is necessary for accurate upscaling. This is so we don't reuse the same half-res pixels
+ uint2 screenJitter = floor(blue_noise(uint2(0, 0)).xy * downsampleFactor);
+ uint2 jitterPixel = screenJitter + DTid.xy * downsampleFactor;
+ float2 jitterUV = (screenJitter + DTid.xy + 0.5f) / (float2)DispatchRaysDimensions();
- //output[DTid] = float4(saturate(P * 0.1), 1);
- //return;
+ const float depth = texture_depth.SampleLevel(sampler_linear_clamp, jitterUV, 0);
+ const float roughness = texture_surface_roughness[jitterPixel];
- Surface surface;
- surface.init();
- if (!surface.load(prim, P))
+ if (!NeedReflection(roughness, depth))
{
- return;
- }
- if (surface.roughness > 0.6)
- {
- output[DTid.xy] = float4(max(0, EnvironmentReflection_Global(surface)), 1);
+ float3 environmentReflection = texture_surface_environment[DTid.xy * downsampleFactor];
+
+ output_rayIndirectSpecular[DTid.xy] = float4(environmentReflection, 1);
+ output_rayDirectionPDF[DTid.xy] = 0.0;
output_rayLengths[DTid.xy] = FLT_MAX;
return;
}
- float3 N = surface.N;
- float roughness = surface.roughness;
+ const float3 N = texture_surface_normal[jitterPixel];
+ const float3 P = reconstruct_position(jitterUV, depth);
+ const float3 V = normalize(GetCamera().position - P);
// The ray direction selection part is the same as in from ssr_raytraceCS.hlsl:
float4 H;
@@ -78,7 +79,6 @@ void RTReflection_Raygen()
// Tangent to world
H.xyz = mul(H.xyz, tangentBasis);
-
L = reflect(-V, H.xyz);
}
else
@@ -87,7 +87,6 @@ void RTReflection_Raygen()
L = reflect(-V, H.xyz);
}
-
const float3 R = L;
float seed = GetFrame().time;
@@ -112,7 +111,8 @@ void RTReflection_Raygen()
payload // Payload
);
- output[DTid.xy] = float4(payload.data.xyz, 1);
+ output_rayIndirectSpecular[DTid.xy] = float4(payload.data.xyz, 1); // traced radiance from the payload; the ray direction L is stored in output_rayDirectionPDF
+ output_rayDirectionPDF[DTid.xy] = float4(L, H.w);
output_rayLengths[DTid.xy] = payload.data.w;
}
diff --git a/WickedEngine/shaders/ssr_bilateralCS.hlsl b/WickedEngine/shaders/ssr_bilateralCS.hlsl
new file mode 100644
index 000000000..df0236666
--- /dev/null
+++ b/WickedEngine/shaders/ssr_bilateralCS.hlsl
@@ -0,0 +1,106 @@
+#include "globals.hlsli"
+#include "stochasticSSRHF.hlsli"
+#include "ShaderInterop_Postprocess.h"
+
+PUSHCONSTANT(postprocess, PostProcess);
+
+Texture2D texture_temporal : register(t0);
+Texture2D texture_resolve_variance : register(t1);
+Texture2D texture_surface_normal : register(t2);
+Texture2D texture_surface_roughness : register(t3);
+
+RWTexture2D output : register(u0);
+
+static const float depthThreshold = 10000.0;
+static const float normalThreshold = 1.0;
+static const float varianceEstimateThreshold = 0.015; // Larger variance values use stronger blur
+static const float varianceExitThreshold = 0.005; // Variance needs to be higher than this value to accept blur
+static const uint2 bilateralMinMaxRadius = uint2(0, 2); // Chosen by variance
+
+#define BILATERAL_SIGMA 0.9
+
+[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
+void main(uint3 DTid : SV_DispatchThreadID) // 1D bilateral blur of texture_temporal along the direction given in postprocess.params0.xy
+{
+#if 0 // Debug
+ output[DTid.xy] = float4((texture_resolve_variance[DTid.xy] > varianceEstimateThreshold).rrr, 1.0);
+ return;
+#endif
+ // Pixels rejected by NeedReflection() are copied through from texture_temporal without filtering.
+ const float depth = texture_depth[DTid.xy];
+ const float roughness = texture_surface_roughness[DTid.xy];
+
+ if (!NeedReflection(roughness, depth))
+ {
+ output[DTid.xy] = texture_temporal[DTid.xy];
+ return;
+ }
+
+ float2 direction = postprocess.params0.xy; // per-tap pixel step: tap r reads DTid.xy + direction * r
+
+ const float linearDepth = texture_lineardepth[DTid.xy];
+ const float3 N = texture_surface_normal[DTid.xy];
+
+ float4 outputColor = texture_temporal[DTid.xy]; // default result when no blur is applied
+
+ // Radius selection: bilateralMinMaxRadius.y when variance exceeds varianceEstimateThreshold, otherwise .x; then scaled by roughness.
+ float variance = texture_resolve_variance[DTid.xy];
+ bool strongBlur = variance > varianceEstimateThreshold;
+
+ float radius = strongBlur ? bilateralMinMaxRadius.y : bilateralMinMaxRadius.x;
+ radius = lerp(0.0, radius, saturate(roughness * 8.0)); // the full radius is reached at roughness >= 0.125
+
+ float sigma = radius * BILATERAL_SIGMA;
+ int effectiveRadius = min(sigma * 2.0, radius); // converted to a whole number of pixels by the int assignment
+
+ if (variance > varianceExitThreshold && effectiveRadius > 0) // otherwise outputColor keeps the unfiltered temporal value
+ {
+ float2 uv = (DTid.xy + 0.5f) * postprocess.resolution_rcp;
+ float3 P = reconstruct_position(uv, depth);
+
+ float4 result = 0;
+ float weightSum = 0.0f;
+
+ for (int r = -effectiveRadius; r <= effectiveRadius; r++)
+ {
+ const int2 sampleCoord = DTid.xy + (direction * r); // Tap r along the blur direction, from -effectiveRadius to +effectiveRadius
+
+ if (all(sampleCoord >= int2(0, 0) && sampleCoord < (int2) postprocess.resolution)) // taps outside postprocess.resolution are skipped
+ {
+ const float sampleDepth = texture_depth[sampleCoord];
+ const float4 sampleColor = texture_temporal[sampleCoord];
+
+ const float3 sampleN = texture_surface_normal[sampleCoord];
+ const float sampleRoughness = texture_surface_roughness[sampleCoord];
+
+ float2 sampleUV = (sampleCoord + 0.5) * postprocess.resolution_rcp;
+ float3 sampleP = reconstruct_position(sampleUV, sampleDepth);
+
+ // Don't let invalid roughness samples interfere
+ if (NeedReflection(sampleRoughness, sampleDepth))
+ {
+ float3 dq = P - sampleP;
+ float planeError = max(abs(dot(dq, sampleN)), abs(dot(dq, N))); // position offset projected on each normal; the larger one is used
+ float relativeDepthDifference = planeError / (linearDepth * GetCamera().z_far); // relative to the centre pixel's linearDepth * z_far
+ float bilateralDepthWeight = exp(-sqr(relativeDepthDifference) * depthThreshold);
+
+ float normalError = pow(saturate(dot(sampleN, N)), 4.0); // 1 when the normals agree, falling towards 0 as they diverge
+ float bilateralNormalWeight = saturate(1.0 - (1.0 - normalError) * normalThreshold);
+
+ float bilateralWeight = bilateralDepthWeight * bilateralNormalWeight;
+
+ float gaussian = exp(-sqr(r / sigma)); // sigma > 0 here because effectiveRadius > 0
+ float weight = (r == 0) ? 1.0 : gaussian * bilateralWeight; // Skip center gaussian peak
+
+ result += sampleColor * weight;
+ weightSum += weight;
+ }
+ }
+ }
+
+ result /= weightSum; // the r == 0 tap adds weight 1.0 whenever it is accepted
+ outputColor = result;
+ }
+
+ output[DTid.xy] = outputColor;
+}
diff --git a/WickedEngine/shaders/ssr_depthHierarchyCS.hlsl b/WickedEngine/shaders/ssr_depthHierarchyCS.hlsl
new file mode 100644
index 000000000..9111560a8
--- /dev/null
+++ b/WickedEngine/shaders/ssr_depthHierarchyCS.hlsl
@@ -0,0 +1,42 @@
+#include "globals.hlsli"
+#include "ShaderInterop_Postprocess.h"
+
+PUSHCONSTANT(postprocess, PostProcess);
+
+Texture2D input : register(t0);
+
+RWTexture2D output : register(u0);
+
+[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
+void main(uint3 DTid : SV_DispatchThreadID) // Reduces depth to float2(max, min) per output texel
+{
+ if (all(DTid.xy < postprocess.params0.xy)) // params0.xy bounds the threads that write; it is also the uv divisor in the else branch
+ {
+ if (postprocess.params0.z == 1) // params0.z == 1 selects the pass that reads texture_depth directly
+ {
+ uint2 dim;
+ texture_depth.GetDimensions(dim.x, dim.y);
+
+ float2 uv = (DTid.xy + 0.5) / dim * 2; // Account for half-res
+
+ float4 depths = texture_depth.GatherRed(sampler_point_clamp, uv); // four neighbouring texels in a single fetch
+
+ float depthMax = max(max(depths.x, depths.y), max(depths.z, depths.w));
+ float depthMin = min(min(depths.x, depths.y), min(depths.z, depths.w));
+
+ output[DTid.xy] = float2(depthMax, depthMin); // .r = max, .g = min
+ }
+ else // any other params0.z: reduce `input`, read with the same .r = max / .g = min layout
+ {
+ float2 uv = (DTid.xy + 0.5) / postprocess.params0.xy;
+
+ float4 depthsRed = input.GatherRed(sampler_point_clamp, uv);
+ float4 depthsGreen = input.GatherGreen(sampler_point_clamp, uv);
+
+ float depthMax = max(max(depthsRed.x, depthsRed.y), max(depthsRed.z, depthsRed.w)); // max over the red (max) channel
+ float depthMin = min(min(depthsGreen.x, depthsGreen.y), min(depthsGreen.z, depthsGreen.w)); // min over the green (min) channel
+
+ output[DTid.xy] = float2(depthMax, depthMin);
+ }
+ }
+}
diff --git a/WickedEngine/shaders/ssr_kickjobsCS.hlsl b/WickedEngine/shaders/ssr_kickjobsCS.hlsl
new file mode 100644
index 000000000..8888ac648
--- /dev/null
+++ b/WickedEngine/shaders/ssr_kickjobsCS.hlsl
@@ -0,0 +1,27 @@
+#include "globals.hlsli"
+#include "ShaderInterop_Postprocess.h"
+
+RWByteAddressBuffer tile_tracing_statistics : register(u0);
+RWStructuredBuffer tiles_tracing_earlyexit : register(u1);
+RWStructuredBuffer tiles_tracing_cheap : register(u2);
+RWStructuredBuffer tiles_tracing_expensive : register(u3);
+
+[numthreads(1, 1, 1)]
+void main(uint3 DTid : SV_DispatchThreadID) // Single-thread pass: converts the three tile counters into indirect dispatch arguments
+{
+ // Load statistics:
+ const uint tracing_earlyexit_count = tile_tracing_statistics.Load(TILE_STATISTICS_OFFSET_EARLYEXIT);
+ const uint tracing_cheap_count = tile_tracing_statistics.Load(TILE_STATISTICS_OFFSET_CHEAP);
+ const uint tracing_expensive_count = tile_tracing_statistics.Load(TILE_STATISTICS_OFFSET_EXPENSIVE);
+
+ // Reset counters (the values loaded above are still used below):
+ tile_tracing_statistics.Store(TILE_STATISTICS_OFFSET_EARLYEXIT, 0);
+ tile_tracing_statistics.Store(TILE_STATISTICS_OFFSET_CHEAP, 0);
+ tile_tracing_statistics.Store(TILE_STATISTICS_OFFSET_EXPENSIVE, 0);
+
+ // Create indirect dispatch arguments: X = tile count * tile_tracing_replicate, Y = Z = 1
+ const uint tile_tracing_replicate = sqr(SSR_TILESIZE / 2 / POSTPROCESS_BLOCKSIZE); // NOTE(review): presumably thread groups per tile at half resolution - confirm
+ tile_tracing_statistics.Store3(INDIRECT_OFFSET_EARLYEXIT, uint3(tracing_earlyexit_count * tile_tracing_replicate, 1, 1));
+ tile_tracing_statistics.Store3(INDIRECT_OFFSET_CHEAP, uint3(tracing_cheap_count * tile_tracing_replicate, 1, 1));
+ tile_tracing_statistics.Store3(INDIRECT_OFFSET_EXPENSIVE, uint3(tracing_expensive_count * tile_tracing_replicate, 1, 1));
+}
diff --git a/WickedEngine/shaders/ssr_medianCS.hlsl b/WickedEngine/shaders/ssr_medianCS.hlsl
deleted file mode 100644
index 94f0c9ffc..000000000
--- a/WickedEngine/shaders/ssr_medianCS.hlsl
+++ /dev/null
@@ -1,66 +0,0 @@
-#include "globals.hlsli"
-#include "ShaderInterop_Postprocess.h"
-
-PUSHCONSTANT(postprocess, PostProcess);
-
-Texture2D texture_temporal : register(t0);
-
-RWTexture2D output : register(u0);
-
-// A Fast, Small-Radius GPU Median Filter by Morgan McGuire
-// https://casual-effects.com/research/McGuire2008Median/index.html
-
-#define s2(a, b) temp = a; a = min(a, b); b = max(temp, b);
-#define t2(a, b) s2(v[a], v[b]);
-#define t24(a, b, c, d, e, f, g, h) t2(a, b); t2(c, d); t2(e, f); t2(g, h);
-#define t25(a, b, c, d, e, f, g, h, i, j) t24(a, b, c, d, e, f, g, h); t2(i, j);
-
-[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
-void main(uint3 DTid : SV_DispatchThreadID)
-{
- if (texture_depth.Load(uint3(DTid.xy, 1)) == 0)
- return;
-
- const float2 uv = (DTid.xy + 0.5f) * postprocess.resolution_rcp;
-
- half4 v[25];
-
- // Add the pixels which make up our window to the pixel array.
- [unroll]
- for (int dX = -2; dX <= 2; ++dX)
- {
- [unroll]
- for (int dY = -2; dY <= 2; ++dY)
- {
- float2 offset = float2(float(dX), float(dY));
-
- // If a pixel in the window is located at (x+dX, y+dY), put it at index (dX + R)(2R + 1) + (dY + R) of the
- // pixel array. This will fill the pixel array, with the top left pixel of the window at pixel[0] and the
- // bottom right pixel of the window at pixel[N-1].
- v[(dX + 2) * 5 + (dY + 2)] = texture_temporal.SampleLevel(sampler_linear_clamp, uv + offset * postprocess.resolution_rcp, 0);
- }
- }
-
- half4 temp;
- t25(0, 1, 3, 4, 2, 4, 2, 3, 6, 7);
- t25(5, 7, 5, 6, 9, 7, 1, 7, 1, 4);
- t25(12, 13, 11, 13, 11, 12, 15, 16, 14, 16);
- t25(14, 15, 18, 19, 17, 19, 17, 18, 21, 22);
- t25(20, 22, 20, 21, 23, 24, 2, 5, 3, 6);
- t25(0, 6, 0, 3, 4, 7, 1, 7, 1, 4);
- t25(11, 14, 8, 14, 8, 11, 12, 15, 9, 15);
- t25(9, 12, 13, 16, 10, 16, 10, 13, 20, 23);
- t25(17, 23, 17, 20, 21, 24, 18, 24, 18, 21);
- t25(19, 22, 8, 17, 9, 18, 0, 18, 0, 9);
- t25(10, 19, 1, 19, 1, 10, 11, 20, 2, 20);
- t25(2, 11, 12, 21, 3, 21, 3, 12, 13, 22);
- t25(4, 22, 4, 13, 14, 23, 5, 23, 5, 14);
- t25(15, 24, 6, 24, 6, 15, 7, 16, 7, 19);
- t25(3, 11, 5, 17, 11, 17, 9, 17, 4, 10);
- t25(6, 12, 7, 14, 4, 6, 4, 7, 12, 14);
- t25(10, 14, 6, 7, 10, 12, 6, 10, 6, 17);
- t25(12, 17, 7, 17, 7, 10, 12, 18, 7, 12);
- t24(10, 18, 12, 20, 10, 20, 10, 12);
-
- output[DTid.xy] = v[12];
-}
diff --git a/WickedEngine/shaders/ssr_raytraceCS.hlsl b/WickedEngine/shaders/ssr_raytraceCS.hlsl
index 15db58d35..d72cf4abd 100644
--- a/WickedEngine/shaders/ssr_raytraceCS.hlsl
+++ b/WickedEngine/shaders/ssr_raytraceCS.hlsl
@@ -4,358 +4,390 @@
PUSHCONSTANT(postprocess, PostProcess);
-Texture2D input : register(t0);
+//#define DEBUG_TILING
-RWTexture2D texture_raytrace : register(u0);
-RWTexture2D texture_rayLengths : register(u1);
+Texture2D texture_surface_normal : register(t0);
+Texture2D texture_surface_roughness : register(t1);
+Texture2D texture_depth_hierarchy : register(t2);
+Texture2D input : register(t3);
-static const float rayTraceStrideMin = 1.0f; // Step in horizontal or vertical pixels between samples.
-static const float rayTraceStrideMax = 10.0f; // Define max stride between samples. Roughness will interpolate between it's min and max counterparts.
-static const float rayTraceMaxStep = 512.0f; // Maximum number of iterations. Higher gives better images but may be slow.
-static const float rayTraceThicknessOffset = 0.0f; // Increse or decrease thickness for each pixels in the depth buffer. [- / +]
-static const float rayTraceThicknessBias = 1.0f; // Bias to control the growth of the thickness.
-static const bool raytraceThicknessInfinite = false; // Use infinite thickness for maximum performance, but may not be suitable for most scenes.
-static const float rayTraceMaxDistance = 1000.0f; // Maximum camera-space distance to trace before returning a miss.
-static const float rayTraceStrideCutoff = 100.0f; // More distant pixels are smaller in screen space. This value tells at what point to
- // start relaxing the stride to give higher quality reflections for objects far from the camera.
-static const float raytraceHZBBias = 0.05f; // This value tells how fast the roughness increases the level.
-static const float raytraceHZBStartLevel = 1.0f;
-static const float raytraceHZBMinStep = 0.005f; // Minimum level increasement per iteration.
-
-
-float DistanceSquared(float2 a, float2 b)
-{
- a -= b;
- return dot(a, a);
-}
-
-bool IntersectsDepthBuffer(float sceneZMax, float rayZMin, float rayZMax)
-{
- // Increase thickness along distance.
- float thickness = max(sceneZMax * rayTraceThicknessBias + rayTraceThicknessOffset, 1.0);
-
-#if 0 // precision issues in DX12
- // Effectively remove line/tiny artifacts, mostly caused by Zbuffers precision.
- float depthScale = min(1.0f, sceneZMax / rayTraceStrideCutoff);
- sceneZMax += lerp(0.05f, 0.0f, depthScale);
-#endif
-
- if (raytraceThicknessInfinite)
- return (rayZMin >= sceneZMax);
- else
- return (rayZMin >= sceneZMax) && (rayZMax - thickness <= sceneZMax);
-}
-
-// Heavily adapted from McGuire and Mara's original implementation
-// http://casual-effects.blogspot.com/2014/08/screen-space-ray-tracing.html
-bool ScreenSpaceRayTrace(float3 csOrig, float3 csDir, float jitter, float roughness, out float2 hitPixel, out float3 hitPoint, out float iterations)
-{
- csOrig += csDir * 0.001; // precision issues in DX12
- float rayLength = ((csOrig.z + csDir.z * rayTraceMaxDistance) < GetCamera().z_near) ?
- (GetCamera().z_near - csOrig.z) / csDir.z : rayTraceMaxDistance;
-
- float3 csRayEnd = csOrig + csDir * rayLength;
-
- // Project into homogeneous clip space
- float4 clipRayOrigin = mul(GetCamera().projection, float4(csOrig, 1.0f));
- float4 clipRayEnd = mul(GetCamera().projection, float4(csRayEnd, 1.0f));
-
- float k0 = 1.0f / clipRayOrigin.w;
- float k1 = 1.0f / clipRayEnd.w;
-
- float3 Q0 = csOrig * k0;
- float3 Q1 = csRayEnd * k1;
-
- // Screen-space endpoints
- float2 P0 = clipRayOrigin.xy * k0;
- float2 P1 = clipRayEnd.xy * k1;
-
- // Project to pixel
- P0 = P0 * float2(0.5, -0.5) + float2(0.5, 0.5);
- P1 = P1 * float2(0.5, -0.5) + float2(0.5, 0.5);
-
- P0.xy *= postprocess.resolution.xy;
- P1.xy *= postprocess.resolution.xy;
-
-#if 0
- // Clip to the screen coordinates. Alternatively we could just modify rayTraceMaxStep instead
- float2 yDelta = float2(postprocess.resolution.y + 2.0f, -2.0f); // - 0.5, 0.5
- float2 xDelta = float2(postprocess.resolution.x + 2.0f, -2.0f); // - 0.5, 0.5
- float alpha = 0.0;
-
- // P0 must be in bounds
- if (P1.y > yDelta.x || P1.y < yDelta.y)
- {
- float yClip = (P1.y > yDelta.x) ? yDelta.x : yDelta.y;
- float yAlpha = (P1.y - yClip) / (P1.y - P0.y);
- alpha = yAlpha;
- }
-
- // P1 must be in bounds
- if (P1.x > xDelta.x || P1.x < xDelta.y)
- {
- float xClip = (P1.x > xDelta.x) ? xDelta.x : xDelta.y;
- float xAlpha = (P1.x - xClip) / (P1.x - P0.x);
- alpha = max(alpha, xAlpha);
- }
-
- // These are all in homogeneous space, so they interpolate linearly
- P1 = lerp(P1, P0, alpha);
- k1 = lerp(k1, k0, alpha);
- Q1 = lerp(Q1, Q0, alpha);
-#endif
-
- // If the line is degenerate, make it cover at least one pixel to avoid handling zero-pixel extent as a special case later
- P1 += (DistanceSquared(P0, P1) < 0.0001f) ? float2(0.01f, 0.01f) : 0.0f;
- float2 screenOffset = P1 - P0;
-
- // Permute so that the primary iteration is in x to collapse all quadrant-specific DDA cases later
- bool permute = false;
- if (abs(screenOffset.x) < abs(screenOffset.y))
- {
- permute = true;
- screenOffset = screenOffset.yx;
- P0 = P0.yx;
- P1 = P1.yx;
- }
-
- float stepDirection = sign(screenOffset.x);
- float stepInterval = stepDirection / screenOffset.x;
-
- // Track the derivatives of Q and k
- float3 dQ = (Q1 - Q0) * stepInterval;
- float dk = (k1 - k0) * stepInterval;
-
- // Because we test 1/2 a texel forward along the ray, on the very last iteration
- // the interpolation can go past the end of the ray. Use these bounds to clamp it.
- float zMin = min(csRayEnd.z, csOrig.z);
- float zMax = max(csRayEnd.z, csOrig.z);
-
- float2 dP = float2(stepDirection, screenOffset.y * stepInterval);
-
- // Scale derivatives by the desired pixel stride and then offset the starting values by the jitter fraction
-#if 1 // Stride based on roughness. Matte materials will recieve higher stride
- float alphaRoughness = roughness * roughness;
- float alphaRoughnessSq = alphaRoughness * alphaRoughness;
-
- float strideScale = 1.0f - min(1.0f, csOrig.z / rayTraceStrideCutoff);
- float strideRoughnessScale = lerp(rayTraceStrideMin, rayTraceStrideMax, min(alphaRoughnessSq, 1.0)); // Climb exponentially at extreme conditions
- float stride = 1.0 + strideScale * strideRoughnessScale;
+#if defined(SSR_EARLYEXIT)
+StructuredBuffer tiles : register(t4);
+#elif defined(SSR_CHEAP)
+StructuredBuffer tiles : register(t5);
#else
- float strideScale = 1.0f - min(1.0f, csOrig.z / rayTraceStrideCutoff);
- float stride = 1.0 + strideScale * rayTraceStrideMin;
-#endif
-
- dP *= stride;
- dQ *= stride;
- dk *= stride;
+StructuredBuffer tiles : register(t6);
+#endif // SSR_EARLYEXIT
- P0 += dP * jitter;
- Q0 += dQ * jitter;
- k0 += dk * jitter;
+RWTexture2D output_rayIndirectSpecular : register(u0);
+RWTexture2D output_rayDirectionPDF : register(u1);
+RWTexture2D output_rayLengths : register(u2);
- float4 PQk = float4(P0, Q0.z, k0);
- float4 dPQk = float4(dP, dQ.z, dk);
- float3 Q = Q0;
+static const float traceThickness = 1.5;
+static const float blendScreenEdgeFade = 5.0f;
- // Adjust end condition for iteration direction
- float end = P1.x * stepDirection;
+static const float HiZTraceMostDetailedLevel = 0.0;
+static const float HiZTraceIterationsMax = 64;
- float stepCount = 0.0f;
- float level = raytraceHZBStartLevel;
+void InitialAdvanceRay(float3 origin, float3 direction, float2 currentMipResolution, float2 currentMipResolution_rcp, float2 floorOffset, float2 uvOffset, out float3 position, out float tCurrent)
+{
+ float2 currentMipPosition = currentMipResolution * origin.xy;
- float prevZMaxEstimate = csOrig.z;
- float rayZMin = prevZMaxEstimate;
- float rayZMax = prevZMaxEstimate;
- float sceneZMax = rayZMax + 100000.0f;
-
- [loop]
- for (; ((PQk.x * stepDirection) <= end) &&
- (stepCount < rayTraceMaxStep) &&
- !IntersectsDepthBuffer(sceneZMax, rayZMin, rayZMax) &&
- (sceneZMax != 0.0f);
- PQk += dPQk, stepCount++)
- {
- if (any(hitPixel < 0.0) || any(hitPixel > 1.0))
- {
- return false;
- }
-
- rayZMin = prevZMaxEstimate;
-
- // Compute the value at 1/2 step into the future
- rayZMax = (dPQk.z * 0.5f + PQk.z) / (dPQk.w * 0.5f + PQk.w);
- rayZMax = clamp(rayZMax, zMin, zMax);
-
- prevZMaxEstimate = rayZMax;
-
- if (rayZMin > rayZMax)
- {
- float t = rayZMin;
- rayZMin = rayZMax;
- rayZMax = t;
- }
+ // Intersect ray with the half box that is pointing away from the ray origin.
+ float2 xyPlane = floor(currentMipPosition) + floorOffset;
+ xyPlane = xyPlane * currentMipResolution_rcp + uvOffset;
- // A simple HZB approach based on roughness
- level += max(raytraceHZBBias * roughness, raytraceHZBMinStep);
- level = min(level, 6.0f);
-
- hitPixel = permute ? PQk.yx : PQk.xy;
- hitPixel *= postprocess.resolution_rcp;
-
- sceneZMax = texture_lineardepth.SampleLevel(sampler_linear_clamp, hitPixel, level) * GetCamera().z_far;
- }
-
- // Undo the last increment, which ran after the test variables were set up
- //PQk -= dPQk;
- //stepCount -= 1.0;
-
- // Advance Q based on the number of steps
- Q.xy += dQ.xy * stepCount;
- Q.z = PQk.z;
- hitPoint = Q * (1.0f / PQk.w);
- iterations = stepCount;
-
- return IntersectsDepthBuffer(sceneZMax, rayZMin, rayZMax);
+ // o + d * t = p' => t = (p' - o) / d
+ float2 t = (xyPlane - origin.xy) / direction.xy;
+ tCurrent = min(t.x, t.y);
+ position = origin + tCurrent * direction;
}
-
-[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
-void main(uint3 DTid : SV_DispatchThreadID)
+bool AdvanceRay(float3 origin, float3 direction, float2 currentMipPosition, float2 currentMipResolution_rcp, float2 floorOffset, float2 uvOffset, float surfaceZ, inout float3 position, inout float tCurrent) // Advances position/tCurrent to the nearest boundary plane; returns true when that was an xy boundary reached above the surface
{
- const float2 uv = (DTid.xy + 0.5f) * postprocess.resolution_rcp;
- const float depth = texture_depth.SampleLevel(sampler_linear_clamp, uv, 1);
- if (depth == 0)
- return;
+ // Create boundary planes: the xy cell border in the ray's direction plus the surface depth as the z plane
+ float2 xyPlane = floor(currentMipPosition) + floorOffset;
+ xyPlane = xyPlane * currentMipResolution_rcp + uvOffset;
+ float3 boundaryPlanes = float3(xyPlane, surfaceZ);
- PrimitiveID prim;
- prim.unpack(texture_gbuffer0[DTid.xy * 2]);
+ // Intersect ray with the half box that is pointing away from the ray origin.
+ // o + d * t = p' => t = (p' - o) / d
+ float3 t = (boundaryPlanes - origin) / direction;
- Surface surface;
- surface.init();
- if (!surface.load(prim, reconstruct_position(uv, depth)))
+ // Prevent using z plane when shooting out of the depth buffer.
+ t.z = direction.z < 0 ? t.z : FLT_MAX;
+
+ // Choose nearest intersection with a boundary.
+ float tMin = min(min(t.x, t.y), t.z);
+
+ // Larger z means closer to the camera.
+ bool aboveSurface = surfaceZ < position.z;
+
+ // Decide whether we are able to advance the ray until we hit the xy boundaries or if we had to clamp it at the surface.
+ // We use the asuint comparison to avoid NaN / Inf logic, also we actually care about bitwise equality here to see if tMin is the t.z we fed into the min above.
+ bool skippedTile = asuint(tMin) != asuint(t.z) && aboveSurface;
+
+ // Make sure to only advance the ray if we're still above the surface.
+ tCurrent = aboveSurface ? tMin : tCurrent;
+
+ // Advance ray
+ position = origin + tCurrent * direction;
+
+ return skippedTile;
+}
+
+float2 GetMipResolution(float2 screenDimensions, int mipLevel) // Scales the mip 0 size by 0.5 per mip level; the result is not rounded
+{
+ return screenDimensions * pow(0.5, mipLevel);
+}
+
+// Based on: https://github.com/GPUOpen-Effects/FidelityFX-SSSR/tree/master
+// Requires origin and direction of the ray to be in screen space [0, 1] x [0, 1]. Returns the final ray position; validHit is false when the ray left the screen.
+float3 HierarchicalRaymarch(float3 origin, float3 direction, float2 screenSize, out bool validHit)
+{
+ // Start on mip with highest detail.
+ int currentMip = HiZTraceMostDetailedLevel;
+
+ // Could recompute these every iteration, but it's faster to hoist them out and update them.
+ float2 currentMipResolution = GetMipResolution(screenSize, currentMip);
+ float2 currentMipResolution_rcp = rcp(currentMipResolution);
+
+ // Offset to the bounding boxes uv space to intersect the ray with the center of the next pixel.
+ // This means we ever so slightly over shoot into the next region.
+ float2 uvOffset = 0.005 * exp2(HiZTraceMostDetailedLevel) / screenSize;
+ uvOffset = direction.xy < 0 ? -uvOffset : uvOffset;
+
+ // Offset applied depending on current mip resolution to move the boundary to the left/right upper/lower border depending on ray direction.
+ float2 floorOffset = direction.xy < 0 ? 0 : 1;
+
+ // Initially advance ray to avoid immediate self intersections.
+ float tCurrent;
+ float3 position;
+ InitialAdvanceRay(origin, direction, currentMipResolution, currentMipResolution_rcp, floorOffset, uvOffset, position, tCurrent);
+
+ int i = 0;
+ while (i < HiZTraceIterationsMax && currentMip >= HiZTraceMostDetailedLevel) // ends when the mip index drops below the most detailed level or the iteration budget is spent
{
- return;
+ if (any(position.xy < 0.0) || any(position.xy > 1.0)) // the ray left the screen: report a miss
+ {
+ validHit = false;
+ return position;
+ }
+
+ float2 currentMipPosition = currentMipResolution * position.xy;
+ float surfaceZ = texture_depth_hierarchy.Load(int3(currentMipPosition, currentMip)).r; // NOTE(review): .r is presumably the max depth written by ssr_depthHierarchyCS - confirm the binding
+
+ bool skippedTile = AdvanceRay(origin, direction, currentMipPosition, currentMipResolution_rcp, floorOffset, uvOffset, surfaceZ, position, tCurrent);
+
+ currentMip += skippedTile ? 1 : -1; // go one mip coarser after a skipped tile, otherwise one mip finer
+ currentMipResolution *= skippedTile ? 0.5 : 2;
+ currentMipResolution_rcp *= skippedTile ? 2 : 0.5;
+
+ i++;
}
- if (surface.roughness > 0.6)
+
+ validHit = (i <= HiZTraceIterationsMax); // NOTE(review): always true here, because the loop cannot leave with i above HiZTraceIterationsMax
+
+ return position;
+}
+
+static const uint rayMarchIterationsMax = 60; // primary ray march step count (higher will find more in distance, but slower)
+static const float rayMarchStepIncrease = 1.05f; // primary ray march step increase (higher will travel more distance, but can miss details)
+static const uint rayMarchFineIterationsMax = 2; // binary step count (higher is nicer but slower)
+static const float rayMarchTolerance = 0.000002; // early exit factor for binary search (smaller is nicer but slower)
+static const float rayMarchLevelIncrement = 0.3; // level increment based on ray travel distance and roughness (higher values improves performance, but traces at lower resolution)
+
+// Refines a ray march hit by repeatedly halving the step V; samplePos is where the ray march left off
+float3 BinarySearch(float3 samplePos, float3 V, float level)
+{
+ for (uint i = 0; i < rayMarchFineIterationsMax; i++)
{
- texture_raytrace[DTid.xy] = 0;
- texture_rayLengths[DTid.xy] = 0;
+ float sampleDepth = texture_depth_hierarchy.SampleLevel(sampler_point_clamp, samplePos.xy, level).g;
+
+ if (abs(samplePos.z - sampleDepth) < rayMarchTolerance) // close enough to the stored depth: stop early
+ {
+ return samplePos;
+ }
+
+ if (samplePos.z >= sampleDepth)
+ {
+ samplePos += V;
+ }
+ // Net movement this iteration: half of V forward when the branch above ran, otherwise half of V backward.
+ V *= 0.5f;
+ samplePos -= V;
+ }
+
+ return samplePos;
+}
+
+// Linear ray march against the depth hierarchy, followed by a short binary refinement.
+// P, V      : ray start and per-step direction in screen space ([0, 1] x [0, 1] in xy, depth in z)
+// roughness : scales how quickly the sampled hierarchy level grows per step
+// jitter    : fraction of V added to the start position before marching
+// validHit  : false when the ray leaves the screen or the step budget runs out
+// Returns the final screen-space xy with the last sampled hierarchy depth in z
+// (the raw ray position when the ray leaves the screen).
+float3 RayMarch(float3 P, float3 V, float roughness, float jitter, out bool validHit)
+{
+    float3 rayPos = P + V * jitter;
+    float surfaceDepth = 0;
+    float mip = 1;
+    bool found = false;
+
+    for (uint iteration = 0; iteration <= rayMarchIterationsMax; iteration++)
+    {
+        // Leaving the [0, 1] screen rectangle ends the trace without a hit.
+        const bool offScreen = any(rayPos.xy < 0.0) || any(rayPos.xy > 1.0);
+        if (offScreen)
+        {
+            validHit = false;
+            return rayPos;
+        }
+
+        rayPos += V;
+        surfaceDepth = texture_depth_hierarchy.SampleLevel(sampler_point_clamp, rayPos.xy, mip).g;
+
+        // The stored depth is greater than the ray depth: treat as an intersection and refine it.
+        if (surfaceDepth > rayPos.z)
+        {
+            rayPos = BinarySearch(rayPos, V, mip);
+            found = true;
+            break;
+        }
+
+        // Grow the step and move towards coarser hierarchy levels as the ray travels.
+        V *= rayMarchStepIncrease;
+        mip += rayMarchLevelIncrement * roughness;
+    }
+
+    validHit = found;
+    return float3(rayPos.xy, surfaceDepth);
+}
+
+// Returns an attenuation factor in [0, 1] that falls off as hitPixel (a UV in [0, 1])
+// approaches the screen border; blendScreenEdgeFade controls where the falloff starts.
+float CalculateEdgeVignette(float2 hitPixel)
+{
+    // Remap the UV from [0, 1] to [-1, 1] so both screen axes are centered.
+    const float2 centered = hitPixel * 2.0 - 1.0;
+
+    // Per-axis fade amount, combined below as a squared length.
+    const float2 edge = saturate(abs(centered) * blendScreenEdgeFade - (blendScreenEdgeFade - 1.0f));
+
+    return saturate(1.0 - dot(edge, edge));
+}
+
+// Confidence in [0, 1] for a traced hit.
+// hit       : screen-space hit (xy = UV, z = depth reached by the ray)
+// hitDepth  : depth buffer value sampled at hit.xy
+// prevHitUV : hit UV reprojected with the velocity buffer
+float ValidateHit(float3 hit, float hitDepth, float2 prevHitUV)
+{
+    // Fade near the screen border for both the hit and its reprojected location; the stronger fade wins.
+    const float edgeFade = min(CalculateEdgeVignette(hit.xy), CalculateEdgeVignette(prevHitUV));
+
+    // Reconstruct, with the camera's inverse projection, the depth buffer surface and the ray end at the same UV.
+    const float3 bufferPosition = reconstruct_position(hit.xy, hitDepth, GetCamera().inverse_projection);
+    const float3 rayPosition = reconstruct_position(hit.xy, hit.z, GetCamera().inverse_projection);
+
+    // The further the ray end is from the surface (relative to traceThickness), the lower the confidence.
+    const float gap = length(bufferPosition - rayPosition);
+    const float thicknessFade = 1.0 - smoothstep(0.0, traceThickness, gap);
+
+    return edgeFade * thicknessFade;
+}
+
+// SSR ray tracing pass. Each thread resolves its pixel through GetReflectionIndirectDispatchCoord(),
+// picks a reflection direction (GGX importance sampling above roughness 0.05, mirror reflection
+// otherwise), traces it in screen space and writes:
+//   output_rayIndirectSpecular : rgb = color sampled at the reprojected hit, a = hit confidence
+//   output_rayDirectionPDF     : xyz = ray direction L, w = H.w
+//   output_rayLengths          : distance from P to the reconstructed hit position, 0 on a miss
+// SSR_EARLYEXIT compiles a variant that only clears the outputs.
+// SSR_CHEAP replaces HierarchicalRaymarch() with RayMarch().
+[numthreads(POSTPROCESS_BLOCKSIZE * POSTPROCESS_BLOCKSIZE, 1, 1)]
+void main(uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID)
+{
+ // This pass is rendered at half-res
+ const uint downsampleFactor = 2;
+
+ const uint2 pixel = GetReflectionIndirectDispatchCoord(Gid, GTid, tiles, downsampleFactor);
+ const float2 uv = (pixel + 0.5f) * postprocess.resolution_rcp;
+
+#ifdef SSR_EARLYEXIT
+
+ output_rayIndirectSpecular[pixel] = 0;
+ output_rayDirectionPDF[pixel] = 0;
+ output_rayLengths[pixel] = 0;
+
+#else
+
+ // This is necessary for accurate upscaling. This is so we don't reuse the same half-res pixels
+ uint2 screenJitter = floor(blue_noise(uint2(0, 0)).xy * downsampleFactor);
+ uint2 jitterPixel = screenJitter + pixel * downsampleFactor;
+ float2 jitterUV = (screenJitter + pixel + 0.5f) * postprocess.resolution_rcp;
+
+ // Due to HiZ tracing, the tracing and the pass components must match depth.
+ float depth = texture_depth_hierarchy[screenJitter + pixel].r;
+ float roughness = texture_surface_roughness[jitterPixel];
+
+ if (!NeedReflection(roughness, depth))
+ {
+ output_rayIndirectSpecular[pixel] = 0.0;
+ output_rayDirectionPDF[pixel] = 0.0;
+ output_rayLengths[pixel] = 0.0;
return;
}
- // Everything in view space:
- float3 N = normalize(mul((float3x3)GetCamera().view, surface.N));
- float3 P = reconstruct_position(uv, depth, GetCamera().inverse_projection);
- float3 V = normalize(-P);
- const float roughness = GetRoughness(surface.roughness);
-
- const float roughnessFade = GetRoughnessFade(roughness, SSRMaxRoughness);
- if (roughnessFade <= 0)
- {
- texture_raytrace[DTid.xy] = 0;
- return;
- }
-
+ // World space: P comes from the default reconstruct_position() overload and V points at the camera position.
+ float3 N = texture_surface_normal[jitterPixel];
+ float3 P = reconstruct_position(jitterUV, depth);
+ float3 V = normalize(GetCamera().position - P);
+
float4 H;
float3 L;
- float jitter;
if (roughness > 0.05f)
{
float3x3 tangentBasis = GetTangentBasis(N);
float3 tangentV = mul(tangentBasis, V);
#ifdef GGX_SAMPLE_VISIBLE
-
-#if 1
- const float2 bluenoise = blue_noise(DTid.xy).xy;
+
+ const float2 bluenoise = blue_noise(pixel).xy;
float2 Xi = bluenoise.xy;
-
+
Xi.y = lerp(Xi.y, 0.0f, GGX_IMPORTANCE_SAMPLE_BIAS);
H = ImportanceSampleVisibleGGX(SampleDisk(Xi), roughness, tangentV);
-
-#else // Old
-
- // Low-discrepancy sequence
- uint2 Random = Rand_PCG16(int3((DTid.xy + 0.5f), GetFrame().frame_count)).xy;
-
- float2 Xi = HammersleyRandom16(1, Random); // SingleSPP
-
- Xi.y = lerp(Xi.y, 0.0f, GGX_IMPORTANCE_SAMPLE_BIAS);
-
- H = ImportanceSampleVisibleGGX(SampleDisk(Xi), roughness, tangentV);
-
-#endif
- // Tangent to world
+ // Tangent to world
H.xyz = mul(H.xyz, tangentBasis);
-
+
#else
-
+
const float surfaceMargin = 0.0f;
const float maxRegenCount = 15.0f;
-
- uint2 Random = Rand_PCG16(int3((DTid.xy + 0.5f), GetFrame().frame_count)).xy;
-
- // By using an uniform importance sampling method, some rays go below the surface.
- // We simply re-generate them at a negligible cost, to get some nice ones.
-
+
+ // By using an uniform importance sampling method, some rays go below the surface.
+ // We simply re-generate them at a negligible cost, to get some nice ones.
+
float RdotN = 0.0f;
float regenCount = 0;
- [loop]
+ [loop]
for (; RdotN <= surfaceMargin && regenCount < maxRegenCount; regenCount++)
{
- // Low-discrepancy sequence
- //float2 Xi = float2(Random) * rcp(65536.0); // equivalent to HammersleyRandom(0, 1, Random).
- float2 Xi = HammersleyRandom16(regenCount, Random); // SingleSPP
-
+ // Low-discrepancy sequence
+ const float2 bluenoise = blue_noise(pixel, regenCount).xy;
+
+ float2 Xi = bluenoise.xy;
+
Xi.y = lerp(Xi.y, 0.0, GGX_IMPORTANCE_SAMPLE_BIAS);
-
+
H = ImportanceSampleGGX(Xi, roughness);
-
- // Tangent to world
+
+ // Tangent to world
H.xyz = mul(H.xyz, tangentBasis);
-
+
RdotN = dot(N, reflect(-V, H.xyz));
}
-
-#endif
-
+
+#endif // GGX_SAMPLE_VISIBLE
+
L = reflect(-V, H.xyz);
- jitter = InterleavedGradientNoise(DTid.xy, GetFrame().frame_count);
}
else
{
H = float4(N.xyz, 1.0f);
L = reflect(-V, H.xyz);
- jitter = 0;
}
-
- float2 hitPixel = float2(0.0f, 0.0f);
- float3 hitPoint = float3(0.0f, 0.0f, 0.0f);
- float iterations = 0.0f;
- bool hit = ScreenSpaceRayTrace(P, L, jitter, roughness, hitPixel, hitPoint, iterations);
+ float4 rayStartClip = mul(GetCamera().view_projection, float4(P, 1)); // World to Clip
+ float4 rayEndClip = mul(GetCamera().view_projection, float4(P + L, 1));
+ float3 rayStartScreen = rayStartClip.xyz * rcp(rayStartClip.w);
+ float3 rayEndScreen = rayEndClip.xyz * rcp(rayEndClip.w);
- float hitDepth = texture_depth.SampleLevel(sampler_linear_clamp, hitPixel, 1);
+ rayStartScreen.xy = rayStartScreen.xy * float2(0.5, -0.5) + float2(0.5, 0.5);
+ rayEndScreen.xy = rayEndScreen.xy * float2(0.5, -0.5) + float2(0.5, 0.5);
- // Output:
- // xy: hit pixel
- // z: hit depth
- // w: pdf
- float4 raytrace = max(0, float4(hitPixel, hitDepth, H.w));
- texture_raytrace[DTid.xy] = raytrace;
+#ifdef SSR_CHEAP
- if (hit)
+ rayStartScreen.xy *= postprocess.params1.xy; // Ratio factor between hierarchy and pass
+ rayEndScreen.xy *= postprocess.params1.xy;
+
+ float3 rayDirectionScreen = rayEndScreen - rayStartScreen;
+
+ // The ray marching benefits from jittering to create a smoother transition between samples and LOD
+ float jitter = InterleavedGradientNoise(pixel, GetFrame().frame_count % 8u); // Temporally stabilize
+
+ bool validHit = false;
+ float3 hit = RayMarch(rayStartScreen, rayDirectionScreen, roughness, jitter, validHit);
+
+ hit.xy *= postprocess.params1.zw; // Undo ratio
+
+#else
+
+ float3 rayDirectionScreen = rayEndScreen - rayStartScreen;
+
+ bool validHit = false;
+ float3 hit = HierarchicalRaymarch(rayStartScreen, rayDirectionScreen, postprocess.resolution, validHit);
+
+#endif // SSR_CHEAP
+
+ // Reproject the hit with the velocity buffer so the color can be fetched from 'input' at the reprojected UV.
+ float2 prevHitUV = texture_gbuffer1.SampleLevel(sampler_point_clamp, hit.xy, 0).xy + hit.xy;
+
+ float hitDepth = texture_depth.SampleLevel(sampler_point_clamp, hit.xy, 0);
+ float confidence = validHit ? ValidateHit(hit, hitDepth, prevHitUV) : 0;
+
+ float4 indirectSpecular;
+ indirectSpecular.rgb = confidence > 0 ? input.SampleLevel(sampler_point_clamp, prevHitUV, 0).rgb : 0;
+ indirectSpecular.a = confidence;
+
+ output_rayIndirectSpecular[pixel] = indirectSpecular;
+
+ output_rayDirectionPDF[pixel] = float4(L, H.w);
+
+ if (validHit)
{
- const float3 Phit = reconstruct_position(uv, hitDepth, GetCamera().inverse_projection);
- texture_rayLengths[DTid.xy] = distance(P, Phit);
+ // P was reconstructed with the default (world space) overload, so the hit is reconstructed the
+ // same way and at the hit's own UV. Using inverse_projection at jitterUV would measure the
+ // distance between points expressed in two different spaces.
+ const float3 Phit = reconstruct_position(hit.xy, hit.z);
+ output_rayLengths[pixel] = distance(P, Phit);
}
else
{
- texture_rayLengths[DTid.xy] = 0;
+ output_rayLengths[pixel] = 0;
}
+
+#endif // SSR_EARLYEXIT
+
+#ifdef DEBUG_TILING
+ // Tint the output per shader variant: blue = early exit, green = cheap, red = full trace.
+ float3 color = input[pixel].rgb;
+#if defined(SSR_EARLYEXIT)
+ output_rayIndirectSpecular[pixel] = float4(lerp(color, float3(0, 0, 1), 0.5f), 1.0);
+#elif defined(SSR_CHEAP)
+ output_rayIndirectSpecular[pixel] = float4(lerp(color, float3(0, 1, 0), 0.5f), 1.0);
+#else
+ output_rayIndirectSpecular[pixel] = float4(lerp(color, float3(1, 0, 0), 0.5f), 1.0);
+#endif // SSR_EARLYEXIT
+#endif // DEBUG_TILING
}
diff --git a/WickedEngine/shaders/ssr_raytraceCS_cheap.hlsl b/WickedEngine/shaders/ssr_raytraceCS_cheap.hlsl
new file mode 100644
index 000000000..c3e5e0099
--- /dev/null
+++ b/WickedEngine/shaders/ssr_raytraceCS_cheap.hlsl
@@ -0,0 +1,2 @@
+#define SSR_CHEAP
+#include "ssr_raytraceCS.hlsl"
diff --git a/WickedEngine/shaders/ssr_raytraceCS_earlyexit.hlsl b/WickedEngine/shaders/ssr_raytraceCS_earlyexit.hlsl
new file mode 100644
index 000000000..f486e9992
--- /dev/null
+++ b/WickedEngine/shaders/ssr_raytraceCS_earlyexit.hlsl
@@ -0,0 +1,2 @@
+#define SSR_EARLYEXIT
+#include "ssr_raytraceCS.hlsl"
diff --git a/WickedEngine/shaders/ssr_resolveCS.hlsl b/WickedEngine/shaders/ssr_resolveCS.hlsl
index 39e5aa590..816da6a56 100644
--- a/WickedEngine/shaders/ssr_resolveCS.hlsl
+++ b/WickedEngine/shaders/ssr_resolveCS.hlsl
@@ -5,221 +5,170 @@
PUSHCONSTANT(postprocess, PostProcess);
-Texture2D texture_raytrace : register(t0);
-Texture2D texture_main : register(t1);
+Texture2D texture_surface_normal : register(t0); // surface normal, read at DTid.xy in main()
+Texture2D texture_surface_roughness : register(t1); // surface roughness, read at DTid.xy in main()
+Texture2D texture_rayIndirectSpecular : register(t2); // traced color, indexed with tracing coordinates (DTid.xy / 2)
+Texture2D texture_rayDirectionPDF : register(t3); // rgb = ray direction, a = PDF (see GetWeight)
+Texture2D texture_rayLength : register(t4); // ray length, indexed with tracing coordinates
RWTexture2D texture_resolve : register(u0);
+RWTexture2D texture_resolve_variance : register(u1); // weighted luminance variance of the resolved samples
+RWTexture2D texture_reprojectionDepth : register(u2); // post-projection depth of (surface linear depth + ray length)
+static const float2 resolveSpatialSizeMinMax = float2(2.0, 8.0); // Min/max extent of the neighbor offsets in main(). Good to have a min size as downsample scale (2x in this case)
+static const uint resolveSpatialReconstructionCount = 4; // Number of neighbor samples taken per pixel in main(); an integer literal, since this is a uint loop count
-static const float2 spatialReuseOffsets3x3[9] =
+float GetWeight(int2 neighborTracingCoord, float3 V, float3 N, float roughness, float NdotV) // weight of a neighbor's traced ray for this pixel: local BRDF term divided by the neighbor's PDF
{
- float2(0.0, 0.0),
- float2(0.0, 1.0),
- float2(1.0, -1.0),
- float2(-1.0, -1.0),
- float2(-1.0, 0.0),
- float2(0.0, -1.0),
- float2(1.0, 0.0),
- float2(-1.0, 1.0),
- float2(1.0, 1.0)
-};
+ // Sample local pixel information
+ float4 rayDirectionPDF = texture_rayDirectionPDF[neighborTracingCoord];
+ float3 rayDirection = rayDirectionPDF.rgb;
+ float PDF = rayDirectionPDF.a;
-// Not in use, but could perhaps be useful in the future.
-/*float2 CalculateTailDirection(float3 viewNormal)
-{
- float3 upVector = abs(viewNormal.z) < 0.999 ? float3(0.0, 0.0, 1.0) : float3(1.0, 0.0, 0.0);
- float3 T = normalize(cross(upVector, viewNormal));
+ float3 sampleL = normalize(rayDirection);
+ float3 sampleH = normalize(sampleL + V);
- float tailDirection = T.x * -viewNormal.y;
-
- return lerp(float2(1.0, 0.1), float2(0.1, 1.0), tailDirection);
-}*/
+ float sampleNdotH = saturate(dot(N, sampleH));
+ float sampleNdotL = saturate(dot(N, sampleL));
-float CalculateEdgeFade(float2 hitPixel)
-{
- float2 hitPixelNDC = hitPixel * 2.0 - 1.0;
-
- //float maxDimension = min(1.0, max(abs(hitPixelNDC.x), abs(hitPixelNDC.y)));
- //float attenuation = 1.0 - max(0.0, maxDimension - blendScreenEdgeFade) / (1.0 - blendScreenEdgeFade);
+ float roughnessBRDF = roughness * roughness; // the BRDF helpers below take squared roughness
- float2 vignette = saturate(abs(hitPixelNDC) * SSRBlendScreenEdgeFade - (SSRBlendScreenEdgeFade - 1.0f));
- float attenuation = saturate(1.0 - dot(vignette, vignette));
-
- return attenuation;
+ float Vis = V_SmithGGXCorrelated(roughnessBRDF, NdotV, sampleNdotL);
+ float D = D_GGX(roughnessBRDF, sampleNdotH, sampleH);
+ float localBRDF = Vis * D * sampleNdotL; // no Fresnel factor is applied here
+
+ float weight = localBRDF / max(PDF, 0.00001f); // PDF is clamped so a zero PDF cannot divide by zero
+
+ return weight;
}
-void GetSampleInfo(float2 velocity, float2 neighborUV, float2 uv, float3 P, float3 V, float3 N, float NdotV, float specularConeTangent, float roughness, out float4 sampleColor, out float weight)
+// Weighted incremental variance
+// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+// sampleColor : sample whose luminance is accumulated
+// weight      : weight of this sample
+// weightSum   : running sum of weights, already including this sample's weight
+// mean, S     : running weighted mean and weighted sum of squared deviations (updated in place)
+void GetWeightedVariance(float4 sampleColor, float weight, float weightSum, inout float mean, inout float S)
{
- // Sample local pixel information
- float4 raytraceSource = texture_raytrace.SampleLevel(sampler_point_clamp, neighborUV, 0);
-
- float2 hitPixel = raytraceSource.xy + velocity;
- float hitDepth = raytraceSource.z;
- float hitPDF = raytraceSource.w;
+ // While every weight so far is zero, weight / weightSum would be 0 / 0 and the resulting NaN
+ // would stay in mean and S for all later samples. A sample without weight contributes nothing.
+ if (!(weightSum > 0.0f))
+ {
+ return;
+ }
+
+ float luminance = Luminance(sampleColor.rgb);
+ float oldMean = mean;
+ mean += weight / weightSum * (luminance - oldMean);
+ S += weight * (luminance - oldMean) * (luminance - mean);
+}
- float intersectionCircleRadius = specularConeTangent * length(hitPixel - uv);
- float sourceMip = clamp(log2(intersectionCircleRadius * ssr_input_resolution_max), 0.0, ssr_input_maxmip) * SSRResolveConeMip;
-
- sampleColor.rgb = texture_main.SampleLevel(sampler_linear_clamp, hitPixel, sourceMip).rgb; // Scene color
- sampleColor.a = CalculateEdgeFade(raytraceSource.xy); // Opacity - Since this is used for masking, we can ignore velocity
-
- // BRDF Weight
-
- float3 hitViewPosition = reconstruct_position(hitPixel, hitDepth, GetCamera().inverse_projection);
-
- float3 L = normalize(hitViewPosition - P);
- float3 H = normalize(L + V);
+// modified from 'globals.hlsli' with random shift; returns a 2D point with both components scaled towards [0, 1)
+// idx : iteration index
+// num : number of iterations in total
+// random : random values; x is masked to its low 16 bits and used as a shift of the first component, y is XORed into the bit-reversed index
+inline float2 hammersley2d_random(uint idx, uint num, uint2 random)
+{
+ uint bits = idx;
+ bits = (bits << 16u) | (bits >> 16u); // these five lines reverse the bit order of idx
+ bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
+ bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
+ bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
+ bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
+ const float radicalInverse_VdC = float(bits ^ random.y) * 2.3283064365386963e-10; // / 0x100000000
- float NdotH = saturate(dot(N, H));
- float NdotL = saturate(dot(N, L));
-
- Surface surface;
- surface.init();
- surface.roughnessBRDF = roughness * roughness;
- surface.NdotV = NdotV;
-
- SurfaceToLight surfaceToLight;
- surfaceToLight.NdotH = NdotH;
- surfaceToLight.NdotL = NdotL;
-
- // Calculate BRDF where Fresnel = 1
- float Vis = V_SmithGGXCorrelated(surface.roughnessBRDF, surface.NdotV, surfaceToLight.NdotL);
- float D = D_GGX(surface.roughnessBRDF, surfaceToLight.NdotH, surfaceToLight.H);
- float specularLight = Vis * D * PI / 4.0;
+ // (random.x & 0xffff) / (1 << 16): keep the low 16 bits (0..65535), then scale to the range 0 - 1
+ return float2(frac(float(idx) / float(num) + float(random.x & 0xffff) / (1 << 16)), radicalInverse_VdC); // frac since we only want range [0; 1[
+}
- weight = specularLight / max(hitPDF, 0.00001f);
+// Mixes a 3D unsigned integer coordinate into a single 32-bit hash value.
+uint baseHash(uint3 p)
+{
+    // First round: combine each component with a shifted copy of its neighbor, then multiply.
+    const uint3 mixed = 1103515245u * ((p.xyz >> 1u) ^ (p.yzx));
+
+    // Second round: fold the three lanes into one value.
+    const uint folded = 1103515245u * ((mixed.x ^ mixed.z) ^ (mixed.y >> 3u));
+
+    // Final xor-shift of the high half into the low half.
+    return folded ^ (folded >> 16);
+}
+
+// Great quality hash with 3D input and 3D output
+// based on: https://www.shadertoy.com/view/Xt3cDn
+uint3 hash33(uint3 x)
+{
+    const uint seed = baseHash(x);
+
+    // Derive three values from the one hash by multiplying with different constants.
+    // see: http://random.mat.sbg.ac.at/results/karl/server/node4.html
+    uint3 hashed;
+    hashed.x = seed;
+    hashed.y = seed * 16807u;
+    hashed.z = seed * 48271u;
+    return hashed;
+}
+
+// Computes post-projection depth from linear depth
+// lin       : linear depth
+// near, far : camera near and far plane distances
+float getInverseLinearDepth(float lin, float near, float far)
+{
+    const float numerator = (lin - 2 * far) * near + far * lin;
+    const float denominator = lin * near - far * lin;
+    const float z_n = numerator / denominator;
+
+    // Remap from [-1, 1] to [0, 1].
+    return (z_n + 1) / 2;
}
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
const float2 uv = (DTid.xy + 0.5f) * postprocess.resolution_rcp;
- const float depth = texture_depth.SampleLevel(sampler_linear_clamp, uv, 0);
- if (depth == 0.0f)
- return;
+ // Spatial resolve: blends the traced results of a few jittered neighbors, weighted by
+ // GetWeight(), and also outputs the weighted luminance variance and a reprojection depth.
+ // The tracing buffers are indexed with half of this pass's pixel coordinates.
+ const uint2 tracingCoord = DTid.xy / 2;
- // Everthing in view space:
- const float3 P = reconstruct_position(uv, depth, GetCamera().inverse_projection);
- const float3 V = normalize(-P);
+ const float depth = texture_depth[DTid.xy];
+ const float roughness = texture_surface_roughness[DTid.xy];
- PrimitiveID prim;
- prim.unpack(texture_gbuffer0[DTid.xy * 2]);
-
- Surface surface;
- surface.init();
- if (!surface.load(prim, P))
+ if (!NeedReflection(roughness, depth))
{
+ texture_resolve[DTid.xy] = texture_rayIndirectSpecular[tracingCoord];
+ texture_resolve_variance[DTid.xy] = 0.0;
+ texture_reprojectionDepth[DTid.xy] = 0.0;
return;
}
- const float3 N = normalize(mul((float3x3)GetCamera().view, surface.N));
- const float roughness = GetRoughness(surface.roughness);
-
+ // Everything in world space:
+ const float3 P = reconstruct_position(uv, depth);
+ const float3 N = texture_surface_normal[DTid.xy];
+ const float3 V = normalize(GetCamera().position - P);
const float NdotV = saturate(dot(N, V));
- const float2 velocity = texture_gbuffer1.SampleLevel(sampler_point_clamp, uv, 0).xy;
- const float2 prevUV = uv + velocity;
+ const float resolveSpatialScale = saturate(roughness * 5.0); // roughness 0.2 is destination
+ const float2 resolveSpatialSize = lerp(resolveSpatialSizeMinMax.x, resolveSpatialSizeMinMax.y, resolveSpatialScale);
- // Early out, useless if the roughness is out of range
- float roughnessFade = GetRoughnessFade(roughness, SSRMaxRoughness);
- if (roughnessFade <= 0.0f)
- {
- texture_resolve[DTid.xy] = 0;
- return;
- }
-
- // Since we aren't importance sampling in this range, no need to resolve
- if (roughness < 0.05f)
- {
- float4 raytraceSource = texture_raytrace.SampleLevel(sampler_point_clamp, uv, 0);
- float2 hitPixel = raytraceSource.xy + velocity;
-
- float4 sampleColor;
- sampleColor.rgb = texture_main.SampleLevel(sampler_linear_clamp, hitPixel, 0).rgb; // Scene color
- sampleColor.a = CalculateEdgeFade(raytraceSource.xy); // Opacity
-
- texture_resolve[DTid.xy] = sampleColor;
- return;
- }
-
-
- // Cone mip sampling
- float specularConeTangent = lerp(0.0, roughness * (1.0 - GGX_IMPORTANCE_SAMPLE_BIAS), NdotV * sqrt(roughness));
- specularConeTangent *= lerp(saturate(NdotV * 2), 1.0f, sqrt(roughness));
-
-
-#if 1 // EAW spatial resolve
-
-
float4 result = 0.0f;
float weightSum = 0.0f;
-
-#define BLOCK_SAMPLE_RADIUS 1
-
- [unroll]
- for (int y = -BLOCK_SAMPLE_RADIUS; y <= BLOCK_SAMPLE_RADIUS; y++)
+
+ float mean = 0.0f;
+ float S = 0.0f;
+
+ float closestRayLength = 0.0f;
+
+ const uint sampleCount = resolveSpatialReconstructionCount;
+ const uint2 random = hash33(uint3(DTid.xy, GetFrame().frame_count)).xy;
+
+ for (int i = 0; i < sampleCount; i++)
{
- [loop]
- for (int x = -BLOCK_SAMPLE_RADIUS; x <= BLOCK_SAMPLE_RADIUS; x++)
+ float2 offset = (hammersley2d_random(i, sampleCount, random) - 0.5) * resolveSpatialSize;
+
+ int2 neighborTracingCoord = tracingCoord + offset;
+ int2 neighborCoord = DTid.xy + offset;
+
+ float neighborDepth = texture_depth[neighborCoord];
+ if (neighborDepth > 0.0)
{
- if (uint(abs(x) + abs(y)) % 2 == 0)
- continue;
-
- float2 offsetUV = float2(x, y) * postprocess.resolution_rcp * SSRResolveSpatialSize;
- float2 neighborUV = uv + offsetUV;
-
- float4 sampleColor;
- float weight;
- GetSampleInfo(velocity, neighborUV, uv, P, V, N, NdotV, specularConeTangent, roughness, sampleColor, weight);
-
+ float weight = GetWeight(neighborTracingCoord, V, N, roughness, NdotV);
+
+ float4 sampleColor = texture_rayIndirectSpecular[neighborTracingCoord];
sampleColor.rgb *= rcp(1 + Luminance(sampleColor.rgb));
-
+
result += sampleColor * weight;
weightSum += weight;
+
+ GetWeightedVariance(sampleColor, weight, weightSum, mean, S);
+
+ if (weight > 0.001)
+ {
+ float neighborRayLength = texture_rayLength[neighborTracingCoord];
+ closestRayLength = max(closestRayLength, neighborRayLength);
+ }
}
}
- result /= weightSum;
-
- result.rgb *= rcp(1 - Luminance(result.rgb));
-
-#undef BLOCK_SAMPLE_RADIUS
-
-
-#else // Frostbite presentation, spatial resolve
-
- float4 result = 0.0f;
- float weightSum = 0.0f;
-
-#define NUM_RESOLVE 4 // Four samples to achieve effective ray reuse patterns
-
- [unroll]
- for (uint i = 0; i < NUM_RESOLVE; i++)
- {
- float2 offsetUV = spatialReuseOffsets3x3[i] * postprocess.resolution_rcp * SSRResolveSpatialSize;
- float2 neighborUV = uv + offsetUV;
-
- float4 sampleColor;
- float weight;
- GetSampleInfo(velocity, neighborUV, uv, P, V, N, NdotV, specularConeTangent, roughness, sampleColor, weight);
-
- sampleColor.rgb *= rcp( 1 + Luminance(sampleColor.rgb) );
-
- result += sampleColor * weight;
- weightSum += weight;
- }
+ // If every neighbor was skipped (depth 0) or had zero weight, weightSum is still 0 and the
+ // divisions below would be 0 / 0. Normalizing by 1 instead keeps the accumulated zeros.
+ weightSum = weightSum > 0.0f ? weightSum : 1.0f;
result /= weightSum;
-
- result.rgb *= rcp( 1 - Luminance(result.rgb) );
-
-#undef NUM_RESOLVE
-
-
-#endif
-
-
- result *= roughnessFade;
- result *= SSRIntensity;
-
+ result.rgb *= rcp(1 - Luminance(result.rgb)); // inverse of the rcp(1 + luminance) compression applied per sample
+
+ // Population variance
+ float resolveVariance = S / weightSum;
+
+ // Convert to post-projection depth so we can construct dual source reprojection buffers later
+ const float lineardepth = texture_lineardepth[DTid.xy] * GetCamera().z_far;
+ float reprojectionDepth = getInverseLinearDepth(lineardepth + closestRayLength, GetCamera().z_near, GetCamera().z_far);
+
texture_resolve[DTid.xy] = max(result, 0.00001f);
+ texture_resolve_variance[DTid.xy] = resolveVariance;
+ texture_reprojectionDepth[DTid.xy] = reprojectionDepth;
}
diff --git a/WickedEngine/shaders/ssr_surfaceCS.hlsl b/WickedEngine/shaders/ssr_surfaceCS.hlsl
new file mode 100644
index 000000000..fa5c3ab17
--- /dev/null
+++ b/WickedEngine/shaders/ssr_surfaceCS.hlsl
@@ -0,0 +1,51 @@
+#include "globals.hlsli"
+#include "brdf.hlsli"
+#include "lightingHF.hlsli"
+#include "ShaderInterop_Postprocess.h"
+
+PUSHCONSTANT(postprocess, PostProcess);
+
+RWTexture2D output_surface_normal : register(u0); // receives surface.N, or 0 when there is no surface
+RWTexture2D output_surface_roughness : register(u1); // receives surface.roughness, or 0 when there is no surface
+RWTexture2D output_surface_environment : register(u2); // receives EnvironmentReflection_Global(surface), or 0 when there is no surface
+
+// Writes the surface normal, roughness and global environment reflection for every pixel.
+// Pixels with depth 0, or whose surface cannot be loaded, get zeros in all three outputs.
+[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
+void main(uint3 DTid : SV_DispatchThreadID)
+{
+    const uint2 pixel = DTid.xy;
+
+    // Values written when no surface is available.
+    float3 normalOut = 0.0;
+    float roughnessOut = 0.0;
+    float3 environmentOut = 0.0;
+
+    const float depth = texture_depth[pixel];
+    if (depth != 0.0)
+    {
+        uint2 dim;
+        texture_depth.GetDimensions(dim.x, dim.y);
+        const float2 uv = (pixel + 0.5f) / dim;
+
+        uint2 primitiveID = texture_gbuffer0[pixel]; // Map to resolution
+
+        PrimitiveID prim;
+        prim.unpack(primitiveID);
+
+        Surface surface;
+        surface.init();
+        if (surface.load(prim, reconstruct_position(uv, depth)))
+        {
+            normalOut = surface.N;
+            roughnessOut = surface.roughness;
+            environmentOut = EnvironmentReflection_Global(surface);
+        }
+    }
+
+    output_surface_normal[pixel] = normalOut;
+    output_surface_roughness[pixel] = roughnessOut;
+    output_surface_environment[pixel] = environmentOut;
+}
diff --git a/WickedEngine/shaders/ssr_temporalCS.hlsl b/WickedEngine/shaders/ssr_temporalCS.hlsl
index 1a55190f6..2b362b9ff 100644
--- a/WickedEngine/shaders/ssr_temporalCS.hlsl
+++ b/WickedEngine/shaders/ssr_temporalCS.hlsl
@@ -4,177 +4,236 @@
PUSHCONSTANT(postprocess, PostProcess);
-Texture2D resolve_current : register(t0);
-Texture2D resolve_history : register(t1);
-Texture2D rayLengths : register(t3);
+Texture2D texture_surface_roughness : register(t0); // surface roughness, read at DTid.xy in main()
+Texture2D texture_color_current : register(t1); // current frame color; also copied straight to output_color on the early-out paths
+Texture2D texture_color_history : register(t2); // history color, sampled by SamplePreviousColor()
+Texture2D texture_variance_current : register(t3);
+Texture2D texture_variance_history : register(t4);
+Texture2D texture_reprojectionDepth : register(t5); // read at DTid.xy in main()
-RWTexture2D output : register(u0);
+RWTexture2D output_color : register(u0);
+RWTexture2D output_variance : register(u1);
-static const float temporalResponseMin = 0.75;
-static const float temporalResponseMax = 0.95f;
-static const float temporalScale = 3.0;
-static const float temporalExposure = 10.0f;
+static const float temporalResponse = 0.95;
+static const float temporalScale = 2.0;
+static const float disocclusionDepthWeight = 1.0f; // scales the relative depth difference inside the exponent in GetDisocclusion()
+static const float disocclusionThreshold = 0.9f; // SamplePreviousColor() zeroes the disocclusion output when its final value is below this
+static const float varianceTemporalResponse = 0.9f;
-inline float Luma4(float3 color)
+float2 CalculateReprojectionBuffer(float2 uv, float depth) // returns the previous-frame UV of the point at (uv, depth), using the current inverse and previous view-projection matrices
{
- return (color.g * 2) + (color.r + color.b);
+ float x = uv.x * 2 - 1;
+ float y = (1 - uv.y) * 2 - 1; // flip y while remapping from UV to [-1, 1]
+ float2 screenPosition = float2(x, y);
+
+ float4 thisClip = float4(screenPosition, depth, 1);
+
+ float4 prevClip = mul(GetCamera().inverse_view_projection, thisClip); // unproject with the current camera...
+ prevClip = mul(GetCamera().previous_view_projection, prevClip); // ...then project with the previous frame's camera
+
+ float2 prevScreen = prevClip.xy / prevClip.w;
+
+ float2 screenVelocity = screenPosition - prevScreen;
+ float2 prevScreenPosition = screenPosition - screenVelocity; // algebraically equal to prevScreen
+
+ return prevScreenPosition * float2(0.5, -0.5) + 0.5; // back from [-1, 1] to UV, flipping y again
}
-inline float HdrWeight4(float3 color, float exposure)
+float GetDisocclusion(float depth, float depthHistory) // returns exp(-relative linear depth difference * disocclusionDepthWeight): 1 when the depths match, approaching 0 as they diverge
{
- return rcp(Luma4(color) * exposure + 4.0f);
+ float lineardepthCurrent = compute_lineardepth(depth);
+ float lineardepthHistory = compute_lineardepth(depthHistory);
+
+ float disocclusion = 1.0
+ //* exp(-abs(1.0 - max(0.0, dot(normal, normalHistory))) * disocclusionNormalWeight) // Potential normal check if necessary
+ * exp(-abs(lineardepthHistory - lineardepthCurrent) / lineardepthCurrent * disocclusionDepthWeight); // difference is taken relative to the current linear depth
+
+ return disocclusion;
}
-float4 clip_aabb(float3 aabb_min, float3 aabb_max, float4 p, float4 q)
+float4 SamplePreviousColor(float2 prevUV, float2 size, float depth, out float disocclusion, out float2 prevUVSample) // fetches history color at prevUV with two depth-based fallbacks; disocclusion receives the final depth agreement (0 when below disocclusionThreshold), prevUVSample the UV actually used
{
- float3 p_clip = 0.5 * (aabb_max + aabb_min);
- float3 e_clip = 0.5 * (aabb_max - aabb_min) + 0.00000001f;
+ prevUVSample = prevUV;
- float4 v_clip = q - float4(p_clip, p.w);
- float3 v_unit = v_clip.xyz / e_clip;
- float3 a_unit = abs(v_unit);
- float ma_unit = max(a_unit.x, max(a_unit.y, a_unit.z));
+ float4 previousColor = texture_color_history.SampleLevel(sampler_linear_clamp, prevUVSample, 0);
+ float previousDepth = texture_depth_history.SampleLevel(sampler_point_clamp, prevUVSample, 0);
- if (ma_unit > 1.0)
- return float4(p_clip, p.w) + v_clip / ma_unit;
- else
- return q; // point inside aabb
-}
+ disocclusion = GetDisocclusion(depth, previousDepth);
+ if (disocclusion > disocclusionThreshold) // Good enough
+ {
+ return previousColor;
+ }
-inline void ResolverAABB(Texture2D currentColor, SamplerState currentSampler, float sharpness, float exposureScale, float AABBScale, float2 uv, float2 texelSize, inout float4 currentMin, inout float4 currentMax, inout float4 currentAverage, inout float4 currentOutput)
-{
- const int2 SampleOffset[9] = { int2(-1.0, -1.0), int2(0.0, -1.0), int2(1.0, -1.0), int2(-1.0, 0.0), int2(0.0, 0.0), int2(1.0, 0.0), int2(-1.0, 1.0), int2(0.0, 1.0), int2(1.0, 1.0) };
-
- // Modulate Luma HDR
-
- float4 sampleColors[9];
- [unroll]
- for (uint i = 0; i < 9; i++)
- {
- sampleColors[i] = currentColor.SampleLevel(currentSampler, uv + (SampleOffset[i] / texelSize), 0.0f);
- }
+ // Fallback 1: search the 3x3 texel neighborhood for the history depth that agrees best with the current depth
+ if (disocclusion < disocclusionThreshold)
+ {
+ float2 closestUV = prevUVSample;
+ float2 dudv = rcp(size); // size of one texel in UV units
- float sampleWeights[9];
- [unroll]
- for (uint j = 0; j < 9; j++)
- {
- sampleWeights[j] = HdrWeight4(sampleColors[j].rgb, exposureScale);
- }
+ const int searchRadius = 1;
+ for (int y = -searchRadius; y <= searchRadius; y++)
+ {
+ for (int x = -searchRadius; x <= searchRadius; x++)
+ {
+ int2 offset = int2(x, y);
+ float2 sampleUV = prevUVSample + offset * dudv;
- float totalWeight = 0;
- [unroll]
- for (uint k = 0; k < 9; k++)
- {
- totalWeight += sampleWeights[k];
- }
- sampleColors[4] = (sampleColors[0] * sampleWeights[0] + sampleColors[1] * sampleWeights[1] + sampleColors[2] * sampleWeights[2] + sampleColors[3] * sampleWeights[3] + sampleColors[4] * sampleWeights[4] +
- sampleColors[5] * sampleWeights[5] + sampleColors[6] * sampleWeights[6] + sampleColors[7] * sampleWeights[7] + sampleColors[8] * sampleWeights[8]) / totalWeight;
+ float samplePreviousDepth = texture_depth_history.SampleLevel(sampler_point_clamp, sampleUV, 0);
- // Variance Clipping (AABB)
-
- float4 m1 = 0.0;
- float4 m2 = 0.0;
- [unroll]
- for (uint x = 0; x < 9; x++)
- {
- m1 += sampleColors[x];
- m2 += sampleColors[x] * sampleColors[x];
- }
+ float weight = GetDisocclusion(depth, samplePreviousDepth);
+ if (weight > disocclusion)
+ {
+ disocclusion = weight;
+ closestUV = sampleUV;
+ prevUVSample = closestUV; // NOTE(review): this moves the center used for later offsets in this same loop; confirm the drifting search center is intended
+ }
+ }
+ }
- float4 mean = m1 / 9.0;
- float4 stddev = sqrt((m2 / 9.0) - sqr(mean));
-
- currentMin = mean - AABBScale * stddev;
- currentMax = mean + AABBScale * stddev;
+ previousColor = texture_color_history.SampleLevel(sampler_linear_clamp, prevUVSample, 0);
+ }
- currentOutput = sampleColors[4];
- currentMin = min(currentMin, currentOutput);
- currentMax = max(currentMax, currentOutput);
- currentAverage = mean;
+ // Fallback 2: manual bilinear interpolation of history color and depth - near edges
+ if (disocclusion < disocclusionThreshold)
+ {
+ float2 weight = frac(prevUVSample * size + 0.5); // fractional position between texel centers
+
+ // Bilinear weights
+ float weights[4] =
+ {
+ (1 - weight.x) * (1 - weight.y),
+ weight.x * (1 - weight.y),
+ (1 - weight.x) * weight.y,
+ weight.x * weight.y
+ };
+
+ float4 previousColorResult = 0;
+ float previousDepthResult = 0;
+ float weightSum = 0;
+
+ uint2 prevCoord = uint2(size * prevUVSample - 0.5); // top-left texel of the 2x2 footprint
+ uint2 offsets[4] = { uint2(0, 0), uint2(1, 0), uint2(0, 1), uint2(1, 1) }; // same order as weights[]
+
+ for (uint i = 0; i < 4; i++)
+ {
+ uint2 sampleCoord = prevCoord + offsets[i];
+
+ previousColorResult += weights[i] * texture_color_history[sampleCoord];
+ previousDepthResult += weights[i] * texture_depth_history[sampleCoord];
+
+ weightSum += weights[i];
+ }
+
+ previousColorResult /= max(weightSum, 0.00001);
+ previousDepthResult /= max(weightSum, 0.00001);
+
+ previousColor = previousColorResult;
+ disocclusion = GetDisocclusion(depth, previousDepthResult); // re-evaluate agreement against the interpolated history depth
+ }
+
+ disocclusion = disocclusion < disocclusionThreshold ? 0.0 : disocclusion; // below the threshold the history is reported as fully disoccluded
+ return previousColor;
}
+// Temporal accumulation pass, one thread per output pixel.
+// - Frame counter 0: there is no history, so the current color and variance are passed through.
+// - Pixels rejected by NeedReflection(): current color is passed through with zero variance.
+// - Otherwise the history color is reprojected (choosing between the velocity and the
+//   reflection-hit reprojection), clamped to the 3x3 neighborhood statistics of the current
+//   frame and blended with the current color. Variance is blended with its own history.
+// - When the history is rejected, the color stays current and the variance is forced to 1.0.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
-void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex)
+void main(uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint3 DTid : SV_DispatchThreadID)
{
- if ((uint)ssr_frame == 0)
+ if ((uint) ssr_frame == 0)
{
- output[DTid.xy] = resolve_current[DTid.xy];
+ // Write BOTH outputs: they become the history of the next frame, so leaving the
+ // variance untouched here would feed uninitialized data into the next blend.
+ output_color[DTid.xy] = texture_color_current[DTid.xy];
+ output_variance[DTid.xy] = texture_variance_current[DTid.xy];
return;
}
- const float2 uv = (DTid.xy + 0.5f) * postprocess.resolution_rcp;
- const float depth = texture_depth.SampleLevel(sampler_linear_clamp, uv, 0);
- if (depth == 0)
- return;
+ const float depth = texture_depth[DTid.xy];
+ const float roughness = texture_surface_roughness[DTid.xy];
- const float2 velocity = texture_gbuffer1.SampleLevel(sampler_point_clamp, uv, 0).xy;
- float2 prevUV = uv + velocity;
- if (!is_saturated(prevUV))
+ if (!NeedReflection(roughness, depth))
{
- output[DTid.xy] = resolve_current[DTid.xy];
+ output_color[DTid.xy] = texture_color_current[DTid.xy];
+ output_variance[DTid.xy] = 0.0;
return;
}
- const float3 P = reconstruct_position(uv, depth, GetCamera().inverse_projection);
+ // 3x3 neighborhood mean and variance from raw moments (sum and sum of squares).
+ // This is the "naive" formula, not Welford's incremental update:
+ // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
- PrimitiveID prim;
- prim.unpack(texture_gbuffer0[DTid.xy * 2]);
-
- Surface surface;
- surface.init();
- if (!surface.load(prim, P))
- return;
-
- const float roughness = surface.roughness;
-
- if (roughness < 0.01)
+ float4 m1 = 0.0;
+ float4 m2 = 0.0;
+ for (int x = -1; x <= 1; x++)
{
- output[DTid.xy] = resolve_current[DTid.xy];
- //return;
- }
-
- // Secondary reprojection based on ray lengths:
- // https://www.ea.com/seed/news/seed-dd18-presentation-slides-raytracing (Slide 45)
- if (roughness < 0.5)
- {
- float rayLength = rayLengths[DTid.xy];
- if (rayLength > 0)
+ for (int y = -1; y <= 1; y++)
{
- const float3 P = reconstruct_position(uv, depth);
- const float3 V = normalize(GetCamera().position - P);
- const float3 rayEnd = P - V * rayLength;
- float4 rayEndPrev = mul(GetCamera().previous_view_projection, float4(rayEnd, 1));
- rayEndPrev.xy /= rayEndPrev.w;
- prevUV = rayEndPrev.xy * float2(0.5, -0.5) + 0.5;
+ int2 offset = int2(x, y);
+ // Clamp the tap to the image so border pixels reuse edge texels instead of reading
+ // out of bounds, which would drag the mean down and inflate the variance.
+ // Assumes texture_color_current has postprocess.resolution texels - TODO confirm.
+ int2 coord = clamp(int2(DTid.xy) + offset, int2(0, 0), int2(postprocess.resolution) - 1);
+
+ float4 sampleColor = texture_color_current[coord];
+
+ m1 += sampleColor;
+ m2 += sampleColor * sampleColor;
}
}
- // Disocclusion fallback:
- float depth_current = compute_lineardepth(depth);
- float depth_history = compute_lineardepth(texture_depth_history.SampleLevel(sampler_point_clamp, prevUV, 1));
- if (abs(depth_current - depth_history) > 1)
+ float4 mean = m1 / 9.0;
+ float4 variance = (m2 / 9.0) - (mean * mean);
+ // Rounding can push the raw-moment variance slightly below zero, hence the max().
+ float4 stddev = sqrt(max(variance, 0.0f));
+
+ // Secondary reprojection based on ray lengths:
+ // https://www.ea.com/seed/news/seed-dd18-presentation-slides-raytracing (Slide 45)
+
+ float2 velocity = texture_gbuffer1[DTid.xy];
+ float reprojectionDepth = texture_reprojectionDepth[DTid.xy];
+
+ float2 uv = (DTid.xy + 0.5f) * postprocess.resolution_rcp;
+
+ float2 prevUVVelocity = uv + velocity;
+ float2 prevUVReflectionHit = CalculateReprojectionBuffer(uv, reprojectionDepth);
+
+ float4 previousColorVelocity = texture_color_history.SampleLevel(sampler_linear_clamp, prevUVVelocity, 0);
+ float4 previousColorReflectionHit = texture_color_history.SampleLevel(sampler_linear_clamp, prevUVReflectionHit, 0);
+
+ // Keep whichever reprojection lands on history whose luminance is closer to the
+ // current neighborhood mean.
+ float previousDistanceVelocity = abs(Luminance(previousColorVelocity.rgb) - Luminance(mean.rgb));
+ float previousDistanceReflectionHit = abs(Luminance(previousColorReflectionHit.rgb) - Luminance(mean.rgb));
+
+ float2 prevUV = previousDistanceVelocity < previousDistanceReflectionHit ? prevUVVelocity : prevUVReflectionHit;
+
+ float disocclusion = 0.0;
+ float2 prevUVSample = 0.0;
+ float4 previousColor = SamplePreviousColor(prevUV, postprocess.resolution, depth, disocclusion, prevUVSample);
+
+ float4 currentColor = texture_color_current[DTid.xy];
+ float4 resultColor = currentColor;
+
+ // Single history-validity test shared by the color and the variance paths, so both
+ // always agree (the two separate comparisons disagreed at exact equality).
+ const bool historyValid = disocclusion > disocclusionThreshold && is_saturated(prevUVSample);
+
+ // Disocclusion fallback: color
+ if (historyValid)
{
- output[DTid.xy] = resolve_current[DTid.xy];
- //output[DTid.xy] = float4(1, 0, 0, 1);
- return;
+ // Color box clamp
+ float4 colorMin = mean - temporalScale * stddev;
+ float4 colorMax = mean + temporalScale * stddev;
+ previousColor = clamp(previousColor, colorMin, colorMax);
+
+ resultColor = lerp(currentColor, previousColor, temporalResponse);
}
-
- float4 previous = resolve_history.SampleLevel(sampler_linear_clamp, prevUV, 0);
+#if 0 // Debug
+ else
+ {
+ resultColor = float4(1, 0, 0, 1);
+ }
+#endif
- // Luma HDR and AABB minmax
-
- float4 current = 0;
- float4 currentMin, currentMax, currentAverage;
- ResolverAABB(resolve_current, sampler_linear_clamp, 0, temporalExposure, temporalScale, uv, postprocess.resolution, currentMin, currentMax, currentAverage, current);
+ float currentVariance = texture_variance_current[DTid.xy];
+ float varianceResponse = varianceTemporalResponse;
- previous.xyz = clip_aabb(currentMin.xyz, currentMax.xyz, clamp(currentAverage, currentMin, currentMax), previous).xyz;
- previous.a = clamp(previous.a, currentMin.a, currentMax.a);
-
- // Blend color & history
-
- float blendFinal = lerp(temporalResponseMin, temporalResponseMax, saturate(1.0 - length(velocity) * 100));
-
- float4 result = lerp(current, previous, blendFinal);
-
- output[DTid.xy] = max(0, result);
+ // Disocclusion fallback: variance
+ if (!historyValid)
+ {
+ // History rejected: ignore the previous variance (response 0) and output the
+ // maximum value 1.0. This helps to hide artifacts from the temporal pass.
+ varianceResponse = 0.0f;
+ currentVariance = 1.0f;
+ }
+
+ float previousVariance = texture_variance_history.SampleLevel(sampler_linear_clamp, prevUVSample, 0);
+ float resultVariance = lerp(currentVariance, previousVariance, varianceResponse);
+
+ output_color[DTid.xy] = max(0, resultColor);
+ output_variance[DTid.xy] = max(0, resultVariance);
}
diff --git a/WickedEngine/shaders/ssr_tileMaxRoughness_horizontalCS.hlsl b/WickedEngine/shaders/ssr_tileMaxRoughness_horizontalCS.hlsl
new file mode 100644
index 000000000..d62c41c12
--- /dev/null
+++ b/WickedEngine/shaders/ssr_tileMaxRoughness_horizontalCS.hlsl
@@ -0,0 +1,43 @@
+#include "globals.hlsli"
+#include "brdf.hlsli"
+#include "ShaderInterop_Postprocess.h"
+
+PUSHCONSTANT(postprocess, PostProcess);
+
+// Per-pixel surface roughness, read as a single channel
+// (the RT reflection resources allocate this texture as R8_UNORM).
+Texture2D<float> texture_surface_roughness : register(t0);
+
+// Output of this pass: x = min roughness, y = max roughness of each horizontal run of
+// SSR_TILESIZE pixels. The element type is required for RW resources.
+RWTexture2D<float2> tile_minmax_roughness_horizontal : register(u0);
+
+// Horizontal half of the separable per-tile roughness reduction.
+// Each thread scans SSR_TILESIZE consecutive pixels of one row, starting at column
+// DTid.x * SSR_TILESIZE, and stores float2(min, max) of their roughness at DTid.xy.
+// Pixels whose depth is exactly 0 count as roughness 1.0; pixels outside the depth
+// texture are ignored, so a run with no valid pixel stores (1.0, 0.0).
+[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
+void main(uint3 DTid : SV_DispatchThreadID)
+{
+    uint2 depthSize;
+    texture_depth.GetDimensions(depthSize.x, depthSize.y);
+
+    const uint tileRow = DTid.y;
+    const uint firstColumn = DTid.x * SSR_TILESIZE;
+
+    // x accumulates the minimum, y the maximum.
+    float2 roughnessRange = float2(1.0, 0.0);
+
+    [loop]
+    for (uint k = 0; k < SSR_TILESIZE; ++k)
+    {
+        const uint2 texel = uint2(firstColumn + k, tileRow);
+
+        // Unsigned coordinates cannot be negative, so only the upper bounds matter.
+        if (texel.x >= depthSize.x || texel.y >= depthSize.y)
+        {
+            continue;
+        }
+
+        // Zero depth is handled as the roughest possible surface.
+        float pixelRoughness = 1.0;
+        const float depth = texture_depth[texel];
+        if (depth != 0.0)
+        {
+            pixelRoughness = texture_surface_roughness[texel];
+        }
+
+        roughnessRange.x = min(roughnessRange.x, pixelRoughness);
+        roughnessRange.y = max(roughnessRange.y, pixelRoughness);
+    }
+
+    tile_minmax_roughness_horizontal[DTid.xy] = roughnessRange;
+}
diff --git a/WickedEngine/shaders/ssr_tileMaxRoughness_verticalCS.hlsl b/WickedEngine/shaders/ssr_tileMaxRoughness_verticalCS.hlsl
new file mode 100644
index 000000000..c8eccaa66
--- /dev/null
+++ b/WickedEngine/shaders/ssr_tileMaxRoughness_verticalCS.hlsl
@@ -0,0 +1,58 @@
+#include "globals.hlsli"
+#include "brdf.hlsli"
+#include "stochasticSSRHF.hlsli"
+#include "ShaderInterop_Postprocess.h"
+
+// Result of the horizontal pass: x = min roughness, y = max roughness.
+Texture2D<float2> tile_minmax_roughness_horizontal : register(t0);
+
+// Per-class tile counters, addressed with the TILE_STATISTICS_OFFSET_* byte offsets.
+RWByteAddressBuffer tile_tracing_statistics : register(u0);
+// Tile lists, one per tracing class. Each entry is a packed tile coordinate
+// (x in bits 0-15, y in bits 16-31); structured buffers require an element type.
+RWStructuredBuffer<uint> tiles_tracing_earlyexit : register(u1);
+RWStructuredBuffer<uint> tiles_tracing_cheap : register(u2);
+RWStructuredBuffer<uint> tiles_tracing_expensive : register(u3);
+// Final per-tile roughness range: x = min, y = max.
+RWTexture2D<float2> tile_minmax_roughness : register(u4);
+
+// Tiles whose smoothest pixel is below this roughness are put on the expensive list.
+static const float SSRRoughnessCheap = 0.35;
+
+// Vertical half of the separable per-tile roughness reduction plus tile classification.
+// Each thread reduces SSR_TILESIZE rows of the horizontal result in column DTid.x,
+// starting at row DTid.y * SSR_TILESIZE, stores float2(min, max) at DTid.xy and appends
+// the packed tile coordinate to exactly one list:
+//   expensive : min roughness <  SSRRoughnessCheap
+//   cheap     : SSRRoughnessCheap <= min roughness < ReflectionMaxRoughness
+//   earlyexit : everything else (no pixel of the tile passes the roughness limit)
+// Threads that find no in-bounds row keep (1.0, 0.0) and therefore land in earlyexit.
+// NOTE(review): such out-of-range threads still append a tile; confirm the lists are
+// sized for the full dispatch rather than for the visible tile count.
+[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
+void main(uint3 DTid : SV_DispatchThreadID)
+{
+    const uint2 tile_upperleft = uint2(DTid.x, DTid.y * SSR_TILESIZE);
+    float minRoughness = 1.0;
+    float maxRoughness = 0.0;
+
+    // Unsigned, to match the unsigned pixel coordinates compared against it.
+    uint2 dim;
+    tile_minmax_roughness_horizontal.GetDimensions(dim.x, dim.y);
+
+    [loop]
+    for (uint i = 0; i < SSR_TILESIZE; ++i)
+    {
+        const uint2 pixel = uint2(tile_upperleft.x, tile_upperleft.y + i);
+        // pixel is unsigned, so only the upper bounds can be violated.
+        if (pixel.x < dim.x && pixel.y < dim.y)
+        {
+            const float2 minmax_roughness = tile_minmax_roughness_horizontal[pixel];
+            minRoughness = min(minRoughness, minmax_roughness.r);
+            maxRoughness = max(maxRoughness, minmax_roughness.g);
+        }
+    }
+
+    // Pack the tile coordinate: x in the low 16 bits, y in the high 16 bits.
+    const uint tile = (DTid.x & 0xFFFF) | ((DTid.y & 0xFFFF) << 16);
+
+    uint prevCount;
+    if (minRoughness < SSRRoughnessCheap)
+    {
+        tile_tracing_statistics.InterlockedAdd(TILE_STATISTICS_OFFSET_EXPENSIVE, 1, prevCount);
+        tiles_tracing_expensive[prevCount] = tile;
+    }
+    else if (minRoughness < ReflectionMaxRoughness)
+    {
+        // Reaching this branch already implies maxRoughness >= SSRRoughnessCheap. Testing
+        // "maxRoughness > SSRRoughnessCheap" as well would only exclude a tile sitting
+        // exactly on the threshold and wrongly early-exit it.
+        tile_tracing_statistics.InterlockedAdd(TILE_STATISTICS_OFFSET_CHEAP, 1, prevCount);
+        tiles_tracing_cheap[prevCount] = tile;
+    }
+    else
+    {
+        tile_tracing_statistics.InterlockedAdd(TILE_STATISTICS_OFFSET_EARLYEXIT, 1, prevCount);
+        tiles_tracing_earlyexit[prevCount] = tile;
+    }
+
+    tile_minmax_roughness[DTid.xy] = float2(minRoughness, maxRoughness);
+}
diff --git a/WickedEngine/shaders/stochasticSSRHF.hlsli b/WickedEngine/shaders/stochasticSSRHF.hlsli
index 0655d9072..f369a95a7 100644
--- a/WickedEngine/shaders/stochasticSSRHF.hlsli
+++ b/WickedEngine/shaders/stochasticSSRHF.hlsli
@@ -1,162 +1,70 @@
#ifndef WI_STOCHASTICSSR_HF
#define WI_STOCHASTICSSR_HF
#include "brdf.hlsli"
-
-// Stochastic Screen Space Reflections reference:
-// https://www.ea.com/frostbite/news/stochastic-screen-space-reflections
-
+#include "ShaderInterop_Postprocess.h"
#define GGX_SAMPLE_VISIBLE
// Bias used on GGX importance sample when denoising, to remove part of the tale that create a lot more noise.
#define GGX_IMPORTANCE_SAMPLE_BIAS 0.1
-// Shared SSR settings:
-static const float SSRMaxRoughness = 1.0f; // Specify max roughness, this can improve performance in complex scenes.
-static const float SSRIntensity = 1.0f;
-static const float SSRResolveConeMip = 1.0f; // Control overall filtering of the importance sampling.
-static const float SSRResolveSpatialSize = 3.0f; // Seems to work best with the temporal pass in the [-3;3] range
-static const float SSRBlendScreenEdgeFade = 5.0f;
+// Shared Reflection settings:
+static const float ReflectionMaxRoughness = 0.6f;
-// Temporary
-static const float BRDFBias = 0.7f;
-
-
-float ComputeRoughnessMaskScale(in float maxRoughness)
+// Maps one thread of a tile-list driven dispatch to a pixel coordinate.
+//   Gid        : Gid.x selects an entry of `tiles` and one subtile inside that tile.
+//   GTid       : GTid.x is unflattened into the position inside the subtile
+//                (presumably the caller uses a flat POSTPROCESS_BLOCKSIZE^2 thread group - confirm).
+//   tiles      : packed tile coordinates, x in bits 0-15 and y in bits 16-31.
+//   downsample : divisor applied to SSR_TILESIZE for reduced-resolution passes.
+// Returns the coordinate of the thread's pixel in the (downsampled) target.
+// NOTE(review): SSR_TILESIZE / downsample / POSTPROCESS_BLOCKSIZE must be at least 1,
+// otherwise the division and modulo below are by zero - confirm the factors callers pass.
+uint2 GetReflectionIndirectDispatchCoord(uint3 Gid, uint3 GTid, StructuredBuffer<uint> tiles, uint downsample)
{
- float MaxRoughness = clamp(maxRoughness, 0.01f, 1.0f);
-
- float roughnessMaskScale = -2.0f / MaxRoughness;
- return roughnessMaskScale * 1.0f; // 2.0f & 1.0f
+ // Subtiles along one side of a tile; computed once instead of twice.
+ const uint subtiles_per_side = SSR_TILESIZE / downsample / POSTPROCESS_BLOCKSIZE;
+ const uint tile_replicate = sqr(subtiles_per_side);
+ const uint tile_idx = Gid.x / tile_replicate;
+ const uint tile_packed = tiles[tile_idx];
+ const uint2 tile = uint2(tile_packed & 0xFFFF, (tile_packed >> 16) & 0xFFFF);
+ const uint subtile_idx = Gid.x % tile_replicate;
+ const uint2 subtile = unflatten2D(subtile_idx, subtiles_per_side);
+ const uint2 subtile_upperleft = tile * SSR_TILESIZE / downsample + subtile * POSTPROCESS_BLOCKSIZE;
+ return subtile_upperleft + unflatten2D(GTid.x, POSTPROCESS_BLOCKSIZE);
}
-float GetRoughnessFade(in float roughness, in float maxRoughness)
+// Returns true when a pixel should receive a reflection: its roughness must be below
+// ReflectionMaxRoughness and its depth must be greater than zero.
+bool NeedReflection(float roughness, float depth)
{
- float roughnessMaskScale = ComputeRoughnessMaskScale(maxRoughness);
- return min(roughness * roughnessMaskScale + 2, 1.0f);
-}
-
-float GetRoughness(float roughness)
-{
- return max(roughness, 0.02f);
-}
-
-float Luminance(float3 color)
-{
- return dot(color, float3(0.2126, 0.7152, 0.0722));
-}
-
-// Fast RNG inspired by PCG (Permuted Congruential Generator) - Based on Epic Games (Unreal Engine)
-// Returns three elements with 16 random bits each (0-0xffff (65535)).
-uint3 Rand_PCG16(int3 i)
-{
- // Epic Games had good results by interpreting signed values as unsigned.
- uint3 r = uint3(i);
-
- // Linear congruential generator
- // A simple but very fast pseudorandom number generator
- // see: https://en.wikipedia.org/wiki/Linear_congruential_generator
- r = r * 1664525u + 1013904223u; // LCG set from 'Numerical Recipes'
-
- // Final shuffle
- // In the original PCG code, they used xorshift for their final shuffle.
- // According to Epic Games, they would do simple Feistel steps instead since xorshift is expensive.
- // They would then use r.x, r.y and r.z as parts to create something persistence with few instructions.
- r.x += r.y * r.z;
- r.y += r.z * r.x;
- r.z += r.x * r.y;
-
- r.x += r.y * r.z;
- r.y += r.z * r.x;
- r.z += r.x * r.y;
-
- // PCG would then shuffle the top 16 bits thoroughly.
- return r >> 16u;
-}
-
-// Hammersley sequence manipulated by a random value and returns top 16 bits
-float2 HammersleyRandom16(uint idx, uint num, uint2 random)
-{
- // Reverse Bits 32
- uint bits = idx;
- bits = (bits << 16u) | (bits >> 16u);
- bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
- bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
- bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
- bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
-
- float E1 = frac(float(idx / num) + float(random.x) * 1.52587890625e-5); // / 0xffff (rcp(65536) )
- float E2 = float((bits >> 16) ^ random.y) * 1.52587890625e-5; // Shift reverse bits by 16 and compare bits with random
- return float2(E1, E2);
-}
-
-float2 HammersleyRandom16(uint idx, uint2 random)
-{
- uint bits = idx;
- bits = (bits << 16u) | (bits >> 16u);
- bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
- bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
- bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
- bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
-
- float E1 = frac(float(random.x) * 1.52587890625e-5); // / 0xffff (rcp(65536) )
- float E2 = float((bits >> 16) ^ random.y) * 1.52587890625e-5; // Shift reverse bits by 16 and compare bits with random
- return float2(E1, E2);
+ const bool belowRoughnessLimit = roughness < ReflectionMaxRoughness;
+ const bool depthIsPositive = depth > 0.0;
+ return belowRoughnessLimit && depthIsPositive;
}
// Brian Karis, Epic Games "Real Shading in Unreal Engine 4"
+// Draws a GGX half vector from the 2D sample Xi.
+// Returns float4(H, pdf): H is built around the +Z axis (H.z is the cosine of the polar
+// angle) and pdf is D * cos(theta), with D computed from Roughness^4.
float4 ImportanceSampleGGX(float2 Xi, float Roughness)
{
- float m = Roughness * Roughness;
- float m2 = m * m;
-
- float Phi = 2 * PI * Xi.x;
-
- float CosTheta = sqrt((1.0 - Xi.y) / (1.0 + (m2 - 1.0) * Xi.y));
- float SinTheta = sqrt(max(1e-5, 1.0 - CosTheta * CosTheta));
-
- float3 H;
- H.x = SinTheta * cos(Phi);
- H.y = SinTheta * sin(Phi);
- H.z = CosTheta;
-
- float d = (CosTheta * m2 - CosTheta) * CosTheta + 1;
- float D = m2 / (PI * d * d);
- float pdf = D * CosTheta;
+ const float alpha = Roughness * Roughness;
+ const float alphaSq = alpha * alpha;
- return float4(H, pdf);
+
+ // Xi.x picks the azimuth, Xi.y the polar angle.
+ const float azimuth = 2 * PI * Xi.x;
+ const float cosPolar = sqrt((1.0 - Xi.y) / (1.0 + (alphaSq - 1.0) * Xi.y));
+ // The max() keeps the sine strictly positive when cosPolar reaches 1.
+ const float sinPolar = sqrt(max(1e-5, 1.0 - cosPolar * cosPolar));
+
+ const float3 halfVector = float3(sinPolar * cos(azimuth), sinPolar * sin(azimuth), cosPolar);
+
+ const float denom = (cosPolar * alphaSq - cosPolar) * cosPolar + 1;
+ const float distribution = alphaSq / (PI * denom * denom);
+
+ return float4(halfVector, distribution * cosPolar);
}
// [ Duff et al. 2017, "Building an Orthonormal Basis, Revisited" ]
// http://jcgt.org/published/0006/01/01/
+// Builds a 3x3 basis whose rows are TangentX, TangentY and the given TangentZ.
+// NOTE(review): TangentZ is presumably expected to be unit length - confirm with callers.
float3x3 GetTangentBasis(float3 TangentZ)
{
- const float Sign = TangentZ.z >= 0 ? 1 : -1;
- const float a = -rcp(Sign + TangentZ.z);
- const float b = TangentZ.x * TangentZ.y * a;
-
- float3 TangentX = { 1 + Sign * a * pow(TangentZ.x, 2), Sign * b, -Sign * TangentZ.x };
- float3 TangentY = { b, Sign + a * pow(TangentZ.y, 2), -TangentZ.y };
+ const float Sign = TangentZ.z >= 0 ? 1 : -1;
+ const float a = -rcp(Sign + TangentZ.z);
+ const float b = TangentZ.x * TangentZ.y * a;
- return float3x3(TangentX, TangentY, TangentZ);
+ // Squares are written as products: HLSL pow() is not defined for a negative base,
+ // and TangentZ.x / TangentZ.y are negative for half of all directions.
+ float3 TangentX = { 1 + Sign * a * TangentZ.x * TangentZ.x, Sign * b, -Sign * TangentZ.x };
+ float3 TangentY = { b, Sign + a * TangentZ.y * TangentZ.y, -TangentZ.y };
+
+ return float3x3(TangentX, TangentY, TangentZ);
}
-float3 TangentToWorld(float3 vec, float3 tangentZ)
-{
- return mul(vec, GetTangentBasis(tangentZ));
-}
-
-float4 TangentToWorld(float4 H, float3 tangentZ)
-{
- return float4(mul(H.xyz, GetTangentBasis(tangentZ)), H.w);
-}
-
-float3 WorldToTangent(float3 vec, float3 tangentZ)
-{
- return mul(GetTangentBasis(tangentZ), vec);
-}
-
-
float2 SampleDisk(float2 Xi)
{
float theta = 2 * PI * Xi.x;
@@ -209,5 +117,9 @@ float4 ImportanceSampleVisibleGGX(float2 diskXi, float roughness, float3 V)
return float4(H, PDF);
}
+// Weighted sum of the RGB channels using the Rec. 709 luma coefficients.
+float Luminance(float3 color)
+{
+    const float3 lumaWeights = float3(0.2126, 0.7152, 0.0722);
+    return dot(color, lumaWeights);
+}
#endif // WI_STOCHASTICSSR_HF
diff --git a/WickedEngine/wiEnums.h b/WickedEngine/wiEnums.h
index 6d7482201..54581505a 100644
--- a/WickedEngine/wiEnums.h
+++ b/WickedEngine/wiEnums.h
@@ -284,10 +284,17 @@ namespace wi::enums
CSTYPE_POSTPROCESS_MSAO_BLURUPSAMPLE_PREMIN,
CSTYPE_POSTPROCESS_MSAO_BLURUPSAMPLE_PREMIN_BLENDOUT,
CSTYPE_POSTPROCESS_RTREFLECTION,
+ CSTYPE_POSTPROCESS_SSR_SURFACE,
+ CSTYPE_POSTPROCESS_SSR_TILEMAXROUGHNESS_HORIZONTAL,
+ CSTYPE_POSTPROCESS_SSR_TILEMAXROUGHNESS_VERTICAL,
+ CSTYPE_POSTPROCESS_SSR_KICKJOBS,
+ CSTYPE_POSTPROCESS_SSR_DEPTHHIERARCHY,
CSTYPE_POSTPROCESS_SSR_RAYTRACE,
+ CSTYPE_POSTPROCESS_SSR_RAYTRACE_EARLYEXIT,
+ CSTYPE_POSTPROCESS_SSR_RAYTRACE_CHEAP,
CSTYPE_POSTPROCESS_SSR_RESOLVE,
CSTYPE_POSTPROCESS_SSR_TEMPORAL,
- CSTYPE_POSTPROCESS_SSR_MEDIAN,
+ CSTYPE_POSTPROCESS_SSR_BILATERAL,
CSTYPE_POSTPROCESS_LIGHTSHAFTS,
CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_HORIZONTAL,
CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_VERTICAL,
diff --git a/WickedEngine/wiRenderPath3D.cpp b/WickedEngine/wiRenderPath3D.cpp
index 21e82e4cd..64a0f5106 100644
--- a/WickedEngine/wiRenderPath3D.cpp
+++ b/WickedEngine/wiRenderPath3D.cpp
@@ -1599,8 +1599,8 @@ void RenderPath3D::setSSREnabled(bool value)
TextureDesc desc;
desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
desc.format = Format::R16G16B16A16_FLOAT;
- desc.width = internalResolution.x / 2;
- desc.height = internalResolution.y / 2;
+ desc.width = internalResolution.x;
+ desc.height = internalResolution.y;
desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE;
device->CreateTexture(&desc, nullptr, &rtSSR);
device->SetName(&rtSSR, "rtSSR");
@@ -1625,8 +1625,8 @@ void RenderPath3D::setRaytracedReflectionsEnabled(bool value)
TextureDesc desc;
desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
desc.format = Format::R11G11B10_FLOAT;
- desc.width = internalResolution.x / 2;
- desc.height = internalResolution.y / 2;
+ desc.width = internalResolution.x;
+ desc.height = internalResolution.y;
device->CreateTexture(&desc, nullptr, &rtSSR);
device->SetName(&rtSSR, "rtSSR");
diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp
index 4331b11ff..57053caa4 100644
--- a/WickedEngine/wiRenderer.cpp
+++ b/WickedEngine/wiRenderer.cpp
@@ -934,10 +934,17 @@ void LoadShaders()
wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_MSAO_BLURUPSAMPLE_BLENDOUT], "msao_blurupsampleCS_blendout.cso"); });
wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_MSAO_BLURUPSAMPLE_PREMIN], "msao_blurupsampleCS_premin.cso"); });
wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_MSAO_BLURUPSAMPLE_PREMIN_BLENDOUT], "msao_blurupsampleCS_premin_blendout.cso"); });
+ wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_SURFACE], "ssr_surfaceCS.cso"); });
+ wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_TILEMAXROUGHNESS_HORIZONTAL], "ssr_tileMaxRoughness_horizontalCS.cso"); });
+ wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_TILEMAXROUGHNESS_VERTICAL], "ssr_tileMaxRoughness_verticalCS.cso"); });
+ wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_KICKJOBS], "ssr_kickjobsCS.cso"); });
+ wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_DEPTHHIERARCHY], "ssr_depthHierarchyCS.cso"); });
wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_RAYTRACE], "ssr_raytraceCS.cso"); });
+ wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_RAYTRACE_EARLYEXIT], "ssr_raytraceCS_earlyexit.cso"); });
+ wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_RAYTRACE_CHEAP], "ssr_raytraceCS_cheap.cso"); });
wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_RESOLVE], "ssr_resolveCS.cso"); });
wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_TEMPORAL], "ssr_temporalCS.cso"); });
- wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_MEDIAN], "ssr_medianCS.cso"); });
+ wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_BILATERAL], "ssr_bilateralCS.cso"); });
wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_LIGHTSHAFTS], "lightShaftsCS.cso"); });
wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_HORIZONTAL], "depthoffield_tileMaxCOC_horizontalCS.cso"); });
wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_VERTICAL], "depthoffield_tileMaxCOC_verticalCS.cso"); });
@@ -9382,21 +9389,44 @@ void CreateRTReflectionResources(RTReflectionResources& res, XMUINT2 resolution)
{
res.frame = 0;
+ // Full-resolution surface inputs shared by the trace and resolve passes.
+ TextureDesc surface_desc;
+ surface_desc.type = TextureDesc::Type::TEXTURE_2D;
+ surface_desc.width = resolution.x;
+ surface_desc.height = resolution.y;
+ surface_desc.format = Format::R8G8B8A8_SNORM;
+ surface_desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
+ device->CreateTexture(&surface_desc, nullptr, &res.texture_surface_normal);
+ device->SetName(&res.texture_surface_normal, "rtreflection_surface_normal");
+ surface_desc.format = Format::R8_UNORM;
+ device->CreateTexture(&surface_desc, nullptr, &res.texture_surface_roughness);
+ device->SetName(&res.texture_surface_roughness, "rtreflection_surface_roughness");
+ surface_desc.format = Format::R11G11B10_FLOAT;
+ device->CreateTexture(&surface_desc, nullptr, &res.texture_surface_environment);
+ device->SetName(&res.texture_surface_environment, "rtreflection_surface_environment");
+
+ // Half-resolution ray outputs.
TextureDesc desc;
+ desc.type = TextureDesc::Type::TEXTURE_2D;
desc.width = resolution.x / 2;
desc.height = resolution.y / 2;
desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE;
- desc.format = Format::R11G11B10_FLOAT;
- device->CreateTexture(&desc, nullptr, &res.temporal[0]);
- device->SetName(&res.temporal[0], "rtreflection_temporal[0]");
- device->CreateTexture(&desc, nullptr, &res.temporal[1]);
- device->SetName(&res.temporal[1], "rtreflection_temporal[1]");
-
+ desc.format = Format::R16G16B16A16_FLOAT;
+ device->CreateTexture(&desc, nullptr, &res.texture_rayIndirectSpecular);
+ device->SetName(&res.texture_rayIndirectSpecular, "rtreflection_rayIndirectSpecular");
+ device->CreateTexture(&desc, nullptr, &res.texture_rayDirectionPDF);
+ device->SetName(&res.texture_rayDirectionPDF, "rtreflection_rayDirectionPDF");
desc.format = Format::R16_FLOAT;
- device->CreateTexture(&desc, nullptr, &res.rayLengths);
- device->SetName(&res.rayLengths, "rtreflection_rayLengths");
+ device->CreateTexture(&desc, nullptr, &res.texture_rayLengths);
+ // Keep the "rtreflection_" prefix so these are not mistaken for the SSR resources in a GPU debugger.
+ device->SetName(&res.texture_rayLengths, "rtreflection_rayLengths");
+
+ // Full-resolution resolve, temporal (ping-pong) and bilateral targets.
+ desc.width = resolution.x;
+ desc.height = resolution.y;
+ desc.format = Format::R16G16B16A16_FLOAT;
+ device->CreateTexture(&desc, nullptr, &res.texture_resolve);
+ device->SetName(&res.texture_resolve, "rtreflection_resolve");
+ device->CreateTexture(&desc, nullptr, &res.texture_temporal[0]);
+ device->SetName(&res.texture_temporal[0], "rtreflection_temporal[0]");
+ device->CreateTexture(&desc, nullptr, &res.texture_temporal[1]);
+ device->SetName(&res.texture_temporal[1], "rtreflection_temporal[1]");
+ device->CreateTexture(&desc, nullptr, &res.texture_bilateral_temp);
+ device->SetName(&res.texture_bilateral_temp, "rtreflection_bilateral_temp");
+ desc.format = Format::R16_FLOAT;
+ device->CreateTexture(&desc, nullptr, &res.texture_resolve_variance);
+ device->SetName(&res.texture_resolve_variance, "rtreflection_resolve_variance");
+ device->CreateTexture(&desc, nullptr, &res.texture_resolve_reprojectionDepth);
+ device->SetName(&res.texture_resolve_reprojectionDepth, "rtreflection_resolve_reprojectionDepth");
+ device->CreateTexture(&desc, nullptr, &res.texture_temporal_variance[0]);
+ device->SetName(&res.texture_temporal_variance[0], "rtreflection_temporal_variance[0]");
+ device->CreateTexture(&desc, nullptr, &res.texture_temporal_variance[1]);
+ device->SetName(&res.texture_temporal_variance[1], "rtreflection_temporal_variance[1]");
}
void Postprocess_RTReflection(
const RTReflectionResources& res,
@@ -9414,90 +9444,206 @@ void Postprocess_RTReflection(
return;
device->EventBegin("Postprocess_RTReflection", cmd);
- auto prof_range = wi::profiler::BeginRangeGPU("RTReflection", cmd);
-
- const TextureDesc& desc = output.desc;
-
-#ifdef RTREFLECTION_WITH_RAYTRACING_PIPELINE
- device->BindRaytracingPipelineState(&RTPSO_reflection, cmd);
-#else
- device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_RTREFLECTION], cmd);
-#endif // RTREFLECTION_WITH_RAYTRACING_PIPELINE
+ auto profilerRange = wi::profiler::BeginRangeGPU("RTReflection", cmd);
BindCommonResources(cmd);
+ // Compute common Raytraced surface properties:
+ {
+ device->EventBegin("RTReflection Surface", cmd);
+ device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_SURFACE], cmd);
+
+ const GPUResource* uavs[] = {
+ &res.texture_surface_normal,
+ &res.texture_surface_roughness,
+ &res.texture_surface_environment,
+ };
+ device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Image(&res.texture_surface_normal, res.texture_surface_normal.desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_surface_roughness, res.texture_surface_roughness.desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_surface_environment, res.texture_surface_environment.desc.layout, ResourceState::UNORDERED_ACCESS),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->Dispatch(
+ (res.texture_surface_normal.GetDesc().width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (res.texture_surface_normal.GetDesc().height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ 1,
+ cmd
+ );
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Memory(),
+ GPUBarrier::Image(&res.texture_surface_normal, ResourceState::UNORDERED_ACCESS, res.texture_surface_normal.desc.layout),
+ GPUBarrier::Image(&res.texture_surface_roughness, ResourceState::UNORDERED_ACCESS, res.texture_surface_roughness.desc.layout),
+ GPUBarrier::Image(&res.texture_surface_environment, ResourceState::UNORDERED_ACCESS, res.texture_surface_environment.desc.layout),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->EventEnd(cmd);
+ }
+
+ const TextureDesc& desc = output.desc;
+
+ // Render half-res:
PostProcess postprocess;
- postprocess.resolution.x = desc.width;
- postprocess.resolution.y = desc.height;
+ postprocess.resolution.x = desc.width / 2;
+ postprocess.resolution.y = desc.height / 2;
postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
rtreflection_range = range;
rtreflection_frame = (float)res.frame;
std::memcpy(&postprocess.params1.x, &instanceInclusionMask, sizeof(instanceInclusionMask));
- device->PushConstants(&postprocess, sizeof(postprocess), cmd);
-
- const GPUResource* uavs[] = {
- &output,
- &res.rayLengths
- };
- device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
{
- GPUBarrier barriers[] = {
- GPUBarrier::Image(&output, output.desc.layout, ResourceState::UNORDERED_ACCESS),
- GPUBarrier::Image(&res.rayLengths, res.rayLengths.desc.layout, ResourceState::UNORDERED_ACCESS),
- };
- device->Barrier(barriers, arraysize(barriers), cmd);
- }
+ //device->EventBegin("RTReflection Raytrace pass", cmd);
#ifdef RTREFLECTION_WITH_RAYTRACING_PIPELINE
- size_t shaderIdentifierSize = device->GetShaderIdentifierSize();
- GraphicsDevice::GPUAllocation shadertable_raygen = device->AllocateGPU(shaderIdentifierSize, cmd);
- GraphicsDevice::GPUAllocation shadertable_miss = device->AllocateGPU(shaderIdentifierSize, cmd);
- GraphicsDevice::GPUAllocation shadertable_hitgroup = device->AllocateGPU(shaderIdentifierSize, cmd);
+ device->BindRaytracingPipelineState(&RTPSO_reflection, cmd);
+#else
+ device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_RTREFLECTION], cmd);
+#endif // RTREFLECTION_WITH_RAYTRACING_PIPELINE
- device->WriteShaderIdentifier(&RTPSO_reflection, 0, shadertable_raygen.data);
- device->WriteShaderIdentifier(&RTPSO_reflection, 1, shadertable_miss.data);
- device->WriteShaderIdentifier(&RTPSO_reflection, 2, shadertable_hitgroup.data);
+ device->PushConstants(&postprocess, sizeof(postprocess), cmd);
- DispatchRaysDesc dispatchraysdesc;
- dispatchraysdesc.ray_generation.buffer = &shadertable_raygen.buffer;
- dispatchraysdesc.ray_generation.offset = shadertable_raygen.offset;
- dispatchraysdesc.ray_generation.size = shaderIdentifierSize;
+ const GPUResource* resarray[] = {
+ &res.texture_surface_normal,
+ &res.texture_surface_roughness,
+ &res.texture_surface_environment,
+ };
+ device->BindResources(resarray, 0, arraysize(resarray), cmd);
- dispatchraysdesc.miss.buffer = &shadertable_miss.buffer;
- dispatchraysdesc.miss.offset = shadertable_miss.offset;
- dispatchraysdesc.miss.size = shaderIdentifierSize;
- dispatchraysdesc.miss.stride = shaderIdentifierSize;
+ const GPUResource* uavs[] = {
+ &res.texture_rayIndirectSpecular,
+ &res.texture_rayDirectionPDF,
+ &res.texture_rayLengths
+ };
+ device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
- dispatchraysdesc.hit_group.buffer = &shadertable_hitgroup.buffer;
- dispatchraysdesc.hit_group.offset = shadertable_hitgroup.offset;
- dispatchraysdesc.hit_group.size = shaderIdentifierSize;
- dispatchraysdesc.hit_group.stride = shaderIdentifierSize;
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Image(&res.texture_rayIndirectSpecular, res.texture_rayIndirectSpecular.desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_rayDirectionPDF, res.texture_rayDirectionPDF.desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_rayLengths, res.texture_rayLengths.desc.layout, ResourceState::UNORDERED_ACCESS),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
- dispatchraysdesc.width = desc.width;
- dispatchraysdesc.height = desc.height;
+#ifdef RTREFLECTION_WITH_RAYTRACING_PIPELINE
+ size_t shaderIdentifierSize = device->GetShaderIdentifierSize();
+ GraphicsDevice::GPUAllocation shadertable_raygen = device->AllocateGPU(shaderIdentifierSize, cmd);
+ GraphicsDevice::GPUAllocation shadertable_miss = device->AllocateGPU(shaderIdentifierSize, cmd);
+ GraphicsDevice::GPUAllocation shadertable_hitgroup = device->AllocateGPU(shaderIdentifierSize, cmd);
- device->DispatchRays(&dispatchraysdesc, cmd);
+ device->WriteShaderIdentifier(&RTPSO_reflection, 0, shadertable_raygen.data);
+ device->WriteShaderIdentifier(&RTPSO_reflection, 1, shadertable_miss.data);
+ device->WriteShaderIdentifier(&RTPSO_reflection, 2, shadertable_hitgroup.data);
+
+ DispatchRaysDesc dispatchraysdesc;
+ dispatchraysdesc.ray_generation.buffer = &shadertable_raygen.buffer;
+ dispatchraysdesc.ray_generation.offset = shadertable_raygen.offset;
+ dispatchraysdesc.ray_generation.size = shaderIdentifierSize;
+
+ dispatchraysdesc.miss.buffer = &shadertable_miss.buffer;
+ dispatchraysdesc.miss.offset = shadertable_miss.offset;
+ dispatchraysdesc.miss.size = shaderIdentifierSize;
+ dispatchraysdesc.miss.stride = shaderIdentifierSize;
+
+ dispatchraysdesc.hit_group.buffer = &shadertable_hitgroup.buffer;
+ dispatchraysdesc.hit_group.offset = shadertable_hitgroup.offset;
+ dispatchraysdesc.hit_group.size = shaderIdentifierSize;
+ dispatchraysdesc.hit_group.stride = shaderIdentifierSize;
+
+ dispatchraysdesc.width = desc.width / 2;
+ dispatchraysdesc.height = desc.height / 2;
+
+ device->DispatchRays(&dispatchraysdesc, cmd);
#else
- device->Dispatch(
- (desc.width + 7) / 8,
- (desc.height + 3) / 4,
- 1,
- cmd
- );
+ device->Dispatch(
+ (res.texture_rayIndirectSpecular.GetDesc().width + 7) / 8,
+ (res.texture_rayIndirectSpecular.GetDesc().height + 3) / 4,
+ 1,
+ cmd
+ );
#endif // RTREFLECTION_WITH_RAYTRACING_PIPELINE
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Memory(),
+ GPUBarrier::Image(&res.texture_rayIndirectSpecular, ResourceState::UNORDERED_ACCESS, res.texture_rayIndirectSpecular.desc.layout),
+ GPUBarrier::Image(&res.texture_rayDirectionPDF, ResourceState::UNORDERED_ACCESS, res.texture_rayDirectionPDF.desc.layout),
+ GPUBarrier::Image(&res.texture_rayLengths, ResourceState::UNORDERED_ACCESS, res.texture_rayLengths.desc.layout),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ //device->EventEnd(cmd);
+ }
+
+ // Upscale to full-res:
+ postprocess.resolution.x = desc.width;
+ postprocess.resolution.y = desc.height;
+ postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
+ postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
+ device->PushConstants(&postprocess, sizeof(postprocess), cmd);
+
+ // Resolve pass:
{
- GPUBarrier barriers[] = {
- GPUBarrier::Memory(),
- GPUBarrier::Image(&output, ResourceState::UNORDERED_ACCESS, output.desc.layout),
- GPUBarrier::Image(&res.rayLengths, ResourceState::UNORDERED_ACCESS, res.rayLengths.desc.layout),
+ device->EventBegin("RTReflection Resolve pass", cmd);
+ device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_RESOLVE], cmd);
+
+ const GPUResource* resarray[] = {
+ &res.texture_surface_normal,
+ &res.texture_surface_roughness,
+ &res.texture_rayIndirectSpecular,
+ &res.texture_rayDirectionPDF,
+ &res.texture_rayLengths,
};
- device->Barrier(barriers, arraysize(barriers), cmd);
+ device->BindResources(resarray, 0, arraysize(resarray), cmd);
+
+ const GPUResource* uavs[] = {
+ &res.texture_resolve,
+ &res.texture_resolve_variance,
+ &res.texture_resolve_reprojectionDepth,
+ };
+ device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Image(&res.texture_resolve, res.texture_resolve.desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_resolve_variance, res.texture_resolve_variance.desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_resolve_reprojectionDepth, res.texture_resolve_reprojectionDepth.desc.layout, ResourceState::UNORDERED_ACCESS),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->Dispatch(
+ (res.texture_resolve.GetDesc().width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (res.texture_resolve.GetDesc().height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ 1,
+ cmd
+ );
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Memory(),
+ GPUBarrier::Image(&res.texture_resolve, ResourceState::UNORDERED_ACCESS, res.texture_resolve.desc.layout),
+ GPUBarrier::Image(&res.texture_resolve_variance, ResourceState::UNORDERED_ACCESS, res.texture_resolve_variance.desc.layout),
+ GPUBarrier::Image(&res.texture_resolve_reprojectionDepth, ResourceState::UNORDERED_ACCESS, res.texture_resolve_reprojectionDepth.desc.layout),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->EventEnd(cmd);
}
int temporal_output = device->GetFrameCount() % 2;
@@ -9505,30 +9651,36 @@ void Postprocess_RTReflection(
// Temporal pass:
{
- device->EventBegin("Temporal pass", cmd);
+ device->EventBegin("RTReflection Temporal pass", cmd);
device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_TEMPORAL], cmd);
- device->PushConstants(&postprocess, sizeof(postprocess), cmd);
-
- device->BindResource(&output, 0, cmd);
- device->BindResource(&res.temporal[temporal_history], 1, cmd);
- device->BindResource(&res.rayLengths, 3, cmd);
+ const GPUResource* resarray[] = {
+ &res.texture_surface_roughness,
+ &res.texture_resolve,
+ &res.texture_temporal[temporal_history],
+ &res.texture_resolve_variance,
+ &res.texture_temporal_variance[temporal_history],
+ &res.texture_resolve_reprojectionDepth,
+ };
+ device->BindResources(resarray, 0, arraysize(resarray), cmd);
const GPUResource* uavs[] = {
- &res.temporal[temporal_output],
+ &res.texture_temporal[temporal_output],
+ &res.texture_temporal_variance[temporal_output],
};
device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
{
GPUBarrier barriers[] = {
- GPUBarrier::Image(&res.temporal[temporal_output], res.temporal[temporal_output].desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_temporal[temporal_output], res.texture_temporal[temporal_output].desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_temporal_variance[temporal_output], res.texture_temporal_variance[temporal_output].desc.layout, ResourceState::UNORDERED_ACCESS),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
device->Dispatch(
- (desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
- (desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (res.texture_temporal[temporal_output].GetDesc().width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (res.texture_temporal[temporal_output].GetDesc().height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
1,
cmd
);
@@ -9536,7 +9688,8 @@ void Postprocess_RTReflection(
{
GPUBarrier barriers[] = {
GPUBarrier::Memory(),
- GPUBarrier::Image(&res.temporal[temporal_output], ResourceState::UNORDERED_ACCESS, res.temporal[temporal_output].desc.layout),
+ GPUBarrier::Image(&res.texture_temporal[temporal_output], ResourceState::UNORDERED_ACCESS, res.texture_temporal[temporal_output].desc.layout),
+ GPUBarrier::Image(&res.texture_temporal_variance[temporal_output], ResourceState::UNORDERED_ACCESS, res.texture_temporal_variance[temporal_output].desc.layout),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
@@ -9544,40 +9697,93 @@ void Postprocess_RTReflection(
device->EventEnd(cmd);
}
- // Median blur pass:
+ // Bilateral blur pass:
{
- device->EventBegin("Median blur pass", cmd);
- device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_MEDIAN], cmd);
-
- device->PushConstants(&postprocess, sizeof(postprocess), cmd);
-
- device->BindResource(&res.temporal[temporal_output], 0, cmd);
-
- const GPUResource* uavs[] = {
- &output,
- };
- device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
+ device->EventBegin("RTReflection Bilateral blur pass", cmd);
+ device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_BILATERAL], cmd);
+ // Horizontal:
{
- GPUBarrier barriers[] = {
- GPUBarrier::Image(&output, output.desc.layout, ResourceState::UNORDERED_ACCESS),
+ postprocess.params0.x = 1;
+ postprocess.params0.y = 0;
+ device->PushConstants(&postprocess, sizeof(postprocess), cmd);
+
+ const GPUResource* resarray[] = {
+ &res.texture_temporal[temporal_output],
+ &res.texture_temporal_variance[temporal_output],
+ &res.texture_surface_normal,
+ &res.texture_surface_roughness,
};
- device->Barrier(barriers, arraysize(barriers), cmd);
+ device->BindResources(resarray, 0, arraysize(resarray), cmd);
+
+ const GPUResource* uavs[] = {
+ &res.texture_bilateral_temp,
+ };
+ device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Image(&res.texture_bilateral_temp, res.texture_bilateral_temp.desc.layout, ResourceState::UNORDERED_ACCESS),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->Dispatch(
+ (res.texture_bilateral_temp.GetDesc().width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (res.texture_bilateral_temp.GetDesc().height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ 1,
+ cmd
+ );
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Memory(),
+ GPUBarrier::Image(&res.texture_bilateral_temp, ResourceState::UNORDERED_ACCESS, res.texture_bilateral_temp.desc.layout),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
}
- device->Dispatch(
- (output.desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
- (output.desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
- 1,
- cmd
- );
-
+ // Vertical:
{
- GPUBarrier barriers[] = {
- GPUBarrier::Memory(),
- GPUBarrier::Image(&output, ResourceState::UNORDERED_ACCESS, output.desc.layout),
+ postprocess.params0.x = 0;
+ postprocess.params0.y = 1;
+ device->PushConstants(&postprocess, sizeof(postprocess), cmd);
+
+ const GPUResource* resarray[] = {
+ &res.texture_bilateral_temp,
+ &res.texture_temporal_variance[temporal_output],
+ &res.texture_surface_normal,
+ &res.texture_surface_roughness,
};
- device->Barrier(barriers, arraysize(barriers), cmd);
+ device->BindResources(resarray, 0, arraysize(resarray), cmd);
+
+ const GPUResource* uavs[] = {
+ &output,
+ };
+ device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Image(&output, output.desc.layout, ResourceState::UNORDERED_ACCESS),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->Dispatch(
+ (output.GetDesc().width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (output.GetDesc().height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ 1,
+ cmd
+ );
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Memory(),
+ GPUBarrier::Image(&output, ResourceState::UNORDERED_ACCESS, output.desc.layout),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
}
device->EventEnd(cmd);
@@ -9585,13 +9791,48 @@ void Postprocess_RTReflection(
res.frame++;
- wi::profiler::EndRange(prof_range);
+ wi::profiler::EndRange(profilerRange);
device->EventEnd(cmd);
}
void CreateSSRResources(SSRResources& res, XMUINT2 resolution)
{
res.frame = 0;
+ TextureDesc surface_desc;
+ surface_desc.type = TextureDesc::Type::TEXTURE_2D;
+ surface_desc.width = resolution.x;
+ surface_desc.height = resolution.y;
+ surface_desc.format = Format::R8G8B8A8_SNORM;
+ surface_desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
+ device->CreateTexture(&surface_desc, nullptr, &res.texture_surface_normal);
+ surface_desc.format = Format::R8_UNORM;
+ device->CreateTexture(&surface_desc, nullptr, &res.texture_surface_roughness);
+
+ TextureDesc tile_desc;
+ tile_desc.type = TextureDesc::Type::TEXTURE_2D;
+ tile_desc.width = (resolution.x + SSR_TILESIZE - 1) / SSR_TILESIZE;
+ tile_desc.height = (resolution.y + SSR_TILESIZE - 1) / SSR_TILESIZE;
+ tile_desc.format = Format::R16G16_FLOAT;
+ tile_desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
+ device->CreateTexture(&tile_desc, nullptr, &res.texture_tile_minmax_roughness);
+
+ tile_desc.height = resolution.y;
+ device->CreateTexture(&tile_desc, nullptr, &res.texture_tile_minmax_roughness_horizontal);
+
+ GPUBufferDesc bufferdesc;
+ bufferdesc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
+
+ bufferdesc.size = TILE_STATISTICS_CAPACITY * sizeof(uint);
+ bufferdesc.misc_flags = ResourceMiscFlag::BUFFER_RAW | ResourceMiscFlag::INDIRECT_ARGS;
+ device->CreateBuffer(&bufferdesc, nullptr, &res.buffer_tile_tracing_statistics);
+
+ bufferdesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED;
+ bufferdesc.stride = sizeof(uint);
+ bufferdesc.size = tile_desc.width * tile_desc.height * bufferdesc.stride;
+ device->CreateBuffer(&bufferdesc, nullptr, &res.buffer_tiles_tracing_earlyexit);
+ device->CreateBuffer(&bufferdesc, nullptr, &res.buffer_tiles_tracing_cheap);
+ device->CreateBuffer(&bufferdesc, nullptr, &res.buffer_tiles_tracing_expensive);
+
TextureDesc desc;
desc.type = TextureDesc::Type::TEXTURE_2D;
desc.width = resolution.x / 2;
@@ -9599,13 +9840,39 @@ void CreateSSRResources(SSRResources& res, XMUINT2 resolution)
desc.format = Format::R16G16B16A16_FLOAT;
desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE;
- device->CreateTexture(&desc, nullptr, &res.texture_raytrace);
+ device->CreateTexture(&desc, nullptr, &res.texture_rayIndirectSpecular);
+ device->CreateTexture(&desc, nullptr, &res.texture_rayDirectionPDF);
+ desc.format = Format::R16_FLOAT;
+ device->CreateTexture(&desc, nullptr, &res.texture_rayLengths);
+ device->SetName(&res.texture_rayLengths, "ssr_rayLengths");
+
+ desc.width = resolution.x;
+ desc.height = resolution.y;
+ desc.format = Format::R16G16B16A16_FLOAT;
+ device->CreateTexture(&desc, nullptr, &res.texture_resolve);
device->CreateTexture(&desc, nullptr, &res.texture_temporal[0]);
device->CreateTexture(&desc, nullptr, &res.texture_temporal[1]);
-
+ device->CreateTexture(&desc, nullptr, &res.texture_bilateral_temp);
desc.format = Format::R16_FLOAT;
- device->CreateTexture(&desc, nullptr, &res.rayLengths);
- device->SetName(&res.rayLengths, "ssr_rayLengths");
+ device->CreateTexture(&desc, nullptr, &res.texture_resolve_variance);
+ device->CreateTexture(&desc, nullptr, &res.texture_resolve_reprojectionDepth);
+ device->CreateTexture(&desc, nullptr, &res.texture_temporal_variance[0]);
+ device->CreateTexture(&desc, nullptr, &res.texture_temporal_variance[1]);
+
+ desc.width = (uint32_t)std::pow(2.0f, 1.0f + std::floor(std::log2((float)resolution.x / 2)));
+ desc.height = (uint32_t)std::pow(2.0f, 1.0f + std::floor(std::log2((float)resolution.y / 2)));
+ desc.format = Format::R32G32_FLOAT;
+ desc.mip_levels = 1 + (uint32_t)std::floor(std::log2f(std::max((float)desc.width, (float)desc.height)));
+ device->CreateTexture(&desc, nullptr, &res.texture_depth_hierarchy);
+
+ for (uint32_t i = 0; i < desc.mip_levels; ++i)
+ {
+ int subresource_index;
+ subresource_index = device->CreateSubresource(&res.texture_depth_hierarchy, SubresourceType::SRV, 0, 1, i, 1);
+ assert(subresource_index == i);
+ subresource_index = device->CreateSubresource(&res.texture_depth_hierarchy, SubresourceType::UAV, 0, 1, i, 1);
+ assert(subresource_index == i);
+ }
}
void Postprocess_SSR(
const SSRResources& res,
@@ -9615,47 +9882,33 @@ void Postprocess_SSR(
)
{
device->EventBegin("Postprocess_SSR", cmd);
- auto range = wi::profiler::BeginRangeGPU("SSR", cmd);
+ auto range = wi::profiler::BeginRangeGPU("Screen Space Reflections", cmd);
BindCommonResources(cmd);
- const TextureDesc& input_desc = input.GetDesc();
- const TextureDesc& desc = output.GetDesc();
-
- PostProcess postprocess;
- postprocess.resolution.x = desc.width;
- postprocess.resolution.y = desc.height;
- postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
- postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
- ssr_input_maxmip = float(input_desc.mip_levels - 1);
- ssr_input_resolution_max = (float)std::max(input_desc.width, input_desc.height);
- ssr_frame = (float)res.frame;
-
- // Raytrace pass:
+ // Compute common SSR surface properties:
{
- device->EventBegin("Stochastic Raytrace pass", cmd);
- device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_RAYTRACE], cmd);
- device->PushConstants(&postprocess, sizeof(postprocess), cmd);
-
- device->BindResource(&input, 0, cmd);
+ device->EventBegin("SSR Surface", cmd);
+ device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_SURFACE], cmd);
const GPUResource* uavs[] = {
- &res.texture_raytrace,
- &res.rayLengths
+ &res.texture_surface_normal,
+ &res.texture_surface_roughness,
};
device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
{
GPUBarrier barriers[] = {
- GPUBarrier::Image(&res.texture_raytrace, res.texture_raytrace.desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_surface_normal, res.texture_surface_normal.desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_surface_roughness, res.texture_surface_roughness.desc.layout, ResourceState::UNORDERED_ACCESS),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
device->Dispatch(
- (desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
- (desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (res.texture_surface_normal.GetDesc().width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (res.texture_surface_normal.GetDesc().height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
1,
cmd
);
@@ -9663,7 +9916,8 @@ void Postprocess_SSR(
{
GPUBarrier barriers[] = {
GPUBarrier::Memory(),
- GPUBarrier::Image(&res.texture_raytrace, ResourceState::UNORDERED_ACCESS, res.texture_raytrace.desc.layout),
+ GPUBarrier::Image(&res.texture_surface_normal, ResourceState::UNORDERED_ACCESS, res.texture_surface_normal.desc.layout),
+ GPUBarrier::Image(&res.texture_surface_roughness, ResourceState::UNORDERED_ACCESS, res.texture_surface_roughness.desc.layout),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
@@ -9671,30 +9925,31 @@ void Postprocess_SSR(
device->EventEnd(cmd);
}
- // Resolve pass:
+ // Compute tile classification (horizontal):
{
- device->EventBegin("Resolve pass", cmd);
- device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_RESOLVE], cmd);
- device->PushConstants(&postprocess, sizeof(postprocess), cmd);
+ device->EventBegin("SSR Tile Classification - Horizontal", cmd);
+ device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_TILEMAXROUGHNESS_HORIZONTAL], cmd);
- device->BindResource(&res.texture_raytrace, 0, cmd);
- device->BindResource(&input, 1, cmd);
+ const GPUResource* resarray[] = {
+ &res.texture_surface_roughness,
+ };
+ device->BindResources(resarray, 0, arraysize(resarray), cmd);
const GPUResource* uavs[] = {
- &output,
+ &res.texture_tile_minmax_roughness_horizontal,
};
device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
{
GPUBarrier barriers[] = {
- GPUBarrier::Image(&output, output.desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_tile_minmax_roughness_horizontal, res.texture_tile_minmax_roughness_horizontal.desc.layout, ResourceState::UNORDERED_ACCESS),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
device->Dispatch(
- (desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
- (desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (res.texture_tile_minmax_roughness_horizontal.GetDesc().width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (res.texture_tile_minmax_roughness_horizontal.GetDesc().height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
1,
cmd
);
@@ -9702,7 +9957,286 @@ void Postprocess_SSR(
{
GPUBarrier barriers[] = {
GPUBarrier::Memory(),
- GPUBarrier::Image(&output, ResourceState::UNORDERED_ACCESS, output.desc.layout),
+ GPUBarrier::Image(&res.texture_tile_minmax_roughness_horizontal, ResourceState::UNORDERED_ACCESS, res.texture_tile_minmax_roughness_horizontal.desc.layout),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->EventEnd(cmd);
+ }
+
+ // Compute tile classification (vertical):
+ {
+ device->EventBegin("SSR Tile Classification - Vertical", cmd);
+ device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_TILEMAXROUGHNESS_VERTICAL], cmd);
+
+ const GPUResource* resarray[] = {
+ &res.texture_tile_minmax_roughness_horizontal,
+ };
+ device->BindResources(resarray, 0, arraysize(resarray), cmd);
+
+ const GPUResource* uavs[] = {
+ &res.buffer_tile_tracing_statistics,
+ &res.buffer_tiles_tracing_earlyexit,
+ &res.buffer_tiles_tracing_cheap,
+ &res.buffer_tiles_tracing_expensive,
+ &res.texture_tile_minmax_roughness,
+ };
+ device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Image(&res.texture_tile_minmax_roughness, res.texture_tile_minmax_roughness.desc.layout, ResourceState::UNORDERED_ACCESS),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->Dispatch(
+ (res.texture_tile_minmax_roughness.GetDesc().width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (res.texture_tile_minmax_roughness.GetDesc().height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ 1,
+ cmd
+ );
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Memory(),
+ GPUBarrier::Image(&res.texture_tile_minmax_roughness, ResourceState::UNORDERED_ACCESS, res.texture_tile_minmax_roughness.desc.layout),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->EventEnd(cmd);
+ }
+
+ // Kick indirect tile jobs:
+ {
+ device->EventBegin("SSR Kickjobs", cmd);
+ device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_KICKJOBS], cmd);
+
+ const GPUResource* uavs[] = {
+ &res.buffer_tile_tracing_statistics,
+ &res.buffer_tiles_tracing_earlyexit,
+ &res.buffer_tiles_tracing_cheap,
+ &res.buffer_tiles_tracing_expensive,
+ };
+ device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
+
+ device->Dispatch(1, 1, 1, cmd);
+
+ GPUBarrier barriers[] = {
+ GPUBarrier::Memory(),
+ GPUBarrier::Buffer(&res.buffer_tile_tracing_statistics, ResourceState::UNORDERED_ACCESS, ResourceState::INDIRECT_ARGUMENT),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+
+ device->EventEnd(cmd);
+ }
+
+ PostProcess postprocess;
+
+ // Depth hierarchy:
+ {
+ device->EventBegin("SSR Depth hierarchy pass", cmd);
+ device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_DEPTHHIERARCHY], cmd);
+
+ TextureDesc hierarchyDesc = res.texture_depth_hierarchy.GetDesc();
+
+ {
+ device->BindUAV(&res.texture_depth_hierarchy, 0, cmd, 0);
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Image(&res.texture_depth_hierarchy, res.texture_depth_hierarchy.desc.layout, ResourceState::UNORDERED_ACCESS, 0),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ postprocess.params0.x = (float)hierarchyDesc.width;
+ postprocess.params0.y = (float)hierarchyDesc.height;
+ postprocess.params0.z = 1.0f;
+ device->PushConstants(&postprocess, sizeof(postprocess), cmd);
+
+ device->Dispatch(
+ std::max(1u, hierarchyDesc.width / POSTPROCESS_BLOCKSIZE),
+ std::max(1u, hierarchyDesc.height / POSTPROCESS_BLOCKSIZE),
+ 1,
+ cmd
+ );
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Memory(),
+ GPUBarrier::Image(&res.texture_depth_hierarchy, ResourceState::UNORDERED_ACCESS, res.texture_depth_hierarchy.desc.layout, 0),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+ }
+
+ for (uint32_t i = 1; i < hierarchyDesc.mip_levels; i++)
+ {
+ device->BindResource(&res.texture_depth_hierarchy, 0, cmd, i - 1);
+ device->BindUAV(&res.texture_depth_hierarchy, 0, cmd, i);
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Image(&res.texture_depth_hierarchy, res.texture_depth_hierarchy.desc.layout, ResourceState::UNORDERED_ACCESS, i),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ hierarchyDesc.width /= 2;
+ hierarchyDesc.height /= 2;
+
+ hierarchyDesc.width = std::max(1u, hierarchyDesc.width);
+ hierarchyDesc.height = std::max(1u, hierarchyDesc.height);
+
+ postprocess.params0.x = (float)hierarchyDesc.width;
+ postprocess.params0.y = (float)hierarchyDesc.height;
+ postprocess.params0.z = 0.0f;
+ device->PushConstants(&postprocess, sizeof(postprocess), cmd);
+
+ device->Dispatch(
+ std::max(1u, hierarchyDesc.width / POSTPROCESS_BLOCKSIZE),
+ std::max(1u, hierarchyDesc.height / POSTPROCESS_BLOCKSIZE),
+ 1,
+ cmd
+ );
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Memory(),
+ GPUBarrier::Image(&res.texture_depth_hierarchy, ResourceState::UNORDERED_ACCESS, res.texture_depth_hierarchy.desc.layout, i),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+ }
+
+ device->EventEnd(cmd);
+ }
+
+ const TextureDesc& desc = output.GetDesc();
+
+ // Render half-res:
+ postprocess.resolution.x = desc.width / 2;
+ postprocess.resolution.y = desc.height / 2;
+ postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
+ postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
+
+ // Ratio of the half-res trace resolution to the depth hierarchy size (params1.xy) and its reciprocal (params1.zw):
+ postprocess.params1.x = (float)postprocess.resolution.x / (float)res.texture_depth_hierarchy.GetDesc().width;
+ postprocess.params1.y = (float)postprocess.resolution.y / (float)res.texture_depth_hierarchy.GetDesc().height;
+ postprocess.params1.z = 1.0f / postprocess.params1.x;
+ postprocess.params1.w = 1.0f / postprocess.params1.y;
+ ssr_frame = (float)res.frame;
+ device->PushConstants(&postprocess, sizeof(postprocess), cmd);
+
+ // Raytrace pass:
+ {
+ device->EventBegin("SSR Raytrace pass", cmd);
+
+ const GPUResource* resarray[] = {
+ &res.texture_surface_normal,
+ &res.texture_surface_roughness,
+ &res.texture_depth_hierarchy,
+ &input,
+ &res.buffer_tiles_tracing_earlyexit,
+ &res.buffer_tiles_tracing_cheap,
+ &res.buffer_tiles_tracing_expensive
+ };
+ device->BindResources(resarray, 0, arraysize(resarray), cmd);
+
+ const GPUResource* uavs[] = {
+ &res.texture_rayIndirectSpecular,
+ &res.texture_rayDirectionPDF,
+ &res.texture_rayLengths
+ };
+ device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Buffer(&res.buffer_tiles_tracing_earlyexit, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE),
+ GPUBarrier::Buffer(&res.buffer_tiles_tracing_cheap, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE),
+ GPUBarrier::Buffer(&res.buffer_tiles_tracing_expensive, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE),
+ GPUBarrier::Image(&res.texture_rayIndirectSpecular, res.texture_rayIndirectSpecular.desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_rayDirectionPDF, res.texture_rayDirectionPDF.desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_rayLengths, res.texture_rayLengths.desc.layout, ResourceState::UNORDERED_ACCESS),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_RAYTRACE_EARLYEXIT], cmd);
+ device->DispatchIndirect(&res.buffer_tile_tracing_statistics, INDIRECT_OFFSET_EARLYEXIT, cmd);
+
+ device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_RAYTRACE_CHEAP], cmd);
+ device->DispatchIndirect(&res.buffer_tile_tracing_statistics, INDIRECT_OFFSET_CHEAP, cmd);
+
+ device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_RAYTRACE], cmd);
+ device->DispatchIndirect(&res.buffer_tile_tracing_statistics, INDIRECT_OFFSET_EXPENSIVE, cmd);
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Memory(),
+ GPUBarrier::Image(&res.texture_rayIndirectSpecular, ResourceState::UNORDERED_ACCESS, res.texture_rayIndirectSpecular.desc.layout),
+ GPUBarrier::Image(&res.texture_rayDirectionPDF, ResourceState::UNORDERED_ACCESS, res.texture_rayDirectionPDF.desc.layout),
+ GPUBarrier::Image(&res.texture_rayLengths, ResourceState::UNORDERED_ACCESS, res.texture_rayLengths.desc.layout),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->EventEnd(cmd);
+ }
+
+ // Upscale to full-res:
+ postprocess.resolution.x = desc.width;
+ postprocess.resolution.y = desc.height;
+ postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
+ postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
+ device->PushConstants(&postprocess, sizeof(postprocess), cmd);
+
+ // Resolve pass:
+ {
+ device->EventBegin("SSR Resolve pass", cmd);
+ device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_RESOLVE], cmd);
+
+ const GPUResource* resarray[] = {
+ &res.texture_surface_normal,
+ &res.texture_surface_roughness,
+ &res.texture_rayIndirectSpecular,
+ &res.texture_rayDirectionPDF,
+ &res.texture_rayLengths,
+ };
+ device->BindResources(resarray, 0, arraysize(resarray), cmd);
+
+ const GPUResource* uavs[] = {
+ &res.texture_resolve,
+ &res.texture_resolve_variance,
+ &res.texture_resolve_reprojectionDepth,
+ };
+ device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Image(&res.texture_resolve, res.texture_resolve.desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_resolve_variance, res.texture_resolve_variance.desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_resolve_reprojectionDepth, res.texture_resolve_reprojectionDepth.desc.layout, ResourceState::UNORDERED_ACCESS),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->Dispatch(
+ (res.texture_resolve.GetDesc().width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (res.texture_resolve.GetDesc().height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ 1,
+ cmd
+ );
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Memory(),
+ GPUBarrier::Image(&res.texture_resolve, ResourceState::UNORDERED_ACCESS, res.texture_resolve.desc.layout),
+ GPUBarrier::Image(&res.texture_resolve_variance, ResourceState::UNORDERED_ACCESS, res.texture_resolve_variance.desc.layout),
+ GPUBarrier::Image(&res.texture_resolve_reprojectionDepth, ResourceState::UNORDERED_ACCESS, res.texture_resolve_reprojectionDepth.desc.layout),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
@@ -9715,29 +10249,36 @@ void Postprocess_SSR(
// Temporal pass:
{
- device->EventBegin("Temporal pass", cmd);
+ device->EventBegin("SSR Temporal pass", cmd);
device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_TEMPORAL], cmd);
- device->PushConstants(&postprocess, sizeof(postprocess), cmd);
- device->BindResource(&output, 0, cmd);
- device->BindResource(&res.texture_temporal[temporal_history], 1, cmd);
- device->BindResource(&res.rayLengths, 3, cmd);
+ const GPUResource* resarray[] = {
+ &res.texture_surface_roughness,
+ &res.texture_resolve,
+ &res.texture_temporal[temporal_history],
+ &res.texture_resolve_variance,
+ &res.texture_temporal_variance[temporal_history],
+ &res.texture_resolve_reprojectionDepth,
+ };
+ device->BindResources(resarray, 0, arraysize(resarray), cmd);
const GPUResource* uavs[] = {
&res.texture_temporal[temporal_output],
+ &res.texture_temporal_variance[temporal_output],
};
device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
{
GPUBarrier barriers[] = {
GPUBarrier::Image(&res.texture_temporal[temporal_output], res.texture_temporal[temporal_output].desc.layout, ResourceState::UNORDERED_ACCESS),
+ GPUBarrier::Image(&res.texture_temporal_variance[temporal_output], res.texture_temporal_variance[temporal_output].desc.layout, ResourceState::UNORDERED_ACCESS),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
device->Dispatch(
- (desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
- (desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (res.texture_temporal[temporal_output].GetDesc().width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (res.texture_temporal[temporal_output].GetDesc().height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
1,
cmd
);
@@ -9746,6 +10287,7 @@ void Postprocess_SSR(
GPUBarrier barriers[] = {
GPUBarrier::Memory(),
GPUBarrier::Image(&res.texture_temporal[temporal_output], ResourceState::UNORDERED_ACCESS, res.texture_temporal[temporal_output].desc.layout),
+ GPUBarrier::Image(&res.texture_temporal_variance[temporal_output], ResourceState::UNORDERED_ACCESS, res.texture_temporal_variance[temporal_output].desc.layout),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
@@ -9753,39 +10295,93 @@ void Postprocess_SSR(
device->EventEnd(cmd);
}
- // Median blur pass:
+ // Bilateral blur pass:
{
- device->EventBegin("Median blur pass", cmd);
- device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_MEDIAN], cmd);
- device->PushConstants(&postprocess, sizeof(postprocess), cmd);
-
- device->BindResource(&res.texture_temporal[temporal_output], 0, cmd);
-
- const GPUResource* uavs[] = {
- &output,
- };
- device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
+ device->EventBegin("SSR Bilateral blur pass", cmd);
+ device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_BILATERAL], cmd);
+ // Horizontal:
{
- GPUBarrier barriers[] = {
- GPUBarrier::Image(&output, output.desc.layout, ResourceState::UNORDERED_ACCESS),
+ postprocess.params0.x = 1;
+ postprocess.params0.y = 0;
+ device->PushConstants(&postprocess, sizeof(postprocess), cmd);
+
+ const GPUResource* resarray[] = {
+ &res.texture_temporal[temporal_output],
+ &res.texture_temporal_variance[temporal_output],
+ &res.texture_surface_normal,
+ &res.texture_surface_roughness,
};
- device->Barrier(barriers, arraysize(barriers), cmd);
+ device->BindResources(resarray, 0, arraysize(resarray), cmd);
+
+ const GPUResource* uavs[] = {
+ &res.texture_bilateral_temp,
+ };
+ device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Image(&res.texture_bilateral_temp, res.texture_bilateral_temp.desc.layout, ResourceState::UNORDERED_ACCESS),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->Dispatch(
+ (res.texture_bilateral_temp.GetDesc().width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (res.texture_bilateral_temp.GetDesc().height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ 1,
+ cmd
+ );
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Memory(),
+ GPUBarrier::Image(&res.texture_bilateral_temp, ResourceState::UNORDERED_ACCESS, res.texture_bilateral_temp.desc.layout),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
}
- device->Dispatch(
- (desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
- (desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
- 1,
- cmd
- );
-
+ // Vertical:
{
- GPUBarrier barriers[] = {
- GPUBarrier::Memory(),
- GPUBarrier::Image(&output, ResourceState::UNORDERED_ACCESS, output.desc.layout),
+ postprocess.params0.x = 0;
+ postprocess.params0.y = 1;
+ device->PushConstants(&postprocess, sizeof(postprocess), cmd);
+
+ const GPUResource* resarray[] = {
+ &res.texture_bilateral_temp,
+ &res.texture_temporal_variance[temporal_output],
+ &res.texture_surface_normal,
+ &res.texture_surface_roughness,
};
- device->Barrier(barriers, arraysize(barriers), cmd);
+ device->BindResources(resarray, 0, arraysize(resarray), cmd);
+
+ const GPUResource* uavs[] = {
+ &output,
+ };
+ device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Image(&output, output.desc.layout, ResourceState::UNORDERED_ACCESS),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
+
+ device->Dispatch(
+ (output.GetDesc().width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ (output.GetDesc().height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+ 1,
+ cmd
+ );
+
+ {
+ GPUBarrier barriers[] = {
+ GPUBarrier::Memory(),
+ GPUBarrier::Image(&output, ResourceState::UNORDERED_ACCESS, output.desc.layout),
+ };
+ device->Barrier(barriers, arraysize(barriers), cmd);
+ }
}
device->EventEnd(cmd);
diff --git a/WickedEngine/wiRenderer.h b/WickedEngine/wiRenderer.h
index b0a14c69e..6e0c96bc4 100644
--- a/WickedEngine/wiRenderer.h
+++ b/WickedEngine/wiRenderer.h
@@ -444,8 +444,18 @@ namespace wi::renderer
struct RTReflectionResources
{
mutable int frame = 0;
- wi::graphics::Texture temporal[2];
- wi::graphics::Texture rayLengths;
+ wi::graphics::Texture texture_surface_normal; // NOTE(review): same member name as in SSRResources; presumably plays the same role for ray traced reflections - confirm against Postprocess_RTReflection
+ wi::graphics::Texture texture_surface_roughness;
+ wi::graphics::Texture texture_surface_environment; // this struct only: SSRResources declares no texture_surface_environment member
+ wi::graphics::Texture texture_rayIndirectSpecular;
+ wi::graphics::Texture texture_rayDirectionPDF;
+ wi::graphics::Texture texture_rayLengths;
+ wi::graphics::Texture texture_resolve;
+ wi::graphics::Texture texture_resolve_variance;
+ wi::graphics::Texture texture_resolve_reprojectionDepth;
+ wi::graphics::Texture texture_temporal[2]; // two-element array; NOTE(review): presumably indexed as history/output like the SSR temporal pass - confirm
+ wi::graphics::Texture texture_temporal_variance[2]; // two-element array, same size as texture_temporal
+ wi::graphics::Texture texture_bilateral_temp; // NOTE(review): presumably the intermediate target of a two-pass bilateral blur, as in SSR - confirm
};
void CreateRTReflectionResources(RTReflectionResources& res, XMUINT2 resolution);
void Postprocess_RTReflection(
@@ -459,9 +469,24 @@ namespace wi::renderer
struct SSRResources
{
mutable int frame = 0;
- wi::graphics::Texture texture_raytrace;
- wi::graphics::Texture rayLengths;
+ wi::graphics::Texture texture_surface_normal; // shader input of both bilateral blur passes (horizontal and vertical) in Postprocess_SSR
+ wi::graphics::Texture texture_surface_roughness; // shader input of the temporal pass and of both bilateral blur passes
+ wi::graphics::Texture texture_tile_minmax_roughness_horizontal; // NOTE(review): presumably the intermediate result of ssr_tileMaxRoughness_horizontalCS - confirm
+ wi::graphics::Texture texture_tile_minmax_roughness; // NOTE(review): presumably the final per-tile result after ssr_tileMaxRoughness_verticalCS - confirm
+ wi::graphics::Texture texture_depth_hierarchy; // NOTE(review): presumably written by ssr_depthHierarchyCS - confirm
+ wi::graphics::Texture texture_rayIndirectSpecular;
+ wi::graphics::Texture texture_rayDirectionPDF;
+ wi::graphics::Texture texture_rayLengths;
+ wi::graphics::Texture texture_resolve; // shader input of the temporal pass
+ wi::graphics::Texture texture_resolve_variance; // shader input of the temporal pass
+ wi::graphics::Texture texture_resolve_reprojectionDepth; // shader input of the temporal pass
wi::graphics::Texture texture_temporal[2];
+ wi::graphics::Texture texture_temporal_variance[2]; // temporal pass reads [temporal_history] and writes [temporal_output]; both bilateral passes read [temporal_output]
+ wi::graphics::Texture texture_bilateral_temp; // UAV output of the horizontal bilateral pass, then shader input of the vertical pass
+ wi::graphics::GPUBuffer buffer_tile_tracing_statistics; // NOTE(review): presumably per-category tile counters filled during tile classification - confirm
+ wi::graphics::GPUBuffer buffer_tiles_tracing_earlyexit; // NOTE(review): presumably the tile list consumed by ssr_raytraceCS_earlyexit - confirm
+ wi::graphics::GPUBuffer buffer_tiles_tracing_cheap; // NOTE(review): presumably the tile list consumed by ssr_raytraceCS_cheap - confirm
+ wi::graphics::GPUBuffer buffer_tiles_tracing_expensive; // NOTE(review): presumably the tile list consumed by the full ssr_raytraceCS - confirm
};
void CreateSSRResources(SSRResources& res, XMUINT2 resolution);
void Postprocess_SSR(
diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp
index 406079d0b..e50de2a7a 100644
--- a/WickedEngine/wiVersion.cpp
+++ b/WickedEngine/wiVersion.cpp
@@ -9,7 +9,7 @@ namespace wi::version
// minor features, major updates, breaking compatibility changes
const int minor = 60;
// minor bug fixes, alterations, refactors, updates
- const int revision = 27;
+ const int revision = 28;
const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);