Merge pull request #96 from Kliaxe/master

Stochastic Screen Space Reflections
This commit is contained in:
Turánszki János
2020-03-10 22:08:02 +00:00
committed by GitHub
18 changed files with 1198 additions and 10 deletions
+16
View File
@@ -36,6 +36,15 @@ void RenderPath3D::ResizeBuffers()
assert(subresource_index == i);
}
}
{
    // Output target of the stochastic SSR post-process. It is bound both as a
    // shader resource and as an unordered-access resource.
    TextureDesc desc;
    desc.Width = wiRenderer::GetInternalResolution().x;
    desc.Height = wiRenderer::GetInternalResolution().y;
    desc.Format = FORMAT_R16G16B16A16_FLOAT;
    desc.BindFlags = BIND_UNORDERED_ACCESS | BIND_SHADER_RESOURCE;

    device->CreateTexture(&desc, nullptr, &rtStochasticSSR);
    device->SetName(&rtStochasticSSR, "rtStochasticSSR");
}
{
TextureDesc desc;
desc.BindFlags = BIND_RENDER_TARGET | BIND_SHADER_RESOURCE;
@@ -441,6 +450,13 @@ void RenderPath3D::RenderSSR(const Texture& srcSceneRT, const wiGraphics::Textur
wiRenderer::Postprocess_SSR(srcSceneRT, depthBuffer_Copy, rtLinearDepth_minmax, gbuffer1, rtSSR, cmd);
}
}
// Runs the stochastic screen-space reflection post-process into rtStochasticSSR.
// Does nothing when SSR is disabled (the same toggle that gates RenderSSR).
//  srcSceneRT        : scene color that the reflections sample from
//  gbuffer0/1/2      : g-buffer textures forwarded unchanged to the post-process
//  cmd               : command list the work is recorded on
void RenderPath3D::RenderStochasticSSR(const Texture& srcSceneRT, const wiGraphics::Texture& gbuffer0, const wiGraphics::Texture& gbuffer1, const wiGraphics::Texture& gbuffer2, CommandList cmd) const
{
    if (!getSSREnabled())
    {
        return;
    }

    wiRenderer::Postprocess_StochasticSSR(
        srcSceneRT,
        depthBuffer_Copy,
        rtLinearDepth_minmax,
        gbuffer0,
        gbuffer1,
        gbuffer2,
        rtStochasticSSR,
        cmd
    );
}
void RenderPath3D::DownsampleDepthBuffer(CommandList cmd) const
{
GraphicsDevice* device = wiRenderer::GetDevice();
+3 -1
View File
@@ -51,7 +51,8 @@ private:
protected:
wiGraphics::Texture rtReflection; // contains the scene rendered for planar reflections
wiGraphics::Texture rtSSR; // screen-space reflection results
wiGraphics::Texture rtSSR; // standard screen-space reflection results
wiGraphics::Texture rtStochasticSSR; // stochastic screen-space reflection results
wiGraphics::Texture rtSceneCopy; // contains the rendered scene that can be fed into transparent pass for distortion effect
wiGraphics::Texture rtWaterRipple; // water ripple sprite normal maps are rendered into this
wiGraphics::Texture rtParticleDistortion; // contains distortive particles
@@ -102,6 +103,7 @@ protected:
virtual void RenderLinearDepth(wiGraphics::CommandList cmd) const;
virtual void RenderSSAO(wiGraphics::CommandList cmd) const;
virtual void RenderSSR(const wiGraphics::Texture& srcSceneRT, const wiGraphics::Texture& gbuffer1, wiGraphics::CommandList cmd) const;
// Stochastic variant of RenderSSR: in addition to the scene color and gbuffer1 it also takes gbuffer0 and gbuffer2.
virtual void RenderStochasticSSR(const wiGraphics::Texture& srcSceneRT, const wiGraphics::Texture& gbuffer0, const wiGraphics::Texture& gbuffer1, const wiGraphics::Texture& gbuffer2, wiGraphics::CommandList cmd) const;
virtual void DownsampleDepthBuffer(wiGraphics::CommandList cmd) const;
virtual void RenderOutline(const wiGraphics::Texture& dstSceneRT, wiGraphics::CommandList cmd) const;
virtual void RenderLightShafts(wiGraphics::CommandList cmd) const;
+2 -2
View File
@@ -209,7 +209,7 @@ void RenderPath3D_Deferred::Render() const
device->BindViewports(1, &vp, cmd);
device->BindResource(PS, getSSAOEnabled() ? &rtSSAO[0] : wiTextureHelper::getWhite(), TEXSLOT_RENDERPATH_SSAO, cmd);
device->BindResource(PS, getSSREnabled() ? &rtSSR : wiTextureHelper::getTransparent(), TEXSLOT_RENDERPATH_SSR, cmd);
device->BindResource(PS, getSSREnabled() ? &rtStochasticSSR : wiTextureHelper::getTransparent(), TEXSLOT_RENDERPATH_SSR, cmd);
wiRenderer::DrawDeferredLights(wiRenderer::GetCamera(), depthBuffer_Copy, rtGBuffer[0], rtGBuffer[1], rtGBuffer[2], cmd);
device->RenderPassEnd(cmd);
@@ -227,7 +227,7 @@ void RenderPath3D_Deferred::Render() const
RenderDeferredComposition(cmd);
RenderSSR(rtDeferred, rtGBuffer[1], cmd);
RenderStochasticSSR(rtDeferred, rtGBuffer[0], rtGBuffer[1], rtGBuffer[2], cmd);
DownsampleDepthBuffer(cmd);
+2 -2
View File
@@ -105,7 +105,7 @@ void RenderPath3D_TiledDeferred::Render() const
RenderDecals(cmd);
device->BindResource(CS, getSSAOEnabled() ? &rtSSAO[0] : wiTextureHelper::getWhite(), TEXSLOT_RENDERPATH_SSAO, cmd);
device->BindResource(CS, getSSREnabled() ? &rtSSR : wiTextureHelper::getTransparent(), TEXSLOT_RENDERPATH_SSR, cmd);
device->BindResource(CS, getSSREnabled() ? &rtStochasticSSR : wiTextureHelper::getTransparent(), TEXSLOT_RENDERPATH_SSR, cmd);
if (device->CheckCapability(GraphicsDevice::GRAPHICSDEVICE_CAPABILITY_UAV_LOAD_FORMAT_R11G11B10_FLOAT))
@@ -156,7 +156,7 @@ void RenderPath3D_TiledDeferred::Render() const
RenderDeferredComposition(cmd);
RenderSSR(rtDeferred, rtGBuffer[1], cmd);
RenderStochasticSSR(rtDeferred, rtGBuffer[0], rtGBuffer[1], rtGBuffer[2], cmd);
DownsampleDepthBuffer(cmd);
+17 -1
View File
@@ -31,6 +31,7 @@
<None Include="quad.hlsli" />
<None Include="raytracingHF.hlsli" />
<None Include="skyHF.hlsli" />
<None Include="stochasticSSRHF.hlsli" />
<None Include="uvsphere.hlsli" />
<None Include="volumeLightHF.hlsli" />
<None Include="voxelConeTracingHF.hlsli" />
@@ -801,12 +802,27 @@
<FxCompile Include="ssaoCS.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
</FxCompile>
<FxCompile Include="stochasticSSRCS_combine.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
</FxCompile>
<FxCompile Include="stochasticSSRCS_median.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
</FxCompile>
<FxCompile Include="stochasticSSRCS_raytrace.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
</FxCompile>
<FxCompile Include="ssrCS.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
</FxCompile>
<FxCompile Include="sssPS.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Pixel</ShaderType>
</FxCompile>
<FxCompile Include="stochasticSSRCS_resolve.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
</FxCompile>
<FxCompile Include="stochasticSSRCS_temporal.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
</FxCompile>
<FxCompile Include="sunPS.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Pixel</ShaderType>
</FxCompile>
@@ -974,4 +990,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>
@@ -85,6 +85,9 @@
<None Include="depthoffieldHF.hlsli">
<Filter>HF</Filter>
</None>
<None Include="stochasticSSRHF.hlsli">
<Filter>HF</Filter>
</None>
</ItemGroup>
<ItemGroup>
<FxCompile Include="objectHS.hlsl">
@@ -852,6 +855,21 @@
<FxCompile Include="raytrace_tilesortCS.hlsl">
<Filter>CS</Filter>
</FxCompile>
<FxCompile Include="stochasticSSRCS_raytrace.hlsl">
<Filter>CS</Filter>
</FxCompile>
<FxCompile Include="stochasticSSRCS_resolve.hlsl">
<Filter>CS</Filter>
</FxCompile>
<FxCompile Include="stochasticSSRCS_median.hlsl">
<Filter>CS</Filter>
</FxCompile>
<FxCompile Include="stochasticSSRCS_temporal.hlsl">
<Filter>CS</Filter>
</FxCompile>
<FxCompile Include="stochasticSSRCS_combine.hlsl">
<Filter>CS</Filter>
</FxCompile>
<FxCompile Include="upsample_bilateral_float4CS.hlsl">
<Filter>CS</Filter>
</FxCompile>
@@ -897,4 +915,4 @@
<UniqueIdentifier>{12396e21-0254-42fa-a88b-805f0703eca5}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>
</Project>
+1 -1
View File
@@ -19,5 +19,5 @@ float4 main(float4 pos : SV_Position, float2 uv : TEXCOORD) : SV_TARGET
ApplyFog(depth, color);
return color;
return max(0, color);
}
+29 -1
View File
@@ -147,6 +147,34 @@ inline float2 hammersley2d(uint idx, uint num) {
return float2(float(idx) / float(num), radicalInverse_VdC);
}
// Randomized Hammersley-style 2D point for sample `idx` out of `num`.
//  x: fractional part of idx/num shifted by random.x / 65536
//  y: upper 16 bits of the bit-reversed index, XOR-ed with random.y, scaled by 1 / 65536
inline float2 HammersleyRandom(uint idx, uint num, uint2 random)
{
    // Reverse the 32 bits of idx by swapping progressively smaller bit groups.
    uint reversed = (idx << 16) | (idx >> 16);
    reversed = ((reversed & 0x00ff00ff) << 8) | ((reversed & 0xff00ff00) >> 8);
    reversed = ((reversed & 0x0f0f0f0f) << 4) | ((reversed & 0xf0f0f0f0) >> 4);
    reversed = ((reversed & 0x33333333) << 2) | ((reversed & 0xcccccccc) >> 2);
    reversed = ((reversed & 0x55555555) << 1) | ((reversed & 0xaaaaaaaa) >> 1);

    float2 xi;
    xi.x = frac((float) idx / num + float(random.x) * (1.0 / 65536.0));
    xi.y = float((reversed >> 16) ^ random.y) * (1.0 / 65536.0);
    return xi;
}
// Overload without a sample count: the x coordinate depends only on random.x,
// while the y coordinate still uses the bit-reversed index XOR-ed with random.y.
inline float2 HammersleyRandom(uint idx, uint2 random)
{
    // Reverse the 32 bits of idx by swapping progressively smaller bit groups.
    uint reversed = (idx << 16) | (idx >> 16);
    reversed = ((reversed & 0x00ff00ff) << 8) | ((reversed & 0xff00ff00) >> 8);
    reversed = ((reversed & 0x0f0f0f0f) << 4) | ((reversed & 0xf0f0f0f0) >> 4);
    reversed = ((reversed & 0x33333333) << 2) | ((reversed & 0xcccccccc) >> 2);
    reversed = ((reversed & 0x55555555) << 1) | ((reversed & 0xaaaaaaaa) >> 1);

    float2 xi;
    xi.x = frac(float(random.x) * (1.0 / 65536.0));
    xi.y = float((reversed >> 16) ^ random.y) * (1.0 / 65536.0);
    return xi;
}
// "Next Generation Post Processing in Call of Duty: Advanced Warfare"
// http://advances.realtimerendering.com/s2014/index.html
float InterleavedGradientNoise(float2 uv, uint frameCount)
@@ -554,4 +582,4 @@ inline float dither(in float2 pixel)
return ditherMask8(pixel);
}
#endif // WI_SHADER_GLOBALS_HF
#endif // WI_SHADER_GLOBALS_HF
+44
View File
@@ -0,0 +1,44 @@
#include "globals.hlsli"
#include "brdf.hlsli"
#include "stochasticSSRHF.hlsli"
#include "ShaderInterop_Postprocess.h"
TEXTURE2D(texture_median, float4, TEXSLOT_ONDEMAND0);
RWTEXTURE2D(output, float4, 0);
// Final Stochastic SSR pass: reads the median-filtered reflection color and
// modulates it by the Schlick Fresnel term of the receiving surface.
// Further touches such as specular occlusion or a BRDF LUT could be applied here.
// Pixels with depth == 0 are skipped and their output texel is left untouched.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
    const float2 uv = (DTid.xy + 0.5f) * xPPResolution_rcp;
    const float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 0);
    if (depth == 0.0f)
        return;

    // Everything in view space:
    const float3 P = reconstructPosition(uv, depth, g_xCamera_InvP);
    const float3 N = mul((float3x3) g_xCamera_View, decodeNormal(texture_gbuffer1.SampleLevel(sampler_point_clamp, uv, 0).xy)).xyz;

    // V must point from the surface towards the camera (the view-space origin).
    // With normalize(P) the dot product below is negative for every visible
    // surface, NdotV clamps to 0 and the Fresnel term degenerates to f90.
    const float3 V = normalize(-P);

    float NdotV = max(dot(N, V), 0.0f);

    float3 albedo = texture_gbuffer0.SampleLevel(sampler_point_clamp, uv, 0).rgb;
    float4 baseColor = float4(albedo, 1.0f);

    float4 GBuffer2 = texture_gbuffer2.SampleLevel(sampler_point_clamp, uv, 0);
    //float occlusion = GBuffer2.r;
    //float roughness = GBuffer2.g;
    float metalness = GBuffer2.b;
    float reflectance = GBuffer2.a;

    float3 f0 = ComputeF0(baseColor, reflectance, metalness);
    float f90 = saturate(50.0 * dot(f0, 0.33));
    float3 F = F_Schlick(f0, f90, NdotV);

    // Only the color is scaled by Fresnel; alpha is passed through unchanged.
    float4 final = texture_median.SampleLevel(sampler_point_clamp, uv, 0);
    final.rgb *= F;

    output[DTid.xy] = final;
}
+64
View File
@@ -0,0 +1,64 @@
#include "globals.hlsli"
#include "ShaderInterop_Postprocess.h"
TEXTURE2D(texture_temporal, float4, TEXSLOT_ONDEMAND0);
RWTEXTURE2D(output, float4, 0);
// A Fast, Small-Radius GPU Median Filter by Morgan McGuire
// https://casual-effects.com/research/McGuire2008Median/index.html
// s2: compare-exchange, leaves min(a, b) in a and max(a, b) in b. Requires a variable named `temp` in scope.
#define s2(a, b) temp = a; a = min(a, b); b = max(temp, b);
// t2: compare-exchange on elements a and b of the array `v`.
#define t2(a, b) s2(v[a], v[b]);
// t24 / t25: four / five consecutive compare-exchanges on `v`.
#define t24(a, b, c, d, e, f, g, h) t2(a, b); t2(c, d); t2(e, f); t2(g, h);
#define t25(a, b, c, d, e, f, g, h, i, j) t24(a, b, c, d, e, f, g, h); t2(i, j);
// 5x5 median filter over texture_temporal.
// Gathers the 25 texels around the current pixel, runs a fixed compare-exchange
// network over them and writes element 12 (the middle of the 25) to `output`.
// min/max operate per component, so each channel is filtered independently.
// Pixels with depth == 0 are skipped and their output texel is left untouched.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
const float2 uv = (DTid.xy + 0.5f) * xPPResolution_rcp;
const float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 0);
if (depth == 0.0f)
return;
half4 v[25];
// Add the pixels which make up our window to the pixel array.
[unroll]
for (int dX = -2; dX <= 2; ++dX)
{
[unroll]
for (int dY = -2; dY <= 2; ++dY)
{
float2 offset = float2(float(dX), float(dY));
// If a pixel in the window is located at (x+dX, y+dY), put it at index (dX + R)(2R + 1) + (dY + R) of the
// pixel array. This will fill the pixel array, with the top left pixel of the window at pixel[0] and the
// bottom right pixel of the window at pixel[N-1].
v[(dX + 2) * 5 + (dY + 2)] = texture_temporal.SampleLevel(sampler_linear_clamp, uv + offset * xPPResolution_rcp, 0);
}
}
// Scratch value required by the s2 macro.
half4 temp;
// Compare-exchange network; the order of the exchanges matters and must not be changed.
t25(0, 1, 3, 4, 2, 4, 2, 3, 6, 7);
t25(5, 7, 5, 6, 9, 7, 1, 7, 1, 4);
t25(12, 13, 11, 13, 11, 12, 15, 16, 14, 16);
t25(14, 15, 18, 19, 17, 19, 17, 18, 21, 22);
t25(20, 22, 20, 21, 23, 24, 2, 5, 3, 6);
t25(0, 6, 0, 3, 4, 7, 1, 7, 1, 4);
t25(11, 14, 8, 14, 8, 11, 12, 15, 9, 15);
t25(9, 12, 13, 16, 10, 16, 10, 13, 20, 23);
t25(17, 23, 17, 20, 21, 24, 18, 24, 18, 21);
t25(19, 22, 8, 17, 9, 18, 0, 18, 0, 9);
t25(10, 19, 1, 19, 1, 10, 11, 20, 2, 20);
t25(2, 11, 12, 21, 3, 21, 3, 12, 13, 22);
t25(4, 22, 4, 13, 14, 23, 5, 23, 5, 14);
t25(15, 24, 6, 24, 6, 15, 7, 16, 7, 19);
t25(3, 11, 5, 17, 11, 17, 9, 17, 4, 10);
t25(6, 12, 7, 14, 4, 6, 4, 7, 12, 14);
t25(10, 14, 6, 7, 10, 12, 6, 10, 6, 17);
t25(12, 17, 7, 17, 7, 10, 12, 18, 7, 12);
t24(10, 18, 12, 20, 10, 20, 10, 12);
// Per the referenced network, the median now sits in the middle element.
output[DTid.xy] = v[12];
}
+296
View File
@@ -0,0 +1,296 @@
#include "globals.hlsli"
#include "stochasticSSRHF.hlsli"
#include "ShaderInterop_Postprocess.h"
TEXTURE2D(input, float4, TEXSLOT_ONDEMAND0);
TEXTURE2D(texture_lineardepth_minmax, float2, TEXSLOT_ONDEMAND1);
RWTEXTURE2D(texture_raytrace, float4, 0);
RWTEXTURE2D(texture_mask, float2, 1);
// Use this to use reduced precision, but higher framerate:
// when defined, the ray march samples texture_lineardepth_minmax (at a roughness-driven mip level)
// instead of linearizing texture_depth at mip 0.
#define USE_LINEARDEPTH
static const float rayTraceStride = 1.0f; // Step in horizontal or vertical pixels between samples.
static const float rayTraceMaxStep = 512.0f; // Maximum number of iterations. Higher gives better images but may be slow.
static const float rayTraceHitThickness = 1.5f; // Thickness to ascribe to each pixel in the depth buffer.
static const float rayTraceHitThicknessBias = 7.0f; // Bias to control the thickness along distance.
static const float rayTraceMaxDistance = 1000.0f; // Maximum camera-space distance to trace before returning a miss.
static const float rayTraceStrideCutoff = 100.0f; // More distant pixels are smaller in screen space. This value tells at what point to
// start relaxing the stride to give higher quality reflections for objects far from the camera.
static const float raytraceHZBBias = 1.0f; // Scales how fast the sampled depth mip level grows per step: level += min(bias / 10, 5) * roughness.
// Squared Euclidean distance between two 2D points.
float DistanceSquared(float2 a, float2 b)
{
    const float2 delta = a - b;
    return dot(delta, delta);
}
// Tests whether the ray depth interval [minZ, maxZ] hits the scene depth z.
// Returns true when minZ >= z' and maxZ - thickness <= z', where z' is the
// slightly biased scene depth and thickness grows with distance.
bool intersectsDepthBuffer(float z, float minZ, float maxZ)
{
    // 0 at the camera, reaching 1 at rayTraceStrideCutoff; drives both adjustments below.
    const float distanceScale = min(1.0f, z / rayTraceStrideCutoff);

    // Increase thickness along distance.
    // This helps keep objects from disappearing in the distance.
    const float scaledThickness = rayTraceHitThickness * rayTraceHitThicknessBias * distanceScale;
    const float thickness = clamp(scaledThickness, rayTraceHitThickness, 10.0f);

    // Effectively remove line/tiny artifacts, mostly caused by Z-buffer precision.
    const float biasedZ = z + lerp(0.05f, 0.0f, distanceScale);

    if (!(minZ >= biasedZ))
    {
        return false;
    }
    return (maxZ - thickness) <= biasedZ;
}
// Heavily adapted from McGuire and Mara's original implementation
// http://casual-effects.blogspot.com/2014/08/screen-space-ray-tracing.html
//
// Marches a view-space ray across the screen with a DDA and tests it against the depth buffer.
//  csOrig         : ray origin (view space)
//  csDir          : ray direction (view space)
//  jitter         : fraction of a step by which the start of the march is offset
//  roughness      : reduces the iteration budget and raises the sampled depth mip level
//  hitPixel       : out, UV of the last sampled position (0 if nothing was sampled)
//  hitPoint       : out, reconstructed position at the end of the march (0 on an early miss)
//  iterationCount : out, number of steps taken (0 on an early miss)
// Returns true when the final ray interval intersects the depth buffer.
bool ScreenSpaceRayTrace(float3 csOrig, float3 csDir, float jitter, float roughness, out float2 hitPixel, out float3 hitPoint, out float iterationCount)
{
    // `out` parameters carry no value on entry. hitPixel is tested at the top of the
    // march loop before its first assignment, and the early `return false` paths never
    // assign hitPoint / iterationCount, so give all outputs a defined value up front.
    hitPixel = float2(0.0f, 0.0f);
    hitPoint = float3(0.0f, 0.0f, 0.0f);
    iterationCount = 0.0f;

    // Shorten the ray if it would end in front of the near plane.
    float rayLength = ((csOrig.z + csDir.z * rayTraceMaxDistance) < g_xCamera_ZNearP) ? (g_xCamera_ZNearP - csOrig.z) / csDir.z : rayTraceMaxDistance;
    float3 csRayEnd = csOrig + csDir * rayLength;

    // Project into homogeneous clip space
    float4 clipRayOrigin = mul(g_xCamera_Proj, float4(csOrig, 1.0f));
    float4 clipRayEnd = mul(g_xCamera_Proj, float4(csRayEnd, 1.0f));

    float k0 = 1.0f / clipRayOrigin.w;
    float k1 = 1.0f / clipRayEnd.w;

    float3 Q0 = csOrig * k0;
    float3 Q1 = csRayEnd * k1;

    // Screen-space endpoints
    float2 P0 = clipRayOrigin.xy * k0;
    float2 P1 = clipRayEnd.xy * k1;

    // Project to pixel
    P0 = P0 * float2(0.5, -0.5) + float2(0.5, 0.5);
    P1 = P1 * float2(0.5, -0.5) + float2(0.5, 0.5);

    P0.xy *= xPPResolution.xy;
    P1.xy *= xPPResolution.xy;

#if 1
    // Clip to the screen coordinates. Alternatively we could just modify rayTraceMaxStep instead
    // This will also improve the framerate, without losing quality or features
    float2 yDelta = float2(xPPResolution.y + 2.0f, -2.0f); // - 0.5, 0.5
    float2 xDelta = float2(xPPResolution.x + 2.0f, -2.0f); // - 0.5, 0.5

    float alpha = 0.0;

    // P0 must be in bounds
    if (P1.y > yDelta.x || P1.y < yDelta.y)
    {
        float yClip = (P1.y > yDelta.x) ? yDelta.x : yDelta.y;
        float yAlpha = (P1.y - yClip) / (P1.y - P0.y);
        alpha = yAlpha;
    }

    // P1 must be in bounds
    if (P1.x > xDelta.x || P1.x < xDelta.y)
    {
        float xClip = (P1.x > xDelta.x) ? xDelta.x : xDelta.y;
        float xAlpha = (P1.x - xClip) / (P1.x - P0.x);
        alpha = max(alpha, xAlpha);
    }

    // These are all in homogeneous space, so they interpolate linearly
    P1 = lerp(P1, P0, alpha);
    k1 = lerp(k1, k0, alpha);
    Q1 = lerp(Q1, Q0, alpha);
#endif

    // If the line is degenerate, make it cover at least one pixel to avoid handling zero-pixel extent as a special case later
    P1 += (DistanceSquared(P0, P1) < 0.0001f) ? float2(0.01f, 0.01f) : 0.0f;
    float2 screenOffset = P1 - P0;

    // Permute so that the primary iteration is in x to collapse all quadrant-specific DDA cases later
    bool permute = false;
    if (abs(screenOffset.x) < abs(screenOffset.y))
    {
        permute = true;
        screenOffset = screenOffset.yx;
        P0 = P0.yx;
        P1 = P1.yx;
    }

    float stepDirection = sign(screenOffset.x);
    float stepInterval = stepDirection / screenOffset.x;

    // Track the derivatives of Q and k
    float3 dQ = (Q1 - Q0) * stepInterval;
    float dk = (k1 - k0) * stepInterval;

    // Because we test 1/2 a texel forward along the ray, on the very last iteration
    // the interpolation can go past the end of the ray. Use these bounds to clamp it.
    float zMin = min(csRayEnd.z, csOrig.z);
    float zMax = max(csRayEnd.z, csOrig.z);

    float2 dP = float2(stepDirection, screenOffset.y * stepInterval);

    // Scale derivatives by the desired pixel stride and then offset the starting values by the jitter fraction
    float strideScale = 1.0f - min(1.0f, csOrig.z / rayTraceStrideCutoff);
    float stride = 1.0f + strideScale * rayTraceStride;
    dP *= stride;
    dQ *= stride;
    dk *= stride;

    P0 += dP * jitter;
    Q0 += dQ * jitter;
    k0 += dk * jitter;

    // Pack pixel position, Q.z and k together so a single add advances all of them.
    float4 PQk = float4(P0, Q0.z, k0);
    float4 dPQk = float4(dP, dQ.z, dk);
    float3 Q = Q0;

    // Adjust end condition for iteration direction
    float end = P1.x * stepDirection;

    // raytrace iterations based on roughness
    // Matte materials will get less samples
    float roughnessTraceStep = max(rayTraceMaxStep * (1.0 - roughness), 1.0f);

    float stepCount = 0.0f;
    float level = 0.0f; // 1.0f start level. Parameter?

    float prevZMaxEstimate = csOrig.z;
    float rayZMin = prevZMaxEstimate;
    float rayZMax = prevZMaxEstimate;
    float sceneZMax = rayZMax + 100000.0f;

    [loop]
    for (; ((PQk.x * stepDirection) <= end) &&
        (stepCount <= roughnessTraceStep - 1) &&
        !intersectsDepthBuffer(sceneZMax, rayZMin, rayZMax) &&
        (sceneZMax != 0.0f) &&
        (level > -1);
        PQk += dPQk, stepCount++)
    {
        // Stop as soon as the previously sampled position left the [0,1] UV range.
        if (!is_saturated(hitPixel))
        {
            return false;
        }

        rayZMin = prevZMaxEstimate;

        // Compute the value at 1/2 step into the future
        rayZMax = (dPQk.z * 0.5f + PQk.z) / (dPQk.w * 0.5f + PQk.w);
        rayZMax = clamp(rayZMax, zMin, zMax);
        prevZMaxEstimate = rayZMax;

        [flatten]
        if (rayTraceMaxDistance < rayZMax)
        {
            return false;
        }

        [flatten]
        if (rayZMin > rayZMax)
        {
            float t = rayZMin;
            rayZMin = rayZMax;
            rayZMax = t;
        }

        // A simple HZB approach based on roughness
        level += min(raytraceHZBBias / 10.0f, 5.0f) * roughness;

        hitPixel = permute ? PQk.yx : PQk.xy;
        hitPixel *= xPPResolution_rcp;

#ifdef USE_LINEARDEPTH
        sceneZMax = texture_lineardepth_minmax.SampleLevel(sampler_point_clamp, hitPixel, level).g * g_xCamera_ZFarP;
#else
        sceneZMax = getLinearDepth(texture_depth.SampleLevel(sampler_point_clamp, hitPixel, 0).r);
#endif
    }

    // Advance Q based on the number of steps
    Q.xy += dQ.xy * stepCount;
    hitPoint = Q * (1.0f / PQk.w);
    iterationCount = stepCount;

    return intersectsDepthBuffer(sceneZMax, rayZMin, rayZMax);
}
// Ray tracing pass of the stochastic SSR.
// For every pixel it picks one reflection direction (importance sampled for rougher
// surfaces), traces it against the depth buffer and stores the hit UV, hit depth and
// sample PDF in texture_raytrace, plus a hit flag and normalized step count in texture_mask.
// Pixels with depth == 0 or a roughness fade <= 0 return early without writing either output.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
const float2 uv = (DTid.xy + 0.5f) * xPPResolution_rcp;
const float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 0);
if (depth == 0.0f)
return;
// Everything in view space:
const float3 P = reconstructPosition(uv, depth, g_xCamera_InvP);
const float3 N = mul((float3x3)g_xCamera_View, decodeNormal(texture_gbuffer1.SampleLevel(sampler_point_clamp, uv, 0).xy)).xyz;
// V points from the camera towards the surface here, which is the incident direction reflect() expects.
const float3 V = normalize(P);
const float roughness = GetRoughness(texture_gbuffer2.SampleLevel(sampler_point_clamp, uv, 0).g);
const float roughnessFade = GetRoughnessFade(roughness, SSRMaxRoughness);
if (roughnessFade <= 0.0f)
{
return;
}
// xyz: half vector used for the reflection, w: value stored as the sample PDF.
float4 H;
if (roughness > 0.1f)
{
const float surfaceMargin = 0.0f;
const float maxRegenCount = 15.0f;
uint2 Random = Rand3DPCG16(int3((DTid.xy + 0.5f), g_xFrame_FrameCount)).xy;
// Pick the best rays
// Regenerate the sample until the reflected direction has dot(N, R) > surfaceMargin,
// or until maxRegenCount attempts were made (the last sample is then used as is).
float RdotN = 0.0f;
float regenCount = 0;
[loop]
for (; RdotN <= surfaceMargin && regenCount < maxRegenCount; regenCount++)
{
// Low-discrepancy sequence
//float2 Xi = float2(Random) * rcp(65536.0); // equivalent to HammersleyRandom(0, 1, Random).
float2 Xi = HammersleyRandom(regenCount, Random); // SingleSPP
// Pull the second coordinate towards 0 by BRDFBias.
Xi.y = lerp(Xi.y, 0.0f, BRDFBias);
// I should probably use importance sampling of visible normals http://jcgt.org/published/0007/04/01/paper.pdf
H = ImportanceSampleGGX(Xi, roughness);
H = TangentToWorld(H, N);
RdotN = dot(N, reflect(V, H.xyz));
}
}
else
{
// Very smooth surface: use the plain mirror direction, stored with a value of 1 in w.
H = float4(N.xyz, 1.0f);
}
float3 dir = reflect(V, H.xyz);
float2 hitPixel = float2(0.0f, 0.0f);
float3 hitPoint = float3(0.0f, 0.0f, 0.0f);
float iterationCount = 0.0f;
float2 uv2 = (DTid.xy + 0.5f);
// Per-pixel, per-frame start offset for the ray march: 1 plus a noise term.
//float jitter = 1.0f + rand(uv2 + g_xFrame_Time);
float jitter = 1.0f + InterleavedGradientNoise(uv2, g_xFrame_FrameCount);
bool hit = ScreenSpaceRayTrace(P, dir, jitter, roughness, hitPixel, hitPoint, iterationCount);
float hitDepth = texture_depth.SampleLevel(sampler_point_clamp, hitPixel, 0);
// Output:
// xy: hit pixel
// z: hit depth
// w: pdf
float4 raytrace = max(0, float4(hitPixel, hitDepth, H.w));
texture_raytrace[DTid.xy] = raytrace;
// Output:
// x: hit (bool)
// y: iteration count / rayTraceMaxStep
float2 mask = float2(hit, iterationCount / rayTraceMaxStep);
texture_mask[DTid.xy] = mask;
}
+147
View File
@@ -0,0 +1,147 @@
#include "globals.hlsli"
#include "brdf.hlsli"
#include "stochasticSSRHF.hlsli"
#include "ShaderInterop_Postprocess.h"
TEXTURE2D(texture_raytrace, float4, TEXSLOT_ONDEMAND0);
TEXTURE2D(texture_mask, float2, TEXSLOT_ONDEMAND1);
TEXTURE2D(texture_main, float4, TEXSLOT_ONDEMAND2);
RWTEXTURE2D(texture_resolve, float4, 0);
// Scales the neighbor sample offsets by (resolveSequenceSize * roughness + 1).
static const float resolveSequenceSize = 20.0f; // Can help reduce noise on rough surfaces, but too high values tend to wash out contact points.
static const float resolveMip = 1.0f; // Multiplier applied to the mip level used when sampling the scene color.
static const float resolveSSRIntensity = 1.0f; // Final multiplier applied to the resolved result.
static const float blendScreenEdgeFade = 5.0f; // Controls the screen-edge vignette applied to the hit position.
static const bool blendReflectSky = true; // When false, rays that did not hit anything get a blend factor of 0.
// Blend factor for one traced sample.
// Combines a confidence term that drops as the normalized step count approaches 1
// with a vignette that fades hits near the screen edges.
// Returns 0 for missed rays when blendReflectSky is disabled.
float CalculateBlendIntersection(bool hit, float iterationStep, float2 hitPixel)
{
    if (!(hit || blendReflectSky))
    {
        return 0.0;
    }

    const float2 ndc = hitPixel * 2.0 - 1.0;

    //float maxDimension = min(1.0, max(abs(hitPixelNDC.x), abs(hitPixelNDC.y)));
    //float attenuation = 1.0 - max(0.0, maxDimension - blendScreenEdgeFade) / (1.0 - blendScreenEdgeFade);

    const float2 edgeAmount = saturate(abs(ndc) * blendScreenEdgeFade - (blendScreenEdgeFade - 1.0f));
    const float edgeAttenuation = saturate(1.0 - dot(edgeAmount, edgeAmount));

    const float stepConfidence = 1.0 - pow(iterationStep, 8.0f);

    return stepConfidence * edgeAttenuation;
}
// Per-axis scale for the resolve sample offsets, derived from the view-space normal.
// Interpolates between (1.0, 0.1) and (0.1, 1.0).
// I probably need to figure out a better way to deal with this.
float2 CalculateTailDirection(float3 viewNormal)
{
    // Reference axis for the cross product; switch axis when the normal is almost along Z.
    float3 referenceAxis = float3(1.0, 0.0, 0.0);
    if (abs(viewNormal.z) < 0.999)
    {
        referenceAxis = float3(0.0, 0.0, 1.0);
    }

    const float3 tangent = normalize(cross(referenceAxis, viewNormal));
    const float blendFactor = tangent.x * -viewNormal.y;

    const float2 horizontalTail = float2(1.0, 0.1);
    const float2 verticalTail = float2(0.1, 1.0);
    return lerp(horizontalTail, verticalTail, blendFactor);
}
// Resolve pass of the stochastic SSR.
// Reuses the rays traced for NumResolve neighboring pixels: for each neighbor it fetches
// the scene color at that neighbor's hit position and weights it by the local specular
// BRDF (without Fresnel) divided by the neighbor's stored PDF.
// Output rgb is the weighted reflection color, a is the weighted blend factor.
// Pixels with depth == 0 are skipped and their output texel is left untouched.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
const float2 uv = (DTid.xy + 0.5f) * xPPResolution_rcp;
const float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 0);
if (depth == 0.0f)
return;
// Everything in view space:
const float3 P = reconstructPosition(uv, depth, g_xCamera_InvP);
const float3 N = mul((float3x3) g_xCamera_View, decodeNormal(texture_gbuffer1.SampleLevel(sampler_point_clamp, uv, 0).xy)).xyz;
// V points from the surface towards the camera (the view-space origin).
const float3 V = normalize(-P);
const float NdotV = saturate(dot(N, V));
const float roughness = GetRoughness(texture_gbuffer2.SampleLevel(sampler_point_clamp, uv, 0).g);
const float roughnessSequenceSize = resolveSequenceSize * roughness + 1.0f;
// Early out, useless if the roughness is out of range
float roughnessFade = GetRoughnessFade(roughness, SSRMaxRoughness);
if (roughnessFade <= 0.0f)
{
texture_resolve[DTid.xy] = 0;
return;
}
// Tangent of the specular cone; used below to pick the mip level of the scene color.
float specularConeTangent = lerp(0.0, roughness * (1.0 - BRDFBias), NdotV * sqrt(roughness));
specularConeTangent *= lerp(saturate(NdotV * 2), 1.0f, sqrt(roughness));
const float maxMipLevel = 11.0f - 1.0f;
const uint2 Random = Rand3DPCG16(int3((DTid.xy + 0.5f), g_xFrame_FrameCount)).xy;
float4 result = 0.0f;
float weightSum = 0.0f;
const uint NumResolve = 4;
[unroll]
for (uint i = 0; i < NumResolve; i++)
{
// Randomly rotated and scaled neighbor offset. `offset` is not defined in this file.
float2 offsetRotation = (HammersleyRandom(i, NumResolve, Random) * 2.0 - 1.0) * roughnessSequenceSize;
float2x2 offsetRotationMatrix = float2x2(offsetRotation.x, offsetRotation.y, -offsetRotation.y, offsetRotation.x);
float2 offsetUV = offset[i] * (1.0f / xPPResolution);
offsetUV = uv + mul(offsetRotationMatrix, offsetUV) * CalculateTailDirection(N);
float4 raytraceSource = texture_raytrace.SampleLevel(sampler_point_clamp, offsetUV, 0);
float2 maskSource = texture_mask.SampleLevel(sampler_point_clamp, offsetUV, 0);
float2 hitPixel = raytraceSource.xy;
float hitDepth = raytraceSource.z;
float hitPDF = raytraceSource.w;
bool hit = (bool)maskSource.x;
float iterationStep = maskSource.y;
// Wider cone footprint at the hit position -> blurrier mip of the scene color.
float intersectionCircleRadius = specularConeTangent * length(hitPixel - uv);
float sourceMip = clamp(log2(intersectionCircleRadius * max(xPPResolution.x, xPPResolution.y)), 0.0, maxMipLevel) * resolveMip;
float4 sampleColor;
sampleColor.rgb = texture_main.SampleLevel(sampler_linear_clamp, hitPixel, sourceMip).xyz;
sampleColor.a = CalculateBlendIntersection(hit, iterationStep, hitPixel);
// Compress the color by its luminance before accumulation; undone after the loop.
sampleColor.rgb /= 1 + Luminance(sampleColor.rgb);
// BRDF
float3 hitViewPosition = reconstructPosition(hitPixel, hitDepth, g_xCamera_InvP);
float3 L = normalize(hitViewPosition - P);
float3 H = normalize(L + V);
float NdotH = saturate(dot(N, H));
float NdotL = saturate(dot(N, L));
// Only the fields assigned here are filled in before the BRDF helper calls below.
Surface surface;
surface.alphaRoughnessSq = pow(roughness, 4);
SurfaceToLight surfaceToLight;
surfaceToLight.NdotH = NdotH;
surfaceToLight.NdotL = NdotL;
surfaceToLight.NdotV = NdotV;
// We could simply use BRDF_GetSpecular, but we exclude fresnel for later
float Vis = visibilityOcclusion(surface, surfaceToLight);
float D = microfacetDistribution(surface, surfaceToLight);
float specularLight = Vis * D * surfaceToLight.NdotL;
float weight = specularLight / max(hitPDF, 0.00001f);
result += sampleColor * weight;
weightSum += weight;
}
// NOTE(review): weightSum can be 0 when every sample has zero weight; the max() on the
// final write is then what keeps an invalid value out of the texture - confirm this is intended.
result /= weightSum;
// Undo the luminance compression applied to the samples.
result.rgb /= 1 - Luminance(result.rgb);
result *= roughnessFade;
result *= resolveSSRIntensity;
texture_resolve[DTid.xy] = max(result, 0.00001f);
}
+171
View File
@@ -0,0 +1,171 @@
#include "globals.hlsli"
#include "stochasticSSRHF.hlsli"
#include "ShaderInterop_Postprocess.h"
TEXTURE2D(resolve_current, float4, TEXSLOT_ONDEMAND0);
TEXTURE2D(resolve_history, float4, TEXSLOT_ONDEMAND1);
TEXTURE2D(texture_raytrace, float4, TEXSLOT_ONDEMAND2);
RWTEXTURE2D(output, float4, 0);
// History weight used when blending: lerp(current, history, w) with w in [temporalResponseMin, temporalResponseMax].
static const float temporalResponseMin = 0.85f;
static const float temporalResponseMax = 1.0f;
// Number of standard deviations around the neighborhood mean used for the history clipping box.
static const float temporalScale = 2.0f;
// Exposure factor passed to HdrWeight4 when weighting the neighborhood samples.
static const float temporalExposure = 10.0f;
// Cheap luma estimate: 2 * green + (red + blue). The weights sum to 4.
inline float Luma4(float3 color)
{
    const float doubleGreen = color.g * 2;
    const float redPlusBlue = color.r + color.b;
    return doubleGreen + redPlusBlue;
}
// Weight that decreases as the sample gets brighter: 1 / (Luma4(color) * exposure + 4).
inline float HdrWeight4(float3 color, float exposure)
{
    const float exposedLuma = Luma4(color) * exposure;
    return rcp(exposedLuma + 4.0f);
}
// Clips q towards the center of the box [aabb_min, aabb_max].
// If q.xyz lies outside the box, it is moved along the line to the box center until it
// touches the box; otherwise q is returned unchanged. The w of the center is taken from p.
float4 clip_aabb(float3 aabb_min, float3 aabb_max, float4 p, float4 q)
{
    const float3 boxCenter = 0.5 * (aabb_max + aabb_min);
    // Tiny epsilon keeps the division below away from a zero extent.
    const float3 boxHalfExtent = 0.5 * (aabb_max - aabb_min) + 0.00000001f;

    const float4 centerPoint = float4(boxCenter, p.w);
    const float4 centerToQ = q - centerPoint;

    // Distance from the center in units of the half extent, per axis.
    const float3 relative = abs(centerToQ.xyz / boxHalfExtent);
    const float largestAxis = max(relative.x, max(relative.y, relative.z));

    if (!(largestAxis > 1.0))
    {
        return q; // point inside aabb
    }
    return centerPoint + centerToQ / largestAxis;
}
// Builds the neighborhood statistics used to clip the temporal history.
//  currentColor / currentSampler : texture and sampler of the current frame's result
//  sharpness                     : unused
//  exposureScale                 : exposure passed to HdrWeight4 for the sample weights
//  AABBScale                     : number of standard deviations around the mean for the box
//  uv                            : center of the 3x3 neighborhood
//  texelSize                     : pixel offsets are DIVIDED by this value, so despite the name it
//                                  has to be the resolution (the caller in this file passes xPPResolution)
//  currentMin / currentMax       : out, clipping box, extended to contain currentOutput
//  currentAverage                : out, mean of the neighborhood
//  currentOutput                 : out, HDR-weighted average of the 3x3 neighborhood
inline void ResolverAABB(Texture2D<float4> currentColor, SamplerState currentSampler, float sharpness, float exposureScale, float AABBScale, float2 uv, float2 texelSize, inout float4 currentMin, inout float4 currentMax, inout float4 currentAverage, inout float4 currentOutput)
{
    const int2 SampleOffset[9] = { int2(-1.0, -1.0), int2(0.0, -1.0), int2(1.0, -1.0), int2(-1.0, 0.0), int2(0.0, 0.0), int2(1.0, 0.0), int2(-1.0, 1.0), int2(0.0, 1.0), int2(1.0, 1.0) };

    // Modulate Luma HDR
    float4 sampleColors[9];
    [unroll]
    for (uint i = 0; i < 9; i++)
    {
        sampleColors[i] = currentColor.SampleLevel(currentSampler, uv + (SampleOffset[i] / texelSize), 0.0f);
    }

    float sampleWeights[9];
    [unroll]
    for (uint j = 0; j < 9; j++)
    {
        sampleWeights[j] = HdrWeight4(sampleColors[j].rgb, exposureScale);
    }

    float totalWeight = 0;
    [unroll]
    for (uint k = 0; k < 9; k++)
    {
        totalWeight += sampleWeights[k];
    }

    // The center sample is replaced by the weighted average, so the statistics below
    // are computed with the filtered center.
    sampleColors[4] = (sampleColors[0] * sampleWeights[0] + sampleColors[1] * sampleWeights[1] + sampleColors[2] * sampleWeights[2] + sampleColors[3] * sampleWeights[3] + sampleColors[4] * sampleWeights[4] +
        sampleColors[5] * sampleWeights[5] + sampleColors[6] * sampleWeights[6] + sampleColors[7] * sampleWeights[7] + sampleColors[8] * sampleWeights[8]) / totalWeight;

    // Variance Clipping (AABB)
    float4 m1 = 0.0;
    float4 m2 = 0.0;
    [unroll]
    for (uint x = 0; x < 9; x++)
    {
        m1 += sampleColors[x];
        m2 += sampleColors[x] * sampleColors[x];
    }

    float4 mean = m1 / 9.0;
    // E[x^2] - E[x]^2 can come out slightly negative through floating point cancellation
    // when the neighborhood is nearly uniform; clamp it so sqrt never produces NaN.
    float4 variance = max((m2 / 9.0) - sqr(mean), 0.0f);
    float4 stddev = sqrt(variance);

    currentMin = mean - AABBScale * stddev;
    currentMax = mean + AABBScale * stddev;

    currentOutput = sampleColors[4];
    currentMin = min(currentMin, currentOutput);
    currentMax = max(currentMax, currentOutput);
    currentAverage = mean;
}
// Screen-space motion of the point seen at (uv, depth) between the previous and the
// current frame, assuming the point itself is static in the world.
// The result is current minus previous position, with NDC remapped from [-1,1] to [0,1]
// (no Y flip is applied here).
float2 CalculateCustomMotion(float depth, float2 uv)
{
    const float4 worldPosition = float4(reconstructPosition(uv, depth, g_xCamera_InvVP), 1.0f);

    // Project the same world position with the current and the previous camera.
    const float4 clipNow = mul(g_xCamera_VP, worldPosition);
    const float4 clipBefore = mul(g_xFrame_MainCamera_PrevVP, worldPosition);

    float2 screenNow = clipNow.xy * rcp(clipNow.w);
    float2 screenBefore = clipBefore.xy * rcp(clipBefore.w);

    screenNow = (screenNow.xy + 1.0f) / 2.0f;
    screenBefore = (screenBefore.xy + 1.0f) / 2.0f;

    return screenNow - screenBefore;
}
// Temporal accumulation pass of the stochastic SSR.
// Reprojects the history with a velocity built from camera motion at the hit depth and
// from the g-buffer velocities, clips the history against the current 3x3 neighborhood
// and blends it with the current result.
// GTid, Gid and groupIndex are not used by the body.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex)
{
const float2 uv = (DTid.xy + 0.5f) * xPPResolution_rcp;
const float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 0);
// NOTE(review): worldNormal is not referenced again in this function.
const float3 worldNormal = decodeNormal(texture_gbuffer1.SampleLevel(sampler_point_clamp, uv, 0).xy);
float4 raytraceSource = texture_raytrace.SampleLevel(sampler_point_clamp, uv, 0);
float hitDepth = raytraceSource.z;
float2 hitPixel = raytraceSource.xy;
// Calculate custom motion vectors to counter smearing, which we would get by using normal gbuffer velocity
float2 reflectionCustomVelocity = CalculateCustomMotion(hitDepth, uv);
float2 hitCustomVelocity = CalculateCustomMotion(hitDepth, hitPixel);
float2 customVelocity = CalculateCustomMotion(depth, uv);
float2 standardHitVelocity = texture_gbuffer1.SampleLevel(sampler_point_clamp, hitPixel, 0).zw;
float2 standardVelocity = texture_gbuffer1.SampleLevel(sampler_point_clamp, uv, 0).zw;
// Difference between camera-only motion and the g-buffer velocity; the masks below grow
// with the squared length of that difference.
float2 velocityDifference = customVelocity - standardVelocity;
float2 hitVelocityDifference = hitCustomVelocity - standardHitVelocity;
float objectVelocityMask = saturate(dot(velocityDifference, velocityDifference) * xPPResolution_rcp.x * 100.0f);
float hitObjectVelocityMask = saturate(dot(hitVelocityDifference, hitVelocityDifference) * xPPResolution_rcp.x * 100.0f);
float2 objectVelocity = standardVelocity * objectVelocityMask;
float2 hitObjectVelocity = standardHitVelocity * hitObjectVelocityMask;
float2 velocity = lerp(lerp(reflectionCustomVelocity, hitObjectVelocity, hitObjectVelocityMask), objectVelocity, objectVelocityMask);
// X is subtracted while Y is added when stepping back to the previous frame's UV.
float2 prevUV = float2(uv.x - velocity.x, uv.y + velocity.y);
float4 previous = resolve_history.SampleLevel(sampler_linear_clamp, prevUV, 0);
// Luma HDR and AABB minmax
float4 current = 0;
float4 currentMin, currentMax, currentAverage;
ResolverAABB(resolve_current, sampler_linear_clamp, 0, temporalExposure, temporalScale, uv, xPPResolution, currentMin, currentMax, currentAverage, current);
// Color is clipped towards the clamped neighborhood average, alpha is simply clamped.
previous.xyz = clip_aabb(currentMin.xyz, currentMax.xyz, clamp(currentAverage, currentMin, currentMax), previous).xyz;
previous.a = clamp(previous.a, currentMin.a, currentMax.a);
// Blend color & history
// Feedback weight from unbiased luminance difference (Timothy Lottes)
float lumFiltered = Luminance(current.rgb); // Luma4(current.rgb)
float lumHistory = Luminance(previous.rgb);
float lumDifference = abs(lumFiltered - lumHistory) / max(lumFiltered, max(lumHistory, 0.2f));
float lumWeight = sqr(1.0f - lumDifference);
// blendFinal is the weight of the history in the final lerp.
float blendFinal = lerp(temporalResponseMin, temporalResponseMax, lumWeight);
// Reduce ghosting by refreshing the blend by velocity... but adds additional noise
//float2 velocityScreen = velocity * xPPResolution;
//float velocityBlend = sqrt(dot(velocityScreen, velocityScreen));
//blendFinal = lerp(blendFinal, 0.2f, saturate(velocityBlend / 100.0f));
float4 result = lerp(current, previous, blendFinal);
output[DTid.xy] = result;
}
+116
View File
@@ -0,0 +1,116 @@
#ifndef WI_STOCHASTICSSR_HF
#define WI_STOCHASTICSSR_HF
// Shared SSR settings:
// Upper roughness bound for SSR. Lowering it can improve performance in complex scenes.
static const float SSRMaxRoughness = 1.0f; // Specify max roughness, this can improve performance in complex scenes.
// NOTE(review): not used in this header; presumably biases the importance sampling
// of the reflection ray in the raytrace/resolve shaders - confirm against its users.
static const float BRDFBias = 0.7f;
// Returns the (negative) slope used by GetRoughnessFade: -2 / maxRoughness,
// with maxRoughness clamped to [0.01, 1] so the division stays finite.
float ComputeRoughnessMaskScale(in float maxRoughness)
{
	const float clampedMaxRoughness = clamp(maxRoughness, 0.01f, 1.0f);

	// Extra multiplier on the slope; the original note lists 2.0f and 1.0f as values.
	const float slopeMultiplier = 1.0f; // 2.0f & 1.0f

	return (-2.0f / clampedMaxRoughness) * slopeMultiplier;
}
// Fade factor for reflections as a function of surface roughness.
// Evaluates roughness * (-2 / maxRoughness) + 2, capped at 1: the result is 1 up to
// half of the (clamped) maxRoughness and falls linearly to 0 at maxRoughness.
// There is no lower clamp, so values beyond maxRoughness go negative.
float GetRoughnessFade(in float roughness, in float maxRoughness)
{
	const float uncappedFade = roughness * ComputeRoughnessMaskScale(maxRoughness) + 2;
	return min(uncappedFade, 1.0f);
}
// Returns the roughness with a lower bound of 0.02 applied.
float GetRoughness(float roughness)
{
	const float minRoughness = 0.02f;
	return max(roughness, minRoughness);
}
// Weighted sum of the RGB channels using the Rec. 709 luma coefficients.
float Luminance(float3 color)
{
	const float3 lumaWeights = float3(0.2126, 0.7152, 0.0722);
	return dot(color, lumaWeights);
}
// 3x3 grid of 2D sample offsets in row-major order (top-left to bottom-right),
// spaced 2 units apart with the center tap at index 4.
// NOTE(review): not used in this header; units are presumably texels - confirm against users.
static const float2 offset[9] =
{
float2(-2.0, -2.0),
float2(0.0, -2.0),
float2(2.0, -2.0),
float2(-2.0, 0.0),
float2(0.0, 0.0),
float2(2.0, 0.0),
float2(-2.0, 2.0),
float2(0.0, 2.0),
float2(2.0, 2.0)
};
// Integer hash of a 3D coordinate: one linear-congruential step per component,
// followed by two rounds of cross-component mixing. Each returned component
// holds the upper 16 bits of the mixed state.
uint3 Rand3DPCG16(int3 p)
{
	uint3 state = uint3(p);

	// Linear congruential step
	state = state * 1664525u + 1013904223u;

	// Two identical mixing rounds; the x -> y -> z update order within a round matters.
	[unroll]
	for (uint round = 0; round < 2; ++round)
	{
		state.x += state.y * state.z;
		state.y += state.z * state.x;
		state.z += state.x * state.y;
	}

	// only top 16 bits are well shuffled
	return state >> 16u;
}
// Brian Karis, Epic Games "Real Shading in Unreal Engine 4"
// Generates a GGX-distributed half vector from a 2D random sample.
//	Xi        : pair of random numbers (Xi.x drives the azimuth, Xi.y the polar angle)
//	Roughness : surface roughness; squared twice internally
//	returns   : xyz = half vector with z = cos(theta), w = pdf (D * cos(theta))
float4 ImportanceSampleGGX(float2 Xi, float Roughness)
{
	const float alpha = Roughness * Roughness;
	const float alphaSq = alpha * alpha;

	const float azimuth = 2 * PI * Xi.x;
	const float cosPolar = sqrt((1.0 - Xi.y) / (1.0 + (alphaSq - 1.0) * Xi.y));
	// The max() keeps the sine strictly positive when cosPolar reaches 1.
	const float sinPolar = sqrt(max(1e-5, 1.0 - cosPolar * cosPolar));

	const float3 halfVector = float3(sinPolar * cos(azimuth), sinPolar * sin(azimuth), cosPolar);

	// GGX normal distribution term evaluated for the generated direction.
	const float denominator = (cosPolar * alphaSq - cosPolar) * cosPolar + 1;
	const float distribution = alphaSq / (PI * denominator * denominator);

	return float4(halfVector, distribution * cosPolar);
}
// [ Duff et al. 2017, "Building an Orthonormal Basis, Revisited" ]
// http://jcgt.org/published/0006/01/01/
// Builds a 3x3 basis whose rows are TangentX, TangentY and the given TangentZ.
// NOTE(review): the referenced construction assumes TangentZ is unit length - callers should normalize.
float3x3 GetTangentBasis(float3 TangentZ)
{
	const float Sign = TangentZ.z >= 0 ? 1 : -1;
	const float a = -rcp(Sign + TangentZ.z);
	const float b = TangentZ.x * TangentZ.y * a;

	// Squares are written as explicit multiplies: HLSL pow(x, y) is specified to
	// return NaN for x < 0, and the components of TangentZ can be negative.
	const float xSquared = TangentZ.x * TangentZ.x;
	const float ySquared = TangentZ.y * TangentZ.y;

	float3 TangentX = { 1 + Sign * a * xSquared, Sign * b, -Sign * TangentZ.x };
	float3 TangentY = { b, Sign + a * ySquared, -TangentZ.y };

	return float3x3(TangentX, TangentY, TangentZ);
}
// Transforms a tangent-space vector into the space of tangentZ:
// vec.x * TangentX + vec.y * TangentY + vec.z * TangentZ (basis vectors are the matrix rows).
float3 TangentToWorld(float3 vec, float3 tangentZ)
{
	const float3x3 tangentBasis = GetTangentBasis(tangentZ);
	return mul(vec, tangentBasis);
}
// Overload for a direction with a payload: H.xyz is transformed out of tangent
// space around tangentZ, H.w is passed through unchanged.
float4 TangentToWorld(float4 H, float3 tangentZ)
{
	const float3x3 tangentBasis = GetTangentBasis(tangentZ);
	const float3 transformed = mul(H.xyz, tangentBasis);
	return float4(transformed, H.w);
}
// Projects vec onto the tangent basis built around tangentZ:
// the result is (dot(TangentX, vec), dot(TangentY, vec), dot(TangentZ, vec)).
float3 WorldToTangent(float3 vec, float3 tangentZ)
{
	const float3x3 tangentBasis = GetTangentBasis(tangentZ);
	return mul(tangentBasis, vec);
}
#endif // WI_STOCHASTICSSR_HF
+5
View File
@@ -300,6 +300,11 @@ enum CSTYPES
CSTYPE_POSTPROCESS_BLUR_BILATERAL_UNORM4,
CSTYPE_POSTPROCESS_SSAO,
CSTYPE_POSTPROCESS_SSR,
CSTYPE_POSTPROCESS_STOCHASTICSSR_RAYTRACE,
CSTYPE_POSTPROCESS_STOCHASTICSSR_RESOLVE,
CSTYPE_POSTPROCESS_STOCHASTICSSR_TEMPORAL,
CSTYPE_POSTPROCESS_STOCHASTICSSR_MEDIAN,
CSTYPE_POSTPROCESS_STOCHASTICSSR_COMBINE,
CSTYPE_POSTPROCESS_LIGHTSHAFTS,
CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_HORIZONTAL,
CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_VERTICAL,
+255
View File
@@ -1333,6 +1333,11 @@ void LoadShaders()
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_BLUR_BILATERAL_UNORM4], "blur_bilateral_unorm4CS.cso"); });
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_SSAO], "ssaoCS.cso"); });
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_SSR], "ssrCS.cso"); });
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_RAYTRACE], "stochasticSSRCS_raytrace.cso"); });
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_RESOLVE], "stochasticSSRCS_resolve.cso"); });
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_TEMPORAL], "stochasticSSRCS_temporal.cso"); });
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_MEDIAN], "stochasticSSRCS_median.cso"); });
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_COMBINE], "stochasticSSRCS_combine.cso"); });
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_LIGHTSHAFTS], "lightshaftsCS.cso"); });
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_HORIZONTAL], "depthoffield_tileMaxCOC_horizontalCS.cso"); });
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_VERTICAL], "depthoffield_tileMaxCOC_verticalCS.cso"); });
@@ -8843,6 +8848,256 @@ void Postprocess_SSR(
wiProfiler::EndRange(range);
device->EventEnd(cmd);
}
// Stochastic screen-space reflections, recorded as a chain of compute passes on `cmd`:
//   1. copy `input` into an internal texture and build its gaussian mip chain
//   2. raytrace pass at half resolution (writes texture_raytrace and texture_mask)
//   3. resolve pass at full resolution
//   4. temporal pass (ping-pongs between two history textures by frame parity)
//   5. median blur pass
//   6. combine pass, which writes `output`
//
//	input              : scene color; copied into the mip chain and bound to the raytrace pass
//	depthbuffer        : bound to TEXSLOT_DEPTH for every pass
//	lineardepth_minmax : bound to the raytrace pass only
//	gbuffer0           : bound to the combine pass only
//	gbuffer1, gbuffer2 : bound to the raytrace, resolve and combine passes (gbuffer1 also to temporal)
//	output             : UAV target of the combine pass; its size drives all internal texture sizes
//	cmd                : command list everything is recorded into
//
// NOTE(review): the intermediate textures are function-local statics, so they are shared by
// every caller; calling this with outputs of different sizes recreates them on each size change,
// and calling it more than once per frame would disturb the frame-parity history selection.
void Postprocess_StochasticSSR(
	const Texture& input,
	const Texture& depthbuffer,
	const Texture& lineardepth_minmax,
	const Texture& gbuffer0,
	const Texture& gbuffer1,
	const Texture& gbuffer2,
	const Texture& output,
	CommandList cmd
)
{
	GraphicsDevice* device = GetDevice();

	device->EventBegin("Postprocess_StochasticSSR", cmd);
	auto range = wiProfiler::BeginRangeGPU("Stochastic SSR", cmd);

	device->UnbindResources(TEXSLOT_RENDERPATH_SSR, 1, cmd);

	const TextureDesc& desc = output.GetDesc();

	static TextureDesc initialized_desc;
	static Texture texture_main;
	static Texture texture_raytrace;
	static Texture texture_mask;
	static Texture texture_resolve;
	static Texture texture_temporal[2];
	static Texture texture_median;

	// (Re)create the intermediate textures on first use and whenever the output size changes
	if (initialized_desc.Width != desc.Width || initialized_desc.Height != desc.Height)
	{
		initialized_desc = desc;

		// Full resolution copy of the scene color with a full mip chain
		TextureDesc main_desc;
		main_desc.type = TextureDesc::TEXTURE_2D;
		main_desc.Width = desc.Width;
		main_desc.Height = desc.Height;
		main_desc.Format = FORMAT_R16G16B16A16_FLOAT;
		main_desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
		main_desc.MipLevels = 0; // full mip chain
		device->CreateTexture(&main_desc, nullptr, &texture_main);

		main_desc = texture_main.GetDesc(); // mip count was initialized in CreateTexture()

		// One SRV and one UAV subresource per mip level
		for (uint32_t i = 0; i < main_desc.MipLevels; ++i)
		{
			int subresource_index;
			subresource_index = device->CreateSubresource(&texture_main, SRV, 0, 1, i, 1);
			assert(subresource_index == i);
			subresource_index = device->CreateSubresource(&texture_main, UAV, 0, 1, i, 1);
			assert(subresource_index == i);
		}

		// Half resolution targets of the raytrace pass
		TextureDesc cast_desc;
		cast_desc.type = TextureDesc::TEXTURE_2D;
		cast_desc.Width = desc.Width / 2;
		cast_desc.Height = desc.Height / 2;
		cast_desc.Format = FORMAT_R16G16B16A16_FLOAT;
		cast_desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
		device->CreateTexture(&cast_desc, nullptr, &texture_raytrace);
		cast_desc.Format = FORMAT_R16G16_FLOAT;
		device->CreateTexture(&cast_desc, nullptr, &texture_mask);

		// Full resolution targets of the resolve, temporal and median passes
		TextureDesc buffer_desc;
		buffer_desc.type = TextureDesc::TEXTURE_2D;
		buffer_desc.Width = desc.Width;
		buffer_desc.Height = desc.Height;
		buffer_desc.Format = FORMAT_R16G16B16A16_FLOAT;
		buffer_desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
		device->CreateTexture(&buffer_desc, nullptr, &texture_resolve);
		device->CreateTexture(&buffer_desc, nullptr, &texture_temporal[0]);
		device->CreateTexture(&buffer_desc, nullptr, &texture_temporal[1]);
		device->CreateTexture(&buffer_desc, nullptr, &texture_median);
	}

	// This is very expensive. There is probably a better way of getting LOD of input.
	// For now I'm just making a copy of input, to stay on the safe side.
	// Main buffer copy and mip:
	{
		device->EventBegin("Main buffer pass", cmd);

		CopyTexture2D(texture_main, 0, 0, 0, input, 0, cmd);
		GenerateMipChain(texture_main, MIPGENFILTER_GAUSSIAN, cmd);

		device->EventEnd(cmd);
	}

	// Switch to half res
	PostProcessCB cb;
	cb.xPPResolution.x = desc.Width / 2;
	cb.xPPResolution.y = desc.Height / 2;
	cb.xPPResolution_rcp.x = 1.0f / cb.xPPResolution.x;
	cb.xPPResolution_rcp.y = 1.0f / cb.xPPResolution.y;
	device->UpdateBuffer(&constantBuffers[CBTYPE_POSTPROCESS], &cb, cmd);
	device->BindConstantBuffer(CS, &constantBuffers[CBTYPE_POSTPROCESS], CB_GETBINDSLOT(PostProcessCB), cmd);

	// Raytrace pass:
	{
		device->EventBegin("Stochastic Raytrace pass", cmd);

		device->BindComputeShader(&computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_RAYTRACE], cmd);

		device->BindResource(CS, &depthbuffer, TEXSLOT_DEPTH, cmd);
		device->BindResource(CS, &gbuffer1, TEXSLOT_GBUFFER1, cmd);
		device->BindResource(CS, &gbuffer2, TEXSLOT_GBUFFER2, cmd);
		device->BindResource(CS, &input, TEXSLOT_ONDEMAND0, cmd);
		device->BindResource(CS, &lineardepth_minmax, TEXSLOT_ONDEMAND1, cmd);

		const GPUResource* uavs[] = {
			&texture_raytrace,
			&texture_mask,
		};
		device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd);

		// The Y group count must come from the height; using the width here would
		// dispatch the wrong number of rows for any non-square render target.
		device->Dispatch(
			(texture_raytrace.GetDesc().Width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
			(texture_raytrace.GetDesc().Height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
			1,
			cmd
		);

		device->Barrier(&GPUBarrier::Memory(), 1, cmd);
		device->UnbindUAVs(0, arraysize(uavs), cmd);

		device->EventEnd(cmd);
	}

	// Switch to full res
	cb.xPPResolution.x = desc.Width;
	cb.xPPResolution.y = desc.Height;
	cb.xPPResolution_rcp.x = 1.0f / cb.xPPResolution.x;
	cb.xPPResolution_rcp.y = 1.0f / cb.xPPResolution.y;
	device->UpdateBuffer(&constantBuffers[CBTYPE_POSTPROCESS], &cb, cmd);
	device->BindConstantBuffer(CS, &constantBuffers[CBTYPE_POSTPROCESS], CB_GETBINDSLOT(PostProcessCB), cmd);

	// Resolve pass:
	{
		device->EventBegin("Resolve pass", cmd);

		device->BindComputeShader(&computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_RESOLVE], cmd);

		device->BindResource(CS, &depthbuffer, TEXSLOT_DEPTH, cmd);
		device->BindResource(CS, &gbuffer1, TEXSLOT_GBUFFER1, cmd);
		device->BindResource(CS, &gbuffer2, TEXSLOT_GBUFFER2, cmd);
		device->BindResource(CS, &texture_raytrace, TEXSLOT_ONDEMAND0, cmd);
		device->BindResource(CS, &texture_mask, TEXSLOT_ONDEMAND1, cmd);
		device->BindResource(CS, &texture_main, TEXSLOT_ONDEMAND2, cmd);

		const GPUResource* uavs[] = {
			&texture_resolve,
		};
		device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd);

		device->Dispatch(
			(texture_resolve.GetDesc().Width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
			(texture_resolve.GetDesc().Height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
			1,
			cmd
		);

		device->Barrier(&GPUBarrier::Memory(), 1, cmd);
		device->UnbindUAVs(0, arraysize(uavs), cmd);

		device->EventEnd(cmd);
	}

	// Ping-pong the temporal textures: this frame's output becomes next frame's history
	int temporal_output = device->GetFrameCount() % 2;
	int temporal_history = 1 - temporal_output;

	// Temporal pass:
	{
		device->EventBegin("Temporal pass", cmd);

		device->BindComputeShader(&computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_TEMPORAL], cmd);

		device->BindResource(CS, &gbuffer1, TEXSLOT_GBUFFER1, cmd);
		device->BindResource(CS, &depthbuffer, TEXSLOT_DEPTH, cmd);
		device->BindResource(CS, &texture_resolve, TEXSLOT_ONDEMAND0, cmd);
		device->BindResource(CS, &texture_temporal[temporal_history], TEXSLOT_ONDEMAND1, cmd);
		device->BindResource(CS, &texture_raytrace, TEXSLOT_ONDEMAND2, cmd);

		const GPUResource* uavs[] = {
			&texture_temporal[temporal_output],
		};
		device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd);

		device->Dispatch(
			(texture_temporal[temporal_output].GetDesc().Width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
			(texture_temporal[temporal_output].GetDesc().Height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
			1,
			cmd
		);

		device->Barrier(&GPUBarrier::Memory(), 1, cmd);
		device->UnbindUAVs(0, arraysize(uavs), cmd);

		device->EventEnd(cmd);
	}

	// Median blur pass:
	{
		device->EventBegin("Median blur pass", cmd);

		device->BindComputeShader(&computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_MEDIAN], cmd);

		device->BindResource(CS, &depthbuffer, TEXSLOT_DEPTH, cmd);
		device->BindResource(CS, &texture_temporal[temporal_output], TEXSLOT_ONDEMAND0, cmd);

		const GPUResource* uavs[] = {
			&texture_median,
		};
		device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd);

		device->Dispatch(
			(texture_median.GetDesc().Width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
			(texture_median.GetDesc().Height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
			1,
			cmd
		);

		device->Barrier(&GPUBarrier::Memory(), 1, cmd);
		device->UnbindUAVs(0, arraysize(uavs), cmd);

		device->EventEnd(cmd);
	}

	//Postprocess_Blur_Bilateral(texture_temporal[temporal_output], lineardepth, texture_temp, output, cmd, 0.85f, 0.85f, 1.2f);

	// combine pass:
	{
		device->EventBegin("Combine pass", cmd);

		device->BindComputeShader(&computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_COMBINE], cmd);

		device->BindResource(CS, &depthbuffer, TEXSLOT_DEPTH, cmd);
		device->BindResource(CS, &gbuffer0, TEXSLOT_GBUFFER0, cmd);
		device->BindResource(CS, &gbuffer1, TEXSLOT_GBUFFER1, cmd);
		device->BindResource(CS, &gbuffer2, TEXSLOT_GBUFFER2, cmd);
		device->BindResource(CS, &texture_median, TEXSLOT_ONDEMAND0, cmd);

		const GPUResource* uavs[] = {
			&output,
		};
		device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd);

		device->Dispatch(
			(desc.Width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
			(desc.Height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
			1,
			cmd
		);

		device->Barrier(&GPUBarrier::Memory(), 1, cmd);
		device->UnbindUAVs(0, arraysize(uavs), cmd);

		device->EventEnd(cmd);
	}

	wiProfiler::EndRange(range);
	device->EventEnd(cmd);
}
void Postprocess_SSS(
const Texture& lineardepth,
const Texture& gbuffer0,
+10
View File
@@ -208,6 +208,16 @@ namespace wiRenderer
const wiGraphics::Texture& output,
wiGraphics::CommandList cmd
);
// Stochastic screen-space reflections post process.
//	input              : scene color texture to be reflected
//	depthbuffer        : depth buffer of the scene
//	lineardepth_minmax : linear depth min/max texture, used by the raytrace pass
//	gbuffer0/1/2       : gbuffer textures of the scene
//	output             : texture the final reflection result is written to (as UAV)
//	cmd                : command list to record the passes into
void Postprocess_StochasticSSR(
const wiGraphics::Texture& input,
const wiGraphics::Texture& depthbuffer,
const wiGraphics::Texture& lineardepth_minmax,
const wiGraphics::Texture& gbuffer0,
const wiGraphics::Texture& gbuffer1,
const wiGraphics::Texture& gbuffer2,
const wiGraphics::Texture& output,
wiGraphics::CommandList cmd
);
void Postprocess_SSS(
const wiGraphics::Texture& lineardepth,
const wiGraphics::Texture& gbuffer0,
+1 -1
View File
@@ -9,7 +9,7 @@ namespace wiVersion
// minor features, major updates
const int minor = 38;
// minor bug fixes, alterations, refactors, updates
const int revision = 7;
const int revision = 8;
long GetVersion()