Merge pull request #96 from Kliaxe/master
Stochastic Screen Space Reflections
This commit is contained in:
@@ -36,6 +36,15 @@ void RenderPath3D::ResizeBuffers()
|
||||
assert(subresource_index == i);
|
||||
}
|
||||
}
|
||||
{
|
||||
TextureDesc desc;
|
||||
desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
|
||||
desc.Format = FORMAT_R16G16B16A16_FLOAT;
|
||||
desc.Width = wiRenderer::GetInternalResolution().x;
|
||||
desc.Height = wiRenderer::GetInternalResolution().y;
|
||||
device->CreateTexture(&desc, nullptr, &rtStochasticSSR);
|
||||
device->SetName(&rtStochasticSSR, "rtStochasticSSR");
|
||||
}
|
||||
{
|
||||
TextureDesc desc;
|
||||
desc.BindFlags = BIND_RENDER_TARGET | BIND_SHADER_RESOURCE;
|
||||
@@ -441,6 +450,13 @@ void RenderPath3D::RenderSSR(const Texture& srcSceneRT, const wiGraphics::Textur
|
||||
wiRenderer::Postprocess_SSR(srcSceneRT, depthBuffer_Copy, rtLinearDepth_minmax, gbuffer1, rtSSR, cmd);
|
||||
}
|
||||
}
|
||||
// Runs the stochastic screen-space reflection post-process and stores its result in rtStochasticSSR.
// Nothing is recorded on the command list when SSR is disabled (getSSREnabled() returns false).
void RenderPath3D::RenderStochasticSSR(const Texture& srcSceneRT, const wiGraphics::Texture& gbuffer0, const wiGraphics::Texture& gbuffer1, const wiGraphics::Texture& gbuffer2, CommandList cmd) const
{
	if (!getSSREnabled())
	{
		return;
	}

	wiRenderer::Postprocess_StochasticSSR(
		srcSceneRT,
		depthBuffer_Copy,
		rtLinearDepth_minmax,
		gbuffer0,
		gbuffer1,
		gbuffer2,
		rtStochasticSSR,
		cmd
	);
}
|
||||
void RenderPath3D::DownsampleDepthBuffer(CommandList cmd) const
|
||||
{
|
||||
GraphicsDevice* device = wiRenderer::GetDevice();
|
||||
|
||||
@@ -51,7 +51,8 @@ private:
|
||||
|
||||
protected:
|
||||
wiGraphics::Texture rtReflection; // contains the scene rendered for planar reflections
|
||||
wiGraphics::Texture rtSSR; // screen-space reflection results
|
||||
wiGraphics::Texture rtSSR; // standard screen-space reflection results
|
||||
wiGraphics::Texture rtStochasticSSR; // stochastic screen-space reflection results
|
||||
wiGraphics::Texture rtSceneCopy; // contains the rendered scene that can be fed into transparent pass for distortion effect
|
||||
wiGraphics::Texture rtWaterRipple; // water ripple sprite normal maps are rendered into this
|
||||
wiGraphics::Texture rtParticleDistortion; // contains distortive particles
|
||||
@@ -102,6 +103,7 @@ protected:
|
||||
virtual void RenderLinearDepth(wiGraphics::CommandList cmd) const;
|
||||
virtual void RenderSSAO(wiGraphics::CommandList cmd) const;
|
||||
virtual void RenderSSR(const wiGraphics::Texture& srcSceneRT, const wiGraphics::Texture& gbuffer1, wiGraphics::CommandList cmd) const;
|
||||
virtual void RenderStochasticSSR(const wiGraphics::Texture& srcSceneRT, const wiGraphics::Texture& gbuffer0, const wiGraphics::Texture& gbuffer1, const wiGraphics::Texture& gbuffer2, wiGraphics::CommandList cmd) const;
|
||||
virtual void DownsampleDepthBuffer(wiGraphics::CommandList cmd) const;
|
||||
virtual void RenderOutline(const wiGraphics::Texture& dstSceneRT, wiGraphics::CommandList cmd) const;
|
||||
virtual void RenderLightShafts(wiGraphics::CommandList cmd) const;
|
||||
|
||||
@@ -209,7 +209,7 @@ void RenderPath3D_Deferred::Render() const
|
||||
device->BindViewports(1, &vp, cmd);
|
||||
|
||||
device->BindResource(PS, getSSAOEnabled() ? &rtSSAO[0] : wiTextureHelper::getWhite(), TEXSLOT_RENDERPATH_SSAO, cmd);
|
||||
device->BindResource(PS, getSSREnabled() ? &rtSSR : wiTextureHelper::getTransparent(), TEXSLOT_RENDERPATH_SSR, cmd);
|
||||
device->BindResource(PS, getSSREnabled() ? &rtStochasticSSR : wiTextureHelper::getTransparent(), TEXSLOT_RENDERPATH_SSR, cmd);
|
||||
wiRenderer::DrawDeferredLights(wiRenderer::GetCamera(), depthBuffer_Copy, rtGBuffer[0], rtGBuffer[1], rtGBuffer[2], cmd);
|
||||
|
||||
device->RenderPassEnd(cmd);
|
||||
@@ -227,7 +227,7 @@ void RenderPath3D_Deferred::Render() const
|
||||
|
||||
RenderDeferredComposition(cmd);
|
||||
|
||||
RenderSSR(rtDeferred, rtGBuffer[1], cmd);
|
||||
RenderStochasticSSR(rtDeferred, rtGBuffer[0], rtGBuffer[1], rtGBuffer[2], cmd);
|
||||
|
||||
DownsampleDepthBuffer(cmd);
|
||||
|
||||
|
||||
@@ -105,7 +105,7 @@ void RenderPath3D_TiledDeferred::Render() const
|
||||
RenderDecals(cmd);
|
||||
|
||||
device->BindResource(CS, getSSAOEnabled() ? &rtSSAO[0] : wiTextureHelper::getWhite(), TEXSLOT_RENDERPATH_SSAO, cmd);
|
||||
device->BindResource(CS, getSSREnabled() ? &rtSSR : wiTextureHelper::getTransparent(), TEXSLOT_RENDERPATH_SSR, cmd);
|
||||
device->BindResource(CS, getSSREnabled() ? &rtStochasticSSR : wiTextureHelper::getTransparent(), TEXSLOT_RENDERPATH_SSR, cmd);
|
||||
|
||||
|
||||
if (device->CheckCapability(GraphicsDevice::GRAPHICSDEVICE_CAPABILITY_UAV_LOAD_FORMAT_R11G11B10_FLOAT))
|
||||
@@ -156,7 +156,7 @@ void RenderPath3D_TiledDeferred::Render() const
|
||||
|
||||
RenderDeferredComposition(cmd);
|
||||
|
||||
RenderSSR(rtDeferred, rtGBuffer[1], cmd);
|
||||
RenderStochasticSSR(rtDeferred, rtGBuffer[0], rtGBuffer[1], rtGBuffer[2], cmd);
|
||||
|
||||
DownsampleDepthBuffer(cmd);
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@
|
||||
<None Include="quad.hlsli" />
|
||||
<None Include="raytracingHF.hlsli" />
|
||||
<None Include="skyHF.hlsli" />
|
||||
<None Include="stochasticSSRHF.hlsli" />
|
||||
<None Include="uvsphere.hlsli" />
|
||||
<None Include="volumeLightHF.hlsli" />
|
||||
<None Include="voxelConeTracingHF.hlsli" />
|
||||
@@ -801,12 +802,27 @@
|
||||
<FxCompile Include="ssaoCS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
|
||||
</FxCompile>
|
||||
<FxCompile Include="stochasticSSRCS_combine.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
|
||||
</FxCompile>
|
||||
<FxCompile Include="stochasticSSRCS_median.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
|
||||
</FxCompile>
|
||||
<FxCompile Include="stochasticSSRCS_raytrace.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
|
||||
</FxCompile>
|
||||
<FxCompile Include="ssrCS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
|
||||
</FxCompile>
|
||||
<FxCompile Include="sssPS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Pixel</ShaderType>
|
||||
</FxCompile>
|
||||
<FxCompile Include="stochasticSSRCS_resolve.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
|
||||
</FxCompile>
|
||||
<FxCompile Include="stochasticSSRCS_temporal.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
|
||||
</FxCompile>
|
||||
<FxCompile Include="sunPS.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Pixel</ShaderType>
|
||||
</FxCompile>
|
||||
@@ -974,4 +990,4 @@
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
||||
@@ -85,6 +85,9 @@
|
||||
<None Include="depthoffieldHF.hlsli">
|
||||
<Filter>HF</Filter>
|
||||
</None>
|
||||
<None Include="stochasticSSRHF.hlsli">
|
||||
<Filter>HF</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<FxCompile Include="objectHS.hlsl">
|
||||
@@ -852,6 +855,21 @@
|
||||
<FxCompile Include="raytrace_tilesortCS.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="stochasticSSRCS_raytrace.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="stochasticSSRCS_resolve.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="stochasticSSRCS_median.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="stochasticSSRCS_temporal.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="stochasticSSRCS_combine.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
<FxCompile Include="upsample_bilateral_float4CS.hlsl">
|
||||
<Filter>CS</Filter>
|
||||
</FxCompile>
|
||||
@@ -897,4 +915,4 @@
|
||||
<UniqueIdentifier>{12396e21-0254-42fa-a88b-805f0703eca5}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
||||
@@ -19,5 +19,5 @@ float4 main(float4 pos : SV_Position, float2 uv : TEXCOORD) : SV_TARGET
|
||||
|
||||
ApplyFog(depth, color);
|
||||
|
||||
return color;
|
||||
return max(0, color);
|
||||
}
|
||||
@@ -147,6 +147,34 @@ inline float2 hammersley2d(uint idx, uint num) {
|
||||
return float2(float(idx) / float(num), radicalInverse_VdC);
|
||||
}
|
||||
|
||||
// Randomized Hammersley sample.
//	idx    : index of the sample inside the sequence
//	num    : number of samples in the sequence (divides idx, so it must not be 0)
//	random : per-pixel random pair; x rotates the first coordinate, y scrambles the second.
//	         NOTE(review): both are scaled by 1/65536, so they are presumably 16-bit values - confirm against the caller.
// Returns float2(E1, E2). E1 is always in [0, 1) because of frac(); E2 stays below 1 only while random.y fits in 16 bits.
inline float2 HammersleyRandom(uint idx, uint num, uint2 random)
{
	// Radical inverse (Van der Corput): mirror the 32 bits of the index.
	// The reversebits intrinsic replaces the manual swap-by-halves sequence.
	const uint bits = reversebits(idx);

	// Regular idx/num spacing, shifted by the random rotation and wrapped back into [0, 1).
	float E1 = frac((float) idx / num + float(random.x) * (1.0 / 65536.0));
	// Upper 16 bits of the mirrored index, scrambled with the random value.
	float E2 = float((bits >> 16) ^ random.y) * (1.0 / 65536.0);
	return float2(E1, E2);
}
|
||||
|
||||
// Randomized Hammersley sample for a one-sample-per-pixel sequence.
//	idx    : index of the sample; it only influences the second coordinate
//	random : per-pixel random pair; x gives the first coordinate, y scrambles the second.
//	         NOTE(review): both are scaled by 1/65536, so they are presumably 16-bit values - confirm against the caller.
// Returns float2(E1, E2). E1 is always in [0, 1) because of frac(); E2 stays below 1 only while random.y fits in 16 bits.
inline float2 HammersleyRandom(uint idx, uint2 random)
{
	// Radical inverse (Van der Corput): mirror the 32 bits of the index.
	// The reversebits intrinsic replaces the manual swap-by-halves sequence.
	const uint bits = reversebits(idx);

	// No idx/num term here: the first coordinate is purely the random rotation.
	float E1 = frac(float(random.x) * (1.0 / 65536.0));
	// Upper 16 bits of the mirrored index, scrambled with the random value.
	float E2 = float((bits >> 16) ^ random.y) * (1.0 / 65536.0);
	return float2(E1, E2);
}
|
||||
|
||||
// "Next Generation Post Processing in Call of Duty: Advanced Warfare"
|
||||
// http://advances.realtimerendering.com/s2014/index.html
|
||||
float InterleavedGradientNoise(float2 uv, uint frameCount)
|
||||
@@ -554,4 +582,4 @@ inline float dither(in float2 pixel)
|
||||
return ditherMask8(pixel);
|
||||
}
|
||||
|
||||
#endif // WI_SHADER_GLOBALS_HF
|
||||
#endif // WI_SHADER_GLOBALS_HF
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
#include "globals.hlsli"
|
||||
#include "brdf.hlsli"
|
||||
#include "stochasticSSRHF.hlsli"
|
||||
#include "ShaderInterop_Postprocess.h"
|
||||
|
||||
TEXTURE2D(texture_median, float4, TEXSLOT_ONDEMAND0);
|
||||
|
||||
RWTEXTURE2D(output, float4, 0);
|
||||
|
||||
// Final Stochastic SSR pass. Here we can apply final touches like specular occlusion or fresnel and BRDFLUT?
|
||||
|
||||
// Combine pass: reads the texture bound as texture_median, multiplies its color by a
// Schlick Fresnel term derived from the G-buffer and writes the result to output.
// Pixels whose depth sample is exactly 0 are left untouched (nothing is written for them).
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
	const float2 uv = (DTid.xy + 0.5f) * xPPResolution_rcp;
	const float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 0);
	if (depth == 0.0f)
		return;

	// Everything in view space:
	const float3 P = reconstructPosition(uv, depth, g_xCamera_InvP);
	const float3 N = mul((float3x3) g_xCamera_View, decodeNormal(texture_gbuffer1.SampleLevel(sampler_point_clamp, uv, 0).xy)).xyz;

	// View vector from the surface towards the camera (the camera sits at the view-space origin).
	// Using normalize(P) here would point away from the camera, making dot(N, V) non-positive for
	// every camera-facing surface, so NdotV would always clamp to 0 and Fresnel would always be
	// evaluated at grazing incidence. The resolve pass builds V the same way (normalize(-P)).
	const float3 V = normalize(-P);

	float NdotV = max(dot(N, V), 0.0f);

	float3 albedo = texture_gbuffer0.SampleLevel(sampler_point_clamp, uv, 0).rgb;
	float4 baseColor = float4(albedo, 1.0f);

	// gbuffer2 layout used here: b = metalness, a = reflectance (r = occlusion and g = roughness are not needed).
	float4 GBuffer2 = texture_gbuffer2.SampleLevel(sampler_point_clamp, uv, 0);
	float metalness = GBuffer2.b;
	float reflectance = GBuffer2.a;

	float3 f0 = ComputeF0(baseColor, reflectance, metalness);
	float f90 = saturate(50.0 * dot(f0, 0.33));
	float3 F = F_Schlick(f0, f90, NdotV);

	// Only the color is attenuated by Fresnel; alpha is passed through unchanged.
	float4 final = texture_median.SampleLevel(sampler_point_clamp, uv, 0);
	final.rgb *= F;

	output[DTid.xy] = final;
}
|
||||
@@ -0,0 +1,64 @@
|
||||
#include "globals.hlsli"
|
||||
#include "ShaderInterop_Postprocess.h"
|
||||
|
||||
TEXTURE2D(texture_temporal, float4, TEXSLOT_ONDEMAND0);
|
||||
|
||||
RWTEXTURE2D(output, float4, 0);
|
||||
|
||||
// A Fast, Small-Radius GPU Median Filter by Morgan McGuire
|
||||
// https://casual-effects.com/research/McGuire2008Median/index.html
|
||||
|
||||
#define s2(a, b) temp = a; a = min(a, b); b = max(temp, b);
|
||||
#define t2(a, b) s2(v[a], v[b]);
|
||||
#define t24(a, b, c, d, e, f, g, h) t2(a, b); t2(c, d); t2(e, f); t2(g, h);
|
||||
#define t25(a, b, c, d, e, f, g, h, i, j) t24(a, b, c, d, e, f, g, h); t2(i, j);
|
||||
|
||||
// 5x5 median filter over the texture bound as texture_temporal.
// The 25 window samples are pushed through a fixed min/max exchange network (the t24/t25
// macros defined above); afterwards element 12 holds the per-channel median, which is written out.
// Pixels whose depth sample is exactly 0 are skipped and nothing is written for them.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
	const float2 uv = (DTid.xy + 0.5f) * xPPResolution_rcp;
	const float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 0);
	if (depth == 0.0f)
		return;

	// The exchange macros refer to this array by the name "v".
	half4 v[25];

	// Gather the window. Slot i corresponds to the offset (i / 5 - 2, i % 5 - 2),
	// i.e. the same (dX + 2) * 5 + (dY + 2) layout with dX as the slow axis.
	[unroll]
	for (uint slot = 0; slot < 25; ++slot)
	{
		const int dX = int(slot / 5) - 2;
		const int dY = int(slot % 5) - 2;
		const float2 offset = float2(float(dX), float(dY));

		v[slot] = texture_temporal.SampleLevel(sampler_linear_clamp, uv + offset * xPPResolution_rcp, 0);
	}

	// Scratch value used by the s2 exchange macro; it must be named "temp".
	half4 temp;

	// Exchange network: the order of these comparisons must not be changed.
	t25(0, 1, 3, 4, 2, 4, 2, 3, 6, 7);
	t25(5, 7, 5, 6, 9, 7, 1, 7, 1, 4);
	t25(12, 13, 11, 13, 11, 12, 15, 16, 14, 16);
	t25(14, 15, 18, 19, 17, 19, 17, 18, 21, 22);
	t25(20, 22, 20, 21, 23, 24, 2, 5, 3, 6);
	t25(0, 6, 0, 3, 4, 7, 1, 7, 1, 4);
	t25(11, 14, 8, 14, 8, 11, 12, 15, 9, 15);
	t25(9, 12, 13, 16, 10, 16, 10, 13, 20, 23);
	t25(17, 23, 17, 20, 21, 24, 18, 24, 18, 21);
	t25(19, 22, 8, 17, 9, 18, 0, 18, 0, 9);
	t25(10, 19, 1, 19, 1, 10, 11, 20, 2, 20);
	t25(2, 11, 12, 21, 3, 21, 3, 12, 13, 22);
	t25(4, 22, 4, 13, 14, 23, 5, 23, 5, 14);
	t25(15, 24, 6, 24, 6, 15, 7, 16, 7, 19);
	t25(3, 11, 5, 17, 11, 17, 9, 17, 4, 10);
	t25(6, 12, 7, 14, 4, 6, 4, 7, 12, 14);
	t25(10, 14, 6, 7, 10, 12, 6, 10, 6, 17);
	t25(12, 17, 7, 17, 7, 10, 12, 18, 7, 12);
	t24(10, 18, 12, 20, 10, 20, 10, 12);

	output[DTid.xy] = v[12];
}
|
||||
@@ -0,0 +1,296 @@
|
||||
#include "globals.hlsli"
|
||||
#include "stochasticSSRHF.hlsli"
|
||||
#include "ShaderInterop_Postprocess.h"
|
||||
|
||||
TEXTURE2D(input, float4, TEXSLOT_ONDEMAND0);
|
||||
TEXTURE2D(texture_lineardepth_minmax, float2, TEXSLOT_ONDEMAND1);
|
||||
|
||||
RWTEXTURE2D(texture_raytrace, float4, 0);
|
||||
RWTEXTURE2D(texture_mask, float2, 1);
|
||||
|
||||
// Use this to use reduced precision, but higher framerate:
|
||||
#define USE_LINEARDEPTH
|
||||
|
||||
static const float rayTraceStride = 1.0f; // Step in horizontal or vertical pixels between samples.
|
||||
static const float rayTraceMaxStep = 512.0f; // Maximum number of iterations. Higher gives better images but may be slow.
|
||||
static const float rayTraceHitThickness = 1.5f; // Thickness to ascribe to each pixel in the depth buffer.
|
||||
static const float rayTraceHitThicknessBias = 7.0f; // Bias to control the thickness along distance.
|
||||
static const float rayTraceMaxDistance = 1000.0f; // Maximum camera-space distance to trace before returning a miss.
|
||||
static const float rayTraceStrideCutoff = 100.0f; // More distant pixels are smaller in screen space. This value tells at what point to
|
||||
// start relaxing the stride to give higher quality reflections for objects far from the camera.
|
||||
static const float raytraceHZBBias = 1.0f;
|
||||
|
||||
// Squared Euclidean distance between two 2D points.
float DistanceSquared(float2 a, float2 b)
{
	const float2 delta = a - b;
	return dot(delta, delta);
}
|
||||
|
||||
// Tests a scene depth z against the depth interval [maxZ - thickness, minZ] covered by one ray step.
//	z    : scene depth at the tested pixel
//	minZ : one end of the ray step's depth interval
//	maxZ : the other end of the ray step's depth interval
// Returns true when the (slightly biased) scene depth lies inside the interval.
bool intersectsDepthBuffer(float z, float minZ, float maxZ)
{
	// 0 at the camera, 1 from rayTraceStrideCutoff onwards. It drives both adjustments below.
	const float distanceFactor = min(1.0f, z / rayTraceStrideCutoff);

	// Increase thickness along distance.
	// This keeps objects from disappearing in the distance.
	const float thickness = clamp(
		rayTraceHitThickness * rayTraceHitThicknessBias * distanceFactor,
		rayTraceHitThickness,
		10.0f);

	// Effectively remove line/tiny artifacts, mostly caused by Z-buffer precision:
	// push nearby depths back by up to 0.05, fading the bias out with distance.
	const float biasedZ = z + lerp(0.05f, 0.0f, distanceFactor);

	const bool notBeyondMin = (minZ >= biasedZ);
	const bool withinThickness = (maxZ - thickness <= biasedZ);
	return notBeyondMin && withinThickness;
}
|
||||
|
||||
// Heavily adapted from McGuire and Mara's original implementation
|
||||
// http://casual-effects.blogspot.com/2014/08/screen-space-ray-tracing.html
|
||||
// Marches a view-space ray across the screen and tests it against the depth buffer.
// Heavily adapted from McGuire and Mara's original implementation.
//	csOrig         : ray origin in view (camera) space
//	csDir          : ray direction in view space
//	jitter         : fraction of one step by which the start of the march is offset
//	roughness      : surface roughness; rougher surfaces get fewer iterations and coarser depth mips
//	hitPixel       : (out) UV of the last pixel that was tested
//	hitPoint       : (out) view-space position reached by the march
//	iterationCount : (out) number of steps taken
// Returns true when the final step intersects the depth buffer.
// When the march leaves the screen or exceeds rayTraceMaxDistance the function returns false early;
// in that case hitPoint and iterationCount keep their zero initial values.
bool ScreenSpaceRayTrace(float3 csOrig, float3 csDir, float jitter, float roughness, out float2 hitPixel, out float3 hitPoint, out float iterationCount)
{
	// Give every output a defined value up front: the loop below tests hitPixel before its first
	// assignment, and the early returns leave the function without writing any of the outputs.
	hitPixel = float2(0.0f, 0.0f);
	hitPoint = float3(0.0f, 0.0f, 0.0f);
	iterationCount = 0.0f;

	// Shorten the ray so that it does not cross the near plane.
	float rayLength = ((csOrig.z + csDir.z * rayTraceMaxDistance) < g_xCamera_ZNearP) ? (g_xCamera_ZNearP - csOrig.z) / csDir.z : rayTraceMaxDistance;

	float3 csRayEnd = csOrig + csDir * rayLength;

	// Project into homogeneous clip space
	float4 clipRayOrigin = mul(g_xCamera_Proj, float4(csOrig, 1.0f));
	float4 clipRayEnd = mul(g_xCamera_Proj, float4(csRayEnd, 1.0f));

	float k0 = 1.0f / clipRayOrigin.w;
	float k1 = 1.0f / clipRayEnd.w;

	float3 Q0 = csOrig * k0;
	float3 Q1 = csRayEnd * k1;

	// Screen-space endpoints
	float2 P0 = clipRayOrigin.xy * k0;
	float2 P1 = clipRayEnd.xy * k1;

	// Project to pixel
	P0 = P0 * float2(0.5, -0.5) + float2(0.5, 0.5);
	P1 = P1 * float2(0.5, -0.5) + float2(0.5, 0.5);

	P0.xy *= xPPResolution.xy;
	P1.xy *= xPPResolution.xy;

#if 1
	// Clip to the screen coordinates. Alternatively we could just modify rayTraceMaxStep instead
	// This will also improve the framerate, without losing quality or features
	float2 yDelta = float2(xPPResolution.y + 2.0f, -2.0f); // - 0.5, 0.5
	float2 xDelta = float2(xPPResolution.x + 2.0f, -2.0f); // - 0.5, 0.5
	float alpha = 0.0;

	// Clip the end point P1 vertically
	if (P1.y > yDelta.x || P1.y < yDelta.y)
	{
		float yClip = (P1.y > yDelta.x) ? yDelta.x : yDelta.y;
		float yAlpha = (P1.y - yClip) / (P1.y - P0.y);
		alpha = yAlpha;
	}

	// Clip the end point P1 horizontally
	if (P1.x > xDelta.x || P1.x < xDelta.y)
	{
		float xClip = (P1.x > xDelta.x) ? xDelta.x : xDelta.y;
		float xAlpha = (P1.x - xClip) / (P1.x - P0.x);
		alpha = max(alpha, xAlpha);
	}

	// These are all in homogeneous space, so they interpolate linearly
	P1 = lerp(P1, P0, alpha);
	k1 = lerp(k1, k0, alpha);
	Q1 = lerp(Q1, Q0, alpha);
#endif

	// If the line is degenerate, make it cover at least one pixel to avoid handling zero-pixel extent as a special case later
	P1 += (DistanceSquared(P0, P1) < 0.0001f) ? float2(0.01f, 0.01f) : 0.0f;
	float2 screenOffset = P1 - P0;

	// Permute so that the primary iteration is in x to collapse all quadrant-specific DDA cases later
	bool permute = false;
	if (abs(screenOffset.x) < abs(screenOffset.y))
	{
		permute = true;
		screenOffset = screenOffset.yx;
		P0 = P0.yx;
		P1 = P1.yx;
	}

	float stepDirection = sign(screenOffset.x);
	float stepInterval = stepDirection / screenOffset.x;

	// Track the derivatives of Q and k
	float3 dQ = (Q1 - Q0) * stepInterval;
	float dk = (k1 - k0) * stepInterval;

	// Because we test 1/2 a texel forward along the ray, on the very last iteration
	// the interpolation can go past the end of the ray. Use these bounds to clamp it.
	float zMin = min(csRayEnd.z, csOrig.z);
	float zMax = max(csRayEnd.z, csOrig.z);

	float2 dP = float2(stepDirection, screenOffset.y * stepInterval);

	// Scale derivatives by the desired pixel stride and then offset the starting values by the jitter fraction
	float strideScale = 1.0f - min(1.0f, csOrig.z / rayTraceStrideCutoff);
	float stride = 1.0f + strideScale * rayTraceStride;

	dP *= stride;
	dQ *= stride;
	dk *= stride;

	P0 += dP * jitter;
	Q0 += dQ * jitter;
	k0 += dk * jitter;

	// Packed march state: xy = pixel position, z = homogeneous depth (Q.z), w = 1/w (k)
	float4 PQk = float4(P0, Q0.z, k0);
	float4 dPQk = float4(dP, dQ.z, dk);
	float3 Q = Q0;

	// Adjust end condition for iteration direction
	float end = P1.x * stepDirection;

	// raytrace iterations based on roughness
	// Matte materials will get less samples
	float roughnessTraceStep = max(rayTraceMaxStep * (1.0 - roughness), 1.0f);

	float stepCount = 0.0f;
	float level = 0.0f; // 1.0f start level. Parameter?

	float prevZMaxEstimate = csOrig.z;
	float rayZMin = prevZMaxEstimate;
	float rayZMax = prevZMaxEstimate;
	// Start far behind the ray so that the first intersection test in the loop condition fails.
	float sceneZMax = rayZMax + 100000.0f;

	[loop]
	for (; ((PQk.x * stepDirection) <= end) &&
		(stepCount <= roughnessTraceStep - 1) &&
		!intersectsDepthBuffer(sceneZMax, rayZMin, rayZMax) &&
		(sceneZMax != 0.0f) &&
		(level > -1);
		PQk += dPQk, stepCount++)
	{
		// Stop as soon as the previously tested pixel left the [0, 1] UV range.
		// On the first iteration this sees the zero initial value set at the top.
		if (!is_saturated(hitPixel))
		{
			return false;
		}

		rayZMin = prevZMaxEstimate;

		// Compute the value at 1/2 step into the future
		rayZMax = (dPQk.z * 0.5f + PQk.z) / (dPQk.w * 0.5f + PQk.w);
		rayZMax = clamp(rayZMax, zMin, zMax);
		prevZMaxEstimate = rayZMax;

		[flatten]
		if (rayTraceMaxDistance < rayZMax)
		{
			return false;
		}

		// Keep the interval ordered regardless of the direction the ray travels in depth.
		[flatten]
		if (rayZMin > rayZMax)
		{
			float t = rayZMin;
			rayZMin = rayZMax;
			rayZMax = t;
		}

		// A simple HZB approach based on roughness
		level += min(raytraceHZBBias / 10.0f, 5.0f) * roughness;

		hitPixel = permute ? PQk.yx : PQk.xy;
		hitPixel *= xPPResolution_rcp;

#ifdef USE_LINEARDEPTH
		sceneZMax = texture_lineardepth_minmax.SampleLevel(sampler_point_clamp, hitPixel, level).g * g_xCamera_ZFarP;
#else
		sceneZMax = getLinearDepth(texture_depth.SampleLevel(sampler_point_clamp, hitPixel, 0).r);
#endif
	}

	// Advance Q based on the number of steps.
	// xy is reconstructed from the step count; z was already tracked inside PQk, so take it from
	// there instead of leaving it at the start value (which would give a wrong hitPoint.z).
	Q.xy += dQ.xy * stepCount;
	Q.z = PQk.z;
	hitPoint = Q * (1.0f / PQk.w);
	iterationCount = stepCount;

	return intersectsDepthBuffer(sceneZMax, rayZMin, rayZMax);
}
|
||||
|
||||
// Ray tracing pass: picks one reflection direction per pixel (importance sampled for rough
// surfaces, mirror direction otherwise), traces it in screen space and stores the outcome.
//	texture_raytrace : xy = hit pixel (UV), z = depth sampled at the hit pixel, w = pdf of the chosen direction
//	texture_mask     : x = hit flag, y = iteration count / rayTraceMaxStep
// Nothing is written for pixels whose depth sample is exactly 0 or whose roughness fade is <= 0.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
	const float2 pixelCenter = (DTid.xy + 0.5f);
	const float2 uv = pixelCenter * xPPResolution_rcp;

	const float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 0);
	if (depth == 0.0f)
		return;

	// Everything in view space:
	const float3 P = reconstructPosition(uv, depth, g_xCamera_InvP);
	const float3 N = mul((float3x3)g_xCamera_View, decodeNormal(texture_gbuffer1.SampleLevel(sampler_point_clamp, uv, 0).xy)).xyz;
	// Incident direction (camera towards surface), which is what reflect() expects.
	const float3 V = normalize(P);

	const float roughness = GetRoughness(texture_gbuffer2.SampleLevel(sampler_point_clamp, uv, 0).g);

	// Surfaces that are too rough for SSR are skipped entirely.
	if (GetRoughnessFade(roughness, SSRMaxRoughness) <= 0.0f)
	{
		return;
	}

	// Default: mirror reflection around the surface normal with a pdf of 1.
	// Rough surfaces replace this below; that loop always runs at least once.
	float4 H = float4(N.xyz, 1.0f);

	if (roughness > 0.1f)
	{
		const float surfaceMargin = 0.0f;
		const float maxRegenCount = 15.0f;

		const uint2 Random = Rand3DPCG16(int3(pixelCenter, g_xFrame_FrameCount)).xy;

		// Pick the best rays: regenerate the half vector until the reflected ray
		// points away from the surface, or the retry budget is used up.
		float RdotN = 0.0f;
		float regenCount = 0;

		[loop]
		while (RdotN <= surfaceMargin && regenCount < maxRegenCount)
		{
			// Low-discrepancy sequence, single sample per pixel.
			float2 Xi = HammersleyRandom(regenCount, Random);

			// Bias the sample towards 0 on the second coordinate by BRDFBias.
			Xi.y = lerp(Xi.y, 0.0f, BRDFBias);

			// I should probably use importance sampling of visible normals http://jcgt.org/published/0007/04/01/paper.pdf
			H = ImportanceSampleGGX(Xi, roughness);
			H = TangentToWorld(H, N);

			RdotN = dot(N, reflect(V, H.xyz));

			regenCount++;
		}
	}

	const float3 dir = reflect(V, H.xyz);

	// Outputs of the trace, pre-filled with zeros.
	float2 hitPixel = float2(0.0f, 0.0f);
	float3 hitPoint = float3(0.0f, 0.0f, 0.0f);
	float iterationCount = 0.0f;

	// Per-pixel, per-frame start offset in the range 1 + noise.
	const float jitter = 1.0f + InterleavedGradientNoise(pixelCenter, g_xFrame_FrameCount);

	const bool hit = ScreenSpaceRayTrace(P, dir, jitter, roughness, hitPixel, hitPoint, iterationCount);

	const float hitDepth = texture_depth.SampleLevel(sampler_point_clamp, hitPixel, 0);

	// Output:
	// xy: hit pixel
	// z: hit depth
	// w: pdf
	texture_raytrace[DTid.xy] = max(0, float4(hitPixel, hitDepth, H.w));

	// Output:
	// x: hit (bool)
	// y: iteration count / rayTraceMaxStep
	texture_mask[DTid.xy] = float2(hit, iterationCount / rayTraceMaxStep);
}
|
||||
@@ -0,0 +1,147 @@
|
||||
#include "globals.hlsli"
|
||||
#include "brdf.hlsli"
|
||||
#include "stochasticSSRHF.hlsli"
|
||||
#include "ShaderInterop_Postprocess.h"
|
||||
|
||||
TEXTURE2D(texture_raytrace, float4, TEXSLOT_ONDEMAND0);
|
||||
TEXTURE2D(texture_mask, float2, TEXSLOT_ONDEMAND1);
|
||||
TEXTURE2D(texture_main, float4, TEXSLOT_ONDEMAND2);
|
||||
|
||||
RWTEXTURE2D(texture_resolve, float4, 0);
|
||||
|
||||
static const float resolveSequenceSize = 20.0f; // Can help reduce noise on rough surfaces, but too high values tend to wash out contact points.
|
||||
static const float resolveMip = 1.0f;
|
||||
static const float resolveSSRIntensity = 1.0f;
|
||||
|
||||
static const float blendScreenEdgeFade = 5.0f;
|
||||
static const bool blendReflectSky = true;
|
||||
|
||||
// Blend factor for one traced sample.
//	hit           : whether the trace reported an intersection
//	iterationStep : iteration count normalized by the maximum step count
//	hitPixel      : UV of the hit pixel
// Returns confidence * screen-edge attenuation, or 0 for a miss when sky reflection is disabled.
float CalculateBlendIntersection(bool hit, float iterationStep, float2 hitPixel)
{
	// Misses are only discarded when blendReflectSky is off.
	const bool contributes = hit || blendReflectSky;

	// Confidence drops sharply as the trace approaches its iteration limit.
	const float confidence = 1.0 - pow(iterationStep, 8.0f);

	// Fade out towards the screen borders: map UV to [-1, 1] and build a vignette
	// that is 0 in the middle and rises to 1 over the outer 1 / blendScreenEdgeFade of each axis.
	const float2 hitPixelNDC = hitPixel * 2.0 - 1.0;
	const float2 vignette = saturate(abs(hitPixelNDC) * blendScreenEdgeFade - (blendScreenEdgeFade - 1.0f));
	const float attenuation = saturate(1.0 - dot(vignette, vignette));

	const float blend = confidence * attenuation;

	return contributes ? blend : 0.0;
}
|
||||
|
||||
// I probably need to figure out a better way to deal with this.
|
||||
// Per-axis scale for the resolve sample offsets, derived from the view-space normal.
// Blends between (1.0, 0.1) and (0.1, 1.0); the blend factor is not clamped.
float2 CalculateTailDirection(float3 viewNormal)
{
	// Reference axis for the cross product: Z, unless the normal is almost parallel to it.
	float3 referenceAxis;
	if (abs(viewNormal.z) < 0.999)
	{
		referenceAxis = float3(0.0, 0.0, 1.0);
	}
	else
	{
		referenceAxis = float3(1.0, 0.0, 0.0);
	}

	const float3 tangent = normalize(cross(referenceAxis, viewNormal));

	const float blendFactor = tangent.x * -viewNormal.y;

	const float2 horizontalTail = float2(1.0, 0.1);
	const float2 verticalTail = float2(0.1, 1.0);
	return lerp(horizontalTail, verticalTail, blendFactor);
}
|
||||
|
||||
// Resolve pass: for every pixel, reuses the rays traced by a few neighbouring pixels
// (texture_raytrace / texture_mask), fetches the scene color at their hit pixels and combines
// the samples weighted by specular BRDF (without Fresnel) divided by the ray's pdf.
// Output: rgb = resolved reflection color, a = resolved blend factor, both floored at 0.00001.
// Pixels whose depth sample is exactly 0 are left untouched; pixels that are too rough are written as 0.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
	const float2 uv = (DTid.xy + 0.5f) * xPPResolution_rcp;
	const float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 0);
	if (depth == 0.0f)
		return;

	// Everything in view space:
	const float3 P = reconstructPosition(uv, depth, g_xCamera_InvP);
	const float3 N = mul((float3x3) g_xCamera_View, decodeNormal(texture_gbuffer1.SampleLevel(sampler_point_clamp, uv, 0).xy)).xyz;
	const float3 V = normalize(-P);
	const float NdotV = saturate(dot(N, V));

	const float roughness = GetRoughness(texture_gbuffer2.SampleLevel(sampler_point_clamp, uv, 0).g);
	const float roughnessSequenceSize = resolveSequenceSize * roughness + 1.0f;

	// Early out, useless if the roughness is out of range
	float roughnessFade = GetRoughnessFade(roughness, SSRMaxRoughness);
	if (roughnessFade <= 0.0f)
	{
		texture_resolve[DTid.xy] = 0;
		return;
	}

	// Tangent of the specular cone; it selects the mip level of the color fetch below.
	float specularConeTangent = lerp(0.0, roughness * (1.0 - BRDFBias), NdotV * sqrt(roughness));
	specularConeTangent *= lerp(saturate(NdotV * 2), 1.0f, sqrt(roughness));

	const float maxMipLevel = 11.0f - 1.0f;
	const uint2 Random = Rand3DPCG16(int3((DTid.xy + 0.5f), g_xFrame_FrameCount)).xy;

	// Values that do not change between samples are set up once, outside the loop.
	const float2 tailDirection = CalculateTailDirection(N);

	// Only the members read by visibilityOcclusion / microfacetDistribution are filled in.
	Surface surface;
	surface.alphaRoughnessSq = pow(roughness, 4);

	float4 result = 0.0f;
	float weightSum = 0.0f;

	const uint NumResolve = 4;
	[unroll]
	for (uint i = 0; i < NumResolve; i++)
	{
		// Randomly rotate and scale the neighbour offset so that the pattern differs per pixel and frame.
		float2 offsetRotation = (HammersleyRandom(i, NumResolve, Random) * 2.0 - 1.0) * roughnessSequenceSize;
		float2x2 offsetRotationMatrix = float2x2(offsetRotation.x, offsetRotation.y, -offsetRotation.y, offsetRotation.x);

		float2 offsetUV = offset[i] * (1.0f / xPPResolution);
		offsetUV = uv + mul(offsetRotationMatrix, offsetUV) * tailDirection;

		float4 raytraceSource = texture_raytrace.SampleLevel(sampler_point_clamp, offsetUV, 0);
		float2 maskSource = texture_mask.SampleLevel(sampler_point_clamp, offsetUV, 0);

		float2 hitPixel = raytraceSource.xy;
		float hitDepth = raytraceSource.z;
		float hitPDF = raytraceSource.w;
		bool hit = (bool)maskSource.x;
		float iterationStep = maskSource.y;

		// The farther away the hit, the wider the cone footprint and the blurrier the mip that is fetched.
		float intersectionCircleRadius = specularConeTangent * length(hitPixel - uv);
		float sourceMip = clamp(log2(intersectionCircleRadius * max(xPPResolution.x, xPPResolution.y)), 0.0, maxMipLevel) * resolveMip;

		float4 sampleColor;
		sampleColor.rgb = texture_main.SampleLevel(sampler_linear_clamp, hitPixel, sourceMip).xyz;
		sampleColor.a = CalculateBlendIntersection(hit, iterationStep, hitPixel);

		// Compress the color range before accumulation (undone after the loop) to suppress fireflies.
		sampleColor.rgb /= 1 + Luminance(sampleColor.rgb);

		// BRDF

		float3 hitViewPosition = reconstructPosition(hitPixel, hitDepth, g_xCamera_InvP);

		float3 L = normalize(hitViewPosition - P);
		float3 H = normalize(L + V);

		float NdotH = saturate(dot(N, H));
		float NdotL = saturate(dot(N, L));

		SurfaceToLight surfaceToLight;
		surfaceToLight.NdotH = NdotH;
		surfaceToLight.NdotL = NdotL;
		surfaceToLight.NdotV = NdotV;

		// We could simply use BRDF_GetSpecular, but we exclude fresnel for later
		float Vis = visibilityOcclusion(surface, surfaceToLight);
		float D = microfacetDistribution(surface, surfaceToLight);
		float specularLight = Vis * D * surfaceToLight.NdotL;

		float weight = specularLight / max(hitPDF, 0.00001f);

		result += sampleColor * weight;
		weightSum += weight;
	}

	// Normalize. When every sample had zero weight the sum is 0; dividing would produce 0/0,
	// so the division is skipped and the final floor below yields the minimum value.
	if (weightSum > 0.0f)
	{
		result /= weightSum;
	}

	// Undo the range compression applied per sample.
	result.rgb /= 1 - Luminance(result.rgb);

	result *= roughnessFade;
	result *= resolveSSRIntensity;

	texture_resolve[DTid.xy] = max(result, 0.00001f);
}
|
||||
@@ -0,0 +1,171 @@
|
||||
#include "globals.hlsli"
|
||||
#include "stochasticSSRHF.hlsli"
|
||||
#include "ShaderInterop_Postprocess.h"
|
||||
|
||||
// Resolve result of the current frame; sampled in a 3x3 neighborhood by ResolverAABB() in main().
TEXTURE2D(resolve_current, float4, TEXSLOT_ONDEMAND0);
|
||||
// Accumulated result of the previous frame; sampled at the reprojected UV in main().
TEXTURE2D(resolve_history, float4, TEXSLOT_ONDEMAND1);
|
||||
// Raytrace output; main() reads xy as the hit pixel (UV) and z as the hit depth.
TEXTURE2D(texture_raytrace, float4, TEXSLOT_ONDEMAND2);
|
||||
|
||||
// Temporally blended result, written once per thread at DTid.xy.
RWTEXTURE2D(output, float4, 0);
|
||||
|
||||
// Lower bound of the history blend factor (used when current and history luminance differ the most).
static const float temporalResponseMin = 0.85f;
|
||||
// Upper bound of the history blend factor (used when current and history luminance match).
static const float temporalResponseMax = 1.0f;
|
||||
// Passed to ResolverAABB() as AABBScale: half-size of the clipping box in standard deviations.
static const float temporalScale = 2.0f;
|
||||
// Passed to ResolverAABB() as exposureScale: luma multiplier inside HdrWeight4().
static const float temporalExposure = 10.0f;
|
||||
|
||||
// Cheap luma estimate: green is counted twice, red and blue once.
// The result is the raw weighted sum (weights add up to 4, it is not normalized).
inline float Luma4(float3 color)
{
	const float doubledGreen = color.g * 2;
	const float redPlusBlue = color.r + color.b;
	return doubledGreen + redPlusBlue;
}
|
||||
|
||||
// Filtering weight that shrinks as the sample gets brighter:
// 1 / (Luma4(color) * exposure + 4).
inline float HdrWeight4(float3 color, float exposure)
{
	const float scaledLuma = Luma4(color) * exposure;
	return rcp(scaledLuma + 4.0f);
}
|
||||
|
||||
// Clips q against the axis-aligned box [aabb_min, aabb_max].
// If q.xyz lies outside the box, q is pulled along the line towards the box
// center (with p.w used as the center's w) until it touches the box;
// otherwise q is returned unchanged.
float4 clip_aabb(float3 aabb_min, float3 aabb_max, float4 p, float4 q)
{
	const float3 boxCenter = 0.5 * (aabb_max + aabb_min);
	// Tiny bias keeps the per-axis division below away from zero for degenerate boxes.
	const float3 boxHalfSize = 0.5 * (aabb_max - aabb_min) + 0.00000001f;

	const float4 center = float4(boxCenter, p.w);
	const float4 centerToQ = q - center;

	// Distance from the center in units of the box half size, per axis.
	const float3 normalized = abs(centerToQ.xyz / boxHalfSize);
	const float largestAxis = max(normalized.x, max(normalized.y, normalized.z));

	if (largestAxis > 1.0)
	{
		return center + centerToQ / largestAxis;
	}

	return q; // point inside aabb
}
|
||||
|
||||
// Gathers the 3x3 neighborhood of `uv` from `currentColor` and derives from it:
//   currentOutput  - HDR-weighted average of the neighborhood (HdrWeight4 weights)
//   currentAverage - plain mean of the neighborhood (center replaced by currentOutput)
//   currentMin/Max - mean -/+ AABBScale * standard deviation, widened to contain currentOutput
//
// currentColor / currentSampler : texture and sampler to read the neighborhood from
// sharpness                     : unused, kept for interface compatibility
// exposureScale                 : exposure passed to HdrWeight4()
// AABBScale                     : half-size of the variance box in standard deviations
// uv                            : center of the neighborhood
// texelSize                     : pixel offsets are DIVIDED by this value, so it must be
//                                 the texture resolution in pixels (main() passes xPPResolution)
inline void ResolverAABB(Texture2D<float4> currentColor, SamplerState currentSampler, float sharpness, float exposureScale, float AABBScale, float2 uv, float2 texelSize, inout float4 currentMin, inout float4 currentMax, inout float4 currentAverage, inout float4 currentOutput)
{
	const int2 SampleOffset[9] = { int2(-1.0, -1.0), int2(0.0, -1.0), int2(1.0, -1.0), int2(-1.0, 0.0), int2(0.0, 0.0), int2(1.0, 0.0), int2(-1.0, 1.0), int2(0.0, 1.0), int2(1.0, 1.0) };

	// Modulate Luma HDR

	float4 sampleColors[9];
	float4 weightedSum = 0;
	float totalWeight = 0;
	[unroll]
	for (uint i = 0; i < 9; i++)
	{
		sampleColors[i] = currentColor.SampleLevel(currentSampler, uv + (SampleOffset[i] / texelSize), 0.0f);

		const float sampleWeight = HdrWeight4(sampleColors[i].rgb, exposureScale);
		weightedSum += sampleColors[i] * sampleWeight;
		totalWeight += sampleWeight;
	}

	// The center tap is replaced by the weighted average before the statistics are taken.
	sampleColors[4] = weightedSum / totalWeight;

	// Variance Clipping (AABB)

	float4 m1 = 0.0;
	float4 m2 = 0.0;
	[unroll]
	for (uint x = 0; x < 9; x++)
	{
		m1 += sampleColors[x];
		m2 += sampleColors[x] * sampleColors[x];
	}

	float4 mean = m1 / 9.0;
	// E[x^2] - E[x]^2 can come out slightly negative through rounding when the
	// neighborhood is (almost) constant; clamp it so sqrt() never produces NaN.
	float4 stddev = sqrt(max((m2 / 9.0) - sqr(mean), 0.0));

	currentMin = mean - AABBScale * stddev;
	currentMax = mean + AABBScale * stddev;

	currentOutput = sampleColors[4];
	currentMin = min(currentMin, currentOutput);
	currentMax = max(currentMax, currentOutput);
	currentAverage = mean;
}
|
||||
|
||||
// Returns the screen-space motion (in 0..1 UV units, current minus previous)
// of the point reconstructed from `uv` and `depth`, considering only the
// change between the current and the previous view-projection matrix.
float2 CalculateCustomMotion(float depth, float2 uv)
{
	const float4 worldPosition = float4(reconstructPosition(uv, depth, g_xCamera_InvVP), 1.0f);

	const float4 clipNow = mul(g_xCamera_VP, worldPosition);
	const float4 clipBefore = mul(g_xFrame_MainCamera_PrevVP, worldPosition);

	// Perspective divide, then remap from [-1, 1] to [0, 1].
	const float2 screenNow = (clipNow.xy * rcp(clipNow.w) + 1.0f) / 2.0f;
	const float2 screenBefore = (clipBefore.xy * rcp(clipBefore.w) + 1.0f) / 2.0f;

	return screenNow - screenBefore;
}
|
||||
|
||||
// Temporal accumulation pass of the stochastic SSR.
// Per pixel: picks a reprojection velocity, fetches the history at the
// reprojected UV, clips it against the variance box of the current 3x3
// neighborhood and blends it with the (HDR-weighted) current result.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex)
{
	const float2 uv = (DTid.xy + 0.5f) * xPPResolution_rcp;
	const float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 0);

	float4 raytraceSource = texture_raytrace.SampleLevel(sampler_point_clamp, uv, 0);
	float hitDepth = raytraceSource.z;
	float2 hitPixel = raytraceSource.xy;

	// Calculate custom motion vectors to counter smearing, which we would get by using normal gbuffer velocity

	float2 reflectionCustomVelocity = CalculateCustomMotion(hitDepth, uv);
	float2 hitCustomVelocity = CalculateCustomMotion(hitDepth, hitPixel);
	float2 customVelocity = CalculateCustomMotion(depth, uv);

	float2 standardHitVelocity = texture_gbuffer1.SampleLevel(sampler_point_clamp, hitPixel, 0).zw;
	float2 standardVelocity = texture_gbuffer1.SampleLevel(sampler_point_clamp, uv, 0).zw;

	// Where the gbuffer velocity disagrees with the camera-only velocity the
	// surface itself is moving; the masks measure that disagreement.
	float2 velocityDifference = customVelocity - standardVelocity;
	float2 hitVelocityDifference = hitCustomVelocity - standardHitVelocity;

	float objectVelocityMask = saturate(dot(velocityDifference, velocityDifference) * xPPResolution_rcp.x * 100.0f);
	float hitObjectVelocityMask = saturate(dot(hitVelocityDifference, hitVelocityDifference) * xPPResolution_rcp.x * 100.0f);

	float2 objectVelocity = standardVelocity * objectVelocityMask;
	float2 hitObjectVelocity = standardHitVelocity * hitObjectVelocityMask;

	float2 velocity = lerp(lerp(reflectionCustomVelocity, hitObjectVelocity, hitObjectVelocityMask), objectVelocity, objectVelocityMask);
	float2 prevUV = float2(uv.x - velocity.x, uv.y + velocity.y);

	float4 previous = resolve_history.SampleLevel(sampler_linear_clamp, prevUV, 0);

	// Luma HDR and AABB minmax

	float4 current = 0;
	float4 currentMin, currentMax, currentAverage;
	ResolverAABB(resolve_current, sampler_linear_clamp, 0, temporalExposure, temporalScale, uv, xPPResolution, currentMin, currentMax, currentAverage, current);

	previous.xyz = clip_aabb(currentMin.xyz, currentMax.xyz, clamp(currentAverage, currentMin, currentMax), previous).xyz;
	previous.a = clamp(previous.a, currentMin.a, currentMax.a);

	// Blend color & history
	// Feedback weight from unbiased luminance difference (Timothy Lottes)

	float lumFiltered = Luminance(current.rgb); // Luma4(current.rgb)
	float lumHistory = Luminance(previous.rgb);

	float lumDifference = abs(lumFiltered - lumHistory) / max(lumFiltered, max(lumHistory, 0.2f));
	float lumWeight = sqr(1.0f - lumDifference);
	float blendFinal = lerp(temporalResponseMin, temporalResponseMax, lumWeight);

	// Reduce ghosting by refreshing the blend by velocity... but adds additional noise
	//float2 velocityScreen = velocity * xPPResolution;
	//float velocityBlend = sqrt(dot(velocityScreen, velocityScreen));
	//blendFinal = lerp(blendFinal, 0.2f, saturate(velocityBlend / 100.0f));

	float4 result = lerp(current, previous, blendFinal);

	output[DTid.xy] = result;
}
|
||||
@@ -0,0 +1,116 @@
|
||||
|
||||
#ifndef WI_STOCHASTICSSR_HF
|
||||
#define WI_STOCHASTICSSR_HF
|
||||
|
||||
// Shared SSR settings:
|
||||
static const float SSRMaxRoughness = 1.0f; // Specify max roughness, this can improve performance in complex scenes.
|
||||
static const float BRDFBias = 0.7f;
|
||||
|
||||
// Slope of the roughness fade: -2 / maxRoughness, with maxRoughness
// clamped to [0.01, 1] so the division is always well defined.
float ComputeRoughnessMaskScale(in float maxRoughness)
{
	const float clampedMaxRoughness = clamp(maxRoughness, 0.01f, 1.0f);
	return (-2.0f / clampedMaxRoughness) * 1.0f; // 2.0f & 1.0f
}
|
||||
|
||||
// Reflection fade factor by roughness: 1 up to half of the (clamped)
// maxRoughness, falling linearly to 0 at maxRoughness. The value is only
// limited from above, so it becomes negative for rougher surfaces.
float GetRoughnessFade(in float roughness, in float maxRoughness)
{
	const float fade = roughness * ComputeRoughnessMaskScale(maxRoughness) + 2;
	return min(fade, 1.0f);
}
|
||||
|
||||
// Returns the roughness limited from below to 0.02.
float GetRoughness(float roughness)
{
	const float minRoughness = 0.02f;
	return max(roughness, minRoughness);
}
|
||||
|
||||
// Relative luminance of a linear RGB color (Rec. 709 weights).
float Luminance(float3 color)
{
	const float3 lumaWeights = float3(0.2126, 0.7152, 0.0722);
	return dot(color, lumaWeights);
}
|
||||
|
||||
// 3x3 grid of offsets with a stride of 2, listed row by row
// (top row first, left to right); index 4 is the center.
static const float2 offset[9] =
{
	float2(-2.0, -2.0), float2(0.0, -2.0), float2(2.0, -2.0),
	float2(-2.0,  0.0), float2(0.0,  0.0), float2(2.0,  0.0),
	float2(-2.0,  2.0), float2(0.0,  2.0), float2(2.0,  2.0)
};
|
||||
|
||||
|
||||
// Hashes an integer 3D coordinate into three pseudo-random values.
// Each returned component holds 16 bits (range 0..65535).
uint3 Rand3DPCG16(int3 p)
{
	// Linear congruential step applied to every component.
	uint3 state = uint3(p) * 1664525u + 1013904223u;

	// Two rounds of cross-component mixing; the update order within a round matters.
	[unroll]
	for (uint round = 0; round < 2; ++round)
	{
		state.x += state.y * state.z;
		state.y += state.z * state.x;
		state.z += state.x * state.y;
	}

	// only top 16 bits are well shuffled
	return state >> 16u;
}
|
||||
|
||||
// Brian Karis, Epic Games "Real Shading in Unreal Engine 4"
// Draws a half vector from the GGX distribution.
//   Xi        : pair of random numbers (Xi.x drives the azimuth, Xi.y the elevation)
//   Roughness : surface roughness; it is squared once to get the GGX alpha
// Returns the tangent-space half vector in xyz (z along the normal) and
// the pdf of that sample (D * cos(theta)) in w.
float4 ImportanceSampleGGX(float2 Xi, float Roughness)
{
	const float alpha = Roughness * Roughness;
	const float alphaSq = alpha * alpha;

	const float phi = 2 * PI * Xi.x;

	const float cosTheta = sqrt((1.0 - Xi.y) / (1.0 + (alphaSq - 1.0) * Xi.y));
	// Clamped so that the sine never collapses to exactly zero.
	const float sinTheta = sqrt(max(1e-5, 1.0 - cosTheta * cosTheta));

	const float3 halfVector = float3(sinTheta * cos(phi), sinTheta * sin(phi), cosTheta);

	const float denominator = (cosTheta * alphaSq - cosTheta) * cosTheta + 1;
	const float distribution = alphaSq / (PI * denominator * denominator);

	return float4(halfVector, distribution * cosTheta);
}
|
||||
|
||||
// [ Duff et al. 2017, "Building an Orthonormal Basis, Revisited" ]
// http://jcgt.org/published/0006/01/01/
// Builds a basis around TangentZ and returns it as a matrix whose rows are
// (TangentX, TangentY, TangentZ). TangentZ is used as given, it is not
// normalized here.
float3x3 GetTangentBasis(float3 TangentZ)
{
	const float Sign = TangentZ.z >= 0 ? 1 : -1;
	const float a = -rcp(Sign + TangentZ.z);
	const float b = TangentZ.x * TangentZ.y * a;

	// The squares are written as explicit multiplies: HLSL pow() is not defined
	// for a negative base (the result is NaN), and the x/y components of a
	// normal are negative for half of all directions.
	const float xSquared = TangentZ.x * TangentZ.x;
	const float ySquared = TangentZ.y * TangentZ.y;

	float3 TangentX = { 1 + Sign * a * xSquared, Sign * b, -Sign * TangentZ.x };
	float3 TangentY = { b, Sign + a * ySquared, -TangentZ.y };

	return float3x3(TangentX, TangentY, TangentZ);
}
|
||||
|
||||
// Transforms `vec` from the tangent space built around tangentZ
// (see GetTangentBasis) into the space tangentZ is expressed in.
float3 TangentToWorld(float3 vec, float3 tangentZ)
{
	const float3x3 basis = GetTangentBasis(tangentZ);
	return mul(vec, basis);
}
|
||||
|
||||
// float4 variant: rotates H.xyz out of the tangent space built around
// tangentZ and passes H.w through untouched.
float4 TangentToWorld(float4 H, float3 tangentZ)
{
	const float3x3 basis = GetTangentBasis(tangentZ);
	const float3 rotated = mul(H.xyz, basis);
	return float4(rotated, H.w);
}
|
||||
|
||||
// Inverse of TangentToWorld: projects `vec` onto the three axes of the
// basis built around tangentZ.
float3 WorldToTangent(float3 vec, float3 tangentZ)
{
	const float3x3 basis = GetTangentBasis(tangentZ);
	return mul(basis, vec);
}
|
||||
|
||||
#endif // WI_STOCHASTICSSR_HF
|
||||
@@ -300,6 +300,11 @@ enum CSTYPES
|
||||
CSTYPE_POSTPROCESS_BLUR_BILATERAL_UNORM4,
|
||||
CSTYPE_POSTPROCESS_SSAO,
|
||||
CSTYPE_POSTPROCESS_SSR,
|
||||
CSTYPE_POSTPROCESS_STOCHASTICSSR_RAYTRACE,
|
||||
CSTYPE_POSTPROCESS_STOCHASTICSSR_RESOLVE,
|
||||
CSTYPE_POSTPROCESS_STOCHASTICSSR_TEMPORAL,
|
||||
CSTYPE_POSTPROCESS_STOCHASTICSSR_MEDIAN,
|
||||
CSTYPE_POSTPROCESS_STOCHASTICSSR_COMBINE,
|
||||
CSTYPE_POSTPROCESS_LIGHTSHAFTS,
|
||||
CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_HORIZONTAL,
|
||||
CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_VERTICAL,
|
||||
|
||||
@@ -1333,6 +1333,11 @@ void LoadShaders()
|
||||
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_BLUR_BILATERAL_UNORM4], "blur_bilateral_unorm4CS.cso"); });
|
||||
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_SSAO], "ssaoCS.cso"); });
|
||||
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_SSR], "ssrCS.cso"); });
|
||||
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_RAYTRACE], "stochasticSSRCS_raytrace.cso"); });
|
||||
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_RESOLVE], "stochasticSSRCS_resolve.cso"); });
|
||||
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_TEMPORAL], "stochasticSSRCS_temporal.cso"); });
|
||||
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_MEDIAN], "stochasticSSRCS_median.cso"); });
|
||||
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_COMBINE], "stochasticSSRCS_combine.cso"); });
|
||||
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_LIGHTSHAFTS], "lightshaftsCS.cso"); });
|
||||
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_HORIZONTAL], "depthoffield_tileMaxCOC_horizontalCS.cso"); });
|
||||
wiJobSystem::Execute(ctx, [] { LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_VERTICAL], "depthoffield_tileMaxCOC_verticalCS.cso"); });
|
||||
@@ -8843,6 +8848,256 @@ void Postprocess_SSR(
|
||||
wiProfiler::EndRange(range);
|
||||
device->EventEnd(cmd);
|
||||
}
|
||||
void Postprocess_StochasticSSR(
|
||||
const Texture& input,
|
||||
const Texture& depthbuffer,
|
||||
const Texture& lineardepth_minmax,
|
||||
const Texture& gbuffer0,
|
||||
const Texture& gbuffer1,
|
||||
const Texture& gbuffer2,
|
||||
const Texture& output,
|
||||
CommandList cmd
|
||||
)
|
||||
{
|
||||
GraphicsDevice* device = GetDevice();
|
||||
|
||||
device->EventBegin("Postprocess_StochasticSSR", cmd);
|
||||
auto range = wiProfiler::BeginRangeGPU("Stochastic SSR", cmd);
|
||||
|
||||
device->UnbindResources(TEXSLOT_RENDERPATH_SSR, 1, cmd);
|
||||
|
||||
const TextureDesc& desc = output.GetDesc();
|
||||
|
||||
static TextureDesc initialized_desc;
|
||||
static Texture texture_main;
|
||||
static Texture texture_raytrace;
|
||||
static Texture texture_mask;
|
||||
static Texture texture_resolve;
|
||||
static Texture texture_temporal[2];
|
||||
static Texture texture_median;
|
||||
|
||||
// Initialize once
|
||||
if (initialized_desc.Width != desc.Width || initialized_desc.Height != desc.Height)
|
||||
{
|
||||
initialized_desc = desc;
|
||||
|
||||
TextureDesc main_desc;
|
||||
main_desc.type = TextureDesc::TEXTURE_2D;
|
||||
main_desc.Width = desc.Width;
|
||||
main_desc.Height = desc.Height;
|
||||
main_desc.Format = FORMAT_R16G16B16A16_FLOAT;
|
||||
main_desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
|
||||
main_desc.MipLevels = 0; // full mip chain
|
||||
device->CreateTexture(&main_desc, nullptr, &texture_main);
|
||||
|
||||
main_desc = texture_main.GetDesc(); // mip count was initialized in CreateTexture()
|
||||
for (uint32_t i = 0; i < main_desc.MipLevels; ++i)
|
||||
{
|
||||
int subresource_index;
|
||||
subresource_index = device->CreateSubresource(&texture_main, SRV, 0, 1, i, 1);
|
||||
assert(subresource_index == i);
|
||||
subresource_index = device->CreateSubresource(&texture_main, UAV, 0, 1, i, 1);
|
||||
assert(subresource_index == i);
|
||||
}
|
||||
|
||||
TextureDesc cast_desc;
|
||||
cast_desc.type = TextureDesc::TEXTURE_2D;
|
||||
cast_desc.Width = desc.Width / 2;
|
||||
cast_desc.Height = desc.Height / 2;
|
||||
cast_desc.Format = FORMAT_R16G16B16A16_FLOAT;
|
||||
cast_desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
|
||||
device->CreateTexture(&cast_desc, nullptr, &texture_raytrace);
|
||||
cast_desc.Format = FORMAT_R16G16_FLOAT;
|
||||
device->CreateTexture(&cast_desc, nullptr, &texture_mask);
|
||||
|
||||
TextureDesc buffer_desc;
|
||||
buffer_desc.type = TextureDesc::TEXTURE_2D;
|
||||
buffer_desc.Width = desc.Width;
|
||||
buffer_desc.Height = desc.Height;
|
||||
buffer_desc.Format = FORMAT_R16G16B16A16_FLOAT;
|
||||
buffer_desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS;
|
||||
device->CreateTexture(&buffer_desc, nullptr, &texture_resolve);
|
||||
device->CreateTexture(&buffer_desc, nullptr, &texture_temporal[0]);
|
||||
device->CreateTexture(&buffer_desc, nullptr, &texture_temporal[1]);
|
||||
device->CreateTexture(&buffer_desc, nullptr, &texture_median);
|
||||
}
|
||||
|
||||
// This is very expensive. There is problably a better way of getting LOD of input.
|
||||
// For now I'm just making a copy of input, to stay on the safe side.
|
||||
|
||||
// Main buffer copy and mip:
|
||||
{
|
||||
device->EventBegin("Main buffer pass", cmd);
|
||||
|
||||
CopyTexture2D(texture_main, 0, 0, 0, input, 0, cmd);
|
||||
GenerateMipChain(texture_main, MIPGENFILTER_GAUSSIAN, cmd);
|
||||
|
||||
device->EventEnd(cmd);
|
||||
}
|
||||
|
||||
// Switch to half res
|
||||
PostProcessCB cb;
|
||||
cb.xPPResolution.x = desc.Width / 2;
|
||||
cb.xPPResolution.y = desc.Height / 2;
|
||||
cb.xPPResolution_rcp.x = 1.0f / cb.xPPResolution.x;
|
||||
cb.xPPResolution_rcp.y = 1.0f / cb.xPPResolution.y;
|
||||
device->UpdateBuffer(&constantBuffers[CBTYPE_POSTPROCESS], &cb, cmd);
|
||||
device->BindConstantBuffer(CS, &constantBuffers[CBTYPE_POSTPROCESS], CB_GETBINDSLOT(PostProcessCB), cmd);
|
||||
|
||||
// Raytrace pass:
|
||||
{
|
||||
device->EventBegin("Stochastic Raytrace pass", cmd);
|
||||
device->BindComputeShader(&computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_RAYTRACE], cmd);
|
||||
|
||||
device->BindResource(CS, &depthbuffer, TEXSLOT_DEPTH, cmd);
|
||||
device->BindResource(CS, &gbuffer1, TEXSLOT_GBUFFER1, cmd);
|
||||
device->BindResource(CS, &gbuffer2, TEXSLOT_GBUFFER2, cmd);
|
||||
device->BindResource(CS, &input, TEXSLOT_ONDEMAND0, cmd);
|
||||
device->BindResource(CS, &lineardepth_minmax, TEXSLOT_ONDEMAND1, cmd);
|
||||
|
||||
const GPUResource* uavs[] = {
|
||||
&texture_raytrace,
|
||||
&texture_mask,
|
||||
};
|
||||
device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd);
|
||||
|
||||
device->Dispatch(
|
||||
(texture_raytrace.GetDesc().Width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
|
||||
(texture_raytrace.GetDesc().Width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
|
||||
1,
|
||||
cmd
|
||||
);
|
||||
|
||||
device->Barrier(&GPUBarrier::Memory(), 1, cmd);
|
||||
device->UnbindUAVs(0, arraysize(uavs), cmd);
|
||||
device->EventEnd(cmd);
|
||||
}
|
||||
|
||||
// Switch to full res
|
||||
cb.xPPResolution.x = desc.Width;
|
||||
cb.xPPResolution.y = desc.Height;
|
||||
cb.xPPResolution_rcp.x = 1.0f / cb.xPPResolution.x;
|
||||
cb.xPPResolution_rcp.y = 1.0f / cb.xPPResolution.y;
|
||||
device->UpdateBuffer(&constantBuffers[CBTYPE_POSTPROCESS], &cb, cmd);
|
||||
device->BindConstantBuffer(CS, &constantBuffers[CBTYPE_POSTPROCESS], CB_GETBINDSLOT(PostProcessCB), cmd);
|
||||
|
||||
// Resolve pass:
|
||||
{
|
||||
device->EventBegin("Resolve pass", cmd);
|
||||
device->BindComputeShader(&computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_RESOLVE], cmd);
|
||||
|
||||
device->BindResource(CS, &depthbuffer, TEXSLOT_DEPTH, cmd);
|
||||
device->BindResource(CS, &gbuffer1, TEXSLOT_GBUFFER1, cmd);
|
||||
device->BindResource(CS, &gbuffer2, TEXSLOT_GBUFFER2, cmd);
|
||||
device->BindResource(CS, &texture_raytrace, TEXSLOT_ONDEMAND0, cmd);
|
||||
device->BindResource(CS, &texture_mask, TEXSLOT_ONDEMAND1, cmd);
|
||||
device->BindResource(CS, &texture_main, TEXSLOT_ONDEMAND2, cmd);
|
||||
|
||||
const GPUResource* uavs[] = {
|
||||
&texture_resolve,
|
||||
};
|
||||
device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd);
|
||||
|
||||
device->Dispatch(
|
||||
(texture_resolve.GetDesc().Width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
|
||||
(texture_resolve.GetDesc().Height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
|
||||
1,
|
||||
cmd
|
||||
);
|
||||
|
||||
device->Barrier(&GPUBarrier::Memory(), 1, cmd);
|
||||
device->UnbindUAVs(0, arraysize(uavs), cmd);
|
||||
device->EventEnd(cmd);
|
||||
}
|
||||
|
||||
int temporal_output = device->GetFrameCount() % 2;
|
||||
int temporal_history = 1 - temporal_output;
|
||||
|
||||
// Temporal pass:
|
||||
{
|
||||
device->EventBegin("Temporal pass", cmd);
|
||||
device->BindComputeShader(&computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_TEMPORAL], cmd);
|
||||
|
||||
device->BindResource(CS, &gbuffer1, TEXSLOT_GBUFFER1, cmd);
|
||||
device->BindResource(CS, &depthbuffer, TEXSLOT_DEPTH, cmd);
|
||||
device->BindResource(CS, &texture_resolve, TEXSLOT_ONDEMAND0, cmd);
|
||||
device->BindResource(CS, &texture_temporal[temporal_history], TEXSLOT_ONDEMAND1, cmd);
|
||||
device->BindResource(CS, &texture_raytrace, TEXSLOT_ONDEMAND2, cmd);
|
||||
|
||||
const GPUResource* uavs[] = {
|
||||
&texture_temporal[temporal_output],
|
||||
};
|
||||
device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd);
|
||||
|
||||
device->Dispatch(
|
||||
(texture_temporal[temporal_output].GetDesc().Width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
|
||||
(texture_temporal[temporal_output].GetDesc().Height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
|
||||
1,
|
||||
cmd
|
||||
);
|
||||
|
||||
device->Barrier(&GPUBarrier::Memory(), 1, cmd);
|
||||
device->UnbindUAVs(0, arraysize(uavs), cmd);
|
||||
device->EventEnd(cmd);
|
||||
}
|
||||
|
||||
// Median blur pass:
|
||||
{
|
||||
device->EventBegin("Median blur pass", cmd);
|
||||
device->BindComputeShader(&computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_MEDIAN], cmd);
|
||||
|
||||
device->BindResource(CS, &depthbuffer, TEXSLOT_DEPTH, cmd);
|
||||
device->BindResource(CS, &texture_temporal[temporal_output], TEXSLOT_ONDEMAND0, cmd);
|
||||
|
||||
const GPUResource* uavs[] = {
|
||||
&texture_median,
|
||||
};
|
||||
device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd);
|
||||
|
||||
device->Dispatch(
|
||||
(texture_median.GetDesc().Width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
|
||||
(texture_median.GetDesc().Height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
|
||||
1,
|
||||
cmd
|
||||
);
|
||||
|
||||
device->Barrier(&GPUBarrier::Memory(), 1, cmd);
|
||||
device->UnbindUAVs(0, arraysize(uavs), cmd);
|
||||
device->EventEnd(cmd);
|
||||
}
|
||||
//Postprocess_Blur_Bilateral(texture_temporal[temporal_output], lineardepth, texture_temp, output, cmd, 0.85f, 0.85f, 1.2f);
|
||||
|
||||
// combine pass:
|
||||
{
|
||||
device->EventBegin("Combine pass", cmd);
|
||||
device->BindComputeShader(&computeShaders[CSTYPE_POSTPROCESS_STOCHASTICSSR_COMBINE], cmd);
|
||||
|
||||
device->BindResource(CS, &depthbuffer, TEXSLOT_DEPTH, cmd);
|
||||
device->BindResource(CS, &gbuffer0, TEXSLOT_GBUFFER0, cmd);
|
||||
device->BindResource(CS, &gbuffer1, TEXSLOT_GBUFFER1, cmd);
|
||||
device->BindResource(CS, &gbuffer2, TEXSLOT_GBUFFER2, cmd);
|
||||
device->BindResource(CS, &texture_median, TEXSLOT_ONDEMAND0, cmd);
|
||||
|
||||
const GPUResource* uavs[] = {
|
||||
&output,
|
||||
};
|
||||
device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd);
|
||||
|
||||
device->Dispatch(
|
||||
(desc.Width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
|
||||
(desc.Height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
|
||||
1,
|
||||
cmd
|
||||
);
|
||||
|
||||
device->Barrier(&GPUBarrier::Memory(), 1, cmd);
|
||||
device->UnbindUAVs(0, arraysize(uavs), cmd);
|
||||
device->EventEnd(cmd);
|
||||
}
|
||||
|
||||
wiProfiler::EndRange(range);
|
||||
device->EventEnd(cmd);
|
||||
}
|
||||
void Postprocess_SSS(
|
||||
const Texture& lineardepth,
|
||||
const Texture& gbuffer0,
|
||||
|
||||
@@ -208,6 +208,16 @@ namespace wiRenderer
|
||||
const wiGraphics::Texture& output,
|
||||
wiGraphics::CommandList cmd
|
||||
);
|
||||
// Stochastic screen-space reflections: raytrace, resolve, temporal, median and combine passes.
// Reads scene color from `input` and writes the final reflection result into `output`
// (which is bound as an unordered access resource by the implementation).
void Postprocess_StochasticSSR(
|
||||
const wiGraphics::Texture& input,
|
||||
const wiGraphics::Texture& depthbuffer,
|
||||
const wiGraphics::Texture& lineardepth_minmax,
|
||||
const wiGraphics::Texture& gbuffer0,
|
||||
const wiGraphics::Texture& gbuffer1,
|
||||
const wiGraphics::Texture& gbuffer2,
|
||||
const wiGraphics::Texture& output,
|
||||
wiGraphics::CommandList cmd
|
||||
);
|
||||
void Postprocess_SSS(
|
||||
const wiGraphics::Texture& lineardepth,
|
||||
const wiGraphics::Texture& gbuffer0,
|
||||
|
||||
@@ -9,7 +9,7 @@ namespace wiVersion
|
||||
// minor features, major updates
|
||||
const int minor = 38;
|
||||
// minor bug fixes, alterations, refactors, updates
|
||||
const int revision = 7;
|
||||
const int revision = 8;
|
||||
|
||||
|
||||
long GetVersion()
|
||||
|
||||
Reference in New Issue
Block a user