rtao and vulkan mesh shader update

This commit is contained in:
Turanszki Janos
2020-09-20 16:14:42 +02:00
parent aa6fff51d5
commit fb8985cf44
13 changed files with 183 additions and 38 deletions
+1
View File
@@ -27,6 +27,7 @@ CBUFFER(PostProcessCB, CBSLOT_RENDERER_POSTPROCESS)
#define rtao_range ssao_range
#define rtao_samplecount ssao_samplecount
#define rtao_power ssao_power
#define rtao_seed xPPParams0.w
static const uint POSTPROCESS_HBAO_THREADCOUNT = 320;
#define hbao_direction xPPParams0.xy
+11
View File
@@ -616,6 +616,17 @@
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">Pixel</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">Pixel</ShaderType>
</FxCompile>
<FxCompile Include="$(MSBuildThisFileDirectory)rtao_temporalCS.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Compute</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">Compute</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">Compute</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Compute</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">Compute</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">Compute</ShaderType>
</FxCompile>
<None Include="$(MSBuildThisFileDirectory)emittedparticleMS.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4.0</ShaderModel>
@@ -1019,5 +1019,8 @@
<FxCompile Include="$(MSBuildThisFileDirectory)shadingRateClassificationCS_DEBUG.hlsl">
<Filter>CS</Filter>
</FxCompile>
<FxCompile Include="$(MSBuildThisFileDirectory)rtao_temporalCS.hlsl">
<Filter>CS</Filter>
</FxCompile>
</ItemGroup>
</Project>
+6 -5
View File
@@ -48,13 +48,14 @@ for item in root.iter():
cmd += "as"
cmd += "_6_5 "
cmd += " -Fo " + "shaders/" + outputdir + "/" + os.path.splitext(name)[0] + ".cso "
#cmd += "-D RAYTRACING_INLINE "
#cmd += "-D RAYTRACING_TIER_1_1 "
cmd += " -flegacy-macro-expansion "
cmd += "-D HLSL6 "
cmd += "-flegacy-macro-expansion -Fo " + "shaders/" + outputdir + "/" + os.path.splitext(name)[0] + ".cso "
cmd += " -D HLSL6 "
#cmd += " -D RAYTRACING_INLINE "
#cmd += " -D RAYTRACING_GEOMETRYINDEX "
print(cmd)
+9 -6
View File
@@ -48,13 +48,12 @@ for item in root.iter():
cmd += "_6_5 "
#cmd += "-D RAYTRACING_INLINE "
cmd += " -Fo " + "shaders/" + outputdir + "/" + os.path.splitext(name)[0] + ".cso "
cmd += "-D SPIRV "
cmd += " -spirv -fvk-use-dx-layout -flegacy-macro-expansion -Fo " + "shaders/" + outputdir + "/" + os.path.splitext(name)[0] + ".cso "
cmd += "-fspv-target-env=vulkan1.2 "
cmd += " -spirv "
cmd += " -fspv-target-env=vulkan1.2 "
cmd += " -fvk-use-dx-layout "
cmd += " -flegacy-macro-expansion "
if profile == "VS" or profile == "DS" or profile == "GS":
cmd += " -fvk-invert-y "
@@ -64,6 +63,10 @@ for item in root.iter():
cmd += " -fvk-u-shift 2000 all "
cmd += " -fvk-s-shift 3000 all "
cmd += " -D SPIRV "
#cmd += " -D RAYTRACING_INLINE "
#cmd += " -D RAYTRACING_GEOMETRYINDEX "
print(cmd)
try:
+4
View File
@@ -98,6 +98,10 @@ void main(
Out.P = mul(g_xCamera_InvV, float4(Out.pos.xyz, 1)).xyz;
Out.pos = mul(g_xCamera_Proj, Out.pos);
#ifdef SPIRV
Out.pos.y = -Out.pos.y;
#endif // SPIRV
Out.tex = float4(uv, uv2);
Out.size = size;
Out.color = (particle.color_mirror & 0x00FFFFFF) | (uint(opacity * 255.0f) << 24);
+8 -8
View File
@@ -4,13 +4,13 @@
RWTEXTURE2D(output, unorm float, 0);
#ifdef RAYTRACING_TIER_1_1
#ifdef RAYTRACING_GEOMETRYINDEX
ConstantBuffer<ShaderMaterial> subsets_material[MAX_DESCRIPTOR_INDEXING] : register(b0, space1);
Texture2D<float4> subsets_texture_baseColor[MAX_DESCRIPTOR_INDEXING] : register(t0, space1);
Buffer<uint> subsets_indexBuffer[MAX_DESCRIPTOR_INDEXING] : register(t100000, space1);
Buffer<float2> subsets_vertexBuffer_UV0[MAX_DESCRIPTOR_INDEXING] : register(t300000, space1);
Buffer<float2> subsets_vertexBuffer_UV1[MAX_DESCRIPTOR_INDEXING] : register(t400000, space1);
#endif // RAYTRACING_TIER_1_1
#endif // RAYTRACING_GEOMETRYINDEX
typedef BuiltInTriangleIntersectionAttributes MyAttributes;
struct RayPayload
@@ -42,7 +42,7 @@ void RTAO_Raygen()
float3 N = normalize(cross(P1 - P0, P2 - P0));
float seed = 666;
float seed = rtao_seed;
RayDesc ray;
ray.TMin = 0.001;
@@ -55,10 +55,10 @@ void RTAO_Raygen()
{
ray.Direction = SampleHemisphere_cos(N, seed, uv);
TraceRay(scene_acceleration_structure,
#ifndef RAYTRACING_TIER_1_1 // tier 1_0 method of alpha test without GeometryIndex() is not implemented yet
#ifndef RAYTRACING_GEOMETRYINDEX // tier 1_0 method of alpha test without GeometryIndex() is not implemented yet
RAY_FLAG_FORCE_OPAQUE |
RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH |
#endif // RAYTRACING_TIER_1_1
#endif // RAYTRACING_GEOMETRYINDEX
RAY_FLAG_SKIP_CLOSEST_HIT_SHADER
, ~0, 0, 1, 0, ray, payload);
}
@@ -76,13 +76,13 @@ void RTAO_ClosestHit(inout RayPayload payload, in MyAttributes attr)
[shader("anyhit")]
void RTAO_AnyHit(inout RayPayload payload, in MyAttributes attr)
{
#ifdef RAYTRACING_TIER_1_1
#ifdef RAYTRACING_GEOMETRYINDEX
float u = attr.barycentrics.x;
float v = attr.barycentrics.y;
float w = 1 - u - v;
uint primitiveIndex = PrimitiveIndex();
uint geometryOffset = InstanceID();
uint geometryIndex = GeometryIndex(); // requires tier_1_1!!
uint geometryIndex = GeometryIndex(); // requires tier_1_1 GeometryIndex feature!!
uint descriptorIndex = geometryOffset + geometryIndex;
ShaderMaterial material = subsets_material[descriptorIndex];
uint i0 = subsets_indexBuffer[descriptorIndex][primitiveIndex / 3 + 0];
@@ -114,7 +114,7 @@ void RTAO_AnyHit(inout RayPayload payload, in MyAttributes attr)
{
payload.color += 1 - baseColor.a;
}
#endif // RAYTRACING_TIER_1_1
#endif // RAYTRACING_GEOMETRYINDEX
}
[shader("miss")]
+87
View File
@@ -0,0 +1,87 @@
#include "globals.hlsli"
#include "stochasticSSRHF.hlsli"
#include "ShaderInterop_Postprocess.h"
// Current frame's input, sampled around the pixel's own uv by ResolverAABB().
TEXTURE2D(resolve_current, float, TEXSLOT_ONDEMAND0);
// Previous temporal result, sampled at the reprojected uv in main().
TEXTURE2D(resolve_history, float, TEXSLOT_ONDEMAND1);
// Receives the blended (current vs. history) value for each dispatched pixel.
RWTEXTURE2D(output, unorm float, 0);
// Lower and upper bound of the history blend factor before the velocity adjustment in main().
static const float temporalResponseMin = 0.85;
static const float temporalResponseMax = 1.0f;
// Passed to ResolverAABB() as AABBScale: how many standard deviations the neighborhood bounds extend from the mean.
static const float temporalScale = 2.0;
// Passed to ResolverAABB() as exposureScale (that parameter is not read by the resolver in this file).
static const float temporalExposure = 10.0f;
// Samples the 3x3 neighborhood of `uv` in `currentColor` and derives variance-clipping bounds from it.
//
//	currentColor / currentSampler : texture and sampler used for the 9 taps (mip 0)
//	sharpness, exposureScale      : not read by this function; kept so the signature stays unchanged
//	AABBScale                     : number of standard deviations the bounds extend from the mean
//	uv                            : center of the neighborhood
//	texelSize                     : the integer tap offsets are DIVIDED by this value, so despite the name
//	                                it has to hold the texture resolution in pixels (main() passes xPPResolution)
//	currentMin / currentMax       : receive mean -/+ AABBScale * stddev, widened to always contain the center tap
//	currentAverage                : receives the neighborhood mean
//	currentOutput                 : receives the center tap
inline void ResolverAABB(Texture2D<float> currentColor, SamplerState currentSampler, float sharpness, float exposureScale, float AABBScale, float2 uv, float2 texelSize, inout float currentMin, inout float currentMax, inout float currentAverage, inout float currentOutput)
{
	const int2 SampleOffset[9] = { int2(-1.0, -1.0), int2(0.0, -1.0), int2(1.0, -1.0), int2(-1.0, 0.0), int2(0.0, 0.0), int2(1.0, 0.0), int2(-1.0, 1.0), int2(0.0, 1.0), int2(1.0, 1.0) };

	float sampleColors[9];

	// First and second raw moments of the neighborhood, accumulated while sampling.
	float m1 = 0.0;
	float m2 = 0.0;

	[unroll]
	for (uint i = 0; i < 9; i++)
	{
		const float tap = currentColor.SampleLevel(currentSampler, uv + (SampleOffset[i] / texelSize), 0.0f);
		sampleColors[i] = tap;
		m1 += tap;
		m2 += tap * tap;
	}

	// Variance Clipping (AABB)
	float mean = m1 / 9.0;

	// E[x^2] - E[x]^2 can round to a tiny negative number when all taps are (nearly) equal;
	// clamp it so sqrt() never produces NaN.
	float variance = max(0.0, (m2 / 9.0) - sqr(mean));
	float stddev = sqrt(variance);

	currentMin = mean - AABBScale * stddev;
	currentMax = mean + AABBScale * stddev;

	// Index 4 is the (0, 0) offset, i.e. the center tap. Make sure the bounds always include it.
	currentOutput = sampleColors[4];
	currentMin = min(currentMin, currentOutput);
	currentMax = max(currentMax, currentOutput);
	currentAverage = mean;
}
// Temporal accumulation: reprojects the history texture to the current pixel
// and blends it with the current frame's value.
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex)
{
	// Texel-center uv of the pixel handled by this thread.
	const float2 uv = (DTid.xy + 0.5f) * xPPResolution_rcp;

	// Rebuild the surface position from depth, then project it with the current and the previous view-projection.
	const float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 0);
	const float4 surfacePos = float4(reconstructPosition(uv, depth, g_xCamera_InvVP), 1.0f);
	const float4 clipNow = mul(g_xCamera_VP, surfacePos);
	const float4 clipPrev = mul(g_xFrame_MainCamera_PrevVP, surfacePos);

	// Perspective divide, then remap to uv space (y is flipped).
	const float2 uvNow = (clipNow.xy * rcp(clipNow.w)) * float2(0.5, -0.5) + 0.5;
	const float2 uvPrev = (clipPrev.xy * rcp(clipPrev.w)) * float2(0.5, -0.5) + 0.5;

	// Camera-induced motion of this surface point between the two frames, in uv units.
	const float2 velocity = uvNow - uvPrev;

	// History lookup at the reprojected location.
	const float previous = resolve_history.SampleLevel(sampler_linear_clamp, uv - velocity, 0);

	// Only the center tap (`current`) of the resolver is consumed below; the neighborhood bounds are not used here.
	float current = 0;
	float neighborhoodMin, neighborhoodMax, neighborhoodMean;
	ResolverAABB(resolve_current, sampler_linear_clamp, 0, temporalExposure, temporalScale, uv, xPPResolution, neighborhoodMin, neighborhoodMax, neighborhoodMean, current);

	// The closer current and history are, the more weight the history gets.
	const float lumDifference = abs(current - previous) / max(current, max(previous, 0.2f));
	float historyWeight = lerp(temporalResponseMin, temporalResponseMax, sqr(1.0f - lumDifference));

	// Reduce ghosting by refreshing the blend by velocity (Unreal)
	const float2 velocityPixels = velocity * xPPResolution;
	const float speed = sqrt(dot(velocityPixels, velocityPixels));
	historyWeight = lerp(historyWeight, 0.2, saturate(speed / 100.0));

	output[DTid.xy] = lerp(current, previous, historyWeight);
}
+1 -6
View File
@@ -92,6 +92,7 @@ inline void ResolverAABB(Texture2D<float4> currentColor, SamplerState currentSam
float2 CalculateCustomMotion(float depth, float2 uv)
{
// Velocity buffer not good, because that contains object motion, and reflection is camera relative
float4 sampleWorldPosition = float4(reconstructPosition(uv, depth, g_xCamera_InvVP), 1.0f);
float4 thisClip = mul(g_xCamera_VP, sampleWorldPosition);
@@ -110,12 +111,6 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3
{
const float2 uv = (DTid.xy + 0.5f) * xPPResolution_rcp;
const float depth = texture_depth.SampleLevel(sampler_point_clamp, uv, 0);
const float3 worldNormal = decodeNormal(texture_gbuffer1.SampleLevel(sampler_point_clamp, uv, 0).xy);
//float4 raytraceSource = texture_raytrace.SampleLevel(sampler_point_clamp, uv, 0);
//float hitDepth = raytraceSource.z;
//float2 hitPixel = raytraceSource.xy;
// Normal velocity seems to work best in most scenarios
float2 customVelocity = CalculateCustomMotion(depth, uv);
+1
View File
@@ -333,6 +333,7 @@ enum CSTYPES
CSTYPE_POSTPROCESS_SSR_RESOLVE,
CSTYPE_POSTPROCESS_SSR_TEMPORAL,
CSTYPE_POSTPROCESS_SSR_MEDIAN,
CSTYPE_POSTPROCESS_RTAO_TEMPORAL,
CSTYPE_POSTPROCESS_LIGHTSHAFTS,
CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_HORIZONTAL,
CSTYPE_POSTPROCESS_DEPTHOFFIELD_TILEMAXCOC_VERTICAL,
+1 -1
View File
@@ -2380,7 +2380,7 @@ using namespace Vulkan_Internal;
if (mesh_shader_features.meshShader == VK_TRUE && mesh_shader_features.taskShader == VK_TRUE)
{
// Currently, creating pipeline state with mesh shader crashes nvidia driver for me, so disable until solved
// Enable mesh shader here (problematic with certain driver versions, disabled by default):
//MESH_SHADER = true;
}
+50 -11
View File
@@ -10467,6 +10467,9 @@ void Postprocess_RTAO(
rtdesc.max_attribute_size_in_bytes = sizeof(XMFLOAT2); // bary
success = device->CreateRaytracingPipelineState(&rtdesc, &RTPSO);
assert(success);
success = LoadShader(CS, computeShaders[CSTYPE_POSTPROCESS_RTAO_TEMPORAL], "rtao_temporalCS.cso");
assert(success);
};
static wiEvent::Handle handle = wiEvent::Subscribe(SYSTEM_EVENT_RELOAD_SHADERS, load_shaders);
@@ -10476,8 +10479,8 @@ void Postprocess_RTAO(
}
static TextureDesc saved_desc;
static Texture temp0;
static Texture temp1;
static Texture temp;
static Texture temporal[2];
const TextureDesc& lineardepth_desc = lineardepth.GetDesc();
if (saved_desc.Width != lineardepth_desc.Width || saved_desc.Height != lineardepth_desc.Height)
@@ -10489,13 +10492,16 @@ void Postprocess_RTAO(
desc.Format = FORMAT_R8_UNORM;
desc.Width = (desc.Width + 1) / 2;
desc.Height = (desc.Height + 1) / 2;
device->CreateTexture(&desc, nullptr, &temp0);
device->SetName(&temp0, "rtao_temp0");
device->CreateTexture(&desc, nullptr, &temp1);
device->SetName(&temp1, "rtao_temp1");
device->CreateTexture(&desc, nullptr, &temp);
device->SetName(&temp, "rtao_temp");
device->CreateTexture(&desc, nullptr, &temporal[0]);
device->SetName(&temporal[0], "rtao_temporal[0]");
device->CreateTexture(&desc, nullptr, &temporal[1]);
device->SetName(&temporal[1], "rtao_temporal[1]");
}
const TextureDesc& desc = temp0.GetDesc();
const TextureDesc& desc = temp.GetDesc();
PostProcessCB cb;
cb.xPPResolution.x = desc.Width;
@@ -10505,13 +10511,14 @@ void Postprocess_RTAO(
cb.rtao_range = range;
cb.rtao_samplecount = (float)samplecount;
cb.rtao_power = power;
cb.rtao_seed = renderTime;
GraphicsDevice::GPUAllocation cb_alloc = device->AllocateGPU(sizeof(cb), cmd);
memcpy(cb_alloc.data, &cb, sizeof(cb));
device->BindRaytracingPipelineState(&RTPSO, cmd);
device->WriteDescriptor(&descriptorTable, 0, 0, &depthbuffer);
device->WriteDescriptor(&descriptorTable, 1, 0, &scene.TLAS);
device->WriteDescriptor(&descriptorTable, 2, 0, &temp0);
device->WriteDescriptor(&descriptorTable, 2, 0, &temp);
device->BindDescriptorTable(RAYTRACING, 0, &descriptorTable, cmd);
device->BindDescriptorTable(RAYTRACING, 1, &scene.descriptorTable, cmd);
device->BindRootDescriptor(RAYTRACING, 0, &constantBuffers[CBTYPE_CAMERA], 0, cmd);
@@ -10551,8 +10558,41 @@ void Postprocess_RTAO(
};
device->Barrier(barriers, arraysize(barriers), cmd);
Postprocess_Blur_Bilateral(temp0, lineardepth, temp1, temp0, cmd, 1.2f, -1, -1, true);
Postprocess_Upsample_Bilateral(temp0, lineardepth, output, cmd);
int temporal_output = device->GetFrameCount() % 2;
int temporal_history = 1 - temporal_output;
// Temporal pass:
{
device->EventBegin("Temporal pass", cmd);
device->BindComputeShader(&computeShaders[CSTYPE_POSTPROCESS_RTAO_TEMPORAL], cmd);
device->BindResource(CS, &depthbuffer, TEXSLOT_DEPTH, cmd);
device->BindResource(CS, &temp, TEXSLOT_ONDEMAND0, cmd);
device->BindResource(CS, &temporal[temporal_history], TEXSLOT_ONDEMAND1, cmd);
const GPUResource* uavs[] = {
&temporal[temporal_output],
};
device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd);
device->Dispatch(
(temporal[temporal_output].GetDesc().Width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(temporal[temporal_output].GetDesc().Height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
1,
cmd
);
GPUBarrier barriers[] = {
GPUBarrier::Memory(),
};
device->Barrier(barriers, arraysize(barriers), cmd);
device->UnbindUAVs(0, arraysize(uavs), cmd);
device->EventEnd(cmd);
}
Postprocess_Blur_Bilateral(temporal[temporal_output], lineardepth, temp, temporal[temporal_output], cmd, 1.2f, -1, -1, true);
Postprocess_Upsample_Bilateral(temporal[temporal_output], lineardepth, output, cmd);
wiProfiler::EndRange(prof_range);
device->EventEnd(cmd);
@@ -10702,7 +10742,6 @@ void Postprocess_SSR(
device->EventBegin("Temporal pass", cmd);
device->BindComputeShader(&computeShaders[CSTYPE_POSTPROCESS_SSR_TEMPORAL], cmd);
device->BindResource(CS, &gbuffer1, TEXSLOT_GBUFFER1, cmd);
device->BindResource(CS, &depthbuffer, TEXSLOT_DEPTH, cmd);
device->BindResource(CS, &texture_resolve, TEXSLOT_ONDEMAND0, cmd);
device->BindResource(CS, &texture_temporal[temporal_history], TEXSLOT_ONDEMAND1, cmd);
+1 -1
View File
@@ -9,7 +9,7 @@ namespace wiVersion
// minor features, major updates, breaking API changes
const int minor = 47;
// minor bug fixes, alterations, refactors, updates
const int revision = 34;
const int revision = 35;
const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);