raytraced shadow: switched to halfres with upsampling

This commit is contained in:
Turánszki János
2024-04-25 08:56:28 +02:00
parent 4dfcd936ec
commit 47d2c02e76
14 changed files with 221 additions and 65 deletions
+1
View File
@@ -196,6 +196,7 @@ wi::vector<ShaderEntry> shaders = {
{"rtshadow_denoise_tileclassificationCS", wi::graphics::ShaderStage::CS },
{"rtshadow_denoise_filterCS", wi::graphics::ShaderStage::CS },
{"rtshadow_denoise_temporalCS", wi::graphics::ShaderStage::CS },
{"rtshadow_upsampleCS", wi::graphics::ShaderStage::CS },
{"rtaoCS", wi::graphics::ShaderStage::CS, wi::graphics::ShaderModel::SM_6_5 },
{"rtao_denoise_tileclassificationCS", wi::graphics::ShaderStage::CS },
{"rtao_denoise_filterCS", wi::graphics::ShaderStage::CS },
@@ -433,6 +433,10 @@
<FxCompile Include="$(MSBuildThisFileDirectory)objectPS_prepass_depthonly_alphatest.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Pixel</ShaderType>
</FxCompile>
<FxCompile Include="$(MSBuildThisFileDirectory)rtshadow_upsampleCS.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4.0</ShaderModel>
</FxCompile>
<FxCompile Include="$(MSBuildThisFileDirectory)screenVS.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Vertex</ShaderType>
</FxCompile>
@@ -1163,6 +1163,9 @@
<FxCompile Include="$(MSBuildThisFileDirectory)ddgi_indirectprepareCS.hlsl">
<Filter>CS</Filter>
</FxCompile>
<FxCompile Include="$(MSBuildThisFileDirectory)rtshadow_upsampleCS.hlsl">
<Filter>CS</Filter>
</FxCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(MSBuildThisFileDirectory)ShaderInterop.h">
@@ -33,7 +33,7 @@ float FFX_DNSR_Shadows_GetDepthSimilaritySigma()
float FFX_DNSR_Shadows_ReadDepth(uint2 did)
{
return texture_depth[did];
return texture_depth[did * 2];
}
float16_t3 FFX_DNSR_Shadows_ReadNormals(uint2 did)
{
@@ -68,7 +68,7 @@ inline void ResolverAABB(in uint shadow_index, float sharpness, float exposureSc
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex)
{
if (texture_depth[DTid.xy] == 0)
if (texture_depth[DTid.xy * 2] == 0)
return;
// first 4 lights are denoised
@@ -46,11 +46,11 @@ float4x4 FFX_DNSR_Shadows_GetReprojectionMatrix()
float FFX_DNSR_Shadows_ReadDepth(uint2 did)
{
return texture_depth[did];
return texture_depth[did * 2];
}
float FFX_DNSR_Shadows_ReadPreviousDepth(int2 idx)
{
return texture_depth_history[idx];
return texture_depth_history[idx * 2];
}
float3 FFX_DNSR_Shadows_ReadNormals(uint2 did)
{
@@ -70,7 +70,7 @@ float FFX_DNSR_Shadows_ReadHistory(float2 history_uv)
}
float2 FFX_DNSR_Shadows_ReadVelocity(uint2 did)
{
return -texture_velocity[did].xy;
return -texture_velocity[did * 2].xy;
}
void FFX_DNSR_Shadows_WriteReprojectionResults(uint2 did, float2 value)
@@ -0,0 +1,115 @@
#include "globals.hlsli"
#include "ShaderInterop_Postprocess.h"
PUSHCONSTANT(postprocess, PostProcess);
Texture2D<uint4> input : register(t0);
Texture2D<float> lineardepth_lowres : register(t1);
RWTexture2DArray<unorm float> output : register(u0);
// Unpacks the shadow term of a single light from the packed per-pixel mask.
//	shadow_index : light slot; every uint component of the mask stores 4 slots, 8 bits each
//	shadow_mask  : packed mask loaded from the shadow mask texture
//	returns      : the slot's 8-bit value divided by 255.0
float load_shadow(in uint shadow_index, in uint4 shadow_mask)
{
	const uint component = shadow_index / 4;		// which uint of the uint4 holds this slot
	const uint bit_offset = (shadow_index % 4) * 8;	// position of the slot's byte inside that uint
	const uint packed = shadow_mask[component];
	const uint value = (packed >> bit_offset) & 0xFF;
	return value / 255.0;
}
// Upsample pass of the raytraced shadows:
//	Reads the packed low resolution shadow mask (input) and writes one shadow value per light
//	into the slices of the output texture array, at the resolution given by postprocess.resolution_rcp.
//	The four low resolution neighbors are blended bilinearly, each weighted by how close
//	its linear depth is to the linear depth of the destination pixel.
//
//	Push constants used:
//		postprocess.resolution_rcp : reciprocal of the output resolution
//		postprocess.params0.xy     : size of the low resolution input in pixels
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex)
{
	uint2 pixel = DTid.xy;
	const float2 uv = (pixel + 0.5f) * postprocess.resolution_rcp;

	// The slice count of the output array limits how many lights can be written:
	uint2 dim;
	uint MAX_RTSHADOWS;
	output.GetDimensions(dim.x, dim.y, MAX_RTSHADOWS);

	const uint2 tileIndex = uint2(floor(pixel / TILED_CULLING_BLOCKSIZE));
	const uint flatTileIndex = flatten2D(tileIndex, GetCamera().entity_culling_tilecount.xy) * SHADER_ENTITY_TILE_BUCKET_COUNT;

	// The application writes the low resolution size into params0.xy for this pass,
	//	so it must be read from params0 (params1 is not written for the upsample):
	const float2 lowres_size = postprocess.params0.xy;

	float2 sam_pixel = uv * lowres_size + (-0.5 + 1.0 / 512.0); // (1.0 / 512.0) correction is described here: https://www.reedbeta.com/blog/texture-gathers-and-coordinate-precision/
	float2 sam_pixel_frac = frac(sam_pixel);

	// 2x2 low resolution footprint:
	//	NOTE(review): the footprint starts at DTid.xy / 2, while sam_pixel_frac is relative to floor(sam_pixel);
	//	these appear to differ by one texel for even destination pixels - confirm this is intended.
	uint2 pixel0 = DTid.xy / 2 + uint2(0, 0);
	uint2 pixel1 = DTid.xy / 2 + uint2(1, 0);
	uint2 pixel2 = DTid.xy / 2 + uint2(0, 1);
	uint2 pixel3 = DTid.xy / 2 + uint2(1, 1);

	uint4 shadow_mask0 = input[pixel0];
	uint4 shadow_mask1 = input[pixel1];
	uint4 shadow_mask2 = input[pixel2];
	uint4 shadow_mask3 = input[pixel3];

	float lineardepth0 = lineardepth_lowres[pixel0] * GetCamera().z_far;
	float lineardepth1 = lineardepth_lowres[pixel1] * GetCamera().z_far;
	float lineardepth2 = lineardepth_lowres[pixel2] * GetCamera().z_far;
	float lineardepth3 = lineardepth_lowres[pixel3] * GetCamera().z_far;

	float lineardepth_highres = texture_lineardepth[pixel] * GetCamera().z_far;

	// Depth aware weights: a neighbor loses weight as its depth diverges from the destination depth.
	//	The 0.001 lower bound keeps the normalization below from dividing by zero.
	float threshold = 2;
	float4 weights = max(0.001, 1 - saturate(abs(float4(lineardepth0, lineardepth1, lineardepth2, lineardepth3) - lineardepth_highres) * threshold));
	// NOTE(review): bilinear() is defined outside of this file - confirm that it expects the components
	//	in the (0,0), (1,0), (0,1), (1,1) order used here.
	float weights_norm = rcp(bilinear(weights, sam_pixel_frac));

	uint shadow_index = 0;

	[branch]
	if (GetFrame().lightarray_count > 0)
	{
		// Loop through light buckets in the tile:
		const uint first_item = GetFrame().lightarray_offset;
		const uint last_item = first_item + GetFrame().lightarray_count - 1;
		const uint first_bucket = first_item / 32;
		const uint last_bucket = min(last_item / 32, max(0, SHADER_ENTITY_TILE_BUCKET_COUNT - 1));
		[loop]
		for (uint bucket = first_bucket; bucket <= last_bucket && shadow_index < MAX_RTSHADOWS; ++bucket)
		{
			uint bucket_bits = load_entitytile(flatTileIndex + bucket);

			// Bucket scalarizer - Siggraph 2017 - Improved Culling [Michal Drobot]:
			bucket_bits = WaveReadLaneFirst(WaveActiveBitOr(bucket_bits));

			[loop]
			while (bucket_bits != 0 && shadow_index < MAX_RTSHADOWS)
			{
				// Retrieve global entity index from local bucket, then remove bit from local bucket:
				const uint bucket_bit_index = firstbitlow(bucket_bits);
				const uint entity_index = bucket * 32 + bucket_bit_index;
				bucket_bits ^= 1u << bucket_bit_index;

				// Check if it is a light and process:
				[branch]
				if (entity_index >= first_item && entity_index <= last_item)
				{
					// The light's slot in the packed mask and its slice in the output array:
					shadow_index = entity_index - GetFrame().lightarray_offset;
					if (shadow_index >= MAX_RTSHADOWS)
						break;

					ShaderEntity light = load_entity(entity_index);

					if (!light.IsCastingShadow())
					{
						continue;
					}

					if (light.GetFlags() & ENTITY_FLAG_LIGHT_STATIC)
					{
						continue; // static lights will be skipped (they are used in lightmap baking)
					}

					float shadow0 = load_shadow(shadow_index, shadow_mask0);
					float shadow1 = load_shadow(shadow_index, shadow_mask1);
					float shadow2 = load_shadow(shadow_index, shadow_mask2);
					float shadow3 = load_shadow(shadow_index, shadow_mask3);

					// Weighted bilinear blend, renormalized by the blended weights:
					float shadow = bilinear(float4(shadow0,shadow1,shadow2,shadow3) * weights, sam_pixel_frac);
					shadow *= weights_norm;

					output[uint3(pixel, shadow_index)] = shadow;
				}
			}
		}
	}
}
+21 -5
View File
@@ -7,11 +7,22 @@
PUSHCONSTANT(postprocess, PostProcess);
static const uint MAX_RTSHADOWS = 16;
RWTexture2D<uint4> output : register(u0);
#ifdef RTSHADOW
RWTexture2D<uint4> output : register(u0);
RWTexture2D<float3> output_normals : register(u1);
RWStructuredBuffer<uint4> output_tiles : register(u2);
static const uint DOWNSAMPLE = 2;
#else
static const uint DOWNSAMPLE = 1;
RWTexture2DArray<unorm float> output : register(u0);
// Unpacks the 8-bit shadow value of one light slot from the packed mask and returns it divided by 255.0.
//	shadow_index : slot to read; slot i is stored in component (i / 4) of the mask, at bit offset (i % 4) * 8
//	shadow_mask  : packed mask, 4 slots per uint component
float load_shadow(in uint shadow_index, in uint4 shadow_mask)
{
// Bit offset of the slot's byte inside its uint component:
uint mask_shift = (shadow_index % 4) * 8;
// Index of the uint component that contains the slot:
uint mask_bucket = shadow_index / 4;
uint mask = (shadow_mask[mask_bucket] >> mask_shift) & 0xFF;
return mask / 255.0;
}
#endif // RTSHADOW
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
@@ -36,7 +47,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid :
#endif // RTSHADOW
float3 P = reconstruct_position(uv, depth);
float3 N = decode_oct(texture_normal[DTid.xy]);
float3 N = decode_oct(texture_normal[DTid.xy * DOWNSAMPLE]);
Surface surface;
surface.init();
@@ -45,10 +56,10 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid :
const float4 bluenoise = blue_noise(DTid.xy);
const uint2 tileIndex = uint2(floor(DTid.xy / TILED_CULLING_BLOCKSIZE));
const uint2 tileIndex = uint2(floor(DTid.xy * DOWNSAMPLE / TILED_CULLING_BLOCKSIZE));
const uint flatTileIndex = flatten2D(tileIndex, GetCamera().entity_culling_tilecount.xy) * SHADER_ENTITY_TILE_BUCKET_COUNT;
uint shadow_mask[4] = {0,0,0,0}; // FXC issue: can't dynamically index into uint4, unless unrolling all loops
uint4 shadow_mask = 0;
uint shadow_index = 0;
RayDesc ray;
@@ -307,7 +318,12 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid :
uint bit = ((shadow_mask[0] >> (i * 8)) & 0xFF) ? (1u << lane_index) : 0;
InterlockedOr(output_tiles[flatTileIdx][i], bit);
}
output[DTid.xy] = uint4(shadow_mask[0], shadow_mask[1], shadow_mask[2], shadow_mask[3]);
#else
for(uint i = 0; i < 16; ++i)
{
output[uint3(DTid.xy, i)] = load_shadow(i, shadow_mask);
}
#endif // RTSHADOW
output[DTid.xy] = uint4(shadow_mask[0], shadow_mask[1], shadow_mask[2], shadow_mask[3]);
}
+3 -20
View File
@@ -386,16 +386,6 @@ inline void TiledLighting(inout Surface surface, inout Lighting lighting, uint f
[branch]
if (GetFrame().lightarray_count > 0)
{
#if defined(SHADOW_MASK_ENABLED) && !defined(TRANSPARENT)
const bool shadow_mask_enabled = (GetFrame().options & OPTION_BIT_SHADOW_MASK) && GetCamera().texture_rtshadow_index >= 0;
uint4 shadow_mask_packed = 0;
[branch]
if(shadow_mask_enabled)
{
shadow_mask_packed = bindless_textures_uint4[GetCamera().texture_rtshadow_index][surface.pixel];
}
#endif // SHADOW_MASK_ENABLED && !TRANSPARENT
// Loop through light buckets in the tile:
const uint first_item = GetFrame().lightarray_offset;
const uint last_item = first_item + GetFrame().lightarray_count - 1;
@@ -430,20 +420,13 @@ inline void TiledLighting(inout Surface surface, inout Lighting lighting, uint f
float shadow_mask = 1;
#if defined(SHADOW_MASK_ENABLED) && !defined(TRANSPARENT)
[branch]
if (shadow_mask_enabled && light.IsCastingShadow())
if (light.IsCastingShadow() && (GetFrame().options & OPTION_BIT_SHADOW_MASK) && GetCamera().texture_rtshadow_index >= 0)
{
uint shadow_index = entity_index - GetFrame().lightarray_offset;
if (shadow_index < 16)
{
uint mask_shift = (shadow_index % 4) * 8;
uint mask_bucket = shadow_index / 4;
uint mask = (shadow_mask_packed[mask_bucket] >> mask_shift) & 0xFF;
[branch]
if (mask == 0)
{
continue;
}
shadow_mask = mask / 255.0;
shadow_mask = bindless_textures2DArray[GetCamera().texture_rtshadow_index][uint3(surface.pixel, shadow_index)].r;
}
}
#endif // SHADOW_MASK_ENABLED && !TRANSPARENT
+1
View File
@@ -374,6 +374,7 @@ namespace wi::enums
CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_TILECLASSIFICATION,
CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_FILTER,
CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_TEMPORAL,
CSTYPE_POSTPROCESS_RTSHADOW_UPSAMPLE,
CSTYPE_POSTPROCESS_RTAO,
CSTYPE_POSTPROCESS_RTAO_DENOISE_TILECLASSIFICATION,
CSTYPE_POSTPROCESS_RTAO_DENOISE_FILTER,
+2 -1
View File
@@ -594,7 +594,8 @@ namespace wi
{
TextureDesc desc;
desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
desc.format = Format::R32G32B32A32_UINT;
desc.format = Format::R8_UNORM;
desc.array_size = 16;
desc.width = internalResolution.x;
desc.height = internalResolution.y;
desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE;
+63 -32
View File
@@ -1058,6 +1058,7 @@ void LoadShaders()
wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_TILECLASSIFICATION], "rtshadow_denoise_tileclassificationCS.cso"); });
wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_FILTER], "rtshadow_denoise_filterCS.cso"); });
wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_TEMPORAL], "rtshadow_denoise_temporalCS.cso"); });
wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_RTSHADOW_UPSAMPLE], "rtshadow_upsampleCS.cso"); });
wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_RTAO], "rtaoCS.cso", ShaderModel::SM_6_5); });
wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_RTAO_DENOISE_TILECLASSIFICATION], "rtao_denoise_tileclassificationCS.cso"); });
@@ -13824,12 +13825,14 @@ void CreateRTShadowResources(RTShadowResources& res, XMUINT2 resolution)
res.frame = 0;
TextureDesc desc;
desc.width = resolution.x;
desc.height = resolution.y;
desc.width = resolution.x / 2;
desc.height = resolution.y / 2;
desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE;
desc.format = Format::R32G32B32A32_UINT;
device->CreateTexture(&desc, nullptr, &res.raytraced);
device->SetName(&res.raytraced, "raytraced");
device->CreateTexture(&desc, nullptr, &res.temporal[0]);
device->SetName(&res.temporal[0], "rtshadow_temporal[0]");
device->CreateTexture(&desc, nullptr, &res.temporal[1]);
@@ -13892,7 +13895,7 @@ void Postprocess_RTShadow(
{
// Maybe we don't need to clear them all, but it's safer this way:
GPUBarrier barriers[] = {
GPUBarrier::Image(&output, output.desc.layout, ResourceState::UNORDERED_ACCESS),
GPUBarrier::Image(&res.raytraced, res.raytraced.desc.layout, ResourceState::UNORDERED_ACCESS),
GPUBarrier::Image(&res.temporal[0], res.temporal[0].desc.layout, ResourceState::UNORDERED_ACCESS),
GPUBarrier::Image(&res.temporal[1], res.temporal[1].desc.layout, ResourceState::UNORDERED_ACCESS),
GPUBarrier::Image(&res.denoised, res.denoised.desc.layout, ResourceState::UNORDERED_ACCESS),
@@ -13915,7 +13918,7 @@ void Postprocess_RTShadow(
GPUBarrier::Image(&res.moments[3][1], res.moments[3][1].desc.layout, ResourceState::UNORDERED_ACCESS),
};
device->Barrier(barriers, arraysize(barriers), cmd);
device->ClearUAV(&output, 0, cmd);
device->ClearUAV(&res.raytraced, 0, cmd);
device->ClearUAV(&res.temporal[0], 0, cmd);
device->ClearUAV(&res.temporal[1], 0, cmd);
device->ClearUAV(&res.denoised, 0, cmd);
@@ -13943,8 +13946,6 @@ void Postprocess_RTShadow(
device->Barrier(barriers, arraysize(barriers), cmd);
}
const TextureDesc& desc = output.GetDesc();
BindCommonResources(cmd);
device->EventBegin("Raytrace", cmd);
@@ -13952,8 +13953,8 @@ void Postprocess_RTShadow(
device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_RTSHADOW], cmd);
PostProcess postprocess = {};
postprocess.resolution.x = desc.width;
postprocess.resolution.y = desc.height;
postprocess.resolution.x = res.raytraced.desc.width;
postprocess.resolution.y = res.raytraced.desc.height;
postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
postprocess.params0.w = (float)res.frame;
@@ -13962,7 +13963,7 @@ void Postprocess_RTShadow(
device->PushConstants(&postprocess, sizeof(postprocess), cmd);
const GPUResource* uavs[] = {
&output,
&res.raytraced,
&res.normals,
&res.tiles
};
@@ -13971,22 +13972,25 @@ void Postprocess_RTShadow(
{
GPUBarrier barriers[] = {
GPUBarrier::Image(&output, output.desc.layout, ResourceState::UNORDERED_ACCESS),
GPUBarrier::Image(&res.raytraced, res.raytraced.desc.layout, ResourceState::UNORDERED_ACCESS),
GPUBarrier::Image(&res.normals, res.normals.desc.layout, ResourceState::UNORDERED_ACCESS),
GPUBarrier::Buffer(&res.tiles, ResourceState::SHADER_RESOURCE_COMPUTE, ResourceState::UNORDERED_ACCESS),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
device->ClearUAV(&output, 0, cmd);
device->Dispatch(
(desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(postprocess.resolution.x + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(postprocess.resolution.y + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
1,
cmd
);
{
GPUBarrier barriers[] = {
GPUBarrier::Image(&output, ResourceState::UNORDERED_ACCESS, output.desc.layout),
GPUBarrier::Image(&res.raytraced, ResourceState::UNORDERED_ACCESS, res.raytraced.desc.layout),
GPUBarrier::Image(&res.normals, ResourceState::UNORDERED_ACCESS, res.normals.desc.layout),
GPUBarrier::Buffer(&res.tiles, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE_COMPUTE),
};
@@ -14036,8 +14040,8 @@ void Postprocess_RTShadow(
device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
device->Dispatch(
(desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(postprocess.resolution.x + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(postprocess.resolution.y + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
1,
cmd
);
@@ -14098,8 +14102,8 @@ void Postprocess_RTShadow(
device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
device->Dispatch(
(desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(postprocess.resolution.x + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(postprocess.resolution.y + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
1,
cmd
);
@@ -14138,8 +14142,8 @@ void Postprocess_RTShadow(
device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
device->Dispatch(
(desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(postprocess.resolution.x + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(postprocess.resolution.y + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
1,
cmd
);
@@ -14178,8 +14182,8 @@ void Postprocess_RTShadow(
device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
device->Dispatch(
(desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(postprocess.resolution.x + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(postprocess.resolution.y + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
1,
cmd
);
@@ -14207,7 +14211,7 @@ void Postprocess_RTShadow(
device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_TEMPORAL], cmd);
device->PushConstants(&postprocess, sizeof(postprocess), cmd);
device->BindResource(&output, 0, cmd);
device->BindResource(&res.raytraced, 0, cmd);
device->BindResource(&res.temporal[temporal_history], 1, cmd);
device->BindResource(&res.denoised, 3, cmd);
@@ -14224,8 +14228,8 @@ void Postprocess_RTShadow(
}
device->Dispatch(
(desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(postprocess.resolution.x + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(postprocess.resolution.y + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
1,
cmd
);
@@ -14233,6 +14237,7 @@ void Postprocess_RTShadow(
{
GPUBarrier barriers[] = {
GPUBarrier::Image(&res.temporal[temporal_output], ResourceState::UNORDERED_ACCESS, res.temporal[temporal_output].desc.layout),
GPUBarrier::Memory(&output),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
@@ -14240,18 +14245,44 @@ void Postprocess_RTShadow(
device->EventEnd(cmd);
}
postprocess.resolution.x = output.desc.width;
postprocess.resolution.y = output.desc.height;
postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
postprocess.params0.x = (float)res.raytraced.desc.width;
postprocess.params0.y = (float)res.raytraced.desc.height;
postprocess.params0.z = 1.0f / postprocess.params0.x;
postprocess.params0.w = 1.0f / postprocess.params0.y;
// Upsample pass:
{
GPUBarrier barriers[] = {
GPUBarrier::Image(&output, output.desc.layout, ResourceState::COPY_DST),
GPUBarrier::Image(&res.temporal[temporal_output], output.desc.layout, ResourceState::COPY_SRC),
device->EventBegin("Upsample", cmd);
device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_RTSHADOW_UPSAMPLE], cmd);
device->PushConstants(&postprocess, sizeof(postprocess), cmd);
device->BindResource(&res.temporal[temporal_output], 0, cmd);
device->BindResource(&lineardepth, 1, cmd, 1);
const GPUResource* uavs[] = {
&output,
};
device->Barrier(barriers, arraysize(barriers), cmd);
device->CopyResource(&output, &res.temporal[temporal_output], cmd);
for (auto& x : barriers)
device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
device->Dispatch(
(postprocess.resolution.x + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
(postprocess.resolution.y + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
1,
cmd
);
{
std::swap(x.image.layout_before, x.image.layout_after);
GPUBarrier barriers[] = {
GPUBarrier::Image(&output, ResourceState::UNORDERED_ACCESS, res.temporal[temporal_output].desc.layout),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
device->Barrier(barriers, arraysize(barriers), cmd);
device->EventEnd(cmd);
}
wi::profiler::EndRange(prof_range);
@@ -17141,7 +17172,7 @@ void AddDeferredBlockCompression(const wi::graphics::Texture& texture_src, const
void SetTransparentShadowsEnabled(float value) { TRANSPARENTSHADOWSENABLED = value; }
void SetTransparentShadowsEnabled(bool value) { TRANSPARENTSHADOWSENABLED = value; }
float GetTransparentShadowsEnabled() { return TRANSPARENTSHADOWSENABLED; }
void SetWireRender(bool value) { wireRender = value; }
bool IsWireRender() { return wireRender; }
+2 -1
View File
@@ -623,6 +623,7 @@ namespace wi::renderer
);
struct RTShadowResources
{
wi::graphics::Texture raytraced;
wi::graphics::Texture temporal[2];
wi::graphics::Texture normals;
@@ -1004,7 +1005,7 @@ namespace wi::renderer
void SetTransparentShadowsEnabled(float value);
void SetTransparentShadowsEnabled(bool value);
float GetTransparentShadowsEnabled();
void SetWireRender(bool value);
bool IsWireRender();
+1 -1
View File
@@ -9,7 +9,7 @@ namespace wi::version
// minor features, major updates, breaking compatibility changes
const int minor = 71;
// minor bug fixes, alterations, refactors, updates
const int revision = 441;
const int revision = 442;
const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);