Ray traced shadows: trace and denoise at half resolution, upsample per light

What the patch does, as far as the hunks below show:
  - CreateRTShadowResources() creates the packed shadow mask textures at
    resolution / 2 and adds RTShadowResources::raytraced as the raytrace target.
  - The RTSHADOW variant of screenspaceshadowCS and the rtshadow denoiser passes
    are dispatched over the res.raytraced size; full resolution textures are
    addressed with "* 2" / "* DOWNSAMPLE" in those shaders.
  - The new rtshadow_upsampleCS pass expands the packed low resolution mask into
    the final output, one array slice per light, using a depth weighted bilinear blend.
  - The final shadow texture becomes an R8_UNORM array with 16 slices, and
    shadingHF.hlsli reads the slice selected by shadow_index instead of unpacking bits.

Review fixes folded into the added lines:
  - Postprocess_RTShadow(): the low resolution size is written to
    postprocess.params1, because rtshadow_upsampleCS reads params1.xy / params1.zw
    (it was written to params0).
  - Postprocess_RTShadow(): the final barrier returns "output" to output.desc.layout
    (it used the layout of res.temporal[temporal_output]).
  - rtshadow_upsampleCS.hlsl: the 2x2 footprint starts at floor(sam_pixel) and is
    clamped to the low resolution texture, so it agrees with sam_pixel_frac
    (it started at DTid.xy / 2).
  - screenspaceshadowCS.hlsl: the slice loop uses MAX_RTSHADOWS instead of a literal 16.

Notes on this copy of the patch:
  - Hunk headers and index lines are kept verbatim, except the new file hunk of
    rtshadow_upsampleCS.hlsl, which is recounted to 123 lines.
  - Blank context lines and original indentation were not preserved in the text this
    was rebuilt from, and some angle bracket markup (XML tags in the .vcxitems hunks,
    template arguments) was already missing; none of that is reconstructed here.
    Regenerate the patch from the working tree before applying it.

diff --git a/WickedEngine/offlineshadercompiler.cpp b/WickedEngine/offlineshadercompiler.cpp
index 116f56d9a..dcc1c4811 100644
--- a/WickedEngine/offlineshadercompiler.cpp
+++ b/WickedEngine/offlineshadercompiler.cpp
@@ -196,6 +196,7 @@ wi::vector shaders = {
 	{"rtshadow_denoise_tileclassificationCS", wi::graphics::ShaderStage::CS },
 	{"rtshadow_denoise_filterCS", wi::graphics::ShaderStage::CS },
 	{"rtshadow_denoise_temporalCS", wi::graphics::ShaderStage::CS },
+	{"rtshadow_upsampleCS", wi::graphics::ShaderStage::CS },
 	{"rtaoCS", wi::graphics::ShaderStage::CS, wi::graphics::ShaderModel::SM_6_5 },
 	{"rtao_denoise_tileclassificationCS", wi::graphics::ShaderStage::CS },
 	{"rtao_denoise_filterCS", wi::graphics::ShaderStage::CS },
diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems b/WickedEngine/shaders/Shaders_SOURCE.vcxitems
index 6850a489b..7e6f46bdf 100644
--- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems
+++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems
@@ -433,6 +433,10 @@
 Pixel
+
+ Compute
+ 4.0
+
 Vertex
diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters
index 4374cacc3..515eadc8d 100644
--- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters
+++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters
@@ -1163,6 +1163,9 @@
 CS
+
+ CS
+
diff --git a/WickedEngine/shaders/rtshadow_denoise_filterCS.hlsl b/WickedEngine/shaders/rtshadow_denoise_filterCS.hlsl
index be627db0f..fb0d0d61b 100644
--- a/WickedEngine/shaders/rtshadow_denoise_filterCS.hlsl
+++ b/WickedEngine/shaders/rtshadow_denoise_filterCS.hlsl
@@ -33,7 +33,7 @@ float FFX_DNSR_Shadows_GetDepthSimilaritySigma()
 float FFX_DNSR_Shadows_ReadDepth(uint2 did)
 {
-	return texture_depth[did];
+	return texture_depth[did * 2];
 }
 float16_t3 FFX_DNSR_Shadows_ReadNormals(uint2 did)
 {
diff --git a/WickedEngine/shaders/rtshadow_denoise_temporalCS.hlsl b/WickedEngine/shaders/rtshadow_denoise_temporalCS.hlsl
index d659926ce..92e07d70d 100644
--- a/WickedEngine/shaders/rtshadow_denoise_temporalCS.hlsl
+++ b/WickedEngine/shaders/rtshadow_denoise_temporalCS.hlsl
@@ -68,7 +68,7 @@ inline void ResolverAABB(in uint shadow_index, float sharpness, float exposureSc
 [numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
 void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex)
 {
-	if (texture_depth[DTid.xy] == 0)
+	if (texture_depth[DTid.xy * 2] == 0)
 		return;
 	// first 4 lights are denoised
diff --git a/WickedEngine/shaders/rtshadow_denoise_tileclassificationCS.hlsl b/WickedEngine/shaders/rtshadow_denoise_tileclassificationCS.hlsl
index 9dbf1b23e..dfd6eb2df 100644
--- a/WickedEngine/shaders/rtshadow_denoise_tileclassificationCS.hlsl
+++ b/WickedEngine/shaders/rtshadow_denoise_tileclassificationCS.hlsl
@@ -46,11 +46,11 @@ float4x4 FFX_DNSR_Shadows_GetReprojectionMatrix()
 float FFX_DNSR_Shadows_ReadDepth(uint2 did)
 {
-	return texture_depth[did];
+	return texture_depth[did * 2];
 }
 float FFX_DNSR_Shadows_ReadPreviousDepth(int2 idx)
 {
-	return texture_depth_history[idx];
+	return texture_depth_history[idx * 2];
 }
 float3 FFX_DNSR_Shadows_ReadNormals(uint2 did)
 {
@@ -70,7 +70,7 @@ float FFX_DNSR_Shadows_ReadHistory(float2 history_uv)
 }
 float2 FFX_DNSR_Shadows_ReadVelocity(uint2 did)
 {
-	return -texture_velocity[did].xy;
+	return -texture_velocity[did * 2].xy;
 }
 void FFX_DNSR_Shadows_WriteReprojectionResults(uint2 did, float2 value)
diff --git a/WickedEngine/shaders/rtshadow_upsampleCS.hlsl b/WickedEngine/shaders/rtshadow_upsampleCS.hlsl
new file mode 100644
index 000000000..b62cc888c
--- /dev/null
+++ b/WickedEngine/shaders/rtshadow_upsampleCS.hlsl
@@ -0,0 +1,123 @@
+#include "globals.hlsli"
+#include "ShaderInterop_Postprocess.h"
+
+// Upsamples the packed low resolution shadow mask into one slice of the output array per light.
+//	The four nearest low resolution texels are blended bilinearly, weighted by linear depth similarity.
+PUSHCONSTANT(postprocess, PostProcess);
+
+Texture2D input : register(t0);
+Texture2D lineardepth_lowres : register(t1);
+
+RWTexture2DArray output : register(u0);
+
+// Extracts the 8-bit value of one light from the packed mask (four lights per uint) and maps it to [0, 1]
+float load_shadow(in uint shadow_index, in uint4 shadow_mask)
+{
+	uint mask_shift = (shadow_index % 4) * 8;
+	uint mask_bucket = shadow_index / 4;
+	uint mask = (shadow_mask[mask_bucket] >> mask_shift) & 0xFF;
+	return mask / 255.0;
+}
+
+[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
+void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex)
+{
+	uint2 pixel = DTid.xy;
+	const float2 uv = (pixel + 0.5f) * postprocess.resolution_rcp;
+
+	uint2 dim;
+	uint MAX_RTSHADOWS;
+	output.GetDimensions(dim.x, dim.y, MAX_RTSHADOWS);
+
+	const uint2 tileIndex = uint2(floor(pixel / TILED_CULLING_BLOCKSIZE));
+	const uint flatTileIndex = flatten2D(tileIndex, GetCamera().entity_culling_tilecount.xy) * SHADER_ENTITY_TILE_BUCKET_COUNT;
+
+	const float2 lowres_size = postprocess.params1.xy; // size of the low resolution input in texels
+	const float2 lowres_texel_size = postprocess.params1.zw;
+
+	float2 sam_pixel = uv * lowres_size + (-0.5 + 1.0 / 512.0); // (1.0 / 512.0) correction is described here: https://www.reedbeta.com/blog/texture-gathers-and-coordinate-precision/
+	float2 sam_pixel_frac = frac(sam_pixel);
+
+	// The bilinear footprint has to start at floor(sam_pixel) so that it agrees with sam_pixel_frac,
+	//	coordinates are clamped to the low resolution texture because the footprint can extend past its edges:
+	const int2 sam_base = int2(floor(sam_pixel));
+	const int2 sam_max = int2(lowres_size) - 1;
+	uint2 pixel0 = uint2(clamp(sam_base + int2(0, 0), int2(0, 0), sam_max));
+	uint2 pixel1 = uint2(clamp(sam_base + int2(1, 0), int2(0, 0), sam_max));
+	uint2 pixel2 = uint2(clamp(sam_base + int2(0, 1), int2(0, 0), sam_max));
+	uint2 pixel3 = uint2(clamp(sam_base + int2(1, 1), int2(0, 0), sam_max));
+	uint4 shadow_mask0 = input[pixel0];
+	uint4 shadow_mask1 = input[pixel1];
+	uint4 shadow_mask2 = input[pixel2];
+	uint4 shadow_mask3 = input[pixel3];
+	float lineardepth0 = lineardepth_lowres[pixel0] * GetCamera().z_far;
+	float lineardepth1 = lineardepth_lowres[pixel1] * GetCamera().z_far;
+	float lineardepth2 = lineardepth_lowres[pixel2] * GetCamera().z_far;
+	float lineardepth3 = lineardepth_lowres[pixel3] * GetCamera().z_far;
+	float lineardepth_highres = texture_lineardepth[pixel] * GetCamera().z_far;
+
+	// Low resolution texels whose depth differs from the full resolution depth contribute less:
+	float threshold = 2;
+	float4 weights = max(0.001, 1 - saturate(abs(float4(lineardepth0, lineardepth1, lineardepth2, lineardepth3) - lineardepth_highres) * threshold));
+	float weights_norm = rcp(bilinear(weights, sam_pixel_frac));
+
+	uint shadow_index = 0;
+
+	[branch]
+	if (GetFrame().lightarray_count > 0)
+	{
+		// Loop through light buckets in the tile:
+		const uint first_item = GetFrame().lightarray_offset;
+		const uint last_item = first_item + GetFrame().lightarray_count - 1;
+		const uint first_bucket = first_item / 32;
+		const uint last_bucket = min(last_item / 32, max(0, SHADER_ENTITY_TILE_BUCKET_COUNT - 1));
+		[loop]
+		for (uint bucket = first_bucket; bucket <= last_bucket && shadow_index < MAX_RTSHADOWS; ++bucket)
+		{
+			uint bucket_bits = load_entitytile(flatTileIndex + bucket);
+
+			// Bucket scalarizer - Siggraph 2017 - Improved Culling [Michal Drobot]:
+			bucket_bits = WaveReadLaneFirst(WaveActiveBitOr(bucket_bits));
+
+			[loop]
+			while (bucket_bits != 0 && shadow_index < MAX_RTSHADOWS)
+			{
+				// Retrieve global entity index from local bucket, then remove bit from local bucket:
+				const uint bucket_bit_index = firstbitlow(bucket_bits);
+				const uint entity_index = bucket * 32 + bucket_bit_index;
+				bucket_bits ^= 1u << bucket_bit_index;
+
+				// Check if it is a light and process:
+				[branch]
+				if (entity_index >= first_item && entity_index <= last_item)
+				{
+					shadow_index = entity_index - GetFrame().lightarray_offset;
+					if (shadow_index >= MAX_RTSHADOWS)
+						break;
+
+					ShaderEntity light = load_entity(entity_index);
+
+					if (!light.IsCastingShadow())
+					{
+						continue;
+					}
+
+					if (light.GetFlags() & ENTITY_FLAG_LIGHT_STATIC)
+					{
+						continue; // static lights will be skipped (they are used in lightmap baking)
+					}
+
+					float shadow0 = load_shadow(shadow_index, shadow_mask0);
+					float shadow1 = load_shadow(shadow_index, shadow_mask1);
+					float shadow2 = load_shadow(shadow_index, shadow_mask2);
+					float shadow3 = load_shadow(shadow_index, shadow_mask3);
+
+					float shadow = bilinear(float4(shadow0,shadow1,shadow2,shadow3) * weights, sam_pixel_frac);
+					shadow *= weights_norm;
+
+					output[uint3(pixel, shadow_index)] = shadow;
+				}
+			}
+		}
+	}
+}
diff --git a/WickedEngine/shaders/screenspaceshadowCS.hlsl b/WickedEngine/shaders/screenspaceshadowCS.hlsl
index 935c6da78..20028a8af 100644
--- a/WickedEngine/shaders/screenspaceshadowCS.hlsl
+++ b/WickedEngine/shaders/screenspaceshadowCS.hlsl
@@ -7,11 +7,22 @@ PUSHCONSTANT(postprocess, PostProcess);
 static const uint MAX_RTSHADOWS = 16;
-RWTexture2D output : register(u0);
 #ifdef RTSHADOW
+RWTexture2D output : register(u0);
 RWTexture2D output_normals : register(u1);
 RWStructuredBuffer output_tiles : register(u2);
+static const uint DOWNSAMPLE = 2;
+#else
+static const uint DOWNSAMPLE = 1;
+RWTexture2DArray output : register(u0);
+float load_shadow(in uint shadow_index, in uint4 shadow_mask)
+{
+	uint mask_shift = (shadow_index % 4) * 8;
+	uint mask_bucket = shadow_index / 4;
+	uint mask = (shadow_mask[mask_bucket] >> mask_shift) & 0xFF;
+	return mask / 255.0;
+}
 #endif // RTSHADOW
 [numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
@@ -36,7 +47,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid :
 #endif // RTSHADOW
 	float3 P = reconstruct_position(uv, depth);
-	float3 N = decode_oct(texture_normal[DTid.xy]);
+	float3 N = decode_oct(texture_normal[DTid.xy * DOWNSAMPLE]);
 	Surface surface;
 	surface.init();
@@ -45,10 +56,10 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid :
 	const float4 bluenoise = blue_noise(DTid.xy);
-	const uint2 tileIndex = uint2(floor(DTid.xy / TILED_CULLING_BLOCKSIZE));
+	const uint2 tileIndex = uint2(floor(DTid.xy * DOWNSAMPLE / TILED_CULLING_BLOCKSIZE));
 	const uint flatTileIndex = flatten2D(tileIndex, GetCamera().entity_culling_tilecount.xy) * SHADER_ENTITY_TILE_BUCKET_COUNT;
-	uint shadow_mask[4] = {0,0,0,0}; // FXC issue: can't dynamically index into uint4, unless unrolling all loops
+	uint4 shadow_mask = 0;
 	uint shadow_index = 0;
 	RayDesc ray;
@@ -307,7 +318,12 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid :
 		uint bit = ((shadow_mask[0] >> (i * 8)) & 0xFF) ? (1u << lane_index) : 0;
 		InterlockedOr(output_tiles[flatTileIdx][i], bit);
 	}
+	output[DTid.xy] = uint4(shadow_mask[0], shadow_mask[1], shadow_mask[2], shadow_mask[3]);
+#else
+	for(uint i = 0; i < MAX_RTSHADOWS; ++i) // one output array slice per packed light
+	{
+		output[uint3(DTid.xy, i)] = load_shadow(i, shadow_mask);
+	}
 #endif // RTSHADOW
-	output[DTid.xy] = uint4(shadow_mask[0], shadow_mask[1], shadow_mask[2], shadow_mask[3]);
 }
diff --git a/WickedEngine/shaders/shadingHF.hlsli b/WickedEngine/shaders/shadingHF.hlsli
index 36dd68155..e6fc56bc5 100644
--- a/WickedEngine/shaders/shadingHF.hlsli
+++ b/WickedEngine/shaders/shadingHF.hlsli
@@ -386,16 +386,6 @@ inline void TiledLighting(inout Surface surface, inout Lighting lighting, uint f
 	[branch]
 	if (GetFrame().lightarray_count > 0)
 	{
-#if defined(SHADOW_MASK_ENABLED) && !defined(TRANSPARENT)
-		const bool shadow_mask_enabled = (GetFrame().options & OPTION_BIT_SHADOW_MASK) && GetCamera().texture_rtshadow_index >= 0;
-		uint4 shadow_mask_packed = 0;
-		[branch]
-		if(shadow_mask_enabled)
-		{
-			shadow_mask_packed = bindless_textures_uint4[GetCamera().texture_rtshadow_index][surface.pixel];
-		}
-#endif // SHADOW_MASK_ENABLED && !TRANSPARENT
-
 		// Loop through light buckets in the tile:
 		const uint first_item = GetFrame().lightarray_offset;
 		const uint last_item = first_item + GetFrame().lightarray_count - 1;
@@ -430,20 +420,13 @@ inline void TiledLighting(inout Surface surface, inout Lighting lighting, uint f
 					float shadow_mask = 1;
 #if defined(SHADOW_MASK_ENABLED) && !defined(TRANSPARENT)
 					[branch]
-					if (shadow_mask_enabled && light.IsCastingShadow())
+					if (light.IsCastingShadow() && (GetFrame().options & OPTION_BIT_SHADOW_MASK) && GetCamera().texture_rtshadow_index >= 0)
 					{
+						uint shadow_index = entity_index - GetFrame().lightarray_offset;
 						if (shadow_index < 16)
 						{
-							uint mask_shift = (shadow_index % 4) * 8;
-							uint mask_bucket = shadow_index / 4;
-							uint mask = (shadow_mask_packed[mask_bucket] >> mask_shift) & 0xFF;
-							[branch]
-							if (mask == 0)
-							{
-								continue;
-							}
-							shadow_mask = mask / 255.0;
+							shadow_mask = bindless_textures2DArray[GetCamera().texture_rtshadow_index][uint3(surface.pixel, shadow_index)].r;
 						}
 					}
 #endif // SHADOW_MASK_ENABLED && !TRANSPARENT
diff --git a/WickedEngine/wiEnums.h b/WickedEngine/wiEnums.h
index 43db3346b..dc2007ff5 100644
--- a/WickedEngine/wiEnums.h
+++ b/WickedEngine/wiEnums.h
@@ -374,6 +374,7 @@ namespace wi::enums
 		CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_TILECLASSIFICATION,
 		CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_FILTER,
 		CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_TEMPORAL,
+		CSTYPE_POSTPROCESS_RTSHADOW_UPSAMPLE,
 		CSTYPE_POSTPROCESS_RTAO,
 		CSTYPE_POSTPROCESS_RTAO_DENOISE_TILECLASSIFICATION,
 		CSTYPE_POSTPROCESS_RTAO_DENOISE_FILTER,
diff --git a/WickedEngine/wiRenderPath3D.cpp b/WickedEngine/wiRenderPath3D.cpp
index 096f92e14..2a61c6ad9 100644
--- a/WickedEngine/wiRenderPath3D.cpp
+++ b/WickedEngine/wiRenderPath3D.cpp
@@ -594,7 +594,8 @@ namespace wi
 		{
 			TextureDesc desc;
 			desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
-			desc.format = Format::R32G32B32A32_UINT;
+			desc.format = Format::R8_UNORM;
+			desc.array_size = 16;
 			desc.width = internalResolution.x;
 			desc.height = internalResolution.y;
 			desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE;
diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp
index 7d3745929..7ed66b5b6 100644
--- a/WickedEngine/wiRenderer.cpp
+++ b/WickedEngine/wiRenderer.cpp
@@ -1058,6 +1058,7 @@ void LoadShaders()
 	wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_TILECLASSIFICATION], "rtshadow_denoise_tileclassificationCS.cso"); });
 	wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_FILTER], "rtshadow_denoise_filterCS.cso"); });
 	wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_TEMPORAL], "rtshadow_denoise_temporalCS.cso"); });
+	wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_RTSHADOW_UPSAMPLE], "rtshadow_upsampleCS.cso"); });
 	wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_RTAO], "rtaoCS.cso", ShaderModel::SM_6_5); });
 	wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_RTAO_DENOISE_TILECLASSIFICATION], "rtao_denoise_tileclassificationCS.cso"); });
@@ -13824,12 +13825,14 @@ void CreateRTShadowResources(RTShadowResources& res, XMUINT2 resolution)
 	res.frame = 0;
 	TextureDesc desc;
-	desc.width = resolution.x;
-	desc.height = resolution.y;
+	desc.width = resolution.x / 2;
+	desc.height = resolution.y / 2;
 	desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
 	desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE;
 	desc.format = Format::R32G32B32A32_UINT;
+	device->CreateTexture(&desc, nullptr, &res.raytraced);
+	device->SetName(&res.raytraced, "raytraced");
 	device->CreateTexture(&desc, nullptr, &res.temporal[0]);
 	device->SetName(&res.temporal[0], "rtshadow_temporal[0]");
 	device->CreateTexture(&desc, nullptr, &res.temporal[1]);
@@ -13892,7 +13895,7 @@ void Postprocess_RTShadow(
 	{
 		// Maybe we don't need to clear them all, but it's safer this way:
 		GPUBarrier barriers[] = {
-			GPUBarrier::Image(&output, output.desc.layout, ResourceState::UNORDERED_ACCESS),
+			GPUBarrier::Image(&res.raytraced, res.raytraced.desc.layout, ResourceState::UNORDERED_ACCESS),
 			GPUBarrier::Image(&res.temporal[0], res.temporal[0].desc.layout, ResourceState::UNORDERED_ACCESS),
 			GPUBarrier::Image(&res.temporal[1], res.temporal[1].desc.layout, ResourceState::UNORDERED_ACCESS),
 			GPUBarrier::Image(&res.denoised, res.denoised.desc.layout, ResourceState::UNORDERED_ACCESS),
@@ -13915,7 +13918,7 @@ void Postprocess_RTShadow(
 			GPUBarrier::Image(&res.moments[3][1], res.moments[3][1].desc.layout, ResourceState::UNORDERED_ACCESS),
 		};
 		device->Barrier(barriers, arraysize(barriers), cmd);
-		device->ClearUAV(&output, 0, cmd);
+		device->ClearUAV(&res.raytraced, 0, cmd);
 		device->ClearUAV(&res.temporal[0], 0, cmd);
 		device->ClearUAV(&res.temporal[1], 0, cmd);
 		device->ClearUAV(&res.denoised, 0, cmd);
@@ -13943,8 +13946,6 @@ void Postprocess_RTShadow(
 		device->Barrier(barriers, arraysize(barriers), cmd);
 	}
-	const TextureDesc& desc = output.GetDesc();
-
 	BindCommonResources(cmd);
 	device->EventBegin("Raytrace", cmd);
@@ -13952,8 +13953,8 @@ void Postprocess_RTShadow(
 	device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_RTSHADOW], cmd);
 	PostProcess postprocess = {};
-	postprocess.resolution.x = desc.width;
-	postprocess.resolution.y = desc.height;
+	postprocess.resolution.x = res.raytraced.desc.width;
+	postprocess.resolution.y = res.raytraced.desc.height;
 	postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
 	postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
 	postprocess.params0.w = (float)res.frame;
@@ -13962,7 +13963,7 @@ void Postprocess_RTShadow(
 	device->PushConstants(&postprocess, sizeof(postprocess), cmd);
 	const GPUResource* uavs[] = {
-		&output,
+		&res.raytraced,
 		&res.normals,
 		&res.tiles
 	};
@@ -13971,22 +13972,25 @@ void Postprocess_RTShadow(
 	{
 		GPUBarrier barriers[] = {
 			GPUBarrier::Image(&output, output.desc.layout, ResourceState::UNORDERED_ACCESS),
+			GPUBarrier::Image(&res.raytraced, res.raytraced.desc.layout, ResourceState::UNORDERED_ACCESS),
 			GPUBarrier::Image(&res.normals, res.normals.desc.layout, ResourceState::UNORDERED_ACCESS),
 			GPUBarrier::Buffer(&res.tiles, ResourceState::SHADER_RESOURCE_COMPUTE, ResourceState::UNORDERED_ACCESS),
 		};
 		device->Barrier(barriers, arraysize(barriers), cmd);
 	}
+	device->ClearUAV(&output, 0, cmd);
+
 	device->Dispatch(
-		(desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
-		(desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+		(postprocess.resolution.x + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+		(postprocess.resolution.y + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
 		1,
 		cmd
 	);
 	{
 		GPUBarrier barriers[] = {
-			GPUBarrier::Image(&output, ResourceState::UNORDERED_ACCESS, output.desc.layout),
+			GPUBarrier::Image(&res.raytraced, ResourceState::UNORDERED_ACCESS, res.raytraced.desc.layout),
 			GPUBarrier::Image(&res.normals, ResourceState::UNORDERED_ACCESS, res.normals.desc.layout),
 			GPUBarrier::Buffer(&res.tiles, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE_COMPUTE),
 		};
@@ -14036,8 +14040,8 @@ void Postprocess_RTShadow(
 			device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
 			device->Dispatch(
-				(desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
-				(desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+				(postprocess.resolution.x + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+				(postprocess.resolution.y + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
 				1,
 				cmd
 			);
@@ -14098,8 +14102,8 @@ void Postprocess_RTShadow(
 			device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
 			device->Dispatch(
-				(desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
-				(desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+				(postprocess.resolution.x + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+				(postprocess.resolution.y + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
 				1,
 				cmd
 			);
@@ -14138,8 +14142,8 @@ void Postprocess_RTShadow(
 			device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
 			device->Dispatch(
-				(desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
-				(desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+				(postprocess.resolution.x + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+				(postprocess.resolution.y + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
 				1,
 				cmd
 			);
@@ -14178,8 +14182,8 @@ void Postprocess_RTShadow(
 			device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
 			device->Dispatch(
-				(desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
-				(desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+				(postprocess.resolution.x + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+				(postprocess.resolution.y + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
 				1,
 				cmd
 			);
@@ -14207,7 +14211,7 @@ void Postprocess_RTShadow(
 		device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_RTSHADOW_DENOISE_TEMPORAL], cmd);
 		device->PushConstants(&postprocess, sizeof(postprocess), cmd);
-		device->BindResource(&output, 0, cmd);
+		device->BindResource(&res.raytraced, 0, cmd);
 		device->BindResource(&res.temporal[temporal_history], 1, cmd);
 		device->BindResource(&res.denoised, 3, cmd);
@@ -14224,8 +14228,8 @@ void Postprocess_RTShadow(
 		}
 		device->Dispatch(
-			(desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
-			(desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+			(postprocess.resolution.x + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+			(postprocess.resolution.y + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
 			1,
 			cmd
 		);
@@ -14233,6 +14237,7 @@ void Postprocess_RTShadow(
 		{
 			GPUBarrier barriers[] = {
 				GPUBarrier::Image(&res.temporal[temporal_output], ResourceState::UNORDERED_ACCESS, res.temporal[temporal_output].desc.layout),
+				GPUBarrier::Memory(&output),
 			};
 			device->Barrier(barriers, arraysize(barriers), cmd);
 		}
@@ -14240,18 +14245,44 @@ void Postprocess_RTShadow(
 		device->EventEnd(cmd);
 	}
+	postprocess.resolution.x = output.desc.width;
+	postprocess.resolution.y = output.desc.height;
+	postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
+	postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
+	postprocess.params1.x = (float)res.raytraced.desc.width; // low resolution size, rtshadow_upsampleCS reads it as params1.xy
+	postprocess.params1.y = (float)res.raytraced.desc.height;
+	postprocess.params1.z = 1.0f / postprocess.params1.x; // low resolution texel size, rtshadow_upsampleCS reads it as params1.zw
+	postprocess.params1.w = 1.0f / postprocess.params1.y;
+
+	// Upsample pass:
 	{
-		GPUBarrier barriers[] = {
-			GPUBarrier::Image(&output, output.desc.layout, ResourceState::COPY_DST),
-			GPUBarrier::Image(&res.temporal[temporal_output], output.desc.layout, ResourceState::COPY_SRC),
+		device->EventBegin("Upsample", cmd);
+		device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_RTSHADOW_UPSAMPLE], cmd);
+		device->PushConstants(&postprocess, sizeof(postprocess), cmd);
+
+		device->BindResource(&res.temporal[temporal_output], 0, cmd);
+		device->BindResource(&lineardepth, 1, cmd, 1);
+
+		const GPUResource* uavs[] = {
+			&output,
 		};
-		device->Barrier(barriers, arraysize(barriers), cmd);
-		device->CopyResource(&output, &res.temporal[temporal_output], cmd);
-		for (auto& x : barriers)
+		device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
+
+		device->Dispatch(
+			(postprocess.resolution.x + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+			(postprocess.resolution.y + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
+			1,
+			cmd
+		);
+
 		{
-			std::swap(x.image.layout_before, x.image.layout_after);
+			GPUBarrier barriers[] = {
+				GPUBarrier::Image(&output, ResourceState::UNORDERED_ACCESS, output.desc.layout), // back to the layout output was transitioned from
+			};
+			device->Barrier(barriers, arraysize(barriers), cmd);
 		}
-		device->Barrier(barriers, arraysize(barriers), cmd);
+
+		device->EventEnd(cmd);
 	}
 	wi::profiler::EndRange(prof_range);
@@ -17141,7 +17172,7 @@ void AddDeferredBlockCompression(const wi::graphics::Texture& texture_src, const
-void SetTransparentShadowsEnabled(float value) { TRANSPARENTSHADOWSENABLED = value; }
+void SetTransparentShadowsEnabled(bool value) { TRANSPARENTSHADOWSENABLED = value; }
 float GetTransparentShadowsEnabled() { return TRANSPARENTSHADOWSENABLED; }
 void SetWireRender(bool value) { wireRender = value; }
 bool IsWireRender() { return wireRender; }
diff --git a/WickedEngine/wiRenderer.h b/WickedEngine/wiRenderer.h
index 15e725176..138fc93bc 100644
--- a/WickedEngine/wiRenderer.h
+++ b/WickedEngine/wiRenderer.h
@@ -623,6 +623,7 @@ namespace wi::renderer
 	);
 	struct RTShadowResources
 	{
+		wi::graphics::Texture raytraced;
 		wi::graphics::Texture temporal[2];
 		wi::graphics::Texture normals;
@@ -1004,7 +1005,7 @@ namespace wi::renderer
-	void SetTransparentShadowsEnabled(float value);
+	void SetTransparentShadowsEnabled(bool value);
 	float GetTransparentShadowsEnabled();
 	void SetWireRender(bool value);
 	bool IsWireRender();
diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp
index c2943c7f8..c26a64786 100644
--- a/WickedEngine/wiVersion.cpp
+++ b/WickedEngine/wiVersion.cpp
@@ -9,7 +9,7 @@ namespace wi::version
 	// minor features, major updates, breaking compatibility changes
 	const int minor = 71;
 	// minor bug fixes, alterations, refactors, updates
-	const int revision = 441;
+	const int revision = 442;
 	const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);