ssgi updates
This commit is contained in:
@@ -12,7 +12,7 @@ RWTexture2D<float4> output_diffuse : register(u0);
|
||||
|
||||
#ifdef WIDE
|
||||
static const uint THREADCOUNT = 16;
|
||||
static const int TILE_BORDER = 18;
|
||||
static const int TILE_BORDER = 16;
|
||||
#else
|
||||
static const uint THREADCOUNT = 8;
|
||||
static const int TILE_BORDER = 4;
|
||||
@@ -25,7 +25,7 @@ groupshared uint group_valid;
|
||||
|
||||
inline uint coord_to_cache(int2 coord)
|
||||
{
|
||||
return flatten2D(clamp(TILE_BORDER + coord, 0, TILE_SIZE - 1), TILE_SIZE);
|
||||
return flatten2D(clamp(coord, 0, TILE_SIZE - 1), TILE_SIZE);
|
||||
}
|
||||
|
||||
static const float depthRejection = 8;
|
||||
@@ -34,11 +34,10 @@ static const float depthRejection_rcp = rcp(depthRejection);
|
||||
float3 compute_diffuse(
|
||||
float3 origin_position,
|
||||
float3 origin_normal,
|
||||
int2 GTid,
|
||||
int2 offset
|
||||
int2 originLoc, // coord in cache
|
||||
int2 sampleLoc // coord in cache
|
||||
)
|
||||
{
|
||||
const int2 sampleLoc = GTid + offset;
|
||||
const uint t = coord_to_cache(sampleLoc);
|
||||
uint c = cache_rgb[t];
|
||||
if(c == 0)
|
||||
@@ -56,7 +55,7 @@ float3 compute_diffuse(
|
||||
const float sample_z = sample_position.z;
|
||||
|
||||
// DDA occlusion:
|
||||
const int2 start = GTid;
|
||||
const int2 start = originLoc;
|
||||
const int2 goal = sampleLoc;
|
||||
|
||||
const int dx = int(goal.x) - int(start.x);
|
||||
@@ -86,7 +85,8 @@ float3 compute_diffuse(
|
||||
const float sz = cache_z[tt];
|
||||
if(sz < z - 0.1)
|
||||
{
|
||||
return occlusion * Unpack_R11G11B10_FLOAT(cache_rgb[tt]);
|
||||
c = cache_rgb[tt];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -127,7 +127,8 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint2 GTid :
|
||||
if (group_valid == 0)
|
||||
return; // if no valid color was cached, whole group can exit early
|
||||
|
||||
const uint t = coord_to_cache(GTid.xy);
|
||||
const int2 originLoc = GTid.xy + TILE_BORDER;
|
||||
const uint t = coord_to_cache(originLoc);
|
||||
float3 P;
|
||||
P.z = cache_z[t];
|
||||
|
||||
@@ -137,23 +138,26 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint2 GTid :
|
||||
|
||||
P.xy = unpack_half2(cache_xy[t]);
|
||||
|
||||
const uint2 pixel = DTid.xy;
|
||||
const float3 N = mul((float3x3)GetCamera().view, decode_oct(input_normal[interleaved_pixel].rg));
|
||||
|
||||
float3 diffuse = 0;
|
||||
float sum = 0;
|
||||
const int range = int(postprocess.params0.x);
|
||||
const float spread = postprocess.params0.y + dither(pixel);
|
||||
const float spread = postprocess.params0.y /*+ dither(DTid.xy)*/;
|
||||
const float rangespread_rcp2 = postprocess.params0.z;
|
||||
|
||||
|
||||
const int2 pixel_base = Gid.xy * THREADCOUNT + GTid;
|
||||
for(int x = -range; x <= range; ++x)
|
||||
{
|
||||
for(int y = -range; y <= range; ++y)
|
||||
{
|
||||
const int2 pixel = pixel_base + int2(x, y);
|
||||
if(any(pixel < 0) || any(pixel >= postprocess.resolution))
|
||||
continue; // to not lose energy when sampling outside of textures, we skip those offsets
|
||||
const float2 foffset = float2(x, y) * spread;
|
||||
const int2 offset = round(foffset);
|
||||
const float weight = saturate(1 - abs(foffset.x) * abs(foffset.y) * rangespread_rcp2);
|
||||
diffuse += compute_diffuse(P, N, GTid, offset) * weight;
|
||||
diffuse += compute_diffuse(P, N, originLoc, originLoc + offset) * weight;
|
||||
sum += weight;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,17 +12,17 @@ RWTexture2DArray<float3> atlas4x_color : register(u5);
|
||||
RWTexture2DArray<float3> atlas8x_color : register(u6);
|
||||
RWTexture2DArray<float3> atlas16x_color : register(u7);
|
||||
RWTexture2D<float> regular2x_depth : register(u8);
|
||||
RWTexture2D<float2> regular2x_normal : register(u9);
|
||||
RWTexture2D<float> regular4x_depth : register(u10);
|
||||
RWTexture2D<float2> regular4x_normal : register(u11);
|
||||
RWTexture2D<float> regular8x_depth : register(u12);
|
||||
RWTexture2D<float2> regular8x_normal : register(u13);
|
||||
RWTexture2D<float> regular16x_depth : register(u14);
|
||||
RWTexture2D<float> regular4x_depth : register(u9);
|
||||
RWTexture2D<float> regular8x_depth : register(u10);
|
||||
RWTexture2D<float> regular16x_depth : register(u11);
|
||||
RWTexture2D<float2> regular2x_normal : register(u12);
|
||||
RWTexture2D<float2> regular4x_normal : register(u13);
|
||||
RWTexture2D<float2> regular8x_normal : register(u14);
|
||||
RWTexture2D<float2> regular16x_normal : register(u15);
|
||||
|
||||
groupshared float shared_depths[256];
|
||||
groupshared float2 shared_normals[256];
|
||||
groupshared float3 shared_colors[256];
|
||||
groupshared uint shared_normals[256];
|
||||
groupshared uint shared_colors[256];
|
||||
|
||||
[numthreads(8, 8, 1)]
|
||||
void main(uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex, uint3 GTid : SV_GroupThreadID, uint3 DTid : SV_DispatchThreadID)
|
||||
@@ -38,10 +38,10 @@ void main(uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex, uint3 GTid :
|
||||
shared_depths[destIdx + 128] = texture_depth[min(startST | uint2(0, 8), dim - 1)];
|
||||
shared_depths[destIdx + 136] = texture_depth[min(startST | uint2(8, 8), dim - 1)];
|
||||
|
||||
shared_normals[destIdx + 0] = texture_normal[min(startST | uint2(0, 0), dim - 1)];
|
||||
shared_normals[destIdx + 8] = texture_normal[min(startST | uint2(8, 0), dim - 1)];
|
||||
shared_normals[destIdx + 128] = texture_normal[min(startST | uint2(0, 8), dim - 1)];
|
||||
shared_normals[destIdx + 136] = texture_normal[min(startST | uint2(8, 8), dim - 1)];
|
||||
shared_normals[destIdx + 0] = pack_half2(texture_normal[min(startST | uint2(0, 0), dim - 1)]);
|
||||
shared_normals[destIdx + 8] = pack_half2(texture_normal[min(startST | uint2(8, 0), dim - 1)]);
|
||||
shared_normals[destIdx + 128] = pack_half2(texture_normal[min(startST | uint2(0, 8), dim - 1)]);
|
||||
shared_normals[destIdx + 136] = pack_half2(texture_normal[min(startST | uint2(8, 8), dim - 1)]);
|
||||
|
||||
const float2 uv0 = float2(startST | uint2(0, 0)) * dim_rcp;
|
||||
const float2 uv1 = float2(startST | uint2(8, 0)) * dim_rcp;
|
||||
@@ -55,18 +55,18 @@ void main(uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex, uint3 GTid :
|
||||
const float2 prevUV1 = uv1 + velocity1;
|
||||
const float2 prevUV2 = uv2 + velocity2;
|
||||
const float2 prevUV3 = uv3 + velocity3;
|
||||
shared_colors[destIdx + 0] = texture_input.SampleLevel(sampler_linear_clamp, prevUV0, 0);
|
||||
shared_colors[destIdx + 8] = texture_input.SampleLevel(sampler_linear_clamp, prevUV1, 0);
|
||||
shared_colors[destIdx + 128] = texture_input.SampleLevel(sampler_linear_clamp, prevUV2, 0);
|
||||
shared_colors[destIdx + 136] = texture_input.SampleLevel(sampler_linear_clamp, prevUV3, 0);
|
||||
shared_colors[destIdx + 0] = Pack_R11G11B10_FLOAT(texture_input.SampleLevel(sampler_linear_clamp, prevUV0, 0));
|
||||
shared_colors[destIdx + 8] = Pack_R11G11B10_FLOAT(texture_input.SampleLevel(sampler_linear_clamp, prevUV1, 0));
|
||||
shared_colors[destIdx + 128] = Pack_R11G11B10_FLOAT(texture_input.SampleLevel(sampler_linear_clamp, prevUV2, 0));
|
||||
shared_colors[destIdx + 136] = Pack_R11G11B10_FLOAT(texture_input.SampleLevel(sampler_linear_clamp, prevUV3, 0));
|
||||
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
||||
uint ldsIndex = (GTid.x << 1) | (GTid.y << 5);
|
||||
|
||||
float depth = shared_depths[ldsIndex];
|
||||
float2 normal = shared_normals[ldsIndex];
|
||||
float3 color = shared_colors[ldsIndex];
|
||||
float2 normal = unpack_half2(shared_normals[ldsIndex]);
|
||||
float3 color = Unpack_R11G11B10_FLOAT(shared_colors[ldsIndex]);
|
||||
|
||||
color = color - 0.2; // cut out pixels that shouldn't act as lights
|
||||
color *= 0.9; // accumulation energy loss
|
||||
|
||||
@@ -42,8 +42,8 @@ void main(uint2 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex)
|
||||
const int range = int(postprocess.params0.x);
|
||||
const float spread = postprocess.params0.y;
|
||||
#else
|
||||
const int range = 1;
|
||||
const float spread = 8;
|
||||
const int range = 2;
|
||||
const float spread = 6;
|
||||
#endif
|
||||
for(int x = -range; x <= range; ++x)
|
||||
{
|
||||
|
||||
+140
-188
@@ -12444,6 +12444,8 @@ void Postprocess_RTDiffuse(
|
||||
}
|
||||
void CreateSSGIResources(SSGIResources& res, XMUINT2 resolution)
|
||||
{
|
||||
res.cleared = false;
|
||||
|
||||
TextureDesc desc;
|
||||
desc.type = TextureDesc::Type::TEXTURE_2D;
|
||||
desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
|
||||
@@ -12456,34 +12458,11 @@ void CreateSSGIResources(SSGIResources& res, XMUINT2 resolution)
|
||||
desc.width = (resolution.x + 7) / 8;
|
||||
desc.height = (resolution.y + 7) / 8;
|
||||
desc.array_size = 16;
|
||||
desc.mip_levels = 4;
|
||||
desc.format = Format::R32_FLOAT;
|
||||
device->CreateTexture(&desc, nullptr, &res.texture_atlas2x_depth);
|
||||
device->CreateTexture(&desc, nullptr, &res.texture_atlas_depth);
|
||||
desc.format = Format::R11G11B10_FLOAT;
|
||||
device->CreateTexture(&desc, nullptr, &res.texture_atlas2x_color);
|
||||
|
||||
desc.width = (resolution.x + 15) / 16;
|
||||
desc.height = (resolution.y + 15) / 16;
|
||||
desc.array_size = 16;
|
||||
desc.format = Format::R32_FLOAT;
|
||||
device->CreateTexture(&desc, nullptr, &res.texture_atlas4x_depth);
|
||||
desc.format = Format::R11G11B10_FLOAT;
|
||||
device->CreateTexture(&desc, nullptr, &res.texture_atlas4x_color);
|
||||
|
||||
desc.width = (resolution.x + 31) / 32;
|
||||
desc.height = (resolution.y + 31) / 32;
|
||||
desc.array_size = 16;
|
||||
desc.format = Format::R32_FLOAT;
|
||||
device->CreateTexture(&desc, nullptr, &res.texture_atlas8x_depth);
|
||||
desc.format = Format::R11G11B10_FLOAT;
|
||||
device->CreateTexture(&desc, nullptr, &res.texture_atlas8x_color);
|
||||
|
||||
desc.width = (resolution.x + 63) / 64;
|
||||
desc.height = (resolution.y + 63) / 64;
|
||||
desc.array_size = 16;
|
||||
desc.format = Format::R32_FLOAT;
|
||||
device->CreateTexture(&desc, nullptr, &res.texture_atlas16x_depth);
|
||||
desc.format = Format::R11G11B10_FLOAT;
|
||||
device->CreateTexture(&desc, nullptr, &res.texture_atlas16x_color);
|
||||
device->CreateTexture(&desc, nullptr, &res.texture_atlas_color);
|
||||
|
||||
desc.array_size = 1;
|
||||
desc.mip_levels = 4;
|
||||
@@ -12496,9 +12475,17 @@ void CreateSSGIResources(SSGIResources& res, XMUINT2 resolution)
|
||||
desc.format = Format::R11G11B10_FLOAT;
|
||||
device->CreateTexture(&desc, nullptr, &res.texture_diffuse_mips);
|
||||
|
||||
for (uint32_t i = 0; i < desc.mip_levels; ++i)
|
||||
for (uint32_t i = 0; i < 4u; ++i)
|
||||
{
|
||||
int subresource_index;
|
||||
subresource_index = device->CreateSubresource(&res.texture_atlas_depth, SubresourceType::SRV, 0, 16, i, 1);
|
||||
assert(subresource_index == i);
|
||||
subresource_index = device->CreateSubresource(&res.texture_atlas_depth, SubresourceType::UAV, 0, 16, i, 1);
|
||||
assert(subresource_index == i);
|
||||
subresource_index = device->CreateSubresource(&res.texture_atlas_color, SubresourceType::SRV, 0, 16, i, 1);
|
||||
assert(subresource_index == i);
|
||||
subresource_index = device->CreateSubresource(&res.texture_atlas_color, SubresourceType::UAV, 0, 16, i, 1);
|
||||
assert(subresource_index == i);
|
||||
subresource_index = device->CreateSubresource(&res.texture_depth_mips, SubresourceType::SRV, 0, 1, i, 1);
|
||||
assert(subresource_index == i);
|
||||
subresource_index = device->CreateSubresource(&res.texture_depth_mips, SubresourceType::UAV, 0, 1, i, 1);
|
||||
@@ -12527,14 +12514,8 @@ void Postprocess_SSGI(
|
||||
|
||||
{
|
||||
GPUBarrier barriers[] = {
|
||||
GPUBarrier::Image(&res.texture_atlas2x_depth, res.texture_atlas2x_depth.desc.layout, ResourceState::UNORDERED_ACCESS),
|
||||
GPUBarrier::Image(&res.texture_atlas4x_depth, res.texture_atlas4x_depth.desc.layout, ResourceState::UNORDERED_ACCESS),
|
||||
GPUBarrier::Image(&res.texture_atlas8x_depth, res.texture_atlas8x_depth.desc.layout, ResourceState::UNORDERED_ACCESS),
|
||||
GPUBarrier::Image(&res.texture_atlas16x_depth, res.texture_atlas16x_depth.desc.layout, ResourceState::UNORDERED_ACCESS),
|
||||
GPUBarrier::Image(&res.texture_atlas2x_color, res.texture_atlas2x_color.desc.layout, ResourceState::UNORDERED_ACCESS),
|
||||
GPUBarrier::Image(&res.texture_atlas4x_color, res.texture_atlas4x_color.desc.layout, ResourceState::UNORDERED_ACCESS),
|
||||
GPUBarrier::Image(&res.texture_atlas8x_color, res.texture_atlas8x_color.desc.layout, ResourceState::UNORDERED_ACCESS),
|
||||
GPUBarrier::Image(&res.texture_atlas16x_color, res.texture_atlas16x_color.desc.layout, ResourceState::UNORDERED_ACCESS),
|
||||
GPUBarrier::Image(&res.texture_atlas_depth, res.texture_atlas_depth.desc.layout, ResourceState::UNORDERED_ACCESS),
|
||||
GPUBarrier::Image(&res.texture_atlas_color, res.texture_atlas_color.desc.layout, ResourceState::UNORDERED_ACCESS),
|
||||
GPUBarrier::Image(&res.texture_depth_mips, res.texture_depth_mips.desc.layout, ResourceState::UNORDERED_ACCESS),
|
||||
GPUBarrier::Image(&res.texture_normal_mips, res.texture_normal_mips.desc.layout, ResourceState::UNORDERED_ACCESS),
|
||||
GPUBarrier::Image(&res.texture_diffuse_mips, res.texture_diffuse_mips.desc.layout, ResourceState::UNORDERED_ACCESS),
|
||||
@@ -12543,31 +12524,20 @@ void Postprocess_SSGI(
|
||||
device->Barrier(barriers, arraysize(barriers), cmd);
|
||||
}
|
||||
|
||||
device->ClearUAV(&res.texture_atlas2x_depth, 0, cmd);
|
||||
device->ClearUAV(&res.texture_atlas4x_depth, 0, cmd);
|
||||
device->ClearUAV(&res.texture_atlas8x_depth, 0, cmd);
|
||||
device->ClearUAV(&res.texture_atlas16x_depth, 0, cmd);
|
||||
device->ClearUAV(&res.texture_atlas2x_color, 0, cmd);
|
||||
device->ClearUAV(&res.texture_atlas4x_color, 0, cmd);
|
||||
device->ClearUAV(&res.texture_atlas8x_color, 0, cmd);
|
||||
device->ClearUAV(&res.texture_atlas16x_color, 0, cmd);
|
||||
device->ClearUAV(&res.texture_depth_mips, 0, cmd);
|
||||
device->ClearUAV(&res.texture_normal_mips, 0, cmd);
|
||||
if (!res.cleared)
|
||||
{
|
||||
res.cleared = true;
|
||||
device->ClearUAV(&res.texture_atlas_depth, 0, cmd);
|
||||
device->ClearUAV(&res.texture_atlas_color, 0, cmd);
|
||||
device->ClearUAV(&res.texture_depth_mips, 0, cmd);
|
||||
device->ClearUAV(&res.texture_normal_mips, 0, cmd);
|
||||
}
|
||||
device->ClearUAV(&res.texture_diffuse_mips, 0, cmd);
|
||||
device->ClearUAV(&output, 0, cmd);
|
||||
|
||||
{
|
||||
GPUBarrier barriers[] = {
|
||||
GPUBarrier::Memory(&res.texture_atlas2x_depth),
|
||||
GPUBarrier::Memory(&res.texture_atlas4x_depth),
|
||||
GPUBarrier::Memory(&res.texture_atlas8x_depth),
|
||||
GPUBarrier::Memory(&res.texture_atlas16x_depth),
|
||||
GPUBarrier::Memory(&res.texture_atlas2x_color),
|
||||
GPUBarrier::Memory(&res.texture_atlas4x_color),
|
||||
GPUBarrier::Memory(&res.texture_atlas8x_color),
|
||||
GPUBarrier::Memory(&res.texture_atlas16x_color),
|
||||
GPUBarrier::Memory(&res.texture_depth_mips),
|
||||
GPUBarrier::Memory(&res.texture_normal_mips),
|
||||
GPUBarrier::Memory(),
|
||||
};
|
||||
device->Barrier(barriers, arraysize(barriers), cmd);
|
||||
}
|
||||
@@ -12582,31 +12552,30 @@ void Postprocess_SSGI(
|
||||
|
||||
device->BindResource(&input, 0, cmd);
|
||||
|
||||
const GPUResource* uavs[] = {
|
||||
&res.texture_atlas2x_depth,
|
||||
&res.texture_atlas4x_depth,
|
||||
&res.texture_atlas8x_depth,
|
||||
&res.texture_atlas16x_depth,
|
||||
&res.texture_atlas2x_color,
|
||||
&res.texture_atlas4x_color,
|
||||
&res.texture_atlas8x_color,
|
||||
&res.texture_atlas16x_color,
|
||||
};
|
||||
device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
|
||||
device->BindUAV(&res.texture_atlas_depth, 0, cmd, 0);
|
||||
device->BindUAV(&res.texture_atlas_depth, 1, cmd, 1);
|
||||
device->BindUAV(&res.texture_atlas_depth, 2, cmd, 2);
|
||||
device->BindUAV(&res.texture_atlas_depth, 3, cmd, 3);
|
||||
|
||||
device->BindUAV(&res.texture_depth_mips, arraysize(uavs) + 0, cmd, 0);
|
||||
device->BindUAV(&res.texture_normal_mips, arraysize(uavs) + 1, cmd, 0);
|
||||
device->BindUAV(&res.texture_depth_mips, arraysize(uavs) + 2, cmd, 1);
|
||||
device->BindUAV(&res.texture_normal_mips, arraysize(uavs) + 3, cmd, 1);
|
||||
device->BindUAV(&res.texture_depth_mips, arraysize(uavs) + 4, cmd, 2);
|
||||
device->BindUAV(&res.texture_normal_mips, arraysize(uavs) + 5, cmd, 2);
|
||||
device->BindUAV(&res.texture_depth_mips, arraysize(uavs) + 6, cmd, 3);
|
||||
device->BindUAV(&res.texture_normal_mips, arraysize(uavs) + 7, cmd, 3);
|
||||
device->BindUAV(&res.texture_atlas_color, 4, cmd, 0);
|
||||
device->BindUAV(&res.texture_atlas_color, 5, cmd, 1);
|
||||
device->BindUAV(&res.texture_atlas_color, 6, cmd, 2);
|
||||
device->BindUAV(&res.texture_atlas_color, 7, cmd, 3);
|
||||
|
||||
const TextureDesc& desc = res.texture_atlas4x_depth.GetDesc();
|
||||
device->BindUAV(&res.texture_depth_mips, 8, cmd, 0);
|
||||
device->BindUAV(&res.texture_depth_mips, 9, cmd, 1);
|
||||
device->BindUAV(&res.texture_depth_mips, 10, cmd, 2);
|
||||
device->BindUAV(&res.texture_depth_mips, 11, cmd, 3);
|
||||
|
||||
device->BindUAV(&res.texture_normal_mips, 12, cmd, 0);
|
||||
device->BindUAV(&res.texture_normal_mips, 13, cmd, 1);
|
||||
device->BindUAV(&res.texture_normal_mips, 14, cmd, 2);
|
||||
device->BindUAV(&res.texture_normal_mips, 15, cmd, 3);
|
||||
|
||||
const TextureDesc& desc = res.texture_atlas_depth.GetDesc();
|
||||
device->Dispatch(
|
||||
desc.width,
|
||||
desc.height,
|
||||
desc.width >> 1,
|
||||
desc.height >> 1,
|
||||
1,
|
||||
cmd
|
||||
);
|
||||
@@ -12616,15 +12585,8 @@ void Postprocess_SSGI(
|
||||
|
||||
{
|
||||
GPUBarrier barriers[] = {
|
||||
GPUBarrier::Memory(&res.texture_diffuse_mips),
|
||||
GPUBarrier::Image(&res.texture_atlas2x_depth, ResourceState::UNORDERED_ACCESS, res.texture_atlas2x_depth.desc.layout),
|
||||
GPUBarrier::Image(&res.texture_atlas4x_depth, ResourceState::UNORDERED_ACCESS, res.texture_atlas4x_depth.desc.layout),
|
||||
GPUBarrier::Image(&res.texture_atlas8x_depth, ResourceState::UNORDERED_ACCESS, res.texture_atlas8x_depth.desc.layout),
|
||||
GPUBarrier::Image(&res.texture_atlas16x_depth, ResourceState::UNORDERED_ACCESS, res.texture_atlas16x_depth.desc.layout),
|
||||
GPUBarrier::Image(&res.texture_atlas2x_color, ResourceState::UNORDERED_ACCESS, res.texture_atlas2x_color.desc.layout),
|
||||
GPUBarrier::Image(&res.texture_atlas4x_color, ResourceState::UNORDERED_ACCESS, res.texture_atlas4x_color.desc.layout),
|
||||
GPUBarrier::Image(&res.texture_atlas8x_color, ResourceState::UNORDERED_ACCESS, res.texture_atlas8x_color.desc.layout),
|
||||
GPUBarrier::Image(&res.texture_atlas16x_color, ResourceState::UNORDERED_ACCESS, res.texture_atlas16x_color.desc.layout),
|
||||
GPUBarrier::Image(&res.texture_atlas_depth, ResourceState::UNORDERED_ACCESS, res.texture_atlas_depth.desc.layout),
|
||||
GPUBarrier::Image(&res.texture_atlas_color, ResourceState::UNORDERED_ACCESS, res.texture_atlas_color.desc.layout),
|
||||
GPUBarrier::Image(&res.texture_depth_mips, ResourceState::UNORDERED_ACCESS, res.texture_depth_mips.desc.layout),
|
||||
GPUBarrier::Image(&res.texture_normal_mips, ResourceState::UNORDERED_ACCESS, res.texture_normal_mips.desc.layout),
|
||||
};
|
||||
@@ -12634,110 +12596,20 @@ void Postprocess_SSGI(
|
||||
{
|
||||
device->EventBegin("SSGI - diffuse", cmd);
|
||||
|
||||
device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSGI], cmd);
|
||||
|
||||
// 2x:
|
||||
{
|
||||
const GPUResource* resarray[] = {
|
||||
&res.texture_atlas2x_depth,
|
||||
&res.texture_atlas2x_color,
|
||||
};
|
||||
device->BindResources(resarray, 0, arraysize(resarray), cmd);
|
||||
device->BindResource(&res.texture_normal_mips, arraysize(resarray) + 0, cmd, 0);
|
||||
device->BindUAV(&res.texture_diffuse_mips, 0, cmd, 0);
|
||||
|
||||
const TextureDesc& desc = res.texture_atlas2x_depth.GetDesc();
|
||||
|
||||
postprocess.resolution.x = desc.width;
|
||||
postprocess.resolution.y = desc.height;
|
||||
postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
|
||||
postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
|
||||
postprocess.params0.x = 1; // range
|
||||
postprocess.params0.y = 2; // spread
|
||||
postprocess.params0.z = std::pow(1.0f / (postprocess.params0.x * postprocess.params0.y), 2.0f); // rangespread_rcp2
|
||||
device->PushConstants(&postprocess, sizeof(postprocess), cmd);
|
||||
|
||||
device->Dispatch(
|
||||
(desc.width + 7) / 8,
|
||||
(desc.height + 7) / 8,
|
||||
16,
|
||||
cmd
|
||||
);
|
||||
}
|
||||
// 4x:
|
||||
{
|
||||
const GPUResource* resarray[] = {
|
||||
&res.texture_atlas4x_depth,
|
||||
&res.texture_atlas4x_color,
|
||||
};
|
||||
device->BindResources(resarray, 0, arraysize(resarray), cmd);
|
||||
device->BindResource(&res.texture_normal_mips, arraysize(resarray) + 0, cmd, 1);
|
||||
device->BindUAV(&res.texture_diffuse_mips, 0, cmd, 1);
|
||||
|
||||
const TextureDesc& desc = res.texture_atlas4x_depth.GetDesc();
|
||||
|
||||
postprocess.resolution.x = desc.width;
|
||||
postprocess.resolution.y = desc.height;
|
||||
postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
|
||||
postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
|
||||
postprocess.params0.x = 2; // range
|
||||
postprocess.params0.y = 2; // spread
|
||||
postprocess.params0.z = std::pow(1.0f / (postprocess.params0.x * postprocess.params0.y), 2.0f); // rangespread_rcp2
|
||||
device->PushConstants(&postprocess, sizeof(postprocess), cmd);
|
||||
|
||||
device->Dispatch(
|
||||
(desc.width + 7) / 8,
|
||||
(desc.height + 7) / 8,
|
||||
16,
|
||||
cmd
|
||||
);
|
||||
}
|
||||
|
||||
// Switch to wide sampling shader:
|
||||
// Wide sampling passes:
|
||||
device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSGI_WIDE], cmd);
|
||||
|
||||
// 8x:
|
||||
{
|
||||
const GPUResource* resarray[] = {
|
||||
&res.texture_atlas8x_depth,
|
||||
&res.texture_atlas8x_color,
|
||||
};
|
||||
device->BindResources(resarray, 0, arraysize(resarray), cmd);
|
||||
device->BindResource(&res.texture_normal_mips, arraysize(resarray) + 0, cmd, 2);
|
||||
device->BindUAV(&res.texture_diffuse_mips, 0, cmd, 2);
|
||||
|
||||
const TextureDesc& desc = res.texture_atlas8x_depth.GetDesc();
|
||||
|
||||
postprocess.resolution.x = desc.width;
|
||||
postprocess.resolution.y = desc.height;
|
||||
postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
|
||||
postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
|
||||
postprocess.params0.x = 4; // range
|
||||
postprocess.params0.y = 4; // spread
|
||||
postprocess.params0.z = std::pow(1.0f / (postprocess.params0.x * postprocess.params0.y), 2.0f); // rangespread_rcp2
|
||||
device->PushConstants(&postprocess, sizeof(postprocess), cmd);
|
||||
|
||||
device->Dispatch(
|
||||
(desc.width + 15) / 16,
|
||||
(desc.height + 15) / 16,
|
||||
16,
|
||||
cmd
|
||||
);
|
||||
}
|
||||
// 16x:
|
||||
{
|
||||
const GPUResource* resarray[] = {
|
||||
&res.texture_atlas16x_depth,
|
||||
&res.texture_atlas16x_color,
|
||||
};
|
||||
device->BindResources(resarray, 0, arraysize(resarray), cmd);
|
||||
device->BindResource(&res.texture_normal_mips, arraysize(resarray) + 0, cmd, 3);
|
||||
device->BindResource(&res.texture_atlas_depth, 0, cmd, 3);
|
||||
device->BindResource(&res.texture_atlas_color, 1, cmd, 3);
|
||||
device->BindResource(&res.texture_normal_mips, 2, cmd, 3);
|
||||
device->BindUAV(&res.texture_diffuse_mips, 0, cmd, 3);
|
||||
|
||||
const TextureDesc& desc = res.texture_atlas16x_depth.GetDesc();
|
||||
const TextureDesc& desc = res.texture_atlas_depth.GetDesc();
|
||||
|
||||
postprocess.resolution.x = desc.width;
|
||||
postprocess.resolution.y = desc.height;
|
||||
postprocess.resolution.x = desc.width >> 3;
|
||||
postprocess.resolution.y = desc.height >> 3;
|
||||
postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
|
||||
postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
|
||||
postprocess.params0.x = 8; // range
|
||||
@@ -12746,8 +12618,88 @@ void Postprocess_SSGI(
|
||||
device->PushConstants(&postprocess, sizeof(postprocess), cmd);
|
||||
|
||||
device->Dispatch(
|
||||
(desc.width + 15) / 16,
|
||||
(desc.height + 15) / 16,
|
||||
(postprocess.resolution.x + 15) / 16,
|
||||
(postprocess.resolution.y + 15) / 16,
|
||||
16,
|
||||
cmd
|
||||
);
|
||||
}
|
||||
// 8x:
|
||||
{
|
||||
device->BindResource(&res.texture_atlas_depth, 0, cmd, 2);
|
||||
device->BindResource(&res.texture_atlas_color, 1, cmd, 2);
|
||||
device->BindResource(&res.texture_normal_mips, 2, cmd, 2);
|
||||
device->BindUAV(&res.texture_diffuse_mips, 0, cmd, 2);
|
||||
|
||||
const TextureDesc& desc = res.texture_atlas_depth.GetDesc();
|
||||
|
||||
postprocess.resolution.x = desc.width >> 2;
|
||||
postprocess.resolution.y = desc.height >> 2;
|
||||
postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
|
||||
postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
|
||||
postprocess.params0.x = 4; // range
|
||||
postprocess.params0.y = 4; // spread
|
||||
postprocess.params0.z = std::pow(1.0f / (postprocess.params0.x * postprocess.params0.y), 2.0f); // rangespread_rcp2
|
||||
device->PushConstants(&postprocess, sizeof(postprocess), cmd);
|
||||
|
||||
device->Dispatch(
|
||||
(postprocess.resolution.x + 15) / 16,
|
||||
(postprocess.resolution.y + 15) / 16,
|
||||
16,
|
||||
cmd
|
||||
);
|
||||
}
|
||||
|
||||
// Narrow sampling passes:
|
||||
device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSGI], cmd);
|
||||
|
||||
// 4x:
|
||||
{
|
||||
device->BindResource(&res.texture_atlas_depth, 0, cmd, 1);
|
||||
device->BindResource(&res.texture_atlas_color, 1, cmd, 1);
|
||||
device->BindResource(&res.texture_normal_mips, 2, cmd, 1);
|
||||
device->BindUAV(&res.texture_diffuse_mips, 0, cmd, 1);
|
||||
|
||||
const TextureDesc& desc = res.texture_atlas_depth.GetDesc();
|
||||
|
||||
postprocess.resolution.x = desc.width >> 1u;
|
||||
postprocess.resolution.y = desc.height >> 1u;
|
||||
postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
|
||||
postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
|
||||
postprocess.params0.x = 2; // range
|
||||
postprocess.params0.y = 2; // spread
|
||||
postprocess.params0.z = std::pow(1.0f / (postprocess.params0.x * postprocess.params0.y), 2.0f); // rangespread_rcp2
|
||||
device->PushConstants(&postprocess, sizeof(postprocess), cmd);
|
||||
|
||||
device->Dispatch(
|
||||
(postprocess.resolution.x + 7) / 8,
|
||||
(postprocess.resolution.y + 7) / 8,
|
||||
16,
|
||||
cmd
|
||||
);
|
||||
}
|
||||
|
||||
// 2x:
|
||||
{
|
||||
device->BindResource(&res.texture_atlas_depth, 0, cmd, 0);
|
||||
device->BindResource(&res.texture_atlas_color, 1, cmd, 0);
|
||||
device->BindResource(&res.texture_normal_mips, 2, cmd, 0);
|
||||
device->BindUAV(&res.texture_diffuse_mips, 0, cmd, 0);
|
||||
|
||||
const TextureDesc& desc = res.texture_atlas_depth.GetDesc();
|
||||
|
||||
postprocess.resolution.x = desc.width;
|
||||
postprocess.resolution.y = desc.height;
|
||||
postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
|
||||
postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
|
||||
postprocess.params0.x = 1; // range
|
||||
postprocess.params0.y = 4; // spread
|
||||
postprocess.params0.z = std::pow(1.0f / (postprocess.params0.x * postprocess.params0.y), 2.0f); // rangespread_rcp2
|
||||
device->PushConstants(&postprocess, sizeof(postprocess), cmd);
|
||||
|
||||
device->Dispatch(
|
||||
(postprocess.resolution.x + 7) / 8,
|
||||
(postprocess.resolution.y + 7) / 8,
|
||||
16,
|
||||
cmd
|
||||
);
|
||||
@@ -12782,7 +12734,7 @@ void Postprocess_SSGI(
|
||||
postprocess.resolution_rcp.x = 1.0f / postprocess.resolution.x;
|
||||
postprocess.resolution_rcp.y = 1.0f / postprocess.resolution.y;
|
||||
postprocess.params0.x = 2; // range
|
||||
postprocess.params0.y = 8; // spread
|
||||
postprocess.params0.y = 4; // spread
|
||||
device->PushConstants(&postprocess, sizeof(postprocess), cmd);
|
||||
|
||||
device->Dispatch(
|
||||
@@ -12885,8 +12837,8 @@ void Postprocess_SSGI(
|
||||
device->PushConstants(&postprocess, sizeof(postprocess), cmd);
|
||||
|
||||
device->Dispatch(
|
||||
(desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
|
||||
(desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE,
|
||||
(desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, // dispatch is using desc size (unaligned!)
|
||||
(desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, // dispatch is using desc size (unaligned!)
|
||||
1,
|
||||
cmd
|
||||
);
|
||||
|
||||
@@ -554,14 +554,9 @@ namespace wi::renderer
|
||||
);
|
||||
struct SSGIResources
|
||||
{
|
||||
wi::graphics::Texture texture_atlas2x_depth;
|
||||
wi::graphics::Texture texture_atlas4x_depth;
|
||||
wi::graphics::Texture texture_atlas8x_depth;
|
||||
wi::graphics::Texture texture_atlas16x_depth;
|
||||
wi::graphics::Texture texture_atlas2x_color;
|
||||
wi::graphics::Texture texture_atlas4x_color;
|
||||
wi::graphics::Texture texture_atlas8x_color;
|
||||
wi::graphics::Texture texture_atlas16x_color;
|
||||
mutable bool cleared = false;
|
||||
wi::graphics::Texture texture_atlas_depth;
|
||||
wi::graphics::Texture texture_atlas_color;
|
||||
wi::graphics::Texture texture_depth_mips;
|
||||
wi::graphics::Texture texture_normal_mips;
|
||||
wi::graphics::Texture texture_diffuse_mips;
|
||||
|
||||
@@ -9,7 +9,7 @@ namespace wi::version
|
||||
// minor features, major updates, breaking compatibility changes
|
||||
const int minor = 71;
|
||||
// minor bug fixes, alterations, refactors, updates
|
||||
const int revision = 418;
|
||||
const int revision = 419;
|
||||
|
||||
const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);
|
||||
|
||||
|
||||
@@ -1,74 +0,0 @@
|
||||
#include "globals.hlsli"
|
||||
#include "stochasticSSRHF.hlsli"
|
||||
#include "ShaderInterop_Postprocess.h"
|
||||
|
||||
PUSHCONSTANT(postprocess, PostProcess);
|
||||
|
||||
Texture2D<float> input_depth_low : register(t0);
|
||||
Texture2D<float2> input_normal_low : register(t1);
|
||||
Texture2D<float4> input_diffuse_low : register(t2);
|
||||
Texture2D<float> input_depth_high : register(t3);
|
||||
Texture2D<float2> input_normal_high : register(t4);
|
||||
|
||||
RWTexture2D<float4> output : register(u0);
|
||||
|
||||
static const float depthThreshold = 1000.0;
|
||||
static const float normalThreshold = 1.0;
|
||||
|
||||
// Bilateral gather from the *_low inputs onto the pixel grid of the *_high inputs.
// Every tap in a (2 * range + 1)^2 neighborhood is weighted by how well its
// reconstructed position and decoded normal agree with those of the destination
// pixel; the weight-normalized color is then added on top of whatever is
// already stored in output (alpha is incremented by 1).
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint2 DTid : SV_DispatchThreadID)
{
	const uint2 pixel = DTid.xy;
	const float2 uv = (pixel + 0.5) * postprocess.resolution_rcp;

	// Reference surface of the destination pixel:
	const float depth = input_depth_high[pixel];
	const float linearDepth = compute_lineardepth(depth);
	const float3 N = decode_oct(input_normal_high[pixel].rg);
	const float3 P = reconstruct_position(uv, depth);

#if 1
	// Kernel shape supplied by the host:
	const int range = int(postprocess.params0.x);
	const float spread = postprocess.params0.y;
#else
	// Fixed kernel, kept for debugging:
	const int range = 1;
	const float spread = 8;
#endif

	float3 accumulated = 0;
	float weightTotal = 0;

	for (int tapX = -range; tapX <= range; ++tapX)
	{
		for (int tapY = -range; tapY <= range; ++tapY)
		{
			const float2 offset = float2(tapX, tapY) * spread * postprocess.resolution_rcp;
			const float2 tapUV = uv + offset;

			// Tap surface and color:
			const float tapDepth = input_depth_low.SampleLevel(sampler_linear_clamp, tapUV, 0);
			const float3 tapN = decode_oct(input_normal_low.SampleLevel(sampler_linear_clamp, tapUV, 0));
			const float3 tapDiffuse = input_diffuse_low.SampleLevel(sampler_linear_clamp, tapUV, 0).rgb;
			const float3 tapP = reconstruct_position(tapUV, tapDepth);

			// Depth term: the larger of the two point-to-plane distances
			// (measured along the tap normal and along the pixel normal), relative to the pixel's linear depth.
			const float3 toPixel = P - tapP;
			const float planeError = max(abs(dot(toPixel, tapN)), abs(dot(toPixel, N)));
			const float relativeDepthDifference = planeError / linearDepth;
			const float depthWeight = exp(-sqr(relativeDepthDifference) * depthThreshold);

			// Normal term: fourth power of the clamped cosine between the normals.
			const float normalAgreement = pow(saturate(dot(tapN, N)), 4.0);
			const float normalWeight = saturate(1.0 - (1.0 - normalAgreement) * normalThreshold);

			const float weight = depthWeight * normalWeight;
			//weight = 1;

			accumulated += tapDiffuse * weight;
			weightTotal += weight;
		}
	}

	// Normalize only when at least one tap was accepted:
	if (weightTotal > 0)
	{
		accumulated /= weightTotal;
	}

	accumulated = max(0, accumulated);

	// Accumulate onto the existing output value:
	const float4 previous = output[pixel];
	output[pixel] = (previous + float4(accumulated, 1));
}
|
||||
@@ -1,192 +0,0 @@
|
||||
#include "globals.hlsli"
|
||||
#include "stochasticSSRHF.hlsli"
|
||||
#include "ShaderInterop_Postprocess.h"
|
||||
|
||||
// Push constant block for this pass. main() reads resolution_rcp and
// params0.x / params0.y / params0.z (sample range, sample spread, and a
// factor used to fade the per-sample weight).
PUSHCONSTANT(postprocess, PostProcess);
|
||||
|
||||
// Color source: sampled in main() with sampler_linear_clamp while filling the tile cache.
Texture2D<float4> input : register(t0);
|
||||
// Depth source: loaded in main() at uint3(pixel, layer), where layer is DTid.z.
Texture2DArray<float> input_depth : register(t1);
|
||||
// Encoded normals: loaded in main() at the interleaved pixel and decoded with decode_oct.
Texture2D<float2> input_normal : register(t2);
|
||||
|
||||
// Destination: main() writes float4(diffuse, 1) at the interleaved pixel.
RWTexture2D<float4> output_diffuse : register(u0);
|
||||
|
||||
// THREADCOUNT: the thread group is THREADCOUNT x THREADCOUNT (see numthreads on main).
// TILE_BORDER: number of extra texels cached on every side of the group's own footprint.
#ifdef WIDE
|
||||
static const uint THREADCOUNT = 16;
|
||||
static const int TILE_BORDER = 18;
|
||||
#else
|
||||
static const uint THREADCOUNT = 8;
|
||||
static const int TILE_BORDER = 4;
|
||||
#endif // WIDE
|
||||
// Side length of the cached tile: group footprint plus a border on both sides.
static const int TILE_SIZE = TILE_BORDER + THREADCOUNT + TILE_BORDER;
|
||||
// Per-texel tile cache filled by main(), indexed through coord_to_cache():
// cache_xy holds P.xy packed with pack_half2
groupshared uint cache_xy[TILE_SIZE * TILE_SIZE];
|
||||
// cache_z holds P.z
groupshared float cache_z[TILE_SIZE * TILE_SIZE];
|
||||
// cache_rgb holds the color packed with Pack_R11G11B10_FLOAT
groupshared uint cache_rgb[TILE_SIZE * TILE_SIZE];
|
||||
// Reset to 0 by thread 0, then OR-ed to 1 by any thread that cached a non-zero packed color;
// main() exits early for the whole group while it is still 0.
groupshared uint group_valid;
|
||||
|
||||
// Converts a coordinate relative to the thread group's first pixel into a
// linear index of the tile cache arrays. The tile border is added first, and
// the result is clamped so that coordinates outside the cached tile map to
// its nearest edge texel.
inline uint coord_to_cache(int2 coord)
{
	const int2 tile_coord = clamp(TILE_BORDER + coord, 0, TILE_SIZE - 1);
	return flatten2D(tile_coord, TILE_SIZE);
}
|
||||
|
||||
// Distance falloff used by compute_diffuse(): the term
// saturate(distance2 * radius2_rcp_negative + 1) reaches zero once the squared
// origin-to-sample distance reaches radius2.
// NOTE(review): units are those of the reconstructed positions - confirm.
static const float radius = 14;
|
||||
static const float radius2 = radius * radius;
|
||||
// -1 / radius^2, precomputed so the falloff is a single multiply-add.
static const float radius2_rcp_negative = -rcp(radius2);
|
||||
|
||||
// Interpolation fractions between origin and sample for the "Simple occlusion"
// path in compute_diffuse(). That path is currently compiled out there
// (the DDA path is selected with #if 1), so these are unused as written.
#if 0
|
||||
static const uint depth_test_count = 1;
|
||||
static const float depth_tests[] = {0.33};
|
||||
#else
|
||||
static const uint depth_test_count = 3;
|
||||
static const float depth_tests[] = {0.125, 0.25, 0.75};
|
||||
#endif
|
||||
|
||||
// Computes the diffuse contribution of one cached neighbor sample to the origin pixel.
//	origin_position : position of the shaded pixel, in the same space as the cached positions
//	origin_normal   : normal of the shaded pixel
//	GTid            : coordinate of the shaded pixel inside the thread group (tile border not included)
//	offset          : offset from GTid to the sample, in cache texels
// Returns the sample's cached color scaled by a falloff term (clamped dot of the normal with the
// origin-to-sample vector, times a distance fade that reaches zero at `radius`).
// If a cached texel on the line between origin and sample lies in front of the interpolated ray
// depth, the color of that blocking texel is returned instead (scaled by the same falloff).
// Returns 0 when the sample's depth is within one unit of the camera far plane.
float3 compute_diffuse(
	float3 origin_position,
	float3 origin_normal,
	int2 GTid,
	int2 offset
)
{
	const int2 sampleLoc = GTid + offset;
	const uint t = coord_to_cache(sampleLoc);
	float3 sample_position;
	sample_position.z = cache_z[t];
	if(sample_position.z > GetCamera().z_far - 1)
		return 0; // sample has no valid depth
	sample_position.xy = unpack_half2(cache_xy[t]);
	const float3 origin_to_sample = sample_position - origin_position;
	const float distance2 = dot(origin_to_sample, origin_to_sample);
	float occlusion = saturate(dot(origin_normal, origin_to_sample));
	occlusion *= saturate(distance2 * radius2_rcp_negative + 1.0f);

	// The visibility march is only worth doing when the sample contributes at all:
	if(occlusion > 0)
	{
		const float origin_z = origin_position.z;
		const float sample_z = sample_position.z;

#if 1
		// DDA occlusion:
		const int2 start = GTid;
		const int2 goal = sampleLoc;

		const int dx = int(goal.x) - int(start.x);
		const int dy = int(goal.y) - int(start.y);

		int step = max(abs(dx), abs(dy));
		step = (step + 1) / 2; // reduce steps
		// step is 0 only when goal == start; the loop below then runs zero times,
		// so the non-finite reciprocal and increments are never consumed.
		const float step_rcp = rcp(step);

		const float x_incr = float(dx) * step_rcp;
		const float y_incr = float(dy) * step_rcp;

		float x = float(start.x);
		float y = float(start.y);

		// Visits the interior points of the line (fractions 1/step .. (step-1)/step);
		// the endpoints themselves are never tested.
		for (int i = 0; i < step - 1; i++)
		{
			x += x_incr;
			y += y_incr;

			const int2 loc = int2(round(x), round(y));
			const uint tt = coord_to_cache(loc);

			// The position was already advanced for this iteration, so it sits at
			// fraction (i + 1) / step of the line; the ray depth must be interpolated
			// with the same fraction (using i / step lagged one step behind).
			const float dt = float(i + 1) / float(step);
			const float z = lerp(origin_z, sample_z, dt);

			const float sz = cache_z[tt];
			if(sz < z - 0.1) // 0.1 depth bias before a texel counts as a blocker
			{
				return occlusion * Unpack_R11G11B10_FLOAT(cache_rgb[tt]);
			}
		}
#else
		// Simple occlusion:
		for (uint i = 0; i < depth_test_count; ++i)
		{
			const float dt = depth_tests[i];
			const float z = lerp(origin_z, sample_z, dt);
			const int2 loc = round(lerp(float2(GTid), float2(sampleLoc), dt));
			const uint tt = coord_to_cache(loc);
			const float sz = cache_z[tt];
			if (sz < z - 0.1)
			{
				return occlusion * Unpack_R11G11B10_FLOAT(cache_rgb[tt]);
			}
		}
#endif
	}

	// Nothing blocked the line (or the falloff is zero): use the sample's own color.
	return occlusion * Unpack_R11G11B10_FLOAT(cache_rgb[t]);
}
|
||||
|
||||
// Diffuse gather over a group-shared tile.
// 1) The whole group cooperatively caches position and color for its tile (own footprint plus border).
// 2) Each thread gathers compute_diffuse() over a (2 * range + 1)^2 grid of offsets around its own texel.
// 3) The weight-normalized result is written to the interleaved output pixel selected by DTid.z.
[numthreads(THREADCOUNT, THREADCOUNT, 1)]
void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint2 GTid : SV_GroupThreadID, uint groupIndex : SV_GroupIndex)
{
	const uint layer = DTid.z;
	// Layer index selects one texel of a 4x4 block in the output:
	const uint2 interleaved_pixel = (DTid.xy << 2) | uint2(DTid.z & 3, DTid.z >> 2);

	// One thread resets the group flag before anyone can set it:
	if (groupIndex == 0)
	{
		group_valid = 0;
	}
	GroupMemoryBarrierWithGroupSync();

	// Fill the tile cache; each thread handles every (THREADCOUNT * THREADCOUNT)-th slot.
	const int2 tile_upperleft = Gid.xy * THREADCOUNT - TILE_BORDER;
	for (uint slot = groupIndex; slot < TILE_SIZE * TILE_SIZE; slot += THREADCOUNT * THREADCOUNT)
	{
		const int2 pixel = tile_upperleft + unflatten2D(slot, TILE_SIZE);
		const float2 uv = (pixel + 0.5f) * postprocess.resolution_rcp;

		const float depth = input_depth[uint3(pixel, layer)];
		const float3 P = reconstruct_position(uv, depth, GetCamera().inverse_projection);

		const float3 color = input.SampleLevel(sampler_linear_clamp, uv, 0).rgb;
		const uint pkcolor = Pack_R11G11B10_FLOAT(color.rgb);

		cache_xy[slot] = pack_half2(P.xy);
		cache_z[slot] = P.z;
		cache_rgb[slot] = pkcolor;

		// Remember that the tile holds at least one non-black color:
		if (pkcolor != 0)
		{
			InterlockedOr(group_valid, 1u);
		}
	}
	GroupMemoryBarrierWithGroupSync();

	[branch]
	if (group_valid == 0)
	{
		return; // if no valid color was cached, whole group can exit early
	}

	// Fetch this thread's own texel back from the cache:
	const uint center = coord_to_cache(GTid.xy);
	float3 P;
	P.z = cache_z[center];

	[branch]
	if (P.z > GetCamera().z_far - 1)
	{
		return; // if pixel depth is not valid, it can exit early
	}

	P.xy = unpack_half2(cache_xy[center]);

	const uint2 pixel = DTid.xy; // only referenced by the disabled dither below
	const float3 N = mul((float3x3)GetCamera().view, decode_oct(input_normal[interleaved_pixel].rg));

	const int range = int(postprocess.params0.x);
	const float spread = postprocess.params0.y /*+ dither(pixel)*/;
	const float rangespread_rcp2 = postprocess.params0.z;

	float3 diffuse = 0;
	float sum = 0;

	for (int ox = -range; ox <= range; ++ox)
	{
		for (int oy = -range; oy <= range; ++oy)
		{
			const float2 foffset = float2(ox, oy) * spread;
			const int2 offset = round(foffset);

			// Weight fades with the product of the absolute offset components:
			const float weight = saturate(1 - abs(foffset.x) * abs(foffset.y) * rangespread_rcp2);

			diffuse += compute_diffuse(P, N, GTid, offset) * weight;
			sum += weight;
		}
	}

	if (sum > 0)
	{
		diffuse /= sum;
	}

	// interleave result:
	output_diffuse[interleaved_pixel] = float4(diffuse, 1);
}
|
||||
Reference in New Issue
Block a user