DDGI: packing to R9G9B9E5_SHAREDEXP format

This commit is contained in:
Turánszki János
2022-11-28 10:42:56 +01:00
parent cec57146c4
commit 438e575da7
15 changed files with 218 additions and 13 deletions
@@ -0,0 +1,82 @@
//
// Copyright (c) Microsoft. All rights reserved.
// This code is licensed under the MIT License (MIT).
// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
//
// Developed by Minigraph
//
// Author: James Stanard
//
#ifndef __PIXEL_PACKING_RGBE_HLSLI__
#define __PIXEL_PACKING_RGBE_HLSLI__
#include "ColorSpaceUtility.hlsli"
// RGBE, aka R9G9B9E5_SHAREDEXP, is an unsigned float HDR pixel format where red, green,
// and blue all share the same exponent. The color channels store a 9-bit value ranging
// from [0/512, 511/512] which multiplies by 2^Exp and Exp ranges from [-15, 16].
// Floating point specials are not encoded.
// Packs a linear HDR color into one 32-bit word laid out as
// [31:27] shared exponent | [26:18] blue | [17:9] green | [8:0] red.
//   rgb     : color to encode; each channel is clamped to [0, kMaxVal] before packing.
//   returns : the packed word, decodable with UnpackRGBE().
uint PackRGBE(float3 rgb)
{
// To determine the shared exponent, we must clamp the channels to an expressible range
const float kMaxVal = asfloat(0x477F8000); // 1.FF x 2^+15
const float kMinVal = asfloat(0x37800000); // 1.00 x 2^-16
// Non-negative and <= kMaxVal
rgb = clamp(rgb, 0, kMaxVal);
// From the maximum channel we will determine the exponent. We clamp to a min value
// so that the exponent is within the valid 5-bit range.
float MaxChannel = max(max(kMinVal, rgb.r), max(rgb.g, rgb.b));
// 'Bias' has to have the biggest exponent plus 15 (and nothing in the mantissa). When
// added to the three channels, it shifts the explicit '1' and the 8 most significant
// mantissa bits into the low 9 bits. IEEE rules of float addition will round rather
// than truncate the discarded bits. Channels with smaller natural exponents will be
// shifted further to the right (discarding more bits).
// In the constant, 0x07800000 adds 15 to the float exponent field; the extra 0x4000
// carries into the exponent when the 9 kept bits of MaxChannel would round up to the
// next power of two. The mask then drops the sign and mantissa bits.
float Bias = asfloat((asuint(MaxChannel) + 0x07804000) & 0x7F800000);
// Shift bits into the right places
uint3 RGB = asuint(rgb + Bias);
// '<< 4' moves the low 5 bits of Bias's 8-bit exponent field (bits 23..27) up to bits
// 27..31; the higher exponent bits fall off the top of the word. Adding 0x10000000
// then adds 2 to that 5-bit field (wrapping modulo 32) to rebias it for storage.
uint E = (asuint(Bias) << 4) + 0x10000000;
// Green and blue are shifted far enough that Bias's exponent bits leave the 32-bit
// word; red is not shifted, so it is masked down to its 9 bits explicitly. This
// relies on each biased channel fitting in the low 9 mantissa bits, as arranged above.
return E | RGB.b << 18 | RGB.g << 9 | (RGB.r & 0x1FF);
}
// Decodes a word produced by PackRGBE() back into a linear float3 color.
//   p       : packed value, [31:27] exponent | [26:18] blue | [17:9] green | [8:0] red.
//   returns : each 9-bit mantissa scaled by 2^(exponent - 24).
float3 UnpackRGBE(uint p)
{
    // The stored exponent lives in the top five bits; fold the decode offset in once.
    const int sharedExponent = (int)(p >> 27) - 24;

    // Isolate the three 9-bit mantissas; the assignment converts them to float.
    float3 mantissas = uint3(p, p >> 9, p >> 18) & 0x1FF;

    return ldexp(mantissas, sharedExponent);
}
// This non-standard variant applies a non-linear ramp to the mantissa to get better precision
// with bright and saturated colors. These colors tend to have one or two channels that prop
// up the shared exponent, leaving little to no information in the dark channels.
//
// The word layout is the same as PackRGBE(): [31:27] exponent | [26:18] blue | [17:9] green |
// [8:0] red, but each 9-bit field holds sqrt(channel * Scale) quantized to 0..511, so the
// result must be decoded with UnpackRGBE_sqrt() and not as a standard shared-exponent texel.
//   rgb     : color to encode; each channel is clamped to [0, kMaxVal] before packing.
//   returns : the packed word.
uint PackRGBE_sqrt(float3 rgb)
{
// To determine the shared exponent, we must clamp the channels to an expressible range
const float kMaxVal = asfloat(0x477FFFFF); // 1.FFFFFF x 2^+15
const float kMinVal = asfloat(0x37800000); // 1.000000 x 2^-16
rgb = clamp(rgb, 0, kMaxVal);
// The largest channel (floored at kMinVal) decides the shared exponent.
float MaxChannel = max(max(kMinVal, rgb.r), max(rgb.g, rgb.b));
// Scaling the maximum channel puts it into the range [0, 1). It does this by negating
// and subtracting one from the max exponent.
float Scale = asfloat((0x7EFFFFFF - asuint(MaxChannel)) & 0x7F800000);
// Square-root ramp, then quantize to 9 bits; the +0.5 makes the float-to-uint
// conversion round to nearest instead of truncating.
uint3 RGB = sqrt(rgb * Scale) * 511.0 + 0.5;
// Derive the stored exponent from Scale's exponent field and move it to bits 27..31.
uint E = (0x47000000 - asuint(Scale)) << 4;
// No masking is applied here: the fields are expected to already be at most 511
// because the scaled channels are below 1.
return E | RGB.b << 18 | RGB.g << 9 | RGB.r;
}
// Decodes a word produced by PackRGBE_sqrt() back into a linear float3 color.
//   p       : packed value, [31:27] exponent | [26:18] blue | [17:9] green | [8:0] red.
//   returns : the squared, normalized mantissas scaled by 2^(exponent - 15).
float3 UnpackRGBE_sqrt(uint p)
{
    // Pull out the three 9-bit fields.
    const uint3 fields = uint3(p, p >> 9, p >> 18) & 0x1FF;

    // Normalize to [0, 1]; squaring below undoes the square-root ramp applied when packing.
    float3 ramp = fields / 511.0;

    const int sharedExponent = (int)(p >> 27) - 15;
    return ldexp(ramp * ramp, sharedExponent);
}
#endif // __PIXEL_PACKING_RGBE_HLSLI__
@@ -35,6 +35,7 @@
<None Include="$(MSBuildThisFileDirectory)objectHF_tessellation.hlsli" />
<None Include="$(MSBuildThisFileDirectory)oceanSurfaceHF.hlsli" />
<None Include="$(MSBuildThisFileDirectory)PixelPacking_R11G11B10.hlsli" />
<None Include="$(MSBuildThisFileDirectory)PixelPacking_RGBE.hlsli" />
<None Include="$(MSBuildThisFileDirectory)quad.hlsli" />
<None Include="$(MSBuildThisFileDirectory)raytracingHF.hlsli" />
<None Include="$(MSBuildThisFileDirectory)skyAtmosphere.hlsli" />
@@ -141,6 +141,9 @@
<None Include="$(MSBuildThisFileDirectory)surfaceHF.hlsli">
<Filter>HF</Filter>
</None>
<None Include="$(MSBuildThisFileDirectory)PixelPacking_RGBE.hlsli">
<Filter>HF</Filter>
</None>
</ItemGroup>
<ItemGroup>
<FxCompile Include="$(MSBuildThisFileDirectory)hairparticle_simulateCS.hlsl">
+8 -2
View File
@@ -20,7 +20,7 @@ RWTexture2D<float2> output : register(u0);
RWByteAddressBuffer ddgiOffsetBuffer:register(u1);
#else
static const uint THREADCOUNT = DDGI_COLOR_RESOLUTION;
RWTexture2D<float3> output : register(u0);
RWTexture2D<uint> output : register(u0); // raw uint alias for Format::R9G9B9E5_SHAREDEXP
#endif // DDGI_UPDATE_DEPTH
static const uint CACHE_SIZE = THREADCOUNT * THREADCOUNT;
@@ -136,11 +136,12 @@ void main(uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID, uint groupIndex
result = lerp(prev_result, result, push.blendSpeed);
}
#ifdef DDGI_UPDATE_DEPTH
output[pixel_current] = result;
DeviceMemoryBarrierWithGroupSync();
#ifdef DDGI_UPDATE_DEPTH
// Copy depth borders:
for (uint index = groupIndex; index < 68; index += THREADCOUNT * THREADCOUNT)
{
@@ -160,6 +161,11 @@ void main(uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID, uint groupIndex
ddgiOffsetBuffer.Store<DDGIProbeOffset>(probeIndex * sizeof(DDGIProbeOffset), ofs);
}
#else
output[pixel_current] = PackRGBE(result);
DeviceMemoryBarrierWithGroupSync();
// Copy color borders:
for (uint index = groupIndex; index < 36; index += THREADCOUNT * THREADCOUNT)
{
+1
View File
@@ -2,6 +2,7 @@
#define WI_SHADER_GLOBALS_HF
#include "ColorSpaceUtility.hlsli"
#include "PixelPacking_R11G11B10.hlsli"
#include "PixelPacking_RGBE.hlsli"
#include "ShaderInterop.h"
// The root signature will affect shader compilation for DX12.
+2
View File
@@ -246,6 +246,7 @@ namespace wi::graphics
R32_UINT,
R32_SINT,
D24_UNORM_S8_UINT, // depth (24-bit) + stencil (8-bit) | SRV: R24_INTERNAL
R9G9B9E5_SHAREDEXP,
R8G8_UNORM,
R8G8_UINT,
@@ -1303,6 +1304,7 @@ namespace wi::graphics
case Format::R32_UINT:
case Format::R32_SINT:
case Format::D24_UNORM_S8_UINT:
case Format::R9G9B9E5_SHAREDEXP:
return 4u;
case Format::R8G8_UNORM:
+8
View File
@@ -565,6 +565,10 @@ namespace dx12_internal
return DXGI_FORMAT_R32_UINT;
case Format::R32_SINT:
return DXGI_FORMAT_R32_SINT;
case Format::D24_UNORM_S8_UINT:
return DXGI_FORMAT_D24_UNORM_S8_UINT;
case Format::R9G9B9E5_SHAREDEXP:
return DXGI_FORMAT_R9G9B9E5_SHAREDEXP;
case Format::R8G8_UNORM:
return DXGI_FORMAT_R8G8_UNORM;
case Format::R8G8_UINT:
@@ -875,6 +879,10 @@ namespace dx12_internal
return Format::R32_UINT;
case DXGI_FORMAT_R32_SINT:
return Format::R32_SINT;
case DXGI_FORMAT_D24_UNORM_S8_UINT:
return Format::D24_UNORM_S8_UINT;
case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
return Format::R9G9B9E5_SHAREDEXP;
case DXGI_FORMAT_R8G8_UNORM:
return Format::R8G8_UNORM;
case DXGI_FORMAT_R8G8_UINT:
+5 -3
View File
@@ -110,6 +110,8 @@ namespace vulkan_internal
return VK_FORMAT_R32_SINT;
case Format::D24_UNORM_S8_UINT:
return VK_FORMAT_D24_UNORM_S8_UINT;
case Format::R9G9B9E5_SHAREDEXP:
return VK_FORMAT_E5B9G9R9_UFLOAT_PACK32;
case Format::R8G8_UNORM:
return VK_FORMAT_R8G8_UNORM;
case Format::R8G8_UINT:
@@ -6935,9 +6937,9 @@ using namespace vulkan_internal;
out_image_memory_bind.offset.x = in_coordinate.x * internal_sparse->sparse_texture_properties.tile_width;
out_image_memory_bind.offset.y = in_coordinate.y * internal_sparse->sparse_texture_properties.tile_height;
out_image_memory_bind.offset.z = in_coordinate.z * internal_sparse->sparse_texture_properties.tile_depth;
out_image_memory_bind.extent.width = in_size.width * internal_sparse->sparse_texture_properties.tile_width;
out_image_memory_bind.extent.height = in_size.height * internal_sparse->sparse_texture_properties.tile_height;
out_image_memory_bind.extent.depth = in_size.depth * internal_sparse->sparse_texture_properties.tile_depth;
out_image_memory_bind.extent.width = std::min(texture_desc.width, in_size.width * internal_sparse->sparse_texture_properties.tile_width);
out_image_memory_bind.extent.height = std::min(texture_desc.height, in_size.height * internal_sparse->sparse_texture_properties.tile_height);
out_image_memory_bind.extent.depth = std::min(texture_desc.depth, in_size.depth * internal_sparse->sparse_texture_properties.tile_depth);
}
}
+26
View File
@@ -346,6 +346,32 @@ namespace wi::helper
data32[i] = rgba8;
}
}
else if (desc.format == Format::R9G9B9E5_SHAREDEXP)
{
// This will be converted first to rgba8 before saving to common format:
// Both pointers view the same buffer, so the conversion happens in place, one texel
// at a time. NOTE(review): this relies on XMFLOAT3SE and uint32_t having the same
// size (4 bytes) so that texel i occupies the same bytes in both views - confirm.
XMFLOAT3SE* dataSrc = (XMFLOAT3SE*)texturedata.data();
uint32_t* data32 = (uint32_t*)texturedata.data();
for (uint32_t i = 0; i < data_count; ++i)
{
// Copy the packed texel out before its slot is overwritten at the end of the iteration.
XMFLOAT3SE pixel = dataSrc[i];
XMVECTOR V = XMLoadFloat3SE(&pixel);
XMFLOAT3 pixel3;
XMStoreFloat3(&pixel3, V);
// Clamp to [0, 1]: anything brighter than 1.0 saturates in the 8-bit output.
float r = std::max(0.0f, std::min(pixel3.x, 1.0f));
float g = std::max(0.0f, std::min(pixel3.y, 1.0f));
float b = std::max(0.0f, std::min(pixel3.z, 1.0f));
// The source format has no alpha channel; write the pixel as fully opaque.
float a = 1;
// Assemble the output with red in the low byte and alpha in the high byte.
// The float-to-integer casts truncate rather than round.
uint32_t rgba8 = 0;
rgba8 |= (uint32_t)(r * 255.0f) << 0;
rgba8 |= (uint32_t)(g * 255.0f) << 8;
rgba8 |= (uint32_t)(b * 255.0f) << 16;
rgba8 |= (uint32_t)(a * 255.0f) << 24;
data32[i] = rgba8;
}
}
else if (desc.format == Format::B8G8R8A8_UNORM || desc.format == Format::B8G8R8A8_UNORM_SRGB)
{
// This will be converted first to rgba8 before saving to common format:
+15
View File
@@ -309,6 +309,21 @@ namespace wi::math
XMStoreFloat3PK(&pk, XMLoadFloat3(&color));
return pk.v;
}
// Expands a 32-bit shared-exponent (XMFLOAT3SE) value into three floats.
//   value   : the packed bits, stored verbatim into XMFLOAT3SE::v.
//   returns : the color decoded by XMLoadFloat3SE.
inline XMFLOAT3 Unpack_R9G9B9E5_SHAREDEXP(uint32_t value)
{
    XMFLOAT3SE packed;
    packed.v = value;

    XMFLOAT3 unpacked;
    XMStoreFloat3(&unpacked, XMLoadFloat3SE(&packed));
    return unpacked;
}
inline uint32_t Pack_R9G9B9E5_SHAREDEXP(const XMFLOAT3& value)
{
XMFLOAT3SE se;
XMStoreFloat3SE(&se, XMLoadFloat3(&value));
return se;
}
+2 -3
View File
@@ -8959,7 +8959,6 @@ void DDGI(
GPUBarrier barriers[] = {
GPUBarrier::Memory(),
GPUBarrier::Buffer(&scene.ddgi.ray_buffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE_COMPUTE),
GPUBarrier::Image(&scene.ddgi.color_texture[1], ResourceState::SHADER_RESOURCE_COMPUTE, ResourceState::UNORDERED_ACCESS),
GPUBarrier::Image(&scene.ddgi.depth_texture[1], ResourceState::SHADER_RESOURCE_COMPUTE, ResourceState::UNORDERED_ACCESS),
GPUBarrier::Buffer(&scene.ddgi.offset_buffer, ResourceState::SHADER_RESOURCE_COMPUTE, ResourceState::UNORDERED_ACCESS),
};
@@ -8979,7 +8978,7 @@ void DDGI(
device->BindResources(res, 0, arraysize(res), cmd);
const GPUResource* uavs[] = {
&scene.ddgi.color_texture[1],
&scene.ddgi.color_texture_rw[1],
};
device->BindUAVs(uavs, 0, arraysize(uavs), cmd);
@@ -9013,7 +9012,7 @@ void DDGI(
{
GPUBarrier barriers[] = {
GPUBarrier::Image(&scene.ddgi.color_texture[1], ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE_COMPUTE),
GPUBarrier::Memory(&scene.ddgi.color_texture_rw[1]),
GPUBarrier::Image(&scene.ddgi.depth_texture[1], ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE_COMPUTE),
GPUBarrier::Buffer(&scene.ddgi.offset_buffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE_COMPUTE),
};
+52 -2
View File
@@ -458,23 +458,73 @@ namespace wi::scene
tex.width = DDGI_COLOR_TEXELS * ddgi.grid_dimensions.x * ddgi.grid_dimensions.y;
tex.height = DDGI_COLOR_TEXELS * ddgi.grid_dimensions.z;
//tex.format = Format::R11G11B10_FLOAT; // not enough precision with this format, causes green hue in GI
tex.format = Format::R16G16B16A16_FLOAT;
tex.bind_flags = BindFlag::UNORDERED_ACCESS | BindFlag::SHADER_RESOURCE;
//tex.format = Format::R16G16B16A16_FLOAT; // this is trivial to use but fat
tex.format = Format::R9G9B9E5_SHAREDEXP; // must be packed manually as uint32, good quality and fast to sample
tex.misc_flags = ResourceMiscFlag::SPARSE; // sparse aliasing to write R9G9B9E5_SHAREDEXP as uint
tex.width = std::max(128u, tex.width); // force non-packed mip behaviour
tex.height = std::max(128u, tex.height); // force non-packed mip behaviour
tex.bind_flags = BindFlag::SHADER_RESOURCE;
tex.layout = ResourceState::SHADER_RESOURCE;
device->CreateTexture(&tex, nullptr, &ddgi.color_texture[0]);
device->SetName(&ddgi.color_texture[0], "ddgi.color_texture[0]");
device->CreateTexture(&tex, nullptr, &ddgi.color_texture[1]);
device->SetName(&ddgi.color_texture[1], "ddgi.color_texture[1]");
tex.format = Format::R32_UINT; // packed R9G9B9E5_SHAREDEXP
tex.bind_flags = BindFlag::UNORDERED_ACCESS;
tex.layout = ResourceState::UNORDERED_ACCESS;
device->CreateTexture(&tex, nullptr, &ddgi.color_texture_rw[0]);
device->SetName(&ddgi.color_texture_rw[0], "ddgi.color_texture_rw[0]");
device->CreateTexture(&tex, nullptr, &ddgi.color_texture_rw[1]);
device->SetName(&ddgi.color_texture_rw[1], "ddgi.color_texture_rw[1]");
buf = {};
buf.alignment = ddgi.color_texture_rw[0].sparse_page_size;
buf.size = ddgi.color_texture_rw[0].sparse_properties->total_tile_count * buf.alignment * 2;
buf.misc_flags = ResourceMiscFlag::SPARSE_TILE_POOL_TEXTURE_NON_RT_DS;
device->CreateBuffer(&buf, nullptr, &ddgi.sparse_tile_pool);
SparseUpdateCommand commands[4];
commands[0].sparse_resource = &ddgi.color_texture[0];
commands[0].tile_pool = &ddgi.sparse_tile_pool;
commands[0].num_resource_regions = 1;
uint32_t tile_count = ddgi.color_texture_rw[0].sparse_properties->total_tile_count;
uint32_t tile_offset[2] = { 0, tile_count };
SparseRegionSize region;
region.width = (tex.width + ddgi.color_texture_rw[0].sparse_properties->tile_width - 1) / ddgi.color_texture_rw[0].sparse_properties->tile_width;
region.height = (tex.height + ddgi.color_texture_rw[0].sparse_properties->tile_height - 1) / ddgi.color_texture_rw[0].sparse_properties->tile_height;
SparseResourceCoordinate coordinate;
coordinate.x = 0;
coordinate.y = 0;
TileRangeFlags flags = TileRangeFlags::None;
commands[0].sizes = &region;
commands[0].coordinates = &coordinate;
commands[0].range_flags = &flags;
commands[0].range_tile_counts = &tile_count;
commands[0].range_start_offsets = &tile_offset[0];
commands[1] = commands[0];
commands[1].sparse_resource = &ddgi.color_texture_rw[0];
commands[2] = commands[0];
commands[2].sparse_resource = &ddgi.color_texture[1];
commands[2].range_start_offsets = &tile_offset[1];
commands[3] = commands[0];
commands[3].sparse_resource = &ddgi.color_texture_rw[1];
commands[3].range_start_offsets = &tile_offset[1];
device->SparseUpdate(QUEUE_COMPUTE, commands, arraysize(commands));
tex.width = DDGI_DEPTH_TEXELS * ddgi.grid_dimensions.x * ddgi.grid_dimensions.y;
tex.height = DDGI_DEPTH_TEXELS * ddgi.grid_dimensions.z;
tex.format = Format::R16G16_FLOAT;
tex.misc_flags = {};
tex.bind_flags = BindFlag::UNORDERED_ACCESS | BindFlag::SHADER_RESOURCE;
tex.layout = ResourceState::SHADER_RESOURCE;
device->CreateTexture(&tex, nullptr, &ddgi.depth_texture[0]);
device->SetName(&ddgi.depth_texture[0], "ddgi.depth_texture[0]");
device->CreateTexture(&tex, nullptr, &ddgi.depth_texture[1]);
device->SetName(&ddgi.depth_texture[1], "ddgi.depth_texture[1]");
}
std::swap(ddgi.color_texture[0], ddgi.color_texture[1]);
std::swap(ddgi.color_texture_rw[0], ddgi.color_texture_rw[1]);
std::swap(ddgi.depth_texture[0], ddgi.depth_texture[1]);
ddgi.grid_min = bounds.getMin();
ddgi.grid_min.x -= 1;
+2
View File
@@ -167,7 +167,9 @@ namespace wi::scene
float smooth_backface = 0; // smoothness of backface test
wi::graphics::GPUBuffer ray_buffer;
wi::graphics::GPUBuffer offset_buffer;
wi::graphics::GPUBuffer sparse_tile_pool;
wi::graphics::Texture color_texture[2];
wi::graphics::Texture color_texture_rw[2]; // alias of color_texture
wi::graphics::Texture depth_texture[2];
void Serialize(wi::Archive& archive);
+10 -2
View File
@@ -1807,8 +1807,16 @@ namespace wi::scene
TextureDesc desc;
desc.width = DDGI_COLOR_TEXELS * grid_dimensions.x * grid_dimensions.y;
desc.height = DDGI_COLOR_TEXELS * grid_dimensions.z;
desc.format = Format::R16G16B16A16_FLOAT;
desc.bind_flags = BindFlag::UNORDERED_ACCESS | BindFlag::SHADER_RESOURCE;
if (data.size() == desc.width * desc.height * GetFormatStride(Format::R9G9B9E5_SHAREDEXP))
{
desc.format = Format::R9G9B9E5_SHAREDEXP;
}
else
{
assert(data.size() == desc.width * desc.height * GetFormatStride(Format::R16G16B16A16_FLOAT));
desc.format = Format::R16G16B16A16_FLOAT;
}
desc.bind_flags = BindFlag::SHADER_RESOURCE;
SubresourceData initdata;
initdata.data_ptr = data.data();
+1 -1
View File
@@ -9,7 +9,7 @@ namespace wi::version
// minor features, major updates, breaking compatibility changes
const int minor = 71;
// minor bug fixes, alterations, refactors, updates
const int revision = 104;
const int revision = 105;
const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);