From d88122e28e6c8c650ea5f571d3d0407e79efdb8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tur=C3=A1nszki=20J=C3=A1nos?= Date: Mon, 29 May 2023 21:56:48 +0200 Subject: [PATCH] Envmap BC6 compression (#686) --- WickedEngine/offlineshadercompiler.cpp | 2 + WickedEngine/shaders/ShaderInterop_Renderer.h | 14 +- WickedEngine/shaders/Shaders_SOURCE.vcxitems | 8 + .../shaders/Shaders_SOURCE.vcxitems.filters | 6 + WickedEngine/shaders/aerialPerspectiveCS.hlsl | 8 +- .../shaders/blockcompressCS_BC6H.hlsl | 794 ++++++++++++++++++ .../shaders/blockcompressCS_BC6H_cubemap.hlsl | 2 + WickedEngine/shaders/filterEnvMapCS.hlsl | 6 +- .../shaders/volumetricCloud_renderCS.hlsl | 6 +- WickedEngine/wiEnums.h | 2 + WickedEngine/wiGraphicsDevice_DX12.cpp | 2 +- WickedEngine/wiGraphicsDevice_Vulkan.cpp | 4 + WickedEngine/wiRenderer.cpp | 185 ++-- WickedEngine/wiRenderer.h | 5 +- WickedEngine/wiResourceManager.cpp | 56 +- WickedEngine/wiScene.cpp | 78 +- WickedEngine/wiScene.h | 3 +- WickedEngine/wiVersion.cpp | 2 +- third_party_software.txt | 26 + 19 files changed, 1042 insertions(+), 167 deletions(-) create mode 100644 WickedEngine/shaders/blockcompressCS_BC6H.hlsl create mode 100644 WickedEngine/shaders/blockcompressCS_BC6H_cubemap.hlsl diff --git a/WickedEngine/offlineshadercompiler.cpp b/WickedEngine/offlineshadercompiler.cpp index 8c538225e..ddf284ed6 100644 --- a/WickedEngine/offlineshadercompiler.cpp +++ b/WickedEngine/offlineshadercompiler.cpp @@ -33,6 +33,8 @@ wi::vector shaders = { {"blockcompressCS_BC3", wi::graphics::ShaderStage::CS}, {"blockcompressCS_BC4", wi::graphics::ShaderStage::CS}, {"blockcompressCS_BC5", wi::graphics::ShaderStage::CS}, + {"blockcompressCS_BC6H", wi::graphics::ShaderStage::CS}, + {"blockcompressCS_BC6H_cubemap", wi::graphics::ShaderStage::CS}, {"blur_gaussian_float4CS", wi::graphics::ShaderStage::CS}, {"bloomseparateCS", wi::graphics::ShaderStage::CS}, {"depthoffield_mainCS", wi::graphics::ShaderStage::CS}, diff --git 
a/WickedEngine/shaders/ShaderInterop_Renderer.h b/WickedEngine/shaders/ShaderInterop_Renderer.h index f182d6eb4..fc7d9f4e0 100644 --- a/WickedEngine/shaders/ShaderInterop_Renderer.h +++ b/WickedEngine/shaders/ShaderInterop_Renderer.h @@ -1195,14 +1195,16 @@ struct FilterEnvmapPushConstants { uint2 filterResolution; float2 filterResolution_rcp; - uint filterArrayIndex; + float filterRoughness; uint filterRayCount; uint padding_filterCB; int texture_input; + int texture_output; int padding0; int padding1; + int padding2; }; // CopyTexture2D params: @@ -1277,10 +1279,10 @@ struct AerialPerspectiveCapturePushConstants uint2 resolution; float2 resolution_rcp; - uint arrayIndex; int texture_input; int texture_output; - float padding; + float padding0; + float padding1; }; struct VolumetricCloudCapturePushConstants @@ -1288,15 +1290,15 @@ struct VolumetricCloudCapturePushConstants uint2 resolution; float2 resolution_rcp; - uint arrayIndex; int texture_input; int texture_output; int maxStepCount; - float LODMin; + float shadowSampleCount; float groundContributionSampleCount; - float padding; + float padding0; + float padding1; }; diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems b/WickedEngine/shaders/Shaders_SOURCE.vcxitems index 16a85a7d4..ee980871c 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems @@ -70,6 +70,14 @@ Compute 4.0 + + Compute + 4.0 + + + Compute + 4.0 + Compute Compute diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters index 31e105716..4722a0f7b 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters @@ -1100,6 +1100,12 @@ CS + + CS + + + CS + diff --git a/WickedEngine/shaders/aerialPerspectiveCS.hlsl b/WickedEngine/shaders/aerialPerspectiveCS.hlsl index 8f2978be4..2bcd5611a 100644 --- a/WickedEngine/shaders/aerialPerspectiveCS.hlsl +++ 
b/WickedEngine/shaders/aerialPerspectiveCS.hlsl @@ -95,7 +95,7 @@ void RenderAerialPerspective(uint3 DTid, float2 uv, float depth, float3 depthWor [numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)] void main(uint3 DTid : SV_DispatchThreadID) { - TextureCubeArray input = bindless_cubearrays[capture.texture_input]; + TextureCube input = bindless_cubemaps[capture.texture_input]; RWTexture2DArray output = bindless_rwtextures2DArray[capture.texture_output]; const float2 uv = (DTid.xy + 0.5) * capture.resolution_rcp; @@ -113,12 +113,12 @@ void main(uint3 DTid : SV_DispatchThreadID) const float depth = texture_input_depth.SampleLevel(sampler_point_clamp, N, 0).r; #endif // MSAA - float4 composite = input.SampleLevel(sampler_linear_clamp, float4(N, capture.arrayIndex), 0); + float4 composite = input.SampleLevel(sampler_linear_clamp, N, 0); // Ignore skybox if (depth == 0.0) { - output[uint3(DTid.xy, DTid.z + capture.arrayIndex * 6)] = composite; + output[uint3(DTid.xy, DTid.z)] = composite; return; } @@ -134,7 +134,7 @@ void main(uint3 DTid : SV_DispatchThreadID) float4 result = float4(luminance, transmittance); // Output - output[uint3(DTid.xy, DTid.z + capture.arrayIndex * 6)] = float4(composite.rgb * (1.0 - result.a) + result.rgb, composite.a * (1.0 - result.a)); + output[uint3(DTid.xy, DTid.z)] = float4(composite.rgb * (1.0 - result.a) + result.rgb, composite.a * (1.0 - result.a)); } #else [numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)] diff --git a/WickedEngine/shaders/blockcompressCS_BC6H.hlsl b/WickedEngine/shaders/blockcompressCS_BC6H.hlsl new file mode 100644 index 000000000..29505b94c --- /dev/null +++ b/WickedEngine/shaders/blockcompressCS_BC6H.hlsl @@ -0,0 +1,794 @@ +// Modified version of: https://github.com/knarkowicz/GPURealTimeBC6H/blob/master/bin/compress.hlsl +#include "globals.hlsli" + +#pragma warning(disable : 3078) // "loop control variable conflicts with a previous declaration in the outer scope" + +// Whether to use P2 modes 
(4 endpoints) for compression. Slow, but improves quality. +#define ENCODE_P2 0 + +// Improve quality at small performance loss +#define INSET_COLOR_BBOX 1 +#define OPTIMIZE_ENDPOINTS 0 + +// Whether to optimize for luminance error or for RGB error +#define LUMINANCE_WEIGHTS 1 + + +static const float HALF_MAX = 65504.0f; +static const uint PATTERN_NUM = 32; + +#ifdef COMPRESS_CUBEMAP +TextureCube SrcTexture : register(t0); +RWTexture2DArray OutputTexture : register(u0); +#else +Texture2D SrcTexture : register(t0); +RWTexture2D OutputTexture : register(u0); +#endif // COMPRESS_CUBEMAP + +float CalcMSLE(float3 a, float3 b) +{ + float3 delta = log2((b + 1.0f) / (a + 1.0f)); + float3 deltaSq = delta * delta; + +#if LUMINANCE_WEIGHTS + float3 luminanceWeights = float3(0.299f, 0.587f, 0.114f); + deltaSq *= luminanceWeights; +#endif + + return deltaSq.x + deltaSq.y + deltaSq.z; +} + +uint PatternFixupID(uint i) +{ + uint ret = 15; + ret = ((3441033216u >> i) & 0x1) ? 2 : ret; + ret = ((845414400u >> i) & 0x1) ? 8 : ret; + return ret; +} + +uint Pattern(uint p, uint i) +{ + uint p2 = p / 2; + uint p3 = p - p2 * 2; + + uint enc = 0; + enc = p2 == 0 ? 2290666700 : enc; + enc = p2 == 1 ? 3972591342 : enc; + enc = p2 == 2 ? 4276930688 : enc; + enc = p2 == 3 ? 3967876808 : enc; + enc = p2 == 4 ? 4293707776 : enc; + enc = p2 == 5 ? 3892379264 : enc; + enc = p2 == 6 ? 4278255592 : enc; + enc = p2 == 7 ? 4026597360 : enc; + enc = p2 == 8 ? 9369360 : enc; + enc = p2 == 9 ? 147747072 : enc; + enc = p2 == 10 ? 1930428556 : enc; + enc = p2 == 11 ? 2362323200 : enc; + enc = p2 == 12 ? 823134348 : enc; + enc = p2 == 13 ? 913073766 : enc; + enc = p2 == 14 ? 267393000 : enc; + enc = p2 == 15 ? 966553998 : enc; + + enc = p3 ? 
enc >> 16 : enc; + uint ret = (enc >> i) & 0x1; + return ret; +} + +float3 Quantize7(float3 x) +{ + return (f32tof16(x) * 128.0f) / (0x7bff + 1.0f); +} + +float3 Quantize9(float3 x) +{ + return (f32tof16(x) * 512.0f) / (0x7bff + 1.0f); +} + +float3 Quantize10(float3 x) +{ + return (f32tof16(x) * 1024.0f) / (0x7bff + 1.0f); +} + +float3 Unquantize7(float3 x) +{ + return (x * 65536.0f + 0x8000) / 128.0f; +} + +float3 Unquantize9(float3 x) +{ + return (x * 65536.0f + 0x8000) / 512.0f; +} + +float3 Unquantize10(float3 x) +{ + return (x * 65536.0f + 0x8000) / 1024.0f; +} + +float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) +{ + float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 4096.0f); + return f16tof32(uint3(comp)); +} + +void Swap(inout float3 a, inout float3 b) +{ + float3 tmp = a; + a = b; + b = tmp; +} + +void Swap(inout float a, inout float b) +{ + float tmp = a; + a = b; + b = tmp; +} + +uint ComputeIndex3(float texelPos, float endPoint0Pos, float endPoint1Pos) +{ + float r = (texelPos - endPoint0Pos) / (endPoint1Pos - endPoint0Pos); + return (uint) clamp(r * 6.98182f + 0.00909f + 0.5f, 0.0f, 7.0f); +} + +uint ComputeIndex4(float texelPos, float endPoint0Pos, float endPoint1Pos) +{ + float r = (texelPos - endPoint0Pos) / (endPoint1Pos - endPoint0Pos); + return (uint) clamp(r * 14.93333f + 0.03333f + 0.5f, 0.0f, 15.0f); +} + +void SignExtend(inout float3 v1, uint mask, uint signFlag) +{ + int3 v = (int3) v1; + v.x = (v.x & mask) | (v.x < 0 ? signFlag : 0); + v.y = (v.y & mask) | (v.y < 0 ? signFlag : 0); + v.z = (v.z & mask) | (v.z < 0 ? signFlag : 0); + v1 = v; +} + +// Refine endpoints by insetting bounding box in log2 RGB space +void InsetColorBBoxP1(float3 texels[16], inout float3 blockMin, inout float3 blockMax) +{ + float3 refinedBlockMin = blockMax; + float3 refinedBlockMax = blockMin; + + for (uint i = 0; i < 16; ++i) + { + refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? 
refinedBlockMin : texels[i]); + refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? refinedBlockMax : texels[i]); + } + + float3 logRefinedBlockMax = log2(refinedBlockMax + 1.0f); + float3 logRefinedBlockMin = log2(refinedBlockMin + 1.0f); + + float3 logBlockMax = log2(blockMax + 1.0f); + float3 logBlockMin = log2(blockMin + 1.0f); + float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f); + + logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt); + logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt); + + blockMin = exp2(logBlockMin) - 1.0f; + blockMax = exp2(logBlockMax) - 1.0f; +} + +// Refine endpoints by insetting bounding box in log2 RGB space +void InsetColorBBoxP2(float3 texels[16], uint pattern, uint patternSelector, inout float3 blockMin, inout float3 blockMax) +{ + float3 refinedBlockMin = blockMax; + float3 refinedBlockMax = blockMin; + + for (uint i = 0; i < 16; ++i) + { + uint paletteID = Pattern(pattern, i); + if (paletteID == patternSelector) + { + refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? refinedBlockMin : texels[i]); + refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? 
refinedBlockMax : texels[i]); + } + } + + float3 logRefinedBlockMax = log2(refinedBlockMax + 1.0f); + float3 logRefinedBlockMin = log2(refinedBlockMin + 1.0f); + + float3 logBlockMax = log2(blockMax + 1.0f); + float3 logBlockMin = log2(blockMin + 1.0f); + float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f); + + logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt); + logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt); + + blockMin = exp2(logBlockMin) - 1.0f; + blockMax = exp2(logBlockMax) - 1.0f; +} + +// Least squares optimization to find best endpoints for the selected block indices +void OptimizeEndpointsP1(float3 texels[16], inout float3 blockMin, inout float3 blockMax) +{ + float3 blockDir = blockMax - blockMin; + blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z); + + float endPoint0Pos = f32tof16(dot(blockMin, blockDir)); + float endPoint1Pos = f32tof16(dot(blockMax, blockDir)); + + float3 alphaTexelSum = 0.0f; + float3 betaTexelSum = 0.0f; + float alphaBetaSum = 0.0f; + float alphaSqSum = 0.0f; + float betaSqSum = 0.0f; + + for (int i = 0; i < 16; i++) + { + float texelPos = f32tof16(dot(texels[i], blockDir)); + uint texelIndex = ComputeIndex4(texelPos, endPoint0Pos, endPoint1Pos); + + float beta = saturate(texelIndex / 15.0f); + float alpha = 1.0f - beta; + + float3 texelF16 = f32tof16(texels[i].xyz); + alphaTexelSum += alpha * texelF16; + betaTexelSum += beta * texelF16; + + alphaBetaSum += alpha * beta; + + alphaSqSum += alpha * alpha; + betaSqSum += beta * beta; + } + + float det = alphaSqSum * betaSqSum - alphaBetaSum * alphaBetaSum; + + if (abs(det) > 0.00001f) + { + float detRcp = rcp(det); + blockMin = f16tof32(clamp(detRcp * (alphaTexelSum * betaSqSum - betaTexelSum * alphaBetaSum), 0.0f, HALF_MAX)); + blockMax = f16tof32(clamp(detRcp * (betaTexelSum * alphaSqSum - alphaTexelSum * alphaBetaSum), 0.0f, HALF_MAX)); + } +} + +// Least squares optimization to find best endpoints for the 
selected block indices +void OptimizeEndpointsP2(float3 texels[16], uint pattern, uint patternSelector, inout float3 blockMin, inout float3 blockMax) +{ + float3 blockDir = blockMax - blockMin; + blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z); + + float endPoint0Pos = f32tof16(dot(blockMin, blockDir)); + float endPoint1Pos = f32tof16(dot(blockMax, blockDir)); + + float3 alphaTexelSum = 0.0f; + float3 betaTexelSum = 0.0f; + float alphaBetaSum = 0.0f; + float alphaSqSum = 0.0f; + float betaSqSum = 0.0f; + + for (int i = 0; i < 16; i++) + { + uint paletteID = Pattern(pattern, i); + if (paletteID == patternSelector) + { + float texelPos = f32tof16(dot(texels[i], blockDir)); + uint texelIndex = ComputeIndex3(texelPos, endPoint0Pos, endPoint1Pos); + + float beta = saturate(texelIndex / 7.0f); + float alpha = 1.0f - beta; + + float3 texelF16 = f32tof16(texels[i].xyz); + alphaTexelSum += alpha * texelF16; + betaTexelSum += beta * texelF16; + + alphaBetaSum += alpha * beta; + + alphaSqSum += alpha * alpha; + betaSqSum += beta * beta; + } + } + + float det = alphaSqSum * betaSqSum - alphaBetaSum * alphaBetaSum; + + if (abs(det) > 0.00001f) + { + float detRcp = rcp(det); + blockMin = f16tof32(clamp(detRcp * (alphaTexelSum * betaSqSum - betaTexelSum * alphaBetaSum), 0.0f, HALF_MAX)); + blockMax = f16tof32(clamp(detRcp * (betaTexelSum * alphaSqSum - alphaTexelSum * alphaBetaSum), 0.0f, HALF_MAX)); + } +} + +void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) +{ + // compute endpoints (min/max RGB bbox) + float3 blockMin = texels[0]; + float3 blockMax = texels[0]; + uint i; + for (i = 1; i < 16; ++i) + { + blockMin = min(blockMin, texels[i]); + blockMax = max(blockMax, texels[i]); + } + +#if INSET_COLOR_BBOX + InsetColorBBoxP1(texels, blockMin, blockMax); +#endif + +#if OPTIMIZE_ENDPOINTS + OptimizeEndpointsP1(texels, blockMin, blockMax); +#endif + + + float3 blockDir = blockMax - blockMin; + blockDir = blockDir / (blockDir.x + blockDir.y + 
blockDir.z); + + float3 endpoint0 = Quantize10(blockMin); + float3 endpoint1 = Quantize10(blockMax); + float endPoint0Pos = f32tof16(dot(blockMin, blockDir)); + float endPoint1Pos = f32tof16(dot(blockMax, blockDir)); + + // check if endpoint swap is required + float fixupTexelPos = f32tof16(dot(texels[0], blockDir)); + uint fixupIndex = ComputeIndex4(fixupTexelPos, endPoint0Pos, endPoint1Pos); + if (fixupIndex > 7) + { + Swap(endPoint0Pos, endPoint1Pos); + Swap(endpoint0, endpoint1); + } + + // compute indices + uint indices[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + for (i = 0; i < 16; ++i) + { + float texelPos = f32tof16(dot(texels[i], blockDir)); + indices[i] = ComputeIndex4(texelPos, endPoint0Pos, endPoint1Pos); + } + + // compute compression error (MSLE) + float3 endpoint0Unq = Unquantize10(endpoint0); + float3 endpoint1Unq = Unquantize10(endpoint1); + float msle = 0.0f; + for (i = 0; i < 16; ++i) + { + float weight = floor((indices[i] * 64.0f) / 15.0f + 0.5f); + float3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight); + + msle += CalcMSLE(texels[i], texelUnc); + } + + + // encode block for mode 11 + blockMSLE = msle; + block.x = 0x03; + + // endpoints + block.x |= (uint) endpoint0.x << 5; + block.x |= (uint) endpoint0.y << 15; + block.x |= (uint) endpoint0.z << 25; + block.y |= (uint) endpoint0.z >> 7; + block.y |= (uint) endpoint1.x << 3; + block.y |= (uint) endpoint1.y << 13; + block.y |= (uint) endpoint1.z << 23; + block.z |= (uint) endpoint1.z >> 9; + + // indices + block.z |= indices[0] << 1; + block.z |= indices[1] << 4; + block.z |= indices[2] << 8; + block.z |= indices[3] << 12; + block.z |= indices[4] << 16; + block.z |= indices[5] << 20; + block.z |= indices[6] << 24; + block.z |= indices[7] << 28; + block.w |= indices[8] << 0; + block.w |= indices[9] << 4; + block.w |= indices[10] << 8; + block.w |= indices[11] << 12; + block.w |= indices[12] << 16; + block.w |= indices[13] << 20; + block.w |= indices[14] << 24; + 
block.w |= indices[15] << 28; +} + +float DistToLineSq(float3 PointOnLine, float3 LineDirection, float3 Point) +{ + float3 w = Point - PointOnLine; + float3 x = w - dot(w, LineDirection) * LineDirection; + return dot(x, x); +} + +// Evaluate how good is given P2 pattern for encoding current block +float EvaluateP2Pattern(int pattern, float3 texels[16]) +{ + float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); + float3 p0BlockMax = float3(0.0f, 0.0f, 0.0f); + float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); + float3 p1BlockMax = float3(0.0f, 0.0f, 0.0f); + + uint i; + for (i = 0; i < 16; ++i) + { + uint paletteID = Pattern(pattern, i); + if (paletteID == 0) + { + p0BlockMin = min(p0BlockMin, texels[i]); + p0BlockMax = max(p0BlockMax, texels[i]); + } + else + { + p1BlockMin = min(p1BlockMin, texels[i]); + p1BlockMax = max(p1BlockMax, texels[i]); + } + } + + float3 p0BlockDir = normalize(p0BlockMax - p0BlockMin); + float3 p1BlockDir = normalize(p1BlockMax - p1BlockMin); + + float sqDistanceFromLine = 0.0f; + + for (i = 0; i < 16; ++i) + { + uint paletteID = Pattern(pattern, i); + if (paletteID == 0) + { + sqDistanceFromLine += DistToLineSq(p0BlockMin, p0BlockDir, texels[i]); + } + else + { + sqDistanceFromLine += DistToLineSq(p1BlockMin, p1BlockDir, texels[i]); + } + } + + return sqDistanceFromLine; +} + +void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, int pattern, float3 texels[16]) +{ + float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); + float3 p0BlockMax = float3(0.0f, 0.0f, 0.0f); + float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); + float3 p1BlockMax = float3(0.0f, 0.0f, 0.0f); + + uint i; + for (i = 0; i < 16; ++i) + { + uint paletteID = Pattern(pattern, i); + if (paletteID == 0) + { + p0BlockMin = min(p0BlockMin, texels[i]); + p0BlockMax = max(p0BlockMax, texels[i]); + } + else + { + p1BlockMin = min(p1BlockMin, texels[i]); + p1BlockMax = max(p1BlockMax, texels[i]); + } + } + +#if INSET_COLOR_BBOX + // Disabled 
because it was a negligible quality increase + //InsetColorBBoxP2(texels, pattern, 0, p0BlockMin, p0BlockMax); + //InsetColorBBoxP2(texels, pattern, 1, p1BlockMin, p1BlockMax); +#endif + +#if OPTIMIZE_ENDPOINTS + OptimizeEndpointsP2(texels, pattern, 0, p0BlockMin, p0BlockMax); + OptimizeEndpointsP2(texels, pattern, 1, p1BlockMin, p1BlockMax); +#endif + + float3 p0BlockDir = p0BlockMax - p0BlockMin; + float3 p1BlockDir = p1BlockMax - p1BlockMin; + p0BlockDir = p0BlockDir / (p0BlockDir.x + p0BlockDir.y + p0BlockDir.z); + p1BlockDir = p1BlockDir / (p1BlockDir.x + p1BlockDir.y + p1BlockDir.z); + + + float p0Endpoint0Pos = f32tof16(dot(p0BlockMin, p0BlockDir)); + float p0Endpoint1Pos = f32tof16(dot(p0BlockMax, p0BlockDir)); + float p1Endpoint0Pos = f32tof16(dot(p1BlockMin, p1BlockDir)); + float p1Endpoint1Pos = f32tof16(dot(p1BlockMax, p1BlockDir)); + + + uint fixupID = PatternFixupID(pattern); + float p0FixupTexelPos = f32tof16(dot(texels[0], p0BlockDir)); + float p1FixupTexelPos = f32tof16(dot(texels[fixupID], p1BlockDir)); + uint p0FixupIndex = ComputeIndex3(p0FixupTexelPos, p0Endpoint0Pos, p0Endpoint1Pos); + uint p1FixupIndex = ComputeIndex3(p1FixupTexelPos, p1Endpoint0Pos, p1Endpoint1Pos); + if (p0FixupIndex > 3) + { + Swap(p0Endpoint0Pos, p0Endpoint1Pos); + Swap(p0BlockMin, p0BlockMax); + } + if (p1FixupIndex > 3) + { + Swap(p1Endpoint0Pos, p1Endpoint1Pos); + Swap(p1BlockMin, p1BlockMax); + } + + uint indices[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + for (i = 0; i < 16; ++i) + { + float p0TexelPos = f32tof16(dot(texels[i], p0BlockDir)); + float p1TexelPos = f32tof16(dot(texels[i], p1BlockDir)); + uint p0Index = ComputeIndex3(p0TexelPos, p0Endpoint0Pos, p0Endpoint1Pos); + uint p1Index = ComputeIndex3(p1TexelPos, p1Endpoint0Pos, p1Endpoint1Pos); + + uint paletteID = Pattern(pattern, i); + indices[i] = paletteID == 0 ? 
p0Index : p1Index; + } + + float3 endpoint760 = floor(Quantize7(p0BlockMin)); + float3 endpoint761 = floor(Quantize7(p0BlockMax)); + float3 endpoint762 = floor(Quantize7(p1BlockMin)); + float3 endpoint763 = floor(Quantize7(p1BlockMax)); + + float3 endpoint950 = floor(Quantize9(p0BlockMin)); + float3 endpoint951 = floor(Quantize9(p0BlockMax)); + float3 endpoint952 = floor(Quantize9(p1BlockMin)); + float3 endpoint953 = floor(Quantize9(p1BlockMax)); + + endpoint761 = endpoint761 - endpoint760; + endpoint762 = endpoint762 - endpoint760; + endpoint763 = endpoint763 - endpoint760; + + endpoint951 = endpoint951 - endpoint950; + endpoint952 = endpoint952 - endpoint950; + endpoint953 = endpoint953 - endpoint950; + + int maxVal76 = 0x1F; + endpoint761 = clamp(endpoint761, -maxVal76, maxVal76); + endpoint762 = clamp(endpoint762, -maxVal76, maxVal76); + endpoint763 = clamp(endpoint763, -maxVal76, maxVal76); + + int maxVal95 = 0xF; + endpoint951 = clamp(endpoint951, -maxVal95, maxVal95); + endpoint952 = clamp(endpoint952, -maxVal95, maxVal95); + endpoint953 = clamp(endpoint953, -maxVal95, maxVal95); + + float3 endpoint760Unq = Unquantize7(endpoint760); + float3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761); + float3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762); + float3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763); + float3 endpoint950Unq = Unquantize9(endpoint950); + float3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951); + float3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952); + float3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953); + + float msle76 = 0.0f; + float msle95 = 0.0f; + for (i = 0; i < 16; ++i) + { + uint paletteID = Pattern(pattern, i); + + float3 tmp760Unq = paletteID == 0 ? endpoint760Unq : endpoint762Unq; + float3 tmp761Unq = paletteID == 0 ? endpoint761Unq : endpoint763Unq; + float3 tmp950Unq = paletteID == 0 ? endpoint950Unq : endpoint952Unq; + float3 tmp951Unq = paletteID == 0 ? 
endpoint951Unq : endpoint953Unq; + + float weight = floor((indices[i] * 64.0f) / 7.0f + 0.5f); + float3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight); + float3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight); + + msle76 += CalcMSLE(texels[i], texelUnc76); + msle95 += CalcMSLE(texels[i], texelUnc95); + } + + SignExtend(endpoint761, 0x1F, 0x20); + SignExtend(endpoint762, 0x1F, 0x20); + SignExtend(endpoint763, 0x1F, 0x20); + + SignExtend(endpoint951, 0xF, 0x10); + SignExtend(endpoint952, 0xF, 0x10); + SignExtend(endpoint953, 0xF, 0x10); + + // encode block + float p2MSLE = min(msle76, msle95); + if (p2MSLE < blockMSLE) + { + blockMSLE = p2MSLE; + block = uint4(0, 0, 0, 0); + + if (p2MSLE == msle76) + { + // 7.6 + block.x = 0x1; + block.x |= ((uint) endpoint762.y & 0x20) >> 3; + block.x |= ((uint) endpoint763.y & 0x10) >> 1; + block.x |= ((uint) endpoint763.y & 0x20) >> 1; + block.x |= (uint) endpoint760.x << 5; + block.x |= ((uint) endpoint763.z & 0x01) << 12; + block.x |= ((uint) endpoint763.z & 0x02) << 12; + block.x |= ((uint) endpoint762.z & 0x10) << 10; + block.x |= (uint) endpoint760.y << 15; + block.x |= ((uint) endpoint762.z & 0x20) << 17; + block.x |= ((uint) endpoint763.z & 0x04) << 21; + block.x |= ((uint) endpoint762.y & 0x10) << 20; + block.x |= (uint) endpoint760.z << 25; + block.y |= ((uint) endpoint763.z & 0x08) >> 3; + block.y |= ((uint) endpoint763.z & 0x20) >> 4; + block.y |= ((uint) endpoint763.z & 0x10) >> 2; + block.y |= (uint) endpoint761.x << 3; + block.y |= ((uint) endpoint762.y & 0x0F) << 9; + block.y |= (uint) endpoint761.y << 13; + block.y |= ((uint) endpoint763.y & 0x0F) << 19; + block.y |= (uint) endpoint761.z << 23; + block.y |= ((uint) endpoint762.z & 0x07) << 29; + block.z |= ((uint) endpoint762.z & 0x08) >> 3; + block.z |= (uint) endpoint762.x << 1; + block.z |= (uint) endpoint763.x << 7; + } + else + { + // 9.5 + block.x = 0xE; + block.x |= (uint) endpoint950.x << 5; + block.x |= ((uint) endpoint952.z & 
0x10) << 10; + block.x |= (uint) endpoint950.y << 15; + block.x |= ((uint) endpoint952.y & 0x10) << 20; + block.x |= (uint) endpoint950.z << 25; + block.y |= (uint) endpoint950.z >> 7; + block.y |= ((uint) endpoint953.z & 0x10) >> 2; + block.y |= (uint) endpoint951.x << 3; + block.y |= ((uint) endpoint953.y & 0x10) << 4; + block.y |= ((uint) endpoint952.y & 0x0F) << 9; + block.y |= (uint) endpoint951.y << 13; + block.y |= ((uint) endpoint953.z & 0x01) << 18; + block.y |= ((uint) endpoint953.y & 0x0F) << 19; + block.y |= (uint) endpoint951.z << 23; + block.y |= ((uint) endpoint953.z & 0x02) << 27; + block.y |= (uint) endpoint952.z << 29; + block.z |= ((uint) endpoint952.z & 0x08) >> 3; + block.z |= (uint) endpoint952.x << 1; + block.z |= ((uint) endpoint953.z & 0x04) << 4; + block.z |= (uint) endpoint953.x << 7; + block.z |= ((uint) endpoint953.z & 0x08) << 9; + } + + block.z |= pattern << 13; + uint blockFixupID = PatternFixupID(pattern); + if (blockFixupID == 15) + { + block.z |= indices[0] << 18; + block.z |= indices[1] << 20; + block.z |= indices[2] << 23; + block.z |= indices[3] << 26; + block.z |= indices[4] << 29; + block.w |= indices[5] << 0; + block.w |= indices[6] << 3; + block.w |= indices[7] << 6; + block.w |= indices[8] << 9; + block.w |= indices[9] << 12; + block.w |= indices[10] << 15; + block.w |= indices[11] << 18; + block.w |= indices[12] << 21; + block.w |= indices[13] << 24; + block.w |= indices[14] << 27; + block.w |= indices[15] << 30; + } + else if (blockFixupID == 2) + { + block.z |= indices[0] << 18; + block.z |= indices[1] << 20; + block.z |= indices[2] << 23; + block.z |= indices[3] << 25; + block.z |= indices[4] << 28; + block.z |= indices[5] << 31; + block.w |= indices[5] >> 1; + block.w |= indices[6] << 2; + block.w |= indices[7] << 5; + block.w |= indices[8] << 8; + block.w |= indices[9] << 11; + block.w |= indices[10] << 14; + block.w |= indices[11] << 17; + block.w |= indices[12] << 20; + block.w |= indices[13] << 23; + block.w |= 
indices[14] << 26; + block.w |= indices[15] << 29; + } + else + { + block.z |= indices[0] << 18; + block.z |= indices[1] << 20; + block.z |= indices[2] << 23; + block.z |= indices[3] << 26; + block.z |= indices[4] << 29; + block.w |= indices[5] << 0; + block.w |= indices[6] << 3; + block.w |= indices[7] << 6; + block.w |= indices[8] << 9; + block.w |= indices[9] << 11; + block.w |= indices[10] << 14; + block.w |= indices[11] << 17; + block.w |= indices[12] << 20; + block.w |= indices[13] << 23; + block.w |= indices[14] << 26; + block.w |= indices[15] << 29; + } + } +} + +[numthreads(8, 8, 1)] +void main(uint3 groupID : SV_GroupID, + uint3 dispatchThreadID : SV_DispatchThreadID, + uint3 groupThreadID : SV_GroupThreadID) +{ + uint2 blockCoord = dispatchThreadID.xy; + + uint2 dim; + SrcTexture.GetDimensions(dim.x, dim.y); + float2 TextureSizeRcp = rcp(dim); + uint2 TextureSizeInBlocks = dim / 4; + + if (all(blockCoord < TextureSizeInBlocks)) + { + // Gather texels for current 4x4 block + // 0 1 2 3 + // 4 5 6 7 + // 8 9 10 11 + // 12 13 14 15 + float2 uv = blockCoord * TextureSizeRcp * 4.0f + TextureSizeRcp; +#ifdef COMPRESS_CUBEMAP + float3 block0UV = uv_to_cubemap(uv, dispatchThreadID.z); + float3 block1UV = uv_to_cubemap(uv + float2(2.0f * TextureSizeRcp.x, 0.0f), dispatchThreadID.z); + float3 block2UV = uv_to_cubemap(uv + float2(0.0f, 2.0f * TextureSizeRcp.y), dispatchThreadID.z); + float3 block3UV = uv_to_cubemap(uv + float2(2.0f * TextureSizeRcp.x, 2.0f * TextureSizeRcp.y), dispatchThreadID.z); +#else + float2 block0UV = uv; + float2 block1UV = uv + float2(2.0f * TextureSizeRcp.x, 0.0f); + float2 block2UV = uv + float2(0.0f, 2.0f * TextureSizeRcp.y); + float2 block3UV = uv + float2(2.0f * TextureSizeRcp.x, 2.0f * TextureSizeRcp.y); +#endif // COMPRESS_CUBEMAP + float4 block0X = SrcTexture.GatherRed(sampler_linear_clamp, block0UV); + float4 block1X = SrcTexture.GatherRed(sampler_linear_clamp, block1UV); + float4 block2X = 
SrcTexture.GatherRed(sampler_linear_clamp, block2UV); + float4 block3X = SrcTexture.GatherRed(sampler_linear_clamp, block3UV); + float4 block0Y = SrcTexture.GatherGreen(sampler_linear_clamp, block0UV); + float4 block1Y = SrcTexture.GatherGreen(sampler_linear_clamp, block1UV); + float4 block2Y = SrcTexture.GatherGreen(sampler_linear_clamp, block2UV); + float4 block3Y = SrcTexture.GatherGreen(sampler_linear_clamp, block3UV); + float4 block0Z = SrcTexture.GatherBlue(sampler_linear_clamp, block0UV); + float4 block1Z = SrcTexture.GatherBlue(sampler_linear_clamp, block1UV); + float4 block2Z = SrcTexture.GatherBlue(sampler_linear_clamp, block2UV); + float4 block3Z = SrcTexture.GatherBlue(sampler_linear_clamp, block3UV); + + float3 texels[16]; + texels[0] = float3(block0X.w, block0Y.w, block0Z.w); + texels[1] = float3(block0X.z, block0Y.z, block0Z.z); + texels[2] = float3(block1X.w, block1Y.w, block1Z.w); + texels[3] = float3(block1X.z, block1Y.z, block1Z.z); + texels[4] = float3(block0X.x, block0Y.x, block0Z.x); + texels[5] = float3(block0X.y, block0Y.y, block0Z.y); + texels[6] = float3(block1X.x, block1Y.x, block1Z.x); + texels[7] = float3(block1X.y, block1Y.y, block1Z.y); + texels[8] = float3(block2X.w, block2Y.w, block2Z.w); + texels[9] = float3(block2X.z, block2Y.z, block2Z.z); + texels[10] = float3(block3X.w, block3Y.w, block3Z.w); + texels[11] = float3(block3X.z, block3Y.z, block3Z.z); + texels[12] = float3(block2X.x, block2Y.x, block2Z.x); + texels[13] = float3(block2X.y, block2Y.y, block2Z.y); + texels[14] = float3(block3X.x, block3Y.x, block3Z.x); + texels[15] = float3(block3X.y, block3Y.y, block3Z.y); + + uint4 block = uint4(0, 0, 0, 0); + float blockMSLE = 0.0f; + + EncodeP1(block, blockMSLE, texels); + +#if ENCODE_P2 + // First find pattern which is a best fit for a current block + float bestScore = EvaluateP2Pattern(0, texels); + uint bestPattern = 0; + + for (uint patternIndex = 1; patternIndex < 32; ++patternIndex) + { + float score = 
EvaluateP2Pattern(patternIndex, texels); + if (score < bestScore) + { + bestPattern = patternIndex; + bestScore = score; + } + } + + // Then encode it + EncodeP2Pattern(block, blockMSLE, bestPattern, texels); +#endif + +#ifdef COMPRESS_CUBEMAP + OutputTexture[dispatchThreadID] = block; +#else + OutputTexture[blockCoord] = block; +#endif // COMPRESS_CUBEMAP + } +} diff --git a/WickedEngine/shaders/blockcompressCS_BC6H_cubemap.hlsl b/WickedEngine/shaders/blockcompressCS_BC6H_cubemap.hlsl new file mode 100644 index 000000000..757260afd --- /dev/null +++ b/WickedEngine/shaders/blockcompressCS_BC6H_cubemap.hlsl @@ -0,0 +1,2 @@ +#define COMPRESS_CUBEMAP +#include "blockcompressCS_BC6H.hlsl" diff --git a/WickedEngine/shaders/filterEnvMapCS.hlsl b/WickedEngine/shaders/filterEnvMapCS.hlsl index 0511a773b..6da1cf7ac 100644 --- a/WickedEngine/shaders/filterEnvMapCS.hlsl +++ b/WickedEngine/shaders/filterEnvMapCS.hlsl @@ -22,7 +22,7 @@ void main(uint3 DTid : SV_DispatchThreadID) { if (DTid.x < push.filterResolution.x && DTid.y < push.filterResolution.y) { - TextureCubeArray input = bindless_cubearrays[push.texture_input]; + TextureCube input = bindless_cubemaps[push.texture_input]; RWTexture2DArray output = bindless_rwtextures2DArray[push.texture_output]; float2 uv = (DTid.xy + 0.5f) * push.filterResolution_rcp.xy; @@ -38,10 +38,10 @@ void main(uint3 DTid : SV_DispatchThreadID) float3 hemisphere = ImportanceSampleGGX(hamm, push.filterRoughness, N); float3 cone = mul(hemisphere, tangentSpace); - col += input.SampleLevel(sampler_linear_clamp, float4(cone, push.filterArrayIndex), 0); + col += input.SampleLevel(sampler_linear_clamp, cone, 0); } col /= (float)push.filterRayCount; - output[uint3(DTid.xy, DTid.z + push.filterArrayIndex * 6)] = col; + output[uint3(DTid.xy, DTid.z)] = col; } } diff --git a/WickedEngine/shaders/volumetricCloud_renderCS.hlsl b/WickedEngine/shaders/volumetricCloud_renderCS.hlsl index 49b97dcc1..a4e54965d 100644 --- 
a/WickedEngine/shaders/volumetricCloud_renderCS.hlsl +++ b/WickedEngine/shaders/volumetricCloud_renderCS.hlsl @@ -724,7 +724,7 @@ void RenderClouds(uint3 DTid, float2 uv, float depth, float3 depthWorldPosition, [numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)] void main(uint3 DTid : SV_DispatchThreadID) { - TextureCubeArray input = bindless_cubearrays[capture.texture_input]; + TextureCube input = bindless_cubemaps[capture.texture_input]; RWTexture2DArray output = bindless_rwtextures2DArray[capture.texture_output]; const float2 uv = (DTid.xy + 0.5) * capture.resolution_rcp; @@ -751,10 +751,10 @@ void main(uint3 DTid : SV_DispatchThreadID) float2 cloudDepth = 0; RenderClouds(DTid, uv, depth, depthWorldPosition, rayOrigin, rayDirection, cloudColor, cloudDepth); - float4 composite = input.SampleLevel(sampler_linear_clamp, float4(N, capture.arrayIndex), 0); + float4 composite = input.SampleLevel(sampler_linear_clamp, N, 0); // Output - output[uint3(DTid.xy, DTid.z + capture.arrayIndex * 6)] = float4(composite.rgb * (1.0 - cloudColor.a) + cloudColor.rgb, composite.a * (1.0 - cloudColor.a)); + output[uint3(DTid.xy, DTid.z)] = float4(composite.rgb * (1.0 - cloudColor.a) + cloudColor.rgb, composite.a * (1.0 - cloudColor.a)); } #else [numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)] diff --git a/WickedEngine/wiEnums.h b/WickedEngine/wiEnums.h index 6beb8e709..47abdc517 100644 --- a/WickedEngine/wiEnums.h +++ b/WickedEngine/wiEnums.h @@ -250,6 +250,8 @@ namespace wi::enums CSTYPE_BLOCKCOMPRESS_BC3, CSTYPE_BLOCKCOMPRESS_BC4, CSTYPE_BLOCKCOMPRESS_BC5, + CSTYPE_BLOCKCOMPRESS_BC6H, + CSTYPE_BLOCKCOMPRESS_BC6H_CUBEMAP, CSTYPE_FILTERENVMAP, CSTYPE_COPYTEXTURE2D_UNORM4, CSTYPE_COPYTEXTURE2D_FLOAT4, diff --git a/WickedEngine/wiGraphicsDevice_DX12.cpp b/WickedEngine/wiGraphicsDevice_DX12.cpp index dcdc7f80b..f0348b2e7 100644 --- a/WickedEngine/wiGraphicsDevice_DX12.cpp +++ b/WickedEngine/wiGraphicsDevice_DX12.cpp @@ -4638,7 +4638,7 @@ using namespace 
dx12_internal; { if (has_flag(texture->desc.misc_flags, ResourceMiscFlag::TEXTURECUBE)) { - if (texture->desc.array_size > 6 && sliceCount > 6) + if (texture->desc.array_size > 6) { srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; srv_desc.TextureCubeArray.First2DArrayFace = firstSlice; diff --git a/WickedEngine/wiGraphicsDevice_Vulkan.cpp b/WickedEngine/wiGraphicsDevice_Vulkan.cpp index d6bb67b53..395cab0f6 100644 --- a/WickedEngine/wiGraphicsDevice_Vulkan.cpp +++ b/WickedEngine/wiGraphicsDevice_Vulkan.cpp @@ -8401,6 +8401,10 @@ using namespace vulkan_internal; copy.extent.height = std::min(dst->desc.height, src->desc.height); } copy.extent.depth = std::min(dst->desc.depth, src->desc.depth); + + copy.extent.width = std::max(1u, copy.extent.width >> srcMip); + copy.extent.height = std::max(1u, copy.extent.height >> srcMip); + copy.extent.depth = std::max(1u, copy.extent.depth >> srcMip); } else { diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp index 3790398f8..d659f179f 100644 --- a/WickedEngine/wiRenderer.cpp +++ b/WickedEngine/wiRenderer.cpp @@ -896,6 +896,8 @@ void LoadShaders() wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_BLOCKCOMPRESS_BC3], "blockcompressCS_BC3.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_BLOCKCOMPRESS_BC4], "blockcompressCS_BC4.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_BLOCKCOMPRESS_BC5], "blockcompressCS_BC5.cso"); }); + wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_BLOCKCOMPRESS_BC6H], "blockcompressCS_BC6H.cso"); }); + wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_BLOCKCOMPRESS_BC6H_CUBEMAP], "blockcompressCS_BC6H_cubemap.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs 
args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_FILTERENVMAP], "filterEnvMapCS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_COPYTEXTURE2D_UNORM4], "copytexture2D_unorm4CS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_COPYTEXTURE2D_FLOAT4], "copytexture2D_float4CS.cso"); }); @@ -6493,7 +6495,7 @@ void DrawDebugWorld( } else { - device->BindResource(&scene.envmapArray, 0, cmd, scene.envmapArray.GetDesc().mip_levels + probe.textureIndex); + device->BindResource(&scene.envmapArray, 0, cmd, probe.textureIndex); } device->Draw(vertexCount_uvsphere, 0, cmd); @@ -7099,6 +7101,14 @@ void ComputeSkyAtmosphereTextures(CommandList cmd) } void ComputeSkyAtmosphereSkyViewLut(CommandList cmd) { + const int threadSize = 8; + const int skyViewLutWidth = textures[TEXTYPE_2D_SKYATMOSPHERE_SKYVIEWLUT].GetDesc().width; + const int skyViewLutHeight = textures[TEXTYPE_2D_SKYATMOSPHERE_SKYVIEWLUT].GetDesc().height; + const int skyViewLutThreadX = static_cast(std::ceil(skyViewLutWidth / threadSize)); + const int skyViewLutThreadY = static_cast(std::ceil(skyViewLutHeight / threadSize)); + if (skyViewLutThreadX * skyViewLutThreadY < 1) + return; + device->EventBegin("ComputeSkyAtmosphereSkyViewLut", cmd); BindCommonResources(cmd); @@ -7123,17 +7133,10 @@ void ComputeSkyAtmosphereSkyViewLut(CommandList cmd) device->Barrier(barriers, arraysize(barriers), cmd); } - const int threadSize = 8; - const int skyViewLutWidth = textures[TEXTYPE_2D_SKYATMOSPHERE_SKYVIEWLUT].GetDesc().width; - const int skyViewLutHeight = textures[TEXTYPE_2D_SKYATMOSPHERE_SKYVIEWLUT].GetDesc().height; - const int skyViewLutThreadX = static_cast(std::ceil(skyViewLutWidth / threadSize)); - const int skyViewLutThreadY = static_cast(std::ceil(skyViewLutHeight / threadSize)); - device->Dispatch(skyViewLutThreadX, skyViewLutThreadY, 1, cmd); { GPUBarrier barriers[] = { - 
GPUBarrier::Memory(), GPUBarrier::Image(&textures[TEXTYPE_2D_SKYATMOSPHERE_SKYVIEWLUT], ResourceState::UNORDERED_ACCESS, textures[TEXTYPE_2D_SKYATMOSPHERE_SKYVIEWLUT].desc.layout) }; device->Barrier(barriers, arraysize(barriers), cmd); @@ -7285,10 +7288,10 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) ResourceState::RENDERTARGET ), RenderPassImage::Resolve( - &vis.scene->envmapArray, + &vis.scene->envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::SHADER_RESOURCE, - vis.scene->envmapArray.desc.mip_levels + vis.scene->envmapCount + probe.textureIndex // subresource: individual cubes only mip0 + 0 ) }; device->RenderPassBegin(rp, arraysize(rp), cmd); @@ -7305,12 +7308,11 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) ResourceState::SHADER_RESOURCE ), RenderPassImage::RenderTarget( - &vis.scene->envmapArray, + &vis.scene->envrenderingColorBuffer, RenderPassImage::LoadOp::DONTCARE, RenderPassImage::StoreOp::STORE, ResourceState::SHADER_RESOURCE, - ResourceState::SHADER_RESOURCE, - probe.textureIndex + ResourceState::SHADER_RESOURCE ) }; device->RenderPassBegin(rp, arraysize(rp), cmd); @@ -7385,28 +7387,21 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) device->BindResource(&vis.scene->envrenderingDepthBuffer, 0, cmd); } - TextureDesc desc = vis.scene->envmapArray.GetDesc(); - int arrayIndex = probe.textureIndex; + TextureDesc desc = vis.scene->envrenderingColorBuffer.GetDesc(); AerialPerspectiveCapturePushConstants push; push.resolution.x = desc.width; push.resolution.y = desc.height; push.resolution_rcp.x = 1.0f / push.resolution.x; push.resolution_rcp.y = 1.0f / push.resolution.y; - push.arrayIndex = arrayIndex; - push.texture_input = device->GetDescriptorIndex(&vis.scene->envmapArray, SubresourceType::SRV); - push.texture_output = device->GetDescriptorIndex(&vis.scene->envmapArray, SubresourceType::UAV); + push.texture_input = device->GetDescriptorIndex(&vis.scene->envrenderingColorBuffer, 
SubresourceType::SRV); + push.texture_output = device->GetDescriptorIndex(&vis.scene->envrenderingColorBuffer, SubresourceType::UAV); device->PushConstants(&push, sizeof(push), cmd); { GPUBarrier barriers[] = { - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, 0, arrayIndex * 6 + 0), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, 0, arrayIndex * 6 + 1), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, 0, arrayIndex * 6 + 2), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, 0, arrayIndex * 6 + 3), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, 0, arrayIndex * 6 + 4), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, 0, arrayIndex * 6 + 5), + GPUBarrier::Image(&vis.scene->envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS), }; device->Barrier(barriers, arraysize(barriers), cmd); } @@ -7419,13 +7414,7 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) { GPUBarrier barriers[] = { - GPUBarrier::Memory(), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, 0, arrayIndex * 6 + 0), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, 0, arrayIndex * 6 + 1), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, 0, arrayIndex * 6 + 2), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, 0, arrayIndex * 6 + 3), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, 0, arrayIndex * 6 + 
4), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, 0, arrayIndex * 6 + 5), + GPUBarrier::Image(&vis.scene->envrenderingColorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE), }; device->Barrier(barriers, arraysize(barriers), cmd); } @@ -7471,17 +7460,15 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) device->BindResource(&texture_weatherMap, 4, cmd); } - TextureDesc desc = vis.scene->envmapArray.GetDesc(); - int arrayIndex = probe.textureIndex; + TextureDesc desc = vis.scene->envrenderingColorBuffer.GetDesc(); VolumetricCloudCapturePushConstants push; push.resolution.x = desc.width; push.resolution.y = desc.height; push.resolution_rcp.x = 1.0f / push.resolution.x; push.resolution_rcp.y = 1.0f / push.resolution.y; - push.arrayIndex = arrayIndex; - push.texture_input = device->GetDescriptorIndex(&vis.scene->envmapArray, SubresourceType::SRV); - push.texture_output = device->GetDescriptorIndex(&vis.scene->envmapArray, SubresourceType::UAV); + push.texture_input = device->GetDescriptorIndex(&vis.scene->envrenderingColorBuffer, SubresourceType::SRV); + push.texture_output = device->GetDescriptorIndex(&vis.scene->envrenderingColorBuffer, SubresourceType::UAV); if (probe.IsRealTime()) { @@ -7503,12 +7490,7 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) { GPUBarrier barriers[] = { - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, 0, arrayIndex * 6 + 0), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, 0, arrayIndex * 6 + 1), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, 0, arrayIndex * 6 + 2), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, 0, arrayIndex * 6 + 3), - GPUBarrier::Image(&vis.scene->envmapArray, 
ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, 0, arrayIndex * 6 + 4), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, 0, arrayIndex * 6 + 5), + GPUBarrier::Image(&vis.scene->envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS), }; device->Barrier(barriers, arraysize(barriers), cmd); } @@ -7521,13 +7503,7 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) { GPUBarrier barriers[] = { - GPUBarrier::Memory(), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, 0, arrayIndex * 6 + 0), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, 0, arrayIndex * 6 + 1), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, 0, arrayIndex * 6 + 2), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, 0, arrayIndex * 6 + 3), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, 0, arrayIndex * 6 + 4), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, 0, arrayIndex * 6 + 5), + GPUBarrier::Image(&vis.scene->envrenderingColorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE), }; device->Barrier(barriers, arraysize(barriers), cmd); } @@ -7535,49 +7511,37 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) device->EventEnd(cmd); } - MIPGEN_OPTIONS mipopt; - mipopt.arrayIndex = probe.textureIndex; - GenerateMipChain(vis.scene->envmapArray, MIPGENFILTER_LINEAR, cmd, mipopt); + GenerateMipChain(vis.scene->envrenderingColorBuffer, MIPGENFILTER_LINEAR, cmd); // Filter the enviroment map mip chain according to BRDF: // A bit similar to MIP chain generation, but its input is the MIP-mapped 
texture, // and we generatethe filtered MIPs from bottom to top. device->EventBegin("FilterEnvMap", cmd); { - TextureDesc desc = vis.scene->envmapArray.GetDesc(); - int arrayIndex = probe.textureIndex; + TextureDesc desc = vis.scene->envrenderingColorBuffer.GetDesc(); device->BindComputeShader(&shaders[CSTYPE_FILTERENVMAP], cmd); - desc.width = 1; - desc.height = 1; + desc.width = std::max(1u, desc.width >> (desc.mip_levels - 1)); + desc.height = std::max(1u, desc.height >> (desc.mip_levels - 1)); for (uint32_t i = desc.mip_levels - 1; i > 0; --i) { { GPUBarrier barriers[] = { - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, arrayIndex * 6 + 0), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, arrayIndex * 6 + 1), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, arrayIndex * 6 + 2), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, arrayIndex * 6 + 3), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, arrayIndex * 6 + 4), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, arrayIndex * 6 + 5), + GPUBarrier::Image(&vis.scene->envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i), }; device->Barrier(barriers, arraysize(barriers), cmd); } - device->BindUAV(&vis.scene->envmapArray, 0, cmd, i); - device->BindResource(&vis.scene->envmapArray, 0, cmd, std::max(0, (int)i - 2)); - FilterEnvmapPushConstants push; push.filterResolution.x = desc.width; push.filterResolution.y = desc.height; push.filterResolution_rcp.x = 1.0f / push.filterResolution.x; push.filterResolution_rcp.y = 1.0f / push.filterResolution.y; - push.filterArrayIndex = arrayIndex; 
push.filterRoughness = (float)i / (float)desc.mip_levels; push.filterRayCount = 128; - push.texture_input = device->GetDescriptorIndex(&vis.scene->envmapArray, SubresourceType::SRV, std::max(0, (int)i - 2)); - push.texture_output = device->GetDescriptorIndex(&vis.scene->envmapArray, SubresourceType::UAV, i); + push.texture_input = device->GetDescriptorIndex(&vis.scene->envrenderingColorBuffer, SubresourceType::SRV, std::max(0, (int)i - 2)); + push.texture_output = device->GetDescriptorIndex(&vis.scene->envrenderingColorBuffer, SubresourceType::UAV, i); device->PushConstants(&push, sizeof(push), cmd); device->Dispatch( @@ -7588,13 +7552,7 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) { GPUBarrier barriers[] = { - GPUBarrier::Memory(), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, arrayIndex * 6 + 0), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, arrayIndex * 6 + 1), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, arrayIndex * 6 + 2), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, arrayIndex * 6 + 3), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, arrayIndex * 6 + 4), - GPUBarrier::Image(&vis.scene->envmapArray, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, arrayIndex * 6 + 5), + GPUBarrier::Image(&vis.scene->envrenderingColorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i), }; device->Barrier(barriers, arraysize(barriers), cmd); } @@ -7604,6 +7562,9 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) } } device->EventEnd(cmd); + + // Finally, the complete envmap is block compressed into the envmapArray: + BlockCompress(vis.scene->envrenderingColorBuffer, 
vis.scene->envmapArray, cmd, probe.textureIndex * 6); }; if (vis.scene->probes.GetCount() == 0) @@ -8594,7 +8555,7 @@ void GenerateMipChain(const Texture& texture, MIPGENFILTER filter, CommandList c } } -void BlockCompress(const wi::graphics::Texture& texture_src, const wi::graphics::Texture& texture_bc, wi::graphics::CommandList cmd) +void BlockCompress(const wi::graphics::Texture& texture_src, const wi::graphics::Texture& texture_bc, wi::graphics::CommandList cmd, uint32_t dst_slice_offset) { const uint32_t block_size = GetFormatBlockSize(texture_bc.desc.format); TextureDesc desc; @@ -8606,58 +8567,77 @@ void BlockCompress(const wi::graphics::Texture& texture_src, const wi::graphics: static Texture bc_raw_uint2; static Texture bc_raw_uint4; + static Texture bc_raw_uint4_cubemap; Texture* bc_raw = nullptr; switch (texture_bc.desc.format) { case Format::BC1_UNORM: case Format::BC1_UNORM_SRGB: - bc_raw = &bc_raw_uint2; desc.format = Format::R32G32_UINT; + bc_raw = &bc_raw_uint2; device->BindComputeShader(&shaders[CSTYPE_BLOCKCOMPRESS_BC1], cmd); + device->EventBegin("BlockCompress - BC1", cmd); break; case Format::BC3_UNORM: case Format::BC3_UNORM_SRGB: - bc_raw = &bc_raw_uint4; desc.format = Format::R32G32B32A32_UINT; + bc_raw = &bc_raw_uint4; device->BindComputeShader(&shaders[CSTYPE_BLOCKCOMPRESS_BC3], cmd); + device->EventBegin("BlockCompress - BC3", cmd); break; case Format::BC4_UNORM: - bc_raw = &bc_raw_uint2; desc.format = Format::R32G32_UINT; + bc_raw = &bc_raw_uint2; device->BindComputeShader(&shaders[CSTYPE_BLOCKCOMPRESS_BC4], cmd); + device->EventBegin("BlockCompress - BC4", cmd); break; case Format::BC5_UNORM: - bc_raw = &bc_raw_uint4; desc.format = Format::R32G32B32A32_UINT; + bc_raw = &bc_raw_uint4; device->BindComputeShader(&shaders[CSTYPE_BLOCKCOMPRESS_BC5], cmd); + device->EventBegin("BlockCompress - BC5", cmd); + break; + case Format::BC6H_UF16: + desc.format = Format::R32G32B32A32_UINT; + if (has_flag(texture_src.desc.misc_flags, 
ResourceMiscFlag::TEXTURECUBE)) + { + bc_raw = &bc_raw_uint4_cubemap; + device->BindComputeShader(&shaders[CSTYPE_BLOCKCOMPRESS_BC6H_CUBEMAP], cmd); + device->EventBegin("BlockCompress - BC6H - Cubemap", cmd); + desc.array_size = texture_src.desc.array_size; // src array size not dst!! + } + else + { + bc_raw = &bc_raw_uint4; + device->BindComputeShader(&shaders[CSTYPE_BLOCKCOMPRESS_BC6H], cmd); + device->EventBegin("BlockCompress - BC6H", cmd); + } break; default: assert(0); // not supported return; } - if (!bc_raw->IsValid() || bc_raw->desc.width < desc.width || bc_raw->desc.height < desc.height) + if (!bc_raw->IsValid() || bc_raw->desc.width < desc.width || bc_raw->desc.height < desc.height || bc_raw->desc.array_size < desc.array_size) { device->CreateTexture(&desc, nullptr, bc_raw); device->SetName(bc_raw, "bc_raw"); for (uint32_t i = 0; i < bc_raw->desc.mip_levels; ++i) { - int subresource_index = device->CreateSubresource(bc_raw, SubresourceType::UAV, 0, 1, i, 1); + int subresource_index = device->CreateSubresource(bc_raw, SubresourceType::UAV, 0, desc.array_size, i, 1); assert(subresource_index == i); } } - device->EventBegin("BlockCompress", cmd); - for (uint32_t mip = 0; mip < desc.mip_levels; ++mip) { const uint32_t width = std::max(1u, desc.width >> mip); const uint32_t height = std::max(1u, desc.height >> mip); device->BindResource(&texture_src, 0, cmd, mip); device->BindUAV(bc_raw, 0, cmd, mip); - device->Dispatch((width + 7u) / 8u, (height + 7u) / 8u, 1, cmd); + device->Dispatch((width + 7u) / 8u, (height + 7u) / 8u, desc.array_size, cmd); } GPUBarrier barriers[] = { @@ -8666,24 +8646,27 @@ void BlockCompress(const wi::graphics::Texture& texture_src, const wi::graphics: }; device->Barrier(barriers, arraysize(barriers), cmd); - for (uint32_t mip = 0; mip < texture_bc.desc.mip_levels; ++mip) + for (uint32_t slice = 0; slice < desc.array_size; ++slice) { - const uint32_t width = std::max(1u, desc.width >> mip); - const uint32_t height = std::max(1u, 
desc.height >> mip); - Box box; - box.left = 0; - box.right = width; - box.top = 0; - box.bottom = height; - box.front = 0; - box.back = 1; + for (uint32_t mip = 0; mip < texture_bc.desc.mip_levels; ++mip) + { + const uint32_t width = std::max(1u, desc.width >> mip); + const uint32_t height = std::max(1u, desc.height >> mip); + Box box; + box.left = 0; + box.right = width; + box.top = 0; + box.bottom = height; + box.front = 0; + box.back = 1; - device->CopyTexture( - &texture_bc, 0, 0, 0, mip, 0, - bc_raw, std::min(mip, bc_raw->desc.mip_levels - 1), 0, - cmd, - &box - ); + device->CopyTexture( + &texture_bc, 0, 0, 0, mip, dst_slice_offset + slice, + bc_raw, std::min(mip, bc_raw->desc.mip_levels - 1), slice, + cmd, + &box + ); + } } for (int i = 0; i < arraysize(barriers); ++i) diff --git a/WickedEngine/wiRenderer.h b/WickedEngine/wiRenderer.h index 417f3dcbc..e7b39e793 100644 --- a/WickedEngine/wiRenderer.h +++ b/WickedEngine/wiRenderer.h @@ -910,8 +910,9 @@ namespace wi::renderer // Compress a texture into Block Compressed format // texture_src : source uncompressed texture - // texture_bc : destination comporessed texture, must be a supported BC format (BC1/BC3/BC4/BC5) - void BlockCompress(const wi::graphics::Texture& texture_src, const wi::graphics::Texture& texture_bc, wi::graphics::CommandList cmd); + // texture_bc : destination compressed texture, must be a supported BC format (BC1/BC3/BC4/BC5/BC6H_UF16) + // Currently this will handle simple Texture2D with mip levels, and additionally BC6H cubemap + void BlockCompress(const wi::graphics::Texture& texture_src, const wi::graphics::Texture& texture_bc, wi::graphics::CommandList cmd, uint32_t dst_slice_offset = 0); enum BORDEREXPANDSTYLE { diff --git a/WickedEngine/wiResourceManager.cpp b/WickedEngine/wiResourceManager.cpp index d29a5af5d..a76eb830d 100644 --- a/WickedEngine/wiResourceManager.cpp +++ b/WickedEngine/wiResourceManager.cpp @@ -316,12 +316,13 @@ namespace wi if (has_flag(flags, 
Flags::IMPORT_BLOCK_COMPRESSED)) { - if (has_flag(flags, Flags::IMPORT_NORMALMAP)) - { - fmt = basist::transcoder_texture_format::cTFBC5_RG; - desc.format = Format::BC5_UNORM; - } - else + // BC5 is disabled because it's missing green channel! + //if (has_flag(flags, Flags::IMPORT_NORMALMAP)) + //{ + // fmt = basist::transcoder_texture_format::cTFBC5_RG; + // desc.format = Format::BC5_UNORM; + //} + //else { if (transcoder.get_has_alpha()) { @@ -443,23 +444,29 @@ namespace wi desc.mip_levels = info.m_total_levels; desc.misc_flags = ResourceMiscFlag::TYPED_FORMAT_CASTING; - basist::transcoder_texture_format fmt; - if (has_flag(flags, Flags::IMPORT_NORMALMAP)) + basist::transcoder_texture_format fmt = basist::transcoder_texture_format::cTFRGBA32; + desc.format = Format::R8G8B8A8_UNORM; + + if (has_flag(flags, Flags::IMPORT_BLOCK_COMPRESSED)) { - fmt = basist::transcoder_texture_format::cTFBC5_RG; - desc.format = Format::BC5_UNORM; - } - else - { - if (info.m_alpha_flag) + // BC5 is disabled because it's missing green channel! 
+ //if (has_flag(flags, Flags::IMPORT_NORMALMAP)) + //{ + // fmt = basist::transcoder_texture_format::cTFBC5_RG; + // desc.format = Format::BC5_UNORM; + //} + //else { - fmt = basist::transcoder_texture_format::cTFBC3_RGBA; - desc.format = Format::BC3_UNORM; - } - else - { - fmt = basist::transcoder_texture_format::cTFBC1_RGB; - desc.format = Format::BC1_UNORM; + if (info.m_alpha_flag) + { + fmt = basist::transcoder_texture_format::cTFBC3_RGBA; + desc.format = Format::BC3_UNORM; + } + else + { + fmt = basist::transcoder_texture_format::cTFBC1_RGB; + desc.format = Format::BC1_UNORM; + } } } uint32_t bytes_per_block = basis_get_bytes_per_block_or_pixel(fmt); @@ -842,7 +849,7 @@ namespace wi if (has_flag(flags, Flags::IMPORT_BLOCK_COMPRESSED)) { - // Schedul additional task to compress into BC format and replace resource texture: + // Schedule additional task to compress into BC format and replace resource texture: Texture uncompressed_src = std::move(resource->texture); resource->srgb_subresource = -1; @@ -856,8 +863,9 @@ namespace wi } else { - // scan for transparency and check if fully grayscale: + // scan for transparency and also check if fully grayscale: // By default we should use BC1 that doesn't have transparency, but half the size of BC3 that supports it + // We only care about grayscale if it's not transparent bool has_transparency = false; bool is_grayscale = true; for (int y = 0; (y < height) && !has_transparency; ++y) @@ -881,7 +889,9 @@ namespace wi } else if (is_grayscale) { + // If not transparent and grayscale, then BC4 is better quality than BC1 with same memory footprint desc.format = Format::BC4_UNORM; + // In this case, reswizzle the texture to be grayscale, not red. 
Red is ok for some maps, but not all, it's better to use all channels, for example grayscale specular map desc.swizzle.r = ComponentSwizzle::R; desc.swizzle.g = ComponentSwizzle::R; desc.swizzle.b = ComponentSwizzle::R; diff --git a/WickedEngine/wiScene.cpp b/WickedEngine/wiScene.cpp index 275864c65..7b8c1324c 100644 --- a/WickedEngine/wiScene.cpp +++ b/WickedEngine/wiScene.cpp @@ -3414,6 +3414,9 @@ namespace wi::scene } void Scene::RunImpostorUpdateSystem(wi::jobsystem::context& ctx) { + if (dt == 0) + return; + if (impostors.GetCount() > 0 && !impostorArray.IsValid()) { GraphicsDevice* device = wi::graphics::GetDevice(); @@ -3444,6 +3447,19 @@ namespace wi::scene subresource_index = device->CreateSubresource(&impostorArray, SubresourceType::RTV, i, 1, 0, 1); assert(subresource_index == i); } + + std::string info; + info += "Created impostor array with " + std::to_string(maxImpostorCount) + " max impostors"; + info += "\n\tResolution (width * height * angles * properties) = " + std::to_string(impostorTextureDim) + " * " + std::to_string(impostorTextureDim) + " * " + std::to_string(impostorCaptureAngles) + " * 3"; + info += "\n\tRender Format = "; + info += GetFormatString(impostorArray.desc.format); + info += "\n\tDepth Format = "; + info += GetFormatString(impostorDepthStencil.desc.format); + size_t total_size = 0; + total_size += ComputeTextureMemorySizeInBytes(impostorArray.desc); + total_size += ComputeTextureMemorySizeInBytes(impostorDepthStencil.desc); + info += "\n\tMemory = " + std::to_string(total_size / 1024.0f / 1024.0f) + " MB\n"; + wi::backlog::post(info); } // reconstruct impostor array status: @@ -3885,10 +3901,17 @@ namespace wi::scene { aabb_probes.resize(probes.GetCount()); + if (dt == 0) + return; + if (!envmapArray.IsValid()) // even when zero probes, this will be created, since sometimes only the sky will be rendered into it { GraphicsDevice* device = wi::graphics::GetDevice(); + constexpr Format format = Format::BC6H_UF16; + constexpr 
uint32_t blocks = envmapRes / GetFormatBlockSize(format); + constexpr uint32_t mip_count = GetMipCount(blocks, blocks); + TextureDesc desc; desc.array_size = 6; desc.height = envmapRes; @@ -3912,11 +3935,11 @@ namespace wi::scene desc.sample_count = 1; desc.array_size = envmapCount * 6; - desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS | BindFlag::RENDER_TARGET; - desc.format = wi::renderer::format_rendertarget_envprobe; + desc.bind_flags = BindFlag::SHADER_RESOURCE; + desc.format = format; desc.height = envmapRes; desc.width = envmapRes; - desc.mip_levels = 0; // all mips + desc.mip_levels = mip_count; desc.misc_flags = ResourceMiscFlag::TEXTURECUBE; desc.usage = Usage::DEFAULT; desc.layout = ResourceState::SHADER_RESOURCE; @@ -3924,6 +3947,13 @@ namespace wi::scene device->SetName(&envmapArray, "envmapArray"); desc.array_size = 6; + desc.mip_levels = mip_count; + desc.bind_flags = BindFlag::RENDER_TARGET | BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; + desc.format = wi::renderer::format_rendertarget_envprobe; + desc.layout = ResourceState::SHADER_RESOURCE; + device->CreateTexture(&desc, nullptr, &envrenderingColorBuffer); + device->SetName(&envrenderingColorBuffer, "envrenderingColorBuffer"); + desc.mip_levels = 1; desc.format = wi::renderer::format_depthbuffer_envprobe; desc.bind_flags = BindFlag::DEPTH_STENCIL | BindFlag::SHADER_RESOURCE; @@ -3931,13 +3961,13 @@ namespace wi::scene device->CreateTexture(&desc, nullptr, &envrenderingDepthBuffer); device->SetName(&envrenderingDepthBuffer, "envrenderingDepthBuffer"); - // Cube arrays per mip level: - for (uint32_t i = 0; i < envmapArray.desc.mip_levels; ++i) + // Cubes per mip level: + for (uint32_t i = 0; i < envrenderingColorBuffer.desc.mip_levels; ++i) { int subresource_index; - subresource_index = device->CreateSubresource(&envmapArray, SubresourceType::SRV, 0, envmapArray.desc.array_size, i, 1); + subresource_index = device->CreateSubresource(&envrenderingColorBuffer, 
SubresourceType::SRV, 0, envrenderingColorBuffer.desc.array_size, i, 1); assert(subresource_index == i); - subresource_index = device->CreateSubresource(&envmapArray, SubresourceType::UAV, 0, envmapArray.desc.array_size, i, 1); + subresource_index = device->CreateSubresource(&envrenderingColorBuffer, SubresourceType::UAV, 0, envrenderingColorBuffer.desc.array_size, i, 1); assert(subresource_index == i); } @@ -3946,23 +3976,27 @@ namespace wi::scene { int subresource_index; subresource_index = device->CreateSubresource(&envmapArray, SubresourceType::SRV, i * 6, 6, 0, -1); - assert(subresource_index == envmapArray.desc.mip_levels + i); - } - - // individual cubes only mip0: - for (uint32_t i = 0; i < envmapCount; ++i) - { - int subresource_index; - subresource_index = device->CreateSubresource(&envmapArray, SubresourceType::SRV, i * 6, 6, 0, 1); - assert(subresource_index == envmapArray.desc.mip_levels + envmapCount + i); - } - - for (uint32_t i = 0; i < envmapCount; ++i) - { - int subresource_index; - subresource_index = device->CreateSubresource(&envmapArray, SubresourceType::RTV, i * 6, 6, 0, 1); assert(subresource_index == i); } + + std::string info; + info += "Created envprobe array with " + std::to_string(envmapCount) + " probes"; + info += "\n\tResolution = " + std::to_string(envmapRes) + " * " + std::to_string(envmapRes) + " * 6"; + info += "\n\tMip Levels = " + std::to_string(envmapArray.desc.mip_levels); + info += "\n\tRender Format = "; + info += GetFormatString(envrenderingColorBuffer.desc.format); + info += "\n\tDepth Format = "; + info += GetFormatString(envrenderingDepthBuffer.desc.format); + info += "\n\tCompressed Format = "; + info += GetFormatString(envmapArray.desc.format); + size_t total_size = 0; + total_size += ComputeTextureMemorySizeInBytes(envrenderingDepthBuffer.desc); + total_size += ComputeTextureMemorySizeInBytes(envrenderingColorBuffer.desc); + total_size += ComputeTextureMemorySizeInBytes(envrenderingDepthBuffer_MSAA.desc); + 
total_size += ComputeTextureMemorySizeInBytes(envrenderingColorBuffer_MSAA.desc); + total_size += ComputeTextureMemorySizeInBytes(envmapArray.desc); + info += "\n\tMemory = " + std::to_string(total_size / 1024.0f / 1024.0f) + " MB\n"; + wi::backlog::post(info); } // reconstruct envmap array status: diff --git a/WickedEngine/wiScene.h b/WickedEngine/wiScene.h index 4f3dda188..a9db41ffd 100644 --- a/WickedEngine/wiScene.h +++ b/WickedEngine/wiScene.h @@ -210,9 +210,10 @@ namespace wi::scene // Environment probe cubemap array state: static constexpr uint32_t envmapCount = 16; - static constexpr uint32_t envmapRes = 128; + static constexpr uint32_t envmapRes = 256; static constexpr uint32_t envmapMSAASampleCount = 8; wi::graphics::Texture envrenderingDepthBuffer; + wi::graphics::Texture envrenderingColorBuffer; wi::graphics::Texture envrenderingDepthBuffer_MSAA; wi::graphics::Texture envrenderingColorBuffer_MSAA; wi::graphics::Texture envmapArray; diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index 69f9a0d49..823d5a774 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wi::version // minor features, major updates, breaking compatibility changes const int minor = 71; // minor bug fixes, alterations, refactors, updates - const int revision = 215; + const int revision = 216; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision); diff --git a/third_party_software.txt b/third_party_software.txt index 1b5dcd8ff..e67ac0083 100644 --- a/third_party_software.txt +++ b/third_party_software.txt @@ -577,6 +577,32 @@ This software is distributed without any warranty. See <http://creativecommons.org/publicdomain/zero/1.0/>. 
+############################################################################################################################### + +GPURealTimeBC6H: https://github.com/knarkowicz/GPURealTimeBC6H + +MIT License + +Copyright (c) 2015 Krzysztof Narkowicz + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + ############################################################################################################################### ###############################################################################################################################