From 99396e212df102ffd9d4c7fd66ee380b8aaff278 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tur=C3=A1nszki=20J=C3=A1nos?= Date: Mon, 29 Jul 2024 18:34:40 +0200 Subject: [PATCH] shader optimizations --- Editor/HumanoidWindow.cpp | 2 +- WickedEngine/shaders/globals.hlsli | 18 +++--- WickedEngine/shaders/lightingHF.hlsli | 64 +++++++----------- WickedEngine/shaders/shadingHF.hlsli | 6 +- WickedEngine/shaders/shadowHF.hlsli | 93 +++++++++++++++++++++++---- WickedEngine/shaders/surfaceHF.hlsli | 2 +- WickedEngine/wiVersion.cpp | 2 +- 7 files changed, 120 insertions(+), 67 deletions(-) diff --git a/Editor/HumanoidWindow.cpp b/Editor/HumanoidWindow.cpp index 4a56a0577..4c67c7be0 100644 --- a/Editor/HumanoidWindow.cpp +++ b/Editor/HumanoidWindow.cpp @@ -9,7 +9,7 @@ void HumanoidWindow::Create(EditorComponent* _editor) editor = _editor; wi::gui::Window::Create(ICON_HUMANOID " Humanoid", wi::gui::Window::WindowControls::COLLAPSE | wi::gui::Window::WindowControls::CLOSE); - SetSize(XMFLOAT2(670, 580)); + SetSize(XMFLOAT2(670, 620)); closeButton.SetTooltip("Delete HumanoidComponent"); OnClose([=](wi::gui::EventArgs args) { diff --git a/WickedEngine/shaders/globals.hlsli b/WickedEngine/shaders/globals.hlsli index 931256e3d..f08645e13 100644 --- a/WickedEngine/shaders/globals.hlsli +++ b/WickedEngine/shaders/globals.hlsli @@ -537,7 +537,7 @@ struct PrimitiveID #define MEDIUMP_FLT_MAX 65504.0 #define sqr(a) ((a)*(a)) -#define pow5(x) pow(x, 5) +#define pow5(a) ((a)*(a)*(a)*(a)*(a)) #define arraysize(a) (sizeof(a) / sizeof(a[0])) #define saturateMediump(x) min(x, MEDIUMP_FLT_MAX) #define highp @@ -709,7 +709,7 @@ inline float GetDeltaTime() { return GetFrame().delta_time; } inline float GetTime() { return GetFrame().time; } inline float GetTimePrev() { return GetFrame().time_previous; } inline float GetFrameCount() { return GetFrame().frame_count; } -inline uint2 GetTemporalAASampleRotation() { return uint2((GetFrame().temporalaa_samplerotation >> 0u) & 0x000000FF, 
(GetFrame().temporalaa_samplerotation >> 8) & 0x000000FF); } +inline min16uint2 GetTemporalAASampleRotation() { return uint2(GetFrame().temporalaa_samplerotation & 0xFF, (GetFrame().temporalaa_samplerotation >> 8u) & 0xFF); } inline bool IsStaticSky() { return GetScene().globalenvmap >= 0; } // Mie scaterring approximated with Henyey-Greenstein phase function. @@ -1367,27 +1367,27 @@ static const half BayerMatrix8[8][8] = }; -inline half ditherMask2(in float2 pixel) +inline half ditherMask2(in min16uint2 pixel) { return BayerMatrix2[pixel.x % 2][pixel.y % 2]; } -inline half ditherMask3(in float2 pixel) +inline half ditherMask3(in min16uint2 pixel) { return BayerMatrix3[pixel.x % 3][pixel.y % 3]; } -inline half ditherMask4(in float2 pixel) +inline half ditherMask4(in min16uint2 pixel) { return BayerMatrix4[pixel.x % 4][pixel.y % 4]; } -inline half ditherMask8(in float2 pixel) +inline half ditherMask8(in min16uint2 pixel) { return BayerMatrix8[pixel.x % 8][pixel.y % 8]; } -inline half dither(in float2 pixel) +inline half dither(in min16uint2 pixel) { return ditherMask8(pixel); } @@ -1403,11 +1403,11 @@ static const half2 BayerMatrix8_sincos[8][8] = { {half2(0.873968, 0.485983),half2(-0.548012, 0.836470),half2(0.626185, 0.779674),half2(-0.822984, 0.568065),half2(0.822984, 0.568065),half2(-0.626185, 0.779675),half2(0.548013, 0.836470),half2(-0.873968, 0.485984),}, {half2(-0.849468, -0.527640),half2(0.506960, -0.861970),half2(-0.587786, -0.809017),half2(0.794578, -0.607163),half2(-0.794578, -0.607162),half2(0.587785, -0.809017),half2(-0.506960, -0.861970),half2(0.849468, -0.527640),}, }; -inline half2 dither_sincos(in float2 pixel) +inline half2 dither_sincos(in min16uint2 pixel) { return BayerMatrix8_sincos[pixel.x % 8][pixel.y % 8]; } -inline half2x2 dither_rot2x2(in float2 pixel) +inline half2x2 dither_rot2x2(in min16uint2 pixel) { half2 sincos = dither_sincos(pixel); return half2x2( diff --git a/WickedEngine/shaders/lightingHF.hlsli 
b/WickedEngine/shaders/lightingHF.hlsli index 38ce254ac..6e8118cc3 100644 --- a/WickedEngine/shaders/lightingHF.hlsli +++ b/WickedEngine/shaders/lightingHF.hlsli @@ -14,13 +14,17 @@ #define LIGHTING_SCATTER #endif // WATER -#if __SHADER_TARGET_STAGE == __SHADER_STAGE_PIXEL +template<typename T> +inline void QuadBlur(inout T value) +{ +#if __SHADER_TARGET_STAGE == __SHADER_STAGE_PIXEL && defined(SHADOW_SAMPLING_DISK) // Average shadow within quad, this smooths out the dithering a bit: // Note that I don't implement this in shadowHF.hlsli because we need to // make sure that when averaging, all lanes in the quad are coherent // It wouldn't be good if some waves are not sampling shadows or sampling different slices -#define SHADOW_QUAD_BLUR + value = (value + QuadReadAcrossX(value) + QuadReadAcrossY(value) + QuadReadAcrossDiagonal(value)) * 0.25; #endif // __SHADER_STAGE_PIXEL +} struct LightingPart { @@ -64,14 +68,14 @@ inline void light_directional(in ShaderEntity light, in Surface surface, inout L if (!any(surface_to_light.NdotL_sss)) return; // early exit: facing away from light - half3 shadow = shadow_mask; + half3 light_color = light.GetColor().rgb * shadow_mask; [branch] if (light.IsCastingShadow() && surface.IsReceiveShadow()) { if (GetFrame().options & OPTION_BIT_VOLUMETRICCLOUDS_CAST_SHADOW) { - shadow *= shadow_2D_volumetricclouds(surface.P); + light_color *= shadow_2D_volumetricclouds(surface.P); } #if defined(SHADOW_MASK_ENABLED) && !defined(TRANSPARENT) @@ -100,21 +104,14 @@ inline void light_directional(in ShaderEntity light, in Surface surface, inout L if (cascade_fade > 0 && dither(surface.pixel + GetTemporalAASampleRotation()) < cascade_fade) continue; - shadow *= shadow_2D(light, shadow_pos, shadow_uv.xy, cascade, surface.pixel); + light_color *= shadow_2D(light, shadow_pos, shadow_uv.xy, cascade, surface.pixel); break; } } } - -#ifdef SHADOW_QUAD_BLUR - shadow = (shadow + QuadReadAcrossX(shadow) + QuadReadAcrossY(shadow) + QuadReadAcrossDiagonal(shadow)) / 
4.0; -#endif // SHADOW_QUAD_BLUR + + QuadBlur(light_color); } - - if(!any(shadow)) - return; // early exit: completely shadowed - - half3 light_color = light.GetColor().rgb * shadow; [branch] if (GetFrame().options & OPTION_BIT_REALISTIC_SKY) @@ -167,7 +164,7 @@ inline half attenuation_pointlight(in half dist2, in half range, in half range2) inline void light_point(in ShaderEntity light, in Surface surface, inout Lighting lighting, in half shadow_mask = 1) { float3 Lunnormalized = light.position - surface.P; - float3 LunnormalizedShadow = Lunnormalized; + const float3 LunnormalizedShadow = Lunnormalized; #ifndef DISABLE_AREA_LIGHTS if (light.GetLength() > 0) @@ -197,8 +194,8 @@ inline void light_point(in ShaderEntity light, in Surface surface, inout Lightin if (!any(surface_to_light.NdotL_sss)) return; // early exit: facing away from light - - half3 shadow = shadow_mask; + + half3 light_color = light.GetColor().rgb * shadow_mask; [branch] if (light.IsCastingShadow() && surface.IsReceiveShadow()) @@ -208,18 +205,12 @@ inline void light_point(in ShaderEntity light, in Surface surface, inout Lightin if ((GetFrame().options & OPTION_BIT_RAYTRACED_SHADOWS) == 0 || GetCamera().texture_rtshadow_index < 0 || (GetCamera().options & SHADERCAMERA_OPTION_USE_SHADOW_MASK) == 0) #endif // SHADOW_MASK_ENABLED { - shadow *= shadow_cube(light, LunnormalizedShadow, surface.pixel); + light_color *= shadow_cube(light, LunnormalizedShadow, surface.pixel); } - -#ifdef SHADOW_QUAD_BLUR - shadow = (shadow + QuadReadAcrossX(shadow) + QuadReadAcrossY(shadow) + QuadReadAcrossDiagonal(shadow)) / 4.0; -#endif // SHADOW_QUAD_BLUR + + QuadBlur(light_color); } - - if(!any(shadow)) - return; // early exit: completely shadowed - - half3 light_color = light.GetColor().rgb * shadow; + light_color *= attenuation_pointlight(dist2, range, range2); lighting.direct.diffuse = mad(light_color, BRDF_GetDiffuse(surface, surface_to_light), lighting.direct.diffuse); @@ -297,8 +288,8 @@ inline void light_spot(in 
ShaderEntity light, in Surface surface, inout Lighting if (spot_factor < spot_cutoff) return; // early exit: outside spotlight cone - - half3 shadow = shadow_mask; + + half3 light_color = light.GetColor().rgb * shadow_mask; [branch] if (light.IsCastingShadow() && surface.IsReceiveShadow()) @@ -314,19 +305,13 @@ inline void light_spot(in ShaderEntity light, in Surface surface, inout Lighting [branch] if (is_saturated(shadow_uv)) { - shadow *= shadow_2D(light, shadow_pos.xyz, shadow_uv.xy, 0, surface.pixel); + light_color *= shadow_2D(light, shadow_pos.xyz, shadow_uv.xy, 0, surface.pixel); } } - -#ifdef SHADOW_QUAD_BLUR - shadow = (shadow + QuadReadAcrossX(shadow) + QuadReadAcrossY(shadow) + QuadReadAcrossDiagonal(shadow)) / 4.0; -#endif // SHADOW_QUAD_BLUR + + QuadBlur(light_color); } - - if(!any(shadow)) - return; // early exit: completely shadowed - - half3 light_color = light.GetColor().rgb * shadow; + light_color *= attenuation_spotlight(dist2, range, range2, spot_factor, light.GetAngleScale(), light.GetAngleOffset()); lighting.direct.diffuse = mad(light_color, BRDF_GetDiffuse(surface, surface_to_light), lighting.direct.diffuse); @@ -353,7 +338,6 @@ inline void light_spot(in ShaderEntity light, in Surface surface, inout Lighting #endif // LIGHTING_SCATTER } - // ENVIRONMENT MAPS diff --git a/WickedEngine/shaders/shadingHF.hlsli b/WickedEngine/shaders/shadingHF.hlsli index a5cf0287d..059b9bbb5 100644 --- a/WickedEngine/shaders/shadingHF.hlsli +++ b/WickedEngine/shaders/shadingHF.hlsli @@ -10,7 +10,7 @@ inline void LightMapping(in int lightmap, in float2 ATLAS, inout Lighting lighting, inout Surface surface) { [branch] - if (lightmap >= 0 && any(ATLAS)) + if (lightmap >= 0) { Texture2D texture_lightmap = bindless_textures[NonUniformResourceIndex(lightmap)]; #ifdef LIGHTMAP_QUALITY_BICUBIC @@ -127,7 +127,7 @@ inline void ForwardLighting(inout Surface surface, inout Lighting lighting) ShaderEntity light = load_entity(GetFrame().lightarray_offset + entity_index); // 
under here will be VGPR! - if ((light.GetFlags() & ENTITY_FLAG_LIGHT_STATIC) || (light.layerMask & surface.layerMask) == 0) + if ((light.layerMask & surface.layerMask) == 0) continue; switch (light.GetType()) { @@ -406,7 +406,7 @@ inline void TiledLighting(inout Surface surface, inout Lighting lighting, uint f #endif // SHADOW_MASK_ENABLED && !TRANSPARENT // under here will be VGPR! - if ((light.GetFlags() & ENTITY_FLAG_LIGHT_STATIC) || (light.layerMask & surface.layerMask) == 0) + if ((light.layerMask & surface.layerMask) == 0) continue; switch (light.GetType()) { diff --git a/WickedEngine/shaders/shadowHF.hlsli b/WickedEngine/shaders/shadowHF.hlsli index 05068307c..6bffa14a7 100644 --- a/WickedEngine/shaders/shadowHF.hlsli +++ b/WickedEngine/shaders/shadowHF.hlsli @@ -2,6 +2,10 @@ #define WI_SHADOW_HF #include "globals.hlsli" +#define SHADOW_SAMPLING_DISK + +#ifdef SHADOW_SAMPLING_DISK + // "Vogel disk" sampling pattern based on: https://github.com/corporateshark/poisson-disk-generator/blob/master/PoissonGenerator.h // Baked values are remapped from [0, 1] range into [-1, 1] range by doing: value * 2 - 1 static const half2 vogel_points[] = { @@ -22,31 +26,30 @@ inline half3 sample_shadow(float2 uv, float cmp, float4 uv_clamping, half radius half3 shadow = 0; #ifndef DISABLE_SOFT_SHADOWMAP - const float2 spread = GetFrame().shadow_atlas_resolution_rcp.xy * (2 + radius * 8); // remap radius to try to match ray traced shadow result + const float2 spread = GetFrame().shadow_atlas_resolution_rcp.xy * (mad(radius, 8, 2)); // remap radius to try to match ray traced shadow result const half2x2 rot = dither_rot2x2(pixel + GetTemporalAASampleRotation()); // per pixel rotation for every sample for (min16uint i = 0; i < soft_shadow_sample_count; ++i) { - float2 sample_uv = uv + mul(vogel_points[i], rot) * spread; + float2 sample_uv = mad(mul(vogel_points[i], rot), spread, uv); #else float2 sample_uv = uv; #endif // DISABLE_SOFT_SHADOWMAP sample_uv = clamp(sample_uv, 
uv_clamping.xy, uv_clamping.zw); half3 pcf = texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, sample_uv, cmp).rrr; - if(pcf.x > 0) - { + #ifndef DISABLE_TRANSPARENT_SHADOWMAP - half4 transparent_shadow = texture_shadowatlas_transparent.SampleLevel(sampler_linear_clamp, sample_uv, 0); + half4 transparent_shadow = texture_shadowatlas_transparent.SampleLevel(sampler_linear_clamp, sample_uv, 0); #ifdef TRANSPARENT_SHADOWMAP_SECONDARY_DEPTH_CHECK - if (transparent_shadow.a > cmp) + if (transparent_shadow.a > cmp) #endif // TRANSPARENT_SHADOWMAP_SECONDARY_DEPTH_CHECK - { - pcf *= transparent_shadow.rgb; - } -#endif // DISABLE_TRANSPARENT_SHADOWMAP - shadow += pcf; + { + pcf *= transparent_shadow.rgb; } +#endif // DISABLE_TRANSPARENT_SHADOWMAP + shadow += pcf; + #ifndef DISABLE_SOFT_SHADOWMAP } shadow *= soft_shadow_sample_count_rcp; @@ -58,7 +61,6 @@ inline half3 sample_shadow(float2 uv, float cmp, float4 uv_clamping, half radius // This is used to clamp the uvs to last texel center to avoid sampling on the border and overfiltering into a different shadow inline float4 shadow_border_clamp(in ShaderEntity light, in float slice) { - const float2 shadow_resolution = light.shadowAtlasMulAdd.xy * GetFrame().shadow_atlas_resolution; const float border_size = 0.75 * GetFrame().shadow_atlas_resolution_rcp; const float2 topleft = mad(float2(slice, 0), light.shadowAtlasMulAdd.xy, light.shadowAtlasMulAdd.zw) + border_size; const float2 bottomright = mad(float2(slice + 1, 1), light.shadowAtlasMulAdd.xy, light.shadowAtlasMulAdd.zw) - border_size; @@ -82,6 +84,73 @@ inline half3 shadow_cube(in ShaderEntity light, in float3 Lunnormalized, uint2 p return sample_shadow(shadow_uv, remapped_distance, shadow_border_clamp(light, uv_slice.z), light.GetRadius(), pixel); } +#else + +inline half3 sample_shadow(float2 uv, float cmp, uint2 pixel) +{ + Texture2D texture_shadowatlas = bindless_textures[GetFrame().texture_shadowatlas_index]; + half3 shadow = 
(half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp).r; + +#ifndef DISABLE_SOFT_SHADOWMAP + // sample along a rectangle pattern around center: + shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(-1, -1)).r; + shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(-1, 0)).r; + shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(-1, 1)).r; + shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(0, -1)).r; + shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(0, 1)).r; + shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(1, -1)).r; + shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(1, 0)).r; + shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(1, 1)).r; + shadow = shadow.xxx / 9.0; +#endif // DISABLE_SOFT_SHADOWMAP + +#ifndef DISABLE_TRANSPARENT_SHADOWMAP + Texture2D texture_shadowatlas_transparent = bindless_textures[GetFrame().texture_shadowatlas_transparent_index]; + half4 transparent_shadow = (half4)texture_shadowatlas_transparent.SampleLevel(sampler_linear_clamp, uv, 0); +#ifdef TRANSPARENT_SHADOWMAP_SECONDARY_DEPTH_CHECK + if (transparent_shadow.a > cmp) +#endif // TRANSPARENT_SHADOWMAP_SECONDARY_DEPTH_CHECK + { + shadow *= transparent_shadow.rgb; + } +#endif //DISABLE_TRANSPARENT_SHADOWMAP + + return shadow; +} + +// This is used to clamp the uvs to last texel center to avoid sampling on the border and overfiltering into a different shadow +inline void shadow_border_shrink(in ShaderEntity light, inout float2 shadow_uv) +{ + const float2 shadow_resolution = light.shadowAtlasMulAdd.xy * GetFrame().shadow_atlas_resolution; +#ifdef DISABLE_SOFT_SHADOWMAP + const float border_size = 0.5; +#else + const float border_size = 1.5; +#endif // 
DISABLE_SOFT_SHADOWMAP + shadow_uv = clamp(shadow_uv * shadow_resolution, border_size, shadow_resolution - border_size) / shadow_resolution; +} + +inline half3 shadow_2D(in ShaderEntity light, in float3 shadow_pos, in float2 shadow_uv, in uint cascade, in uint2 pixel = 0) +{ + shadow_border_shrink(light, shadow_uv); + shadow_uv.x += cascade; + shadow_uv = mad(shadow_uv, light.shadowAtlasMulAdd.xy, light.shadowAtlasMulAdd.zw); + return sample_shadow(shadow_uv, shadow_pos.z, pixel); +} + +inline half3 shadow_cube(in ShaderEntity light, in float3 Lunnormalized, in uint2 pixel = 0) +{ + const float remapped_distance = light.GetCubemapDepthRemapNear() + light.GetCubemapDepthRemapFar() / (max(max(abs(Lunnormalized.x), abs(Lunnormalized.y)), abs(Lunnormalized.z)) * 0.989); // little bias to avoid artifact + const float3 uv_slice = cubemap_to_uv(-Lunnormalized); + float2 shadow_uv = uv_slice.xy; + shadow_border_shrink(light, shadow_uv); + shadow_uv.x += uv_slice.z; + shadow_uv = mad(shadow_uv, light.shadowAtlasMulAdd.xy, light.shadowAtlasMulAdd.zw); + return sample_shadow(shadow_uv, remapped_distance, pixel); +} + +#endif // SHADOW_SAMPLING_DISK + inline half shadow_2D_volumetricclouds(float3 P) { // Project into shadow map space (no need to divide by .w because ortho projection!): diff --git a/WickedEngine/shaders/surfaceHF.hlsli b/WickedEngine/shaders/surfaceHF.hlsli index c7e823874..c941db266 100644 --- a/WickedEngine/shaders/surfaceHF.hlsli +++ b/WickedEngine/shaders/surfaceHF.hlsli @@ -79,7 +79,7 @@ struct Surface half3 emissiveColor; // light emission [0 -> 1] half4 refraction; // refraction color (rgb), refraction amount (a) half transmission; // transmission factor - float2 pixel; // pixel coordinate (used for randomization effects) + min16uint2 pixel; // pixel coordinate (used for randomization effects) float2 screenUV; // pixel coordinate in UV space [0 -> 1] (used for randomization effects) half4 T; // tangent half3 B; // bitangent diff --git 
a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index efa50fc0d..ac7f13cef 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wi::version // minor features, major updates, breaking compatibility changes const int minor = 71; // minor bug fixes, alterations, refactors, updates - const int revision = 524; + const int revision = 525; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);