shader optimizations

This commit is contained in:
Turánszki János
2024-07-29 18:34:40 +02:00
parent 3b82ba0549
commit 99396e212d
7 changed files with 120 additions and 67 deletions
+1 -1
View File
@@ -9,7 +9,7 @@ void HumanoidWindow::Create(EditorComponent* _editor)
editor = _editor;
wi::gui::Window::Create(ICON_HUMANOID " Humanoid", wi::gui::Window::WindowControls::COLLAPSE | wi::gui::Window::WindowControls::CLOSE);
SetSize(XMFLOAT2(670, 580));
SetSize(XMFLOAT2(670, 620));
closeButton.SetTooltip("Delete HumanoidComponent");
OnClose([=](wi::gui::EventArgs args) {
+9 -9
View File
@@ -537,7 +537,7 @@ struct PrimitiveID
// General-purpose helper macros.
// NOTE(review): pow5 is defined twice in this view (a pow()-based form followed by an
// explicit-multiply form); this looks like both sides of a diff hunk - confirm only one remains.
// Upper bound used by saturateMediump; presumably the largest finite 16-bit float value - TODO confirm
#define MEDIUMP_FLT_MAX 65504.0
// Square of a; the argument expression is expanded (and therefore evaluated) twice
#define sqr(a) ((a)*(a))
// Fifth power through the pow() intrinsic
#define pow5(x) pow(x, 5)
// Fifth power through explicit multiplications; the argument expression is expanded five times
#define pow5(a) ((a)*(a)*(a)*(a)*(a))
// Element count of an array: total size divided by the size of its first element
#define arraysize(a) (sizeof(a) / sizeof(a[0]))
// Limits x from above to MEDIUMP_FLT_MAX (there is no lower clamp)
#define saturateMediump(x) min(x, MEDIUMP_FLT_MAX)
// Expands to nothing
#define highp
@@ -709,7 +709,7 @@ inline float GetDeltaTime() { return GetFrame().delta_time; }
// Thin accessors over the values returned by GetFrame() / GetScene().
// Returns GetFrame().time
inline float GetTime() { return GetFrame().time; }
// Returns GetFrame().time_previous
inline float GetTimePrev() { return GetFrame().time_previous; }
// Returns GetFrame().frame_count converted to float
inline float GetFrameCount() { return GetFrame().frame_count; }
// Unpacks two 8-bit values from GetFrame().temporalaa_samplerotation: x = bits 0..7, y = bits 8..15.
// NOTE(review): two definitions appear here (uint2 and min16uint2 return type); this looks like
// both sides of a diff hunk - confirm only one remains.
inline uint2 GetTemporalAASampleRotation() { return uint2((GetFrame().temporalaa_samplerotation >> 0u) & 0x000000FF, (GetFrame().temporalaa_samplerotation >> 8) & 0x000000FF); }
inline min16uint2 GetTemporalAASampleRotation() { return uint2(GetFrame().temporalaa_samplerotation & 0xFF, (GetFrame().temporalaa_samplerotation >> 8u) & 0xFF); }
// True when GetScene().globalenvmap is non-negative
inline bool IsStaticSky() { return GetScene().globalenvmap >= 0; }
// Mie scattering approximated with Henyey-Greenstein phase function.
@@ -1367,27 +1367,27 @@ static const half BayerMatrix8[8][8] =
};
// Looks up the BayerMatrix2 entry for a pixel: both coordinates are wrapped with modulo 2
// and used as the two array indices.
// NOTE(review): two signature lines appear here (float2 and min16uint2 parameter); this looks
// like both sides of a diff hunk - confirm only one remains.
inline half ditherMask2(in float2 pixel)
inline half ditherMask2(in min16uint2 pixel)
{
return BayerMatrix2[pixel.x % 2][pixel.y % 2];
}
// Looks up the BayerMatrix3 entry for a pixel: both coordinates are wrapped with modulo 3
// and used as the two array indices.
// NOTE(review): two signature lines appear here (float2 and min16uint2 parameter); this looks
// like both sides of a diff hunk - confirm only one remains.
inline half ditherMask3(in float2 pixel)
inline half ditherMask3(in min16uint2 pixel)
{
return BayerMatrix3[pixel.x % 3][pixel.y % 3];
}
// Looks up the BayerMatrix4 entry for a pixel: both coordinates are wrapped with modulo 4
// and used as the two array indices.
// NOTE(review): two signature lines appear here (float2 and min16uint2 parameter); this looks
// like both sides of a diff hunk - confirm only one remains.
inline half ditherMask4(in float2 pixel)
inline half ditherMask4(in min16uint2 pixel)
{
return BayerMatrix4[pixel.x % 4][pixel.y % 4];
}
// Looks up the BayerMatrix8 entry for a pixel: both coordinates are wrapped with modulo 8
// and used as the two array indices.
// NOTE(review): two signature lines appear here (float2 and min16uint2 parameter); this looks
// like both sides of a diff hunk - confirm only one remains.
inline half ditherMask8(in float2 pixel)
inline half ditherMask8(in min16uint2 pixel)
{
return BayerMatrix8[pixel.x % 8][pixel.y % 8];
}
// Default dither lookup: forwards the pixel coordinate to ditherMask8 and returns its result.
// NOTE(review): two signature lines appear here (float2 and min16uint2 parameter); this looks
// like both sides of a diff hunk - confirm only one remains.
inline half dither(in float2 pixel)
inline half dither(in min16uint2 pixel)
{
return ditherMask8(pixel);
}
@@ -1403,11 +1403,11 @@ static const half2 BayerMatrix8_sincos[8][8] = {
{half2(0.873968, 0.485983),half2(-0.548012, 0.836470),half2(0.626185, 0.779674),half2(-0.822984, 0.568065),half2(0.822984, 0.568065),half2(-0.626185, 0.779675),half2(0.548013, 0.836470),half2(-0.873968, 0.485984),},
{half2(-0.849468, -0.527640),half2(0.506960, -0.861970),half2(-0.587786, -0.809017),half2(0.794578, -0.607163),half2(-0.794578, -0.607162),half2(0.587785, -0.809017),half2(-0.506960, -0.861970),half2(0.849468, -0.527640),},
};
// Looks up the BayerMatrix8_sincos entry (a half2) for a pixel: both coordinates are wrapped
// with modulo 8 and used as the two array indices.
// NOTE(review): judging by the table name, each entry presumably stores a sine/cosine pair - TODO confirm.
// NOTE(review): two signature lines appear here (float2 and min16uint2 parameter); this looks
// like both sides of a diff hunk - confirm only one remains.
inline half2 dither_sincos(in float2 pixel)
inline half2 dither_sincos(in min16uint2 pixel)
{
return BayerMatrix8_sincos[pixel.x % 8][pixel.y % 8];
}
inline half2x2 dither_rot2x2(in float2 pixel)
inline half2x2 dither_rot2x2(in min16uint2 pixel)
{
half2 sincos = dither_sincos(pixel);
return half2x2(
+24 -40
View File
@@ -14,13 +14,17 @@
#define LIGHTING_SCATTER
#endif // WATER
#if __SHADER_TARGET_STAGE == __SHADER_STAGE_PIXEL
// Replaces `value` in place with the average of four values: its own and the ones read through
// QuadReadAcrossX, QuadReadAcrossY and QuadReadAcrossDiagonal.
// The averaging is only compiled in when the shader stage is pixel and SHADOW_SAMPLING_DISK is
// defined; in every other configuration the function body is empty and `value` is left untouched.
template<typename T>
inline void QuadBlur(inout T value)
{
#if __SHADER_TARGET_STAGE == __SHADER_STAGE_PIXEL && defined(SHADOW_SAMPLING_DISK)
// Average shadow within quad, this smooths out the dithering a bit:
// Note that I don't implement this in shadowHF.hlsli because we need to
// make sure that when averaging, all lanes in the quad are coherent
// It wouldn't be good if some waves are not sampling shadows or sampling different slices
// NOTE(review): the define below sits inside the function body in this view; it looks like a
// leftover line from the other side of a diff hunk - confirm whether it should remain.
#define SHADOW_QUAD_BLUR
// Sum of the four values scaled by 0.25
value = (value + QuadReadAcrossX(value) + QuadReadAcrossY(value) + QuadReadAcrossDiagonal(value)) * 0.25;
#endif // __SHADER_STAGE_PIXEL
}
struct LightingPart
{
@@ -64,14 +68,14 @@ inline void light_directional(in ShaderEntity light, in Surface surface, inout L
if (!any(surface_to_light.NdotL_sss))
return; // early exit: facing away from light
half3 shadow = shadow_mask;
half3 light_color = light.GetColor().rgb * shadow_mask;
[branch]
if (light.IsCastingShadow() && surface.IsReceiveShadow())
{
if (GetFrame().options & OPTION_BIT_VOLUMETRICCLOUDS_CAST_SHADOW)
{
shadow *= shadow_2D_volumetricclouds(surface.P);
light_color *= shadow_2D_volumetricclouds(surface.P);
}
#if defined(SHADOW_MASK_ENABLED) && !defined(TRANSPARENT)
@@ -100,21 +104,14 @@ inline void light_directional(in ShaderEntity light, in Surface surface, inout L
if (cascade_fade > 0 && dither(surface.pixel + GetTemporalAASampleRotation()) < cascade_fade)
continue;
shadow *= shadow_2D(light, shadow_pos, shadow_uv.xy, cascade, surface.pixel);
light_color *= shadow_2D(light, shadow_pos, shadow_uv.xy, cascade, surface.pixel);
break;
}
}
}
#ifdef SHADOW_QUAD_BLUR
shadow = (shadow + QuadReadAcrossX(shadow) + QuadReadAcrossY(shadow) + QuadReadAcrossDiagonal(shadow)) / 4.0;
#endif // SHADOW_QUAD_BLUR
QuadBlur(light_color);
}
if(!any(shadow))
return; // early exit: completely shadowed
half3 light_color = light.GetColor().rgb * shadow;
[branch]
if (GetFrame().options & OPTION_BIT_REALISTIC_SKY)
@@ -167,7 +164,7 @@ inline half attenuation_pointlight(in half dist2, in half range, in half range2)
inline void light_point(in ShaderEntity light, in Surface surface, inout Lighting lighting, in half shadow_mask = 1)
{
float3 Lunnormalized = light.position - surface.P;
float3 LunnormalizedShadow = Lunnormalized;
const float3 LunnormalizedShadow = Lunnormalized;
#ifndef DISABLE_AREA_LIGHTS
if (light.GetLength() > 0)
@@ -197,8 +194,8 @@ inline void light_point(in ShaderEntity light, in Surface surface, inout Lightin
if (!any(surface_to_light.NdotL_sss))
return; // early exit: facing away from light
half3 shadow = shadow_mask;
half3 light_color = light.GetColor().rgb * shadow_mask;
[branch]
if (light.IsCastingShadow() && surface.IsReceiveShadow())
@@ -208,18 +205,12 @@ inline void light_point(in ShaderEntity light, in Surface surface, inout Lightin
if ((GetFrame().options & OPTION_BIT_RAYTRACED_SHADOWS) == 0 || GetCamera().texture_rtshadow_index < 0 || (GetCamera().options & SHADERCAMERA_OPTION_USE_SHADOW_MASK) == 0)
#endif // SHADOW_MASK_ENABLED
{
shadow *= shadow_cube(light, LunnormalizedShadow, surface.pixel);
light_color *= shadow_cube(light, LunnormalizedShadow, surface.pixel);
}
#ifdef SHADOW_QUAD_BLUR
shadow = (shadow + QuadReadAcrossX(shadow) + QuadReadAcrossY(shadow) + QuadReadAcrossDiagonal(shadow)) / 4.0;
#endif // SHADOW_QUAD_BLUR
QuadBlur(light_color);
}
if(!any(shadow))
return; // early exit: completely shadowed
half3 light_color = light.GetColor().rgb * shadow;
light_color *= attenuation_pointlight(dist2, range, range2);
lighting.direct.diffuse = mad(light_color, BRDF_GetDiffuse(surface, surface_to_light), lighting.direct.diffuse);
@@ -297,8 +288,8 @@ inline void light_spot(in ShaderEntity light, in Surface surface, inout Lighting
if (spot_factor < spot_cutoff)
return; // early exit: outside spotlight cone
half3 shadow = shadow_mask;
half3 light_color = light.GetColor().rgb * shadow_mask;
[branch]
if (light.IsCastingShadow() && surface.IsReceiveShadow())
@@ -314,19 +305,13 @@ inline void light_spot(in ShaderEntity light, in Surface surface, inout Lighting
[branch]
if (is_saturated(shadow_uv))
{
shadow *= shadow_2D(light, shadow_pos.xyz, shadow_uv.xy, 0, surface.pixel);
light_color *= shadow_2D(light, shadow_pos.xyz, shadow_uv.xy, 0, surface.pixel);
}
}
#ifdef SHADOW_QUAD_BLUR
shadow = (shadow + QuadReadAcrossX(shadow) + QuadReadAcrossY(shadow) + QuadReadAcrossDiagonal(shadow)) / 4.0;
#endif // SHADOW_QUAD_BLUR
QuadBlur(light_color);
}
if(!any(shadow))
return; // early exit: completely shadowed
half3 light_color = light.GetColor().rgb * shadow;
light_color *= attenuation_spotlight(dist2, range, range2, spot_factor, light.GetAngleScale(), light.GetAngleOffset());
lighting.direct.diffuse = mad(light_color, BRDF_GetDiffuse(surface, surface_to_light), lighting.direct.diffuse);
@@ -353,7 +338,6 @@ inline void light_spot(in ShaderEntity light, in Surface surface, inout Lighting
#endif // LIGHTING_SCATTER
}
// ENVIRONMENT MAPS
+3 -3
View File
@@ -10,7 +10,7 @@
inline void LightMapping(in int lightmap, in float2 ATLAS, inout Lighting lighting, inout Surface surface)
{
[branch]
if (lightmap >= 0 && any(ATLAS))
if (lightmap >= 0)
{
Texture2D<float4> texture_lightmap = bindless_textures[NonUniformResourceIndex(lightmap)];
#ifdef LIGHTMAP_QUALITY_BICUBIC
@@ -127,7 +127,7 @@ inline void ForwardLighting(inout Surface surface, inout Lighting lighting)
ShaderEntity light = load_entity(GetFrame().lightarray_offset + entity_index);
// under here will be VGPR!
if ((light.GetFlags() & ENTITY_FLAG_LIGHT_STATIC) || (light.layerMask & surface.layerMask) == 0)
if ((light.layerMask & surface.layerMask) == 0)
continue;
switch (light.GetType())
{
@@ -406,7 +406,7 @@ inline void TiledLighting(inout Surface surface, inout Lighting lighting, uint f
#endif // SHADOW_MASK_ENABLED && !TRANSPARENT
// under here will be VGPR!
if ((light.GetFlags() & ENTITY_FLAG_LIGHT_STATIC) || (light.layerMask & surface.layerMask) == 0)
if ((light.layerMask & surface.layerMask) == 0)
continue;
switch (light.GetType())
{
+81 -12
View File
@@ -2,6 +2,10 @@
#define WI_SHADOW_HF
#include "globals.hlsli"
#define SHADOW_SAMPLING_DISK
#ifdef SHADOW_SAMPLING_DISK
// "Vogel disk" sampling pattern based on: https://github.com/corporateshark/poisson-disk-generator/blob/master/PoissonGenerator.h
// Baked values are remapped from [0, 1] range into [-1, 1] range by doing: value * 2 - 1
static const half2 vogel_points[] = {
@@ -22,31 +26,30 @@ inline half3 sample_shadow(float2 uv, float cmp, float4 uv_clamping, half radius
half3 shadow = 0;
#ifndef DISABLE_SOFT_SHADOWMAP
const float2 spread = GetFrame().shadow_atlas_resolution_rcp.xy * (2 + radius * 8); // remap radius to try to match ray traced shadow result
const float2 spread = GetFrame().shadow_atlas_resolution_rcp.xy * (mad(radius, 8, 2)); // remap radius to try to match ray traced shadow result
const half2x2 rot = dither_rot2x2(pixel + GetTemporalAASampleRotation()); // per pixel rotation for every sample
for (min16uint i = 0; i < soft_shadow_sample_count; ++i)
{
float2 sample_uv = uv + mul(vogel_points[i], rot) * spread;
float2 sample_uv = mad(mul(vogel_points[i], rot), spread, uv);
#else
float2 sample_uv = uv;
#endif // DISABLE_SOFT_SHADOWMAP
sample_uv = clamp(sample_uv, uv_clamping.xy, uv_clamping.zw);
half3 pcf = texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, sample_uv, cmp).rrr;
if(pcf.x > 0)
{
#ifndef DISABLE_TRANSPARENT_SHADOWMAP
half4 transparent_shadow = texture_shadowatlas_transparent.SampleLevel(sampler_linear_clamp, sample_uv, 0);
half4 transparent_shadow = texture_shadowatlas_transparent.SampleLevel(sampler_linear_clamp, sample_uv, 0);
#ifdef TRANSPARENT_SHADOWMAP_SECONDARY_DEPTH_CHECK
if (transparent_shadow.a > cmp)
if (transparent_shadow.a > cmp)
#endif // TRANSPARENT_SHADOWMAP_SECONDARY_DEPTH_CHECK
{
pcf *= transparent_shadow.rgb;
}
#endif // DISABLE_TRANSPARENT_SHADOWMAP
shadow += pcf;
{
pcf *= transparent_shadow.rgb;
}
#endif // DISABLE_TRANSPARENT_SHADOWMAP
shadow += pcf;
#ifndef DISABLE_SOFT_SHADOWMAP
}
shadow *= soft_shadow_sample_count_rcp;
@@ -58,7 +61,6 @@ inline half3 sample_shadow(float2 uv, float cmp, float4 uv_clamping, half radius
// This is used to clamp the uvs to last texel center to avoid sampling on the border and overfiltering into a different shadow
inline float4 shadow_border_clamp(in ShaderEntity light, in float slice)
{
const float2 shadow_resolution = light.shadowAtlasMulAdd.xy * GetFrame().shadow_atlas_resolution;
const float border_size = 0.75 * GetFrame().shadow_atlas_resolution_rcp;
const float2 topleft = mad(float2(slice, 0), light.shadowAtlasMulAdd.xy, light.shadowAtlasMulAdd.zw) + border_size;
const float2 bottomright = mad(float2(slice + 1, 1), light.shadowAtlasMulAdd.xy, light.shadowAtlasMulAdd.zw) - border_size;
@@ -82,6 +84,73 @@ inline half3 shadow_cube(in ShaderEntity light, in float3 Lunnormalized, uint2 p
return sample_shadow(shadow_uv, remapped_distance, shadow_border_clamp(light, uv_slice.z), light.GetRadius(), pixel);
}
#else
// Fixed-pattern shadow lookup (the variant compiled when SHADOW_SAMPLING_DISK is not defined).
//	uv    : sampling position passed to the shadow atlas
//	cmp   : reference value for the hardware depth comparison
//	pixel : not used by this variant
// Returns the comparison result replicated to rgb. Unless DISABLE_SOFT_SHADOWMAP is defined the
// result is the average of the center tap and its 8 neighbours at integer texel offsets.
// Unless DISABLE_TRANSPARENT_SHADOWMAP is defined the result is also multiplied by the rgb
// sampled from the transparent shadow atlas.
inline half3 sample_shadow(float2 uv, float cmp, uint2 pixel)
{
	Texture2D atlas = bindless_textures[GetFrame().texture_shadowatlas_index];

	// center tap
	half visibility = (half)atlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp).r;

#ifndef DISABLE_SOFT_SHADOWMAP
	// 3x3 neighbourhood around the center, accumulated in a fixed order:
	visibility += (half)atlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(-1, -1)).r;
	visibility += (half)atlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(-1, 0)).r;
	visibility += (half)atlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(-1, 1)).r;
	visibility += (half)atlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(0, -1)).r;
	visibility += (half)atlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(0, 1)).r;
	visibility += (half)atlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(1, -1)).r;
	visibility += (half)atlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(1, 0)).r;
	visibility += (half)atlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(1, 1)).r;
	visibility /= 9.0; // 9 taps in total
#endif // DISABLE_SOFT_SHADOWMAP

	half3 shadow = visibility;

#ifndef DISABLE_TRANSPARENT_SHADOWMAP
	Texture2D atlas_transparent = bindless_textures[GetFrame().texture_shadowatlas_transparent_index];
	half4 tint = (half4)atlas_transparent.SampleLevel(sampler_linear_clamp, uv, 0);
#ifdef TRANSPARENT_SHADOWMAP_SECONDARY_DEPTH_CHECK
	// the tint is only applied when its alpha is greater than the compare value
	if (tint.a > cmp)
#endif // TRANSPARENT_SHADOWMAP_SECONDARY_DEPTH_CHECK
	{
		shadow *= tint.rgb;
	}
#endif //DISABLE_TRANSPARENT_SHADOWMAP

	return shadow;
}
// Keeps shadow_uv away from the edge of the light's shadow rectangle so that filtering does not
// pick up texels belonging to a different shadow.
// The uv is scaled to texel units, clamped to [margin, resolution - margin] and scaled back.
// The margin is 0.5 texel when DISABLE_SOFT_SHADOWMAP is defined, 1.5 texels otherwise.
inline void shadow_border_shrink(in ShaderEntity light, inout float2 shadow_uv)
{
#ifdef DISABLE_SOFT_SHADOWMAP
	const float texel_margin = 0.5;
#else
	const float texel_margin = 1.5;
#endif // DISABLE_SOFT_SHADOWMAP

	// size of this light's shadow rectangle: atlas scale multiplied by the atlas resolution
	const float2 rect_resolution = light.shadowAtlasMulAdd.xy * GetFrame().shadow_atlas_resolution;
	const float2 texel_pos = shadow_uv * rect_resolution;
	shadow_uv = clamp(texel_pos, texel_margin, rect_resolution - texel_margin) / rect_resolution;
}
// 2D shadow lookup for one cascade/slice of a light.
//	shadow_pos : only its z component is used, as the depth compare value
//	shadow_uv  : uv inside the slice; shrunk away from the border before use
//	cascade    : added to uv.x before the atlas scale/offset is applied
//	pixel      : forwarded to sample_shadow
inline half3 shadow_2D(in ShaderEntity light, in float3 shadow_pos, in float2 shadow_uv, in uint cascade, in uint2 pixel = 0)
{
	shadow_border_shrink(light, shadow_uv);

	// step horizontally by the cascade index, then remap with the light's atlas mul/add
	const float2 slice_uv = float2(shadow_uv.x + cascade, shadow_uv.y);
	const float2 atlas_uv = mad(slice_uv, light.shadowAtlasMulAdd.xy, light.shadowAtlasMulAdd.zw);

	return sample_shadow(atlas_uv, shadow_pos.z, pixel);
}
// Cube shadow lookup for a light.
//	Lunnormalized : unnormalized vector; its negation is handed to cubemap_to_uv
//	pixel         : forwarded to sample_shadow
// The depth compare value is built from the largest absolute component of Lunnormalized and the
// light's cubemap depth remap near/far values.
inline half3 shadow_cube(in ShaderEntity light, in float3 Lunnormalized, in uint2 pixel = 0)
{
	const float3 abs_L = abs(Lunnormalized);
	const float dominant_axis = max(max(abs_L.x, abs_L.y), abs_L.z);
	const float remapped_distance = light.GetCubemapDepthRemapNear() + light.GetCubemapDepthRemapFar() / (dominant_axis * 0.989); // little bias to avoid artifact

	// xy: uv, z: horizontal slice offset added after the border shrink
	const float3 face_uv = cubemap_to_uv(-Lunnormalized);

	float2 atlas_uv = face_uv.xy;
	shadow_border_shrink(light, atlas_uv);
	atlas_uv.x += face_uv.z;
	atlas_uv = mad(atlas_uv, light.shadowAtlasMulAdd.xy, light.shadowAtlasMulAdd.zw);

	return sample_shadow(atlas_uv, remapped_distance, pixel);
}
#endif // SHADOW_SAMPLING_DISK
inline half shadow_2D_volumetricclouds(float3 P)
{
// Project into shadow map space (no need to divide by .w because ortho projection!):
+1 -1
View File
@@ -79,7 +79,7 @@ struct Surface
half3 emissiveColor; // light emission [0 -> 1]
half4 refraction; // refraction color (rgb), refraction amount (a)
half transmission; // transmission factor
float2 pixel; // pixel coordinate (used for randomization effects)
min16uint2 pixel; // pixel coordinate (used for randomization effects)
float2 screenUV; // pixel coordinate in UV space [0 -> 1] (used for randomization effects)
half4 T; // tangent
half3 B; // bitangent
+1 -1
View File
@@ -9,7 +9,7 @@ namespace wi::version
// minor features, major updates, breaking compatibility changes
const int minor = 71;
// minor bug fixes, alterations, refactors, updates
const int revision = 524;
const int revision = 525;
// Dotted version text built as "<major>.<minor>.<revision>" from the integer components
const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);