diff --git a/Editor/MeshWindow.cpp b/Editor/MeshWindow.cpp index 02d38c262..407425ec9 100644 --- a/Editor/MeshWindow.cpp +++ b/Editor/MeshWindow.cpp @@ -673,16 +673,14 @@ void MeshWindow::SetEntity(Entity entity, int subset) ss += "GPU memory: " + std::to_string((mesh->generalBuffer.GetDesc().size + mesh->streamoutBuffer.GetDesc().size) / 1024.0f / 1024.0f) + " MB\n"; ss += "\nVertex buffers: "; if (mesh->vb_pos_nor_wind.IsValid()) ss += "position; "; - if (mesh->vb_uv0.IsValid()) ss += "uvset_0; "; - if (mesh->vb_uv1.IsValid()) ss += "uvset_1; "; + if (mesh->vb_uvs.IsValid()) ss += "uvsets; "; if (mesh->vb_atl.IsValid()) ss += "atlas; "; if (mesh->vb_col.IsValid()) ss += "color; "; - if (mesh->vb_pre.IsValid()) ss += "previous_position; "; + if (mesh->so_pre.IsValid()) ss += "previous_position; "; if (mesh->vb_bon.IsValid()) ss += "bone; "; if (mesh->vb_tan.IsValid()) ss += "tangent; "; if (mesh->so_pos_nor_wind.IsValid()) ss += "streamout_position; "; if (mesh->so_tan.IsValid()) ss += "streamout_tangents; "; - if (mesh->subset_view.IsValid()) ss += "subset; "; if (mesh->IsTerrain()) ss += "\n\nTerrain will use 4 blend materials and blend by vertex colors, the default one is always the subset material and uses RED vertex color channel mask, the other 3 are selectable below."; meshInfoLabel.SetText(ss); diff --git a/WickedEngine/offlineshadercompiler.cpp b/WickedEngine/offlineshadercompiler.cpp index 87630abc6..d265b8d38 100644 --- a/WickedEngine/offlineshadercompiler.cpp +++ b/WickedEngine/offlineshadercompiler.cpp @@ -110,7 +110,6 @@ int main(int argc, char* argv[]) "fsr_sharpenCS.hlsl" , "ssaoCS.hlsl" , "rtreflectionCS.hlsl" , - "ssr_surfaceCS.hlsl" , "ssr_tileMaxRoughness_horizontalCS.hlsl" , "ssr_tileMaxRoughness_verticalCS.hlsl" , "ssr_kickjobsCS.hlsl" , @@ -233,6 +232,8 @@ int main(int argc, char* argv[]) "rtao_denoise_filterCS.hlsl", "visibility_resolveCS.hlsl", "visibility_resolveCS_MSAA.hlsl", + "visibility_resolveCS_fast.hlsl", + 
"visibility_resolveCS_fast_MSAA.hlsl", "surfel_coverageCS.hlsl", "surfel_indirectprepareCS.hlsl", "surfel_updateCS.hlsl", diff --git a/WickedEngine/shaders/ShaderInterop_Renderer.h b/WickedEngine/shaders/ShaderInterop_Renderer.h index cf183b6f4..0a369fbe3 100644 --- a/WickedEngine/shaders/ShaderInterop_Renderer.h +++ b/WickedEngine/shaders/ShaderInterop_Renderer.h @@ -6,7 +6,7 @@ struct ShaderScene { int instancebuffer; - int meshbuffer; + int geometrybuffer; int materialbuffer; int envmaparray; @@ -145,19 +145,21 @@ static const uint SHADERMESH_FLAG_DOUBLE_SIDED = 1 << 0; static const uint SHADERMESH_FLAG_HAIRPARTICLE = 1 << 1; static const uint SHADERMESH_FLAG_EMITTEDPARTICLE = 1 << 2; -struct ShaderMesh +// This is equivalent to a Mesh + MeshSubset +// But because these are always loaded together by shaders, they are unrolled into one to reduce individual buffer loads +struct ShaderGeometry { - int ib; int vb_pos_nor_wind; + int vb_uvs; + int ib; + uint indexOffset; + int vb_tan; int vb_col; - - int vb_uv0; - int vb_uv1; int vb_atl; int vb_pre; - int subsetbuffer; + uint materialIndex; uint blendmaterial1; uint blendmaterial2; uint blendmaterial3; @@ -170,36 +172,24 @@ struct ShaderMesh void init() { ib = -1; + indexOffset = 0; vb_pos_nor_wind = -1; + vb_uvs = -1; + vb_tan = -1; vb_col = -1; - - vb_uv0 = -1; - vb_uv1 = -1; vb_atl = -1; vb_pre = -1; - subsetbuffer = -1; + materialIndex = 0; blendmaterial1 = 0; blendmaterial2 = 0; blendmaterial3 = 0; aabb_min = float3(0, 0, 0); - aabb_max = float3(0, 0, 0); - flags = 0; - } -}; - -struct ShaderMeshSubset -{ - uint indexOffset; - uint materialIndex; - - void init() - { - indexOffset = 0; - materialIndex = 0; + aabb_max = float3(0, 0, 0); + tessellation_factor = 0; } }; @@ -240,11 +230,11 @@ struct ShaderMeshInstance uint uid; uint flags; uint layerMask; - uint meshIndex; + uint geometryOffset; uint color; uint emissive; int lightmap; - int padding0; + int padding; ShaderTransform transform; ShaderTransform 
transformInverseTranspose; // This correctly handles non uniform scaling for normals ShaderTransform transformPrev; @@ -254,10 +244,10 @@ struct ShaderMeshInstance uid = 0; flags = 0; layerMask = 0; - meshIndex = ~0; color = ~0u; emissive = ~0u; lightmap = -1; + geometryOffset = 0; transform.init(); transformInverseTranspose.init(); transformPrev.init(); @@ -266,17 +256,17 @@ struct ShaderMeshInstance }; struct ShaderMeshInstancePointer { - uint instanceID; + uint instanceIndex; uint userdata; void init() { - instanceID = ~0; + instanceIndex = ~0; userdata = 0; } - void Create(uint _instanceID, uint frustum_index, float dither) + void Create(uint _instanceIndex, uint frustum_index, float dither) { - instanceID = _instanceID; + instanceIndex = _instanceIndex; userdata = 0; userdata |= frustum_index & 0xF; userdata |= (uint(dither * 255.0f) & 0xFF) << 4u; @@ -335,14 +325,15 @@ struct PrimitiveID uint2 pack() { - // 32 bit primitiveID - // 24 bit instanceID - // 8 bit subsetID - return uint2(primitiveIndex, (instanceIndex & 0xFFFFFF) | ((subsetIndex & 0xFF) << 24u)); + // 1 bit valid flag + // 31 bit primitiveIndex + // 24 bit instanceIndex + // 8 bit subsetIndex + return uint2((1u << 31u) | primitiveIndex, (instanceIndex & 0xFFFFFF) | ((subsetIndex & 0xFF) << 24u)); } void unpack(uint2 value) { - primitiveIndex = value.x; + primitiveIndex = value.x & (~0u >> 1u); instanceIndex = value.y & 0xFFFFFF; subsetIndex = (value.y >> 24u) & 0xFF; } @@ -660,25 +651,25 @@ struct CameraCB uint3 entity_culling_tilecount; uint sample_count; + int texture_primitiveID_index; int texture_depth_index; int texture_lineardepth_index; - int texture_gbuffer0_index; - int texture_gbuffer1_index; + int texture_velocity_index; + int texture_normal_index; + int texture_roughness_index; int buffer_entitytiles_opaque_index; int buffer_entitytiles_transparent_index; + int texture_reflection_index; int texture_refraction_index; - int texture_waterriples_index; int texture_ao_index; + int 
texture_ssr_index; int texture_rtshadow_index; - int texture_surfelgi_index; int texture_depth_index_prev; - int padding1; - int padding2; }; @@ -799,5 +790,19 @@ struct SkinningPushConstants int so_tan; }; +enum VisibilityResolveOptions +{ + VISIBILITY_RESOLVE_DEPTH = 1 << 0, + VISIBILITY_RESOLVE_LINEARDEPTH = 1 << 1, + VISIBILITY_RESOLVE_VELOCITY = 1 << 2, + VISIBILITY_RESOLVE_NORMAL = 1 << 3, + VISIBILITY_RESOLVE_ROUGHNESS = 1 << 4, + VISIBILITY_RESOLVE_PRIMITIVEID = 1 << 5, +}; +struct VisibilityResolvePushConstants +{ + uint options; +}; + #endif // WI_SHADERINTEROP_RENDERER_H diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems b/WickedEngine/shaders/Shaders_SOURCE.vcxitems index b19d32856..b84a5ddfb 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems @@ -1018,10 +1018,6 @@ Compute 4.0 - - Compute - 4.0 - Compute 4.0 @@ -1111,6 +1107,14 @@ Compute 4.0 + + Compute + 4.0 + + + Compute + 4.0 + Compute 4.0 diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters index 30faa01cc..e45aa5e77 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters @@ -1037,15 +1037,18 @@ CS - - CS - CS CS + + CS + + + CS + diff --git a/WickedEngine/shaders/brdf.hlsli b/WickedEngine/shaders/brdf.hlsli index 52167fdc2..7167f6193 100644 --- a/WickedEngine/shaders/brdf.hlsli +++ b/WickedEngine/shaders/brdf.hlsli @@ -156,6 +156,9 @@ struct Surface uint layerMask; // the engine-side layer mask float3 facenormal; // surface normal without normal map uint flags; + uint uid_validate; + RayCone raycone; + float hit_depth; // These will be computed when calling Update(): float roughnessBRDF; // roughness input for BRDF functions @@ -173,6 +176,9 @@ struct Surface inline void init() { + P = 0; + V = 0; + N = 0; albedo = 1; f0 = 0; roughness = 1; @@ -198,6 +204,10 @@ struct Surface clearcoat.factor = 0; 
clearcoat.roughness = 0; clearcoat.N = 0; + + uid_validate = 0; + raycone = (RayCone)0; + hit_depth = 0; } inline void create( @@ -290,77 +300,109 @@ struct Surface ShaderMeshInstance inst; - ShaderMesh mesh; - ShaderMeshSubset subset; + ShaderGeometry geometry; ShaderMaterial material; float2 bary; + uint i0; + uint i1; + uint i2; + uint4 data0; + uint4 data1; + uint4 data2; float3 pre; - bool load(in PrimitiveID prim, in float2 barycentrics, in uint uid = 0) + bool preload_internal(PrimitiveID prim) { inst = load_instance(prim.instanceIndex); - if (uid != 0 && inst.uid != uid) + if (uid_validate != 0 && inst.uid != uid_validate) return false; - mesh = load_mesh(inst.meshIndex); - if (mesh.vb_pos_nor_wind < 0) + geometry = load_geometry(inst.geometryOffset + prim.subsetIndex); + if (geometry.vb_pos_nor_wind < 0) return false; - const bool is_hairparticle = mesh.flags & SHADERMESH_FLAG_HAIRPARTICLE; - const bool is_emittedparticle = mesh.flags & SHADERMESH_FLAG_EMITTEDPARTICLE; + const uint startIndex = prim.primitiveIndex * 3 + geometry.indexOffset; + Buffer indexBuffer = bindless_ib[NonUniformResourceIndex(geometry.ib)]; + i0 = indexBuffer[startIndex + 0]; + i1 = indexBuffer[startIndex + 1]; + i2 = indexBuffer[startIndex + 2]; + + ByteAddressBuffer buf = bindless_buffers[NonUniformResourceIndex(geometry.vb_pos_nor_wind)]; + data0 = buf.Load4(i0 * sizeof(uint4)); + data1 = buf.Load4(i1 * sizeof(uint4)); + data2 = buf.Load4(i2 * sizeof(uint4)); + + return true; + } + void load_internal() + { + material = load_material(geometry.materialIndex); + + const bool is_hairparticle = geometry.flags & SHADERMESH_FLAG_HAIRPARTICLE; + const bool is_emittedparticle = geometry.flags & SHADERMESH_FLAG_EMITTEDPARTICLE; const bool simple_lighting = is_hairparticle || is_emittedparticle; - subset = load_subset(mesh, prim.subsetIndex); - material = load_material(subset.materialIndex); - bary = barycentrics; + float u = bary.x; + float v = bary.y; + float w = 1 - u - v; - uint 
startIndex = prim.primitiveIndex * 3 + subset.indexOffset; - uint i0 = bindless_ib[NonUniformResourceIndex(mesh.ib)][startIndex + 0]; - uint i1 = bindless_ib[NonUniformResourceIndex(mesh.ib)][startIndex + 1]; - uint i2 = bindless_ib[NonUniformResourceIndex(mesh.ib)][startIndex + 2]; - - uint4 data0 = bindless_buffers[NonUniformResourceIndex(mesh.vb_pos_nor_wind)].Load4(i0 * 16); - uint4 data1 = bindless_buffers[NonUniformResourceIndex(mesh.vb_pos_nor_wind)].Load4(i1 * 16); - uint4 data2 = bindless_buffers[NonUniformResourceIndex(mesh.vb_pos_nor_wind)].Load4(i2 * 16); - float3 p0 = asfloat(data0.xyz); - float3 p1 = asfloat(data1.xyz); - float3 p2 = asfloat(data2.xyz); float3 n0 = unpack_unitvector(data0.w); float3 n1 = unpack_unitvector(data1.w); float3 n2 = unpack_unitvector(data2.w); - - float u = barycentrics.x; - float v = barycentrics.y; - float w = 1 - u - v; - - P = mad(p0, w, mad(p1, u, p2 * v)); // p0 * w + p1 * u + p2 * v - P = mul(inst.transform.GetMatrix(), float4(P, 1)).xyz; - V = normalize(GetCamera().position - P); - - float4 uv0 = 0, uv1 = 0, uv2 = 0; - [branch] - if (mesh.vb_uv0 >= 0) + N = mad(n0, w, mad(n1, u, n2 * v)); // n0 * w + n1 * u + n2 * v + N = mul((float3x3)inst.transformInverseTranspose.GetMatrix(), N); + N = normalize(N); + if ((flags & SURFACE_FLAG_BACKFACE) && !is_hairparticle && !is_emittedparticle) { - uv0.xy = unpack_half2(bindless_buffers[NonUniformResourceIndex(mesh.vb_uv0)].Load(i0 * 4)); - uv1.xy = unpack_half2(bindless_buffers[NonUniformResourceIndex(mesh.vb_uv0)].Load(i1 * 4)); - uv2.xy = unpack_half2(bindless_buffers[NonUniformResourceIndex(mesh.vb_uv0)].Load(i2 * 4)); + N = -N; } + facenormal = N; + +#ifdef SURFACE_LOAD_MIPCONE + float3 p0 = asfloat(data0.xyz); + float3 p1 = asfloat(data1.xyz); + float3 p2 = asfloat(data2.xyz); + float3 P0 = mul(inst.transform.GetMatrix(), float4(p0, 1)).xyz; + float3 P1 = mul(inst.transform.GetMatrix(), float4(p1, 1)).xyz; + float3 P2 = mul(inst.transform.GetMatrix(), float4(p2, 1)).xyz; 
+ const float triangle_constant = rcp(twice_triangle_area(P0, P1, P2)); + float lod_constant0 = 0; + float lod_constant1 = 0; + const float3 ray_direction = V; + const float cone_width = raycone.width_at_t(hit_depth); + //const float3 surf_normal = facenormal; + const float3 surf_normal = normalize(cross(P2 - P1, P1 - P0)); // compute the facenormal, because particles could have fake facenormal which doesn't work well with mipcones! +#endif // SURFACE_LOAD_MIPCONE + + float4 uvsets = 0; [branch] - if (mesh.vb_uv1 >= 0) + if (geometry.vb_uvs >= 0) { - uv0.zw = unpack_half2(bindless_buffers[NonUniformResourceIndex(mesh.vb_uv1)].Load(i0 * 4)); - uv1.zw = unpack_half2(bindless_buffers[NonUniformResourceIndex(mesh.vb_uv1)].Load(i1 * 4)); - uv2.zw = unpack_half2(bindless_buffers[NonUniformResourceIndex(mesh.vb_uv1)].Load(i2 * 4)); + ByteAddressBuffer buf = bindless_buffers[NonUniformResourceIndex(geometry.vb_uvs)]; + const float4 uv0 = unpack_half4(buf.Load2(i0 * sizeof(uint2))); + const float4 uv1 = unpack_half4(buf.Load2(i1 * sizeof(uint2))); + const float4 uv2 = unpack_half4(buf.Load2(i2 * sizeof(uint2))); + uvsets = mad(uv0, w, mad(uv1, u, uv2 * v)); // uv0 * w + uv1 * u + uv2 * v + uvsets.xy = mad(uvsets.xy, material.texMulAdd.xy, material.texMulAdd.zw); + +#ifdef SURFACE_LOAD_MIPCONE + lod_constant0 = 0.5 * log2(twice_uv_area(uv0.xy, uv1.xy, uv2.xy) * triangle_constant); + lod_constant1 = 0.5 * log2(twice_uv_area(uv0.zw, uv1.zw, uv2.zw) * triangle_constant); +#endif // SURFACE_LOAD_MIPCONE } - float4 uvsets = mad(uv0, w, mad(uv1, u, uv2 * v)); // uv0 * w + uv1 * u + uv2 * v - uvsets.xy = mad(uvsets.xy, material.texMulAdd.xy, material.texMulAdd.zw); float4 baseColor = is_emittedparticle ? 1 : material.baseColor; + baseColor *= unpack_rgba(inst.color); [branch] if (material.texture_basecolormap_index >= 0) { const float2 UV_baseColorMap = material.uvset_baseColorMap == 0 ? 
uvsets.xy : uvsets.zw; - float4 baseColorMap = bindless_textures[NonUniformResourceIndex(material.texture_basecolormap_index)].SampleLevel(sampler_linear_wrap, UV_baseColorMap, 0); + Texture2D tex = bindless_textures[NonUniformResourceIndex(material.texture_basecolormap_index)]; + float lod = 0; +#ifdef SURFACE_LOAD_MIPCONE + lod = compute_texture_lod(tex, material.uvset_baseColorMap == 0 ? lod_constant0 : lod_constant1, ray_direction, surf_normal, cone_width); +#endif // SURFACE_LOAD_MIPCONE + float4 baseColorMap = tex.SampleLevel(sampler_linear_wrap, UV_baseColorMap, lod); if ((GetFrame().options & OPTION_BIT_DISABLE_ALBEDO_MAPS) == 0) { baseColorMap.rgb *= DEGAMMA(baseColorMap.rgb); @@ -373,13 +415,12 @@ struct Surface } [branch] - if (mesh.vb_col >= 0 && material.IsUsingVertexColors()) + if (geometry.vb_col >= 0 && material.IsUsingVertexColors()) { - float4 c0, c1, c2; - const uint stride_COL = 4; - c0 = unpack_rgba(bindless_buffers[NonUniformResourceIndex(mesh.vb_col)].Load(i0 * stride_COL)); - c1 = unpack_rgba(bindless_buffers[NonUniformResourceIndex(mesh.vb_col)].Load(i1 * stride_COL)); - c2 = unpack_rgba(bindless_buffers[NonUniformResourceIndex(mesh.vb_col)].Load(i2 * stride_COL)); + ByteAddressBuffer buf = bindless_buffers[NonUniformResourceIndex(geometry.vb_col)]; + const float4 c0 = unpack_rgba(buf.Load(i0 * sizeof(uint))); + const float4 c1 = unpack_rgba(buf.Load(i1 * sizeof(uint))); + const float4 c2 = unpack_rgba(buf.Load(i2 * sizeof(uint))); float4 vertexColor = mad(c0, w, mad(c1, u, c2 * v)); // c0 * w + c1 * u + c2 * v baseColor *= vertexColor; } @@ -389,7 +430,12 @@ struct Surface if (material.texture_surfacemap_index >= 0 && !simple_lighting) { const float2 UV_surfaceMap = material.uvset_surfaceMap == 0 ? 
uvsets.xy : uvsets.zw; - surfaceMap = bindless_textures[NonUniformResourceIndex(material.texture_surfacemap_index)].SampleLevel(sampler_linear_wrap, UV_surfaceMap, 0); + Texture2D tex = bindless_textures[NonUniformResourceIndex(material.texture_surfacemap_index)]; + float lod = 0; +#ifdef SURFACE_LOAD_MIPCONE + lod = compute_texture_lod(tex, material.uvset_surfaceMap == 0 ? lod_constant0 : lod_constant1, ray_direction, surf_normal, cone_width); +#endif // SURFACE_LOAD_MIPCONE + surfaceMap = tex.SampleLevel(sampler_linear_wrap, UV_surfaceMap, lod); } if (simple_lighting) { @@ -401,13 +447,18 @@ struct Surface if (material.texture_specularmap_index >= 0 && !simple_lighting) { const float2 UV_specularMap = material.uvset_specularMap == 0 ? uvsets.xy : uvsets.zw; - specularMap = bindless_textures[NonUniformResourceIndex(material.texture_specularmap_index)].SampleLevel(sampler_linear_wrap, UV_specularMap, 0); + Texture2D tex = bindless_textures[NonUniformResourceIndex(material.texture_specularmap_index)]; + float lod = 0; +#ifdef SURFACE_LOAD_MIPCONE + lod = compute_texture_lod(tex, material.uvset_specularMap == 0 ? lod_constant0 : lod_constant1, ray_direction, surf_normal, cone_width); +#endif // SURFACE_LOAD_MIPCONE + specularMap = tex.SampleLevel(sampler_linear_wrap, UV_specularMap, lod); specularMap.rgb = DEGAMMA(specularMap.rgb); } create(material, baseColor, surfaceMap, specularMap); - emissiveColor = material.GetEmissive(); + emissiveColor = material.GetEmissive() * Unpack_R11G11B10_FLOAT(inst.emissive); if (is_emittedparticle) { emissiveColor *= baseColor.rgb * baseColor.a; @@ -418,7 +469,12 @@ struct Surface if (material.texture_emissivemap_index >= 0) { const float2 UV_emissiveMap = material.uvset_emissiveMap == 0 ? 
uvsets.xy : uvsets.zw; - float4 emissiveMap = bindless_textures[NonUniformResourceIndex(material.texture_emissivemap_index)].SampleLevel(sampler_linear_wrap, UV_emissiveMap, 0); + Texture2D tex = bindless_textures[NonUniformResourceIndex(material.texture_emissivemap_index)]; + float lod = 0; +#ifdef SURFACE_LOAD_MIPCONE + lod = compute_texture_lod(tex, material.uvset_emissiveMap == 0 ? lod_constant0 : lod_constant1, ray_direction, surf_normal, cone_width); +#endif // SURFACE_LOAD_MIPCONE + float4 emissiveMap = tex.SampleLevel(sampler_linear_wrap, UV_emissiveMap, lod); emissiveMap.rgb = DEGAMMA(emissiveMap.rgb); emissiveColor *= emissiveMap.rgb * emissiveMap.a; } @@ -433,86 +489,118 @@ struct Surface if (material.texture_transmissionmap_index >= 0) { const float2 UV_transmissionMap = material.uvset_transmissionMap == 0 ? uvsets.xy : uvsets.zw; - float transmissionMap = bindless_textures[NonUniformResourceIndex(material.texture_transmissionmap_index)].SampleLevel(sampler_linear_wrap, UV_transmissionMap, 0).r; - transmission *= transmissionMap; + Texture2D tex = bindless_textures[NonUniformResourceIndex(material.texture_transmissionmap_index)]; + float lod = 0; +#ifdef SURFACE_LOAD_MIPCONE + lod = compute_texture_lod(tex, material.uvset_transmissionMap == 0 ? lod_constant0 : lod_constant1, ray_direction, surf_normal, cone_width); +#endif // SURFACE_LOAD_MIPCONE + transmission *= tex.SampleLevel(sampler_linear_wrap, UV_transmissionMap, lod).r; } [branch] if (material.IsOcclusionEnabled_Secondary() && material.texture_occlusionmap_index >= 0) { const float2 UV_occlusionMap = material.uvset_occlusionMap == 0 ? 
uvsets.xy : uvsets.zw; - occlusion *= bindless_textures[NonUniformResourceIndex(material.texture_occlusionmap_index)].SampleLevel(sampler_linear_wrap, UV_occlusionMap, 0).r; + Texture2D tex = bindless_textures[NonUniformResourceIndex(material.texture_occlusionmap_index)]; + float lod = 0; +#ifdef SURFACE_LOAD_MIPCONE + lod = compute_texture_lod(tex, material.uvset_occlusionMap == 0 ? lod_constant0 : lod_constant1, ray_direction, surf_normal, cone_width); +#endif // SURFACE_LOAD_MIPCONE + occlusion *= tex.SampleLevel(sampler_linear_wrap, UV_occlusionMap, lod).r; } - N = mad(n0, w, mad(n1, u, n2 * v)); // n0 * w + n1 * u + n2 * v - N = mul((float3x3)inst.transformInverseTranspose.GetMatrix(), N); - N = normalize(N); - if ((flags & SURFACE_FLAG_BACKFACE) && !is_hairparticle && !is_emittedparticle) - { - N = -N; - } - facenormal = N; - [branch] - if (mesh.vb_tan >= 0 && material.texture_normalmap_index >= 0 && material.normalMapStrength > 0) + if (geometry.vb_tan >= 0 && material.texture_normalmap_index >= 0 && material.normalMapStrength > 0) { - float4 t0, t1, t2; - const uint stride_TAN = 4; - t0 = unpack_utangent(bindless_buffers[NonUniformResourceIndex(mesh.vb_tan)].Load(i0 * stride_TAN)); - t1 = unpack_utangent(bindless_buffers[NonUniformResourceIndex(mesh.vb_tan)].Load(i1 * stride_TAN)); - t2 = unpack_utangent(bindless_buffers[NonUniformResourceIndex(mesh.vb_tan)].Load(i2 * stride_TAN)); + ByteAddressBuffer buf = bindless_buffers[NonUniformResourceIndex(geometry.vb_tan)]; + const float4 t0 = unpack_utangent(buf.Load(i0 * sizeof(uint))); + const float4 t1 = unpack_utangent(buf.Load(i1 * sizeof(uint))); + const float4 t2 = unpack_utangent(buf.Load(i2 * sizeof(uint))); float4 T = mad(t0, w, mad(t1, u, t2 * v)); // t0 * w + t1 * u + t2 * v T = T * 2 - 1; T.xyz = mul((float3x3)inst.transformInverseTranspose.GetMatrix(), T.xyz); T.xyz = normalize(T.xyz); - float3 B = normalize(cross(T.xyz, N) * T.w); - float3x3 TBN = float3x3(T.xyz, B, N); + const float3 B = 
normalize(cross(T.xyz, N) * T.w); + const float3x3 TBN = float3x3(T.xyz, B, N); const float2 UV_normalMap = material.uvset_normalMap == 0 ? uvsets.xy : uvsets.zw; - float3 normalMap = float3(bindless_textures[NonUniformResourceIndex(material.texture_normalmap_index)].SampleLevel(sampler_linear_wrap, UV_normalMap, 0).rg, 1); - normalMap = normalMap * 2 - 1; + Texture2D tex = bindless_textures[NonUniformResourceIndex(material.texture_normalmap_index)]; + float lod = 0; +#ifdef SURFACE_LOAD_MIPCONE + lod = compute_texture_lod(tex, material.uvset_normalMap == 0 ? lod_constant0 : lod_constant1, ray_direction, surf_normal, cone_width); +#endif // SURFACE_LOAD_MIPCONE + const float3 normalMap = float3(tex.SampleLevel(sampler_linear_wrap, UV_normalMap, lod).rg, 1) * 2 - 1; N = normalize(lerp(N, mul(normalMap, TBN), material.normalMapStrength)); } + float3 pre0; + float3 pre1; + float3 pre2; [branch] - if (mesh.vb_pre >= 0) + if (geometry.vb_pre >= 0) { - p0 = asfloat(bindless_buffers[NonUniformResourceIndex(mesh.vb_pre)].Load3(i0 * 16)); - p1 = asfloat(bindless_buffers[NonUniformResourceIndex(mesh.vb_pre)].Load3(i1 * 16)); - p2 = asfloat(bindless_buffers[NonUniformResourceIndex(mesh.vb_pre)].Load3(i2 * 16)); + ByteAddressBuffer buf = bindless_buffers[NonUniformResourceIndex(geometry.vb_pre)]; + pre0 = asfloat(buf.Load3(i0 * sizeof(uint4))); + pre1 = asfloat(buf.Load3(i1 * sizeof(uint4))); + pre2 = asfloat(buf.Load3(i2 * sizeof(uint4))); } - pre = mad(p0, w, mad(p1, u, p2 * v)); // p0 * w + p1 * u + p2 * v + else + { + pre0 = asfloat(data0.xyz); + pre1 = asfloat(data1.xyz); + pre2 = asfloat(data2.xyz); + } + pre = mad(pre0, w, mad(pre1, u, pre2 * v)); // pre0 * w + pre1 * u + pre2 * v pre = mul(inst.transformPrev.GetMatrix(), float4(pre, 1)).xyz; sss = material.subsurfaceScattering; sss_inv = material.subsurfaceScattering_inv; update(); - - return true; } - bool load(in PrimitiveID prim, in float3 P, in uint uid = 0) + bool load(in PrimitiveID prim, in float2 barycentrics) 
{ - inst = load_instance(prim.instanceIndex); - if (uid != 0 && inst.uid != uid) + if (!preload_internal(prim)) return false; - mesh = load_mesh(inst.meshIndex); - if (mesh.vb_pos_nor_wind < 0) + bary = barycentrics; + float u = bary.x; + float v = bary.y; + float w = 1 - u - v; + + float3 p0 = asfloat(data0.xyz); + float3 p1 = asfloat(data1.xyz); + float3 p2 = asfloat(data2.xyz); + P = mad(p0, w, mad(p1, u, p2 * v)); // p0 * w + p1 * u + p2 * v + P = mul(inst.transform.GetMatrix(), float4(P, 1)).xyz; + + load_internal(); + return true; + } + bool load(in PrimitiveID prim, in float3 worldPosition) + { + if (!preload_internal(prim)) return false; - subset = load_subset(mesh, prim.subsetIndex); - material = load_material(subset.materialIndex); + float3 p0 = asfloat(data0.xyz); + float3 p1 = asfloat(data1.xyz); + float3 p2 = asfloat(data2.xyz); + float3 P0 = mul(inst.transform.GetMatrix(), float4(p0, 1)).xyz; + float3 P1 = mul(inst.transform.GetMatrix(), float4(p1, 1)).xyz; + float3 P2 = mul(inst.transform.GetMatrix(), float4(p2, 1)).xyz; + P = worldPosition; - uint startIndex = prim.primitiveIndex * 3 + subset.indexOffset; - uint i0 = bindless_ib[NonUniformResourceIndex(mesh.ib)][startIndex + 0]; - uint i1 = bindless_ib[NonUniformResourceIndex(mesh.ib)][startIndex + 1]; - uint i2 = bindless_ib[NonUniformResourceIndex(mesh.ib)][startIndex + 2]; + bary = compute_barycentrics(P, P0, P1, P2); + + load_internal(); + return true; + } + bool load(in PrimitiveID prim, in float3 rayOrigin, in float3 rayDirection) + { + if (!preload_internal(prim)) + return false; - uint4 data0 = bindless_buffers[NonUniformResourceIndex(mesh.vb_pos_nor_wind)].Load4(i0 * 16); - uint4 data1 = bindless_buffers[NonUniformResourceIndex(mesh.vb_pos_nor_wind)].Load4(i1 * 16); - uint4 data2 = bindless_buffers[NonUniformResourceIndex(mesh.vb_pos_nor_wind)].Load4(i2 * 16); float3 p0 = asfloat(data0.xyz); float3 p1 = asfloat(data1.xyz); float3 p2 = asfloat(data2.xyz); @@ -520,9 +608,12 @@ struct Surface 
float3 P1 = mul(inst.transform.GetMatrix(), float4(p1, 1)).xyz; float3 P2 = mul(inst.transform.GetMatrix(), float4(p2, 1)).xyz; - float2 barycentrics = compute_barycentrics(P, P0, P1, P2); + bary = compute_barycentrics(rayOrigin, rayDirection, P0, P1, P2, hit_depth); + P = rayOrigin + rayDirection * hit_depth; + V = rayDirection; - return load(prim, barycentrics, uid); + load_internal(); + return true; } }; diff --git a/WickedEngine/shaders/bvh_primitivesCS.hlsl b/WickedEngine/shaders/bvh_primitivesCS.hlsl index f7b6acc14..f29b99095 100644 --- a/WickedEngine/shaders/bvh_primitivesCS.hlsl +++ b/WickedEngine/shaders/bvh_primitivesCS.hlsl @@ -24,18 +24,17 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex) prim.subsetIndex = push.subsetIndex; ShaderMeshInstance inst = load_instance(prim.instanceIndex); - ShaderMesh mesh = load_mesh(inst.meshIndex); - ShaderMeshSubset subset = load_subset(mesh, prim.subsetIndex); - ShaderMaterial material = load_material(subset.materialIndex); + ShaderGeometry geometry = load_geometry(inst.geometryOffset + prim.subsetIndex); + ShaderMaterial material = load_material(geometry.materialIndex); - uint startIndex = prim.primitiveIndex * 3 + subset.indexOffset; - uint i0 = bindless_ib[mesh.ib][startIndex + 0]; - uint i1 = bindless_ib[mesh.ib][startIndex + 1]; - uint i2 = bindless_ib[mesh.ib][startIndex + 2]; + uint startIndex = prim.primitiveIndex * 3 + geometry.indexOffset; + uint i0 = bindless_ib[geometry.ib][startIndex + 0]; + uint i1 = bindless_ib[geometry.ib][startIndex + 1]; + uint i2 = bindless_ib[geometry.ib][startIndex + 2]; - uint4 data0 = bindless_buffers[mesh.vb_pos_nor_wind].Load4(i0 * 16); - uint4 data1 = bindless_buffers[mesh.vb_pos_nor_wind].Load4(i1 * 16); - uint4 data2 = bindless_buffers[mesh.vb_pos_nor_wind].Load4(i2 * 16); + uint4 data0 = bindless_buffers[geometry.vb_pos_nor_wind].Load4(i0 * 16); + uint4 data1 = bindless_buffers[geometry.vb_pos_nor_wind].Load4(i1 * 16); + uint4 data2 = 
bindless_buffers[geometry.vb_pos_nor_wind].Load4(i2 * 16); float3 p0 = asfloat(data0.xyz); float3 p1 = asfloat(data1.xyz); float3 p2 = asfloat(data2.xyz); @@ -47,7 +46,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex) bvhprim.packed_prim = prim.pack(); bvhprim.flags = 0; bvhprim.flags |= inst.layerMask & 0xFF; - if (mesh.flags & SHADERMESH_FLAG_DOUBLE_SIDED) + if (geometry.flags & SHADERMESH_FLAG_DOUBLE_SIDED) { bvhprim.flags |= BVH_PRIMITIVE_FLAG_DOUBLE_SIDED; } diff --git a/WickedEngine/shaders/emittedparticleHF.hlsli b/WickedEngine/shaders/emittedparticleHF.hlsli index c9f2b6680..86bbc084e 100644 --- a/WickedEngine/shaders/emittedparticleHF.hlsli +++ b/WickedEngine/shaders/emittedparticleHF.hlsli @@ -7,13 +7,13 @@ ShaderMeshInstance EmitterGetInstance() { return load_instance(xEmitterInstanceIndex); } -ShaderMesh EmitterGetMesh() +ShaderGeometry EmitterGetGeometry() { - return load_mesh(EmitterGetInstance().meshIndex); + return load_geometry(EmitterGetInstance().geometryOffset); } ShaderMaterial EmitterGetMaterial() { - return load_material(load_subset(EmitterGetMesh(), 0).materialIndex); + return load_material(EmitterGetGeometry().materialIndex); } struct VertextoPixel diff --git a/WickedEngine/shaders/emittedparticleMS.hlsl b/WickedEngine/shaders/emittedparticleMS.hlsl index 9241765aa..72de41909 100644 --- a/WickedEngine/shaders/emittedparticleMS.hlsl +++ b/WickedEngine/shaders/emittedparticleMS.hlsl @@ -36,7 +36,7 @@ void main( if (tig >= realGroupCount) return; - ShaderMesh mesh = EmitterGetMesh(); + ShaderGeometry geometry = EmitterGetGeometry(); uint instanceID = tid; uint particleIndex = culledIndirectionBuffer2[culledIndirectionBuffer[instanceID]]; @@ -59,17 +59,16 @@ void main( { uint vertexID = particleIndex * 4 + i; - uint4 data = bindless_buffers[mesh.vb_pos_nor_wind].Load4(vertexID * 16); + uint4 data = bindless_buffers[geometry.vb_pos_nor_wind].Load4(vertexID * 16); float3 position = asfloat(data.xyz); float3 normal 
= normalize(unpack_unitvector(data.w)); - float2 uv = unpack_half2(bindless_buffers[mesh.vb_uv0].Load(vertexID * 4)); - float2 uv2 = unpack_half2(bindless_buffers[mesh.vb_uv1].Load(vertexID * 4)); - uint color = bindless_buffers[mesh.vb_col].Load(vertexID * 4); + float4 uvsets = unpack_half4(bindless_buffers[geometry.vb_uvs].Load2(vertexID * 8)); + uint color = bindless_buffers[geometry.vb_col].Load(vertexID * 4); VertextoPixel Out; Out.P = position; Out.pos = mul(GetCamera().view_projection, float4(position, 1)); - Out.tex = float4(uv, uv2); + Out.tex = uvsets; Out.size = size; Out.color = color; Out.unrotated_uv = BILLBOARD[i].xy * float2(1, -1) * 0.5f + 0.5f; diff --git a/WickedEngine/shaders/emittedparticleVS.hlsl b/WickedEngine/shaders/emittedparticleVS.hlsl index 23d81eda8..c32e0d870 100644 --- a/WickedEngine/shaders/emittedparticleVS.hlsl +++ b/WickedEngine/shaders/emittedparticleVS.hlsl @@ -15,17 +15,16 @@ StructuredBuffer culledIndirectionBuffer2 : register(t3); VertextoPixel main(uint vid : SV_VertexID, uint instanceID : SV_InstanceID) { - ShaderMesh mesh = EmitterGetMesh(); + ShaderGeometry geometry = EmitterGetGeometry(); uint particleIndex = culledIndirectionBuffer2[culledIndirectionBuffer[instanceID]]; uint vertexID = particleIndex * 4 + vid; - uint4 data = bindless_buffers[mesh.vb_pos_nor_wind].Load4(vertexID * 16); + uint4 data = bindless_buffers[geometry.vb_pos_nor_wind].Load4(vertexID * 16); float3 position = asfloat(data.xyz); float3 normal = normalize(unpack_unitvector(data.w)); - float2 uv = unpack_half2(bindless_buffers[mesh.vb_uv0].Load(vertexID * 4)); - float2 uv2 = unpack_half2(bindless_buffers[mesh.vb_uv1].Load(vertexID * 4)); - uint color = bindless_buffers[mesh.vb_col].Load(vertexID * 4); + float4 uvsets = unpack_half4(bindless_buffers[geometry.vb_uvs].Load2(vertexID * 8)); + uint color = bindless_buffers[geometry.vb_col].Load(vertexID * 4); // load particle data: @@ -44,7 +43,7 @@ VertextoPixel main(uint vid : SV_VertexID, uint 
instanceID : SV_InstanceID) VertextoPixel Out; Out.P = position; Out.pos = mul(GetCamera().view_projection, float4(position, 1)); - Out.tex = float4(uv, uv2); + Out.tex = uvsets; Out.size = size; Out.color = color; Out.unrotated_uv = BILLBOARD[vertexID % 4].xy * float2(1, -1) * 0.5f + 0.5f; diff --git a/WickedEngine/shaders/emittedparticle_simulateCS.hlsl b/WickedEngine/shaders/emittedparticle_simulateCS.hlsl index e926df02c..356f97872 100644 --- a/WickedEngine/shaders/emittedparticle_simulateCS.hlsl +++ b/WickedEngine/shaders/emittedparticle_simulateCS.hlsl @@ -16,11 +16,10 @@ RWStructuredBuffer deadBuffer : register(u3); RWByteAddressBuffer counterBuffer : register(u4); RWStructuredBuffer distanceBuffer : register(u6); RWByteAddressBuffer vertexBuffer_POS : register(u7); -RWByteAddressBuffer vertexBuffer_TEX : register(u8); -RWByteAddressBuffer vertexBuffer_TEX2 : register(u9); -RWByteAddressBuffer vertexBuffer_COL : register(u10); -RWStructuredBuffer culledIndirectionBuffer : register(u11); -RWStructuredBuffer culledIndirectionBuffer2 : register(u12); +RWByteAddressBuffer vertexBuffer_UVS : register(u8); +RWByteAddressBuffer vertexBuffer_COL : register(u9); +RWStructuredBuffer culledIndirectionBuffer : register(u10); +RWStructuredBuffer culledIndirectionBuffer2 : register(u11); #define SPH_FLOOR_COLLISION #define SPH_BOX_COLLISION @@ -230,8 +229,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint Gid : SV_GroupIndex) data.xyz = asuint(particle.position + quadPos); data.w = pack_unitvector(normalize(-GetCamera().forward)); vertexBuffer_POS.Store4((v0 + vertexID) * 16, data); - vertexBuffer_TEX.Store((v0 + vertexID) * 4, pack_half2(uv)); - vertexBuffer_TEX2.Store((v0 + vertexID) * 4, pack_half2(uv2)); + vertexBuffer_UVS.Store2((v0 + vertexID) * 8, pack_half4(float4(uv, uv2))); vertexBuffer_COL.Store((v0 + vertexID) * 4, particleColorPacked); } diff --git a/WickedEngine/shaders/envMapPS.hlsl b/WickedEngine/shaders/envMapPS.hlsl index db35d28ba..9a07b8ed0 100644 
--- a/WickedEngine/shaders/envMapPS.hlsl +++ b/WickedEngine/shaders/envMapPS.hlsl @@ -12,7 +12,7 @@ #define OBJECTSHADER_USE_TANGENT #define OBJECTSHADER_USE_POSITION3D #define OBJECTSHADER_USE_EMISSIVE -#define OBJECTSHADER_USE_INSTANCEID +#define OBJECTSHADER_USE_INSTANCEINDEX #define OBJECTSHADER_USE_RENDERTARGETARRAYINDEX #define ENVMAPRENDERING #define FORWARD diff --git a/WickedEngine/shaders/envMapVS.hlsl b/WickedEngine/shaders/envMapVS.hlsl index 5cb191a47..d2966e4b7 100644 --- a/WickedEngine/shaders/envMapVS.hlsl +++ b/WickedEngine/shaders/envMapVS.hlsl @@ -12,7 +12,7 @@ #define OBJECTSHADER_USE_TANGENT #define OBJECTSHADER_USE_POSITION3D #define OBJECTSHADER_USE_EMISSIVE -#define OBJECTSHADER_USE_INSTANCEID +#define OBJECTSHADER_USE_INSTANCEINDEX #define OBJECTSHADER_USE_RENDERTARGETARRAYINDEX #define ENVMAPRENDERING #include "objectHF.hlsli" diff --git a/WickedEngine/shaders/globals.hlsli b/WickedEngine/shaders/globals.hlsli index ec3cafca8..f96cff863 100644 --- a/WickedEngine/shaders/globals.hlsli +++ b/WickedEngine/shaders/globals.hlsli @@ -109,13 +109,9 @@ inline ShaderMeshInstance load_instance(uint instanceIndex) { return bindless_buffers[GetScene().instancebuffer].Load(instanceIndex * sizeof(ShaderMeshInstance)); } -inline ShaderMesh load_mesh(uint meshIndex) +inline ShaderGeometry load_geometry(uint geometryIndex) { - return bindless_buffers[GetScene().meshbuffer].Load(meshIndex * sizeof(ShaderMesh)); -} -inline ShaderMeshSubset load_subset(ShaderMesh mesh, uint subsetIndex) -{ - return bindless_buffers[NonUniformResourceIndex(mesh.subsetbuffer)].Load(subsetIndex * sizeof(ShaderMeshSubset)); + return bindless_buffers[GetScene().geometrybuffer].Load(geometryIndex * sizeof(ShaderGeometry)); } inline ShaderMaterial load_material(uint materialIndex) { @@ -150,8 +146,10 @@ inline float4x4 load_entitymatrix(uint matrixIndex) #define texture_depth bindless_textures_float[GetCamera().texture_depth_index] #define texture_depth_history 
bindless_textures_float[GetCamera().texture_depth_index_prev] #define texture_lineardepth bindless_textures_float[GetCamera().texture_lineardepth_index] -#define texture_gbuffer0 bindless_textures_uint2[GetCamera().texture_gbuffer0_index] -#define texture_gbuffer1 bindless_textures_float2[GetCamera().texture_gbuffer1_index] +#define texture_primitiveID bindless_textures_uint2[GetCamera().texture_primitiveID_index] +#define texture_velocity bindless_textures_float2[GetCamera().texture_velocity_index] +#define texture_normal bindless_textures_float2[GetCamera().texture_normal_index] +#define texture_roughness bindless_textures_float[GetCamera().texture_roughness_index] #define PI 3.14159265358979323846 #define SQRT2 1.41421356237309504880 @@ -166,6 +164,13 @@ inline bool is_saturated(float2 a) { return is_saturated(a.x) && is_saturated(a. inline bool is_saturated(float3 a) { return is_saturated(a.x) && is_saturated(a.y) && is_saturated(a.z); } inline bool is_saturated(float4 a) { return is_saturated(a.x) && is_saturated(a.y) && is_saturated(a.z) && is_saturated(a.w); } +inline float2 uv_to_clipspace(in float2 uv) +{ + float2 clipspace = uv * 2 - 1; + clipspace.y *= -1; + return clipspace; +} + #define DEGAMMA_SKY(x) ((GetFrame().options & OPTION_BIT_STATIC_SKY_HDR) ? 
(x) : RemoveSRGBCurve_Fast(x)) #define DEGAMMA(x) (RemoveSRGBCurve_Fast(x)) #define GAMMA(x) (ApplySRGBCurve_Fast(x)) @@ -851,6 +856,105 @@ float2 compute_barycentrics(float3 p, float3 a, float3 b, float3 c) float v = (d00 * d21 - d01 * d20) * denom_rcp; return float2(u, v); } +// Compute barycentric coordinates on triangle from a ray +float2 compute_barycentrics(float3 rayOrigin, float3 rayDirection, float3 a, float3 b, float3 c) +{ + float3 v0v1 = b - a; + float3 v0v2 = c - a; + float3 pvec = cross(rayDirection, v0v2); + float det = dot(v0v1, pvec); + float det_rcp = rcp(det); + float3 tvec = rayOrigin - a; + float u = dot(tvec, pvec) * det_rcp; + float3 qvec = cross(tvec, v0v1); + float v = dot(rayDirection, qvec) * det_rcp; + return float2(u, v); +} +// Compute barycentric coordinates on triangle from a ray +// also outputs hit distance "t" +float2 compute_barycentrics(float3 rayOrigin, float3 rayDirection, float3 a, float3 b, float3 c, out float t) +{ + float3 v0v1 = b - a; + float3 v0v2 = c - a; + float3 pvec = cross(rayDirection, v0v2); + float det = dot(v0v1, pvec); + float det_rcp = rcp(det); + float3 tvec = rayOrigin - a; + float u = dot(tvec, pvec) * det_rcp; + float3 qvec = cross(tvec, v0v1); + float v = dot(rayDirection, qvec) * det_rcp; + t = dot(v0v2, qvec) * det_rcp; + return float2(u, v); +} + +// Texture LOD computation things from https://github.com/EmbarkStudios/kajiya +float twice_triangle_area(float3 p0, float3 p1, float3 p2) +{ + return length(cross(p1 - p0, p2 - p0)); +} +float twice_uv_area(float2 t0, float2 t1, float2 t2) +{ + return abs((t1.x - t0.x) * (t2.y - t0.y) - (t2.x - t0.x) * (t1.y - t0.y)); +} +// https://media.contentapi.ea.com/content/dam/ea/seed/presentations/2019-ray-tracing-gems-chapter-20-akenine-moller-et-al.pdf +float compute_texture_lod(Texture2D tex, float triangle_constant, float3 ray_direction, float3 surf_normal, float cone_width) +{ + uint w, h; + tex.GetDimensions(w, h); + + float lambda = triangle_constant; + 
lambda += log2(abs(cone_width)); + lambda += 0.5 * log2(float(w) * float(h)); + lambda -= log2(abs(dot(normalize(ray_direction), surf_normal))); + return lambda; +} +float pixel_cone_spread_angle_from_image_height(float image_height) +{ + //return atan(2.0 * frame_constants.view_constants.clip_to_view._11 / image_height); + return atan(2.0 * GetCamera().inverse_projection._11 / image_height); +} +// https://media.contentapi.ea.com/content/dam/ea/seed/presentations/2019-ray-tracing-gems-chapter-20-akenine-moller-et-al.pdf +struct RayCone +{ + float width; + float spread_angle; + + static RayCone from_spread_angle(float spread_angle) + { + RayCone res; + res.width = 0.0; + res.spread_angle = spread_angle; + return res; + } + + static RayCone from_width_spread_angle(float width, float spread_angle) + { + RayCone res; + res.width = width; + res.spread_angle = spread_angle; + return res; + } + + RayCone propagate(float surface_spread_angle, float hit_t) + { + RayCone res; + res.width = this.spread_angle * hit_t + this.width; + res.spread_angle = this.spread_angle + surface_spread_angle; + return res; + } + + float width_at_t(float hit_t) + { + return this.width + this.spread_angle * hit_t; + } +}; +RayCone pixel_ray_cone_from_image_height(float image_height) +{ + RayCone res; + res.width = 0.0; + res.spread_angle = pixel_cone_spread_angle_from_image_height(image_height); + return res; +} static const float4 halton64[] = { float4(0.5000000000f, 0.3333333333f, 0.2000000000f, 0.1428571429f), diff --git a/WickedEngine/shaders/hairparticleHF.hlsli b/WickedEngine/shaders/hairparticleHF.hlsli index b998434e0..197dc0a96 100644 --- a/WickedEngine/shaders/hairparticleHF.hlsli +++ b/WickedEngine/shaders/hairparticleHF.hlsli @@ -7,13 +7,13 @@ ShaderMeshInstance HairGetInstance() { return load_instance(xHairInstanceIndex); } -ShaderMesh HairGetMesh() +ShaderGeometry HairGetGeometry() { - return load_mesh(HairGetInstance().meshIndex); + return 
load_geometry(HairGetInstance().geometryOffset); } ShaderMaterial HairGetMaterial() { - return load_material(load_subset(HairGetMesh(), 0).materialIndex); + return load_material(HairGetGeometry().materialIndex); } struct VertexToPixel diff --git a/WickedEngine/shaders/hairparticleVS.hlsl b/WickedEngine/shaders/hairparticleVS.hlsl index 52545d2a1..4c51aa701 100644 --- a/WickedEngine/shaders/hairparticleVS.hlsl +++ b/WickedEngine/shaders/hairparticleVS.hlsl @@ -6,16 +6,16 @@ Buffer primitiveBuffer : register(t0); VertexToPixel main(uint vid : SV_VERTEXID) { - ShaderMesh mesh = HairGetMesh(); + ShaderGeometry geometry = HairGetGeometry(); VertexToPixel Out; Out.primitiveID = vid / 3; uint vertexID = primitiveBuffer[vid]; - uint4 data = bindless_buffers[mesh.vb_pos_nor_wind].Load4(vertexID * 16); + uint4 data = bindless_buffers[geometry.vb_pos_nor_wind].Load4(vertexID * 16); float3 position = asfloat(data.xyz); float3 normal = normalize(unpack_unitvector(data.w)); - float2 uv = unpack_half2(bindless_buffers[mesh.vb_uv0].Load(vertexID * 4)); + float4 uvsets = unpack_half4(bindless_buffers[geometry.vb_uvs].Load2(vertexID * 8)); Out.fade = saturate(distance(position.xyz, GetCamera().position.xyz) / xHairViewDistance); Out.fade = saturate(Out.fade - 0.8f) * 5.0f; // fade will be on edge and inwards 20% @@ -25,7 +25,7 @@ VertexToPixel main(uint vid : SV_VERTEXID) Out.pos = mul(GetCamera().view_projection, Out.pos); Out.nor = normalize(normal); - Out.tex = uv; + Out.tex = uvsets.xy; return Out; } diff --git a/WickedEngine/shaders/hairparticle_simulateCS.hlsl b/WickedEngine/shaders/hairparticle_simulateCS.hlsl index d6e320190..d5647485d 100644 --- a/WickedEngine/shaders/hairparticle_simulateCS.hlsl +++ b/WickedEngine/shaders/hairparticle_simulateCS.hlsl @@ -15,7 +15,7 @@ Buffer meshVertexBuffer_length : register(t2); RWStructuredBuffer simulationBuffer : register(u0); RWByteAddressBuffer vertexBuffer_POS : register(u1); -RWByteAddressBuffer vertexBuffer_TEX : register(u2); 
+RWByteAddressBuffer vertexBuffer_UVS : register(u2); RWBuffer culledIndexBuffer : register(u3); RWByteAddressBuffer counterBuffer : register(u4); @@ -211,7 +211,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint groupIn data.xyz = asuint(position); data.w = pack_unitvector(normalize(normal + wind)); vertexBuffer_POS.Store4((v0 + vertexID) * 16, data); - vertexBuffer_TEX.Store((v0 + vertexID) * 4, pack_half2(uv)); + vertexBuffer_UVS.Store2((v0 + vertexID) * 8, pack_half4(float4(uv, uv))); // a second uv set could be used here } // Frustum culling: diff --git a/WickedEngine/shaders/impostorVS.hlsl b/WickedEngine/shaders/impostorVS.hlsl index 122fbd441..6f0a2abfa 100644 --- a/WickedEngine/shaders/impostorVS.hlsl +++ b/WickedEngine/shaders/impostorVS.hlsl @@ -23,10 +23,10 @@ VSOut main(uint fakeIndex : SV_VERTEXID) const uint vertexID = fakeIndex % 6; const uint instanceID = fakeIndex / 6; - ShaderMeshInstancePointer poi = impostorBuffer.Load(push.instanceOffset + instanceID * 8); - ShaderMeshInstance instance = load_instance(poi.instanceID); - ShaderMesh mesh = load_mesh(instance.meshIndex); - float3 extents = mesh.aabb_max - mesh.aabb_min; + ShaderMeshInstancePointer poi = impostorBuffer.Load(push.instanceOffset + instanceID * sizeof(ShaderMeshInstancePointer)); + ShaderMeshInstance instance = load_instance(poi.instanceIndex); + ShaderGeometry geometry = load_geometry(instance.geometryOffset); + float3 extents = geometry.aabb_max - geometry.aabb_min; float radius = max(extents.x, max(extents.y, extents.z)) * 0.5; float3 pos = BILLBOARD[vertexID]; @@ -61,7 +61,7 @@ VSOut main(uint fakeIndex : SV_VERTEXID) Out.uv = uv; Out.slice = slice; Out.dither = poi.GetDither(); - Out.instanceID = poi.instanceID; + Out.instanceID = poi.instanceIndex; return Out; } diff --git a/WickedEngine/shaders/lightingHF.hlsli b/WickedEngine/shaders/lightingHF.hlsli index 5b501ff52..40d8a9cf4 100644 --- a/WickedEngine/shaders/lightingHF.hlsli +++ 
b/WickedEngine/shaders/lightingHF.hlsli @@ -136,7 +136,7 @@ inline void light_directional(in ShaderEntity light, in Surface surface, inout L { #ifdef SHADOW_MASK_ENABLED [branch] - if ((GetFrame().options & OPTION_BIT_RAYTRACED_SHADOWS) == 0) + if ((GetFrame().options & OPTION_BIT_RAYTRACED_SHADOWS) == 0 || GetCamera().texture_rtshadow_index < 0) #endif // SHADOW_MASK_ENABLED { // Loop through cascades from closest (smallest) to furthest (largest) @@ -219,7 +219,7 @@ inline void light_point(in ShaderEntity light, in Surface surface, inout Lightin { #ifdef SHADOW_MASK_ENABLED [branch] - if ((GetFrame().options & OPTION_BIT_RAYTRACED_SHADOWS) == 0) + if ((GetFrame().options & OPTION_BIT_RAYTRACED_SHADOWS) == 0 || GetCamera().texture_rtshadow_index < 0) #endif // SHADOW_MASK_ENABLED { shadow *= shadow_cube(light, L, Lunnormalized); @@ -272,7 +272,7 @@ inline void light_spot(in ShaderEntity light, in Surface surface, inout Lighting { #ifdef SHADOW_MASK_ENABLED [branch] - if ((GetFrame().options & OPTION_BIT_RAYTRACED_SHADOWS) == 0) + if ((GetFrame().options & OPTION_BIT_RAYTRACED_SHADOWS) == 0 || GetCamera().texture_rtshadow_index < 0) #endif // SHADOW_MASK_ENABLED { float4 shadow_pos = mul(load_entitymatrix(light.GetMatrixIndex() + 0), float4(surface.P, 1)); diff --git a/WickedEngine/shaders/motionblurCS.hlsl b/WickedEngine/shaders/motionblurCS.hlsl index a5b9c3009..48f426355 100644 --- a/WickedEngine/shaders/motionblurCS.hlsl +++ b/WickedEngine/shaders/motionblurCS.hlsl @@ -66,7 +66,7 @@ void main(uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID) const float neighborhood_velocity_magnitude = length(neighborhood_velocity); const float4 center_color = input[pixel]; - const float2 center_velocity = texture_gbuffer1[pixel].xy * strength; + const float2 center_velocity = texture_velocity[pixel].xy * strength; const float center_velocity_magnitude = length(center_velocity); const float center_depth = texture_lineardepth[pixel]; @@ -89,12 +89,12 @@ void main(uint3 Gid 
: SV_GroupID, uint3 GTid : SV_GroupThreadID) for (float i = -range; i <= range; i += 2.0f) { const float depth1 = texture_lineardepth.SampleLevel(sampler_point_clamp, uv2, 0); - const float2 velocity1 = texture_gbuffer1.SampleLevel(sampler_point_clamp, uv2, 0).xy; + const float2 velocity1 = texture_velocity.SampleLevel(sampler_point_clamp, uv2, 0).xy; const float velocity_magnitude1 = length(velocity1); const float3 color1 = input.SampleLevel(sampler_point_clamp, uv2, 0).rgb; uv2 += sampling_direction; const float depth2 = texture_lineardepth.SampleLevel(sampler_point_clamp, uv2, 0); - const float2 velocity2 = texture_gbuffer1.SampleLevel(sampler_point_clamp, uv2, 0).xy; + const float2 velocity2 = texture_velocity.SampleLevel(sampler_point_clamp, uv2, 0).xy; const float velocity_magnitude2 = length(velocity2); const float3 color2 = input.SampleLevel(sampler_point_clamp, uv2, 0).rgb; uv2 += sampling_direction; diff --git a/WickedEngine/shaders/motionblur_tileMaxVelocity_horizontalCS.hlsl b/WickedEngine/shaders/motionblur_tileMaxVelocity_horizontalCS.hlsl index 766cd6412..7b705bf3f 100644 --- a/WickedEngine/shaders/motionblur_tileMaxVelocity_horizontalCS.hlsl +++ b/WickedEngine/shaders/motionblur_tileMaxVelocity_horizontalCS.hlsl @@ -17,7 +17,7 @@ void main(uint3 DTid : SV_DispatchThreadID) for (uint i = 0; i < MOTIONBLUR_TILESIZE.x; ++i) { const uint2 pixel = uint2(tile_upperleft.x + i, tile_upperleft.y); - const float2 velocity = texture_gbuffer1[pixel].xy; + const float2 velocity = texture_velocity[pixel].xy; const float magnitude = length(velocity); if (magnitude > max_magnitude) { diff --git a/WickedEngine/shaders/objectHF.hlsli b/WickedEngine/shaders/objectHF.hlsli index 0869bc0d0..5a919ae5c 100644 --- a/WickedEngine/shaders/objectHF.hlsli +++ b/WickedEngine/shaders/objectHF.hlsli @@ -32,9 +32,9 @@ inline uint GetSubsetIndex() { return push.GetSubsetIndex(); } -inline ShaderMesh GetMesh() +inline ShaderGeometry GetMesh() { - return 
load_mesh(push.GetMeshIndex()); + return load_geometry(push.GetMeshIndex() + push.GetSubsetIndex()); } inline ShaderMaterial GetMaterial() { @@ -118,7 +118,7 @@ uint load_entitytile(uint tileIndex) //#define OBJECTSHADER_USE_EMISSIVE - shader will use emissive //#define OBJECTSHADER_USE_RENDERTARGETARRAYINDEX - shader will use dynamic render target slice selection //#define OBJECTSHADER_USE_NOCAMERA - shader will not use camera space transform -//#define OBJECTSHADER_USE_INSTANCEID - shader will use instance ID +//#define OBJECTSHADER_USE_INSTANCEINDEX - shader will use instance ID #ifdef OBJECTSHADER_LAYOUT_SHADOW @@ -133,7 +133,7 @@ uint load_entitytile(uint tileIndex) #ifdef OBJECTSHADER_LAYOUT_PREPASS #define OBJECTSHADER_USE_CLIPPLANE #define OBJECTSHADER_USE_WIND -#define OBJECTSHADER_USE_INSTANCEID +#define OBJECTSHADER_USE_INSTANCEINDEX #endif // OBJECTSHADER_LAYOUT_SHADOW #ifdef OBJECTSHADER_LAYOUT_PREPASS_TEX @@ -141,7 +141,7 @@ uint load_entitytile(uint tileIndex) #define OBJECTSHADER_USE_WIND #define OBJECTSHADER_USE_UVSETS #define OBJECTSHADER_USE_DITHERING -#define OBJECTSHADER_USE_INSTANCEID +#define OBJECTSHADER_USE_INSTANCEINDEX #endif // OBJECTSHADER_LAYOUT_SHADOW_TEX #ifdef OBJECTSHADER_LAYOUT_COMMON @@ -154,7 +154,7 @@ uint load_entitytile(uint tileIndex) #define OBJECTSHADER_USE_TANGENT #define OBJECTSHADER_USE_POSITION3D #define OBJECTSHADER_USE_EMISSIVE -#define OBJECTSHADER_USE_INSTANCEID +#define OBJECTSHADER_USE_INSTANCEINDEX #endif // OBJECTSHADER_LAYOUT_COMMON struct VertexInput @@ -164,11 +164,11 @@ struct VertexInput float4 GetPosition() { - return float4(bindless_buffers[GetMesh().vb_pos_nor_wind].Load(vertexID * 16), 1); + return float4(bindless_buffers[GetMesh().vb_pos_nor_wind].Load(vertexID * sizeof(uint4)), 1); } float3 GetNormal() { - const uint normal_wind = bindless_buffers[GetMesh().vb_pos_nor_wind].Load(vertexID * 16).w; + const uint normal_wind = bindless_buffers[GetMesh().vb_pos_nor_wind].Load(vertexID * sizeof(uint4)).w; 
float3 normal; normal.x = (float)((normal_wind >> 0u) & 0xFF) / 255.0 * 2 - 1; normal.y = (float)((normal_wind >> 8u) & 0xFF) / 255.0 * 2 - 1; @@ -177,29 +177,22 @@ struct VertexInput } float GetWindWeight() { - const uint normal_wind = bindless_buffers[GetMesh().vb_pos_nor_wind].Load(vertexID * 16).w; + const uint normal_wind = bindless_buffers[GetMesh().vb_pos_nor_wind].Load(vertexID * sizeof(uint4)).w; return ((normal_wind >> 24u) & 0xFF) / 255.0; } - float2 GetUV0() + float4 GetUVSets() { [branch] - if (GetMesh().vb_uv0 < 0) + if (GetMesh().vb_uvs < 0) return 0; - return unpack_half2(bindless_buffers[GetMesh().vb_uv0].Load(vertexID * 4)); - } - float2 GetUV1() - { - [branch] - if (GetMesh().vb_uv1 < 0) - return 0; - return unpack_half2(bindless_buffers[GetMesh().vb_uv1].Load(vertexID * 4)); + return unpack_half4(bindless_buffers[GetMesh().vb_uvs].Load2(vertexID * sizeof(uint2))); } ShaderMeshInstancePointer GetInstancePointer() { if (push.instances >= 0) - return bindless_buffers[push.instances].Load(push.instance_offset + instanceID * 8); + return bindless_buffers[push.instances].Load(push.instance_offset + instanceID * sizeof(ShaderMeshInstancePointer)); ShaderMeshInstancePointer poi; poi.init(); @@ -211,7 +204,7 @@ struct VertexInput [branch] if (GetMesh().vb_atl < 0) return 0; - return unpack_half2(bindless_buffers[GetMesh().vb_atl].Load(vertexID * 4)); + return unpack_half2(bindless_buffers[GetMesh().vb_atl].Load(vertexID * sizeof(uint))); } float4 GetVertexColor() @@ -219,7 +212,7 @@ struct VertexInput [branch] if (GetMesh().vb_col < 0) return 1; - return unpack_rgba(bindless_buffers[GetMesh().vb_col].Load(vertexID * 4)); + return unpack_rgba(bindless_buffers[GetMesh().vb_col].Load(vertexID * sizeof(uint))); } float4 GetTangent() @@ -227,13 +220,13 @@ struct VertexInput [branch] if (GetMesh().vb_tan < 0) return 0; - return unpack_utangent(bindless_buffers[GetMesh().vb_tan].Load(vertexID * 4)) * 2 - 1; + return 
unpack_utangent(bindless_buffers[GetMesh().vb_tan].Load(vertexID * sizeof(uint))) * 2 - 1; } ShaderMeshInstance GetInstance() { if (push.instances >= 0) - return load_instance(GetInstancePointer().instanceID); + return load_instance(GetInstancePointer().instanceIndex); ShaderMeshInstance inst; inst.init(); @@ -269,7 +262,8 @@ struct VertexSurface tangent = input.GetTangent(); tangent.xyz = normalize(mul((float3x3)input.GetInstance().transformInverseTranspose.GetMatrix(), tangent.xyz)); - uvsets = float4(mad(input.GetUV0(), material.texMulAdd.xy, material.texMulAdd.zw), input.GetUV1()); + uvsets = input.GetUVSets(); + uvsets.xy = mad(uvsets.xy, material.texMulAdd.xy, material.texMulAdd.zw); atlas = input.GetAtlasUV(); @@ -293,9 +287,9 @@ struct PixelInput { precise float4 pos : SV_POSITION; -#ifdef OBJECTSHADER_USE_INSTANCEID - uint instanceID : INSTANCEID; -#endif // OBJECTSHADER_USE_INSTANCEID +#ifdef OBJECTSHADER_USE_INSTANCEINDEX + uint instanceIndex : INSTANCEINDEX; +#endif // OBJECTSHADER_USE_INSTANCEINDEX #ifdef OBJECTSHADER_USE_CLIPPLANE float clip : SV_ClipDistance0; @@ -818,9 +812,10 @@ inline void TiledLighting(inout Surface surface, inout Lighting lighting) if (GetFrame().lightarray_count > 0) { uint4 shadow_mask_packed = 0; + const bool shadow_mask_enabled = GetFrame().options & OPTION_BIT_SHADOW_MASK && GetCamera().texture_rtshadow_index >= 0; #ifdef SHADOW_MASK_ENABLED [branch] - if (GetFrame().options & OPTION_BIT_SHADOW_MASK && GetCamera().texture_rtshadow_index >= 0) + if (shadow_mask_enabled) { shadow_mask_packed = bindless_textures_uint4[GetCamera().texture_rtshadow_index][surface.pixel / 2]; } @@ -863,7 +858,7 @@ inline void TiledLighting(inout Surface surface, inout Lighting lighting) float shadow_mask = 1; #ifdef SHADOW_MASK_ENABLED [branch] - if (GetFrame().options & OPTION_BIT_SHADOW_MASK && light.IsCastingShadow()) + if (shadow_mask_enabled && light.IsCastingShadow()) { uint shadow_index = entity_index - GetFrame().lightarray_offset; if 
(shadow_index < 16) @@ -992,9 +987,9 @@ PixelInput main(VertexInput input) { PixelInput Out; -#ifdef OBJECTSHADER_USE_INSTANCEID - Out.instanceID = input.GetInstancePointer().instanceID; -#endif // OBJECTSHADER_USE_INSTANCEID +#ifdef OBJECTSHADER_USE_INSTANCEINDEX + Out.instanceIndex = input.GetInstancePointer().instanceIndex; +#endif // OBJECTSHADER_USE_INSTANCEINDEX VertexSurface surface; surface.create(GetMaterial(), input); @@ -1164,7 +1159,7 @@ float4 main(PixelInput input, in bool is_frontface : SV_IsFrontFace) : SV_Target #ifndef DISABLE_ALPHATEST // Alpha test is only done for transparents // - Prepass will write alpha coverage mask - // - Opaque will + // - Opaque will use [earlydepthstencil] and COMPARISON_EQUAL depth test on top of depth prepass clip(color.a - GetMaterial().alphaTest); #endif // DISABLE_ALPHATEST #endif // TRANSPARENT @@ -1669,7 +1664,7 @@ float4 main(PixelInput input, in bool is_frontface : SV_IsFrontFace) : SV_Target #ifdef OBJECTSHADER_USE_ATLAS - LightMapping(load_instance(input.instanceID).lightmap, input.atl, lighting, surface); + LightMapping(load_instance(input.instanceIndex).lightmap, input.atl, lighting, surface); #endif // OBJECTSHADER_USE_ATLAS @@ -1754,7 +1749,7 @@ float4 main(PixelInput input, in bool is_frontface : SV_IsFrontFace) : SV_Target PrimitiveID prim; prim.primitiveIndex = primitiveID; - prim.instanceIndex = input.instanceID; + prim.instanceIndex = input.instanceIndex; prim.subsetIndex = GetSubsetIndex(); return prim.pack(); #else diff --git a/WickedEngine/shaders/raytraceCS.hlsl b/WickedEngine/shaders/raytraceCS.hlsl index e11c2d791..fd9cfd0e5 100644 --- a/WickedEngine/shaders/raytraceCS.hlsl +++ b/WickedEngine/shaders/raytraceCS.hlsl @@ -1,5 +1,6 @@ #define RAY_BACKFACE_CULLING #define RAYTRACE_STACK_SHARED +#define SURFACE_LOAD_MIPCONE #include "globals.hlsli" #include "raytracingHF.hlsli" #include "lightingHF.hlsli" @@ -41,6 +42,8 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex) 
ray.Origin = ray.Origin + coc; ray.Direction = focal_point - ray.Origin; // will be normalized before tracing! + RayCone raycone = pixel_ray_cone_from_image_height(xTraceResolution.y); + uint bounces = xTraceUserData.x; const uint bouncelimit = 16; for (uint bounce = 0; ((bounce < min(bounces, bouncelimit)) && any(energy)); ++bounce) @@ -69,6 +72,9 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex) Surface surface; surface.init(); + surface.V = ray.Direction; + surface.raycone = raycone; + surface.hit_depth = q.CandidateTriangleRayT(); if (!surface.load(prim, q.CandidateTriangleBarycentrics())) break; @@ -106,6 +112,8 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex) Surface surface; surface.init(); + surface.V = -ray.Direction; + surface.raycone = raycone; #ifdef RTAPI // ray origin updated for next bounce: @@ -121,6 +129,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex) surface.flags |= SURFACE_FLAG_BACKFACE; } + surface.hit_depth = q.CommittedRayT(); if (!surface.load(prim, q.CommittedTriangleBarycentrics())) return; @@ -133,6 +142,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex) surface.flags |= SURFACE_FLAG_BACKFACE; } + surface.hit_depth = hit.distance; if (!surface.load(hit.primitiveID, hit.bary)) return; @@ -142,6 +152,8 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex) surface.V = -ray.Direction; surface.update(); + raycone = raycone.propagate(surface.roughnessBRDF, surface.hit_depth); + result += max(0, energy * surface.emissiveColor); if (!surface.material.IsUnlit()) @@ -272,6 +284,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex) Surface surface; surface.init(); + surface.hit_depth = q.CandidateTriangleRayT(); if (!surface.load(prim, q.CandidateTriangleBarycentrics())) break; diff --git a/WickedEngine/shaders/rtaoCS.hlsl b/WickedEngine/shaders/rtaoCS.hlsl index 
8029a2c00..d68803629 100644 --- a/WickedEngine/shaders/rtaoCS.hlsl +++ b/WickedEngine/shaders/rtaoCS.hlsl @@ -17,9 +17,8 @@ groupshared float tile_Z[TILE_SIZE * TILE_SIZE]; [numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)] void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint groupIndex : SV_GroupIndex) { - const float2 uv = ((float2)DTid.xy + 0.5) * postprocess.resolution_rcp; - const float depth = texture_depth.SampleLevel(sampler_linear_clamp, uv, 0); - if (depth == 0) + uint2 primitiveID = texture_primitiveID[DTid.xy * 2]; + if (!any(primitiveID)) return; uint flatTileIdx = 0; @@ -33,20 +32,22 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid : } output_tiles[flatTileIdx] = 0; - const float3 P = reconstruct_position(uv, depth); + const float2 uv = ((float2)DTid.xy + 0.5) * postprocess.resolution_rcp; + float2 clipspace = uv * 2 - 1; + clipspace.y *= -1; + RayDesc ray = CreateCameraRay(clipspace); PrimitiveID prim; - prim.unpack(texture_gbuffer0[DTid.xy * 2]); + prim.unpack(primitiveID); Surface surface; surface.init(); - if (!surface.load(prim, P)) - { + if (!surface.load(prim, ray.Origin, ray.Direction)) return; - } + + float3 P = surface.P; float3 N = surface.facenormal; - RayDesc ray; ray.TMin = 0.01; ray.TMax = rtao_range; ray.Origin = P; @@ -86,7 +87,6 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid : if (surface.opacity - alphatest >= 0) { q.CommitNonOpaqueTriangleHit(); - break; } } shadow = q.CommittedStatus() == COMMITTED_TRIANGLE_HIT ? 
0 : 1; diff --git a/WickedEngine/shaders/rtao_denoise_tileclassificationCS.hlsl b/WickedEngine/shaders/rtao_denoise_tileclassificationCS.hlsl index 021d9a681..0a49ab22a 100644 --- a/WickedEngine/shaders/rtao_denoise_tileclassificationCS.hlsl +++ b/WickedEngine/shaders/rtao_denoise_tileclassificationCS.hlsl @@ -67,7 +67,7 @@ float FFX_DNSR_Shadows_ReadHistory(float2 history_uv) } float2 FFX_DNSR_Shadows_ReadVelocity(uint2 did) { - return -texture_gbuffer1[did * 2].xy; + return -texture_velocity[did * 2].xy; } void FFX_DNSR_Shadows_WriteReprojectionResults(uint2 did, float2 value) diff --git a/WickedEngine/shaders/rtreflectionCS.hlsl b/WickedEngine/shaders/rtreflectionCS.hlsl index a6a6bd1f3..027b12b3c 100644 --- a/WickedEngine/shaders/rtreflectionCS.hlsl +++ b/WickedEngine/shaders/rtreflectionCS.hlsl @@ -1,6 +1,7 @@ #define RTAPI #define DISABLE_SOFT_SHADOWMAP #define DISABLE_TRANSPARENT_SHADOWMAP +#define SURFACE_LOAD_MIPCONE #include "globals.hlsli" #include "ShaderInterop_Postprocess.h" @@ -11,10 +12,6 @@ PUSHCONSTANT(postprocess, PostProcess); -Texture2D texture_surface_normal : register(t0); -Texture2D texture_surface_roughness : register(t1); -Texture2D texture_surface_environment : register(t2); - RWTexture2D output_rayIndirectSpecular : register(u0); RWTexture2D output_rayDirectionPDF : register(u1); RWTexture2D output_rayLengths : register(u2); @@ -37,19 +34,18 @@ void main(uint2 DTid : SV_DispatchThreadID) float2 jitterUV = (screenJitter + DTid.xy + 0.5f) * postprocess.resolution_rcp; const float depth = texture_depth.SampleLevel(sampler_linear_clamp, jitterUV, 0); - const float roughness = texture_surface_roughness[jitterPixel]; + const float lineardepth = texture_lineardepth.SampleLevel(sampler_linear_clamp, jitterUV, 0); + const float roughness = texture_roughness[jitterPixel]; if (!NeedReflection(roughness, depth)) { - float3 environmentReflection = texture_surface_environment[DTid.xy * downsampleFactor]; - - output_rayIndirectSpecular[DTid.xy] = 
float4(environmentReflection, 1); - output_rayDirectionPDF[DTid.xy] = 0.0; + output_rayIndirectSpecular[DTid.xy] = 0; + output_rayDirectionPDF[DTid.xy] = 0; output_rayLengths[DTid.xy] = FLT_MAX; return; } - const float3 N = texture_surface_normal[jitterPixel]; + const float3 N = decode_oct(texture_normal[jitterPixel]); const float3 P = reconstruct_position(jitterUV, depth); const float3 V = normalize(GetCamera().position - P); @@ -95,6 +91,9 @@ void main(uint2 DTid : SV_DispatchThreadID) RayPayload payload; payload.data = 0; + const float minraycone = 0.05; + RayCone raycone = RayCone::from_spread_angle(pixel_cone_spread_angle_from_image_height(postprocess.resolution.y)); + raycone = raycone.propagate(sqr(max(minraycone, roughness)), lineardepth * GetCamera().z_far); #ifdef RTAPI RayQuery< @@ -115,6 +114,9 @@ void main(uint2 DTid : SV_DispatchThreadID) Surface surface; surface.init(); + surface.V = -ray.Direction; + surface.raycone = raycone; + surface.hit_depth = q.CandidateTriangleRayT(); if (!surface.load(prim, q.CandidateTriangleBarycentrics())) break; @@ -124,7 +126,6 @@ void main(uint2 DTid : SV_DispatchThreadID) if (surface.opacity - alphatest >= 0) { q.CommitNonOpaqueTriangleHit(); - break; } } if (q.CommittedStatus() != COMMITTED_TRIANGLE_HIT) @@ -152,6 +153,9 @@ void main(uint2 DTid : SV_DispatchThreadID) { surface.flags |= SURFACE_FLAG_BACKFACE; } + surface.V = -ray.Direction; + surface.raycone = raycone; + surface.hit_depth = q.CommittedRayT(); if (!surface.load(prim, q.CommittedTriangleBarycentrics())) return; diff --git a/WickedEngine/shaders/rtreflectionLIB.hlsl b/WickedEngine/shaders/rtreflectionLIB.hlsl index 742982a2b..4a0a1de53 100644 --- a/WickedEngine/shaders/rtreflectionLIB.hlsl +++ b/WickedEngine/shaders/rtreflectionLIB.hlsl @@ -10,10 +10,6 @@ PUSHCONSTANT(postprocess, PostProcess); -Texture2D texture_surface_normal : register(t0); -Texture2D texture_surface_roughness : register(t1); -Texture2D texture_surface_environment : register(t2); - 
RWTexture2D output_rayIndirectSpecular : register(u0); RWTexture2D output_rayDirectionPDF : register(u1); RWTexture2D output_rayLengths : register(u2); @@ -44,19 +40,17 @@ void RTReflection_Raygen() float2 jitterUV = (screenJitter + DTid.xy + 0.5f) / (float2)DispatchRaysDimensions(); const float depth = texture_depth.SampleLevel(sampler_linear_clamp, jitterUV, 0); - const float roughness = texture_surface_roughness[jitterPixel]; + const float roughness = texture_roughness[jitterPixel]; if (!NeedReflection(roughness, depth)) { - float3 environmentReflection = texture_surface_environment[DTid.xy * downsampleFactor]; - - output_rayIndirectSpecular[DTid.xy] = float4(environmentReflection, 1); - output_rayDirectionPDF[DTid.xy] = 0.0; + output_rayIndirectSpecular[DTid.xy] = 0; + output_rayDirectionPDF[DTid.xy] = 0; output_rayLengths[DTid.xy] = FLT_MAX; return; } - const float3 N = texture_surface_normal[jitterPixel]; + const float3 N = decode_oct(texture_normal[jitterPixel]); const float3 P = reconstruct_position(jitterUV, depth); const float3 V = normalize(GetCamera().position - P); diff --git a/WickedEngine/shaders/rtshadow_denoise_temporalCS.hlsl b/WickedEngine/shaders/rtshadow_denoise_temporalCS.hlsl index fba2a0674..cd45ef477 100644 --- a/WickedEngine/shaders/rtshadow_denoise_temporalCS.hlsl +++ b/WickedEngine/shaders/rtshadow_denoise_temporalCS.hlsl @@ -81,7 +81,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 const float2 uv = (DTid.xy + 0.5f) * postprocess.resolution_rcp; - const float2 velocity = texture_gbuffer1.SampleLevel(sampler_point_clamp, uv, 0).xy; + const float2 velocity = texture_velocity.SampleLevel(sampler_point_clamp, uv, 0).xy; const float2 prevUV = uv + velocity; if (!is_saturated(prevUV)) { diff --git a/WickedEngine/shaders/rtshadow_denoise_tileclassificationCS.hlsl b/WickedEngine/shaders/rtshadow_denoise_tileclassificationCS.hlsl index 01c4a4f5c..0b40b4093 100644 --- 
a/WickedEngine/shaders/rtshadow_denoise_tileclassificationCS.hlsl +++ b/WickedEngine/shaders/rtshadow_denoise_tileclassificationCS.hlsl @@ -72,7 +72,7 @@ float FFX_DNSR_Shadows_ReadHistory(float2 history_uv) } float2 FFX_DNSR_Shadows_ReadVelocity(uint2 did) { - return -texture_gbuffer1[did * 2].xy; + return -texture_velocity[did * 2].xy; } void FFX_DNSR_Shadows_WriteReprojectionResults(uint2 did, float2 value) diff --git a/WickedEngine/shaders/screenspaceshadowCS.hlsl b/WickedEngine/shaders/screenspaceshadowCS.hlsl index 2a3a40947..b1f33db2c 100644 --- a/WickedEngine/shaders/screenspaceshadowCS.hlsl +++ b/WickedEngine/shaders/screenspaceshadowCS.hlsl @@ -22,11 +22,6 @@ RWTexture2D output_normals : register(u1); RWStructuredBuffer output_tiles : register(u2); #endif // RTSHADOW -static const uint TILE_BORDER = 1; -static const uint TILE_SIZE = POSTPROCESS_BLOCKSIZE + TILE_BORDER * 2; -groupshared float2 tile_XY[TILE_SIZE * TILE_SIZE]; -groupshared float tile_Z[TILE_SIZE * TILE_SIZE]; - [numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)] void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint groupIndex : SV_GroupIndex) { @@ -51,7 +46,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid : float3 P = reconstruct_position(uv, depth); PrimitiveID prim; - prim.unpack(texture_gbuffer0[DTid.xy * 2]); + prim.unpack(texture_primitiveID[DTid.xy * 2]); Surface surface; surface.init(); @@ -59,7 +54,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid : { return; } - float3 N = surface.N; + float3 N = surface.facenormal; const float2 bluenoise = blue_noise(DTid.xy).xy; @@ -247,7 +242,6 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid : if (surface.opacity - alphatest >= 0) { q.CommitNonOpaqueTriangleHit(); - break; } } shadow = q.CommittedStatus() == COMMITTED_TRIANGLE_HIT ? 
0 : 1; diff --git a/WickedEngine/shaders/shadingRateClassificationCS.hlsl b/WickedEngine/shaders/shadingRateClassificationCS.hlsl index 08765c919..f1724293d 100644 --- a/WickedEngine/shaders/shadingRateClassificationCS.hlsl +++ b/WickedEngine/shaders/shadingRateClassificationCS.hlsl @@ -23,7 +23,7 @@ void main( uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint groupInde GroupMemoryBarrierWithGroupSync(); uint2 dim; - texture_gbuffer1.GetDimensions(dim.x, dim.y); + texture_velocity.GetDimensions(dim.x, dim.y); const uint2 tile = Gid.xy; @@ -35,7 +35,7 @@ void main( uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint groupInde const uint2 tile_pixel = unflatten2D(i, shadingrate.TileSize); const uint2 pixel = min(tile * shadingrate.TileSize + tile_pixel, dim - 1); - const float2 velocity = abs(texture_gbuffer1[pixel].xy); + const float2 velocity = abs(texture_velocity[pixel].xy); const float magnitude = max(velocity.x, velocity.y); uint rate = 0; diff --git a/WickedEngine/shaders/ssr_bilateralCS.hlsl b/WickedEngine/shaders/ssr_bilateralCS.hlsl index df0236666..3cf6f5bfe 100644 --- a/WickedEngine/shaders/ssr_bilateralCS.hlsl +++ b/WickedEngine/shaders/ssr_bilateralCS.hlsl @@ -6,8 +6,6 @@ PUSHCONSTANT(postprocess, PostProcess); Texture2D texture_temporal : register(t0); Texture2D texture_resolve_variance : register(t1); -Texture2D texture_surface_normal : register(t2); -Texture2D texture_surface_roughness : register(t3); RWTexture2D output : register(u0); @@ -28,7 +26,7 @@ void main(uint3 DTid : SV_DispatchThreadID) #endif const float depth = texture_depth[DTid.xy]; - const float roughness = texture_surface_roughness[DTid.xy]; + const float roughness = texture_roughness[DTid.xy]; if (!NeedReflection(roughness, depth)) { @@ -39,7 +37,7 @@ void main(uint3 DTid : SV_DispatchThreadID) float2 direction = postprocess.params0.xy; const float linearDepth = texture_lineardepth[DTid.xy]; - const float3 N = texture_surface_normal[DTid.xy]; + const float3 N = 
decode_oct(texture_normal[DTid.xy]); float4 outputColor = texture_temporal[DTid.xy]; @@ -70,8 +68,8 @@ void main(uint3 DTid : SV_DispatchThreadID) const float sampleDepth = texture_depth[sampleCoord]; const float4 sampleColor = texture_temporal[sampleCoord]; - const float3 sampleN = texture_surface_normal[sampleCoord]; - const float sampleRoughness = texture_surface_roughness[sampleCoord]; + const float3 sampleN = decode_oct(texture_normal[sampleCoord]); + const float sampleRoughness = texture_roughness[sampleCoord]; float2 sampleUV = (sampleCoord + 0.5) * postprocess.resolution_rcp; float3 sampleP = reconstruct_position(sampleUV, sampleDepth); diff --git a/WickedEngine/shaders/ssr_raytraceCS.hlsl b/WickedEngine/shaders/ssr_raytraceCS.hlsl index d72cf4abd..be8dc3420 100644 --- a/WickedEngine/shaders/ssr_raytraceCS.hlsl +++ b/WickedEngine/shaders/ssr_raytraceCS.hlsl @@ -6,17 +6,15 @@ PUSHCONSTANT(postprocess, PostProcess); //#define DEBUG_TILING -Texture2D texture_surface_normal : register(t0); -Texture2D texture_surface_roughness : register(t1); -Texture2D texture_depth_hierarchy : register(t2); -Texture2D input : register(t3); +Texture2D texture_depth_hierarchy : register(t0); +Texture2D input : register(t1); #if defined(SSR_EARLYEXIT) -StructuredBuffer tiles : register(t4); +StructuredBuffer tiles : register(t2); #elif defined(SSR_CHEAP) -StructuredBuffer tiles : register(t5); +StructuredBuffer tiles : register(t3); #else -StructuredBuffer tiles : register(t6); +StructuredBuffer tiles : register(t4); #endif // SSR_EARLYEXIT RWTexture2D output_rayIndirectSpecular : register(u0); @@ -250,7 +248,7 @@ void main(uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID) // Due to HiZ tracing, the tracing and the pass components must match depth. 
float depth = texture_depth_hierarchy[screenJitter + pixel].r; - float roughness = texture_surface_roughness[jitterPixel]; + float roughness = texture_roughness[jitterPixel]; if (!NeedReflection(roughness, depth)) { @@ -260,7 +258,7 @@ void main(uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID) return; } - float3 N = texture_surface_normal[jitterPixel]; + float3 N = decode_oct(texture_normal[jitterPixel]); float3 P = reconstruct_position(jitterUV, depth); float3 V = normalize(GetCamera().position - P); @@ -355,7 +353,7 @@ void main(uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID) #endif // SSR_CHEAP - float2 prevHitUV = texture_gbuffer1.SampleLevel(sampler_point_clamp, hit.xy, 0).xy + hit.xy; + float2 prevHitUV = texture_velocity.SampleLevel(sampler_point_clamp, hit.xy, 0).xy + hit.xy; float hitDepth = texture_depth.SampleLevel(sampler_point_clamp, hit.xy, 0); float confidence = validHit ? ValidateHit(hit, hitDepth, prevHitUV) : 0; diff --git a/WickedEngine/shaders/ssr_resolveCS.hlsl b/WickedEngine/shaders/ssr_resolveCS.hlsl index 816da6a56..f95a30c6c 100644 --- a/WickedEngine/shaders/ssr_resolveCS.hlsl +++ b/WickedEngine/shaders/ssr_resolveCS.hlsl @@ -5,11 +5,9 @@ PUSHCONSTANT(postprocess, PostProcess); -Texture2D texture_surface_normal : register(t0); -Texture2D texture_surface_roughness : register(t1); -Texture2D texture_rayIndirectSpecular : register(t2); -Texture2D texture_rayDirectionPDF : register(t3); -Texture2D texture_rayLength : register(t4); +Texture2D texture_rayIndirectSpecular : register(t0); +Texture2D texture_rayDirectionPDF : register(t1); +Texture2D texture_rayLength : register(t2); RWTexture2D texture_resolve : register(u0); RWTexture2D texture_resolve_variance : register(u1); @@ -100,7 +98,7 @@ void main(uint3 DTid : SV_DispatchThreadID) const uint2 tracingCoord = DTid.xy / 2; const float depth = texture_depth[DTid.xy]; - const float roughness = texture_surface_roughness[DTid.xy]; + const float roughness = texture_roughness[DTid.xy]; 
if (!NeedReflection(roughness, depth)) { @@ -112,7 +110,7 @@ void main(uint3 DTid : SV_DispatchThreadID) // Everthing in world space: const float3 P = reconstruct_position(uv, depth); - const float3 N = texture_surface_normal[DTid.xy]; + const float3 N = decode_oct(texture_normal[DTid.xy]); const float3 V = normalize(GetCamera().position - P); const float NdotV = saturate(dot(N, V)); diff --git a/WickedEngine/shaders/ssr_surfaceCS.hlsl b/WickedEngine/shaders/ssr_surfaceCS.hlsl deleted file mode 100644 index fa5c3ab17..000000000 --- a/WickedEngine/shaders/ssr_surfaceCS.hlsl +++ /dev/null @@ -1,51 +0,0 @@ -#include "globals.hlsli" -#include "brdf.hlsli" -#include "lightingHF.hlsli" -#include "ShaderInterop_Postprocess.h" - -PUSHCONSTANT(postprocess, PostProcess); - -RWTexture2D output_surface_normal : register(u0); -RWTexture2D output_surface_roughness : register(u1); -RWTexture2D output_surface_environment : register(u2); - -[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)] -void main(uint3 DTid : SV_DispatchThreadID) -{ - uint2 dim; - texture_depth.GetDimensions(dim.x, dim.y); - - float2 uv = (DTid.xy + 0.5f) / dim; - - float depth = texture_depth[DTid.xy]; - if (depth == 0.0) - { - output_surface_normal[DTid.xy] = 0.0; - output_surface_roughness[DTid.xy] = 0.0; - output_surface_environment[DTid.xy] = 0.0; - return; - } - - uint2 primitiveID = texture_gbuffer0[DTid.xy]; // Map to resolution - - PrimitiveID prim; - prim.unpack(primitiveID); - - Surface surface; - surface.init(); - if (!surface.load(prim, reconstruct_position(uv, depth))) - { - output_surface_normal[DTid.xy] = 0.0; - output_surface_roughness[DTid.xy] = 0.0; - output_surface_environment[DTid.xy] = 0.0; - return; - } - - float3 N = surface.N; - float roughness = surface.roughness; - float3 environmentReflection = EnvironmentReflection_Global(surface); - - output_surface_normal[DTid.xy] = N; - output_surface_roughness[DTid.xy] = roughness; - output_surface_environment[DTid.xy] = 
environmentReflection; -} diff --git a/WickedEngine/shaders/ssr_temporalCS.hlsl b/WickedEngine/shaders/ssr_temporalCS.hlsl index 2b362b9ff..d5c6977c6 100644 --- a/WickedEngine/shaders/ssr_temporalCS.hlsl +++ b/WickedEngine/shaders/ssr_temporalCS.hlsl @@ -4,12 +4,11 @@ PUSHCONSTANT(postprocess, PostProcess); -Texture2D texture_surface_roughness : register(t0); -Texture2D texture_color_current : register(t1); -Texture2D texture_color_history : register(t2); -Texture2D texture_variance_current : register(t3); -Texture2D texture_variance_history : register(t4); -Texture2D texture_reprojectionDepth : register(t5); +Texture2D texture_color_current : register(t0); +Texture2D texture_color_history : register(t1); +Texture2D texture_variance_current : register(t2); +Texture2D texture_variance_history : register(t3); +Texture2D texture_reprojectionDepth : register(t4); RWTexture2D output_color : register(u0); RWTexture2D output_variance : register(u1); @@ -145,7 +144,7 @@ void main(uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint3 DTid : SV } const float depth = texture_depth[DTid.xy]; - const float roughness = texture_surface_roughness[DTid.xy]; + const float roughness = texture_roughness[DTid.xy]; if (!NeedReflection(roughness, depth)) { @@ -180,7 +179,7 @@ void main(uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint3 DTid : SV // Secondary reprojection based on ray lengths: // https://www.ea.com/seed/news/seed-dd18-presentation-slides-raytracing (Slide 45) - float2 velocity = texture_gbuffer1[DTid.xy]; + float2 velocity = texture_velocity[DTid.xy]; float reprojectionDepth = texture_reprojectionDepth[DTid.xy]; float2 uv = (DTid.xy + 0.5f) * postprocess.resolution_rcp; diff --git a/WickedEngine/shaders/ssr_tileMaxRoughness_horizontalCS.hlsl b/WickedEngine/shaders/ssr_tileMaxRoughness_horizontalCS.hlsl index d62c41c12..8cc52ff45 100644 --- a/WickedEngine/shaders/ssr_tileMaxRoughness_horizontalCS.hlsl +++ 
b/WickedEngine/shaders/ssr_tileMaxRoughness_horizontalCS.hlsl @@ -4,8 +4,6 @@ PUSHCONSTANT(postprocess, PostProcess); -Texture2D texture_surface_roughness : register(t0); - RWTexture2D tile_minmax_roughness_horizontal : register(u0); [numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)] @@ -32,7 +30,7 @@ void main(uint3 DTid : SV_DispatchThreadID) } else { - float roughness = texture_surface_roughness[pixel]; + float roughness = texture_roughness[pixel]; maxRoughness = max(maxRoughness, roughness); minRoughness = min(minRoughness, roughness); } diff --git a/WickedEngine/shaders/surfel_coverageCS.hlsl b/WickedEngine/shaders/surfel_coverageCS.hlsl index 93f89cff0..3ea58715b 100644 --- a/WickedEngine/shaders/surfel_coverageCS.hlsl +++ b/WickedEngine/shaders/surfel_coverageCS.hlsl @@ -89,7 +89,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex, uin const float2 uv = ((float2)pixel + 0.5) * GetCamera().internal_resolution_rcp; const float3 P = reconstruct_position(uv, depth); - uint2 primitiveID = texture_gbuffer0[pixel]; + uint2 primitiveID = texture_primitiveID[pixel]; PrimitiveID prim; prim.unpack(primitiveID); diff --git a/WickedEngine/shaders/surfel_updateCS.hlsl b/WickedEngine/shaders/surfel_updateCS.hlsl index 1c5297d22..6efe9fca0 100644 --- a/WickedEngine/shaders/surfel_updateCS.hlsl +++ b/WickedEngine/shaders/surfel_updateCS.hlsl @@ -30,7 +30,8 @@ void main(uint3 DTid : SV_DispatchThreadID) Surface surface; surface.init(); - if (surface.load(prim, unpack_half2(surfel_data.bary), surfel_data.uid)) + surface.uid_validate = surfel_data.uid; + if (surface.load(prim, unpack_half2(surfel_data.bary))) { surfel.normal = pack_unitvector(surface.facenormal); surfel.position = surface.P; diff --git a/WickedEngine/shaders/temporalaaCS.hlsl b/WickedEngine/shaders/temporalaaCS.hlsl index 97da1ad21..213c402ed 100644 --- a/WickedEngine/shaders/temporalaaCS.hlsl +++ b/WickedEngine/shaders/temporalaaCS.hlsl @@ -65,7 +65,7 @@ void main(uint3 
DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 } } } - const float2 velocity = texture_gbuffer1[DTid.xy + bestOffset].xy; + const float2 velocity = texture_velocity[DTid.xy + bestOffset].xy; #else @@ -93,7 +93,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 } } } - const float2 velocity = texture_gbuffer1[bestPixel].xy; + const float2 velocity = texture_velocity[bestPixel].xy; #endif // USE_LDS diff --git a/WickedEngine/shaders/visibility_resolveCS.hlsl b/WickedEngine/shaders/visibility_resolveCS.hlsl index dcc5bb17c..79d8de49f 100644 --- a/WickedEngine/shaders/visibility_resolveCS.hlsl +++ b/WickedEngine/shaders/visibility_resolveCS.hlsl @@ -1,28 +1,34 @@ +#define SURFACE_LOAD_MIPCONE #include "globals.hlsli" #include "ShaderInterop_Renderer.h" #include "brdf.hlsli" +#include "raytracingHF.hlsli" -RWTexture2D output_velocity : register(u0); - -RWTexture2D output_depth_mip0 : register(u1); -RWTexture2D output_depth_mip1 : register(u2); -RWTexture2D output_depth_mip2 : register(u3); -RWTexture2D output_depth_mip3 : register(u4); -RWTexture2D output_depth_mip4 : register(u5); - -RWTexture2D output_lineardepth_mip0 : register(u6); -RWTexture2D output_lineardepth_mip1 : register(u7); -RWTexture2D output_lineardepth_mip2 : register(u8); -RWTexture2D output_lineardepth_mip3 : register(u9); -RWTexture2D output_lineardepth_mip4 : register(u10); +PUSHCONSTANT(push, VisibilityResolvePushConstants); #ifdef VISIBILITY_MSAA -Texture2DMS texture_primitiveID : register(t0); -Texture2DMS texture_depthbuffer : register(t1); -RWTexture2D output_primitiveID : register(u11); +Texture2DMS input_primitiveID : register(t0); #else -Texture2D texture_primitiveID : register(t0); -Texture2D texture_depthbuffer : register(t1); +Texture2D input_primitiveID : register(t0); +#endif // VISIBILITY_MSAA + +RWTexture2D output_depth_mip0 : register(u0); +RWTexture2D output_depth_mip1 : register(u1); +RWTexture2D output_depth_mip2 : register(u2); 
+RWTexture2D output_depth_mip3 : register(u3); +RWTexture2D output_depth_mip4 : register(u4); + +RWTexture2D output_lineardepth_mip0 : register(u5); +RWTexture2D output_lineardepth_mip1 : register(u6); +RWTexture2D output_lineardepth_mip2 : register(u7); +RWTexture2D output_lineardepth_mip3 : register(u8); +RWTexture2D output_lineardepth_mip4 : register(u9); + +RWTexture2D output_velocity : register(u10); +RWTexture2D output_normal : register(u11); +RWTexture2D output_roughness : register(u12); +#ifdef VISIBILITY_MSAA +RWTexture2D output_primitiveID : register(u13); #endif // VISIBILITY_MSAA [numthreads(16, 16, 1)] @@ -31,68 +37,121 @@ void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex, uin uint2 pixel = DTid.xy; const float2 uv = ((float2)pixel + 0.5) * GetCamera().internal_resolution_rcp; - const float depth = texture_depthbuffer[pixel]; - const float3 P = reconstruct_position(uv, depth); + const float2 clipspace = uv_to_clipspace(uv); + RayDesc ray = CreateCameraRay(clipspace); - float3 pre = P; - - if (depth > 0) - { - uint2 primitiveID = texture_primitiveID[pixel]; + uint2 primitiveID = input_primitiveID[pixel]; #ifdef VISIBILITY_MSAA + [branch] + if (push.options & VISIBILITY_RESOLVE_PRIMITIVEID) + { output_primitiveID[pixel] = primitiveID; + } #endif // VISIBILITY_MSAA + float3 pre; + float depth; + [branch] + if (any(primitiveID)) + { PrimitiveID prim; prim.unpack(primitiveID); Surface surface; surface.init(); - if (surface.load(prim, P)) + surface.raycone = pixel_ray_cone_from_image_height(GetCamera().internal_resolution.y); + [branch] + if (surface.load(prim, ray.Origin, ray.Direction)) { pre = surface.pre; - } + float4 tmp = mul(GetCamera().view_projection, float4(surface.P, 1)); + tmp.xyz /= tmp.w; + depth = tmp.z; +#ifndef VISIBILITY_FAST + [branch] + if (push.options & VISIBILITY_RESOLVE_NORMAL) + { + output_normal[pixel] = encode_oct(surface.N); + } + [branch] + if (push.options & VISIBILITY_RESOLVE_ROUGHNESS) + { + 
output_roughness[pixel] = surface.roughness; + } +#endif // VISIBILITY_FAST + } + } + else + { + pre = ray.Origin + ray.Direction * GetCamera().z_far; + depth = 0; } - float4 pos2DPrev = mul(GetCamera().previous_view_projection, float4(pre, 1)); - pos2DPrev.xy /= pos2DPrev.w; - float2 pos2D = uv * 2 - 1; - pos2D.y *= -1; - float2 velocity = ((pos2DPrev.xy - GetCamera().temporalaa_jitter_prev) - (pos2D.xy - GetCamera().temporalaa_jitter)) * float2(0.5, -0.5); - - output_velocity[pixel] = velocity; - - +#ifndef VISIBILITY_FAST + [branch] + if (push.options & VISIBILITY_RESOLVE_VELOCITY) + { + float2 pos2D = clipspace; + float4 pos2DPrev = mul(GetCamera().previous_view_projection, float4(pre, 1)); + pos2DPrev.xy /= pos2DPrev.w; + float2 velocity = ((pos2DPrev.xy - GetCamera().temporalaa_jitter_prev) - (pos2D.xy - GetCamera().temporalaa_jitter)) * float2(0.5, -0.5); + output_velocity[pixel] = velocity; + } +#endif // VISIBILITY_FAST // Downsample depths: - output_depth_mip0[pixel] = depth; - float lineardepth = compute_lineardepth(depth) * GetCamera().z_far_rcp; - output_lineardepth_mip0[pixel] = lineardepth; - - if (GTid.x % 2 == 0 && GTid.y % 2 == 0) + [branch] + if (push.options & VISIBILITY_RESOLVE_DEPTH) { - output_depth_mip1[pixel / 2] = depth; - output_lineardepth_mip1[pixel / 2] = lineardepth; + output_depth_mip0[pixel] = depth; + + if (GTid.x % 2 == 0 && GTid.y % 2 == 0) + { + output_depth_mip1[pixel / 2] = depth; + } + + if (GTid.x % 4 == 0 && GTid.y % 4 == 0) + { + output_depth_mip2[pixel / 4] = depth; + } + + if (GTid.x % 8 == 0 && GTid.y % 8 == 0) + { + output_depth_mip3[pixel / 8] = depth; + } + + if (GTid.x % 16 == 0 && GTid.y % 16 == 0) + { + output_depth_mip4[pixel / 16] = depth; + } } - - if (GTid.x % 4 == 0 && GTid.y % 4 == 0) + [branch] + if (push.options & VISIBILITY_RESOLVE_LINEARDEPTH) { - output_depth_mip2[pixel / 4] = depth; - output_lineardepth_mip2[pixel / 4] = lineardepth; - } + float lineardepth = compute_lineardepth(depth) * 
GetCamera().z_far_rcp; + output_lineardepth_mip0[pixel] = lineardepth; - if (GTid.x % 8 == 0 && GTid.y % 8 == 0) - { - output_depth_mip3[pixel / 8] = depth; - output_lineardepth_mip3[pixel / 8] = lineardepth; - } + if (GTid.x % 2 == 0 && GTid.y % 2 == 0) + { + output_lineardepth_mip1[pixel / 2] = lineardepth; + } - if (GTid.x % 16 == 0 && GTid.y % 16 == 0) - { - output_depth_mip4[pixel / 16] = depth; - output_lineardepth_mip4[pixel / 16] = lineardepth; + if (GTid.x % 4 == 0 && GTid.y % 4 == 0) + { + output_lineardepth_mip2[pixel / 4] = lineardepth; + } + + if (GTid.x % 8 == 0 && GTid.y % 8 == 0) + { + output_lineardepth_mip3[pixel / 8] = lineardepth; + } + + if (GTid.x % 16 == 0 && GTid.y % 16 == 0) + { + output_lineardepth_mip4[pixel / 16] = lineardepth; + } } } diff --git a/WickedEngine/shaders/visibility_resolveCS_fast.hlsl b/WickedEngine/shaders/visibility_resolveCS_fast.hlsl new file mode 100644 index 000000000..8805e23e5 --- /dev/null +++ b/WickedEngine/shaders/visibility_resolveCS_fast.hlsl @@ -0,0 +1,2 @@ +#define VISIBILITY_FAST +#include "visibility_resolveCS.hlsl" diff --git a/WickedEngine/shaders/visibility_resolveCS_fast_MSAA.hlsl b/WickedEngine/shaders/visibility_resolveCS_fast_MSAA.hlsl new file mode 100644 index 000000000..eda65ede5 --- /dev/null +++ b/WickedEngine/shaders/visibility_resolveCS_fast_MSAA.hlsl @@ -0,0 +1,3 @@ +#define VISIBILITY_FAST +#define VISIBILITY_MSAA +#include "visibility_resolveCS.hlsl" diff --git a/WickedEngine/wiECS.h b/WickedEngine/wiECS.h index 5979b657c..eec045da1 100644 --- a/WickedEngine/wiECS.h +++ b/WickedEngine/wiECS.h @@ -303,7 +303,7 @@ namespace wi::ecs return nullptr; } - // Retrieve component index by entity handle (if not exists, returns ~0 value) + // Retrieve component index by entity handle (if not exists, returns ~0ull value) inline size_t GetIndex(Entity entity) const { const auto it = lookup.find(entity); @@ -311,7 +311,7 @@ namespace wi::ecs { return it->second; } - return ~0; + return ~0ull; } // 
Retrieve the number of existing entries diff --git a/WickedEngine/wiEmittedParticle.cpp b/WickedEngine/wiEmittedParticle.cpp index a30882505..9f492a882 100644 --- a/WickedEngine/wiEmittedParticle.cpp +++ b/WickedEngine/wiEmittedParticle.cpp @@ -107,16 +107,10 @@ namespace wi device->SetName(&vertexBuffer_POS, "EmittedParticleSystem::vertexBuffer_POS"); bd.misc_flags = ResourceMiscFlag::BUFFER_RAW; - bd.stride = sizeof(MeshComponent::Vertex_TEX); + bd.stride = sizeof(MeshComponent::Vertex_UVS); bd.size = bd.stride * 4 * MAX_PARTICLES; - device->CreateBuffer(&bd, nullptr, &vertexBuffer_TEX); - device->SetName(&vertexBuffer_TEX, "EmittedParticleSystem::vertexBuffer_TEX"); - - bd.misc_flags = ResourceMiscFlag::BUFFER_RAW; - bd.stride = sizeof(MeshComponent::Vertex_TEX); - bd.size = bd.stride * 4 * MAX_PARTICLES; - device->CreateBuffer(&bd, nullptr, &vertexBuffer_TEX2); - device->SetName(&vertexBuffer_TEX2, "EmittedParticleSystem::vertexBuffer_TEX2"); + device->CreateBuffer(&bd, nullptr, &vertexBuffer_UVS); + device->SetName(&vertexBuffer_UVS, "EmittedParticleSystem::vertexBuffer_UVS"); bd.misc_flags = ResourceMiscFlag::BUFFER_RAW; bd.stride = sizeof(MeshComponent::Vertex_COL); @@ -259,16 +253,6 @@ namespace wi } } - if (!subsetBuffer.IsValid()) - { - GPUBufferDesc desc; - desc.stride = sizeof(ShaderMeshSubset); - desc.size = desc.stride; - desc.misc_flags = ResourceMiscFlag::BUFFER_RAW; - desc.bind_flags = BindFlag::SHADER_RESOURCE; - device->CreateBuffer(&desc, nullptr, &subsetBuffer); - } - if (device->CheckCapability(GraphicsDeviceCapability::RAYTRACING) && primitiveBuffer.IsValid()) { RaytracingAccelerationStructureDesc desc; @@ -314,13 +298,11 @@ namespace wi retVal += indirectBuffers.GetDesc().size; retVal += constantBuffer.GetDesc().size; retVal += vertexBuffer_POS.GetDesc().size; - retVal += vertexBuffer_TEX.GetDesc().size; - retVal += vertexBuffer_TEX2.GetDesc().size; + retVal += vertexBuffer_UVS.GetDesc().size; retVal += vertexBuffer_COL.GetDesc().size; 
retVal += primitiveBuffer.GetDesc().size; retVal += culledIndirectionBuffer.GetDesc().size; retVal += culledIndirectionBuffer2.GetDesc().size; - retVal += subsetBuffer.GetDesc().size; return retVal; } @@ -365,7 +347,7 @@ namespace wi counterBuffer = {}; // will be recreated } - void EmittedParticleSystem::UpdateGPU(uint32_t instanceIndex, uint32_t materialIndex, const TransformComponent& transform, const MeshComponent* mesh, CommandList cmd) const + void EmittedParticleSystem::UpdateGPU(uint32_t instanceIndex, const TransformComponent& transform, const MeshComponent* mesh, CommandList cmd) const { if (!particleBuffer.IsValid()) { @@ -407,12 +389,6 @@ namespace wi cb.xEmitterLayerMask = layerMask; cb.xEmitterInstanceIndex = instanceIndex; - ShaderMeshSubset subset; - subset.init(); - subset.indexOffset = 0; - subset.materialIndex = materialIndex; - device->UpdateBuffer(&subsetBuffer, &subset, cmd); - cb.xEmitterOptions = 0; if (IsSPHEnabled()) { @@ -451,7 +427,6 @@ namespace wi { GPUBarrier barriers[] = { GPUBarrier::Buffer(&constantBuffer, ResourceState::COPY_DST, ResourceState::CONSTANT_BUFFER), - GPUBarrier::Buffer(&subsetBuffer, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE), }; device->Barrier(barriers, arraysize(barriers), cmd); } @@ -467,8 +442,7 @@ namespace wi &indirectBuffers, &distanceBuffer, &vertexBuffer_POS, - &vertexBuffer_TEX, - &vertexBuffer_TEX2, + &vertexBuffer_UVS, &vertexBuffer_COL, &culledIndirectionBuffer, &culledIndirectionBuffer2, @@ -700,8 +674,7 @@ namespace wi GPUBarrier::Buffer(&particleBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE), GPUBarrier::Buffer(&aliveList[1], ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE), GPUBarrier::Buffer(&vertexBuffer_POS, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE), - GPUBarrier::Buffer(&vertexBuffer_TEX, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE), - GPUBarrier::Buffer(&vertexBuffer_TEX2, 
ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE), + GPUBarrier::Buffer(&vertexBuffer_UVS, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE), GPUBarrier::Buffer(&vertexBuffer_COL, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE), GPUBarrier::Buffer(&culledIndirectionBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE_COMPUTE), GPUBarrier::Buffer(&culledIndirectionBuffer2, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE_COMPUTE), diff --git a/WickedEngine/wiEmittedParticle.h b/WickedEngine/wiEmittedParticle.h index 23a8b5ea5..87e8eef2f 100644 --- a/WickedEngine/wiEmittedParticle.h +++ b/WickedEngine/wiEmittedParticle.h @@ -44,13 +44,11 @@ namespace wi wi::graphics::GPUBuffer indirectBuffers; // kickoffUpdate, simulation, draw wi::graphics::GPUBuffer constantBuffer; wi::graphics::GPUBuffer vertexBuffer_POS; - wi::graphics::GPUBuffer vertexBuffer_TEX; - wi::graphics::GPUBuffer vertexBuffer_TEX2; + wi::graphics::GPUBuffer vertexBuffer_UVS; wi::graphics::GPUBuffer vertexBuffer_COL; wi::graphics::GPUBuffer primitiveBuffer; // raytracing wi::graphics::GPUBuffer culledIndirectionBuffer; // rasterization wi::graphics::GPUBuffer culledIndirectionBuffer2; // rasterization - wi::graphics::GPUBuffer subsetBuffer; wi::graphics::RaytracingAccelerationStructure BLAS; @@ -68,7 +66,7 @@ namespace wi void Restart(); // Must have a transform and material component, but mesh is optional - void UpdateGPU(uint32_t instanceIndex, uint32_t materialIndex, const wi::scene::TransformComponent& transform, const wi::scene::MeshComponent* mesh, wi::graphics::CommandList cmd) const; + void UpdateGPU(uint32_t instanceIndex, const wi::scene::TransformComponent& transform, const wi::scene::MeshComponent* mesh, wi::graphics::CommandList cmd) const; void Draw(const wi::scene::MaterialComponent& material, wi::graphics::CommandList cmd) const; ParticleCounters GetStatistics() { return statistics; } diff --git 
a/WickedEngine/wiEnums.h b/WickedEngine/wiEnums.h index 54581505a..25f477478 100644 --- a/WickedEngine/wiEnums.h +++ b/WickedEngine/wiEnums.h @@ -284,7 +284,6 @@ namespace wi::enums CSTYPE_POSTPROCESS_MSAO_BLURUPSAMPLE_PREMIN, CSTYPE_POSTPROCESS_MSAO_BLURUPSAMPLE_PREMIN_BLENDOUT, CSTYPE_POSTPROCESS_RTREFLECTION, - CSTYPE_POSTPROCESS_SSR_SURFACE, CSTYPE_POSTPROCESS_SSR_TILEMAXROUGHNESS_HORIZONTAL, CSTYPE_POSTPROCESS_SSR_TILEMAXROUGHNESS_VERTICAL, CSTYPE_POSTPROCESS_SSR_KICKJOBS, @@ -354,6 +353,8 @@ namespace wi::enums CSTYPE_SURFEL_INTEGRATE, CSTYPE_VISIBILITY_RESOLVE, CSTYPE_VISIBILITY_RESOLVE_MSAA, + CSTYPE_VISIBILITY_RESOLVE_FAST, + CSTYPE_VISIBILITY_RESOLVE_FAST_MSAA, CSTYPE_DDGI_RAYTRACE, CSTYPE_DDGI_UPDATE, CSTYPE_DDGI_UPDATE_DEPTH, diff --git a/WickedEngine/wiGraphicsDevice.h b/WickedEngine/wiGraphicsDevice.h index f0aeac492..287f4f4b8 100644 --- a/WickedEngine/wiGraphicsDevice.h +++ b/WickedEngine/wiGraphicsDevice.h @@ -169,6 +169,8 @@ namespace wi::graphics virtual void DrawIndexedInstanced(uint32_t indexCount, uint32_t instanceCount, uint32_t startIndexLocation, int32_t baseVertexLocation, uint32_t startInstanceLocation, CommandList cmd) = 0; virtual void DrawInstancedIndirect(const GPUBuffer* args, uint64_t args_offset, CommandList cmd) = 0; virtual void DrawIndexedInstancedIndirect(const GPUBuffer* args, uint64_t args_offset, CommandList cmd) = 0; + virtual void DrawInstancedIndirectCount(const GPUBuffer* args, uint64_t args_offset, const GPUBuffer* count, uint64_t count_offset, uint32_t max_count, CommandList cmd) = 0; + virtual void DrawIndexedInstancedIndirectCount(const GPUBuffer* args, uint64_t args_offset, const GPUBuffer* count, uint64_t count_offset, uint32_t max_count, CommandList cmd) = 0; virtual void Dispatch(uint32_t threadGroupCountX, uint32_t threadGroupCountY, uint32_t threadGroupCountZ, CommandList cmd) = 0; virtual void DispatchIndirect(const GPUBuffer* args, uint64_t args_offset, CommandList cmd) = 0; virtual void DispatchMesh(uint32_t 
threadGroupCountX, uint32_t threadGroupCountY, uint32_t threadGroupCountZ, CommandList cmd) {} diff --git a/WickedEngine/wiGraphicsDevice_DX12.cpp b/WickedEngine/wiGraphicsDevice_DX12.cpp index 637544c3c..3784341fc 100644 --- a/WickedEngine/wiGraphicsDevice_DX12.cpp +++ b/WickedEngine/wiGraphicsDevice_DX12.cpp @@ -5465,6 +5465,22 @@ using namespace dx12_internal; CommandList_DX12& commandlist = GetCommandList(cmd); commandlist.GetGraphicsCommandList()->ExecuteIndirect(drawIndexedInstancedIndirectCommandSignature.Get(), 1, internal_state->resource.Get(), args_offset, nullptr, 0); } + void GraphicsDevice_DX12::DrawInstancedIndirectCount(const GPUBuffer* args, uint64_t args_offset, const GPUBuffer* count, uint64_t count_offset, uint32_t max_count, CommandList cmd) + { + predraw(cmd); + auto args_internal = to_internal(args); + auto count_internal = to_internal(count); + CommandList_DX12& commandlist = GetCommandList(cmd); + commandlist.GetGraphicsCommandList()->ExecuteIndirect(drawInstancedIndirectCommandSignature.Get(), max_count, args_internal->resource.Get(), args_offset, count_internal->resource.Get(), count_offset); + } + void GraphicsDevice_DX12::DrawIndexedInstancedIndirectCount(const GPUBuffer* args, uint64_t args_offset, const GPUBuffer* count, uint64_t count_offset, uint32_t max_count, CommandList cmd) + { + predraw(cmd); + auto args_internal = to_internal(args); + auto count_internal = to_internal(count); + CommandList_DX12& commandlist = GetCommandList(cmd); + commandlist.GetGraphicsCommandList()->ExecuteIndirect(drawIndexedInstancedIndirectCommandSignature.Get(), max_count, args_internal->resource.Get(), args_offset, count_internal->resource.Get(), count_offset); + } void GraphicsDevice_DX12::Dispatch(uint32_t threadGroupCountX, uint32_t threadGroupCountY, uint32_t threadGroupCountZ, CommandList cmd) { predispatch(cmd); diff --git a/WickedEngine/wiGraphicsDevice_DX12.h b/WickedEngine/wiGraphicsDevice_DX12.h index 1fb89b547..338f46178 100644 --- 
a/WickedEngine/wiGraphicsDevice_DX12.h +++ b/WickedEngine/wiGraphicsDevice_DX12.h @@ -269,6 +269,8 @@ namespace wi::graphics void DrawIndexedInstanced(uint32_t indexCount, uint32_t instanceCount, uint32_t startIndexLocation, int32_t baseVertexLocation, uint32_t startInstanceLocation, CommandList cmd) override; void DrawInstancedIndirect(const GPUBuffer* args, uint64_t args_offset, CommandList cmd) override; void DrawIndexedInstancedIndirect(const GPUBuffer* args, uint64_t args_offset, CommandList cmd) override; + void DrawInstancedIndirectCount(const GPUBuffer* args, uint64_t args_offset, const GPUBuffer* count, uint64_t count_offset, uint32_t max_count, CommandList cmd) override; + void DrawIndexedInstancedIndirectCount(const GPUBuffer* args, uint64_t args_offset, const GPUBuffer* count, uint64_t count_offset, uint32_t max_count, CommandList cmd) override; void Dispatch(uint32_t threadGroupCountX, uint32_t threadGroupCountY, uint32_t threadGroupCountZ, CommandList cmd) override; void DispatchIndirect(const GPUBuffer* args, uint64_t args_offset, CommandList cmd) override; void DispatchMesh(uint32_t threadGroupCountX, uint32_t threadGroupCountY, uint32_t threadGroupCountZ, CommandList cmd) override; diff --git a/WickedEngine/wiGraphicsDevice_Vulkan.cpp b/WickedEngine/wiGraphicsDevice_Vulkan.cpp index 7d09ea19e..e92401df8 100644 --- a/WickedEngine/wiGraphicsDevice_Vulkan.cpp +++ b/WickedEngine/wiGraphicsDevice_Vulkan.cpp @@ -6930,6 +6930,22 @@ using namespace vulkan_internal; CommandList_Vulkan& commandlist = GetCommandList(cmd); vkCmdDrawIndexedIndirect(commandlist.GetCommandBuffer(), internal_state->resource, args_offset, 1, sizeof(IndirectDrawArgsIndexedInstanced)); } + void GraphicsDevice_Vulkan::DrawInstancedIndirectCount(const GPUBuffer* args, uint64_t args_offset, const GPUBuffer* count, uint64_t count_offset, uint32_t max_count, CommandList cmd) + { + predraw(cmd); + auto args_internal = to_internal(args); + auto count_internal = to_internal(count); + 
CommandList_Vulkan& commandlist = GetCommandList(cmd); + vkCmdDrawIndirectCount(commandlist.GetCommandBuffer(), args_internal->resource, args_offset, count_internal->resource, count_offset, max_count, sizeof(IndirectDrawArgsInstanced)); + } + void GraphicsDevice_Vulkan::DrawIndexedInstancedIndirectCount(const GPUBuffer* args, uint64_t args_offset, const GPUBuffer* count, uint64_t count_offset, uint32_t max_count, CommandList cmd) + { + predraw(cmd); + auto args_internal = to_internal(args); + auto count_internal = to_internal(count); + CommandList_Vulkan& commandlist = GetCommandList(cmd); + vkCmdDrawIndexedIndirectCount(commandlist.GetCommandBuffer(), args_internal->resource, args_offset, count_internal->resource, count_offset, max_count, sizeof(IndirectDrawArgsIndexedInstanced)); + } void GraphicsDevice_Vulkan::Dispatch(uint32_t threadGroupCountX, uint32_t threadGroupCountY, uint32_t threadGroupCountZ, CommandList cmd) { predispatch(cmd); diff --git a/WickedEngine/wiGraphicsDevice_Vulkan.h b/WickedEngine/wiGraphicsDevice_Vulkan.h index 84d80470e..4b6082fa6 100644 --- a/WickedEngine/wiGraphicsDevice_Vulkan.h +++ b/WickedEngine/wiGraphicsDevice_Vulkan.h @@ -368,6 +368,8 @@ namespace wi::graphics void DrawIndexedInstanced(uint32_t indexCount, uint32_t instanceCount, uint32_t startIndexLocation, int32_t baseVertexLocation, uint32_t startInstanceLocation, CommandList cmd) override; void DrawInstancedIndirect(const GPUBuffer* args, uint64_t args_offset, CommandList cmd) override; void DrawIndexedInstancedIndirect(const GPUBuffer* args, uint64_t args_offset, CommandList cmd) override; + void DrawInstancedIndirectCount(const GPUBuffer* args, uint64_t args_offset, const GPUBuffer* count, uint64_t count_offset, uint32_t max_count, CommandList cmd) override; + void DrawIndexedInstancedIndirectCount(const GPUBuffer* args, uint64_t args_offset, const GPUBuffer* count, uint64_t count_offset, uint32_t max_count, CommandList cmd) override; void Dispatch(uint32_t 
threadGroupCountX, uint32_t threadGroupCountY, uint32_t threadGroupCountZ, CommandList cmd) override; void DispatchIndirect(const GPUBuffer* args, uint64_t args_offset, CommandList cmd) override; void DispatchMesh(uint32_t threadGroupCountX, uint32_t threadGroupCountY, uint32_t threadGroupCountZ, CommandList cmd) override; diff --git a/WickedEngine/wiHairParticle.cpp b/WickedEngine/wiHairParticle.cpp index 964e25248..ca42d6c9e 100644 --- a/WickedEngine/wiHairParticle.cpp +++ b/WickedEngine/wiHairParticle.cpp @@ -82,10 +82,10 @@ namespace wi device->SetName(&vertexBuffer_POS[1], "HairParticleSystem::vertexBuffer_POS[1]"); bd.misc_flags = ResourceMiscFlag::BUFFER_RAW; - bd.stride = sizeof(MeshComponent::Vertex_TEX); + bd.stride = sizeof(MeshComponent::Vertex_UVS); bd.size = bd.stride * 4 * particleCount; - device->CreateBuffer(&bd, nullptr, &vertexBuffer_TEX); - device->SetName(&vertexBuffer_TEX, "HairParticleSystem::vertexBuffer_TEX"); + device->CreateBuffer(&bd, nullptr, &vertexBuffer_UVS); + device->SetName(&vertexBuffer_UVS, "HairParticleSystem::vertexBuffer_UVS"); bd.bind_flags = BindFlag::SHADER_RESOURCE; bd.misc_flags = ResourceMiscFlag::NONE; @@ -211,16 +211,6 @@ namespace wi device->CreateBuffer(&desc, nullptr, &indirectBuffer); } - if (!subsetBuffer.IsValid()) - { - GPUBufferDesc desc; - desc.stride = sizeof(ShaderMeshSubset); - desc.size = desc.stride; - desc.misc_flags = ResourceMiscFlag::BUFFER_RAW; - desc.bind_flags = BindFlag::SHADER_RESOURCE; - device->CreateBuffer(&desc, nullptr, &subsetBuffer); - } - std::swap(vertexBuffer_POS[0], vertexBuffer_POS[1]); if (BLAS.IsValid() && !BLAS.desc.bottom_level.geometries.empty()) @@ -229,7 +219,7 @@ namespace wi } } - void HairParticleSystem::UpdateGPU(uint32_t instanceIndex, uint32_t materialIndex, const MeshComponent& mesh, const MaterialComponent& material, CommandList cmd) const + void HairParticleSystem::UpdateGPU(uint32_t instanceIndex, const MeshComponent& mesh, const MaterialComponent& material, 
CommandList cmd) const { if (strandCount == 0 || !simulationBuffer.IsValid()) { @@ -268,16 +258,9 @@ namespace wi hcb.xHairInstanceIndex = instanceIndex; device->UpdateBuffer(&constantBuffer, &hcb, cmd); - ShaderMeshSubset subset; - subset.init(); - subset.indexOffset = 0; - subset.materialIndex = materialIndex; - device->UpdateBuffer(&subsetBuffer, &subset, cmd); - { GPUBarrier barriers[] = { GPUBarrier::Buffer(&constantBuffer, ResourceState::COPY_DST, ResourceState::CONSTANT_BUFFER), - GPUBarrier::Buffer(&subsetBuffer, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE), }; device->Barrier(barriers, arraysize(barriers), cmd); } @@ -290,7 +273,7 @@ namespace wi const GPUResource* uavs[] = { &simulationBuffer, &vertexBuffer_POS[0], - &vertexBuffer_TEX, + &vertexBuffer_UVS, &culledIndexBuffer, &indirectBuffer }; @@ -338,7 +321,7 @@ namespace wi GPUBarrier::Memory(&indirectBuffer), GPUBarrier::Buffer(&indirectBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::INDIRECT_ARGUMENT), GPUBarrier::Buffer(&vertexBuffer_POS[0], ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE), - GPUBarrier::Buffer(&vertexBuffer_TEX, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE), + GPUBarrier::Buffer(&vertexBuffer_UVS, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE), GPUBarrier::Buffer(&culledIndexBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::INDEX_BUFFER), }; device->Barrier(barriers, arraysize(barriers), cmd); diff --git a/WickedEngine/wiHairParticle.h b/WickedEngine/wiHairParticle.h index 46de1b219..82dca0cf1 100644 --- a/WickedEngine/wiHairParticle.h +++ b/WickedEngine/wiHairParticle.h @@ -21,11 +21,10 @@ namespace wi wi::graphics::GPUBuffer constantBuffer; wi::graphics::GPUBuffer simulationBuffer; wi::graphics::GPUBuffer vertexBuffer_POS[2]; - wi::graphics::GPUBuffer vertexBuffer_TEX; + wi::graphics::GPUBuffer vertexBuffer_UVS; wi::graphics::GPUBuffer primitiveBuffer; wi::graphics::GPUBuffer culledIndexBuffer; 
wi::graphics::GPUBuffer indirectBuffer; - wi::graphics::GPUBuffer subsetBuffer; wi::graphics::GPUBuffer indexBuffer; wi::graphics::GPUBuffer vertexBuffer_length; @@ -39,7 +38,6 @@ namespace wi ); void UpdateGPU( uint32_t instanceIndex, - uint32_t materialIndex, const wi::scene::MeshComponent& mesh, const wi::scene::MaterialComponent& material, wi::graphics::CommandList cmd diff --git a/WickedEngine/wiProfiler.cpp b/WickedEngine/wiProfiler.cpp index e56c4d539..213b7b87f 100644 --- a/WickedEngine/wiProfiler.cpp +++ b/WickedEngine/wiProfiler.cpp @@ -349,7 +349,6 @@ namespace wi::profiler { if (value != ENABLED) { - initialized = false; ranges.clear(); ENABLED = value; } diff --git a/WickedEngine/wiRenderPath3D.cpp b/WickedEngine/wiRenderPath3D.cpp index 64a0f5106..d8e0d63ae 100644 --- a/WickedEngine/wiRenderPath3D.cpp +++ b/WickedEngine/wiRenderPath3D.cpp @@ -47,33 +47,25 @@ void RenderPath3D::ResizeBuffers() } { TextureDesc desc; + desc.format = Format::R32G32_UINT; + desc.bind_flags = BindFlag::RENDER_TARGET | BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; desc.width = internalResolution.x; desc.height = internalResolution.y; - desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE; desc.sample_count = 1; - - desc.bind_flags = BindFlag::RENDER_TARGET | BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; - desc.format = Format::R32G32_UINT; - device->CreateTexture(&desc, nullptr, &rtGbuffer[wi::renderer::GBUFFER_PRIMITIVEID]); - device->SetName(&rtGbuffer[wi::renderer::GBUFFER_PRIMITIVEID], "rtGbuffer[GBUFFER_PRIMITIVEID]"); - - desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; - desc.format = Format::R16G16_FLOAT; - device->CreateTexture(&desc, nullptr, &rtGbuffer[wi::renderer::GBUFFER_VELOCITY]); - device->SetName(&rtGbuffer[wi::renderer::GBUFFER_VELOCITY], "rtGbuffer[GBUFFER_VELOCITY]"); + desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE; + device->CreateTexture(&desc, nullptr, &rtPrimitiveID); + 
device->SetName(&rtPrimitiveID, "rtPrimitiveID"); if (getMSAASampleCount() > 1) { - desc = rtGbuffer[wi::renderer::GBUFFER_PRIMITIVEID].desc; desc.sample_count = getMSAASampleCount(); desc.bind_flags = BindFlag::RENDER_TARGET | BindFlag::SHADER_RESOURCE; - device->CreateTexture(&desc, nullptr, &rtPrimitiveID_render); device->SetName(&rtPrimitiveID_render, "rtPrimitiveID_render"); } else { - rtPrimitiveID_render = rtGbuffer[wi::renderer::GBUFFER_PRIMITIVEID]; + rtPrimitiveID_render = rtPrimitiveID; } } { @@ -225,6 +217,9 @@ void RenderPath3D::ResizeBuffers() device->CreateTexture(&desc, nullptr, &rtShadingRate); device->SetName(&rtShadingRate, "rtShadingRate"); } + rtVelocity = {}; + rtNormal = {}; + rtRoughness = {}; rtAO = {}; rtShadow = {}; rtSSR = {}; @@ -238,7 +233,7 @@ void RenderPath3D::ResizeBuffers() desc.sample_count = getMSAASampleCount(); desc.layout = ResourceState::DEPTHSTENCIL_READONLY; desc.format = Format::R32G8X24_TYPELESS; - desc.bind_flags = BindFlag::DEPTH_STENCIL | BindFlag::SHADER_RESOURCE; + desc.bind_flags = BindFlag::DEPTH_STENCIL; device->CreateTexture(&desc, nullptr, &depthBuffer_Main); device->SetName(&depthBuffer_Main, "depthBuffer_Main"); @@ -306,13 +301,13 @@ void RenderPath3D::ResizeBuffers() RenderPassAttachment::StoreOp::STORE, ResourceState::DEPTHSTENCIL_READONLY, ResourceState::DEPTHSTENCIL, - ResourceState::SHADER_RESOURCE + ResourceState::DEPTHSTENCIL_READONLY ) ); desc.attachments.push_back( RenderPassAttachment::RenderTarget( &rtPrimitiveID_render, - RenderPassAttachment::LoadOp::DONTCARE, + RenderPassAttachment::LoadOp::CLEAR, RenderPassAttachment::StoreOp::STORE, ResourceState::SHADER_RESOURCE_COMPUTE, ResourceState::RENDERTARGET, @@ -328,7 +323,7 @@ void RenderPath3D::ResizeBuffers() &depthBuffer_Main, RenderPassAttachment::LoadOp::LOAD, RenderPassAttachment::StoreOp::STORE, - ResourceState::SHADER_RESOURCE, + ResourceState::DEPTHSTENCIL_READONLY, ResourceState::DEPTHSTENCIL_READONLY, ResourceState::DEPTHSTENCIL_READONLY ) 
@@ -513,6 +508,8 @@ void RenderPath3D::PreUpdate() void RenderPath3D::Update(float dt) { + GraphicsDevice* device = wi::graphics::GetDevice(); + if (rtMain_render.desc.sample_count != msaaSampleCount) { ResizeBuffers(); @@ -591,35 +588,99 @@ void RenderPath3D::Update(float dt) { rtAO = {}; } - if (!wi::renderer::GetScreenSpaceShadowsEnabled() && !wi::renderer::GetRaytracedShadowsEnabled()) + + // Check whether normal and roughness buffers are required: + if (getSSREnabled() || getRaytracedReflectionEnabled()) + { + if (!rtNormal.IsValid()) + { + TextureDesc desc; + desc.format = Format::R16G16_FLOAT; + desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; + desc.width = internalResolution.x; + desc.height = internalResolution.y; + desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE; + device->CreateTexture(&desc, nullptr, &rtNormal); + device->SetName(&rtNormal, "rtNormal"); + } + if (!rtRoughness.IsValid()) + { + TextureDesc desc; + desc.format = Format::R8_UNORM; + desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; + desc.width = internalResolution.x; + desc.height = internalResolution.y; + desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE; + device->CreateTexture(&desc, nullptr, &rtRoughness); + device->SetName(&rtRoughness, "rtRoughness"); + } + } + else + { + rtNormal = {}; + rtRoughness = {}; + } + + // Check whether velocity buffer is required: + if ( + getMotionBlurEnabled() || + wi::renderer::GetTemporalAAEnabled() || + getSSREnabled() || + getRaytracedReflectionEnabled() || + wi::renderer::GetRaytracedShadowsEnabled() || + getAO() == AO::AO_RTAO + ) + { + if (!rtVelocity.IsValid()) + { + TextureDesc desc; + desc.format = Format::R16G16_FLOAT; + desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; + desc.width = internalResolution.x; + desc.height = internalResolution.y; + desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE; + device->CreateTexture(&desc, nullptr, &rtVelocity); + 
device->SetName(&rtVelocity, "rtVelocity"); + } + } + else + { + rtVelocity = {}; + } + + // Check whether shadow mask is required: + if(wi::renderer::GetScreenSpaceShadowsEnabled() || wi::renderer::GetRaytracedShadowsEnabled()) + { + if (!rtShadow.IsValid()) + { + TextureDesc desc; + desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; + desc.format = Format::R32G32B32A32_UINT; + desc.width = internalResolution.x / 2; + desc.height = internalResolution.y / 2; + desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE; + device->CreateTexture(&desc, nullptr, &rtShadow); + device->SetName(&rtShadow, "rtShadow"); + } + } + else { rtShadow = {}; } - GraphicsDevice* device = wi::graphics::GetDevice(); - - if((wi::renderer::GetScreenSpaceShadowsEnabled() || wi::renderer::GetRaytracedShadowsEnabled()) && !rtShadow.IsValid()) - { - TextureDesc desc; - desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; - desc.format = Format::R32G32B32A32_UINT; - desc.width = internalResolution.x / 2; - desc.height = internalResolution.y / 2; - desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE; - device->CreateTexture(&desc, nullptr, &rtShadow); - device->SetName(&rtShadow, "rtShadow"); - } - + // Keep a copy of last frame's depth buffer for temporal disocclusion checks, so swap with current one every frame: std::swap(depthBuffer_Copy, depthBuffer_Copy1); - camera->canvas = *this; + camera->canvas.init(*this); camera->width = (float)internalResolution.x; camera->height = (float)internalResolution.y; camera->sample_count = depthBuffer_Main.desc.sample_count; + camera->texture_primitiveID_index = device->GetDescriptorIndex(&rtPrimitiveID, SubresourceType::SRV); camera->texture_depth_index = device->GetDescriptorIndex(&depthBuffer_Copy, SubresourceType::SRV); camera->texture_lineardepth_index = device->GetDescriptorIndex(&rtLinearDepth, SubresourceType::SRV); - camera->texture_gbuffer0_index = 
device->GetDescriptorIndex(&rtGbuffer[wi::renderer::GBUFFER_PRIMITIVEID], SubresourceType::SRV); - camera->texture_gbuffer1_index = device->GetDescriptorIndex(&rtGbuffer[wi::renderer::GBUFFER_VELOCITY], SubresourceType::SRV); + camera->texture_velocity_index = device->GetDescriptorIndex(&rtVelocity, SubresourceType::SRV); + camera->texture_normal_index = device->GetDescriptorIndex(&rtNormal, SubresourceType::SRV); + camera->texture_roughness_index = device->GetDescriptorIndex(&rtRoughness, SubresourceType::SRV); camera->buffer_entitytiles_opaque_index = device->GetDescriptorIndex(&tiledLightResources.entityTiles_Opaque, SubresourceType::SRV); camera->buffer_entitytiles_transparent_index = device->GetDescriptorIndex(&tiledLightResources.entityTiles_Transparent, SubresourceType::SRV); camera->texture_reflection_index = device->GetDescriptorIndex(&rtReflection, SubresourceType::SRV); @@ -630,14 +691,16 @@ void RenderPath3D::Update(float dt) camera->texture_rtshadow_index = device->GetDescriptorIndex(&rtShadow, SubresourceType::SRV); camera->texture_surfelgi_index = device->GetDescriptorIndex(&surfelGIResources.result, SubresourceType::SRV); - camera_reflection.canvas = *this; + camera_reflection.canvas.init(*this); camera_reflection.width = (float)depthBuffer_Reflection.desc.width; camera_reflection.height = (float)depthBuffer_Reflection.desc.height; camera_reflection.sample_count = depthBuffer_Reflection.desc.sample_count; + camera_reflection.texture_primitiveID_index = -1; camera_reflection.texture_depth_index = device->GetDescriptorIndex(&depthBuffer_Reflection, SubresourceType::SRV); camera_reflection.texture_lineardepth_index = -1; - camera_reflection.texture_gbuffer0_index = -1; - camera_reflection.texture_gbuffer1_index = -1; + camera_reflection.texture_velocity_index = -1; + camera_reflection.texture_normal_index = -1; + camera_reflection.texture_roughness_index = -1; camera_reflection.buffer_entitytiles_opaque_index = 
device->GetDescriptorIndex(&tiledLightResources_planarReflection.entityTiles_Opaque, SubresourceType::SRV); camera_reflection.buffer_entitytiles_transparent_index = device->GetDescriptorIndex(&tiledLightResources_planarReflection.entityTiles_Transparent, SubresourceType::SRV); camera_reflection.texture_reflection_index = -1; @@ -777,12 +840,28 @@ void RenderPath3D::Render() const cmd ); + wi::renderer::VisibilityResolveOutputs vis_out = {}; + vis_out.depthbuffer = &depthBuffer_Copy; + vis_out.lineardepth = &rtLinearDepth; + if (rtVelocity.IsValid()) + { + vis_out.velocity = &rtVelocity; + } + if (rtNormal.IsValid()) + { + vis_out.normal = &rtNormal; + } + if (rtRoughness.IsValid()) + { + vis_out.roughness = &rtRoughness; + } + if (getMSAASampleCount() > 1) + { + vis_out.primitiveID_resolved = &rtPrimitiveID; + } wi::renderer::VisibilityResolve( - depthBuffer_Main, rtPrimitiveID_render, - rtGbuffer, - depthBuffer_Copy, - rtLinearDepth, + vis_out, cmd ); @@ -1624,7 +1703,7 @@ void RenderPath3D::setRaytracedReflectionsEnabled(bool value) TextureDesc desc; desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; - desc.format = Format::R11G11B10_FLOAT; + desc.format = Format::R16G16B16A16_FLOAT; desc.width = internalResolution.x; desc.height = internalResolution.y; device->CreateTexture(&desc, nullptr, &rtSSR); diff --git a/WickedEngine/wiRenderPath3D.h b/WickedEngine/wiRenderPath3D.h index cfdda9677..b727a6ee6 100644 --- a/WickedEngine/wiRenderPath3D.h +++ b/WickedEngine/wiRenderPath3D.h @@ -69,8 +69,11 @@ namespace wi public: wi::graphics::Texture rtMain; wi::graphics::Texture rtMain_render; // can be MSAA - wi::graphics::Texture rtGbuffer[wi::renderer::GBUFFER_COUNT]; + wi::graphics::Texture rtPrimitiveID; wi::graphics::Texture rtPrimitiveID_render; // can be MSAA + wi::graphics::Texture rtVelocity; // per pixel velocity (optional) + wi::graphics::Texture rtNormal; // per pixel normal (optional) + wi::graphics::Texture rtRoughness; // per pixel 
roughness (optional) wi::graphics::Texture rtReflection; // contains the scene rendered for planar reflections wi::graphics::Texture rtSSR; // standard screen-space reflection results wi::graphics::Texture rtSceneCopy; // contains the rendered scene that can be fed into transparent pass for distortion effect diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp index 27d5c776f..afbdb856e 100644 --- a/WickedEngine/wiRenderer.cpp +++ b/WickedEngine/wiRenderer.cpp @@ -934,7 +934,6 @@ void LoadShaders() wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_MSAO_BLURUPSAMPLE_BLENDOUT], "msao_blurupsampleCS_blendout.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_MSAO_BLURUPSAMPLE_PREMIN], "msao_blurupsampleCS_premin.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_MSAO_BLURUPSAMPLE_PREMIN_BLENDOUT], "msao_blurupsampleCS_premin_blendout.cso"); }); - wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_SURFACE], "ssr_surfaceCS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_TILEMAXROUGHNESS_HORIZONTAL], "ssr_tileMaxRoughness_horizontalCS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_TILEMAXROUGHNESS_VERTICAL], "ssr_tileMaxRoughness_verticalCS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_POSTPROCESS_SSR_KICKJOBS], "ssr_kickjobsCS.cso"); }); @@ -1021,6 +1020,8 @@ void LoadShaders() wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_VISIBILITY_RESOLVE], "visibility_resolveCS.cso"); 
}); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_VISIBILITY_RESOLVE_MSAA], "visibility_resolveCS_MSAA.cso"); }); + wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_VISIBILITY_RESOLVE_FAST], "visibility_resolveCS_fast.cso"); }); + wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_VISIBILITY_RESOLVE_FAST_MSAA], "visibility_resolveCS_fast_MSAA.cso"); }); if (device->CheckCapability(GraphicsDeviceCapability::RAYTRACING)) { @@ -2504,7 +2505,7 @@ void RenderMeshes( ObjectPushConstants push; push.init( - instancedBatch.meshIndex, + mesh.geometryOffset, (uint)subsetIndex, subset.materialIndex, instanceBufferDescriptorIndex, @@ -3233,17 +3234,17 @@ void UpdateRenderData( barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->instanceBuffer, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE)); } - if (vis.scene->meshBuffer.IsValid() && vis.scene->meshArraySize > 0) + if (vis.scene->geometryBuffer.IsValid() && vis.scene->geometryArraySize > 0) { device->CopyBuffer( - &vis.scene->meshBuffer, + &vis.scene->geometryBuffer, 0, - &vis.scene->meshUploadBuffer[device->GetBufferIndex()], + &vis.scene->geometryUploadBuffer[device->GetBufferIndex()], 0, - vis.scene->meshArraySize * sizeof(ShaderMesh), + vis.scene->geometryArraySize * sizeof(ShaderGeometry), cmd ); - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->meshBuffer, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE)); + barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->geometryBuffer, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE)); } if (vis.scene->materialBuffer.IsValid() && vis.scene->materialArraySize > 0) @@ -3608,34 +3609,6 @@ void UpdateRenderData( Entity entity = vis.scene->meshes.GetEntity(i); const MeshComponent& mesh = vis.scene->meshes[i]; - if (mesh.dirty_subsets) - { - mesh.dirty_subsets = false; - - size_t 
tmp_alloc = sizeof(ShaderMeshSubset) * mesh.subsets.size(); - auto allocation = device->AllocateGPU(tmp_alloc, cmd); - ShaderMeshSubset* subsetarray = (ShaderMeshSubset*)allocation.data; - int j = 0; - for (auto& x : mesh.subsets) - { - ShaderMeshSubset shadersubset; - shadersubset.indexOffset = x.indexOffset; - shadersubset.materialIndex = x.materialIndex; - std::memcpy(subsetarray + j, &shadersubset, sizeof(ShaderMeshSubset)); // memcpy whole structure into mapped pointer to avoid read from uncached memory - j++; - } - - device->CopyBuffer( - &mesh.generalBuffer, - mesh.subset_view.offset, - &allocation.buffer, - allocation.offset, - tmp_alloc, - cmd - ); - barrier_stack.push_back(GPUBarrier::Buffer(&mesh.generalBuffer, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE)); - } - if (mesh.dirty_morph) { mesh.dirty_morph = false; @@ -3698,7 +3671,7 @@ void UpdateRenderData( size_t materialIndex = vis.scene->materials.GetIndex(entity); const MaterialComponent& material = vis.scene->materials[materialIndex]; - hair.UpdateGPU((uint32_t)vis.scene->objects.GetCount() + hairIndex, (uint32_t)materialIndex, *mesh, material, cmd); + hair.UpdateGPU((uint32_t)vis.scene->objects.GetCount() + hairIndex, *mesh, material, cmd); } } wi::profiler::EndRange(range); @@ -3870,9 +3843,8 @@ void UpdateRenderDataAsync( const MaterialComponent& material = *vis.scene->materials.GetComponent(entity); const MeshComponent* mesh = vis.scene->meshes.GetComponent(emitter.meshID); const uint32_t instanceIndex = uint32_t(vis.scene->objects.GetCount() + vis.scene->hairs.GetCount()) + emitterIndex; - const uint32_t materialIndex = (uint32_t)vis.scene->materials.GetIndex(entity); - emitter.UpdateGPU(instanceIndex, materialIndex, transform, mesh, cmd); + emitter.UpdateGPU(instanceIndex, transform, mesh, cmd); } wi::profiler::EndRange(range); } @@ -4941,7 +4913,14 @@ void DrawScene( } if (!renderQueue.empty()) { - renderQueue.sort(transparent ? 
RenderQueue::SORT_BACK_TO_FRONT : RenderQueue::SORT_FRONT_TO_BACK); + if (transparent) + { + renderQueue.sort(RenderQueue::SORT_BACK_TO_FRONT); + } + else if (renderPass == RENDERPASS_PREPASS) + { + renderQueue.sort(RenderQueue::SORT_FRONT_TO_BACK); + } RenderMeshes(vis, renderQueue, renderPass, renderTypeFlags, cmd, tessellation); } @@ -5866,9 +5845,9 @@ void DrawDebugWorld( const MaterialComponent& material = *scene.materials.GetComponent(subset.materialID); GraphicsDevice::GPUAllocation mem = device->AllocateGPU(sizeof(ShaderMeshInstancePointer), cmd); - volatile ShaderMeshInstancePointer* buff = (volatile ShaderMeshInstancePointer*)mem.data; - buff->instanceID = (uint)scene.objects.GetIndex(x.objectEntity); - buff->userdata = 0; + ShaderMeshInstancePointer poi; + poi.init(); + poi.instanceIndex = (uint)scene.objects.GetIndex(x.objectEntity); device->BindIndexBuffer(&mesh.generalBuffer, mesh.GetIndexFormat(), mesh.ib.offset, cmd); @@ -5881,7 +5860,7 @@ void DrawDebugWorld( ObjectPushConstants push; push.init( - (uint)object.mesh_index, + mesh.geometryOffset, x.subset, subset.materialIndex, device->GetDescriptorIndex(&mem.buffer, SubresourceType::SRV), @@ -6483,7 +6462,7 @@ void RefreshImpostors(const Scene& scene, CommandList cmd) ObjectPushConstants push; push.init( - (uint)scene.meshes.GetIndex(entity), + mesh.geometryOffset, (uint)subsetIndex, subset.materialIndex, -1, 0 @@ -7390,10 +7369,12 @@ void BindCameraCB( cb.entity_culling_tilecount = GetEntityCullingTileCount(cb.internal_resolution); cb.sample_count = camera.sample_count; + cb.texture_primitiveID_index = camera.texture_primitiveID_index; cb.texture_depth_index = camera.texture_depth_index; cb.texture_lineardepth_index = camera.texture_lineardepth_index; - cb.texture_gbuffer0_index = camera.texture_gbuffer0_index; - cb.texture_gbuffer1_index = camera.texture_gbuffer1_index; + cb.texture_velocity_index = camera.texture_velocity_index; + cb.texture_normal_index = camera.texture_normal_index; + 
cb.texture_roughness_index = camera.texture_roughness_index; cb.buffer_entitytiles_opaque_index = camera.buffer_entitytiles_opaque_index; cb.buffer_entitytiles_transparent_index = camera.buffer_entitytiles_transparent_index; cb.texture_reflection_index = camera.texture_reflection_index; @@ -7648,11 +7629,8 @@ void ComputeShadingRateClassification( } void VisibilityResolve( - const Texture& depthbuffer, - const Texture& texture_primitiveID, // can be MSAA - const Texture gbuffer[GBUFFER_COUNT], - const Texture& depthbuffer_resolved, - const Texture& lineardepth, + const Texture& input_primitiveID, // can be MSAA + const VisibilityResolveOutputs& outputs, CommandList cmd ) { @@ -7661,55 +7639,135 @@ void VisibilityResolve( BindCommonResources(cmd); - const bool msaa = texture_primitiveID.desc.sample_count > 1; + const bool msaa = input_primitiveID.GetDesc().sample_count > 1; + bool fast = true; - device->BindComputeShader(&shaders[msaa ? CSTYPE_VISIBILITY_RESOLVE_MSAA : CSTYPE_VISIBILITY_RESOLVE], cmd); + device->BindResource(&input_primitiveID, 0, cmd); + GPUResource unbind; - - device->BindResource(&texture_primitiveID, 0, cmd); - device->BindResource(&depthbuffer, 1, cmd); - - device->BindUAV(&gbuffer[GBUFFER_VELOCITY], 0, cmd); - device->BindUAV(&depthbuffer_resolved, 1, cmd, 0); - device->BindUAV(&depthbuffer_resolved, 2, cmd, 1); - device->BindUAV(&depthbuffer_resolved, 3, cmd, 2); - device->BindUAV(&depthbuffer_resolved, 4, cmd, 3); - device->BindUAV(&depthbuffer_resolved, 5, cmd, 4); - device->BindUAV(&lineardepth, 6, cmd, 0); - device->BindUAV(&lineardepth, 7, cmd, 1); - device->BindUAV(&lineardepth, 8, cmd, 2); - device->BindUAV(&lineardepth, 9, cmd, 3); - device->BindUAV(&lineardepth, 10, cmd, 4); - - barrier_stack.push_back(GPUBarrier::Image(&gbuffer[GBUFFER_VELOCITY], gbuffer[GBUFFER_VELOCITY].desc.layout, ResourceState::UNORDERED_ACCESS)); - barrier_stack.push_back(GPUBarrier::Image(&depthbuffer_resolved, depthbuffer_resolved.desc.layout, 
ResourceState::UNORDERED_ACCESS)); - barrier_stack.push_back(GPUBarrier::Image(&lineardepth, lineardepth.desc.layout, ResourceState::UNORDERED_ACCESS)); - - if (msaa) + VisibilityResolvePushConstants push = {}; + if (outputs.depthbuffer) { - device->BindUAV(&gbuffer[GBUFFER_PRIMITIVEID], 11, cmd); - barrier_stack.push_back(GPUBarrier::Image(&gbuffer[GBUFFER_PRIMITIVEID], gbuffer[GBUFFER_PRIMITIVEID].desc.layout, ResourceState::UNORDERED_ACCESS)); + push.options |= VISIBILITY_RESOLVE_DEPTH; + device->BindUAV(outputs.depthbuffer, 0, cmd, 0); + device->BindUAV(outputs.depthbuffer, 1, cmd, 1); + device->BindUAV(outputs.depthbuffer, 2, cmd, 2); + device->BindUAV(outputs.depthbuffer, 3, cmd, 3); + device->BindUAV(outputs.depthbuffer, 4, cmd, 4); + barrier_stack.push_back(GPUBarrier::Image(outputs.depthbuffer, outputs.depthbuffer->desc.layout, ResourceState::UNORDERED_ACCESS)); + } + else + { + device->BindUAV(&unbind, 0, cmd); + device->BindUAV(&unbind, 1, cmd); + device->BindUAV(&unbind, 2, cmd); + device->BindUAV(&unbind, 3, cmd); + device->BindUAV(&unbind, 4, cmd); + } + if (outputs.lineardepth) + { + push.options |= VISIBILITY_RESOLVE_LINEARDEPTH; + device->BindUAV(outputs.lineardepth, 5, cmd, 0); + device->BindUAV(outputs.lineardepth, 6, cmd, 1); + device->BindUAV(outputs.lineardepth, 7, cmd, 2); + device->BindUAV(outputs.lineardepth, 8, cmd, 3); + device->BindUAV(outputs.lineardepth, 9, cmd, 4); + barrier_stack.push_back(GPUBarrier::Image(outputs.lineardepth, outputs.lineardepth->desc.layout, ResourceState::UNORDERED_ACCESS)); + } + else + { + device->BindUAV(&unbind, 5, cmd); + device->BindUAV(&unbind, 6, cmd); + device->BindUAV(&unbind, 7, cmd); + device->BindUAV(&unbind, 8, cmd); + device->BindUAV(&unbind, 9, cmd); + } + if (outputs.velocity) + { + fast = false; + push.options |= VISIBILITY_RESOLVE_VELOCITY; + device->BindUAV(outputs.velocity, 10, cmd); + barrier_stack.push_back(GPUBarrier::Image(outputs.velocity, outputs.velocity->desc.layout, 
ResourceState::UNORDERED_ACCESS)); + } + else + { + device->BindUAV(&unbind, 10, cmd); + } + if (outputs.normal) + { + fast = false; + push.options |= VISIBILITY_RESOLVE_NORMAL; + device->BindUAV(outputs.normal, 11, cmd); + barrier_stack.push_back(GPUBarrier::Image(outputs.normal, outputs.normal->desc.layout, ResourceState::UNORDERED_ACCESS)); + } + else + { + device->BindUAV(&unbind, 11, cmd); + } + if (outputs.roughness) + { + fast = false; + push.options |= VISIBILITY_RESOLVE_ROUGHNESS; + device->BindUAV(outputs.roughness, 12, cmd); + barrier_stack.push_back(GPUBarrier::Image(outputs.roughness, outputs.roughness->desc.layout, ResourceState::UNORDERED_ACCESS)); + } + else + { + device->BindUAV(&unbind, 12, cmd); + } + if (outputs.primitiveID_resolved) + { + push.options |= VISIBILITY_RESOLVE_PRIMITIVEID; + device->BindUAV(outputs.primitiveID_resolved, 13, cmd); + barrier_stack.push_back(GPUBarrier::Image(outputs.primitiveID_resolved, outputs.primitiveID_resolved->desc.layout, ResourceState::UNORDERED_ACCESS)); + } + else + { + device->BindUAV(&unbind, 13, cmd); } - barrier_stack_flush(cmd); + if (fast) + { + device->BindComputeShader(&shaders[msaa ? CSTYPE_VISIBILITY_RESOLVE_FAST_MSAA : CSTYPE_VISIBILITY_RESOLVE_FAST], cmd); + } + else + { + device->BindComputeShader(&shaders[msaa ? 
CSTYPE_VISIBILITY_RESOLVE_MSAA : CSTYPE_VISIBILITY_RESOLVE], cmd); + } + device->PushConstants(&push, sizeof(push), cmd); + device->Dispatch( - (depthbuffer.desc.width + 15) / 16, - (depthbuffer.desc.height + 15) / 16, + (input_primitiveID.desc.width + 15) / 16, + (input_primitiveID.desc.height + 15) / 16, 1, cmd ); - barrier_stack.push_back(GPUBarrier::Memory()); - barrier_stack.push_back(GPUBarrier::Image(&gbuffer[GBUFFER_VELOCITY], ResourceState::UNORDERED_ACCESS, gbuffer[GBUFFER_VELOCITY].desc.layout)); - barrier_stack.push_back(GPUBarrier::Image(&depthbuffer_resolved, ResourceState::UNORDERED_ACCESS, depthbuffer_resolved.desc.layout)); - barrier_stack.push_back(GPUBarrier::Image(&lineardepth, ResourceState::UNORDERED_ACCESS, lineardepth.desc.layout)); - - if (msaa) + if (outputs.depthbuffer) { - barrier_stack.push_back(GPUBarrier::Image(&gbuffer[GBUFFER_PRIMITIVEID], ResourceState::UNORDERED_ACCESS, gbuffer[GBUFFER_PRIMITIVEID].desc.layout)); + barrier_stack.push_back(GPUBarrier::Image(outputs.depthbuffer, ResourceState::UNORDERED_ACCESS, outputs.depthbuffer->desc.layout)); + } + if (outputs.lineardepth) + { + barrier_stack.push_back(GPUBarrier::Image(outputs.lineardepth, ResourceState::UNORDERED_ACCESS, outputs.lineardepth->desc.layout)); + } + if (outputs.velocity) + { + barrier_stack.push_back(GPUBarrier::Image(outputs.velocity, ResourceState::UNORDERED_ACCESS, outputs.velocity->desc.layout)); + } + if (outputs.normal) + { + barrier_stack.push_back(GPUBarrier::Image(outputs.normal, ResourceState::UNORDERED_ACCESS, outputs.normal->desc.layout)); + } + if (outputs.roughness) + { + barrier_stack.push_back(GPUBarrier::Image(outputs.roughness, ResourceState::UNORDERED_ACCESS, outputs.roughness->desc.layout)); + } + if (outputs.primitiveID_resolved) + { + barrier_stack.push_back(GPUBarrier::Image(outputs.primitiveID_resolved, ResourceState::UNORDERED_ACCESS, outputs.primitiveID_resolved->desc.layout)); } - barrier_stack_flush(cmd); wi::profiler::EndRange(range); 
@@ -9394,18 +9452,6 @@ void CreateRTReflectionResources(RTReflectionResources& res, XMUINT2 resolution) { res.frame = 0; - TextureDesc surface_desc; - surface_desc.type = TextureDesc::Type::TEXTURE_2D; - surface_desc.width = resolution.x; - surface_desc.height = resolution.y; - surface_desc.format = Format::R8G8B8A8_SNORM; - surface_desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; - device->CreateTexture(&surface_desc, nullptr, &res.texture_surface_normal); - surface_desc.format = Format::R8_UNORM; - device->CreateTexture(&surface_desc, nullptr, &res.texture_surface_roughness); - surface_desc.format = Format::R11G11B10_FLOAT; - device->CreateTexture(&surface_desc, nullptr, &res.texture_surface_environment); - TextureDesc desc; desc.type = TextureDesc::Type::TEXTURE_2D; desc.width = resolution.x / 2; @@ -9453,47 +9499,6 @@ void Postprocess_RTReflection( BindCommonResources(cmd); - // Compute common Raytraced surface properties: - { - device->EventBegin("RTReflection Surface", cmd); - device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_SURFACE], cmd); - - const GPUResource* uavs[] = { - &res.texture_surface_normal, - &res.texture_surface_roughness, - &res.texture_surface_environment, - }; - device->BindUAVs(uavs, 0, arraysize(uavs), cmd); - - { - GPUBarrier barriers[] = { - GPUBarrier::Image(&res.texture_surface_normal, res.texture_surface_normal.desc.layout, ResourceState::UNORDERED_ACCESS), - GPUBarrier::Image(&res.texture_surface_roughness, res.texture_surface_roughness.desc.layout, ResourceState::UNORDERED_ACCESS), - GPUBarrier::Image(&res.texture_surface_environment, res.texture_surface_environment.desc.layout, ResourceState::UNORDERED_ACCESS), - }; - device->Barrier(barriers, arraysize(barriers), cmd); - } - - device->Dispatch( - (res.texture_surface_normal.GetDesc().width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, - (res.texture_surface_normal.GetDesc().height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, - 1, 
- cmd - ); - - { - GPUBarrier barriers[] = { - GPUBarrier::Memory(), - GPUBarrier::Image(&res.texture_surface_normal, ResourceState::UNORDERED_ACCESS, res.texture_surface_normal.desc.layout), - GPUBarrier::Image(&res.texture_surface_roughness, ResourceState::UNORDERED_ACCESS, res.texture_surface_roughness.desc.layout), - GPUBarrier::Image(&res.texture_surface_environment, ResourceState::UNORDERED_ACCESS, res.texture_surface_environment.desc.layout), - }; - device->Barrier(barriers, arraysize(barriers), cmd); - } - - device->EventEnd(cmd); - } - const TextureDesc& desc = output.desc; // Render half-res: @@ -9517,13 +9522,6 @@ void Postprocess_RTReflection( device->PushConstants(&postprocess, sizeof(postprocess), cmd); - const GPUResource* resarray[] = { - &res.texture_surface_normal, - &res.texture_surface_roughness, - &res.texture_surface_environment, - }; - device->BindResources(resarray, 0, arraysize(resarray), cmd); - const GPUResource* uavs[] = { &res.texture_rayIndirectSpecular, &res.texture_rayDirectionPDF, @@ -9607,8 +9605,6 @@ void Postprocess_RTReflection( device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_RESOLVE], cmd); const GPUResource* resarray[] = { - &res.texture_surface_normal, - &res.texture_surface_roughness, &res.texture_rayIndirectSpecular, &res.texture_rayDirectionPDF, &res.texture_rayLengths, @@ -9660,7 +9656,6 @@ void Postprocess_RTReflection( device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_TEMPORAL], cmd); const GPUResource* resarray[] = { - &res.texture_surface_roughness, &res.texture_resolve, &res.texture_temporal[temporal_history], &res.texture_resolve_variance, @@ -9716,8 +9711,6 @@ void Postprocess_RTReflection( const GPUResource* resarray[] = { &res.texture_temporal[temporal_output], &res.texture_temporal_variance[temporal_output], - &res.texture_surface_normal, - &res.texture_surface_roughness, }; device->BindResources(resarray, 0, arraysize(resarray), cmd); @@ -9758,8 +9751,6 @@ void Postprocess_RTReflection( const 
GPUResource* resarray[] = { &res.texture_bilateral_temp, &res.texture_temporal_variance[temporal_output], - &res.texture_surface_normal, - &res.texture_surface_roughness, }; device->BindResources(resarray, 0, arraysize(resarray), cmd); @@ -9803,22 +9794,13 @@ void CreateSSRResources(SSRResources& res, XMUINT2 resolution) { res.frame = 0; - TextureDesc surface_desc; - surface_desc.type = TextureDesc::Type::TEXTURE_2D; - surface_desc.width = resolution.x; - surface_desc.height = resolution.y; - surface_desc.format = Format::R8G8B8A8_SNORM; - surface_desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; - device->CreateTexture(&surface_desc, nullptr, &res.texture_surface_normal); - surface_desc.format = Format::R8_UNORM; - device->CreateTexture(&surface_desc, nullptr, &res.texture_surface_roughness); - TextureDesc tile_desc; tile_desc.type = TextureDesc::Type::TEXTURE_2D; tile_desc.width = (resolution.x + SSR_TILESIZE - 1) / SSR_TILESIZE; tile_desc.height = (resolution.y + SSR_TILESIZE - 1) / SSR_TILESIZE; tile_desc.format = Format::R16G16_FLOAT; tile_desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; + tile_desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE; device->CreateTexture(&tile_desc, nullptr, &res.texture_tile_minmax_roughness); tile_desc.height = resolution.y; @@ -9888,58 +9870,15 @@ void Postprocess_SSR( { device->EventBegin("Postprocess_SSR", cmd); - auto range = wi::profiler::BeginRangeGPU("Screen Space Reflections", cmd); + auto range = wi::profiler::BeginRangeGPU("SSR", cmd); BindCommonResources(cmd); - // Compute common SSR surface properties: - { - device->EventBegin("SSR Surface", cmd); - device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_SURFACE], cmd); - - const GPUResource* uavs[] = { - &res.texture_surface_normal, - &res.texture_surface_roughness, - }; - device->BindUAVs(uavs, 0, arraysize(uavs), cmd); - - { - GPUBarrier barriers[] = { - GPUBarrier::Image(&res.texture_surface_normal, 
res.texture_surface_normal.desc.layout, ResourceState::UNORDERED_ACCESS), - GPUBarrier::Image(&res.texture_surface_roughness, res.texture_surface_roughness.desc.layout, ResourceState::UNORDERED_ACCESS), - }; - device->Barrier(barriers, arraysize(barriers), cmd); - } - - device->Dispatch( - (res.texture_surface_normal.GetDesc().width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, - (res.texture_surface_normal.GetDesc().height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, - 1, - cmd - ); - - { - GPUBarrier barriers[] = { - GPUBarrier::Memory(), - GPUBarrier::Image(&res.texture_surface_normal, ResourceState::UNORDERED_ACCESS, res.texture_surface_normal.desc.layout), - GPUBarrier::Image(&res.texture_surface_roughness, ResourceState::UNORDERED_ACCESS, res.texture_surface_roughness.desc.layout), - }; - device->Barrier(barriers, arraysize(barriers), cmd); - } - - device->EventEnd(cmd); - } - // Compute tile classification (horizontal): { device->EventBegin("SSR Tile Classification - Horizontal", cmd); device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_TILEMAXROUGHNESS_HORIZONTAL], cmd); - const GPUResource* resarray[] = { - &res.texture_surface_roughness, - }; - device->BindResources(resarray, 0, arraysize(resarray), cmd); - const GPUResource* uavs[] = { &res.texture_tile_minmax_roughness_horizontal, }; @@ -10141,8 +10080,6 @@ void Postprocess_SSR( device->EventBegin("SSR Raytrace pass", cmd); const GPUResource* resarray[] = { - &res.texture_surface_normal, - &res.texture_surface_roughness, &res.texture_depth_hierarchy, &input, &res.buffer_tiles_tracing_earlyexit, @@ -10205,8 +10142,6 @@ void Postprocess_SSR( device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_RESOLVE], cmd); const GPUResource* resarray[] = { - &res.texture_surface_normal, - &res.texture_surface_roughness, &res.texture_rayIndirectSpecular, &res.texture_rayDirectionPDF, &res.texture_rayLengths, @@ -10258,7 +10193,6 @@ void Postprocess_SSR( 
device->BindComputeShader(&shaders[CSTYPE_POSTPROCESS_SSR_TEMPORAL], cmd); const GPUResource* resarray[] = { - &res.texture_surface_roughness, &res.texture_resolve, &res.texture_temporal[temporal_history], &res.texture_resolve_variance, @@ -10314,8 +10248,6 @@ void Postprocess_SSR( const GPUResource* resarray[] = { &res.texture_temporal[temporal_output], &res.texture_temporal_variance[temporal_output], - &res.texture_surface_normal, - &res.texture_surface_roughness, }; device->BindResources(resarray, 0, arraysize(resarray), cmd); @@ -10356,8 +10288,6 @@ void Postprocess_SSR( const GPUResource* resarray[] = { &res.texture_bilateral_temp, &res.texture_temporal_variance[temporal_output], - &res.texture_surface_normal, - &res.texture_surface_roughness, }; device->BindResources(resarray, 0, arraysize(resarray), cmd); diff --git a/WickedEngine/wiRenderer.h b/WickedEngine/wiRenderer.h index 6e0c96bc4..74a3c807b 100644 --- a/WickedEngine/wiRenderer.h +++ b/WickedEngine/wiRenderer.h @@ -16,13 +16,6 @@ namespace wi::renderer { - enum GBUFFER - { - GBUFFER_PRIMITIVEID, - GBUFFER_VELOCITY, - GBUFFER_COUNT - }; - inline uint32_t CombineStencilrefs(wi::enums::STENCILREF engineStencilRef, uint8_t userStencilRef) { return (userStencilRef << 4) | static_cast(engineStencilRef); @@ -315,12 +308,19 @@ namespace wi::renderer wi::graphics::CommandList cmd ); + struct VisibilityResolveOutputs + { + // You can request any of these extra outputs to be written by VisibilityResolve: + const wi::graphics::Texture* depthbuffer = nullptr; // depth buffer that matches with post projection + const wi::graphics::Texture* lineardepth = nullptr; // depth buffer in linear space in [0,1] range + const wi::graphics::Texture* velocity = nullptr; // recommended format: R16G16_FLOAT + const wi::graphics::Texture* normal = nullptr; // recommended format: R16G16_FLOAT + const wi::graphics::Texture* roughness = nullptr; // recommended format: R8_UNORM + const wi::graphics::Texture* primitiveID_resolved = 
nullptr; // resolved from MSAA texture_visibility input + }; void VisibilityResolve( - const wi::graphics::Texture& depthbuffer, - const wi::graphics::Texture& texture_primitiveID, // can be MSAA - const wi::graphics::Texture gbuffer[GBUFFER_COUNT], - const wi::graphics::Texture& depthbuffer_resolved, - const wi::graphics::Texture& lineardepth, + const wi::graphics::Texture& input_primitiveID, // can be MSAA + const VisibilityResolveOutputs& outputs, wi::graphics::CommandList cmd ); @@ -444,9 +444,6 @@ namespace wi::renderer struct RTReflectionResources { mutable int frame = 0; - wi::graphics::Texture texture_surface_normal; - wi::graphics::Texture texture_surface_roughness; - wi::graphics::Texture texture_surface_environment; wi::graphics::Texture texture_rayIndirectSpecular; wi::graphics::Texture texture_rayDirectionPDF; wi::graphics::Texture texture_rayLengths; @@ -469,8 +466,6 @@ namespace wi::renderer struct SSRResources { mutable int frame = 0; - wi::graphics::Texture texture_surface_normal; - wi::graphics::Texture texture_surface_roughness; wi::graphics::Texture texture_tile_minmax_roughness_horizontal; wi::graphics::Texture texture_tile_minmax_roughness; wi::graphics::Texture texture_depth_hierarchy; diff --git a/WickedEngine/wiScene.cpp b/WickedEngine/wiScene.cpp index 760a3cb05..aa9420d95 100644 --- a/WickedEngine/wiScene.cpp +++ b/WickedEngine/wiScene.cpp @@ -391,18 +391,15 @@ namespace wi::scene ib = {}; vb_pos_nor_wind = {}; vb_tan = {}; - vb_uv0 = {}; - vb_uv1 = {}; + vb_uvs = {}; vb_atl = {}; vb_col = {}; vb_bon = {}; - vb_pre = {}; so_pos_nor_wind = {}; so_tan = {}; - vb_pre = {}; - subset_view = {}; + so_pre = {}; - if (vertex_tangents.empty() && !vertex_uvset_0.empty()) + if (vertex_tangents.empty() && !vertex_uvset_0.empty() && !vertex_normals.empty()) { // Generate tangents if not found: vertex_tangents.resize(vertex_positions.size()); @@ -485,6 +482,8 @@ namespace wi::scene subsetCounter++; } + const size_t uv_count = 
std::max(vertex_uvset_0.size(), vertex_uvset_1.size()); + GPUBufferDesc bd; bd.usage = Usage::DEFAULT; bd.bind_flags = BindFlag::VERTEX_BUFFER | BindFlag::INDEX_BUFFER | BindFlag::SHADER_RESOURCE; @@ -498,12 +497,10 @@ namespace wi::scene AlignTo(indices.size() * GetIndexStride(), alignment) + AlignTo(vertex_positions.size() * sizeof(Vertex_POS), alignment) + AlignTo(vertex_tangents.size() * sizeof(Vertex_TAN), alignment) + - AlignTo(vertex_uvset_0.size() * sizeof(Vertex_TEX), alignment) + - AlignTo(vertex_uvset_1.size() * sizeof(Vertex_TEX), alignment) + + AlignTo(uv_count * sizeof(Vertex_UVS), alignment) + AlignTo(vertex_atlas.size() * sizeof(Vertex_TEX), alignment) + AlignTo(vertex_colors.size() * sizeof(Vertex_COL), alignment) + - AlignTo(vertex_boneindices.size() * sizeof(Vertex_BON), alignment) + - AlignTo(subsets.size() * sizeof(ShaderMeshSubset), alignment) + AlignTo(vertex_boneindices.size() * sizeof(Vertex_BON), alignment) ; // single allocation storage for GPU buffer data: @@ -577,29 +574,20 @@ namespace wi::scene } } - // vertexBuffer - UV SET 0 - if (!vertex_uvset_0.empty()) + // vertexBuffer - UV SETS + if (!vertex_uvset_0.empty() || !vertex_uvset_1.empty()) { - vb_uv0.offset = buffer_offset; - vb_uv0.size = vertex_uvset_0.size() * sizeof(Vertex_TEX); - Vertex_TEX* vertices = (Vertex_TEX*)(buffer_data.data() + buffer_offset); - buffer_offset += AlignTo(vb_uv0.size, alignment); - for (size_t i = 0; i < vertex_uvset_0.size(); ++i) - { - vertices[i].FromFULL(vertex_uvset_0[i]); - } - } + const XMFLOAT2* uv0_stream = vertex_uvset_0.empty() ? vertex_uvset_1.data() : vertex_uvset_0.data(); + const XMFLOAT2* uv1_stream = vertex_uvset_1.empty() ? 
vertex_uvset_0.data() : vertex_uvset_1.data(); - // vertexBuffer - UV SET 1 - if (!vertex_uvset_1.empty()) - { - vb_uv1.offset = buffer_offset; - vb_uv1.size = vertex_uvset_1.size() * sizeof(Vertex_TEX); - Vertex_TEX* vertices = (Vertex_TEX*)(buffer_data.data() + buffer_offset); - buffer_offset += AlignTo(vb_uv1.size, alignment); - for (size_t i = 0; i < vertex_uvset_1.size(); ++i) + vb_uvs.offset = buffer_offset; + vb_uvs.size = uv_count * sizeof(Vertex_UVS); + Vertex_UVS* vertices = (Vertex_UVS*)(buffer_data.data() + buffer_offset); + buffer_offset += AlignTo(vb_uvs.size, alignment); + for (size_t i = 0; i < uv_count; ++i) { - vertices[i].FromFULL(vertex_uvset_1[i]); + vertices[i].uv0.FromFULL(uv0_stream[i]); + vertices[i].uv1.FromFULL(uv1_stream[i]); } } @@ -654,13 +642,6 @@ namespace wi::scene CreateStreamoutRenderData(); } - // subset buffer: - { - subset_view.offset = buffer_offset; - subset_view.size = subsets.size() * sizeof(ShaderMeshSubset); - buffer_offset += AlignTo(subset_view.size, alignment); - } - bool success = device->CreateBuffer(&bd, buffer_data.data(), &generalBuffer); assert(success); device->SetName(&generalBuffer, "MeshComponent::generalBuffer"); @@ -679,15 +660,10 @@ namespace wi::scene vb_tan.subresource_srv = device->CreateSubresource(&generalBuffer, SubresourceType::SRV, vb_tan.offset, vb_tan.size); vb_tan.descriptor_srv = device->GetDescriptorIndex(&generalBuffer, SubresourceType::SRV, vb_tan.subresource_srv); } - if (vb_uv0.IsValid()) + if (vb_uvs.IsValid()) { - vb_uv0.subresource_srv = device->CreateSubresource(&generalBuffer, SubresourceType::SRV, vb_uv0.offset, vb_uv0.size); - vb_uv0.descriptor_srv = device->GetDescriptorIndex(&generalBuffer, SubresourceType::SRV, vb_uv0.subresource_srv); - } - if (vb_uv1.IsValid()) - { - vb_uv1.subresource_srv = device->CreateSubresource(&generalBuffer, SubresourceType::SRV, vb_uv1.offset, vb_uv1.size); - vb_uv1.descriptor_srv = device->GetDescriptorIndex(&generalBuffer, SubresourceType::SRV, 
vb_uv1.subresource_srv); + vb_uvs.subresource_srv = device->CreateSubresource(&generalBuffer, SubresourceType::SRV, vb_uvs.offset, vb_uvs.size); + vb_uvs.descriptor_srv = device->GetDescriptorIndex(&generalBuffer, SubresourceType::SRV, vb_uvs.subresource_srv); } if (vb_atl.IsValid()) { @@ -705,10 +681,6 @@ namespace wi::scene vb_bon.descriptor_srv = device->GetDescriptorIndex(&generalBuffer, SubresourceType::SRV, vb_bon.subresource_srv); } - assert(subset_view.IsValid()); - subset_view.subresource_srv = device->CreateSubresource(&generalBuffer, SubresourceType::SRV, subset_view.offset, subset_view.size); - subset_view.descriptor_srv = device->GetDescriptorIndex(&generalBuffer, SubresourceType::SRV, subset_view.subresource_srv); - if (device->CheckCapability(GraphicsDeviceCapability::RAYTRACING)) { BLAS_state = MeshComponent::BLAS_STATE_NEEDS_REBUILD; @@ -783,53 +755,12 @@ namespace wi::scene so_tan.descriptor_srv = device->GetDescriptorIndex(&streamoutBuffer, SubresourceType::SRV, so_tan.subresource_srv); so_tan.descriptor_uav = device->GetDescriptorIndex(&streamoutBuffer, SubresourceType::UAV, so_tan.subresource_uav); - vb_pre.offset = AlignTo(so_tan.offset + so_tan.size, alignment); - vb_pre.size = vb_pos_nor_wind.size; - vb_pre.subresource_srv = device->CreateSubresource(&streamoutBuffer, SubresourceType::SRV, vb_pre.offset, vb_pre.size); - vb_pre.subresource_uav = device->CreateSubresource(&streamoutBuffer, SubresourceType::UAV, vb_pre.offset, vb_pre.size); - vb_pre.descriptor_srv = device->GetDescriptorIndex(&streamoutBuffer, SubresourceType::SRV, vb_pre.subresource_srv); - vb_pre.descriptor_uav = device->GetDescriptorIndex(&streamoutBuffer, SubresourceType::UAV, vb_pre.subresource_uav); - } - void MeshComponent::WriteShaderMesh(ShaderMesh* dest) const - { - ShaderMesh mesh; - mesh.init(); - mesh.ib = ib.descriptor_srv; - if (so_pos_nor_wind.IsValid()) - { - mesh.vb_pos_nor_wind = so_pos_nor_wind.descriptor_srv; - } - else - { - mesh.vb_pos_nor_wind = 
vb_pos_nor_wind.descriptor_srv; - } - if (so_tan.IsValid()) - { - mesh.vb_tan = so_tan.descriptor_srv; - } - else - { - mesh.vb_tan = vb_tan.descriptor_srv; - } - mesh.vb_col = vb_col.descriptor_srv; - mesh.vb_uv0 = vb_uv0.descriptor_srv; - mesh.vb_uv1 = vb_uv1.descriptor_srv; - mesh.vb_atl = vb_atl.descriptor_srv; - mesh.vb_pre = vb_pre.descriptor_srv; - mesh.blendmaterial1 = terrain_material1_index; - mesh.blendmaterial2 = terrain_material2_index; - mesh.blendmaterial3 = terrain_material3_index; - mesh.subsetbuffer = subset_view.descriptor_srv; - mesh.aabb_min = aabb._min; - mesh.aabb_max = aabb._max; - mesh.tessellation_factor = tessellationFactor; - - if (IsDoubleSided()) - { - mesh.flags |= SHADERMESH_FLAG_DOUBLE_SIDED; - } - - std::memcpy(dest, &mesh, sizeof(ShaderMesh)); // memcpy whole structure into mapped pointer to avoid read from uncached memory + so_pre.offset = AlignTo(so_tan.offset + so_tan.size, alignment); + so_pre.size = vb_pos_nor_wind.size; + so_pre.subresource_srv = device->CreateSubresource(&streamoutBuffer, SubresourceType::SRV, so_pre.offset, so_pre.size); + so_pre.subresource_uav = device->CreateSubresource(&streamoutBuffer, SubresourceType::UAV, so_pre.offset, so_pre.size); + so_pre.descriptor_srv = device->GetDescriptorIndex(&streamoutBuffer, SubresourceType::SRV, so_pre.subresource_srv); + so_pre.descriptor_uav = device->GetDescriptorIndex(&streamoutBuffer, SubresourceType::UAV, so_pre.subresource_uav); } void MeshComponent::ComputeNormals(COMPUTE_NORMALS compute) { @@ -1519,6 +1450,8 @@ namespace wi::scene + const uint32_t small_subtask_groupsize = 64u; + void Scene::Update(float dt) { this->dt = dt; @@ -1547,28 +1480,6 @@ namespace wi::scene } instanceArrayMapped = (ShaderMeshInstance*)instanceUploadBuffer[device->GetBufferIndex()].mapped_data; - meshArraySize = meshes.GetCount() + hairs.GetCount() + emitters.GetCount(); - if (meshBuffer.desc.size < (meshArraySize * sizeof(ShaderMesh))) - { - GPUBufferDesc desc; - desc.stride = 
sizeof(ShaderMesh); - desc.size = desc.stride * meshArraySize * 2; // *2 to grow fast - desc.bind_flags = BindFlag::SHADER_RESOURCE; - desc.misc_flags = ResourceMiscFlag::BUFFER_RAW; - device->CreateBuffer(&desc, nullptr, &meshBuffer); - device->SetName(&meshBuffer, "Scene::meshBuffer"); - - desc.usage = Usage::UPLOAD; - desc.bind_flags = BindFlag::NONE; - desc.misc_flags = ResourceMiscFlag::NONE; - for (int i = 0; i < arraysize(meshUploadBuffer); ++i) - { - device->CreateBuffer(&desc, nullptr, &meshUploadBuffer[i]); - device->SetName(&meshUploadBuffer[i], "Scene::meshUploadBuffer"); - } - } - meshArrayMapped = (ShaderMesh*)meshUploadBuffer[device->GetBufferIndex()].mapped_data; - materialArraySize = materials.GetCount(); if (materialBuffer.desc.size < (materialArraySize * sizeof(ShaderMaterial))) { @@ -1649,6 +1560,13 @@ namespace wi::scene wi::jobsystem::context ctx; + // Scan mesh subset counts to allocate GPU geometry data: + geometryAllocator.store(0u); + wi::jobsystem::Dispatch(ctx, (uint32_t)meshes.GetCount(), small_subtask_groupsize, [&](wi::jobsystem::JobArgs args) { + MeshComponent& mesh = meshes[args.jobIndex]; + mesh.geometryOffset = geometryAllocator.fetch_add((uint32_t)mesh.subsets.size()); + }); + wi::jobsystem::Execute(ctx, [&](wi::jobsystem::JobArgs args) { // Must not keep inactive TLAS instances, so zero them out for safety: std::memset(TLAS_instancesMapped, 0, TLAS_instancesUpload->desc.size); @@ -1662,6 +1580,31 @@ namespace wi::scene RunHierarchyUpdateSystem(ctx); + // GPU subset count allocation is ready at this point: + geometryArraySize = geometryAllocator.load(); + geometryArraySize += hairs.GetCount(); + geometryArraySize += emitters.GetCount(); + if (geometryBuffer.desc.size < (geometryArraySize * sizeof(ShaderGeometry))) + { + GPUBufferDesc desc; + desc.stride = sizeof(ShaderGeometry); + desc.size = desc.stride * geometryArraySize * 2; // *2 to grow fast + desc.bind_flags = BindFlag::SHADER_RESOURCE; + desc.misc_flags = 
ResourceMiscFlag::BUFFER_RAW; + device->CreateBuffer(&desc, nullptr, &geometryBuffer); + device->SetName(&geometryBuffer, "Scene::geometryBuffer"); + + desc.usage = Usage::UPLOAD; + desc.bind_flags = BindFlag::NONE; + desc.misc_flags = ResourceMiscFlag::NONE; + for (int i = 0; i < arraysize(geometryUploadBuffer); ++i) + { + device->CreateBuffer(&desc, nullptr, &geometryUploadBuffer[i]); + device->SetName(&geometryUploadBuffer[i], "Scene::geometryUploadBuffer"); + } + } + geometryArrayMapped = (ShaderGeometry*)geometryUploadBuffer[device->GetBufferIndex()].mapped_data; + RunMeshUpdateSystem(ctx); RunMaterialUpdateSystem(ctx); @@ -1891,7 +1834,7 @@ namespace wi::scene // Shader scene resources: shaderscene.instancebuffer = device->GetDescriptorIndex(&instanceBuffer, SubresourceType::SRV); - shaderscene.meshbuffer = device->GetDescriptorIndex(&meshBuffer, SubresourceType::SRV); + shaderscene.geometrybuffer = device->GetDescriptorIndex(&geometryBuffer, SubresourceType::SRV); shaderscene.materialbuffer = device->GetDescriptorIndex(&materialBuffer, SubresourceType::SRV); shaderscene.envmaparray = device->GetDescriptorIndex(&envmapArray, SubresourceType::SRV); if (weather.skyMap.IsValid()) @@ -2413,8 +2356,6 @@ namespace wi::scene } - const uint32_t small_subtask_groupsize = 64; - void Scene::RunAnimationUpdateSystem(wi::jobsystem::context& ctx) { for (size_t i = 0; i < animations.GetCount(); ++i) @@ -3039,63 +2980,13 @@ namespace wi::scene } } - if (mesh.so_pos_nor_wind.IsValid() && mesh.vb_pre.IsValid()) + if (mesh.so_pos_nor_wind.IsValid() && mesh.so_pre.IsValid()) { - std::swap(mesh.so_pos_nor_wind, mesh.vb_pre); + std::swap(mesh.so_pos_nor_wind, mesh.so_pre); } mesh._flags &= ~MeshComponent::TLAS_FORCE_DOUBLE_SIDED; - uint32_t subsetIndex = 0; - for (auto& subset : mesh.subsets) - { - const MaterialComponent* material = materials.GetComponent(subset.materialID); - if (material != nullptr) - { - subset.materialIndex = (uint32_t)materials.GetIndex(subset.materialID); - 
if (mesh.BLAS.IsValid()) - { - auto& geometry = mesh.BLAS.desc.bottom_level.geometries[subsetIndex]; - uint32_t flags = geometry.flags; - if (material->IsAlphaTestEnabled() || (material->GetRenderTypes() & RENDERTYPE_TRANSPARENT) || !material->IsCastingShadow()) - { - geometry.flags &= ~RaytracingAccelerationStructureDesc::BottomLevel::Geometry::FLAG_OPAQUE; - } - else - { - geometry.flags = RaytracingAccelerationStructureDesc::BottomLevel::Geometry::FLAG_OPAQUE; - } - if (flags != geometry.flags) - { - mesh.BLAS_state = MeshComponent::BLAS_STATE_NEEDS_REBUILD; - } - if (mesh.streamoutBuffer.IsValid()) - { - mesh.BLAS_state = MeshComponent::BLAS_STATE_NEEDS_REBUILD; - geometry.triangles.vertex_buffer = mesh.streamoutBuffer; - geometry.triangles.vertex_byte_offset = mesh.so_pos_nor_wind.offset; - } - if (material->IsDoubleSided()) - { - mesh._flags |= MeshComponent::TLAS_FORCE_DOUBLE_SIDED; - } - } - } - else - { - subset.materialIndex = 0; - } - subsetIndex++; - } - - if (mesh.BLAS.IsValid()) - { - if (mesh.dirty_morph) - { - mesh.BLAS_state = MeshComponent::BLAS_STATE_NEEDS_REBUILD; - } - } - mesh.terrain_material1_index = (uint32_t)materials.GetIndex(mesh.terrain_material1); mesh.terrain_material2_index = (uint32_t)materials.GetIndex(mesh.terrain_material2); mesh.terrain_material3_index = (uint32_t)materials.GetIndex(mesh.terrain_material3); @@ -3136,7 +3027,86 @@ namespace wi::scene mesh.aabb = AABB(_min, _max); } - mesh.WriteShaderMesh(meshArrayMapped + args.jobIndex); + ShaderGeometry geometry; + geometry.init(); + geometry.ib = mesh.ib.descriptor_srv; + if (mesh.so_pos_nor_wind.IsValid()) + { + geometry.vb_pos_nor_wind = mesh.so_pos_nor_wind.descriptor_srv; + } + else + { + geometry.vb_pos_nor_wind = mesh.vb_pos_nor_wind.descriptor_srv; + } + if (mesh.so_tan.IsValid()) + { + geometry.vb_tan = mesh.so_tan.descriptor_srv; + } + else + { + geometry.vb_tan = mesh.vb_tan.descriptor_srv; + } + geometry.vb_col = mesh.vb_col.descriptor_srv; + geometry.vb_uvs = 
mesh.vb_uvs.descriptor_srv; + geometry.vb_atl = mesh.vb_atl.descriptor_srv; + geometry.vb_pre = mesh.so_pre.descriptor_srv; + geometry.blendmaterial1 = mesh.terrain_material1_index; + geometry.blendmaterial2 = mesh.terrain_material2_index; + geometry.blendmaterial3 = mesh.terrain_material3_index; + geometry.aabb_min = mesh.aabb._min; + geometry.aabb_max = mesh.aabb._max; + geometry.tessellation_factor = mesh.tessellationFactor; + + if (mesh.IsDoubleSided()) + { + geometry.flags |= SHADERMESH_FLAG_DOUBLE_SIDED; + } + + uint32_t subsetIndex = 0; + for (auto& subset : mesh.subsets) + { + const MaterialComponent* material = materials.GetComponent(subset.materialID); + if (material != nullptr) + { + subset.materialIndex = (uint32_t)materials.GetIndex(subset.materialID); + if (mesh.BLAS.IsValid()) + { + auto& geometry = mesh.BLAS.desc.bottom_level.geometries[subsetIndex]; + uint32_t flags = geometry.flags; + if (material->IsAlphaTestEnabled() || (material->GetRenderTypes() & RENDERTYPE_TRANSPARENT) || !material->IsCastingShadow()) + { + geometry.flags &= ~RaytracingAccelerationStructureDesc::BottomLevel::Geometry::FLAG_OPAQUE; + } + else + { + geometry.flags = RaytracingAccelerationStructureDesc::BottomLevel::Geometry::FLAG_OPAQUE; + } + if (flags != geometry.flags || mesh.dirty_morph) + { + mesh.BLAS_state = MeshComponent::BLAS_STATE_NEEDS_REBUILD; + } + if (mesh.streamoutBuffer.IsValid()) + { + mesh.BLAS_state = MeshComponent::BLAS_STATE_NEEDS_REBUILD; + geometry.triangles.vertex_buffer = mesh.streamoutBuffer; + geometry.triangles.vertex_byte_offset = mesh.so_pos_nor_wind.offset; + } + if (material->IsDoubleSided()) + { + mesh._flags |= MeshComponent::TLAS_FORCE_DOUBLE_SIDED; + } + } + } + else + { + subset.materialIndex = 0; + } + + geometry.indexOffset = subset.indexOffset; + geometry.materialIndex = subset.materialIndex; + std::memcpy(geometryArrayMapped + mesh.geometryOffset + subsetIndex, &geometry, sizeof(geometry)); + subsetIndex++; + } }); } @@ -3403,7 +3373,7 
@@ namespace wi::scene inst.layerMask = layerMask; inst.color = wi::math::CompressColor(object.color); inst.emissive = wi::math::Pack_R11G11B10_FLOAT(XMFLOAT3(object.emissiveColor.x * object.emissiveColor.w, object.emissiveColor.y * object.emissiveColor.w, object.emissiveColor.z * object.emissiveColor.w)); - inst.meshIndex = (uint)meshes.GetIndex(object.meshID); + inst.geometryOffset = mesh.geometryOffset; std::memcpy(instanceArrayMapped + args.jobIndex, &inst, sizeof(inst)); // memcpy whole structure into mapped pointer to avoid read from uncached memory @@ -3810,26 +3780,24 @@ namespace wi::scene GraphicsDevice* device = wi::graphics::GetDevice(); - ShaderMesh mesh; - mesh.init(); - mesh.ib = device->GetDescriptorIndex(&hair.primitiveBuffer, SubresourceType::SRV); - mesh.vb_pos_nor_wind = device->GetDescriptorIndex(&hair.vertexBuffer_POS[0], SubresourceType::SRV); - mesh.vb_pre = device->GetDescriptorIndex(&hair.vertexBuffer_POS[1], SubresourceType::SRV); - mesh.vb_uv0 = device->GetDescriptorIndex(&hair.vertexBuffer_TEX, SubresourceType::SRV); - mesh.subsetbuffer = device->GetDescriptorIndex(&hair.subsetBuffer, SubresourceType::SRV); - mesh.flags = SHADERMESH_FLAG_DOUBLE_SIDED | SHADERMESH_FLAG_HAIRPARTICLE; + ShaderGeometry geometry; + geometry.init(); + geometry.indexOffset = 0; + geometry.materialIndex = (uint)materials.GetIndex(entity); + geometry.ib = device->GetDescriptorIndex(&hair.primitiveBuffer, SubresourceType::SRV); + geometry.vb_pos_nor_wind = device->GetDescriptorIndex(&hair.vertexBuffer_POS[0], SubresourceType::SRV); + geometry.vb_pre = device->GetDescriptorIndex(&hair.vertexBuffer_POS[1], SubresourceType::SRV); + geometry.vb_uvs = device->GetDescriptorIndex(&hair.vertexBuffer_UVS, SubresourceType::SRV); + geometry.flags = SHADERMESH_FLAG_DOUBLE_SIDED | SHADERMESH_FLAG_HAIRPARTICLE; - const size_t meshIndex = meshes.GetCount() + args.jobIndex; - std::memcpy(meshArrayMapped + meshIndex, &mesh, sizeof(mesh)); + size_t geometryAllocation = 
geometryAllocator.fetch_add(1); + std::memcpy(geometryArrayMapped + geometryAllocation, &geometry, sizeof(geometry)); ShaderMeshInstance inst; inst.init(); inst.uid = entity; inst.layerMask = hair.layerMask; - // every vertex is pretransformed and simulated in worldspace for hair particle: - inst.transform.Create(wi::math::IDENTITY_MATRIX); - inst.transformPrev.Create(wi::math::IDENTITY_MATRIX); - inst.meshIndex = (uint)meshIndex; + inst.geometryOffset = (uint)geometryAllocation; const size_t instanceIndex = objects.GetCount() + args.jobIndex; std::memcpy(instanceArrayMapped + instanceIndex, &inst, sizeof(inst)); @@ -3892,27 +3860,24 @@ namespace wi::scene GraphicsDevice* device = wi::graphics::GetDevice(); - ShaderMesh mesh; - mesh.init(); - mesh.ib = device->GetDescriptorIndex(&emitter.primitiveBuffer, SubresourceType::SRV); - mesh.vb_pos_nor_wind = device->GetDescriptorIndex(&emitter.vertexBuffer_POS, SubresourceType::SRV); - mesh.vb_uv0 = device->GetDescriptorIndex(&emitter.vertexBuffer_TEX, SubresourceType::SRV); - mesh.vb_uv1 = device->GetDescriptorIndex(&emitter.vertexBuffer_TEX2, SubresourceType::SRV); - mesh.vb_col = device->GetDescriptorIndex(&emitter.vertexBuffer_COL, SubresourceType::SRV); - mesh.subsetbuffer = device->GetDescriptorIndex(&emitter.subsetBuffer, SubresourceType::SRV); - mesh.flags = SHADERMESH_FLAG_DOUBLE_SIDED | SHADERMESH_FLAG_EMITTEDPARTICLE; + ShaderGeometry geometry; + geometry.init(); + geometry.indexOffset = 0; + geometry.materialIndex = (uint)materials.GetIndex(entity); + geometry.ib = device->GetDescriptorIndex(&emitter.primitiveBuffer, SubresourceType::SRV); + geometry.vb_pos_nor_wind = device->GetDescriptorIndex(&emitter.vertexBuffer_POS, SubresourceType::SRV); + geometry.vb_uvs = device->GetDescriptorIndex(&emitter.vertexBuffer_UVS, SubresourceType::SRV); + geometry.vb_col = device->GetDescriptorIndex(&emitter.vertexBuffer_COL, SubresourceType::SRV); + geometry.flags = SHADERMESH_FLAG_DOUBLE_SIDED | 
SHADERMESH_FLAG_EMITTEDPARTICLE; - const size_t meshIndex = meshes.GetCount() + hairs.GetCount() + args.jobIndex; - std::memcpy(meshArrayMapped + meshIndex, &mesh, sizeof(mesh)); + size_t geometryAllocation = geometryAllocator.fetch_add(1); + std::memcpy(geometryArrayMapped + geometryAllocation, &geometry, sizeof(geometry)); ShaderMeshInstance inst; inst.init(); inst.uid = entity; inst.layerMask = emitter.layerMask; - // every vertex is pretransformed and simulated in worldspace for emitted particle: - inst.transform.Create(wi::math::IDENTITY_MATRIX); - inst.transformPrev.Create(wi::math::IDENTITY_MATRIX); - inst.meshIndex = (uint)meshIndex; + inst.geometryOffset = (uint)geometryAllocation; const size_t instanceIndex = objects.GetCount() + hairs.GetCount() + args.jobIndex; std::memcpy(instanceArrayMapped + instanceIndex, &inst, sizeof(inst)); @@ -4211,7 +4176,7 @@ namespace wi::scene if (distance < result.distance) { - const XMVECTOR nor = XMVector3Normalize(XMVector3Cross(XMVectorSubtract(p2, p1), XMVectorSubtract(p1, p0))); + const XMVECTOR nor = XMVector3Normalize(XMVector3TransformNormal(XMVector3Cross(XMVectorSubtract(p2, p1), XMVectorSubtract(p1, p0)), objectMat)); result.entity = entity; XMStoreFloat3(&result.position, pos); diff --git a/WickedEngine/wiScene.h b/WickedEngine/wiScene.h index 0da69bc0e..53805d53a 100644 --- a/WickedEngine/wiScene.h +++ b/WickedEngine/wiScene.h @@ -367,8 +367,8 @@ namespace wi::scene // Non-serialized attributes: wi::primitive::AABB aabb; - wi::graphics::GPUBuffer generalBuffer; - wi::graphics::GPUBuffer streamoutBuffer; + wi::graphics::GPUBuffer generalBuffer; // index buffer + all static vertex buffers + wi::graphics::GPUBuffer streamoutBuffer; // all dynamic vertex buffers struct BufferView { uint64_t offset = ~0ull; @@ -386,16 +386,15 @@ namespace wi::scene BufferView ib; BufferView vb_pos_nor_wind; BufferView vb_tan; - BufferView vb_uv0; - BufferView vb_uv1; + BufferView vb_uvs; BufferView vb_atl; BufferView vb_col; 
BufferView vb_bon; - BufferView vb_pre; BufferView so_pos_nor_wind; BufferView so_tan; - BufferView subset_view; + BufferView so_pre; wi::vector vertex_subsets; + uint32_t geometryOffset = 0; wi::graphics::RaytracingAccelerationStructure BLAS; enum BLAS_STATE @@ -412,7 +411,6 @@ namespace wi::scene uint32_t terrain_material3_index = ~0u; mutable bool dirty_morph = false; - mutable bool dirty_subsets = true; inline void SetRenderable(bool value) { if (value) { _flags |= RENDERABLE; } else { _flags &= ~RENDERABLE; } } inline void SetDoubleSided(bool value) { if (value) { _flags |= DOUBLE_SIDED; } else { _flags &= ~DOUBLE_SIDED; } } @@ -432,7 +430,6 @@ namespace wi::scene // Recreates GPU resources for index/vertex buffers void CreateRenderData(); void CreateStreamoutRenderData(); - void WriteShaderMesh(ShaderMesh* dest) const; enum COMPUTE_NORMALS { @@ -516,6 +513,11 @@ namespace wi::scene static const wi::graphics::Format FORMAT = wi::graphics::Format::R16G16_FLOAT; }; + struct Vertex_UVS + { + Vertex_TEX uv0; + Vertex_TEX uv1; + }; struct Vertex_BON { uint64_t ind = 0; @@ -904,10 +906,12 @@ namespace wi::scene XMFLOAT4 clipPlane = XMFLOAT4(0, 0, 0, 0); // default: no clip plane wi::Canvas canvas; uint32_t sample_count = 1; + int texture_primitiveID_index = -1; int texture_depth_index = -1; int texture_lineardepth_index = -1; - int texture_gbuffer0_index = -1; - int texture_gbuffer1_index = -1; + int texture_velocity_index = -1; + int texture_normal_index = -1; + int texture_roughness_index = -1; int texture_reflection_index = -1; int texture_refraction_index = -1; int texture_waterriples_index = -1; @@ -1309,15 +1313,16 @@ namespace wi::scene size_t instanceArraySize = 0; wi::graphics::GPUBuffer instanceBuffer; - // Meshes for bindless visiblity indexing: + // Geometries for bindless visibility indexing: // contains in order: - // 1) meshes - // 2) hair particles - // 3) emitted particles - wi::graphics::GPUBuffer
meshUploadBuffer[wi::graphics::GraphicsDevice::GetBufferCount()]; - ShaderMesh* meshArrayMapped = nullptr; - size_t meshArraySize = 0; - wi::graphics::GPUBuffer meshBuffer; + // 1) meshes * mesh.subsetCount + // 2) hair particles * 1 + // 3) emitted particles * 1 + wi::graphics::GPUBuffer geometryUploadBuffer[wi::graphics::GraphicsDevice::GetBufferCount()]; + ShaderGeometry* geometryArrayMapped = nullptr; + size_t geometryArraySize = 0; + wi::graphics::GPUBuffer geometryBuffer; + std::atomic geometryAllocator{ 0 }; // Materials for bindless visibility indexing: wi::graphics::GPUBuffer materialUploadBuffer[wi::graphics::GraphicsDevice::GetBufferCount()];