diff --git a/.github/workflows/build-nightly.yml b/.github/workflows/build-nightly.yml index 992ffe23f..8595e6f46 100644 --- a/.github/workflows/build-nightly.yml +++ b/.github/workflows/build-nightly.yml @@ -74,7 +74,7 @@ jobs: - name: Install dependencies run: | wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-1.2.162-focal.list https://packages.lunarg.com/vulkan/1.2.162/lunarg-vulkan-1.2.162-focal.list + sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-1.2.170-focal.list https://packages.lunarg.com/vulkan/1.2.170/lunarg-vulkan-1.2.170-focal.list sudo apt update sudo apt install vulkan-sdk sudo apt install libsdl2-dev diff --git a/.github/workflows/build-pr.yml b/.github/workflows/build-pr.yml index 9e58b9e18..0c6958959 100644 --- a/.github/workflows/build-pr.yml +++ b/.github/workflows/build-pr.yml @@ -73,7 +73,7 @@ jobs: - name: Install dependencies run: | wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-1.2.162-focal.list https://packages.lunarg.com/vulkan/1.2.162/lunarg-vulkan-1.2.162-focal.list + sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-1.2.170-focal.list https://packages.lunarg.com/vulkan/1.2.170/lunarg-vulkan-1.2.170-focal.list sudo apt update sudo apt install vulkan-sdk sudo apt install libsdl2-dev diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index eb7f682a5..08a9b07fe 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -74,7 +74,7 @@ jobs: - name: Install dependencies run: | wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-1.2.162-focal.list https://packages.lunarg.com/vulkan/1.2.162/lunarg-vulkan-1.2.162-focal.list + sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-1.2.170-focal.list 
https://packages.lunarg.com/vulkan/1.2.170/lunarg-vulkan-1.2.170-focal.list sudo apt update sudo apt install vulkan-sdk sudo apt install libsdl2-dev diff --git a/Editor/ObjectWindow.cpp b/Editor/ObjectWindow.cpp index fa2220e7e..23e852336 100644 --- a/Editor/ObjectWindow.cpp +++ b/Editor/ObjectWindow.cpp @@ -710,7 +710,7 @@ void ObjectWindow::Create(EditorComponent* editor) x->SetLightmapRenderRequest(true); } - wiRenderer::InvalidateBVH(); + scene.InvalidateBVH(); }); AddWidget(&generateLightmapButton); diff --git a/README.md b/README.md index c0c5974da..9cc980dc0 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Cmake: It is possible to build the windows version with Cmake, but the recommend The Linux support is experimental. You can find a sample build script for Ubuntu 20.04 [here](.github/workflows/build.yml) (in the linux section). You might need to install some dependencies, such as Vulkan SDK 1.2 or greater (to get DirectXShaderCompiler), SDL2, cmake 3.7 and g++ compiler (C++ 17 compliant version). 
For Ubuntu 20.04, you can use the following commands to install dependencies: ```bash wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - -sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-1.2.162-focal.list https://packages.lunarg.com/vulkan/1.2.162/lunarg-vulkan-1.2.162-focal.list +sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-1.2.170-focal.list https://packages.lunarg.com/vulkan/1.2.170/lunarg-vulkan-1.2.170-focal.list sudo apt update sudo apt install vulkan-sdk sudo apt install libsdl2-dev diff --git a/WickedEngine/RenderPath3D.cpp b/WickedEngine/RenderPath3D.cpp index 4d84cb471..e3cc87568 100644 --- a/WickedEngine/RenderPath3D.cpp +++ b/WickedEngine/RenderPath3D.cpp @@ -536,15 +536,13 @@ void RenderPath3D::Update(float dt) if (getSceneUpdateEnabled()) { - if (getAO() == AO_RTAO || wiRenderer::GetRaytracedShadowsEnabled() || getRaytracedReflectionEnabled()) - { - scene->SetUpdateAccelerationStructuresEnabled(true); - } - else - { - scene->SetUpdateAccelerationStructuresEnabled(false); - } + GraphicsDevice* device = wiRenderer::GetDevice(); + scene->cmd = device->BeginCommandList(); + scene->Update(dt * wiRenderer::GetGameSpeed()); + + device->StashCommandLists(); + scene->cmd = INVALID_COMMANDLIST; } // Frustum culling for main camera: @@ -596,14 +594,6 @@ void RenderPath3D::Render() const RenderFrameSetUp(cmd); }); - if (scene->IsUpdateAccelerationStructuresEnabled()) - { - cmd = device->BeginCommandList(); - wiJobSystem::Execute(ctx, [this, cmd](wiJobArgs args) { - wiRenderer::UpdateRaytracingAccelerationStructures(*scene, cmd); - }); - } - static const uint32_t drawscene_flags = wiRenderer::DRAWSCENE_OPAQUE | wiRenderer::DRAWSCENE_HAIRPARTICLE | @@ -967,6 +957,8 @@ void RenderPath3D::RenderFrameSetUp(CommandList cmd) const device->BindResource(CS, &depthBuffer_Copy1, TEXSLOT_DEPTH, cmd); wiRenderer::UpdateRenderData(visibility_main, frameCB, cmd); + + wiRenderer::UpdateRaytracingAccelerationStructures(*scene, cmd); 
} void RenderPath3D::RenderAO(CommandList cmd) const diff --git a/WickedEngine/RenderPath3D_PathTracing.cpp b/WickedEngine/RenderPath3D_PathTracing.cpp index 7d67beb5f..660df112b 100644 --- a/WickedEngine/RenderPath3D_PathTracing.cpp +++ b/WickedEngine/RenderPath3D_PathTracing.cpp @@ -60,8 +60,6 @@ void RenderPath3D_PathTracing::ResizeBuffers() device->CreateRenderPass(&desc, &renderpass_debugbvh); } - wiRenderer::CreateRayBuffers(rayBuffers, GetInternalResolution().x * GetInternalResolution().y); - // also reset accumulation buffer state: sam = -1; } @@ -104,6 +102,11 @@ void RenderPath3D_PathTracing::Update(float dt) } sam++; + if (sam == 0) + { + scene->InvalidateBVH(); + } + RenderPath3D::Update(dt); } @@ -119,10 +122,7 @@ void RenderPath3D_PathTracing::Render() const wiRenderer::UpdateRenderData(visibility_main, frameCB, cmd); - if (sam == 0) - { - wiRenderer::BuildSceneBVH(*scene, cmd); - } + wiRenderer::UpdateRaytracingAccelerationStructures(*scene, cmd); }); // Main scene: @@ -148,7 +148,7 @@ void RenderPath3D_PathTracing::Render() const vp.Height = (float)traceResult.GetDesc().Height; device->BindViewports(1, &vp, cmd); - wiRenderer::RayTraceSceneBVH(cmd); + wiRenderer::RayTraceSceneBVH(*scene, cmd); device->RenderPassEnd(cmd); } @@ -156,8 +156,7 @@ void RenderPath3D_PathTracing::Render() const { auto range = wiProfiler::BeginRangeGPU("Traced Scene", cmd); - wiRenderer::GenerateScreenRayBuffers(rayBuffers, *camera, GetInternalResolution().x, GetInternalResolution().y, cmd); - wiRenderer::RayTraceScene(*scene, rayBuffers, &traceResult, sam, cmd); + wiRenderer::RayTraceScene(*scene, traceResult, sam, cmd); wiProfiler::EndRange(range); // Traced Scene @@ -178,7 +177,7 @@ void RenderPath3D_PathTracing::Render() const device->EventBegin("GUI Background Blur", cmd); wiRenderer::Postprocess_Downsample4x(rtPostprocess_LDR[0], rtGUIBlurredBackground[0], cmd); wiRenderer::Postprocess_Downsample4x(rtGUIBlurredBackground[0], rtGUIBlurredBackground[2], cmd); - 
wiRenderer::Postprocess_Blur_Gaussian(rtGUIBlurredBackground[2], rtGUIBlurredBackground[1], rtGUIBlurredBackground[2], cmd); + wiRenderer::Postprocess_Blur_Gaussian(rtGUIBlurredBackground[2], rtGUIBlurredBackground[1], rtGUIBlurredBackground[2], cmd, -1, -1, true); device->EventEnd(cmd); wiProfiler::EndRange(range); } diff --git a/WickedEngine/RenderPath3D_PathTracing.h b/WickedEngine/RenderPath3D_PathTracing.h index ea5891667..78891f2b4 100644 --- a/WickedEngine/RenderPath3D_PathTracing.h +++ b/WickedEngine/RenderPath3D_PathTracing.h @@ -13,8 +13,6 @@ protected: wiGraphics::RenderPass renderpass_debugbvh; - wiRenderer::RayBuffers rayBuffers; - void ResizeBuffers() override; public: diff --git a/WickedEngine/offlineshadercompiler.cpp b/WickedEngine/offlineshadercompiler.cpp index fa6184888..6d78cc701 100644 --- a/WickedEngine/offlineshadercompiler.cpp +++ b/WickedEngine/offlineshadercompiler.cpp @@ -112,12 +112,9 @@ int main(int argc, char* argv[]) "skinningCS.hlsl" , "skinningCS_LDS.hlsl" , "resolveMSAADepthStencilCS.hlsl" , - "raytrace_shadeCS.hlsl" , - "raytrace_tilesortCS.hlsl" , - "raytrace_kickjobsCS.hlsl" , - "raytrace_launchCS.hlsl" , + "raytraceCS.hlsl" , + "raytraceCS_rtapi.hlsl" , "paint_textureCS.hlsl" , - "raytrace_closesthitCS.hlsl" , "oceanUpdateDisplacementMapCS.hlsl" , "oceanUpdateGradientFoldingCS.hlsl" , "oceanSimulatorCS.hlsl" , @@ -248,6 +245,7 @@ int main(int argc, char* argv[]) "shadowPS_water.hlsl" , "shadowPS_alphatest.hlsl" , "renderlightmapPS.hlsl" , + "renderlightmapPS_rtapi.hlsl" , "raytrace_debugbvhPS.hlsl" , "outlinePS.hlsl" , "oceanSurfaceSimplePS.hlsl" , diff --git a/WickedEngine/shaders/CMakeLists.txt b/WickedEngine/shaders/CMakeLists.txt index 1ac73ff8d..4725d6847 100644 --- a/WickedEngine/shaders/CMakeLists.txt +++ b/WickedEngine/shaders/CMakeLists.txt @@ -39,12 +39,9 @@ set(SHADERS_CS "skinningCS.hlsl" "skinningCS_LDS.hlsl" "resolveMSAADepthStencilCS.hlsl" - "raytrace_shadeCS.hlsl" - "raytrace_tilesortCS.hlsl" - 
"raytrace_kickjobsCS.hlsl" - "raytrace_launchCS.hlsl" "paint_textureCS.hlsl" - "raytrace_closesthitCS.hlsl" + "raytraceCS.hlsl" + "raytraceCS_rtapi.hlsl" "oceanUpdateDisplacementMapCS.hlsl" "oceanUpdateGradientFoldingCS.hlsl" "oceanSimulatorCS.hlsl" @@ -175,6 +172,7 @@ set(SHADERS_PS "shadowPS_water.hlsl" "shadowPS_alphatest.hlsl" "renderlightmapPS.hlsl" + "renderlightmapPS_rtapi.hlsl" "raytrace_debugbvhPS.hlsl" "outlinePS.hlsl" "oceanSurfaceSimplePS.hlsl" @@ -327,11 +325,7 @@ function(Generate_Shaders_SPIRV SHADERS_SRC_LIST SHADER_TYPE) set(${Shader}-LASTMOD ${NEWTIMESTAMP} CACHE INTERNAL "") message(STATUS "CALCULATING DEPENDENCIES FOR SHADER ${SHADER_TYPE} ${FILE_NAME}") - if(${SHADER_TYPE} STREQUAL lib) - set(VK_VERSION "vulkan1.2") - else() - set(VK_VERSION "vulkan1.1") - endif() + set(VK_VERSION "vulkan1.2") set(COMMAND_PARAMS ${DXC_TARGET} "${SPIRV_SOURCE}" @@ -341,7 +335,7 @@ function(Generate_Shaders_SPIRV SHADERS_SRC_LIST SHADER_TYPE) #-all-resources-bound #-pack-optimized -res-may-alias - #-no-legacy-cbuf-layout + -no-legacy-cbuf-layout -spirv -fspv-target-env=${VK_VERSION} -fvk-use-dx-layout @@ -352,7 +346,6 @@ function(Generate_Shaders_SPIRV SHADERS_SRC_LIST SHADER_TYPE) -fvk-u-shift 2000 0 -fvk-s-shift 3000 0 #-fspv-extension=KHR - -Vd #DISABLE VALIDATION: There is currently a validation bug with raytracing RayTCurrent()!!! 
) # Determine include dependencies (recursively) diff --git a/WickedEngine/shaders/ShaderInterop_Raytracing.h b/WickedEngine/shaders/ShaderInterop_Raytracing.h index 46be0f3c9..54a7299d1 100644 --- a/WickedEngine/shaders/ShaderInterop_Raytracing.h +++ b/WickedEngine/shaders/ShaderInterop_Raytracing.h @@ -4,15 +4,6 @@ static const uint RAYTRACING_LAUNCH_BLOCKSIZE = 8; -static const uint RAYTRACING_TRACE_GROUPSIZE = 64; -static const uint RAYTRACING_SORT_GROUPSIZE = 1024; - -static const uint RAYTRACE_INDIRECT_OFFSET_TRACE = 0; -static const uint RAYTRACE_INDIRECT_OFFSET_TILESORT = 4 * 3; - -// Whether to sort global ray buffer or only smaller bundles (tiles). -// The global sorting is slower, but on some GPUs, it is still worth to to this because the raytracing will be faster (more coherent) -#define RAYTRACING_SORT_GLOBAL CBUFFER(RaytracingCB, CBSLOT_RENDERER_TRACED) @@ -25,15 +16,5 @@ CBUFFER(RaytracingCB, CBSLOT_RENDERER_TRACED) uint4 xTraceUserData; }; -struct RaytracingStoredRay -{ - float3 origin; - uint pixelID; // flattened pixel index - uint3 direction_energy; // packed half3 direction | half3 energy - uint primitiveID; - float2 bary; - uint2 color; // packed rgba16 -}; - #endif // WI_SHADERINTEROP_RAYTRACING_H diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems b/WickedEngine/shaders/Shaders_SOURCE.vcxitems index 5c832ddf9..d3d68a4a7 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems @@ -898,6 +898,17 @@ Vertex Vertex + + Compute + 4.0 + + + Compute + 4.0 + + + Pixel + Compute Compute @@ -2425,16 +2436,6 @@ Compute Compute - - Compute - Compute - Compute - Compute - Compute - Compute - Compute - Compute - Pixel Pixel @@ -2445,26 +2446,6 @@ Pixel Pixel - - Compute - Compute - Compute - Compute - Compute - Compute - Compute - Compute - - - Compute - Compute - Compute - Compute - Compute - Compute - Compute - Compute - Vertex Vertex @@ -2475,26 +2456,6 @@ Vertex Vertex - - Compute - Compute - 
Compute - Compute - Compute - Compute - Compute - Compute - - - Compute - Compute - Compute - Compute - Compute - Compute - Compute - Compute - Pixel Pixel diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters index f3ba3a149..ccc2ed326 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters @@ -248,24 +248,9 @@ CS - - CS - - - CS - - - CS - - - CS - CS - - CS - CS @@ -980,6 +965,15 @@ CS + + PS + + + CS + + + CS + diff --git a/WickedEngine/shaders/lightingHF.hlsli b/WickedEngine/shaders/lightingHF.hlsli index 9d2005dfb..48cb9c7e3 100644 --- a/WickedEngine/shaders/lightingHF.hlsli +++ b/WickedEngine/shaders/lightingHF.hlsli @@ -133,7 +133,7 @@ inline float shadowTrace(in Surface surface, in float3 L, in float dist) RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH > q; - float seed = g_xFrame_FrameCount * 0.001; + float seed = g_xFrame_Time; float2 uv = surface.screenUV; RayDesc ray; diff --git a/WickedEngine/shaders/raytraceCS.hlsl b/WickedEngine/shaders/raytraceCS.hlsl new file mode 100644 index 000000000..ff97f723f --- /dev/null +++ b/WickedEngine/shaders/raytraceCS.hlsl @@ -0,0 +1,562 @@ +#define RAY_BACKFACE_CULLING +#define RAYTRACE_STACK_SHARED +#include "globals.hlsli" +#include "raytracingHF.hlsli" + +#ifdef RTAPI +RAYTRACINGACCELERATIONSTRUCTURE(scene_acceleration_structure, TEXSLOT_ACCELERATION_STRUCTURE); +Texture2D bindless_textures[] : register(t0, space1); +ByteAddressBuffer bindless_buffers[] : register(t0, space2); +StructuredBuffer bindless_subsets[] : register(t0, space3); +Buffer bindless_ib[] : register(t0, space4); +#endif // RTAPI + +RWTEXTURE2D(resultTexture, float4, 0); + +[numthreads(RAYTRACING_LAUNCH_BLOCKSIZE, RAYTRACING_LAUNCH_BLOCKSIZE, 1)] +void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex) +{ + uint2 pixel = DTid.xy; + if (pixel.x >= xTraceResolution.x || pixel.y >= 
xTraceResolution.y) + { + return; + } + float3 result = 0; + + // Compute screen coordinates: + float2 screenUV = float2((pixel + xTracePixelOffset) * xTraceResolution_rcp.xy * 2.0f - 1.0f) * float2(1, -1); + float seed = xTraceRandomSeed; + + // Create starting ray: + Ray ray = CreateCameraRay(screenUV); + + uint bounces = xTraceUserData.x; + const uint bouncelimit = 16; + for (uint bounce = 0; ((bounce < min(bounces, bouncelimit)) && any(ray.energy)); ++bounce) + { + ray.Update(); + +#ifdef RTAPI + RayDesc apiray; + apiray.TMin = 0.001; + apiray.TMax = FLT_MAX; + apiray.Origin = ray.origin; + apiray.Direction = ray.direction; + RayQuery< + RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES + > q; + q.TraceRayInline( + scene_acceleration_structure, // RaytracingAccelerationStructure AccelerationStructure +#ifdef RAY_BACKFACE_CULLING + RAY_FLAG_CULL_BACK_FACING_TRIANGLES | +#endif // RAY_BACKFACE_CULLING + RAY_FLAG_FORCE_OPAQUE | + 0, // uint RayFlags + 0xFF, // uint InstanceInclusionMask + apiray // RayDesc Ray + ); + q.Proceed(); + if (q.CommittedStatus() != COMMITTED_TRIANGLE_HIT) +#else + RayHit hit = TraceRay_Closest(ray, groupIndex); + + if (hit.distance >= FLT_MAX - 1) +#endif // RTAPI + + { + float3 envColor; + [branch] + if (IsStaticSky()) + { + // We have envmap information in a texture: + envColor = DEGAMMA_SKY(texture_globalenvmap.SampleLevel(sampler_linear_clamp, ray.direction, 0).rgb); + } + else + { + envColor = GetDynamicSkyColor(ray.direction); + } + result += max(0, ray.energy * envColor); + + // Erase the ray's energy + ray.energy = 0.0f; + } + else + { + float3 facenormal = 0; + +#ifdef RTAPI + + // ray origin updated for next bounce: + ray.origin = q.WorldRayOrigin() + q.WorldRayDirection() * q.CommittedRayT(); + + // RTAPI path: bindless + ShaderMesh mesh = bindless_buffers[q.CommittedInstanceID()].Load(0); + ShaderMeshSubset subset = bindless_subsets[mesh.subsetbuffer][q.CommittedGeometryIndex()]; + ShaderMaterial material = 
bindless_buffers[subset.material].Load(0); + uint startIndex = q.CommittedPrimitiveIndex() * 3 + subset.indexOffset; + uint i0 = bindless_ib[mesh.ib][startIndex + 0]; + uint i1 = bindless_ib[mesh.ib][startIndex + 1]; + uint i2 = bindless_ib[mesh.ib][startIndex + 2]; + float4 uv0 = 0, uv1 = 0, uv2 = 0; + [branch] + if (mesh.vb_uv0 >= 0) + { + uv0.xy = unpack_half2(bindless_buffers[mesh.vb_uv0].Load(i0 * 4)); + uv1.xy = unpack_half2(bindless_buffers[mesh.vb_uv0].Load(i1 * 4)); + uv2.xy = unpack_half2(bindless_buffers[mesh.vb_uv0].Load(i2 * 4)); + } + [branch] + if (mesh.vb_uv1 >= 0) + { + uv0.zw = unpack_half2(bindless_buffers[mesh.vb_uv1].Load(i0 * 4)); + uv1.zw = unpack_half2(bindless_buffers[mesh.vb_uv1].Load(i1 * 4)); + uv2.zw = unpack_half2(bindless_buffers[mesh.vb_uv1].Load(i2 * 4)); + } + float3 n0 = 0, n1 = 0, n2 = 0; + [branch] + if (mesh.vb_pos_nor_wind >= 0) + { + const uint stride_POS = 16; + n0 = unpack_unitvector(bindless_buffers[mesh.vb_pos_nor_wind].Load4(i0 * stride_POS).w); + n1 = unpack_unitvector(bindless_buffers[mesh.vb_pos_nor_wind].Load4(i1 * stride_POS).w); + n2 = unpack_unitvector(bindless_buffers[mesh.vb_pos_nor_wind].Load4(i2 * stride_POS).w); + } + else + { + break; // error, this should always be good + } + + float2 barycentrics = q.CommittedTriangleBarycentrics(); + float u = barycentrics.x; + float v = barycentrics.y; + float w = 1 - u - v; + float4 uvsets = uv0 * w + uv1 * u + uv2 * v; + float3 N = n0 * w + n1 * u + n2 * v; + + N = mul((float3x3)q.CommittedObjectToWorld3x4(), N); + N = normalize(N); + facenormal = N; + + float4 baseColor = material.baseColor; + [branch] + if (material.texture_basecolormap_index >= 0 && (g_xFrame_Options & OPTION_BIT_DISABLE_ALBEDO_MAPS) == 0) + { + const float2 UV_baseColorMap = material.uvset_baseColorMap == 0 ? 
uvsets.xy : uvsets.zw; + baseColor = bindless_textures[material.texture_basecolormap_index].SampleLevel(sampler_linear_wrap, UV_baseColorMap, 2); + baseColor.rgb = DEGAMMA(baseColor.rgb); /* plain assignment, matching the other DEGAMMA call sites; the former compound multiply scaled the texel by itself */ + } + + [branch] + if (mesh.vb_col >= 0 && material.IsUsingVertexColors()) + { + float4 c0, c1, c2; + const uint stride_COL = 4; + c0 = unpack_rgba(bindless_buffers[mesh.vb_col].Load(i0 * stride_COL)); + c1 = unpack_rgba(bindless_buffers[mesh.vb_col].Load(i1 * stride_COL)); + c2 = unpack_rgba(bindless_buffers[mesh.vb_col].Load(i2 * stride_COL)); + float4 vertexColor = c0 * w + c1 * u + c2 * v; + baseColor *= vertexColor; + } + + [branch] + if (mesh.vb_tan >= 0 && material.texture_normalmap_index >= 0 && material.normalMapStrength > 0) + { + float4 t0, t1, t2; + const uint stride_TAN = 4; + t0 = unpack_utangent(bindless_buffers[mesh.vb_tan].Load(i0 * stride_TAN)); + t1 = unpack_utangent(bindless_buffers[mesh.vb_tan].Load(i1 * stride_TAN)); + t2 = unpack_utangent(bindless_buffers[mesh.vb_tan].Load(i2 * stride_TAN)); + float4 T = t0 * w + t1 * u + t2 * v; + T = T * 2 - 1; + T.xyz = mul((float3x3)q.CommittedObjectToWorld3x4(), T.xyz); + T.xyz = normalize(T.xyz); + float3 B = normalize(cross(T.xyz, N) * T.w); + float3x3 TBN = float3x3(T.xyz, B, N); + + const float2 UV_normalMap = material.uvset_normalMap == 0 ? uvsets.xy : uvsets.zw; + float3 normalMap = bindless_textures[material.texture_normalmap_index].SampleLevel(sampler_linear_wrap, UV_normalMap, 2).rgb; + normalMap = normalMap * 2 - 1; + N = normalize(lerp(N, mul(normalMap, TBN), material.normalMapStrength)); + } + + float4 surfaceMap = 1; + [branch] + if (material.texture_surfacemap_index >= 0) + { + const float2 UV_surfaceMap = material.uvset_surfaceMap == 0 ? 
uvsets.xy : uvsets.zw; + surfaceMap = bindless_textures[material.texture_surfacemap_index].SampleLevel(sampler_linear_wrap, UV_surfaceMap, 2); + } + + Surface surface; + surface.create(material, baseColor, surfaceMap); + + surface.emissiveColor = material.emissiveColor; + [branch] + if (material.texture_emissivemap_index >= 0) + { + const float2 UV_emissiveMap = material.uvset_emissiveMap == 0 ? uvsets.xy : uvsets.zw; + float4 emissiveMap = bindless_textures[material.texture_emissivemap_index].SampleLevel(sampler_linear_wrap, UV_emissiveMap, 2); + emissiveMap.rgb = DEGAMMA(emissiveMap.rgb); + surface.emissiveColor *= emissiveMap; + } + +#else + + // ray origin updated for next bounce: + ray.origin = hit.position; + + + TriangleData tri = TriangleData_Unpack(primitiveBuffer[hit.primitiveID], primitiveDataBuffer[hit.primitiveID]); + + float u = hit.bary.x; + float v = hit.bary.y; + float w = 1 - u - v; + + float3 N = normalize(tri.n0 * w + tri.n1 * u + tri.n2 * v); + float4 uvsets = tri.u0 * w + tri.u1 * u + tri.u2 * v; + float4 color = tri.c0 * w + tri.c1 * u + tri.c2 * v; + uint materialIndex = tri.materialIndex; + + facenormal = N; + + ShaderMaterial material = materialBuffer[materialIndex]; + + uvsets = frac(uvsets); // emulate wrap + + float4 baseColor; + [branch] + if (material.uvset_baseColorMap >= 0 && (g_xFrame_Options & OPTION_BIT_DISABLE_ALBEDO_MAPS) == 0) + { + const float2 UV_baseColorMap = material.uvset_baseColorMap == 0 ? uvsets.xy : uvsets.zw; + baseColor = materialTextureAtlas.SampleLevel(sampler_linear_clamp, UV_baseColorMap * material.baseColorAtlasMulAdd.xy + material.baseColorAtlasMulAdd.zw, 0); + baseColor.rgb = DEGAMMA(baseColor.rgb); + } + else + { + baseColor = 1; + } + baseColor *= color; + + float4 surfaceMap = 1; + [branch] + if (material.uvset_surfaceMap >= 0) + { + const float2 UV_surfaceMap = material.uvset_surfaceMap == 0 ? 
uvsets.xy : uvsets.zw; + surfaceMap = materialTextureAtlas.SampleLevel(sampler_linear_clamp, UV_surfaceMap * material.surfaceMapAtlasMulAdd.xy + material.surfaceMapAtlasMulAdd.zw, 0); + } + + Surface surface; + surface.create(material, baseColor, surfaceMap); + + surface.emissiveColor = material.emissiveColor; + [branch] + if (surface.emissiveColor.a > 0 && material.uvset_emissiveMap >= 0) + { + const float2 UV_emissiveMap = material.uvset_emissiveMap == 0 ? uvsets.xy : uvsets.zw; + float4 emissiveMap = materialTextureAtlas.SampleLevel(sampler_linear_clamp, UV_emissiveMap * material.emissiveMapAtlasMulAdd.xy + material.emissiveMapAtlasMulAdd.zw, 0); + emissiveMap.rgb = DEGAMMA(emissiveMap.rgb); + surface.emissiveColor *= emissiveMap; + } + + [branch] + if (material.uvset_normalMap >= 0) + { + const float2 UV_normalMap = material.uvset_normalMap == 0 ? uvsets.xy : uvsets.zw; + float3 normalMap = materialTextureAtlas.SampleLevel(sampler_linear_clamp, UV_normalMap * material.normalMapAtlasMulAdd.xy + material.normalMapAtlasMulAdd.zw, 0).rgb; + normalMap = normalMap.rgb * 2 - 1; + const float3x3 TBN = float3x3(tri.tangent, tri.binormal, N); + N = normalize(lerp(N, mul(normalMap, TBN), material.normalMapStrength)); + } + +#endif // RTAPI + + float3 P = ray.origin; + float3 V = normalize(g_xCamera_CamPos - P); + surface.P = P; + surface.N = N; + surface.V = V; + surface.update(); + + float3 current_energy = ray.energy; + result += max(0, current_energy * surface.emissiveColor.rgb * surface.emissiveColor.a); + + + float roulette; + + const float blendChance = 1 - baseColor.a; + roulette = rand(seed, screenUV); + if (roulette < blendChance) + { + // Alpha blending + + // The ray penetrates the surface, so push DOWN along normal to avoid self-intersection: + ray.origin = trace_bias_position(ray.origin, -N); + + // Add a new bounce iteration, otherwise the transparent effect can disappear: + bounces++; + } + else + { + const float refractChance = material.transmission; + 
roulette = rand(seed, screenUV); + if (roulette < refractChance) + { + // Refraction + const float3 R = refract(ray.direction, N, 1 - material.refraction); + ray.direction = lerp(R, SampleHemisphere_cos(R, seed, screenUV), surface.roughnessBRDF); + ray.energy *= surface.albedo; + + // The ray penetrates the surface, so push DOWN along normal to avoid self-intersection: + ray.origin = trace_bias_position(ray.origin, -N); + + // Add a new bounce iteration, otherwise the transparent effect can disappear: + bounces++; + } + else + { + const float3 F = F_Schlick(surface.f0, saturate(dot(-ray.direction, N))); + const float specChance = dot(F, 0.333); + + roulette = rand(seed, screenUV); + if (roulette < specChance) + { + // Specular reflection + const float3 R = reflect(ray.direction, N); + ray.direction = lerp(R, SampleHemisphere_cos(R, seed, screenUV), surface.roughnessBRDF); + ray.energy *= F / specChance; + } + else + { + // Diffuse reflection + ray.direction = SampleHemisphere_cos(N, seed, screenUV); + ray.energy *= surface.albedo / (1 - specChance); + } + + if (dot(ray.direction, facenormal) <= 0) + { + // Don't allow normal map to bend over the face normal more than 90 degrees to avoid light leaks + // In this case, we will not allow more bounces, + // but the current light sampling is still fine to avoid abrupt cutoff + ray.energy = 0; + } + + // Ray reflects from surface, so push UP along normal to avoid self-intersection: + ray.origin = trace_bias_position(ray.origin, N); + } + } + + + // Light sampling: + [loop] + for (uint iterator = 0; iterator < g_xFrame_LightArrayCount; iterator++) + { + ShaderEntity light = EntityArray[g_xFrame_LightArrayOffset + iterator]; + + Lighting lighting; + lighting.create(0, 0, 0, 0); + + float3 L = 0; + float dist = 0; + float NdotL = 0; + + switch (light.GetType()) + { + case ENTITY_TYPE_DIRECTIONALLIGHT: + { + dist = FLT_MAX; + + L = light.GetDirection().xyz; + + SurfaceToLight surfaceToLight; + surfaceToLight.create(surface, 
L); + + NdotL = surfaceToLight.NdotL; + + [branch] + if (NdotL > 0) + { + float3 atmosphereTransmittance = 1; + if (g_xFrame_Options & OPTION_BIT_REALISTIC_SKY) + { + AtmosphereParameters Atmosphere = GetAtmosphereParameters(); + atmosphereTransmittance = GetAtmosphericLightTransmittance(Atmosphere, surface.P, L, texture_transmittancelut); + } + + float3 lightColor = light.GetColor().rgb * light.GetEnergy() * atmosphereTransmittance; + + lighting.direct.specular = lightColor * BRDF_GetSpecular(surface, surfaceToLight); + lighting.direct.diffuse = lightColor * BRDF_GetDiffuse(surface, surfaceToLight); + } + } + break; + case ENTITY_TYPE_POINTLIGHT: + { + L = light.position - P; + const float dist2 = dot(L, L); + const float range2 = light.GetRange() * light.GetRange(); + + [branch] + if (dist2 < range2) + { + dist = sqrt(dist2); + L /= dist; + + SurfaceToLight surfaceToLight; + surfaceToLight.create(surface, L); + + NdotL = surfaceToLight.NdotL; + + [branch] + if (NdotL > 0) + { + const float range2 = light.GetRange() * light.GetRange(); + const float att = saturate(1 - (dist2 / range2)); + const float attenuation = att * att; + + float3 lightColor = light.GetColor().rgb * light.GetEnergy(); + lightColor *= attenuation; + + lighting.direct.specular = lightColor * BRDF_GetSpecular(surface, surfaceToLight); + lighting.direct.diffuse = lightColor * BRDF_GetDiffuse(surface, surfaceToLight); + } + } + } + break; + case ENTITY_TYPE_SPOTLIGHT: + { + L = light.position - surface.P; + const float dist2 = dot(L, L); + const float range2 = light.GetRange() * light.GetRange(); + + [branch] + if (dist2 < range2) + { + dist = sqrt(dist2); + L /= dist; + + SurfaceToLight surfaceToLight; + surfaceToLight.create(surface, L); + + NdotL = surfaceToLight.NdotL; + + [branch] + if (NdotL > 0) + { + const float SpotFactor = dot(L, light.GetDirection()); + const float spotCutOff = light.GetConeAngleCos(); + + [branch] + if (SpotFactor > spotCutOff) + { + const float range2 = 
light.GetRange() * light.GetRange(); + const float att = saturate(1 - (dist2 / range2)); + float attenuation = att * att; + attenuation *= saturate((1 - (1 - SpotFactor) * 1 / (1 - spotCutOff))); + + float3 lightColor = light.GetColor().rgb * light.GetEnergy(); + lightColor *= attenuation; + + lighting.direct.specular = lightColor * BRDF_GetSpecular(surface, surfaceToLight); + lighting.direct.diffuse = lightColor * BRDF_GetDiffuse(surface, surfaceToLight); + } + } + } + } + break; + } + + if (NdotL > 0 && dist > 0) + { + lighting.direct.diffuse = max(0, lighting.direct.diffuse); + lighting.direct.specular = max(0, lighting.direct.specular); + + float3 sampling_offset = float3(rand(seed, screenUV), rand(seed, screenUV), rand(seed, screenUV)) * 2 - 1; // todo: should be specific to light surface + + Ray newRay; + newRay.origin = P; + newRay.direction = L + sampling_offset * 0.025; + newRay.energy = 0; + newRay.Update(); +#ifdef RTAPI + RayDesc apiray; + apiray.TMin = 0.001; + apiray.TMax = dist; + apiray.Origin = newRay.origin; + apiray.Direction = newRay.direction; + q.TraceRayInline( + scene_acceleration_structure, // RaytracingAccelerationStructure AccelerationStructure + 0, // uint RayFlags + 0xFF, // uint InstanceInclusionMask + apiray // RayDesc Ray + ); + while (q.Proceed()) + { + ShaderMesh mesh = bindless_buffers[q.CandidateInstanceID()].Load(0); + ShaderMeshSubset subset = bindless_subsets[mesh.subsetbuffer][q.CandidateGeometryIndex()]; + ShaderMaterial material = bindless_buffers[subset.material].Load(0); + [branch] + if (material.texture_basecolormap_index < 0) + { + q.CommitNonOpaqueTriangleHit(); + continue; + } + uint startIndex = q.CandidatePrimitiveIndex() * 3 + subset.indexOffset; + uint i0 = bindless_ib[mesh.ib][startIndex + 0]; + uint i1 = bindless_ib[mesh.ib][startIndex + 1]; + uint i2 = bindless_ib[mesh.ib][startIndex + 2]; + float2 uv0 = 0, uv1 = 0, uv2 = 0; + [branch] + if (mesh.vb_uv0 >= 0 && material.uvset_baseColorMap == 0) + { + uv0 = 
unpack_half2(bindless_buffers[mesh.vb_uv0].Load(i0 * 4)); + uv1 = unpack_half2(bindless_buffers[mesh.vb_uv0].Load(i1 * 4)); + uv2 = unpack_half2(bindless_buffers[mesh.vb_uv0].Load(i2 * 4)); + } + else if (mesh.vb_uv1 >= 0 && material.uvset_baseColorMap != 0) + { + uv0 = unpack_half2(bindless_buffers[mesh.vb_uv1].Load(i0 * 4)); + uv1 = unpack_half2(bindless_buffers[mesh.vb_uv1].Load(i1 * 4)); + uv2 = unpack_half2(bindless_buffers[mesh.vb_uv1].Load(i2 * 4)); + } + else + { + q.CommitNonOpaqueTriangleHit(); + continue; + } + + float2 barycentrics = q.CandidateTriangleBarycentrics(); + float u = barycentrics.x; + float v = barycentrics.y; + float w = 1 - u - v; + float2 uv = uv0 * w + uv1 * u + uv2 * v; + float alpha = bindless_textures[material.texture_basecolormap_index].SampleLevel(sampler_point_wrap, uv, 2).a; + + [branch] + if (alpha - material.alphaTest > 0) + { + q.CommitNonOpaqueTriangleHit(); + } + } + bool hit = q.CommittedStatus() == COMMITTED_TRIANGLE_HIT; +#else + bool hit = TraceRay_Any(newRay, dist, groupIndex); +#endif // RTAPI + if (!hit) + { + result += max(0, current_energy * NdotL * (surface.albedo * lighting.direct.diffuse + lighting.direct.specular)); + } + } + } + + + } + + } + + // Pre-clear result texture for first bounce and first accumulation sample: + if (xTraceUserData.y == 0) + { + resultTexture[pixel] = 0; + } + resultTexture[pixel] = lerp(resultTexture[pixel], float4(result, 1), xTraceAccumulationFactor); +} diff --git a/WickedEngine/shaders/raytraceCS_rtapi.hlsl b/WickedEngine/shaders/raytraceCS_rtapi.hlsl new file mode 100644 index 000000000..f473f8031 --- /dev/null +++ b/WickedEngine/shaders/raytraceCS_rtapi.hlsl @@ -0,0 +1,4 @@ +#ifndef HLSL5 +#define RTAPI +#endif // HLSL5 +#include "raytraceCS.hlsl" diff --git a/WickedEngine/shaders/raytrace_closesthitCS.hlsl b/WickedEngine/shaders/raytrace_closesthitCS.hlsl deleted file mode 100644 index c2e85a474..000000000 --- a/WickedEngine/shaders/raytrace_closesthitCS.hlsl +++ /dev/null @@ 
-1,153 +0,0 @@ -#define RAY_BACKFACE_CULLING -#define RAYTRACE_STACK_SHARED -#include "globals.hlsli" -#include "raytracingHF.hlsli" - -RAWBUFFER(counterBuffer_READ, TEXSLOT_ONDEMAND7); -STRUCTUREDBUFFER(rayIndexBuffer_READ, uint, TEXSLOT_ONDEMAND8); -STRUCTUREDBUFFER(rayBuffer_READ, RaytracingStoredRay, TEXSLOT_ONDEMAND9); - -RWRAWBUFFER(counterBuffer_WRITE, 0); -RWSTRUCTUREDBUFFER(rayBuffer_WRITE, RaytracingStoredRay, 1); -#ifdef RAYTRACING_SORT_GLOBAL -RWSTRUCTUREDBUFFER(rayIndexBuffer_WRITE, uint, 2); -RWSTRUCTUREDBUFFER(raySortBuffer_WRITE, float, 3); -#endif // RAYTRACING_SORT_GLOBAL - -// This enables reduced atomics into global memory. -#define ADVANCED_ALLOCATION - -#ifdef ADVANCED_ALLOCATION -static const uint GroupActiveRayMaskBucketCount = RAYTRACING_TRACE_GROUPSIZE / 32; -groupshared uint GroupActiveRayMask[GroupActiveRayMaskBucketCount]; -groupshared uint GroupRayCount; -groupshared uint GroupRayWriteOffset; -#endif // ADVANCED_ALLOCATION - - -[numthreads(RAYTRACING_TRACE_GROUPSIZE, 1, 1)] -void main( uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex ) -{ -#ifdef ADVANCED_ALLOCATION - const bool isGlobalUpdateThread = groupIndex == 0; - const bool isBucketUpdateThread = groupIndex < GroupActiveRayMaskBucketCount; - - // Preinitialize group shared memory: - if (isGlobalUpdateThread) - { - GroupRayCount = 0; - GroupRayWriteOffset = 0; - } - if (isBucketUpdateThread) - { - GroupActiveRayMask[groupIndex] = 0; - } - GroupMemoryBarrierWithGroupSync(); -#endif // ADVANCED_ALLOCATION - - - // Initialize ray and pixel ID as non-contributing: - Ray ray = (Ray)0; - bool ray_active = false; - - if (DTid.x < counterBuffer_READ.Load(0)) - { - // Load the current ray: - ray = LoadRay(rayBuffer_READ[rayIndexBuffer_READ[DTid.x]]); - ray_active = any(ray.energy); - - if (ray_active) - { - RayHit hit = TraceRay_Closest(ray, groupIndex); - if (hit.distance >= FLT_MAX - 1) - { - float3 envColor; - [branch] - if (IsStaticSky()) - { - // We have envmap 
information in a texture: - envColor = DEGAMMA_SKY(texture_globalenvmap.SampleLevel(sampler_linear_clamp, ray.direction, 0).rgb); - } - else - { - envColor = GetDynamicSkyColor(ray.direction); - } - ray.color += max(0, ray.energy * envColor); - - // Erase the ray's energy - ray.energy = 0.0f; - } - - ray.origin = hit.position; - ray.primitiveID = hit.primitiveID; - ray.bary = hit.bary; - -#ifndef ADVANCED_ALLOCATION - // Naive strategy to allocate active rays. Global memory atomics will be performed for every thread: - uint dest; - counterBuffer_WRITE.InterlockedAdd(0, 1, dest); - rayBuffer_WRITE[dest] = CreateStoredRay(ray); -#ifdef RAYTRACING_SORT_GLOBAL - rayIndexBuffer_WRITE[dest] = dest; - raySortBuffer_WRITE[dest] = CreateRaySortCode(ray); -#endif // RAYTRACING_SORT_GLOBAL -#endif // ADVANCED_ALLOCATION - } - } - -#ifdef ADVANCED_ALLOCATION - - const uint bucket = groupIndex / 32; // which bitfield bucket does this thread belong to? - const uint threadIndexInBucket = groupIndex % 32; // thread bit offset from bucket start - const uint threadMask = 1u << threadIndexInBucket; // thread bit mask in current bucket - - // Count rays that are still active with a bitmask insertion: - if (ray_active) - { - InterlockedOr(GroupActiveRayMask[bucket], threadMask); - } - GroupMemoryBarrierWithGroupSync(); - - // Count all bucket set bits: - if (isBucketUpdateThread) - { - InterlockedAdd(GroupRayCount, countbits(GroupActiveRayMask[groupIndex])); - } - GroupMemoryBarrierWithGroupSync(); - - // Allocation: - if (isGlobalUpdateThread) - { - counterBuffer_WRITE.InterlockedAdd(0, GroupRayCount, GroupRayWriteOffset); - } - GroupMemoryBarrierWithGroupSync(); - - // Finally, write all active rays into global memory: - if (ray_active) - { - // Need to compute prefix-sum of just the active ray count before this thread - uint activePrefixSum = 0; - for (uint i = 0; i <= bucket; ++i) // only up until its own bucket - { - // If we are in a bucket before the current bucket, the prefix 
read mask is 0xFFFFFFFF aka 11111111111.... - uint prefixMask = 0xFFFFFFFF; - - // If we are in the current bucket, then we need to only consider the bits before the current thread (and also the current thread!) eg. 00000001111111..... - [flatten] - if (i == bucket) - { - prefixMask >>= (31 - threadIndexInBucket); - } - - activePrefixSum += countbits(GroupActiveRayMask[i] & prefixMask); - } - - const uint dest = GroupRayWriteOffset + activePrefixSum - 1; // -1 because activePrefixSum includes current thread, but arrays start from 0! - rayBuffer_WRITE[dest] = CreateStoredRay(ray); -#ifdef RAYTRACING_SORT_GLOBAL - rayIndexBuffer_WRITE[dest] = dest; - raySortBuffer_WRITE[dest] = CreateRaySortCode(ray); -#endif // RAYTRACING_SORT_GLOBAL - } -#endif // ADVANCED_ALLOCATION -} diff --git a/WickedEngine/shaders/raytrace_kickjobsCS.hlsl b/WickedEngine/shaders/raytrace_kickjobsCS.hlsl deleted file mode 100644 index dc6ca71d9..000000000 --- a/WickedEngine/shaders/raytrace_kickjobsCS.hlsl +++ /dev/null @@ -1,21 +0,0 @@ -#include "globals.hlsli" -#include "ShaderInterop_Raytracing.h" - -RWRAWBUFFER(counterBuffer_WRITE, 0); -RWRAWBUFFER(indirectBuffer, 1); - -RAWBUFFER(counterBuffer_READ, TEXSLOT_UNIQUE0); - -[numthreads(1, 1, 1)] -void main( uint3 DTid : SV_DispatchThreadID ) -{ - // Load raycount from previous step: - uint rayCount = counterBuffer_READ.Load(0); - - // write the indirect dispatch arguments: - indirectBuffer.Store3(RAYTRACE_INDIRECT_OFFSET_TRACE, uint3((rayCount + RAYTRACING_TRACE_GROUPSIZE - 1) / RAYTRACING_TRACE_GROUPSIZE, 1, 1)); - indirectBuffer.Store3(RAYTRACE_INDIRECT_OFFSET_TILESORT, uint3((rayCount + RAYTRACING_SORT_GROUPSIZE - 1) / RAYTRACING_SORT_GROUPSIZE, 1, 1)); - - // Reset counter buffer for this step: - counterBuffer_WRITE.Store(0, 0); -} diff --git a/WickedEngine/shaders/raytrace_launchCS.hlsl b/WickedEngine/shaders/raytrace_launchCS.hlsl deleted file mode 100644 index 5cc9d0908..000000000 --- a/WickedEngine/shaders/raytrace_launchCS.hlsl +++ 
/dev/null @@ -1,24 +0,0 @@ -#include "globals.hlsli" -#include "raytracingHF.hlsli" - -RWSTRUCTUREDBUFFER(rayIndexBuffer, uint, 0); -RWSTRUCTUREDBUFFER(rayBuffer, RaytracingStoredRay, 1); - -[numthreads(RAYTRACING_LAUNCH_BLOCKSIZE, RAYTRACING_LAUNCH_BLOCKSIZE, 1)] -void main( uint3 DTid : SV_DispatchThreadID ) -{ - if (DTid.x < xTraceResolution.x && DTid.y < xTraceResolution.y) - { - // Compute screen coordinates: - float2 uv = float2((DTid.xy + xTracePixelOffset) * xTraceResolution_rcp.xy * 2.0f - 1.0f) * float2(1, -1); - - // Create starting ray: - Ray ray = CreateCameraRay(uv); - ray.pixelID = (DTid.x & 0xFFFF) | ((DTid.y & 0xFFFF) << 16); - - // The launch writes each ray to the pixel location: - const uint rayIndex = flatten2D(DTid.xy, xTraceResolution.xy); - rayIndexBuffer[rayIndex] = rayIndex; - rayBuffer[rayIndex] = CreateStoredRay(ray); - } -} diff --git a/WickedEngine/shaders/raytrace_shadeCS.hlsl b/WickedEngine/shaders/raytrace_shadeCS.hlsl deleted file mode 100644 index f3f620724..000000000 --- a/WickedEngine/shaders/raytrace_shadeCS.hlsl +++ /dev/null @@ -1,306 +0,0 @@ -#define RAYTRACE_STACK_SHARED -#include "globals.hlsli" -#include "raytracingHF.hlsli" - -RAWBUFFER(counterBuffer_READ, TEXSLOT_ONDEMAND7); -STRUCTUREDBUFFER(rayIndexBuffer_READ, uint, TEXSLOT_ONDEMAND8); - -RWSTRUCTUREDBUFFER(rayBuffer, RaytracingStoredRay, 0); -RWTEXTURE2D(resultTexture, float4, 1); - -[numthreads(RAYTRACING_TRACE_GROUPSIZE, 1, 1)] -void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex) -{ - if (DTid.x >= counterBuffer_READ.Load(0)) - return; - - // Load the current ray: - const uint rayIndex = rayIndexBuffer_READ[DTid.x]; - //const uint rayIndex = DTid.x; - Ray ray = LoadRay(rayBuffer[rayIndex]); - uint2 pixel = uint2(ray.pixelID & 0xFFFF, (ray.pixelID >> 16) & 0xFFFF); - - if (any(ray.energy)) - { - float3 bounceResult = 0; - float2 uv = float2((pixel + xTracePixelOffset) * xTraceResolution_rcp.xy * 2.0f - 1.0f) * float2(1, -1); - float seed = 
xTraceRandomSeed; - - TriangleData tri = TriangleData_Unpack(primitiveBuffer[ray.primitiveID], primitiveDataBuffer[ray.primitiveID]); - - float u = ray.bary.x; - float v = ray.bary.y; - float w = 1 - u - v; - - float3 N = normalize(tri.n0 * w + tri.n1 * u + tri.n2 * v); - float4 uvsets = tri.u0 * w + tri.u1 * u + tri.u2 * v; - float4 color = tri.c0 * w + tri.c1 * u + tri.c2 * v; - uint materialIndex = tri.materialIndex; - - ShaderMaterial material = materialBuffer[materialIndex]; - - uvsets = frac(uvsets); // emulate wrap - - float4 baseColor; - [branch] - if (material.uvset_baseColorMap >= 0 && (g_xFrame_Options & OPTION_BIT_DISABLE_ALBEDO_MAPS) == 0) - { - const float2 UV_baseColorMap = material.uvset_baseColorMap == 0 ? uvsets.xy : uvsets.zw; - baseColor = materialTextureAtlas.SampleLevel(sampler_linear_clamp, UV_baseColorMap * material.baseColorAtlasMulAdd.xy + material.baseColorAtlasMulAdd.zw, 0); - baseColor.rgb = DEGAMMA(baseColor.rgb); - } - else - { - baseColor = 1; - } - baseColor *= color; - - float4 surfaceMap = 1; - [branch] - if (material.uvset_surfaceMap >= 0) - { - const float2 UV_surfaceMap = material.uvset_surfaceMap == 0 ? uvsets.xy : uvsets.zw; - surfaceMap = materialTextureAtlas.SampleLevel(sampler_linear_clamp, UV_surfaceMap * material.surfaceMapAtlasMulAdd.xy + material.surfaceMapAtlasMulAdd.zw, 0); - } - - Surface surface; - surface.create(material, baseColor, surfaceMap); - - surface.emissiveColor = material.emissiveColor; - [branch] - if (surface.emissiveColor.a > 0 && material.uvset_emissiveMap >= 0) - { - const float2 UV_emissiveMap = material.uvset_emissiveMap == 0 ? 
uvsets.xy : uvsets.zw; - float4 emissiveMap = materialTextureAtlas.SampleLevel(sampler_linear_clamp, UV_emissiveMap * material.emissiveMapAtlasMulAdd.xy + material.emissiveMapAtlasMulAdd.zw, 0); - emissiveMap.rgb = DEGAMMA(emissiveMap.rgb); - surface.emissiveColor *= emissiveMap; - } - - bounceResult += surface.emissiveColor.rgb * surface.emissiveColor.a; - - [branch] - if (material.uvset_normalMap >= 0) - { - const float2 UV_normalMap = material.uvset_normalMap == 0 ? uvsets.xy : uvsets.zw; - float3 normalMap = materialTextureAtlas.SampleLevel(sampler_linear_clamp, UV_normalMap * material.normalMapAtlasMulAdd.xy + material.normalMapAtlasMulAdd.zw, 0).rgb; - normalMap = normalMap.rgb * 2 - 1; - const float3x3 TBN = float3x3(tri.tangent, tri.binormal, N); - N = normalize(lerp(N, mul(normalMap, TBN), material.normalMapStrength)); - } - - // Calculate chances of reflection types: - const float refractChance = 1 - baseColor.a; - - // Roughness to cone aperture: - float alphaRoughness = surface.roughness * surface.roughness; - - // Roulette-select the ray's path - float roulette = rand(seed, uv); - if (roulette < refractChance) - { - // Refraction - const float3 R = refract(ray.direction, N, 1 - material.refraction); - ray.direction = lerp(R, SampleHemisphere_cos(R, seed, uv), alphaRoughness); - ray.energy *= lerp(baseColor.rgb, 1, refractChance); - - // The ray penetrates the surface, so push DOWN along normal to avoid self-intersection: - ray.origin = trace_bias_position(ray.origin, -N); - } - else - { - // Calculate chances of reflection types: - const float3 F = F_Schlick(surface.f0, saturate(dot(-ray.direction, N))); - const float specChance = dot(F, 0.333); - - roulette = rand(seed, uv); - if (roulette < specChance) - { - // Specular reflection - const float3 R = reflect(ray.direction, N); - ray.direction = lerp(R, SampleHemisphere_cos(R, seed, uv), alphaRoughness); - ray.energy *= F / specChance; - } - else - { - // Diffuse reflection - ray.direction = 
SampleHemisphere_cos(N, seed, uv); - ray.energy *= surface.albedo / (1 - specChance); - } - - // Ray reflects from surface, so push UP along normal to avoid self-intersection: - ray.origin = trace_bias_position(ray.origin, N); - } - - ray.Update(); - - - - // Light sampling: - float3 P = ray.origin; - float3 V = normalize(g_xCamera_CamPos - P); - surface.P = P; - surface.N = N; - surface.V = V; - surface.update(); - - [loop] - for (uint iterator = 0; iterator < g_xFrame_LightArrayCount; iterator++) - { - ShaderEntity light = EntityArray[g_xFrame_LightArrayOffset + iterator]; - - Lighting lighting; - lighting.create(0, 0, 0, 0); - - float3 L = 0; - float dist = 0; - float NdotL = 0; - - switch (light.GetType()) - { - case ENTITY_TYPE_DIRECTIONALLIGHT: - { - dist = FLT_MAX; - - L = light.GetDirection().xyz; - - SurfaceToLight surfaceToLight; - surfaceToLight.create(surface, L); - - NdotL = surfaceToLight.NdotL; - - [branch] - if (NdotL > 0) - { - float3 atmosphereTransmittance = 1; - if (g_xFrame_Options & OPTION_BIT_REALISTIC_SKY) - { - AtmosphereParameters Atmosphere = GetAtmosphereParameters(); - atmosphereTransmittance = GetAtmosphericLightTransmittance(Atmosphere, surface.P, L, texture_transmittancelut); - } - - float3 lightColor = light.GetColor().rgb * light.GetEnergy() * atmosphereTransmittance; - - lighting.direct.specular = lightColor * BRDF_GetSpecular(surface, surfaceToLight); - lighting.direct.diffuse = lightColor * BRDF_GetDiffuse(surface, surfaceToLight); - } - } - break; - case ENTITY_TYPE_POINTLIGHT: - { - L = light.position - P; - const float dist2 = dot(L, L); - const float range2 = light.GetRange() * light.GetRange(); - - [branch] - if (dist2 < range2) - { - dist = sqrt(dist2); - L /= dist; - - SurfaceToLight surfaceToLight; - surfaceToLight.create(surface, L); - - NdotL = surfaceToLight.NdotL; - - [branch] - if (NdotL > 0) - { - const float3 lightColor = light.GetColor().rgb * light.GetEnergy(); - - lighting.direct.specular = lightColor * 
BRDF_GetSpecular(surface, surfaceToLight); - lighting.direct.diffuse = lightColor * BRDF_GetDiffuse(surface, surfaceToLight); - - const float range2 = light.GetRange() * light.GetRange(); - const float att = saturate(1 - (dist2 / range2)); - const float attenuation = att * att; - - lighting.direct.diffuse *= attenuation; - lighting.direct.specular *= attenuation; - } - } - } - break; - case ENTITY_TYPE_SPOTLIGHT: - { - L = light.position - surface.P; - const float dist2 = dot(L, L); - const float range2 = light.GetRange() * light.GetRange(); - - [branch] - if (dist2 < range2) - { - dist = sqrt(dist2); - L /= dist; - - SurfaceToLight surfaceToLight; - surfaceToLight.create(surface, L); - - NdotL = surfaceToLight.NdotL; - - [branch] - if (NdotL > 0) - { - const float SpotFactor = dot(L, light.GetDirection()); - const float spotCutOff = light.GetConeAngleCos(); - - [branch] - if (SpotFactor > spotCutOff) - { - const float3 lightColor = light.GetColor().rgb * light.GetEnergy(); - - lighting.direct.specular = lightColor * BRDF_GetSpecular(surface, surfaceToLight); - lighting.direct.diffuse = lightColor * BRDF_GetDiffuse(surface, surfaceToLight); - - const float range2 = light.GetRange() * light.GetRange(); - const float att = saturate(1 - (dist2 / range2)); - float attenuation = att * att; - attenuation *= saturate((1 - (1 - SpotFactor) * 1 / (1 - spotCutOff))); - - lighting.direct.diffuse *= attenuation; - lighting.direct.specular *= attenuation; - } - } - } - } - break; - } - - if (NdotL > 0 && dist > 0) - { - lighting.direct.diffuse = max(0, lighting.direct.diffuse); - lighting.direct.specular = max(0, lighting.direct.specular); - - float3 sampling_offset = float3(rand(seed, uv), rand(seed, uv), rand(seed, uv)) * 2 - 1; // todo: should be specific to light surface - - Ray newRay; - newRay.origin = P; - newRay.direction = L + sampling_offset * 0.025; - newRay.direction_rcp = rcp(newRay.direction); - newRay.energy = 0; - bool hit = TraceRay_Any(newRay, dist, 
groupIndex); - bounceResult += (hit ? 0 : NdotL) * (lighting.direct.diffuse + lighting.direct.specular); - } - } - - - ray.color += max(0, ray.energy * bounceResult); - } - - - // Pre-clear result texture for first bounce and first accumulation sample: - if (xTraceUserData.x == 1) - { - resultTexture[pixel] = 0; - } - if (!any(ray.energy) || xTraceUserData.y == 1) - { - // If the ray is killed or last bounce, we write to accumulation texture: - resultTexture[pixel] = lerp(resultTexture[pixel], float4(ray.color, 1), xTraceAccumulationFactor); - } - else - { - // Else, continue with storing the ray: - rayBuffer[rayIndex] = CreateStoredRay(ray); - } - -} diff --git a/WickedEngine/shaders/raytrace_tilesortCS.hlsl b/WickedEngine/shaders/raytrace_tilesortCS.hlsl deleted file mode 100644 index 1477b3477..000000000 --- a/WickedEngine/shaders/raytrace_tilesortCS.hlsl +++ /dev/null @@ -1,58 +0,0 @@ -#include "globals.hlsli" -#include "raytracingHF.hlsli" - -RAWBUFFER(counterBuffer_READ, TEXSLOT_ONDEMAND7); -STRUCTUREDBUFFER(rayBuffer_READ, RaytracingStoredRay, TEXSLOT_ONDEMAND8); - -RWSTRUCTUREDBUFFER(rayIndexBuffer_WRITE, uint, 0); - -static const uint numArray = RAYTRACING_SORT_GROUPSIZE; -static const uint numArrayPowerOfTwo = 2u << firstbithigh(numArray - 1); -groupshared uint2 Array[numArray]; - -void BitonicSort(in uint localIdxFlattened) -{ - for (uint nMergeSize = 2; nMergeSize <= numArrayPowerOfTwo; nMergeSize = nMergeSize * 2) - { - for (uint nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1) - { - uint tmp_index = localIdxFlattened; - uint index_low = tmp_index & (nMergeSubSize - 1); - uint index_high = 2 * (tmp_index - index_low); - uint index = index_high + index_low; - - uint nSwapElem = nMergeSubSize == nMergeSize >> 1 ? 
index_high + (2 * nMergeSubSize - 1) - index_low : index_high + nMergeSubSize + index_low; - - if (nSwapElem < numArray && index < numArray) - { - if (Array[index].x > Array[nSwapElem].x) - { - uint2 uTemp = Array[index]; - Array[index] = Array[nSwapElem]; - Array[nSwapElem] = uTemp; - } - } - GroupMemoryBarrierWithGroupSync(); - } - } -} - -[numthreads(RAYTRACING_SORT_GROUPSIZE, 1, 1)] -void main(uint3 DTid : SV_DispatchThreadID, uint groupIndex : SV_GroupIndex) -{ - uint sortcode = ~0; - - [branch] - if (DTid.x < counterBuffer_READ.Load(0)) - { - sortcode = CreateRaySortCode(LoadRay(rayBuffer_READ[DTid.x])); - } - - Array[groupIndex] = uint2(sortcode, DTid.x); - GroupMemoryBarrierWithGroupSync(); - - BitonicSort(groupIndex); - GroupMemoryBarrierWithGroupSync(); - - rayIndexBuffer_WRITE[DTid.x] = Array[groupIndex].y; -} diff --git a/WickedEngine/shaders/raytracingHF.hlsli b/WickedEngine/shaders/raytracingHF.hlsli index 04e81ed22..f8c17a58a 100644 --- a/WickedEngine/shaders/raytracingHF.hlsli +++ b/WickedEngine/shaders/raytracingHF.hlsli @@ -15,14 +15,10 @@ inline float3 trace_bias_position(in float3 P, in float3 N) struct Ray { - uint pixelID; float3 origin; float3 direction; float3 direction_rcp; float3 energy; - uint primitiveID; - float2 bary; - float3 color; inline void Update() { @@ -30,65 +26,12 @@ struct Ray } }; -inline uint CreateRaySortCode(in Ray ray) -{ - // Sorting purely based on morton code works best so far: - return morton3D((ray.origin - g_xFrame_WorldBoundsMin) * g_xFrame_WorldBoundsExtents_rcp); - - //return ray.primitiveID; - - //uint hash = 0; - - //// quantize direction [-1; 1] on 8x4x8 grid (3 + 2 + 3 = 8 bits): - //hash |= (uint)clamp(ray.direction.x * 4 + 4, 0, 7) << 0; - //hash |= (uint)clamp(ray.direction.y * 2 + 2, 0, 3) << 3; - //hash |= (uint)clamp(ray.direction.z * 4 + 4, 0, 7) << 5; - - //// quantize origin [0, 1] on 256x256x256 grid (8 bits per component): - //const float3 origin = (ray.origin - g_xFrame_WorldBoundsMin) * 
g_xFrame_WorldBoundsExtents_rcp; - //hash |= ((uint)abs(origin.x * 255) % 256) << 8; - //hash |= ((uint)abs(origin.x * 255) % 256) << 16; - //hash |= ((uint)abs(origin.x * 255) % 256) << 24; - - //return (float)hash; -} -inline RaytracingStoredRay CreateStoredRay(in Ray ray) -{ - RaytracingStoredRay storedray; - - storedray.origin = ray.origin; - storedray.pixelID = ray.pixelID; - storedray.direction_energy = f32tof16(ray.direction) | (f32tof16(ray.energy) << 16); - storedray.primitiveID = ray.primitiveID; - storedray.bary = ray.bary; - storedray.color = pack_half3(ray.color); - - return storedray; -} -inline Ray LoadRay(in RaytracingStoredRay storedray) -{ - Ray ray; - ray.pixelID = storedray.pixelID; - ray.origin = storedray.origin; - ray.direction = asfloat(f16tof32(storedray.direction_energy)); - ray.energy = asfloat(f16tof32(storedray.direction_energy >> 16)); - ray.primitiveID = storedray.primitiveID; - ray.bary = storedray.bary; - ray.color = unpack_half3(storedray.color); - ray.Update(); - return ray; -} - inline Ray CreateRay(float3 origin, float3 direction) { Ray ray; ray.origin = origin; - ray.direction = direction; + ray.direction = normalize(direction); ray.energy = float3(1, 1, 1); - ray.pixelID = 0xFFFFFFFF; - ray.primitiveID = 0xFFFFFFFF; - ray.bary = 0; - ray.color = 0; ray.Update(); return ray; } @@ -274,7 +217,7 @@ inline bool IntersectNode(in Ray ray, in BVHNode box) // have the stack in shared memory instead of registers: #ifdef RAYTRACE_STACK_SHARED -groupshared uint stack[RAYTRACE_STACKSIZE][RAYTRACING_TRACE_GROUPSIZE]; +groupshared uint stack[RAYTRACE_STACKSIZE][RAYTRACING_LAUNCH_BLOCKSIZE * RAYTRACING_LAUNCH_BLOCKSIZE]; #endif // RAYTRACE_STACK_SHARED STRUCTUREDBUFFER(materialBuffer, ShaderMaterial, TEXSLOT_ONDEMAND0); diff --git a/WickedEngine/shaders/renderlightmapPS.hlsl b/WickedEngine/shaders/renderlightmapPS.hlsl index 1d2b3822c..1702d0dda 100644 --- a/WickedEngine/shaders/renderlightmapPS.hlsl +++ 
b/WickedEngine/shaders/renderlightmapPS.hlsl @@ -2,6 +2,14 @@ #include "globals.hlsli" #include "raytracingHF.hlsli" +#ifdef RTAPI +RAYTRACINGACCELERATIONSTRUCTURE(scene_acceleration_structure, TEXSLOT_ACCELERATION_STRUCTURE); +Texture2D bindless_textures[] : register(t0, space1); +ByteAddressBuffer bindless_buffers[] : register(t0, space2); +StructuredBuffer bindless_subsets[] : register(t0, space3); +Buffer bindless_ib[] : register(t0, space4); +#endif // RTAPI + struct Input { float4 pos : SV_POSITION; @@ -18,12 +26,13 @@ float4 main(Input input) : SV_TARGET float seed = xTraceRandomSeed; float3 direction = SampleHemisphere_cos(N, seed, uv); Ray ray = CreateRay(trace_bias_position(P, N), direction); + float3 result = 0; - const uint bounces = xTraceUserData.x; - for (uint i = 0; (i < bounces) && any(ray.energy); ++i) + uint bounces = xTraceUserData.x; + const uint bouncelimit = 16; + for (uint bounce = 0; ((bounce < min(bounces, bouncelimit)) && any(ray.energy)); ++bounce) { P = ray.origin; - float3 bounceResult = 0; [loop] for (uint iterator = 0; iterator < g_xFrame_LightArrayCount; iterator++) @@ -144,18 +153,50 @@ float4 main(Input input) : SV_TARGET Ray newRay; newRay.origin = trace_bias_position(P, N); newRay.direction = L + sampling_offset * 0.025f; - newRay.direction_rcp = rcp(newRay.direction); newRay.energy = 0; + newRay.Update(); +#ifdef RTAPI + RayDesc apiray; + apiray.TMin = 0.001; + apiray.TMax = dist; + apiray.Origin = newRay.origin; + apiray.Direction = newRay.direction; + RayQuery< + RAY_FLAG_FORCE_OPAQUE | + RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES | + RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH + > q; + q.TraceRayInline(scene_acceleration_structure, 0, 0xFF, apiray); + q.Proceed(); + bool hit = q.CommittedStatus() == COMMITTED_TRIANGLE_HIT; +#else bool hit = TraceRay_Any(newRay, dist); - bounceResult += (hit ? 0 : NdotL) * lighting.direct.diffuse / PI; +#endif // RTAPI + result += max(0, ray.energy * (hit ? 
0 : NdotL) * lighting.direct.diffuse / PI); } } - ray.color += max(0, ray.energy * bounceResult); // Sample primary ray (scene materials, sky, etc): + +#ifdef RTAPI + RayDesc apiray; + apiray.TMin = 0.001; + apiray.TMax = FLT_MAX; + apiray.Origin = ray.origin; + apiray.Direction = ray.direction; + RayQuery< + RAY_FLAG_FORCE_OPAQUE | + RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES + > q; + q.TraceRayInline(scene_acceleration_structure, 0, 0xFF, apiray); + q.Proceed(); + if(q.CommittedStatus() != COMMITTED_TRIANGLE_HIT) +#else RayHit hit = TraceRay_Closest(ray); if (hit.distance >= FLT_MAX - 1) +#endif // RTAPI + { float3 envColor; [branch] @@ -168,21 +209,137 @@ float4 main(Input input) : SV_TARGET { envColor = GetDynamicSkyColor(ray.direction, true, true, false, true); } - ray.color += max(0, ray.energy * envColor); + result += max(0, ray.energy * envColor); // Erase the ray's energy - ray.energy = 0.0f; + ray.energy = 0; break; } +#ifdef RTAPI + + // ray origin updated for next bounce: + ray.origin = q.WorldRayOrigin() + q.WorldRayDirection() * q.CommittedRayT(); + + // RTAPI path: bindless + ShaderMesh mesh = bindless_buffers[q.CommittedInstanceID()].Load(0); + ShaderMeshSubset subset = bindless_subsets[mesh.subsetbuffer][q.CommittedGeometryIndex()]; + ShaderMaterial material = bindless_buffers[subset.material].Load(0); + uint startIndex = q.CommittedPrimitiveIndex() * 3 + subset.indexOffset; + uint i0 = bindless_ib[mesh.ib][startIndex + 0]; + uint i1 = bindless_ib[mesh.ib][startIndex + 1]; + uint i2 = bindless_ib[mesh.ib][startIndex + 2]; + float4 uv0 = 0, uv1 = 0, uv2 = 0; + [branch] + if (mesh.vb_uv0 >= 0) + { + uv0.xy = unpack_half2(bindless_buffers[mesh.vb_uv0].Load(i0 * 4)); + uv1.xy = unpack_half2(bindless_buffers[mesh.vb_uv0].Load(i1 * 4)); + uv2.xy = unpack_half2(bindless_buffers[mesh.vb_uv0].Load(i2 * 4)); + } + [branch] + if (mesh.vb_uv1 >= 0) + { + uv0.zw = unpack_half2(bindless_buffers[mesh.vb_uv1].Load(i0 * 4)); + uv1.zw = 
unpack_half2(bindless_buffers[mesh.vb_uv1].Load(i1 * 4)); + uv2.zw = unpack_half2(bindless_buffers[mesh.vb_uv1].Load(i2 * 4)); + } + float3 n0 = 0, n1 = 0, n2 = 0; + [branch] + if (mesh.vb_pos_nor_wind >= 0) + { + const uint stride_POS = 16; + n0 = unpack_unitvector(bindless_buffers[mesh.vb_pos_nor_wind].Load4(i0 * stride_POS).w); + n1 = unpack_unitvector(bindless_buffers[mesh.vb_pos_nor_wind].Load4(i1 * stride_POS).w); + n2 = unpack_unitvector(bindless_buffers[mesh.vb_pos_nor_wind].Load4(i2 * stride_POS).w); + } + else + { + return float4(1, 0, 1, 1); // error, this should always be good + } + + float2 barycentrics = q.CommittedTriangleBarycentrics(); + float u = barycentrics.x; + float v = barycentrics.y; + float w = 1 - u - v; + float4 uvsets = uv0 * w + uv1 * u + uv2 * v; + float3 N = n0 * w + n1 * u + n2 * v; + + N = mul((float3x3)q.CommittedObjectToWorld3x4(), N); + N = normalize(N); + + float4 baseColor = material.baseColor; + [branch] + if (material.texture_basecolormap_index >= 0 && (g_xFrame_Options & OPTION_BIT_DISABLE_ALBEDO_MAPS) == 0) + { + const float2 UV_baseColorMap = material.uvset_baseColorMap == 0 ? 
uvsets.xy : uvsets.zw; + baseColor = bindless_textures[material.texture_basecolormap_index].SampleLevel(sampler_linear_wrap, UV_baseColorMap, 2); + baseColor.rgb = DEGAMMA(baseColor.rgb); /* assign the converted texel; '*=' multiplied the raw texel by its converted value */ + } + + [branch] + if (mesh.vb_col >= 0 && material.IsUsingVertexColors()) + { + float4 c0, c1, c2; + const uint stride_COL = 4; + c0 = unpack_rgba(bindless_buffers[mesh.vb_col].Load(i0 * stride_COL)); + c1 = unpack_rgba(bindless_buffers[mesh.vb_col].Load(i1 * stride_COL)); + c2 = unpack_rgba(bindless_buffers[mesh.vb_col].Load(i2 * stride_COL)); + float4 vertexColor = c0 * w + c1 * u + c2 * v; + baseColor *= vertexColor; + } + + [branch] + if (mesh.vb_tan >= 0 && material.texture_normalmap_index >= 0 && material.normalMapStrength > 0) + { + float4 t0, t1, t2; + const uint stride_TAN = 4; + t0 = unpack_utangent(bindless_buffers[mesh.vb_tan].Load(i0 * stride_TAN)); + t1 = unpack_utangent(bindless_buffers[mesh.vb_tan].Load(i1 * stride_TAN)); + t2 = unpack_utangent(bindless_buffers[mesh.vb_tan].Load(i2 * stride_TAN)); + float4 T = t0 * w + t1 * u + t2 * v; + T = T * 2 - 1; + T.xyz = mul((float3x3)q.CommittedObjectToWorld3x4(), T.xyz); + T.xyz = normalize(T.xyz); + float3 B = normalize(cross(T.xyz, N) * T.w); + float3x3 TBN = float3x3(T.xyz, B, N); + + const float2 UV_normalMap = material.uvset_normalMap == 0 ? uvsets.xy : uvsets.zw; + float3 normalMap = bindless_textures[material.texture_normalmap_index].SampleLevel(sampler_linear_wrap, UV_normalMap, 2).rgb; + normalMap = normalMap * 2 - 1; + N = normalize(lerp(N, mul(normalMap, TBN), material.normalMapStrength)); + } + + float4 surfaceMap = 1; + [branch] + if (material.texture_surfacemap_index >= 0) + { + const float2 UV_surfaceMap = material.uvset_surfaceMap == 0 ?
uvsets.xy : uvsets.zw; + surfaceMap = bindless_textures[material.texture_surfacemap_index].SampleLevel(sampler_linear_wrap, UV_surfaceMap, 2); + } + + Surface surface; + surface.create(material, baseColor, surfaceMap); + + surface.emissiveColor = material.emissiveColor; + [branch] + if (material.texture_emissivemap_index >= 0) + { + const float2 UV_emissiveMap = material.uvset_emissiveMap == 0 ? uvsets.xy : uvsets.zw; + float4 emissiveMap = bindless_textures[material.texture_emissivemap_index].SampleLevel(sampler_linear_wrap, UV_emissiveMap, 2); + emissiveMap.rgb = DEGAMMA(emissiveMap.rgb); + surface.emissiveColor *= emissiveMap; + } + +#else + + // Non-RTAPI path: sampling from texture atlas ray.origin = hit.position; - ray.primitiveID = hit.primitiveID; - ray.bary = hit.bary; - TriangleData tri = TriangleData_Unpack(primitiveBuffer[ray.primitiveID], primitiveDataBuffer[ray.primitiveID]); + TriangleData tri = TriangleData_Unpack(primitiveBuffer[hit.primitiveID], primitiveDataBuffer[hit.primitiveID]); - float u = ray.bary.x; - float v = ray.bary.y; + float u = hit.bary.x; + float v = hit.bary.y; float w = 1 - u - v; N = normalize(tri.n0 * w + tri.n1 * u + tri.n2 * v); @@ -229,8 +386,6 @@ float4 main(Input input) : SV_TARGET surface.emissiveColor *= emissiveMap; } - ray.color += max(0, ray.energy * surface.emissiveColor.rgb * surface.emissiveColor.a); - [branch] if (material.uvset_normalMap >= 0) { @@ -240,12 +395,14 @@ float4 main(Input input) : SV_TARGET const float3x3 TBN = float3x3(tri.tangent, tri.binormal, N); N = normalize(lerp(N, mul(normalMap, TBN), material.normalMapStrength)); } +#endif // RTAPI + + surface.update(); + + result += max(0, ray.energy * surface.emissiveColor.rgb * surface.emissiveColor.a); // Calculate chances of reflection types: - const float refractChance = 1 - baseColor.a; - - // Roughness to cone aperture: - float alphaRoughness = surface.roughness * surface.roughness; + const float refractChance = material.transmission; // 
Roulette-select the ray's path float roulette = rand(seed, uv); @@ -253,11 +410,14 @@ float4 main(Input input) : SV_TARGET { // Refraction const float3 R = refract(ray.direction, N, 1 - material.refraction); - ray.direction = lerp(R, SampleHemisphere_cos(R, seed, uv), alphaRoughness); - ray.energy *= lerp(baseColor.rgb, 1, refractChance); + ray.direction = lerp(R, SampleHemisphere_cos(R, seed, uv), surface.roughnessBRDF); + ray.energy *= surface.albedo; // The ray penetrates the surface, so push DOWN along normal to avoid self-intersection: ray.origin = trace_bias_position(ray.origin, -N); + + // Add a new bounce iteration, otherwise the transparent effect can disappear: + bounces++; } else { @@ -270,7 +430,7 @@ float4 main(Input input) : SV_TARGET { // Specular reflection const float3 R = reflect(ray.direction, N); - ray.direction = lerp(R, SampleHemisphere_cos(R, seed, uv), alphaRoughness); + ray.direction = lerp(R, SampleHemisphere_cos(R, seed, uv), surface.roughnessBRDF); ray.energy *= F / specChance; } else @@ -287,5 +447,5 @@ float4 main(Input input) : SV_TARGET ray.Update(); } - return float4(ray.color, xTraceAccumulationFactor); + return float4(result, xTraceAccumulationFactor); } diff --git a/WickedEngine/shaders/renderlightmapPS_rtapi.hlsl b/WickedEngine/shaders/renderlightmapPS_rtapi.hlsl new file mode 100644 index 000000000..7c5500937 --- /dev/null +++ b/WickedEngine/shaders/renderlightmapPS_rtapi.hlsl @@ -0,0 +1,4 @@ +#ifndef HLSL5 +#define RTAPI +#endif // HLSL5 +#include "renderlightmapPS.hlsl" diff --git a/WickedEngine/shaders/rtaoLIB.hlsl b/WickedEngine/shaders/rtaoLIB.hlsl index bc7184e9a..028e14fd2 100644 --- a/WickedEngine/shaders/rtaoLIB.hlsl +++ b/WickedEngine/shaders/rtaoLIB.hlsl @@ -78,10 +78,10 @@ void RTAO_AnyHit(inout RayPayload payload, in BuiltInTriangleIntersectionAttribu AcceptHitAndEndSearch(); return; } - uint primitiveIndex = PrimitiveIndex(); - uint i0 = bindless_ib[mesh.ib][primitiveIndex * 3 + 0]; - uint i1 = 
bindless_ib[mesh.ib][primitiveIndex * 3 + 1]; - uint i2 = bindless_ib[mesh.ib][primitiveIndex * 3 + 2]; + uint startIndex = PrimitiveIndex() * 3 + subset.indexOffset; + uint i0 = bindless_ib[mesh.ib][startIndex + 0]; + uint i1 = bindless_ib[mesh.ib][startIndex + 1]; + uint i2 = bindless_ib[mesh.ib][startIndex + 2]; float2 uv0 = 0, uv1 = 0, uv2 = 0; [branch] if (mesh.vb_uv0 >= 0 && material.uvset_baseColorMap == 0) diff --git a/WickedEngine/shaders/rtreflectionLIB.hlsl b/WickedEngine/shaders/rtreflectionLIB.hlsl index 37ddcc6e9..a5b5915f5 100644 --- a/WickedEngine/shaders/rtreflectionLIB.hlsl +++ b/WickedEngine/shaders/rtreflectionLIB.hlsl @@ -112,10 +112,10 @@ void RTReflection_ClosestHit(inout RayPayload payload, in BuiltInTriangleInterse ShaderMesh mesh = bindless_buffers[InstanceID()].Load(0); ShaderMeshSubset subset = bindless_subsets[mesh.subsetbuffer][GeometryIndex()]; ShaderMaterial material = bindless_buffers[subset.material].Load(0); - uint primitiveIndex = PrimitiveIndex(); - uint i0 = bindless_ib[mesh.ib][primitiveIndex * 3 + 0]; - uint i1 = bindless_ib[mesh.ib][primitiveIndex * 3 + 1]; - uint i2 = bindless_ib[mesh.ib][primitiveIndex * 3 + 2]; + uint startIndex = PrimitiveIndex() * 3 + subset.indexOffset; + uint i0 = bindless_ib[mesh.ib][startIndex + 0]; + uint i1 = bindless_ib[mesh.ib][startIndex + 1]; + uint i2 = bindless_ib[mesh.ib][startIndex + 2]; float4 uv0 = 0, uv1 = 0, uv2 = 0; [branch] if (mesh.vb_uv0 >= 0) @@ -287,10 +287,10 @@ void RTReflection_AnyHit(inout RayPayload payload, in BuiltInTriangleIntersectio AcceptHitAndEndSearch(); return; } - uint primitiveIndex = PrimitiveIndex(); - uint i0 = bindless_ib[mesh.ib][primitiveIndex * 3 + 0]; - uint i1 = bindless_ib[mesh.ib][primitiveIndex * 3 + 1]; - uint i2 = bindless_ib[mesh.ib][primitiveIndex * 3 + 2]; + uint startIndex = PrimitiveIndex() * 3 + subset.indexOffset; + uint i0 = bindless_ib[mesh.ib][startIndex + 0]; + uint i1 = bindless_ib[mesh.ib][startIndex + 1]; + uint i2 = 
bindless_ib[mesh.ib][startIndex + 2]; float2 uv0 = 0, uv1 = 0, uv2 = 0; [branch] if (mesh.vb_uv0 >= 0 && material.uvset_baseColorMap == 0) diff --git a/WickedEngine/shaders/rtshadowLIB.hlsl b/WickedEngine/shaders/rtshadowLIB.hlsl index 3e4ba7d20..5d2060f9a 100644 --- a/WickedEngine/shaders/rtshadowLIB.hlsl +++ b/WickedEngine/shaders/rtshadowLIB.hlsl @@ -172,7 +172,7 @@ void RTShadow_Raygen() [branch] if (ray.TMax > 0) { - float seed = g_xFrame_FrameCount * 0.001; + float seed = g_xFrame_Time; RayPayload payload; payload.color = 0; @@ -243,10 +243,10 @@ void RTShadow_AnyHit(inout RayPayload payload, in BuiltInTriangleIntersectionAtt AcceptHitAndEndSearch(); return; } - uint primitiveIndex = PrimitiveIndex(); - uint i0 = bindless_ib[mesh.ib][primitiveIndex * 3 + 0]; - uint i1 = bindless_ib[mesh.ib][primitiveIndex * 3 + 1]; - uint i2 = bindless_ib[mesh.ib][primitiveIndex * 3 + 2]; + uint startIndex = PrimitiveIndex() * 3 + subset.indexOffset; + uint i0 = bindless_ib[mesh.ib][startIndex + 0]; + uint i1 = bindless_ib[mesh.ib][startIndex + 1]; + uint i2 = bindless_ib[mesh.ib][startIndex + 2]; float2 uv0 = 0, uv1 = 0, uv2 = 0; [branch] if (mesh.vb_uv0 >= 0 && material.uvset_baseColorMap == 0) diff --git a/WickedEngine/wiEmittedParticle.h b/WickedEngine/wiEmittedParticle.h index 8b59c331c..6ad6092fc 100644 --- a/WickedEngine/wiEmittedParticle.h +++ b/WickedEngine/wiEmittedParticle.h @@ -31,7 +31,7 @@ public: private: ParticleCounters statistics = {}; - wiGraphics::GPUBuffer statisticsReadbackBuffer[wiGraphics::GraphicsDevice::GetBackBufferCount() + 1]; + wiGraphics::GPUBuffer statisticsReadbackBuffer[wiGraphics::GraphicsDevice::GetBackBufferCount() + 3]; wiGraphics::GPUBuffer particleBuffer; wiGraphics::GPUBuffer aliveList[2]; diff --git a/WickedEngine/wiEnums.h b/WickedEngine/wiEnums.h index 364e933bb..b8a69acf8 100644 --- a/WickedEngine/wiEnums.h +++ b/WickedEngine/wiEnums.h @@ -235,7 +235,7 @@ enum SHADERTYPE PSTYPE_VOXELIZER_TERRAIN, PSTYPE_VOXEL, 
PSTYPE_FORCEFIELDVISUALIZER, - PSTYPE_RENDERLIGHTMAP, + PSTYPE_RENDERLIGHTMAP, PSTYPE_RAYTRACE_DEBUGBVH, PSTYPE_DOWNSAMPLEDEPTHBUFFER, PSTYPE_POSTPROCESS_UPSAMPLE_BILATERAL, @@ -302,11 +302,7 @@ enum SHADERTYPE CSTYPE_COPYTEXTURE2D_FLOAT4_BORDEREXPAND, CSTYPE_SKINNING, CSTYPE_SKINNING_LDS, - CSTYPE_RAYTRACE_LAUNCH, - CSTYPE_RAYTRACE_KICKJOBS, - CSTYPE_RAYTRACE_CLOSESTHIT, - CSTYPE_RAYTRACE_SHADE, - CSTYPE_RAYTRACE_TILESORT, + CSTYPE_RAYTRACE, CSTYPE_PAINT_TEXTURE, CSTYPE_POSTPROCESS_BLUR_GAUSSIAN_FLOAT1, CSTYPE_POSTPROCESS_BLUR_GAUSSIAN_FLOAT3, diff --git a/WickedEngine/wiGPUBVH.cpp b/WickedEngine/wiGPUBVH.cpp index b65b008f5..f6fd9ba6a 100644 --- a/WickedEngine/wiGPUBVH.cpp +++ b/WickedEngine/wiGPUBVH.cpp @@ -29,8 +29,9 @@ enum CSTYPES_BVH static Shader computeShaders[CSTYPE_BVH_COUNT]; static GPUBuffer constantBuffer; +static const int atlasWrapBorder = 1; -void wiGPUBVH::UpdateGlobalMaterialResources(const Scene& scene, CommandList cmd) +void wiGPUBVH::UpdateGlobalMaterialResources(const Scene& scene) { GraphicsDevice* device = wiRenderer::GetDevice(); @@ -69,8 +70,7 @@ void wiGPUBVH::UpdateGlobalMaterialResources(const Scene& scene, CommandList cmd } - bool repackAtlas = false; - const int atlasWrapBorder = 1; + repackAtlas = false; for (auto res : sceneTextures) { if (res == nullptr) @@ -118,11 +118,6 @@ void wiGPUBVH::UpdateGlobalMaterialResources(const Scene& scene, CommandList cmd device->CreateTexture(&desc, nullptr, &globalMaterialAtlas); device->SetName(&globalMaterialAtlas, "globalMaterialAtlas"); - - for (auto& it : storedTextures) - { - wiRenderer::CopyTexture2D(globalMaterialAtlas, -1, it.second.x + atlasWrapBorder, it.second.y + atlasWrapBorder, it.first->texture, 0, cmd, wiRenderer::BORDEREXPAND_WRAP); - } } else { @@ -222,11 +217,10 @@ void wiGPUBVH::UpdateGlobalMaterialResources(const Scene& scene, CommandList cmd device->CreateBuffer(&desc, nullptr, &globalMaterialBuffer); } - device->UpdateBuffer(&globalMaterialBuffer, materialArray.data(), 
cmd, sizeof(ShaderMaterial) * (int)materialArray.size()); } -void wiGPUBVH::Build(const Scene& scene, CommandList cmd) +void wiGPUBVH::Update(const wiScene::Scene& scene) { GraphicsDevice* device = wiRenderer::GetDevice(); @@ -253,7 +247,7 @@ void wiGPUBVH::Build(const Scene& scene, CommandList cmd) desc.MiscFlags = RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; desc.Usage = USAGE_DEFAULT; device->CreateBuffer(&desc, nullptr, &primitiveCounterBuffer); - device->SetName(&primitiveCounterBuffer, "primitiveCounterBuffer"); + device->SetName(&primitiveCounterBuffer, "primitiveCounterBuffer"); } // Pre-gather scene properties: @@ -347,18 +341,44 @@ void wiGPUBVH::Build(const Scene& scene, CommandList cmd) device->SetName(&primitiveMortonBuffer, "primitiveMortonBuffer"); } + UpdateGlobalMaterialResources(scene); +} +void wiGPUBVH::Build(const Scene& scene, CommandList cmd) const +{ + GraphicsDevice* device = wiRenderer::GetDevice(); + + // Pre-gather scene properties: + uint totalTriangles = 0; + for (size_t i = 0; i < scene.objects.GetCount(); ++i) + { + const ObjectComponent& object = scene.objects[i]; + + if (object.meshID != INVALID_ENTITY) + { + const MeshComponent& mesh = *scene.meshes.GetComponent(object.meshID); + + totalTriangles += (uint)mesh.indices.size() / 3; + } + } auto range = wiProfiler::BeginRangeGPU("BVH Rebuild", cmd); - UpdateGlobalMaterialResources(scene, cmd); + if (repackAtlas) + { + for (auto& it : storedTextures) + { + wiRenderer::CopyTexture2D(globalMaterialAtlas, -1, it.second.x + atlasWrapBorder, it.second.y + atlasWrapBorder, it.first->texture, 0, cmd, wiRenderer::BORDEREXPAND_WRAP); + } + } + device->UpdateBuffer(&globalMaterialBuffer, materialArray.data(), cmd, sizeof(ShaderMaterial) * (int)materialArray.size()); - primitiveCount = 0; + uint32_t primitiveCount = 0; uint32_t materialCount = 0; device->EventBegin("BVH - Primitive Builder", cmd); { device->BindComputeShader(&computeShaders[CSTYPE_BVH_PRIMITIVES], cmd); - GPUResource* uavs[] = { + 
const GPUResource* uavs[] = { &primitiveIDBuffer, &primitiveBuffer, &primitiveDataBuffer, @@ -421,14 +441,14 @@ void wiGPUBVH::Build(const Scene& scene, CommandList cmd) device->EventBegin("BVH - Build Hierarchy", cmd); { device->BindComputeShader(&computeShaders[CSTYPE_BVH_HIERARCHY], cmd); - GPUResource* uavs[] = { + const GPUResource* uavs[] = { &bvhNodeBuffer, &bvhParentBuffer, &bvhFlagBuffer }; device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd); - GPUResource* res[] = { + const GPUResource* res[] = { &primitiveCounterBuffer, &primitiveIDBuffer, &primitiveMortonBuffer, @@ -453,13 +473,13 @@ void wiGPUBVH::Build(const Scene& scene, CommandList cmd) device->Barrier(barriers, arraysize(barriers), cmd); device->BindComputeShader(&computeShaders[CSTYPE_BVH_PROPAGATEAABB], cmd); - GPUResource* uavs[] = { + const GPUResource* uavs[] = { &bvhNodeBuffer, &bvhFlagBuffer, }; device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd); - GPUResource* res[] = { + const GPUResource* res[] = { &primitiveCounterBuffer, &primitiveIDBuffer, &primitiveBuffer, @@ -477,88 +497,88 @@ void wiGPUBVH::Build(const Scene& scene, CommandList cmd) wiProfiler::EndRange(range); // BVH rebuild #ifdef BVH_VALIDATE - - GPUBufferDesc readback_desc; - bool download_success; - - // Download primitive count: - readback_desc = primitiveCounterBuffer.GetDesc(); - readback_desc.Usage = USAGE_STAGING; - readback_desc.CPUAccessFlags = CPU_ACCESS_READ; - readback_desc.BindFlags = 0; - readback_desc.MiscFlags = 0; - GPUBuffer readback_primitiveCounterBuffer; - device->CreateBuffer(&readback_desc, nullptr, &readback_primitiveCounterBuffer); - uint primitiveCount; - download_success = device->DownloadResource(&primitiveCounterBuffer, &readback_primitiveCounterBuffer, &primitiveCount, cmd); - assert(download_success); - - if (primitiveCount > 0) { - const uint leafNodeOffset = primitiveCount - 1; + GPUBufferDesc readback_desc; + bool download_success; - // Validate node buffer: - readback_desc = 
bvhNodeBuffer.GetDesc(); + // Download primitive count: + readback_desc = primitiveCounterBuffer.GetDesc(); readback_desc.Usage = USAGE_STAGING; readback_desc.CPUAccessFlags = CPU_ACCESS_READ; readback_desc.BindFlags = 0; readback_desc.MiscFlags = 0; - GPUBuffer readback_nodeBuffer; - device->CreateBuffer(&readback_desc, nullptr, &readback_nodeBuffer); - vector nodes(readback_desc.ByteWidth / sizeof(BVHNode)); - download_success = device->DownloadResource(&bvhNodeBuffer, &readback_nodeBuffer, nodes.data(), cmd); + GPUBuffer readback_primitiveCounterBuffer; + device->CreateBuffer(&readback_desc, nullptr, &readback_primitiveCounterBuffer); + uint primitiveCount; + download_success = device->DownloadResource(&primitiveCounterBuffer, &readback_primitiveCounterBuffer, &primitiveCount, cmd); assert(download_success); - set visitedLeafs; - vector stack; - stack.push_back(0); - while (!stack.empty()) - { - uint nodeIndex = stack.back(); - stack.pop_back(); - if (nodeIndex >= leafNodeOffset) - { - // leaf node - assert(visitedLeafs.count(nodeIndex) == 0); // leaf node was already visited, this must not happen! 
- visitedLeafs.insert(nodeIndex); - } - else - { - // internal node - BVHNode& node = nodes[nodeIndex]; - stack.push_back(node.LeftChildIndex); - stack.push_back(node.RightChildIndex); - } - } - for (uint i = 0; i < primitiveCount; ++i) + if (primitiveCount > 0) { - uint nodeIndex = leafNodeOffset + i; - BVHNode& leaf = nodes[nodeIndex]; - assert(leaf.LeftChildIndex == 0 && leaf.RightChildIndex == 0); // a leaf must have no children - assert(visitedLeafs.count(nodeIndex) > 0); // every leaf node must have been visited in the traversal above - } + const uint leafNodeOffset = primitiveCount - 1; - // Validate flag buffer: - readback_desc = bvhFlagBuffer.GetDesc(); - readback_desc.Usage = USAGE_STAGING; - readback_desc.CPUAccessFlags = CPU_ACCESS_READ; - readback_desc.BindFlags = 0; - readback_desc.MiscFlags = 0; - GPUBuffer readback_flagBuffer; - device->CreateBuffer(&readback_desc, nullptr, &readback_flagBuffer); - vector flags(readback_desc.ByteWidth / sizeof(uint)); - download_success = device->DownloadResource(&bvhFlagBuffer, &readback_flagBuffer, flags.data(), cmd); - assert(download_success); - for (auto& x : flags) - { - if (x > 2) + // Validate node buffer: + readback_desc = bvhNodeBuffer.GetDesc(); + readback_desc.Usage = USAGE_STAGING; + readback_desc.CPUAccessFlags = CPU_ACCESS_READ; + readback_desc.BindFlags = 0; + readback_desc.MiscFlags = 0; + GPUBuffer readback_nodeBuffer; + device->CreateBuffer(&readback_desc, nullptr, &readback_nodeBuffer); + vector nodes(readback_desc.ByteWidth / sizeof(BVHNode)); + download_success = device->DownloadResource(&bvhNodeBuffer, &readback_nodeBuffer, nodes.data(), cmd); + assert(download_success); + set visitedLeafs; + vector stack; + stack.push_back(0); + while (!stack.empty()) { - assert(0); // flagbuffer anomaly detected: node can't have more than two children (AABB propagation step)! 
- break; + uint nodeIndex = stack.back(); + stack.pop_back(); + + if (nodeIndex >= leafNodeOffset) + { + // leaf node + assert(visitedLeafs.count(nodeIndex) == 0); // leaf node was already visited, this must not happen! + visitedLeafs.insert(nodeIndex); + } + else + { + // internal node + BVHNode& node = nodes[nodeIndex]; + stack.push_back(node.LeftChildIndex); + stack.push_back(node.RightChildIndex); + } + } + for (uint i = 0; i < primitiveCount; ++i) + { + uint nodeIndex = leafNodeOffset + i; + BVHNode& leaf = nodes[nodeIndex]; + assert(leaf.LeftChildIndex == 0 && leaf.RightChildIndex == 0); // a leaf must have no children + assert(visitedLeafs.count(nodeIndex) > 0); // every leaf node must have been visited in the traversal above + } + + // Validate flag buffer: + readback_desc = bvhFlagBuffer.GetDesc(); + readback_desc.Usage = USAGE_STAGING; + readback_desc.CPUAccessFlags = CPU_ACCESS_READ; + readback_desc.BindFlags = 0; + readback_desc.MiscFlags = 0; + GPUBuffer readback_flagBuffer; + device->CreateBuffer(&readback_desc, nullptr, &readback_flagBuffer); + vector flags(readback_desc.ByteWidth / sizeof(uint)); + download_success = device->DownloadResource(&bvhFlagBuffer, &readback_flagBuffer, flags.data(), cmd); + assert(download_success); + for (auto& x : flags) + { + if (x > 2) + { + assert(0); // flagbuffer anomaly detected: node can't have more than two children (AABB propagation step)! 
+ break; + } } } } - #endif // BVH_VALIDATE } @@ -580,7 +600,6 @@ void wiGPUBVH::Bind(SHADERSTAGE stage, CommandList cmd) const void wiGPUBVH::Clear() { primitiveCapacity = 0; - primitiveCount = 0; materialArray.clear(); storedTextures.clear(); sceneTextures.clear(); diff --git a/WickedEngine/wiGPUBVH.h b/WickedEngine/wiGPUBVH.h index fba5770f5..197fad025 100644 --- a/WickedEngine/wiGPUBVH.h +++ b/WickedEngine/wiGPUBVH.h @@ -23,7 +23,6 @@ private: wiGraphics::GPUBuffer primitiveDataBuffer; wiGraphics::GPUBuffer primitiveMortonBuffer; uint32_t primitiveCapacity = 0; - uint32_t primitiveCount = 0; // Scene material resources: wiGraphics::GPUBuffer globalMaterialBuffer; @@ -31,10 +30,12 @@ private: std::vector materialArray; std::unordered_map, wiRectPacker::rect_xywh> storedTextures; std::unordered_set> sceneTextures; - void UpdateGlobalMaterialResources(const wiScene::Scene& scene, wiGraphics::CommandList cmd); + bool repackAtlas = false; + void UpdateGlobalMaterialResources(const wiScene::Scene& scene); public: - void Build(const wiScene::Scene& scene, wiGraphics::CommandList cmd); + void Update(const wiScene::Scene& scene); + void Build(const wiScene::Scene& scene, wiGraphics::CommandList cmd) const; void Bind(wiGraphics::SHADERSTAGE stage, wiGraphics::CommandList cmd) const; void Clear(); diff --git a/WickedEngine/wiGraphics.h b/WickedEngine/wiGraphics.h index 9506c8362..178f40121 100644 --- a/WickedEngine/wiGraphics.h +++ b/WickedEngine/wiGraphics.h @@ -870,6 +870,14 @@ namespace wiGraphics { struct Instance { + enum FLAGS + { + FLAG_EMPTY = 0, + FLAG_TRIANGLE_CULL_DISABLE = 1 << 0, + FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE = 1 << 1, + FLAG_FORCE_OPAQUE = 1 << 2, + FLAG_FORCE_NON_OPAQUE = 1 << 3, + }; XMFLOAT3X4 transform; uint32_t InstanceID : 24; uint32_t InstanceMask : 8; diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp index 43a6d4fa5..d51f02803 100644 --- a/WickedEngine/wiRenderer.cpp +++ b/WickedEngine/wiRenderer.cpp @@ -89,7 +89,6 @@ 
bool advancedLightCulling = true; bool variableRateShadingClassification = false; bool variableRateShadingClassificationDebug = false; bool ldsSkinningEnabled = true; -bool scene_bvh_invalid = true; float GameSpeed = 1; bool debugLightCulling = false; bool occlusionCulling = false; @@ -144,8 +143,6 @@ std::vector paintrads; wiSpinLock deferredMIPGenLock; std::vector, bool>> deferredMIPGens; -wiGPUBVH sceneBVH; - static const int atlasClampBorder = 1; @@ -1190,7 +1187,14 @@ void LoadShaders() wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(PS, shaders[PSTYPE_VOXELIZER_TERRAIN], "objectPS_voxelizer_terrain.cso"); }); wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(PS, shaders[PSTYPE_VOXEL], "voxelPS.cso"); }); wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(PS, shaders[PSTYPE_FORCEFIELDVISUALIZER], "forceFieldVisualizerPS.cso"); }); - wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(PS, shaders[PSTYPE_RENDERLIGHTMAP], "renderlightmapPS.cso"); }); + if (device->CheckCapability(GRAPHICSDEVICE_CAPABILITY_RAYTRACING_INLINE)) + { + wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(PS, shaders[PSTYPE_RENDERLIGHTMAP], "renderlightmapPS_rtapi.cso"); }); + } + else + { + wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(PS, shaders[PSTYPE_RENDERLIGHTMAP], "renderlightmapPS.cso"); }); + } wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(PS, shaders[PSTYPE_RAYTRACE_DEBUGBVH], "raytrace_debugbvhPS.cso"); }); wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(PS, shaders[PSTYPE_DOWNSAMPLEDEPTHBUFFER], "downsampleDepthBuffer4xPS.cso"); }); wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(PS, shaders[PSTYPE_POSTPROCESS_UPSAMPLE_BILATERAL], "upsample_bilateralPS.cso"); }); @@ -1235,11 +1239,14 @@ void LoadShaders() wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_COPYTEXTURE2D_FLOAT4_BORDEREXPAND], "copytexture2D_float4_borderexpandCS.cso"); }); wiJobSystem::Execute(ctx, 
[](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_SKINNING], "skinningCS.cso"); }); wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_SKINNING_LDS], "skinningCS_LDS.cso"); }); - wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_RAYTRACE_LAUNCH], "raytrace_launchCS.cso"); }); - wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_RAYTRACE_KICKJOBS], "raytrace_kickjobsCS.cso"); }); - wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_RAYTRACE_CLOSESTHIT], "raytrace_closesthitCS.cso"); }); - wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_RAYTRACE_SHADE], "raytrace_shadeCS.cso"); }); - wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_RAYTRACE_TILESORT], "raytrace_tilesortCS.cso"); }); + if (device->CheckCapability(GRAPHICSDEVICE_CAPABILITY_RAYTRACING_INLINE)) + { + wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_RAYTRACE], "raytraceCS_rtapi.cso"); }); + } + else + { + wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_RAYTRACE], "raytraceCS.cso"); }); + } wiJobSystem::Execute(ctx, [](wiJobArgs args) { LoadShader(CS, shaders[CSTYPE_PAINT_TEXTURE], "paint_textureCS.cso"); }); @@ -2634,8 +2641,8 @@ void ClearWorld(Scene& scene) waterRipples.clear(); - sceneBVH.Clear(); - scene_bvh_invalid = true; + scene.BVH.Clear(); + scene.BVH_invalid = true; deferredMIPGenLock.lock(); deferredMIPGens.clear(); @@ -3709,6 +3716,10 @@ void UpdatePerFrameData( wiJobSystem::Wait(ctx); + if (scene.BVH_invalid) + { + scene.BVH.Update(scene); + } // Update CPU-side frame constant buffer: frameCB.g_xFrame_ConstantOne = 1; @@ -4287,60 +4298,80 @@ void UpdateRenderData( } void UpdateRaytracingAccelerationStructures(const Scene& scene, CommandList cmd) { - if (!device->CheckCapability(GRAPHICSDEVICE_CAPABILITY_RAYTRACING)) - return; - - if (!scene.TLAS.IsValid()) - return; - - // 
BLAS: + if (device->CheckCapability(GRAPHICSDEVICE_CAPABILITY_RAYTRACING)) { - auto rangeCPU = wiProfiler::BeginRangeCPU("BLAS Update (CPU)"); - auto range = wiProfiler::BeginRangeGPU("BLAS Update (GPU)", cmd); - device->EventBegin("BLAS Update", cmd); - for (Entity entity : scene.BLAS_builds) + if (!scene.TLAS.IsValid()) + return; + + // BLAS: { - const MeshComponent* mesh = scene.meshes.GetComponent(entity); - if (mesh != nullptr && mesh->BLAS.IsValid()) + auto rangeCPU = wiProfiler::BeginRangeCPU("BLAS Update (CPU)"); + auto range = wiProfiler::BeginRangeGPU("BLAS Update (GPU)", cmd); + device->EventBegin("BLAS Update", cmd); + + for (size_t i = 0; i < scene.meshes.GetCount(); ++i) { - device->BuildRaytracingAccelerationStructure(&mesh->BLAS, cmd, nullptr); + const MeshComponent& mesh = scene.meshes[i]; + if (mesh.BLAS.IsValid()) + { + switch (mesh.BLAS_state) + { + default: + case MeshComponent::BLAS_STATE_COMPLETE: + break; + case MeshComponent::BLAS_STATE_NEEDS_REBUILD: + device->BuildRaytracingAccelerationStructure(&mesh.BLAS, cmd, nullptr); + break; + case MeshComponent::BLAS_STATE_NEEDS_REFIT: + device->BuildRaytracingAccelerationStructure(&mesh.BLAS, cmd, &mesh.BLAS); + break; + } + mesh.BLAS_state = MeshComponent::BLAS_STATE_COMPLETE; + } } + + { + GPUBarrier barriers[] = { + GPUBarrier::Memory(), + }; + device->Barrier(barriers, arraysize(barriers), cmd); + } + + device->EventEnd(cmd); + wiProfiler::EndRange(range); + wiProfiler::EndRange(rangeCPU); } + // TLAS: { - GPUBarrier barriers[] = { - GPUBarrier::Memory(), - }; - device->Barrier(barriers, arraysize(barriers), cmd); + auto rangeCPU = wiProfiler::BeginRangeCPU("TLAS Update (CPU)"); + auto range = wiProfiler::BeginRangeGPU("TLAS Update (GPU)", cmd); + device->EventBegin("TLAS Update", cmd); + + device->UpdateBuffer(&scene.TLAS.desc.toplevel.instanceBuffer, scene.TLAS_instances.data(), cmd); + device->BuildRaytracingAccelerationStructure(&scene.TLAS, cmd, nullptr); + + { + GPUBarrier barriers[] = { 
+ GPUBarrier::Memory(&scene.TLAS), + }; + device->Barrier(barriers, arraysize(barriers), cmd); + } + + device->EventEnd(cmd); + wiProfiler::EndRange(range); + wiProfiler::EndRange(rangeCPU); } - - device->EventEnd(cmd); - wiProfiler::EndRange(range); - wiProfiler::EndRange(rangeCPU); } - - // TLAS: + else { - auto rangeCPU = wiProfiler::BeginRangeCPU("TLAS Update (CPU)"); - auto range = wiProfiler::BeginRangeGPU("TLAS Update (GPU)", cmd); - device->EventBegin("TLAS Update", cmd); - - device->UpdateBuffer(&scene.TLAS.desc.toplevel.instanceBuffer, scene.TLAS_instances.data(), cmd); - device->BuildRaytracingAccelerationStructure(&scene.TLAS, cmd, nullptr); - + if (scene.BVH_invalid) { - GPUBarrier barriers[] = { - GPUBarrier::Memory(&scene.TLAS), - }; - device->Barrier(barriers, arraysize(barriers), cmd); + scene.BVH_invalid = false; + scene.BVH.Build(scene, cmd); } - - device->EventEnd(cmd); - wiProfiler::EndRange(range); - wiProfiler::EndRange(rangeCPU); } - } void OcclusionCulling_Render(const CameraComponent& camera_previous, const Visibility& vis, CommandList cmd) { @@ -6386,7 +6417,7 @@ void DrawDebugWorld( if (GetRaytraceDebugBVHVisualizerEnabled()) { - RayTraceSceneBVH(cmd); + RayTraceSceneBVH(scene, cmd); } device->EventEnd(cmd); @@ -7826,125 +7857,27 @@ void CopyTexture2D(const Texture& dst, int DstMIP, int DstX, int DstY, const Tex } -void BuildSceneBVH(const Scene& scene, CommandList cmd) -{ - sceneBVH.Build(scene, cmd); -} - -void CreateRayBuffers(RayBuffers& rayBuffers, uint32_t rayCount) -{ - rayBuffers.rayCapacity = rayCount; - - GPUBufferDesc desc; - - desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS; - desc.CPUAccessFlags = 0; - desc.Format = FORMAT_UNKNOWN; - desc.MiscFlags = RESOURCE_MISC_BUFFER_STRUCTURED; - desc.Usage = USAGE_DEFAULT; - - desc.StructureByteStride = sizeof(uint); - desc.ByteWidth = desc.StructureByteStride * rayBuffers.rayCapacity; - device->CreateBuffer(&desc, nullptr, &rayBuffers.rayIndexBuffer[0]); - 
device->SetName(&rayBuffers.rayIndexBuffer[0], "rayIndexBuffer[0]"); - device->CreateBuffer(&desc, nullptr, &rayBuffers.rayIndexBuffer[1]); - device->SetName(&rayBuffers.rayIndexBuffer[1], "rayIndexBuffer[1]"); - -#ifdef RAYTRACING_SORT_GLOBAL - desc.StructureByteStride = sizeof(float); // sorting needs float now - desc.ByteWidth = desc.StructureByteStride * rayBuffers.rayCapacity; - device->CreateBuffer(&desc, nullptr, &rayBuffers.raySortBuffer); - device->SetName(&rayBuffers.raySortBuffer, "raySortBuffer"); -#endif // RAYTRACING_SORT_GLOBAL - - desc.StructureByteStride = sizeof(RaytracingStoredRay); - desc.ByteWidth = desc.StructureByteStride * rayBuffers.rayCapacity; - device->CreateBuffer(&desc, nullptr, &rayBuffers.rayBuffer[0]); - device->SetName(&rayBuffers.rayBuffer[0], "rayBuffer[0]"); - device->CreateBuffer(&desc, nullptr, &rayBuffers.rayBuffer[1]); - device->SetName(&rayBuffers.rayBuffer[1], "rayBuffer[1]"); - - desc.MiscFlags = RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; - desc.StructureByteStride = sizeof(uint); - desc.ByteWidth = desc.StructureByteStride; - device->CreateBuffer(&desc, nullptr, &rayBuffers.rayCountBuffer[0]); - device->SetName(&rayBuffers.rayCountBuffer[0], "rayCountBuffer[0]"); - device->CreateBuffer(&desc, nullptr, &rayBuffers.rayCountBuffer[1]); - device->SetName(&rayBuffers.rayCountBuffer[1], "rayCountBuffer[1]"); -} -void GenerateScreenRayBuffers(const RayBuffers& rayBuffers, const CameraComponent& camera, uint32_t width, uint32_t height, CommandList cmd) -{ - device->EventBegin("Launch Screen Rays", cmd); - { - device->BindComputeShader(&shaders[CSTYPE_RAYTRACE_LAUNCH], cmd); - - const XMFLOAT4& halton = wiMath::GetHaltonSequence((int)device->GetFrameCount()); - RaytracingCB cb; - cb.xTracePixelOffset = XMFLOAT2(halton.x, halton.y); - cb.xTraceResolution.x = width; - cb.xTraceResolution.y = height; - cb.xTraceResolution_rcp.x = 1.0f / cb.xTraceResolution.x; - cb.xTraceResolution_rcp.y = 1.0f / cb.xTraceResolution.y; - 
device->UpdateBuffer(&constantBuffers[CBTYPE_RAYTRACE], &cb, cmd); - device->BindConstantBuffer(CS, &constantBuffers[CBTYPE_RAYTRACE], CB_GETBINDSLOT(RaytracingCB), cmd); - - const GPUResource* uavs[] = { - &rayBuffers.rayIndexBuffer[0], - &rayBuffers.rayBuffer[0], - }; - device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd); - - device->Dispatch( - (width + RAYTRACING_LAUNCH_BLOCKSIZE - 1) / RAYTRACING_LAUNCH_BLOCKSIZE, - (height + RAYTRACING_LAUNCH_BLOCKSIZE - 1) / RAYTRACING_LAUNCH_BLOCKSIZE, - 1, - cmd); - - GPUBarrier barriers[] = { - GPUBarrier::Memory(), - }; - device->Barrier(barriers, arraysize(barriers), cmd); - - device->UnbindUAVs(0, arraysize(uavs), cmd); - - // write initial ray count: - device->UpdateBuffer(&rayBuffers.rayCountBuffer[0], &rayBuffers.rayCapacity, cmd); - } - device->EventEnd(cmd); -} void RayTraceScene( const Scene& scene, - const RayBuffers& rayBuffers, - const Texture* result, + const Texture& output, int accumulation_sample, CommandList cmd ) { device->EventBegin("RayTraceScene", cmd); + auto range = wiProfiler::BeginRangeGPU("RayTraceScene", cmd); - - static GPUBuffer indirectBuffer; // GPU job kicks - if (!indirectBuffer.IsValid()) - { - GPUBufferDesc desc; - - desc.BindFlags = BIND_UNORDERED_ACCESS; - desc.StructureByteStride = sizeof(IndirectDispatchArgs) * 2; - desc.ByteWidth = desc.StructureByteStride; - desc.CPUAccessFlags = 0; - desc.Format = FORMAT_UNKNOWN; - desc.MiscFlags = RESOURCE_MISC_INDIRECT_ARGS | RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; - desc.Usage = USAGE_DEFAULT; - device->CreateBuffer(&desc, nullptr, &indirectBuffer); - device->SetName(&indirectBuffer, "raytrace_indirectBuffer"); - } - - const TextureDesc& result_desc = result->GetDesc(); - - auto range = wiProfiler::BeginRangeGPU("RayTrace - ALL", cmd); + const TextureDesc& desc = output.GetDesc(); // Set up tracing resources: - sceneBVH.Bind(CS, cmd); + if (device->CheckCapability(GRAPHICSDEVICE_CAPABILITY_RAYTRACING_INLINE)) + { + device->BindResource(CS, 
&scene.TLAS, TEXSLOT_ACCELERATION_STRUCTURE, cmd); + } + else + { + scene.BVH.Bind(CS, cmd); + } if (scene.weather.skyMap != nullptr) { @@ -7961,197 +7894,44 @@ void RayTraceScene( RaytracingCB cb; cb.xTracePixelOffset = XMFLOAT2(halton.x, halton.y); cb.xTraceAccumulationFactor = 1.0f / ((float)accumulation_sample + 1.0f); - cb.xTraceResolution.x = result_desc.Width; - cb.xTraceResolution.y = result_desc.Height; + cb.xTraceResolution.x = desc.Width; + cb.xTraceResolution.y = desc.Height; cb.xTraceResolution_rcp.x = 1.0f / cb.xTraceResolution.x; cb.xTraceResolution_rcp.y = 1.0f / cb.xTraceResolution.y; + cb.xTraceUserData.x = raytraceBounceCount; + cb.xTraceUserData.y = accumulation_sample; + cb.xTraceRandomSeed = (accumulation_sample + 1) * 0.0001f; + device->UpdateBuffer(&constantBuffers[CBTYPE_RAYTRACE], &cb, cmd); + device->BindConstantBuffer(CS, &constantBuffers[CBTYPE_RAYTRACE], CB_GETBINDSLOT(RaytracingCB), cmd); - for (uint32_t bounce = 0; bounce < raytraceBounceCount + 1; ++bounce) // first contact + indirect bounces - { - const uint32_t __readBufferID = bounce % 2; - const uint32_t __writeBufferID = (bounce + 1) % 2; + device->BindComputeShader(&shaders[CSTYPE_RAYTRACE], cmd); - cb.xTraceUserData.x = (bounce == 1 && accumulation_sample == 0) ? 1 : 0; // pre-clear result texture? - cb.xTraceUserData.y = bounce == raytraceBounceCount ? 1 : 0; // accumulation step? - cb.xTraceRandomSeed = (float)accumulation_sample * (float)(bounce + 1); - device->UpdateBuffer(&constantBuffers[CBTYPE_RAYTRACE], &cb, cmd); - device->BindConstantBuffer(CS, &constantBuffers[CBTYPE_RAYTRACE], CB_GETBINDSLOT(RaytracingCB), cmd); - - - // 1.) 
Kick off raytracing jobs for this bounce - device->EventBegin("Kick Raytrace Jobs", cmd); - { - // Prepare indirect dispatch based on counter buffer value: - device->BindComputeShader(&shaders[CSTYPE_RAYTRACE_KICKJOBS], cmd); - - const GPUResource* res[] = { - &rayBuffers.rayCountBuffer[__readBufferID], - }; - device->BindResources(CS, res, TEXSLOT_UNIQUE0, arraysize(res), cmd); - const GPUResource* uavs[] = { - &rayBuffers.rayCountBuffer[__writeBufferID], - &indirectBuffer, - }; - device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd); - - { - GPUBarrier barriers[] = { - GPUBarrier::Buffer(&indirectBuffer, BUFFER_STATE_INDIRECT_ARGUMENT, BUFFER_STATE_UNORDERED_ACCESS) - }; - device->Barrier(barriers, arraysize(barriers), cmd); - } - - device->Dispatch(1, 1, 1, cmd); - - { - GPUBarrier barriers[] = { - GPUBarrier::Memory(), - GPUBarrier::Buffer(&indirectBuffer, BUFFER_STATE_UNORDERED_ACCESS, BUFFER_STATE_INDIRECT_ARGUMENT) - }; - device->Barrier(barriers, arraysize(barriers), cmd); - } - - device->UnbindUAVs(0, arraysize(uavs), cmd); - } - device->EventEnd(cmd); - - // Sorting and shading only after first bounce: - if (bounce > 0) - { - // Sort rays to achieve more coherency: - { - device->EventBegin("Ray Sorting", cmd); - -#ifdef RAYTRACING_SORT_GLOBAL - wiGPUSortLib::Sort(rayBuffers.rayCapacity, rayBuffers.raySortBuffer, rayBuffers.rayCountBuffer[__readBufferID], 0, rayBuffers.rayIndexBuffer[__readBufferID], cmd); -#else - device->BindComputeShader(&shaders[CSTYPE_RAYTRACE_TILESORT], cmd); - - const GPUResource* res[] = { - &rayBuffers.rayCountBuffer[__readBufferID], - &rayBuffers.rayBuffer[__readBufferID], - }; - device->BindResources(CS, res, TEXSLOT_ONDEMAND7, arraysize(res), cmd); - const GPUResource* uavs[] = { - &rayBuffers.rayIndexBuffer[__readBufferID], - }; - device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd); - - device->DispatchIndirect(&indirectBuffer, RAYTRACE_INDIRECT_OFFSET_TILESORT, cmd); - - GPUBarrier barriers[] = { - GPUBarrier::Memory(), - }; - 
device->Barrier(barriers, arraysize(barriers), cmd); - - device->UnbindUAVs(0, arraysize(uavs), cmd); -#endif // RAYTRACING_SORT_GLOBAL - - device->EventEnd(cmd); - } - - // Shade - { - device->EventBegin("Shading Rays", cmd); - - wiProfiler::range_id range; - if (bounce == 1) - { - range = wiProfiler::BeginRangeGPU("RayTrace - Shade", cmd); - } - - device->BindComputeShader(&shaders[CSTYPE_RAYTRACE_SHADE], cmd); - - const GPUResource* res[] = { - &rayBuffers.rayCountBuffer[__readBufferID], - &rayBuffers.rayIndexBuffer[__readBufferID], - }; - device->BindResources(CS, res, TEXSLOT_ONDEMAND7, arraysize(res), cmd); - const GPUResource* uavs[] = { - &rayBuffers.rayBuffer[__readBufferID], - result, - }; - device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd); - - device->DispatchIndirect(&indirectBuffer, RAYTRACE_INDIRECT_OFFSET_TRACE, cmd); - - GPUBarrier barriers[] = { - GPUBarrier::Memory(), - }; - device->Barrier(barriers, arraysize(barriers), cmd); - - device->UnbindUAVs(0, arraysize(uavs), cmd); - - if (bounce == 1) - { - wiProfiler::EndRange(range); // RayTrace - Shade - } - device->EventEnd(cmd); - } - } - - // Compute Closest hits (skip after last bounce) - if(bounce < raytraceBounceCount) - { - device->EventBegin("Primary Rays", cmd); - - wiProfiler::range_id range; - if (bounce == 0) - { - range = wiProfiler::BeginRangeGPU("RayTrace - First Contact", cmd); - } - else if (bounce == 1) - { - range = wiProfiler::BeginRangeGPU("RayTrace - First Bounce", cmd); - } - - device->BindComputeShader(&shaders[CSTYPE_RAYTRACE_CLOSESTHIT], cmd); - - const GPUResource* res[] = { - &rayBuffers.rayCountBuffer[__readBufferID], - &rayBuffers.rayIndexBuffer[__readBufferID], - &rayBuffers.rayBuffer[__readBufferID], - }; - device->BindResources(CS, res, TEXSLOT_ONDEMAND7, arraysize(res), cmd); - const GPUResource* uavs[] = { - &rayBuffers.rayCountBuffer[__writeBufferID], - &rayBuffers.rayBuffer[__writeBufferID], -#ifdef RAYTRACING_SORT_GLOBAL - 
&rayBuffers.rayIndexBuffer[__writeBufferID], - &rayBuffers.raySortBuffer, -#endif // RAYTRACING_SORT_GLOBAL - }; - device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd); - - device->DispatchIndirect(&indirectBuffer, RAYTRACE_INDIRECT_OFFSET_TRACE, cmd); - - GPUBarrier barriers[] = { - GPUBarrier::Memory(), - }; - device->Barrier(barriers, arraysize(barriers), cmd); - - device->UnbindUAVs(0, arraysize(uavs), cmd); - - if (bounce == 0 || bounce == 1) - { - wiProfiler::EndRange(range); // RayTrace - First Contact/Bounce - } - device->EventEnd(cmd); - } - } - - wiProfiler::EndRange(range); // RayTrace - ALL + const GPUResource* uavs[] = { + &output, + }; + device->BindUAVs(CS, uavs, 0, arraysize(uavs), cmd); + device->Dispatch( + (desc.Width + RAYTRACING_LAUNCH_BLOCKSIZE - 1) / RAYTRACING_LAUNCH_BLOCKSIZE, + (desc.Height + RAYTRACING_LAUNCH_BLOCKSIZE - 1) / RAYTRACING_LAUNCH_BLOCKSIZE, + 1, + cmd); + GPUBarrier barriers[] = { + GPUBarrier::Memory(), + }; + device->Barrier(barriers, arraysize(barriers), cmd); + device->UnbindUAVs(0, arraysize(uavs), cmd); + wiProfiler::EndRange(range); device->EventEnd(cmd); // RayTraceScene } -void RayTraceSceneBVH(CommandList cmd) +void RayTraceSceneBVH(const Scene& scene, CommandList cmd) { device->EventBegin("RayTraceSceneBVH", cmd); device->BindPipelineState(&PSO_debug[DEBUGRENDERING_RAYTRACE_BVH], cmd); - sceneBVH.Bind(PS, cmd); + scene.BVH.Bind(PS, cmd); device->Draw(3, 0, cmd); device->EventEnd(cmd); } @@ -8298,6 +8078,7 @@ void ManageLightmapAtlas(Scene& scene) if (object.IsLightmapRenderRequested()) { + scene.InvalidateBVH(); refresh = true; if (object.lightmapIterationCount == 0) @@ -8495,7 +8276,7 @@ void RenderObjectLightMap(const Scene& scene, const ObjectComponent& object, Com cb.xTracePixelOffset.y = (halton.y * 2 - 1) * cb.xTraceResolution_rcp.y; cb.xTracePixelOffset.x *= 1.4f; // boost the jitter by a bit cb.xTracePixelOffset.y *= 1.4f; // boost the jitter by a bit - cb.xTraceRandomSeed = (float)lightmapIterationCount + 
1.2345f; // random seed + cb.xTraceRandomSeed = (lightmapIterationCount + 1) * 0.0001f; // random seed cb.xTraceAccumulationFactor = 1.0f / (lightmapIterationCount + 1.0f); // accumulation factor (alpha) cb.xTraceUserData.x = raytraceBounceCount; device->UpdateBuffer(&constantBuffers[CBTYPE_RAYTRACE], &cb, cmd); @@ -8528,13 +8309,13 @@ void RefreshLightmapAtlas(const Scene& scene, CommandList cmd) auto range = wiProfiler::BeginRangeGPU("Lightmap Processing", cmd); // Update GPU scene and BVH data: + if (device->CheckCapability(GRAPHICSDEVICE_CAPABILITY_RAYTRACING_INLINE)) { - if (scene_bvh_invalid) - { - scene_bvh_invalid = false; - BuildSceneBVH(scene, cmd); - } - sceneBVH.Bind(PS, cmd); + device->BindResource(PS, &scene.TLAS, TEXSLOT_ACCELERATION_STRUCTURE, cmd); + } + else + { + scene.BVH.Bind(PS, cmd); } // Render lightmaps for each object: @@ -12318,15 +12099,13 @@ void Postprocess_Tonemap( } else { + device->UnbindResources(TEXSLOT_ONDEMAND0, 4, cmd); + device->BindResource(CS, &input, TEXSLOT_ONDEMAND0, cmd); device->BindResource(CS, texture_luminance, TEXSLOT_ONDEMAND1, cmd); device->BindResource(CS, texture_distortion, TEXSLOT_ONDEMAND2, cmd); - if (texture_colorgradinglut == nullptr) - { - device->UnbindResources(TEXSLOT_ONDEMAND3, 1, cmd); - } - else + if (texture_colorgradinglut != nullptr) { device->BindResource(CS, texture_colorgradinglut, TEXSLOT_ONDEMAND3, cmd); } @@ -12817,7 +12596,6 @@ void SetVoxelRadianceRayStepSize(float value) { voxelSceneData.rayStepSize = val void SetGameSpeed(float value) { GameSpeed = std::max(0.0f, value); } float GetGameSpeed() { return GameSpeed; } void OceanRegenerate(const WeatherComponent& weather) { if (ocean != nullptr) ocean = std::make_unique(weather); } -void InvalidateBVH() { scene_bvh_invalid = true; } void SetRaytraceBounceCount(uint32_t bounces) { raytraceBounceCount = bounces; diff --git a/WickedEngine/wiRenderer.h b/WickedEngine/wiRenderer.h index a0abd34f3..ee054df12 100644 --- a/WickedEngine/wiRenderer.h 
+++ b/WickedEngine/wiRenderer.h @@ -606,30 +606,15 @@ namespace wiRenderer wiGraphics::CommandList cmd ); - // Build the scene BVH on GPU that can be used by ray traced rendering - void BuildSceneBVH(const wiScene::Scene& scene, wiGraphics::CommandList cmd); - - struct RayBuffers - { - uint32_t rayCapacity = 0; - wiGraphics::GPUBuffer rayBuffer[2]; - wiGraphics::GPUBuffer rayIndexBuffer[2]; - wiGraphics::GPUBuffer rayCountBuffer[2]; - wiGraphics::GPUBuffer raySortBuffer; - }; - void CreateRayBuffers(RayBuffers& value, uint32_t rayCount); - // Generate rays for every pixel of the internal resolution - void GenerateScreenRayBuffers(const RayBuffers& rayBuffers, const wiScene::CameraComponent& camera, uint32_t width, uint32_t height, wiGraphics::CommandList cmd); // Render the scene with ray tracing. You provide the ray buffer, where each ray maps to one pixel of the result testure void RayTraceScene( const wiScene::Scene& scene, - const RayBuffers& rayBuffers, - const wiGraphics::Texture* result, + const wiGraphics::Texture& output, int accumulation_sample, wiGraphics::CommandList cmd ); // Render the scene BVH with ray tracing to the screen - void RayTraceSceneBVH(wiGraphics::CommandList cmd); + void RayTraceSceneBVH(const wiScene::Scene& scene, wiGraphics::CommandList cmd); // Render occluders against a depth buffer void OcclusionCulling_Render(const wiScene::CameraComponent& camera_previous, const Visibility& vis, wiGraphics::CommandList cmd); @@ -750,7 +735,6 @@ namespace wiRenderer void SetGameSpeed(float value); float GetGameSpeed(); void OceanRegenerate(const wiScene::WeatherComponent& weather); // regeenrates ocean if it is already created - void InvalidateBVH(); // invalidates scene bvh so if something wants to use it, it will recompute and validate it void SetRaytraceBounceCount(uint32_t bounces); uint32_t GetRaytraceBounceCount(); void SetRaytraceDebugBVHVisualizerEnabled(bool value); diff --git a/WickedEngine/wiScene.cpp b/WickedEngine/wiScene.cpp index 
29d55921d..9a51df68a 100644 --- a/WickedEngine/wiScene.cpp +++ b/WickedEngine/wiScene.cpp @@ -725,7 +725,7 @@ namespace wiScene if (wiRenderer::GetDevice()->CheckCapability(GRAPHICSDEVICE_CAPABILITY_RAYTRACING)) { - _flags |= DIRTY_BLAS; + BLAS_state = BLAS_STATE_NEEDS_REBUILD; RaytracingAccelerationStructureDesc desc; desc.type = RaytracingAccelerationStructureDesc::BOTTOMLEVEL; @@ -740,21 +740,6 @@ namespace wiScene desc._flags |= RaytracingAccelerationStructureDesc::FLAG_PREFER_FAST_TRACE; } -#if 0 - // Flattened subsets: - desc.bottomlevel.geometries.emplace_back(); - auto& geometry = desc.bottomlevel.geometries.back(); - geometry.type = RaytracingAccelerationStructureDesc::BottomLevel::Geometry::TRIANGLES; - geometry.triangles.vertexBuffer = streamoutBuffer_POS.IsValid() ? streamoutBuffer_POS : vertexBuffer_POS; - geometry.triangles.indexBuffer = indexBuffer; - geometry.triangles.indexFormat = GetIndexFormat(); - geometry.triangles.indexCount = (uint32_t)indices.size(); - geometry.triangles.indexOffset = 0; - geometry.triangles.vertexCount = (uint32_t)vertex_positions.size(); - geometry.triangles.vertexFormat = FORMAT_R32G32B32_FLOAT; - geometry.triangles.vertexStride = sizeof(MeshComponent::Vertex_POS); -#else - // One geometry per subset: for (auto& subset : subsets) { desc.bottomlevel.geometries.emplace_back(); @@ -769,7 +754,6 @@ namespace wiScene geometry.triangles.vertexFormat = FORMAT_R32G32B32_FLOAT; geometry.triangles.vertexStride = sizeof(MeshComponent::Vertex_POS); } -#endif bool success = device->CreateRaytracingAccelerationStructure(&desc, &BLAS); assert(success); @@ -1398,15 +1382,6 @@ namespace wiScene this->dt = dt; GraphicsDevice* device = wiRenderer::GetDevice(); - if (dt > 0) - { - cmd = device->BeginCommandList(); - BLAS_builds.clear(); - } - else - { - cmd = INVALID_COMMANDLIST; - } if (device->CheckCapability(GRAPHICSDEVICE_CAPABILITY_RAYTRACING)) { @@ -1506,7 +1481,7 @@ namespace wiScene if 
(device->CheckCapability(GRAPHICSDEVICE_CAPABILITY_RAYTRACING)) { // Recreate top level acceleration structure if the object count changed: - if (dt > 0 && objects.GetCount() > 0 && objects.GetCount() != TLAS.desc.toplevel.count) + if (objects.GetCount() > 0 && objects.GetCount() != TLAS.desc.toplevel.count) { RaytracingAccelerationStructureDesc desc; desc._flags = RaytracingAccelerationStructureDesc::FLAG_PREFER_FAST_BUILD; @@ -1523,11 +1498,6 @@ namespace wiScene device->SetName(&TLAS, "TLAS"); } } - - if (cmd != INVALID_COMMANDLIST) - { - device->StashCommandLists(); - } } void Scene::Clear() { @@ -2647,23 +2617,20 @@ namespace wiScene } if (flags != geometry._flags) { - mesh._flags |= MeshComponent::DIRTY_BLAS; + mesh.BLAS_state = MeshComponent::BLAS_STATE_NEEDS_REBUILD; } if (mesh.streamoutBuffer_POS.IsValid()) { - mesh._flags |= MeshComponent::DIRTY_BLAS; + mesh.BLAS_state = MeshComponent::BLAS_STATE_NEEDS_REBUILD; geometry.triangles.vertexBuffer = mesh.streamoutBuffer_POS; } } subsetIndex++; } - if (IsUpdateAccelerationStructuresEnabled() && cmd != INVALID_COMMANDLIST && (mesh._flags & MeshComponent::DIRTY_BLAS)) + if (mesh.IsDirtyMorph()) { - mesh._flags &= ~MeshComponent::DIRTY_BLAS; - locker.lock(); - BLAS_builds.push_back(entity); - locker.unlock(); + mesh.BLAS_state = MeshComponent::BLAS_STATE_NEEDS_REBUILD; } } @@ -2969,7 +2936,7 @@ namespace wiScene object.prev_transform_index = -1; } - if (IsUpdateAccelerationStructuresEnabled() && TLAS.IsValid()) + if (TLAS.IsValid()) { GraphicsDevice* device = wiRenderer::GetDevice(); RaytracingAccelerationStructureDesc::TopLevel::Instance instance = {}; @@ -2984,6 +2951,13 @@ namespace wiScene instance.InstanceMask = 1; instance.bottomlevel = mesh->BLAS; + if (XMVectorGetX(XMMatrixDeterminant(W)) > 0) + { + // There is a mismatch between object space winding and BLAS winding: + // https://docs.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_raytracing_instance_flags + instance.Flags = 
RaytracingAccelerationStructureDesc::TopLevel::Instance::FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE; + } + void* dest = (void*)((size_t)TLAS_instances.data() + (size_t)args.jobIndex * device->GetTopLevelAccelerationStructureInstanceSize()); device->WriteTopLevelAccelerationStructureInstance(&instance, dest); } diff --git a/WickedEngine/wiScene.h b/WickedEngine/wiScene.h index 5349a4860..c8f8d1455 100644 --- a/WickedEngine/wiScene.h +++ b/WickedEngine/wiScene.h @@ -9,6 +9,7 @@ #include "wiAudio.h" #include "wiResourceManager.h" #include "wiSpinLock.h" +#include "wiGPUBVH.h" #include "wiECS.h" #include "wiScene_Decl.h" @@ -316,7 +317,6 @@ namespace wiScene TERRAIN = 1 << 3, DIRTY_MORPH = 1 << 4, DIRTY_BINDLESS = 1 << 5, - DIRTY_BLAS = 1 << 6, }; uint32_t _flags = RENDERABLE; @@ -380,6 +380,13 @@ namespace wiScene wiGraphics::GPUBuffer subsetBuffer; wiGraphics::RaytracingAccelerationStructure BLAS; + enum BLAS_STATE + { + BLAS_STATE_NEEDS_REBUILD, + BLAS_STATE_NEEDS_REFIT, + BLAS_STATE_COMPLETE, + }; + mutable BLAS_STATE BLAS_state = BLAS_STATE_NEEDS_REBUILD; // Only valid for 1 frame material component indices: int terrain_material1_index = -1; @@ -1278,20 +1285,19 @@ namespace wiScene enum FLAGS { EMPTY = 0, - UPDATE_ACCELERATION_STRUCTURES = 1 << 0, }; uint32_t flags = EMPTY; - constexpr void SetUpdateAccelerationStructuresEnabled(bool value){ if (value) { flags |= UPDATE_ACCELERATION_STRUCTURES; } else { flags &= ~UPDATE_ACCELERATION_STRUCTURES; } } - constexpr bool IsUpdateAccelerationStructuresEnabled() const { return flags & UPDATE_ACCELERATION_STRUCTURES; } - wiSpinLock locker; AABB bounds; std::vector parallel_bounds; WeatherComponent weather; wiGraphics::RaytracingAccelerationStructure TLAS; std::vector TLAS_instances; - std::vector BLAS_builds; + + wiGPUBVH BVH; // this is for non-hardware accelerated raytracing + mutable bool BVH_invalid = false; + void InvalidateBVH() { BVH_invalid = true; } std::mutex cmd_locker; wiGraphics::CommandList cmd = 
wiGraphics::INVALID_COMMANDLIST; // for gpu data updates diff --git a/WickedEngine/wiShaderCompiler.cpp b/WickedEngine/wiShaderCompiler.cpp index d0164e487..020cee5df 100644 --- a/WickedEngine/wiShaderCompiler.cpp +++ b/WickedEngine/wiShaderCompiler.cpp @@ -95,22 +95,22 @@ namespace wiShaderCompiler args.push_back(L"as_6_5"); break; case wiGraphics::VS: - args.push_back(L"vs_6_0"); + args.push_back(L"vs_6_5"); break; case wiGraphics::HS: - args.push_back(L"hs_6_0"); + args.push_back(L"hs_6_5"); break; case wiGraphics::DS: - args.push_back(L"ds_6_0"); + args.push_back(L"ds_6_5"); break; case wiGraphics::GS: - args.push_back(L"gs_6_0"); + args.push_back(L"gs_6_5"); break; case wiGraphics::PS: - args.push_back(L"ps_6_0"); + args.push_back(L"ps_6_5"); break; case wiGraphics::CS: - args.push_back(L"cs_6_0"); + args.push_back(L"cs_6_5"); break; case wiGraphics::LIB: args.push_back(L"lib_6_5"); diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index c5455cf67..39ee73553 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wiVersion // minor features, major updates, breaking compatibility changes const int minor = 55; // minor bug fixes, alterations, refactors, updates - const int revision = 4; + const int revision = 5; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);