diff --git a/WickedEngine/shaders/terrainVirtualTextureUpdateCS.hlsl b/WickedEngine/shaders/terrainVirtualTextureUpdateCS.hlsl
index 777973563..bd4b172ca 100644
--- a/WickedEngine/shaders/terrainVirtualTextureUpdateCS.hlsl
+++ b/WickedEngine/shaders/terrainVirtualTextureUpdateCS.hlsl
@@ -9,17 +9,34 @@ struct Terrain
 };
 ConstantBuffer terrain : register(b0);
 
-// These are expected to be in the same bind slots as corresponding MaterialComponent::TEXTURESLOT enums
-RWTexture2D output_baseColorMap : register(u0);
-RWTexture2D output_normalMap : register(u1);
-RWTexture2D output_surfaceMap : register(u2);
+RWTexture2D output_baseColorMap_mip0 : register(u0);
+RWTexture2D output_baseColorMap_mip1 : register(u1);
+RWTexture2D output_baseColorMap_mip2 : register(u2);
+RWTexture2D output_baseColorMap_mip3 : register(u3);
+
+RWTexture2D output_normalMap_mip0 : register(u4);
+RWTexture2D output_normalMap_mip1 : register(u5);
+RWTexture2D output_normalMap_mip2 : register(u6);
+RWTexture2D output_normalMap_mip3 : register(u7);
+
+RWTexture2D output_surfaceMap_mip0 : register(u8);
+RWTexture2D output_surfaceMap_mip1 : register(u9);
+RWTexture2D output_surfaceMap_mip2 : register(u10);
+RWTexture2D output_surfaceMap_mip3 : register(u11);
+
+groupshared float lds_r[8][8]; // lds_r/g/b/a: one groupshared plane per channel, indexed by GTid within the 8x8 group
+groupshared float lds_g[8][8];
+groupshared float lds_b[8][8];
+groupshared float lds_a[8][8];
 
 [numthreads(8, 8, 1)]
-void main(uint3 DTid : SV_DispatchThreadID)
+void main(uint3 DTid : SV_DispatchThreadID, uint2 GTid : SV_GroupThreadID)
 {
+	const uint2 pixel = DTid.xy;
+
 	float2 output_dim = 0;
-	output_baseColorMap.GetDimensions(output_dim.x, output_dim.y);
-	const float2 uv = (DTid.xy + 0.5f) / output_dim;
+	output_baseColorMap_mip0.GetDimensions(output_dim.x, output_dim.y);
+	const float2 uv = (pixel + 0.5f) / output_dim;
 
 	float4 region_weights = region_weights_texture.SampleLevel(sampler_linear_clamp, uv, 0);
 
@@ -84,7 +101,183 @@ void main(uint3 DTid : SV_DispatchThreadID)
 	total_surface /= weight_sum;
 	total_normal /= weight_sum;
 
-	output_baseColorMap[DTid.xy] = total_baseColor;
-	output_surfaceMap[DTid.xy] = total_surface;
-	output_normalMap[DTid.xy] = float4(total_normal, 1, 1);
+	output_baseColorMap_mip0[pixel] = total_baseColor;
+	output_surfaceMap_mip0[pixel] = total_surface;
+	output_normalMap_mip0[pixel] = float4(total_normal, 1, 1);
+
+	// Mip writes: each level is the average of 2x2 values of the level below, passed between threads through the lds_* planes
+
+	// Basecolormap:
+
+	lds_r[GTid.x][GTid.y] = total_baseColor.r;
+	lds_g[GTid.x][GTid.y] = total_baseColor.g;
+	lds_b[GTid.x][GTid.y] = total_baseColor.b;
+
+	GroupMemoryBarrierWithGroupSync();
+
+	if (GTid.x % 2 == 0 && GTid.y % 2 == 0)
+	{
+		float4 value = (
+			float4(lds_r[GTid.x + 0][GTid.y + 0], lds_g[GTid.x + 0][GTid.y + 0], lds_b[GTid.x + 0][GTid.y + 0], 1) +
+			float4(lds_r[GTid.x + 1][GTid.y + 0], lds_g[GTid.x + 1][GTid.y + 0], lds_b[GTid.x + 1][GTid.y + 0], 1) +
+			float4(lds_r[GTid.x + 0][GTid.y + 1], lds_g[GTid.x + 0][GTid.y + 1], lds_b[GTid.x + 0][GTid.y + 1], 1) +
+			float4(lds_r[GTid.x + 1][GTid.y + 1], lds_g[GTid.x + 1][GTid.y + 1], lds_b[GTid.x + 1][GTid.y + 1], 1)
+			) / 4.0f;
+
+		output_baseColorMap_mip1[pixel / 2] = value;
+
+		lds_r[GTid.x][GTid.y] = value.r;
+		lds_g[GTid.x][GTid.y] = value.g;
+		lds_b[GTid.x][GTid.y] = value.b;
+	}
+
+	GroupMemoryBarrierWithGroupSync();
+
+	if (GTid.x % 4 == 0 && GTid.y % 4 == 0)
+	{
+		float4 value = (
+			float4(lds_r[GTid.x + 0][GTid.y + 0], lds_g[GTid.x + 0][GTid.y + 0], lds_b[GTid.x + 0][GTid.y + 0], 1) +
+			float4(lds_r[GTid.x + 2][GTid.y + 0], lds_g[GTid.x + 2][GTid.y + 0], lds_b[GTid.x + 2][GTid.y + 0], 1) +
+			float4(lds_r[GTid.x + 0][GTid.y + 2], lds_g[GTid.x + 0][GTid.y + 2], lds_b[GTid.x + 0][GTid.y + 2], 1) +
+			float4(lds_r[GTid.x + 2][GTid.y + 2], lds_g[GTid.x + 2][GTid.y + 2], lds_b[GTid.x + 2][GTid.y + 2], 1)
+			) / 4.0f;
+
+		output_baseColorMap_mip2[pixel / 4] = value;
+
+		lds_r[GTid.x][GTid.y] = value.r;
+		lds_g[GTid.x][GTid.y] = value.g;
+		lds_b[GTid.x][GTid.y] = value.b;
+	}
+
+	GroupMemoryBarrierWithGroupSync();
+
+	if (GTid.x % 8 == 0 && GTid.y % 8 == 0)
+	{
+		float4 value = (
+			float4(lds_r[GTid.x + 0][GTid.y + 0], lds_g[GTid.x + 0][GTid.y + 0], lds_b[GTid.x + 0][GTid.y + 0], 1) +
+			float4(lds_r[GTid.x + 4][GTid.y + 0], lds_g[GTid.x + 4][GTid.y + 0], lds_b[GTid.x + 4][GTid.y + 0], 1) +
+			float4(lds_r[GTid.x + 0][GTid.y + 4], lds_g[GTid.x + 0][GTid.y + 4], lds_b[GTid.x + 0][GTid.y + 4], 1) +
+			float4(lds_r[GTid.x + 4][GTid.y + 4], lds_g[GTid.x + 4][GTid.y + 4], lds_b[GTid.x + 4][GTid.y + 4], 1)
+			) / 4.0f;
+
+		output_baseColorMap_mip3[pixel / 8] = value;
+	}
+
+	GroupMemoryBarrierWithGroupSync();
+
+	// Normalmap:
+
+	lds_r[GTid.x][GTid.y] = total_normal.r;
+	lds_g[GTid.x][GTid.y] = total_normal.g;
+
+	GroupMemoryBarrierWithGroupSync();
+
+	if (GTid.x % 2 == 0 && GTid.y % 2 == 0)
+	{
+		float4 value = (
+			float4(lds_r[GTid.x + 0][GTid.y + 0], lds_g[GTid.x + 0][GTid.y + 0], 1, 1) +
+			float4(lds_r[GTid.x + 1][GTid.y + 0], lds_g[GTid.x + 1][GTid.y + 0], 1, 1) +
+			float4(lds_r[GTid.x + 0][GTid.y + 1], lds_g[GTid.x + 0][GTid.y + 1], 1, 1) +
+			float4(lds_r[GTid.x + 1][GTid.y + 1], lds_g[GTid.x + 1][GTid.y + 1], 1, 1)
+			) / 4.0f;
+
+		output_normalMap_mip1[pixel / 2] = value;
+
+		lds_r[GTid.x][GTid.y] = value.r;
+		lds_g[GTid.x][GTid.y] = value.g;
+		lds_b[GTid.x][GTid.y] = value.b;
+	}
+
+	GroupMemoryBarrierWithGroupSync();
+
+	if (GTid.x % 4 == 0 && GTid.y % 4 == 0)
+	{
+		float4 value = (
+			float4(lds_r[GTid.x + 0][GTid.y + 0], lds_g[GTid.x + 0][GTid.y + 0], 1, 1) +
+			float4(lds_r[GTid.x + 2][GTid.y + 0], lds_g[GTid.x + 2][GTid.y + 0], 1, 1) +
+			float4(lds_r[GTid.x + 0][GTid.y + 2], lds_g[GTid.x + 0][GTid.y + 2], 1, 1) +
+			float4(lds_r[GTid.x + 2][GTid.y + 2], lds_g[GTid.x + 2][GTid.y + 2], 1, 1)
+			) / 4.0f;
+
+		output_normalMap_mip2[pixel / 4] = value;
+
+		lds_r[GTid.x][GTid.y] = value.r;
+		lds_g[GTid.x][GTid.y] = value.g;
+		lds_b[GTid.x][GTid.y] = value.b;
+	}
+
+	GroupMemoryBarrierWithGroupSync();
+
+	if (GTid.x % 8 == 0 && GTid.y % 8 == 0)
+	{
+		float4 value = (
+			float4(lds_r[GTid.x + 0][GTid.y + 0], lds_g[GTid.x + 0][GTid.y + 0], 1, 1) +
+			float4(lds_r[GTid.x + 4][GTid.y + 0], lds_g[GTid.x + 4][GTid.y + 0], 1, 1) +
+			float4(lds_r[GTid.x + 0][GTid.y + 4], lds_g[GTid.x + 0][GTid.y + 4], 1, 1) +
+			float4(lds_r[GTid.x + 4][GTid.y + 4], lds_g[GTid.x + 4][GTid.y + 4], 1, 1)
+			) / 4.0f;
+
+		output_normalMap_mip3[pixel / 8] = value;
+	}
+
+	GroupMemoryBarrierWithGroupSync();
+
+	// Surfacemap:
+
+	lds_r[GTid.x][GTid.y] = total_surface.r;
+	lds_g[GTid.x][GTid.y] = total_surface.g;
+	lds_b[GTid.x][GTid.y] = total_surface.b;
+	lds_a[GTid.x][GTid.y] = total_surface.a;
+
+	GroupMemoryBarrierWithGroupSync();
+
+	if (GTid.x % 2 == 0 && GTid.y % 2 == 0)
+	{
+		float4 value = (
+			float4(lds_r[GTid.x + 0][GTid.y + 0], lds_g[GTid.x + 0][GTid.y + 0], lds_b[GTid.x + 0][GTid.y + 0], lds_a[GTid.x + 0][GTid.y + 0]) +
+			float4(lds_r[GTid.x + 1][GTid.y + 0], lds_g[GTid.x + 1][GTid.y + 0], lds_b[GTid.x + 1][GTid.y + 0], lds_a[GTid.x + 1][GTid.y + 0]) +
+			float4(lds_r[GTid.x + 0][GTid.y + 1], lds_g[GTid.x + 0][GTid.y + 1], lds_b[GTid.x + 0][GTid.y + 1], lds_a[GTid.x + 0][GTid.y + 1]) +
+			float4(lds_r[GTid.x + 1][GTid.y + 1], lds_g[GTid.x + 1][GTid.y + 1], lds_b[GTid.x + 1][GTid.y + 1], lds_a[GTid.x + 1][GTid.y + 1])
+			) / 4.0f;
+
+		output_surfaceMap_mip1[pixel / 2] = value;
+
+		lds_r[GTid.x][GTid.y] = value.r;
+		lds_g[GTid.x][GTid.y] = value.g;
+		lds_b[GTid.x][GTid.y] = value.b;
+		lds_a[GTid.x][GTid.y] = value.a; // the mip2 stage reads lds_a too, so alpha must be carried forward like r/g/b
+	}
+
+	GroupMemoryBarrierWithGroupSync();
+
+	if (GTid.x % 4 == 0 && GTid.y % 4 == 0)
+	{
+		float4 value = (
+			float4(lds_r[GTid.x + 0][GTid.y + 0], lds_g[GTid.x + 0][GTid.y + 0], lds_b[GTid.x + 0][GTid.y + 0], lds_a[GTid.x + 0][GTid.y + 0]) +
+			float4(lds_r[GTid.x + 2][GTid.y + 0], lds_g[GTid.x + 2][GTid.y + 0], lds_b[GTid.x + 2][GTid.y + 0], lds_a[GTid.x + 2][GTid.y + 0]) +
+			float4(lds_r[GTid.x + 0][GTid.y + 2], lds_g[GTid.x + 0][GTid.y + 2], lds_b[GTid.x + 0][GTid.y + 2], lds_a[GTid.x + 0][GTid.y + 2]) +
+			float4(lds_r[GTid.x + 2][GTid.y + 2], lds_g[GTid.x + 2][GTid.y + 2], lds_b[GTid.x + 2][GTid.y + 2], lds_a[GTid.x + 2][GTid.y + 2])
+			) / 4.0f;
+
+		output_surfaceMap_mip2[pixel / 4] = value;
+
+		lds_r[GTid.x][GTid.y] = value.r;
+		lds_g[GTid.x][GTid.y] = value.g;
+		lds_b[GTid.x][GTid.y] = value.b;
+		lds_a[GTid.x][GTid.y] = value.a; // the mip3 stage reads lds_a too, so alpha must be carried forward like r/g/b
+	}
+
+	GroupMemoryBarrierWithGroupSync();
+
+	if (GTid.x % 8 == 0 && GTid.y % 8 == 0)
+	{
+		float4 value = (
+			float4(lds_r[GTid.x + 0][GTid.y + 0], lds_g[GTid.x + 0][GTid.y + 0], lds_b[GTid.x + 0][GTid.y + 0], lds_a[GTid.x + 0][GTid.y + 0]) +
+			float4(lds_r[GTid.x + 4][GTid.y + 0], lds_g[GTid.x + 4][GTid.y + 0], lds_b[GTid.x + 4][GTid.y + 0], lds_a[GTid.x + 4][GTid.y + 0]) +
+			float4(lds_r[GTid.x + 0][GTid.y + 4], lds_g[GTid.x + 0][GTid.y + 4], lds_b[GTid.x + 0][GTid.y + 4], lds_a[GTid.x + 0][GTid.y + 4]) +
+			float4(lds_r[GTid.x + 4][GTid.y + 4], lds_g[GTid.x + 4][GTid.y + 4], lds_b[GTid.x + 4][GTid.y + 4], lds_a[GTid.x + 4][GTid.y + 4])
+			) / 4.0f;
+
+		output_surfaceMap_mip3[pixel / 8] = value;
+	}
 }
diff --git a/WickedEngine/wiTerrain.cpp b/WickedEngine/wiTerrain.cpp
index a9dbe7259..fcb4ab194 100644
--- a/WickedEngine/wiTerrain.cpp
+++ b/WickedEngine/wiTerrain.cpp
@@ -518,16 +518,17 @@ namespace wi::terrain
 					TextureDesc desc;
 					desc.width = chunk_data.required_texture_resolution;
 					desc.height = chunk_data.required_texture_resolution;
-					desc.format = Format::R8G8B8A8_UNORM;
-					desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
-					if (desc.width > 64)
+					if (i == MaterialComponent::TEXTURESLOT::NORMALMAP)
 					{
-						desc.mip_levels = (uint32_t)log2(std::max(desc.width, desc.height)) + 1;
+						desc.format = Format::R8G8_UNORM;
 					}
 					else
 					{
-						desc.mip_levels = 1;
+						desc.format = Format::R8G8B8A8_UNORM;
 					}
+					desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
+					desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE;
+					desc.mip_levels = 4;
 					Texture texture;
 					bool success = device->CreateTexture(&desc, nullptr, &texture);
 					assert(success);
@@ -536,10 +537,7 @@ namespace wi::terrain
 					{
 						for (uint32_t i = 0; i < texture.desc.mip_levels; ++i)
 						{
-							int subresource_index;
-							subresource_index = device->CreateSubresource(&texture, SubresourceType::SRV, 0, 1, i, 1);
-							assert(subresource_index == i);
-							subresource_index = device->CreateSubresource(&texture, SubresourceType::UAV, 0, 1, i, 1);
+							int subresource_index = device->CreateSubresource(&texture, SubresourceType::UAV, 0, 1, i, 1);
 							assert(subresource_index == i);
 						}
 					}
@@ -955,12 +953,10 @@ namespace wi::terrain
 					{
 						const Texture& texture = material->textures[i].resource.GetTexture();
 
-						device->BindUAV(&texture, i, cmd);
-
-						if (texture.GetDesc().mip_levels > 1)
-						{
-							wi::renderer::AddDeferredMIPGen(material->textures[i].resource.GetTexture());
-						}
+						device->BindUAV(&texture, i * 4 + 0, cmd, 0);
+						device->BindUAV(&texture, i * 4 + 1, cmd, 1);
+						device->BindUAV(&texture, i * 4 + 2, cmd, 2);
+						device->BindUAV(&texture, i * 4 + 3, cmd, 3);
 					}
 				}
 			}
@@ -970,8 +966,6 @@ namespace wi::terrain
 
 		device->Barrier(virtual_texture_barriers_end.data(), (uint32_t)virtual_texture_barriers_end.size(), cmd);
 
-		wi::renderer::ProcessDeferredMipGenRequests(cmd);
-
 		wi::profiler::EndRange(range);
 		device->EventEnd(cmd);
 	}
diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp
index f62a39a59..0c94bad5a 100644
--- a/WickedEngine/wiVersion.cpp
+++ b/WickedEngine/wiVersion.cpp
@@ -9,7 +9,7 @@ namespace wi::version
 	// minor features, major updates, breaking compatibility changes
 	const int minor = 71;
 	// minor bug fixes, alterations, refactors, updates
-	const int revision = 67;
+	const int revision = 68;
 
 	const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);
 