From 1f0f13bd2840ce32d2decbea5f9f7f5bee2b2b0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tur=C3=A1nszki=20J=C3=A1nos?= Date: Tue, 4 Oct 2022 18:55:24 +0200 Subject: [PATCH] terrain virtual texture atlas --- .../terrainVirtualTextureUpdateCS.hlsl | 17 +- WickedEngine/wiTerrain.cpp | 254 ++++++++++++------ WickedEngine/wiTerrain.h | 110 +++++++- WickedEngine/wiVersion.cpp | 2 +- 4 files changed, 284 insertions(+), 99 deletions(-) diff --git a/WickedEngine/shaders/terrainVirtualTextureUpdateCS.hlsl b/WickedEngine/shaders/terrainVirtualTextureUpdateCS.hlsl index bd4b172ca..529ee0ddd 100644 --- a/WickedEngine/shaders/terrainVirtualTextureUpdateCS.hlsl +++ b/WickedEngine/shaders/terrainVirtualTextureUpdateCS.hlsl @@ -3,6 +3,12 @@ static const uint region_count = 4; Texture2D region_weights_texture : register(t0); +struct VirtualTexturePush +{ + uint4 offset_size; +}; +PUSHCONSTANT(push, VirtualTexturePush); + struct Terrain { ShaderMaterial materials[region_count]; @@ -32,11 +38,11 @@ groupshared float lds_a[8][8]; [numthreads(8, 8, 1)] void main(uint3 DTid : SV_DispatchThreadID, uint2 GTid : SV_GroupThreadID) { - const uint2 pixel = DTid.xy; + if (DTid.x >= push.offset_size.z || DTid.y >= push.offset_size.w) + return; - float2 output_dim = 0; - output_baseColorMap_mip0.GetDimensions(output_dim.x, output_dim.y); - const float2 uv = (pixel + 0.5f) / output_dim; + float2 output_dim = push.offset_size.zw; + const float2 uv = (DTid.xy + 0.5f) / output_dim; float4 region_weights = region_weights_texture.SampleLevel(sampler_linear_clamp, uv, 0); @@ -101,10 +107,12 @@ void main(uint3 DTid : SV_DispatchThreadID, uint2 GTid : SV_GroupThreadID) total_surface /= weight_sum; total_normal /= weight_sum; + const uint2 pixel = DTid.xy + push.offset_size.xy; output_baseColorMap_mip0[pixel] = total_baseColor; output_surfaceMap_mip0[pixel] = total_surface; output_normalMap_mip0[pixel] = float4(total_normal, 1, 1); +#if 0 // Mip writes: // Basecolormap: @@ -278,4 +286,5 @@ void main(uint3 DTid : SV_DispatchThreadID, uint2 GTid : SV_GroupThreadID) output_surfaceMap_mip3[pixel / 8] = value; } +#endif } diff --git a/WickedEngine/wiTerrain.cpp b/WickedEngine/wiTerrain.cpp index fcb4ab194..108d9908d 100644 --- a/WickedEngine/wiTerrain.cpp +++ b/WickedEngine/wiTerrain.cpp @@ -207,6 +207,8 @@ namespace wi::terrain chunks.clear(); + virtual_texture_allocator = {}; + wi::vector entities_to_remove; for (size_t i = 0; i < scene->hierarchy.GetCount(); ++i) { @@ -281,6 +283,7 @@ namespace wi::terrain if (terrainEntity == INVALID_ENTITY) { + virtual_texture_allocator = {}; chunks.clear(); return; } @@ -305,13 +308,10 @@ namespace wi::terrain const int removal_threshold = generation + 2; const float texlodMultiplier = texlod; GraphicsDevice* device = GetDevice(); - virtual_texture_updates.clear(); - virtual_texture_barriers_begin.clear(); - virtual_texture_barriers_end.clear(); // Check whether there are any materials that would write to virtual textures: bool virtual_texture_any = false; - virtual_texture_available[MaterialComponent::TEXTURESLOT_COUNT] = {}; + bool virtual_texture_available[MaterialComponent::TEXTURESLOT_COUNT] = {}; MaterialComponent* virtual_materials[4] = { &material_Base, &material_Slope, @@ -341,6 +341,66 @@ namespace wi::terrain } virtual_texture_available[MaterialComponent::SURFACEMAP] = true; // this is always needed to bake individual material properties + target_texture_resolution = wi::math::GetNextPowerOfTwo(target_texture_resolution); + if (virtual_texture_allocator.max_tile != target_texture_resolution) + { + virtual_texture_allocator.init(target_texture_resolution); + virtual_texture_clear = true; + + for (auto it = chunks.begin(); it != chunks.end(); it++) + { + const Chunk& chunk = it->first; + ChunkData& chunk_data = it->second; + chunk_data.vt = {}; + } + + TextureDesc desc; + desc.width = (uint32_t)virtual_texture_allocator.width; + desc.height = (uint32_t)virtual_texture_allocator.height; + desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; + desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE; + //desc.mip_levels = 4; + + for (int i = 0; i < MaterialComponent::TEXTURESLOT_COUNT; ++i) + { + if (!virtual_texture_available[i]) + continue; + + switch (i) + { + case MaterialComponent::NORMALMAP: + desc.format = Format::R8G8_UNORM; + break; + default: + desc.format = Format::R8G8B8A8_UNORM; + break; + } + bool success = device->CreateTexture(&desc, nullptr, &virtual_textures[i]); + assert(success); + device->SetName(&virtual_textures[i], "Terrain::virtual_textures[i]"); + + if (desc.mip_levels > 1) + { + for (uint32_t mip = 0; mip < virtual_textures[i].desc.mip_levels; ++mip) + { + int subresource_index = device->CreateSubresource(&virtual_textures[i], SubresourceType::UAV, 0, 1, mip, 1); + assert(subresource_index == mip); + } + } + } + } + + const XMUINT2 virtual_texture_resolution = XMUINT2( + (uint32_t)virtual_textures[0].desc.width, + (uint32_t)virtual_textures[0].desc.height + ); + const XMFLOAT2 virtual_texture_resolution_rcp = XMFLOAT2( + 1.0f / virtual_texture_resolution.x, + 1.0f / virtual_texture_resolution.y + ); + int virtual_texture_rendering_budget = std::max(2048, (int)target_texture_resolution); + virtual_texture_rendering_budget *= virtual_texture_rendering_budget; // square size + for (auto it = chunks.begin(); it != chunks.end();) { const Chunk& chunk = it->first; @@ -389,6 +449,7 @@ namespace wi::terrain { if (dist > removal_threshold) { + virtual_texture_allocator.free(chunk_data.vt); scene->Entity_Remove(it->second.entity); it = chunks.erase(it); continue; // don't increment iterator @@ -473,6 +534,8 @@ namespace wi::terrain // Collect virtual texture update requests: if (virtual_texture_any) { + uint32_t required_texture_resolution = 0; + float request_score = 0; if (chunk_visible) { uint32_t texture_lod = 0; @@ -486,81 +549,69 @@ namespace wi::terrain else { const float dist = std::sqrt(distsq); - const float dist_to_sphere = dist - radius; + const float dist_to_sphere = std::max(0.0f, dist - radius); texture_lod = uint32_t(dist_to_sphere * texlodMultiplier); } - chunk_data.required_texture_resolution = uint32_t(target_texture_resolution / std::pow(2.0f, (float)std::max(0u, texture_lod))); - chunk_data.required_texture_resolution = AlignTo(chunk_data.required_texture_resolution, 8u); - chunk_data.required_texture_resolution = std::max(8u, chunk_data.required_texture_resolution); + request_score = distsq; + required_texture_resolution = uint32_t(target_texture_resolution / std::pow(2.0f, (float)std::max(0u, texture_lod))); + required_texture_resolution = AlignTo(required_texture_resolution, 8u); + required_texture_resolution = std::max(VirtualTextureAllocator::min_tile_constant, required_texture_resolution); } else { - chunk_data.required_texture_resolution = 8u; + required_texture_resolution = VirtualTextureAllocator::min_tile_constant; } - MaterialComponent* material = scene->materials.GetComponent(chunk_data.entity); - if (material != nullptr) + if (chunk_data.vt.size != required_texture_resolution) { - bool need_update = false; - for (int i = 0; i < MaterialComponent::TEXTURESLOT_COUNT; ++i) + if (required_texture_resolution > chunk_data.vt.size) { - if (virtual_texture_available[i]) + // upscaling continuously: + required_texture_resolution = std::max(VirtualTextureAllocator::min_tile_constant, chunk_data.vt.size * 2); + } + + MaterialComponent* material = scene->materials.GetComponent(chunk_data.entity); + if (material != nullptr) + { + VirtualTextureAllocator::Tile vt = virtual_texture_allocator.allocate(required_texture_resolution); + + if (vt.IsValid()) { - uint32_t current_resolution = 0; - if (material->textures[i].resource.IsValid()) - { - current_resolution = material->textures[i].resource.GetTexture().GetDesc().width; - } + int budget_after_alloc = virtual_texture_rendering_budget - int(required_texture_resolution * required_texture_resolution); - if (current_resolution != chunk_data.required_texture_resolution) + if (budget_after_alloc >= 0 || required_texture_resolution < VirtualTextureAllocator::min_tile_constant * 2) { - need_update = true; - TextureDesc desc; - desc.width = chunk_data.required_texture_resolution; - desc.height = chunk_data.required_texture_resolution; - if (i == MaterialComponent::TEXTURESLOT::NORMALMAP) - { - desc.format = Format::R8G8_UNORM; - } - else - { - desc.format = Format::R8G8B8A8_UNORM; - } - desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; - desc.layout = ResourceState::SHADER_RESOURCE_COMPUTE; - desc.mip_levels = 4; - Texture texture; - bool success = device->CreateTexture(&desc, nullptr, &texture); - assert(success); + virtual_texture_allocator.free(chunk_data.vt); + chunk_data.vt = vt; - if (desc.mip_levels > 1) + // Shrink the uvs to avoid wrap sampling across edge by object rendering shaders: + material->texMulAdd.x = float(chunk_data.vt.size - 1) * virtual_texture_resolution_rcp.x; + material->texMulAdd.y = float(chunk_data.vt.size - 1) * virtual_texture_resolution_rcp.y; + material->texMulAdd.z = float(chunk_data.vt.x + 0.5f) * virtual_texture_resolution_rcp.x; + material->texMulAdd.w = float(chunk_data.vt.y + 0.5f) * virtual_texture_resolution_rcp.y; + + for (int i = 0; i < MaterialComponent::TEXTURESLOT_COUNT; ++i) { - for (uint32_t i = 0; i < texture.desc.mip_levels; ++i) + if (virtual_textures[i].IsValid()) { - int subresource_index = device->CreateSubresource(&texture, SubresourceType::UAV, 0, 1, i, 1); - assert(subresource_index == i); + material->textures[i].resource.SetTexture(virtual_textures[i]); } } - material->textures[i].resource.SetTexture(texture); - virtual_texture_barriers_begin.push_back(GPUBarrier::Image(&material->textures[i].resource.GetTexture(), desc.layout, ResourceState::UNORDERED_ACCESS)); - virtual_texture_barriers_end.push_back(GPUBarrier::Image(&material->textures[i].resource.GetTexture(), ResourceState::UNORDERED_ACCESS, desc.layout)); + VirtualTextureUpdateRequest& request = virtual_texture_updates.emplace_back(); + request.vt = chunk_data.vt; + request.score = request_score; + request.region_weights_texture = chunk_data.region_weights_texture; + + virtual_texture_rendering_budget = budget_after_alloc; + } + else + { + virtual_texture_allocator.free(vt); } } + } - - if (need_update) - { - // Shrink the uvs to avoid wrap sampling across edge by object rendering shaders: - float virtual_texture_resolution_rcp = 1.0f / float(chunk_data.required_texture_resolution); - material->texMulAdd.x = float(chunk_data.required_texture_resolution - 1) * virtual_texture_resolution_rcp; - material->texMulAdd.y = float(chunk_data.required_texture_resolution - 1) * virtual_texture_resolution_rcp; - material->texMulAdd.z = 0.5f * virtual_texture_resolution_rcp; - material->texMulAdd.w = 0.5f * virtual_texture_resolution_rcp; - - virtual_texture_updates.push_back(chunk); - } - } } @@ -918,10 +969,13 @@ namespace wi::terrain if (virtual_texture_updates.empty()) return; + std::sort(virtual_texture_updates.begin(), virtual_texture_updates.end(), [](const VirtualTextureUpdateRequest& a, const VirtualTextureUpdateRequest& b) { + return a.score < b.score; + }); + GraphicsDevice* device = GetDevice(); - device->EventBegin("TerrainVirtualTextureUpdate", cmd); - auto range = wi::profiler::BeginRangeGPU("TerrainVirtualTextureUpdate", cmd); - device->Barrier(virtual_texture_barriers_begin.data(), (uint32_t)virtual_texture_barriers_begin.size(), cmd); + device->EventBegin("Terrain - Virtual Texture Update", cmd); + auto range = wi::profiler::BeginRangeGPU("Terrain - Virtual Texture Update", cmd); device->BindComputeShader(wi::renderer::GetShader(wi::enums::CSTYPE_TERRAIN_VIRTUALTEXTURE_UPDATE), cmd); @@ -932,39 +986,69 @@ namespace wi::terrain material_HighAltitude.WriteShaderMaterial(&materials[3]); device->BindDynamicConstantBuffer(materials, 0, cmd); - for (auto& chunk : virtual_texture_updates) + GPUBarrier barriers[MaterialComponent::TEXTURESLOT_COUNT]; + uint32_t num_barriers = 0; + + for (int i = 0; i < MaterialComponent::TEXTURESLOT_COUNT; ++i) { - auto it = chunks.find(chunk); - if (it == chunks.end()) - continue; - const ChunkData& chunk_data = it->second; - - const GPUResource* res[] = { - &chunk_data.region_weights_texture, - }; - device->BindResources(res, 0, arraysize(res), cmd); - - const MaterialComponent* material = scene->materials.GetComponent(chunk_data.entity); - if (material != nullptr) + if (virtual_textures[i].IsValid()) { - for (int i = 0; i < MaterialComponent::TEXTURESLOT_COUNT; ++i) + if (virtual_textures[i].desc.mip_levels > 1) { - if (virtual_texture_available[i]) - { - const Texture& texture = material->textures[i].resource.GetTexture(); + device->BindUAV(&virtual_textures[i], i * 4 + 0, cmd, 0); + device->BindUAV(&virtual_textures[i], i * 4 + 1, cmd, 1); + device->BindUAV(&virtual_textures[i], i * 4 + 2, cmd, 2); + device->BindUAV(&virtual_textures[i], i * 4 + 3, cmd, 3); + } + else + { + device->BindUAV(&virtual_textures[i], i * 4 + 0, cmd); + } - device->BindUAV(&texture, i * 4 + 0, cmd, 0); - device->BindUAV(&texture, i * 4 + 1, cmd, 1); - device->BindUAV(&texture, i * 4 + 2, cmd, 2); - device->BindUAV(&texture, i * 4 + 3, cmd, 3); - } + barriers[num_barriers++] = GPUBarrier::Image(&virtual_textures[i], virtual_textures[i].desc.layout, ResourceState::UNORDERED_ACCESS); + } + } + + device->Barrier(barriers, num_barriers, cmd); + + if (virtual_texture_clear) + { + for (int i = 0; i < MaterialComponent::TEXTURESLOT_COUNT; ++i) + { + if (virtual_textures[i].IsValid()) + { + device->ClearUAV(&virtual_textures[i], 0, cmd); } } - device->Dispatch(chunk_data.required_texture_resolution / 8u, chunk_data.required_texture_resolution / 8u, 1, cmd); + GPUBarrier memory_barrier = GPUBarrier::Memory(); + device->Barrier(&memory_barrier, 1, cmd); } - device->Barrier(virtual_texture_barriers_end.data(), (uint32_t)virtual_texture_barriers_end.size(), cmd); + for (auto& request : virtual_texture_updates) + { + const GPUResource* res[] = { + &request.region_weights_texture, + }; + device->BindResources(res, 0, arraysize(res), cmd); + + uint4 offset_size = uint4( + (uint)request.vt.x, (uint)request.vt.y, + (uint)request.vt.size, (uint)request.vt.size + ); + device->PushConstants(&offset_size, sizeof(offset_size), cmd); + + device->Dispatch(request.vt.size / 8u, request.vt.size / 8u, 1, cmd); + } + + for (uint32_t i = 0; i < num_barriers; ++i) + { + std::swap(barriers[i].image.layout_before, barriers[i].image.layout_after); + } + device->Barrier(barriers, num_barriers, cmd); + + virtual_texture_clear = false; + virtual_texture_updates.clear(); wi::profiler::EndRange(range); device->EventEnd(cmd); diff --git a/WickedEngine/wiTerrain.h b/WickedEngine/wiTerrain.h index 6df05d697..820ec9165 100644 --- a/WickedEngine/wiTerrain.h +++ b/WickedEngine/wiTerrain.h @@ -46,6 +46,95 @@ namespace wi::terrain static constexpr float chunk_width_rcp = 1.0f / (chunk_width - 1); static constexpr uint32_t vertexCount = chunk_width * chunk_width; + struct VirtualTextureAllocator + { + static constexpr uint32_t min_tile_constant = 8; + static constexpr uint32_t max_width_constant = 8192; + uint32_t max_tile = 0; + uint32_t width = 0; + uint32_t height = 0; + struct Tile + { + uint32_t x = 0; + uint32_t y = 0; + uint32_t size = 0; + + constexpr bool IsValid() const { return size > 0; } + }; + struct LOD + { + wi::vector free_tiles; + }; + wi::vector lods; + + void init(uint32_t max_tile) + { + max_tile = wi::math::GetNextPowerOfTwo(max_tile); + max_tile = std::min(max_tile, max_width_constant); + this->max_tile = max_tile; + width = 0; + height = 0; + lods.clear(); + + uint32_t tile_size = max_tile; + uint32_t tile_count = max_width_constant / (tile_size / 2); + uint32_t y = 0; + while (tile_size >= min_tile_constant) + { + LOD& lod = lods.emplace_back(); + + uint32_t x = 0; + for (uint32_t i = 0; i < tile_count; ++i) + { + if (x + tile_size > max_width_constant) + { + x = 0; + y += tile_size; + } + Tile tile; + tile.x = x; + tile.y = y; + tile.size = tile_size; + lod.free_tiles.push_back(tile); + + width = std::max(width, tile.x + tile.size); + height = std::max(height, tile.y + tile.size); + x += tile_size; + } + y += tile_size; + tile_size /= 2; + tile_count *= 2; + } + } + + LOD& get_lod(uint32_t tile_size) + { + int lod = (int)lods.size() - 1 - ((int)std::log2(tile_size) - (int)std::log2(min_tile_constant)); + lod = std::max(0, lod); + lod = std::min((int)lods.size() - 1, lod); + return lods[lod]; + } + + Tile allocate(uint32_t tile_size) + { + LOD& lod = get_lod(tile_size); + if (lod.free_tiles.empty()) + return {}; + + Tile tile = lod.free_tiles.back(); + lod.free_tiles.pop_back(); + return tile; + } + + void free(Tile& tile) + { + if (!tile.IsValid()) + return; + LOD& lod = get_lod(tile.size); + lod.free_tiles.push_back(tile); + } + }; + struct ChunkData { wi::ecs::Entity entity = wi::ecs::INVALID_ENTITY; @@ -57,9 +146,10 @@ namespace wi::terrain float grass_density_current = 1; wi::vector region_weights; wi::graphics::Texture region_weights_texture; - uint32_t required_texture_resolution = 0; wi::primitive::Sphere sphere; XMFLOAT3 position = XMFLOAT3(0, 0, 0); + + VirtualTextureAllocator::Tile vt; }; struct Prop @@ -112,14 +202,16 @@ namespace wi::terrain std::shared_ptr generator; float generation_time_budget_milliseconds = 12; // after this much time, the generation thread will exit. This can help avoid a very long running, resource consuming and slow cancellation generation - // Virtual texture updates will be batched like: - // 1) Execute all barriers (dst: UNORDERED_ACCESS) - // 2) Execute all compute shaders - // 3) Execute all barriers (dst: SHADER_RESOURCE) - wi::vector virtual_texture_updates; - wi::vector virtual_texture_barriers_begin; - wi::vector virtual_texture_barriers_end; - bool virtual_texture_available[wi::scene::MaterialComponent::TEXTURESLOT_COUNT] = {}; + VirtualTextureAllocator virtual_texture_allocator; + wi::graphics::Texture virtual_textures[wi::scene::MaterialComponent::TEXTURESLOT_COUNT]; + struct VirtualTextureUpdateRequest + { + VirtualTextureAllocator::Tile vt; + float score = 0; + wi::graphics::Texture region_weights_texture; + }; + mutable wi::vector virtual_texture_updates; + mutable bool virtual_texture_clear = false; constexpr bool IsCenterToCamEnabled() const { return _flags & CENTER_TO_CAM; } constexpr bool IsRemovalEnabled() const { return _flags & REMOVAL; } diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index 0c94bad5a..660207b64 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wi::version // minor features, major updates, breaking compatibility changes const int minor = 71; // minor bug fixes, alterations, refactors, updates - const int revision = 68; + const int revision = 69; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);