fix for hair particles and ray tracing #755

This commit is contained in:
Turánszki János
2023-09-28 07:28:25 +02:00
parent 21c24cb456
commit 63d4e4967b
6 changed files with 87 additions and 42 deletions
+5
View File
@@ -243,6 +243,11 @@ namespace wi::graphics
return CreateBuffer2(desc, [&](void* dest) { std::memcpy(dest, initial_data, desc->size); }, buffer);
}
void Barrier(const GPUBarrier& barrier, CommandList cmd)
{
Barrier(&barrier, 1, cmd);
}
struct GPULinearAllocator
{
GPUBuffer buffer;
+17 -11
View File
@@ -3396,20 +3396,26 @@ using namespace dx12_internal;
if (has_flag(desc->bind_flags, BindFlag::UNORDERED_ACCESS))
{
CreateSubresource(buffer, SubresourceType::UAV, 0);
if (has_flag(desc->bind_flags, BindFlag::UNORDERED_ACCESS) && !has_flag(desc->misc_flags, ResourceMiscFlag::BUFFER_RAW))
{
// Create raw buffer if doesn't exist for ClearUAV:
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {};
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
uav_desc.Buffer.NumElements = uint32_t(desc->size / sizeof(uint32_t));
internal_state->uav_raw.init(this, uav_desc, internal_state->resource.Get());
}
}
}
if (
has_flag(desc->bind_flags, BindFlag::UNORDERED_ACCESS) &&
(
!has_flag(desc->misc_flags, ResourceMiscFlag::BUFFER_RAW) ||
has_flag(desc->misc_flags, ResourceMiscFlag::NO_DEFAULT_DESCRIPTORS)
)
)
{
// Create a raw UAV for ClearUAV if the buffer doesn't already have one:
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {};
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
uav_desc.Buffer.NumElements = uint32_t(desc->size / sizeof(uint32_t));
internal_state->uav_raw.init(this, uav_desc, internal_state->resource.Get());
}
return SUCCEEDED(hr);
}
bool GraphicsDevice_DX12::CreateTexture(const TextureDesc* desc, const SubresourceData* initial_data, Texture* texture) const
+10 -1
View File
@@ -221,7 +221,6 @@ namespace wi
{
RaytracingAccelerationStructureDesc desc;
desc.type = RaytracingAccelerationStructureDesc::Type::BOTTOMLEVEL;
desc.flags |= RaytracingAccelerationStructureDesc::FLAG_ALLOW_UPDATE;
desc.flags |= RaytracingAccelerationStructureDesc::FLAG_PREFER_FAST_BUILD;
desc.bottom_level.geometries.emplace_back();
@@ -400,6 +399,16 @@ namespace wi
device->EventEnd(cmd);
}
// Performs the one-time GPU-side initialization of this hair particle system:
// clears generalBuffer through its UAV and then records a buffer barrier that
// transitions it from UNORDERED_ACCESS to COPY_DST. Subsequent calls are no-ops
// once gpu_initialized has been set.
//	cmd : command list that the clear and the barrier are recorded into
void HairParticleSystem::InitializeGPUDataIfNeeded(wi::graphics::CommandList cmd)
{
	if (gpu_initialized)
		return;

	// The renderer calls this for every hair system in the scene, not only the
	// visible ones, so the buffer might not have been created yet.
	// NOTE(review): assumes generalBuffer can be invalid at this point - confirm.
	// The flag is intentionally left unset here so that the initialization is
	// attempted again on a later call, once the buffer exists.
	if (!generalBuffer.IsValid())
		return;

	GraphicsDevice* device = wi::graphics::GetDevice();

	// Clear the whole buffer to 0:
	device->ClearUAV(&generalBuffer, 0, cmd);

	// NOTE(review): COPY_DST is presumably the state that the following hair update expects - confirm.
	device->Barrier(GPUBarrier::Buffer(&generalBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::COPY_DST), cmd);

	gpu_initialized = true;
}
void HairParticleSystem::Draw(const MaterialComponent& material, wi::enums::RENDERPASS renderPass, CommandList cmd) const
{
if (strandCount == 0 || !constantBuffer.IsValid())
+3
View File
@@ -57,6 +57,9 @@ namespace wi
wi::graphics::CommandList cmd
);
mutable bool gpu_initialized = false;
void InitializeGPUDataIfNeeded(wi::graphics::CommandList cmd);
void Draw(
const wi::scene::MaterialComponent& material,
wi::enums::RENDERPASS renderPass,
+50 -29
View File
@@ -2276,14 +2276,17 @@ void SetUpStates()
const GPUBuffer& GetIndexBufferForQuads(uint32_t max_quad_count)
{
const size_t required_index_count = max_quad_count * 6;
const size_t required_max_index = max_quad_count * 4;
const size_t required_max_index = max_quad_count * 4u;
static std::mutex locker;
std::scoped_lock lock(locker);
if (required_max_index < 65536)
if (required_max_index < 65536u)
{
// 16-bit request:
max_quad_count = std::max(65535u / 4u, max_quad_count); // minimum a full 16 bit index buffer request, avoid allocating multiple 16-bit small requests
const size_t required_index_count = max_quad_count * 6u;
static GPUBuffer indexBufferForQuads16;
if (!indexBufferForQuads16.IsValid() || indexBufferForQuads16.desc.size / indexBufferForQuads16.desc.stride < required_index_count)
{
@@ -2296,24 +2299,31 @@ const GPUBuffer& GetIndexBufferForQuads(uint32_t max_quad_count)
bd.format = Format::R16_UINT;
bd.stride = GetFormatStride(bd.format);
bd.size = bd.stride * required_index_count;
wi::vector<uint16_t> primitiveData(required_index_count);
for (uint16_t particleID = 0; particleID < uint16_t(max_quad_count); ++particleID)
auto fill_ib = [&](void* dst)
{
uint16_t v0 = particleID * 4;
uint32_t i0 = particleID * 6;
primitiveData[i0 + 0] = v0 + 0;
primitiveData[i0 + 1] = v0 + 1;
primitiveData[i0 + 2] = v0 + 2;
primitiveData[i0 + 3] = v0 + 2;
primitiveData[i0 + 4] = v0 + 1;
primitiveData[i0 + 5] = v0 + 3;
}
device->CreateBuffer(&bd, primitiveData.data(), &indexBufferForQuads16);
device->SetName(&indexBufferForQuads16, "wi::renderer::indexBufferForQuads16");
uint16_t* primitiveData = (uint16_t*)dst;
for (uint16_t particleID = 0; particleID < uint16_t(max_quad_count); ++particleID)
{
uint16_t v0 = particleID * 4;
uint32_t i0 = particleID * 6;
primitiveData[i0 + 0] = v0 + 0;
primitiveData[i0 + 1] = v0 + 1;
primitiveData[i0 + 2] = v0 + 2;
primitiveData[i0 + 3] = v0 + 2;
primitiveData[i0 + 4] = v0 + 1;
primitiveData[i0 + 5] = v0 + 3;
}
};
device->CreateBuffer2(&bd, fill_ib, &indexBufferForQuads16);
device->SetName(&indexBufferForQuads16, "wi::renderer::indexBufferForQuads16bit");
}
return indexBufferForQuads16;
}
// 32-bit request below:
max_quad_count = wi::math::GetNextPowerOfTwo(max_quad_count); // reduce allocations by making larger fitting allocations
const size_t required_index_count = max_quad_count * 6u;
static GPUBuffer indexBufferForQuads32;
if (!indexBufferForQuads32.IsValid() || indexBufferForQuads32.desc.size / indexBufferForQuads32.desc.stride < required_index_count)
{
@@ -2326,20 +2336,23 @@ const GPUBuffer& GetIndexBufferForQuads(uint32_t max_quad_count)
bd.format = Format::R32_UINT;
bd.stride = GetFormatStride(bd.format);
bd.size = bd.stride * required_index_count;
wi::vector<uint32_t> primitiveData(required_index_count);
for (uint particleID = 0; particleID < max_quad_count; ++particleID)
auto fill_ib = [&](void* dst)
{
uint32_t v0 = particleID * 4;
uint32_t i0 = particleID * 6;
primitiveData[i0 + 0] = v0 + 0;
primitiveData[i0 + 1] = v0 + 1;
primitiveData[i0 + 2] = v0 + 2;
primitiveData[i0 + 3] = v0 + 2;
primitiveData[i0 + 4] = v0 + 1;
primitiveData[i0 + 5] = v0 + 3;
}
device->CreateBuffer(&bd, primitiveData.data(), &indexBufferForQuads32);
device->SetName(&indexBufferForQuads32, "wi::renderer::indexBufferForQuads32");
uint32_t* primitiveData = (uint32_t*)dst;
for (uint particleID = 0; particleID < max_quad_count; ++particleID)
{
uint32_t v0 = particleID * 4;
uint32_t i0 = particleID * 6;
primitiveData[i0 + 0] = v0 + 0;
primitiveData[i0 + 1] = v0 + 1;
primitiveData[i0 + 2] = v0 + 2;
primitiveData[i0 + 3] = v0 + 2;
primitiveData[i0 + 4] = v0 + 1;
primitiveData[i0 + 5] = v0 + 3;
}
};
device->CreateBuffer2(&bd, fill_ib, &indexBufferForQuads32);
device->SetName(&indexBufferForQuads32, "wi::renderer::indexBufferForQuads32bit");
}
return indexBufferForQuads32;
@@ -4243,6 +4256,14 @@ void UpdateRenderData(
barrier_stack_flush(cmd); // wind/skinning flush
// Hair particle initialization is needed for all, not just visible ones:
// This fixes an issue when hair is included in ray tracing acceleration
// structure, but not yet updated properly, because it was not yet visible
for (size_t i = 0; i < vis.scene->hairs.GetCount(); ++i)
{
vis.scene->hairs[i].InitializeGPUDataIfNeeded(cmd);
}
// Hair particle systems GPU simulation:
// (This must be non-async too, as prepass will render hairs!)
static thread_local wi::vector<HairParticleSystem::UpdateGPUItem> hair_updates;
+2 -1
View File
@@ -9,7 +9,7 @@ namespace wi::version
// minor features, major updates, breaking compatibility changes
const int minor = 71;
// minor bug fixes, alterations, refactors, updates
const int revision = 298;
const int revision = 299;
const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);
@@ -149,6 +149,7 @@ Patreon supporters
- Anthony Python
- Gnowos
- Klaus
- slaughternaut
)";
return credits;