async queue updates (#885)
This commit is contained in:
@@ -51,24 +51,17 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/ccache
|
||||
key: ccache-${{ github.run_id }}
|
||||
restore-keys: ccache
|
||||
save-always: true
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install libsdl2-dev ccache
|
||||
sudo apt install libsdl2-dev
|
||||
|
||||
- name: Initial compile
|
||||
run: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
CCACHE_NODIRECT=1 make -j$(nproc)
|
||||
cmake .. -DCMAKE_BUILD_TYPE=Release
|
||||
make -j$(nproc)
|
||||
|
||||
- name: Generate shader dump
|
||||
run: |
|
||||
@@ -79,7 +72,7 @@ jobs:
|
||||
- name: Recompile with shader dump
|
||||
run: |
|
||||
cd build
|
||||
CCACHE_NODIRECT=1 make -B -j $(nproc)
|
||||
make -B -j $(nproc)
|
||||
|
||||
- name: Move files
|
||||
run: |
|
||||
|
||||
@@ -132,13 +132,12 @@ static const uint IndirectDispatchArgsAlignment = 4u;
|
||||
#define CBSLOT_OTHER_EMITTEDPARTICLE 4
|
||||
#define CBSLOT_OTHER_HAIRPARTICLE 4
|
||||
#define CBSLOT_OTHER_FFTGENERATOR 4
|
||||
#define CBSLOT_OTHER_OCEAN_SIMULATION_IMMUTABLE 4
|
||||
#define CBSLOT_OTHER_OCEAN_SIMULATION_PERFRAME 5
|
||||
#define CBSLOT_OTHER_OCEAN_RENDER 7
|
||||
#define CBSLOT_OTHER_OCEAN 4
|
||||
#define CBSLOT_OTHER_CLOUDGENERATOR 4
|
||||
#define CBSLOT_OTHER_GPUSORTLIB 4
|
||||
#define CBSLOT_MSAO 4
|
||||
#define CBSLOT_FSR 4
|
||||
#define CBSLOT_TRAILRENDERER 4
|
||||
#endif // !__PSSL__ && !__SCE__
|
||||
|
||||
#endif // WI_SHADERINTEROP_H
|
||||
|
||||
@@ -131,12 +131,14 @@ namespace wi::graphics
|
||||
// Returns whether the graphics debug layer is enabled. It can be enabled when creating the device.
|
||||
constexpr bool IsDebugDevice() const { return validationMode != ValidationMode::Disabled; }
|
||||
|
||||
// Get GPU-specific metrics:
|
||||
constexpr size_t GetShaderIdentifierSize() const { return SHADER_IDENTIFIER_SIZE; }
|
||||
constexpr size_t GetTopLevelAccelerationStructureInstanceSize() const { return TOPLEVEL_ACCELERATION_STRUCTURE_INSTANCE_SIZE; }
|
||||
constexpr uint32_t GetVariableRateShadingTileSize() const { return VARIABLE_RATE_SHADING_TILE_SIZE; }
|
||||
constexpr uint64_t GetTimestampFrequency() const { return TIMESTAMP_FREQUENCY; }
|
||||
constexpr uint64_t GetVideoDecodeBitstreamAlignment() const { return VIDEO_DECODE_BITSTREAM_ALIGNMENT; }
|
||||
|
||||
// Get information about the graphics device manufacturer:
|
||||
constexpr uint32_t GetVendorId() const { return vendorId; }
|
||||
constexpr uint32_t GetDeviceId() const { return deviceId; }
|
||||
constexpr const std::string& GetAdapterName() const { return adapterName; }
|
||||
@@ -178,7 +180,13 @@ namespace wi::graphics
|
||||
// - These commands are not immediately executed, but they begin executing on the GPU after calling SubmitCommandLists()
|
||||
// - These are not thread safe, only a single thread should use a single CommandList at one time
|
||||
|
||||
// Tell the command list to wait for another command list which was started before it
|
||||
// The granularity of this is at least that the beginning of the command list will wait for the end of the other command list
|
||||
// On some platforms like PS5 this can be implemented by waiting exactly at the wait insertion point within the command lists, which is more precise
|
||||
virtual void WaitCommandList(CommandList cmd, CommandList wait_for) = 0;
|
||||
// Tell the command list to wait for the specified queue to finish processing
|
||||
// It is useful when you want to wait for a previous frame, or just don't know which command list to wait for
|
||||
virtual void WaitQueue(CommandList cmd, QUEUE_TYPE wait_for) = 0;
|
||||
virtual void RenderPassBegin(const SwapChain* swapchain, CommandList cmd) = 0;
|
||||
virtual void RenderPassBegin(const RenderPassImage* images, uint32_t image_count, CommandList cmd, RenderPassFlags flags = RenderPassFlags::NONE) = 0;
|
||||
virtual void RenderPassEnd(CommandList cmd) = 0;
|
||||
|
||||
@@ -1610,7 +1610,38 @@ namespace dx12_internal
|
||||
}
|
||||
using namespace dx12_internal;
|
||||
|
||||
|
||||
#ifdef PLATFORM_XBOX
|
||||
std::mutex queue_locker;
|
||||
#endif // PLATFORM_XBOX
|
||||
|
||||
void GraphicsDevice_DX12::CommandQueue::signal(const Semaphore& semaphore)
|
||||
{
|
||||
if (queue == nullptr)
|
||||
return;
|
||||
HRESULT hr = queue->Signal(semaphore.fence.Get(), semaphore.fenceValue);
|
||||
assert(SUCCEEDED(hr));
|
||||
}
|
||||
void GraphicsDevice_DX12::CommandQueue::wait(const Semaphore& semaphore)
|
||||
{
|
||||
if (queue == nullptr)
|
||||
return;
|
||||
HRESULT hr = queue->Wait(semaphore.fence.Get(), semaphore.fenceValue);
|
||||
assert(SUCCEEDED(hr));
|
||||
}
|
||||
void GraphicsDevice_DX12::CommandQueue::submit()
|
||||
{
|
||||
if (queue == nullptr)
|
||||
return;
|
||||
if (submit_cmds.empty())
|
||||
return;
|
||||
|
||||
queue->ExecuteCommandLists(
|
||||
(UINT)submit_cmds.size(),
|
||||
submit_cmds.data()
|
||||
);
|
||||
|
||||
submit_cmds.clear();
|
||||
}
|
||||
|
||||
void GraphicsDevice_DX12::CopyAllocator::init(GraphicsDevice_DX12* device)
|
||||
{
|
||||
@@ -2461,15 +2492,6 @@ using namespace dx12_internal;
|
||||
}
|
||||
hr = queues[QUEUE_GRAPHICS].queue->SetName(L"QUEUE_GRAPHICS");
|
||||
assert(SUCCEEDED(hr));
|
||||
hr = device->CreateFence(0, D3D12_FENCE_FLAG_NONE, PPV_ARGS(queues[QUEUE_GRAPHICS].fence));
|
||||
assert(SUCCEEDED(hr));
|
||||
if (FAILED(hr))
|
||||
{
|
||||
std::stringstream ss("");
|
||||
ss << "ID3D12Device::CreateFence[QUEUE_GRAPHICS] failed! ERROR: 0x" << std::hex << hr;
|
||||
wi::helper::messageBox(ss.str(), "Error!");
|
||||
wi::platform::Exit();
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
@@ -2488,15 +2510,6 @@ using namespace dx12_internal;
|
||||
}
|
||||
hr = queues[QUEUE_COMPUTE].queue->SetName(L"QUEUE_COMPUTE");
|
||||
assert(SUCCEEDED(hr));
|
||||
hr = device->CreateFence(0, D3D12_FENCE_FLAG_NONE, PPV_ARGS(queues[QUEUE_COMPUTE].fence));
|
||||
assert(SUCCEEDED(hr));
|
||||
if (FAILED(hr))
|
||||
{
|
||||
std::stringstream ss("");
|
||||
ss << "ID3D12Device::CreateFence[QUEUE_COMPUTE] failed! ERROR: 0x" << std::hex << hr;
|
||||
wi::helper::messageBox(ss.str(), "Error!");
|
||||
wi::platform::Exit();
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
@@ -2515,15 +2528,6 @@ using namespace dx12_internal;
|
||||
}
|
||||
hr = queues[QUEUE_COPY].queue->SetName(L"QUEUE_COPY");
|
||||
assert(SUCCEEDED(hr));
|
||||
hr = device->CreateFence(0, D3D12_FENCE_FLAG_NONE, PPV_ARGS(queues[QUEUE_COPY].fence));
|
||||
assert(SUCCEEDED(hr));
|
||||
if (FAILED(hr))
|
||||
{
|
||||
std::stringstream ss("");
|
||||
ss << "ID3D12Device::CreateFence[QUEUE_COPY] failed! ERROR: 0x" << std::hex << hr;
|
||||
wi::helper::messageBox(ss.str(), "Error!");
|
||||
wi::platform::Exit();
|
||||
}
|
||||
}
|
||||
|
||||
if (SUCCEEDED(device.As(&video_device)))
|
||||
@@ -2539,15 +2543,6 @@ using namespace dx12_internal;
|
||||
capabilities |= GraphicsDeviceCapability::VIDEO_DECODE_H264;
|
||||
hr = queues[QUEUE_VIDEO_DECODE].queue->SetName(L"QUEUE_VIDEO_DECODE");
|
||||
assert(SUCCEEDED(hr));
|
||||
hr = device->CreateFence(0, D3D12_FENCE_FLAG_NONE, PPV_ARGS(queues[QUEUE_VIDEO_DECODE].fence));
|
||||
assert(SUCCEEDED(hr));
|
||||
if (FAILED(hr))
|
||||
{
|
||||
std::stringstream ss("");
|
||||
ss << "ID3D12Device::CreateFence[QUEUE_VIDEO_DECODE] failed! ERROR: 0x" << std::hex << hr;
|
||||
wi::helper::messageBox(ss.str(), "Error!");
|
||||
wi::platform::Exit();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3552,10 +3547,11 @@ using namespace dx12_internal;
|
||||
{
|
||||
resourcedesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
|
||||
}
|
||||
if (has_flag(desc->misc_flags, ResourceMiscFlag::VIDEO_DECODE))
|
||||
if (!has_flag(desc->bind_flags, BindFlag::DEPTH_STENCIL) && resourcedesc.SampleDesc.Count <= 1)
|
||||
{
|
||||
// Because video queue can only transition from/to VIDEO_ and COMMON states, we will use COMMON internally and rely on implicit transition for DPB textures
|
||||
// (See how the resource barrier on video queue overrides any user specified state into COMMON)
|
||||
// The copy and video queues have much stricter requirements to supported resource states, but they support
|
||||
// implicit promotion from COMMON state. Because user is not allowed to set resource to COMMON state, we use this flag
|
||||
// so textures automatically decay to COMMON state at the queue submit when they are left in a read-only state
|
||||
resourcedesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS;
|
||||
}
|
||||
|
||||
@@ -5261,7 +5257,6 @@ using namespace dx12_internal;
|
||||
commandlist.reset(GetBufferIndex());
|
||||
commandlist.queue = queue;
|
||||
commandlist.id = cmd_current;
|
||||
commandlist.waited_on.store(false);
|
||||
|
||||
if (commandlist.GetCommandList() == nullptr)
|
||||
{
|
||||
@@ -5382,38 +5377,56 @@ using namespace dx12_internal;
|
||||
assert(SUCCEEDED(hr));
|
||||
|
||||
CommandQueue& queue = queues[commandlist.queue];
|
||||
const bool dependency = !commandlist.signals.empty() || !commandlist.waits.empty() || !commandlist.wait_queues.empty();
|
||||
|
||||
if (dependency)
|
||||
{
|
||||
// If the current commandlist must resolve a dependency, then previous ones will be submitted before doing that:
|
||||
// This improves GPU utilization because not the whole batch of command lists will need to synchronize, but only the one that handles it
|
||||
queue.submit();
|
||||
}
|
||||
|
||||
queue.submit_cmds.push_back(commandlist.GetCommandList());
|
||||
|
||||
if (commandlist.waited_on.load() || !commandlist.waits.empty())
|
||||
if (dependency)
|
||||
{
|
||||
for (auto& wait : commandlist.waits)
|
||||
for (auto& wait : commandlist.wait_queues)
|
||||
{
|
||||
// record wait for signal on a previous submit:
|
||||
const CommandList_DX12& waitcommandlist = GetCommandList(wait);
|
||||
hr = queue.queue->Wait(
|
||||
queues[waitcommandlist.queue].fence.Get(),
|
||||
FRAMECOUNT * commandlists.size() + (uint64_t)waitcommandlist.id
|
||||
);
|
||||
assert(SUCCEEDED(hr));
|
||||
}
|
||||
CommandQueue& waitqueue = queues[wait.first];
|
||||
const Semaphore& semaphore = wait.second;
|
||||
|
||||
if (!queue.submit_cmds.empty())
|
||||
{
|
||||
queue.queue->ExecuteCommandLists(
|
||||
(UINT)queue.submit_cmds.size(),
|
||||
queue.submit_cmds.data()
|
||||
);
|
||||
queue.submit_cmds.clear();
|
||||
}
|
||||
// The WaitQueue operation will submit and signal the specified dependency queue:
|
||||
waitqueue.submit();
|
||||
waitqueue.signal(semaphore); // signals immediately after submit
|
||||
|
||||
if (commandlist.waited_on.load())
|
||||
{
|
||||
hr = queue.queue->Signal(
|
||||
queue.fence.Get(),
|
||||
FRAMECOUNT * commandlists.size() + (uint64_t)commandlist.id
|
||||
);
|
||||
assert(SUCCEEDED(hr));
|
||||
// The current queue will be waiting for the dependency queue to complete:
|
||||
queue.wait(semaphore);
|
||||
|
||||
// recycle semaphore:
|
||||
free_semaphore(semaphore);
|
||||
}
|
||||
commandlist.wait_queues.clear();
|
||||
|
||||
for(auto& semaphore : commandlist.waits)
|
||||
{
|
||||
// Wait for command list dependency:
|
||||
queue.wait(semaphore);
|
||||
|
||||
// semaphore is not recycled here, only the signals recycle themselves because the wait uses the same semaphore
|
||||
}
|
||||
commandlist.waits.clear();
|
||||
|
||||
queue.submit();
|
||||
|
||||
for(auto& semaphore : commandlist.signals)
|
||||
{
|
||||
// Signal this command list's completion:
|
||||
queue.signal(semaphore);
|
||||
|
||||
// recycle semaphore:
|
||||
free_semaphore(semaphore);
|
||||
}
|
||||
commandlist.signals.clear();
|
||||
}
|
||||
|
||||
for (auto& x : commandlist.pipelines_worker)
|
||||
@@ -5439,14 +5452,7 @@ using namespace dx12_internal;
|
||||
if (queue.queue == nullptr)
|
||||
continue;
|
||||
|
||||
if (!queue.submit_cmds.empty())
|
||||
{
|
||||
queue.queue->ExecuteCommandLists(
|
||||
(UINT)queue.submit_cmds.size(),
|
||||
queue.submit_cmds.data()
|
||||
);
|
||||
queue.submit_cmds.clear();
|
||||
}
|
||||
queue.submit();
|
||||
|
||||
hr = queue.queue->Signal(frame_fence[GetBufferIndex()][q].Get(), 1);
|
||||
assert(SUCCEEDED(hr));
|
||||
@@ -5947,8 +5953,14 @@ using namespace dx12_internal;
|
||||
CommandList_DX12& commandlist = GetCommandList(cmd);
|
||||
CommandList_DX12& commandlist_wait_for = GetCommandList(wait_for);
|
||||
assert(commandlist_wait_for.id < commandlist.id); // can't wait for future command list!
|
||||
commandlist.waits.push_back(wait_for);
|
||||
commandlist_wait_for.waited_on.store(true);
|
||||
Semaphore semaphore = new_semaphore();
|
||||
commandlist.waits.push_back(semaphore);
|
||||
commandlist_wait_for.signals.push_back(semaphore);
|
||||
}
|
||||
// Records that the command list must wait for the given queue.
//	A semaphore is taken from the pool and stored together with the queue type
//	in the command list's wait_queues; nothing is submitted to the GPU here.
void GraphicsDevice_DX12::WaitQueue(CommandList cmd, QUEUE_TYPE wait_for)
{
	auto& pending_queue_waits = GetCommandList(cmd).wait_queues;
	pending_queue_waits.emplace_back(wait_for, new_semaphore());
}
|
||||
void GraphicsDevice_DX12::RenderPassBegin(const SwapChain* swapchain, CommandList cmd)
|
||||
{
|
||||
|
||||
@@ -71,17 +71,22 @@ namespace wi::graphics
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE nullUAV = {};
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE nullSAM = {};
|
||||
|
||||
// A fence paired with the value that is signaled/waited on it.
//	CommandQueue::signal() and CommandQueue::wait() pass fence and fenceValue
//	to the D3D12 queue; new_semaphore() increments fenceValue every time the
//	semaphore is handed out from the pool.
struct Semaphore
|
||||
{
|
||||
// The D3D12 fence object (reference counted through ComPtr)
Microsoft::WRL::ComPtr<ID3D12Fence> fence;
|
||||
// The value used for the next signal/wait on the fence
uint64_t fenceValue = 0;
|
||||
};
|
||||
|
||||
struct CommandQueue
|
||||
{
|
||||
D3D12_COMMAND_QUEUE_DESC desc = {};
|
||||
Microsoft::WRL::ComPtr<ID3D12CommandQueue> queue;
|
||||
Microsoft::WRL::ComPtr<ID3D12Fence> fence;
|
||||
wi::vector<ID3D12CommandList*> submit_cmds;
|
||||
} queues[QUEUE_COUNT];
|
||||
|
||||
#ifdef PLATFORM_XBOX
|
||||
std::mutex queue_locker;
|
||||
#endif // PLATFORM_XBOX
|
||||
void signal(const Semaphore& semaphore);
|
||||
void wait(const Semaphore& semaphore);
|
||||
void submit();
|
||||
} queues[QUEUE_COUNT];
|
||||
|
||||
struct CopyAllocator
|
||||
{
|
||||
@@ -124,6 +129,28 @@ namespace wi::graphics
|
||||
void flush(bool graphics, CommandList cmd);
|
||||
};
|
||||
|
||||
wi::vector<Semaphore> semaphore_pool;
|
||||
std::mutex semaphore_pool_locker;
|
||||
// Takes a semaphore out of the pool (guarded by semaphore_pool_locker).
//	If the pool is empty, a new entry with a freshly created fence is added first.
//	The returned semaphore has its fenceValue incremented by one compared to
//	the value it had while stored in the pool.
Semaphore new_semaphore()
{
	std::scoped_lock pool_guard(semaphore_pool_locker);

	if (semaphore_pool.empty())
	{
		// Nothing to reuse, grow the pool by one semaphore:
		Semaphore& fresh = semaphore_pool.emplace_back();
		HRESULT hr = device->CreateFence(0, D3D12_FENCE_FLAG_NONE, PPV_ARGS(fresh.fence));
		assert(SUCCEEDED(hr));
	}

	Semaphore taken = std::move(semaphore_pool.back());
	semaphore_pool.pop_back();
	++taken.fenceValue;
	return taken;
}
|
||||
void free_semaphore(const Semaphore& semaphore)
|
||||
{
|
||||
std::scoped_lock lck(semaphore_pool_locker);
|
||||
semaphore_pool.push_back(semaphore);
|
||||
}
|
||||
|
||||
struct CommandList_DX12
|
||||
{
|
||||
Microsoft::WRL::ComPtr<ID3D12CommandAllocator> commandAllocators[BUFFERCOUNT][QUEUE_COUNT];
|
||||
@@ -133,8 +160,9 @@ namespace wi::graphics
|
||||
|
||||
QUEUE_TYPE queue = {};
|
||||
uint32_t id = 0;
|
||||
wi::vector<CommandList> waits;
|
||||
std::atomic_bool waited_on{ false };
|
||||
wi::vector<std::pair<QUEUE_TYPE, Semaphore>> wait_queues;
|
||||
wi::vector<Semaphore> waits;
|
||||
wi::vector<Semaphore> signals;
|
||||
|
||||
DescriptorBinder binder;
|
||||
GPULinearAllocator frame_allocators[BUFFERCOUNT];
|
||||
@@ -176,7 +204,9 @@ namespace wi::graphics
|
||||
void reset(uint32_t bufferindex)
|
||||
{
|
||||
buffer_index = bufferindex;
|
||||
wait_queues.clear();
|
||||
waits.clear();
|
||||
signals.clear();
|
||||
binder.reset();
|
||||
frame_allocators[buffer_index].reset();
|
||||
prev_pt = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
|
||||
@@ -336,6 +366,7 @@ namespace wi::graphics
|
||||
///////////////Thread-sensitive////////////////////////
|
||||
|
||||
void WaitCommandList(CommandList cmd, CommandList wait_for) override;
|
||||
void WaitQueue(CommandList cmd, QUEUE_TYPE wait_for) override;
|
||||
void RenderPassBegin(const SwapChain* swapchain, CommandList cmd) override;
|
||||
void RenderPassBegin(const RenderPassImage* images, uint32_t image_count, CommandList cmd, RenderPassFlags flags = RenderPassFlags::NONE) override;
|
||||
void RenderPassEnd(CommandList cmd) override;
|
||||
|
||||
@@ -348,16 +348,20 @@ namespace vulkan_internal
|
||||
case ResourceState::UNORDERED_ACCESS:
|
||||
return VK_IMAGE_LAYOUT_GENERAL;
|
||||
case ResourceState::COPY_SRC:
|
||||
return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
|
||||
case ResourceState::COPY_DST:
|
||||
return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
||||
// we can't assume transfer layout because it's allowed for resource to be used by multiple queues like DX12 (decay to common state), so this is a workaround
|
||||
// the problem is that image copy commands will require specifying the current layout, but different queues can often use textures in different layouts
|
||||
return VK_IMAGE_LAYOUT_GENERAL;
|
||||
case ResourceState::SHADING_RATE_SOURCE:
|
||||
return VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR;
|
||||
case ResourceState::VIDEO_DECODE_SRC:
|
||||
case ResourceState::VIDEO_DECODE_DST:
|
||||
return VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR;
|
||||
default:
|
||||
return VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
// combination of state flags will default to general
|
||||
// whether the combination of states is valid needs to be validated by the user
|
||||
// combining read-only states should be fine
|
||||
return VK_IMAGE_LAYOUT_GENERAL;
|
||||
}
|
||||
}
|
||||
constexpr VkShaderStageFlags _ConvertStageFlags(ShaderStage value)
|
||||
@@ -741,6 +745,7 @@ namespace vulkan_internal
|
||||
std::shared_ptr<GraphicsDevice_Vulkan::AllocationHandler> allocationhandler;
|
||||
VmaAllocation allocation = nullptr;
|
||||
VkImage resource = VK_NULL_HANDLE;
|
||||
VkImageLayout defaultLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
VkBuffer staging_resource = VK_NULL_HANDLE;
|
||||
struct TextureSubresource
|
||||
{
|
||||
@@ -1321,6 +1326,26 @@ using namespace vulkan_internal;
|
||||
|
||||
|
||||
|
||||
// Appends a signal operation for the semaphore to submit_signalSemaphoreInfos.
//	Only the entry is recorded here. Does nothing when this CommandQueue has no VkQueue.
void GraphicsDevice_Vulkan::CommandQueue::signal(VkSemaphore semaphore)
{
	if (queue != VK_NULL_HANDLE)
	{
		VkSemaphoreSubmitInfo info = {};
		info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO;
		info.semaphore = semaphore;
		info.value = 0; // not a timeline semaphore
		info.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
		submit_signalSemaphoreInfos.push_back(info);
	}
}
|
||||
// Appends a wait operation for the semaphore to submit_waitSemaphoreInfos.
//	Only the entry is recorded here. Does nothing when this CommandQueue has no VkQueue.
void GraphicsDevice_Vulkan::CommandQueue::wait(VkSemaphore semaphore)
{
	if (queue != VK_NULL_HANDLE)
	{
		VkSemaphoreSubmitInfo info = {};
		info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO;
		info.semaphore = semaphore;
		info.value = 0; // not a timeline semaphore
		info.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
		submit_waitSemaphoreInfos.push_back(info);
	}
}
|
||||
void GraphicsDevice_Vulkan::CommandQueue::submit(GraphicsDevice_Vulkan* device, VkFence fence)
|
||||
{
|
||||
if (queue == VK_NULL_HANDLE)
|
||||
@@ -1848,8 +1873,7 @@ using namespace vulkan_internal;
|
||||
auto texture_internal = to_internal((const Texture*)&resource);
|
||||
auto& subresource_descriptor = subresource >= 0 ? texture_internal->subresources_srv[subresource] : texture_internal->srv;
|
||||
imageInfos.back().imageView = subresource_descriptor.image_view;
|
||||
|
||||
imageInfos.back().imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
||||
imageInfos.back().imageLayout = texture_internal->defaultLayout;
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -3607,13 +3631,17 @@ using namespace vulkan_internal;
|
||||
{
|
||||
x.destroy();
|
||||
}
|
||||
vkDestroySemaphore(device, commandlist->semaphore, nullptr);
|
||||
}
|
||||
for (auto& x : pipelines_global)
|
||||
{
|
||||
vkDestroyPipeline(device, x.second, nullptr);
|
||||
}
|
||||
|
||||
for (auto& x : semaphore_pool)
|
||||
{
|
||||
vkDestroySemaphore(device, x, nullptr);
|
||||
}
|
||||
|
||||
vmaDestroyBuffer(allocationhandler->allocator, nullBuffer, nullBufferAllocation);
|
||||
vkDestroyBufferView(device, nullBufferView, nullptr);
|
||||
vmaDestroyImage(allocationhandler->allocator, nullImage1D, nullImageAllocation1D);
|
||||
@@ -4056,6 +4084,7 @@ using namespace vulkan_internal;
|
||||
{
|
||||
auto internal_state = std::make_shared<Texture_Vulkan>();
|
||||
internal_state->allocationhandler = allocationhandler;
|
||||
internal_state->defaultLayout = _ConvertImageLayout(desc->layout);
|
||||
texture->internal_state = internal_state;
|
||||
texture->type = GPUResource::Type::TEXTURE;
|
||||
texture->mapped_data = nullptr;
|
||||
@@ -7045,7 +7074,6 @@ using namespace vulkan_internal;
|
||||
commandlist.reset(GetBufferIndex());
|
||||
commandlist.queue = queue;
|
||||
commandlist.id = cmd_current;
|
||||
commandlist.waited_on.store(false);
|
||||
|
||||
if (commandlist.GetCommandBuffer() == VK_NULL_HANDLE)
|
||||
{
|
||||
@@ -7090,11 +7118,6 @@ using namespace vulkan_internal;
|
||||
commandlist.binder_pools[buffer].init(this);
|
||||
}
|
||||
|
||||
VkSemaphoreCreateInfo createInfo = {};
|
||||
createInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
|
||||
res = vkCreateSemaphore(device, &createInfo, nullptr, &commandlist.semaphore);
|
||||
assert(res == VK_SUCCESS);
|
||||
|
||||
commandlist.binder.init(this);
|
||||
}
|
||||
|
||||
@@ -7157,6 +7180,14 @@ using namespace vulkan_internal;
|
||||
assert(res == VK_SUCCESS);
|
||||
|
||||
CommandQueue& queue = queues[commandlist.queue];
|
||||
const bool dependency = !commandlist.signals.empty() || !commandlist.waits.empty() || !commandlist.wait_queues.empty();
|
||||
|
||||
if (dependency)
|
||||
{
|
||||
// If the current commandlist must resolve a dependency, then previous ones will be submitted before doing that:
|
||||
// This improves GPU utilization because not the whole batch of command lists will need to synchronize, but only the one that handles it
|
||||
queue.submit(this, VK_NULL_HANDLE);
|
||||
}
|
||||
|
||||
VkCommandBufferSubmitInfo& cbSubmitInfo = queue.submit_cmds.emplace_back();
|
||||
cbSubmitInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO;
|
||||
@@ -7183,29 +7214,43 @@ using namespace vulkan_internal;
|
||||
signalSemaphore.value = 0; // not a timeline semaphore
|
||||
}
|
||||
|
||||
if (commandlist.waited_on.load() || !commandlist.waits.empty())
|
||||
if (dependency)
|
||||
{
|
||||
for (auto& wait : commandlist.waits)
|
||||
for (auto& wait : commandlist.wait_queues)
|
||||
{
|
||||
CommandQueue& waitqueue = queues[wait.first];
|
||||
VkSemaphore semaphore = wait.second;
|
||||
|
||||
// The WaitQueue operation will submit and signal the specified dependency queue:
|
||||
waitqueue.signal(semaphore); // signal recorded, will be executed at submit
|
||||
waitqueue.submit(this, VK_NULL_HANDLE);
|
||||
|
||||
// The current queue will be waiting for the dependency queue to complete:
|
||||
queue.wait(semaphore);
|
||||
|
||||
// recycle semaphore
|
||||
free_semaphore(semaphore);
|
||||
}
|
||||
commandlist.wait_queues.clear();
|
||||
|
||||
for (auto& semaphore : commandlist.waits)
|
||||
{
|
||||
// Wait for command list dependency:
|
||||
CommandList_Vulkan& waitcommandlist = GetCommandList(wait);
|
||||
queue.wait(semaphore);
|
||||
|
||||
VkSemaphoreSubmitInfo& waitSemaphore = queue.submit_waitSemaphoreInfos.emplace_back();
|
||||
waitSemaphore.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO;
|
||||
waitSemaphore.semaphore = waitcommandlist.semaphore;
|
||||
waitSemaphore.value = 0; // not a timeline semaphore
|
||||
waitSemaphore.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
|
||||
// semaphore is not recycled here, only the signals recycle themselves because the wait uses the same semaphore
|
||||
}
|
||||
commandlist.waits.clear();
|
||||
|
||||
if (commandlist.waited_on.load())
|
||||
for (auto& semaphore : commandlist.signals)
|
||||
{
|
||||
// Signal this command list's completion:
|
||||
VkSemaphoreSubmitInfo& signalSemaphore = queue.submit_signalSemaphoreInfos.emplace_back();
|
||||
signalSemaphore.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO;
|
||||
signalSemaphore.semaphore = commandlist.semaphore;
|
||||
signalSemaphore.value = 0; // not a timeline semaphore
|
||||
signalSemaphore.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
|
||||
queue.signal(semaphore);
|
||||
|
||||
// recycle semaphore
|
||||
free_semaphore(semaphore);
|
||||
}
|
||||
commandlist.signals.clear();
|
||||
|
||||
queue.submit(this, VK_NULL_HANDLE);
|
||||
}
|
||||
@@ -7556,8 +7601,14 @@ using namespace vulkan_internal;
|
||||
CommandList_Vulkan& commandlist = GetCommandList(cmd);
|
||||
CommandList_Vulkan& commandlist_wait_for = GetCommandList(wait_for);
|
||||
assert(commandlist_wait_for.id < commandlist.id); // can't wait for future command list!
|
||||
commandlist.waits.push_back(wait_for);
|
||||
commandlist_wait_for.waited_on.store(true);
|
||||
VkSemaphore semaphore = new_semaphore();
|
||||
commandlist.waits.push_back(semaphore);
|
||||
commandlist_wait_for.signals.push_back(semaphore);
|
||||
}
|
||||
// Records that the command list must wait for the given queue.
//	A semaphore is taken from the pool and stored together with the queue type
//	in the command list's wait_queues; nothing is submitted to the GPU here.
void GraphicsDevice_Vulkan::WaitQueue(CommandList cmd, QUEUE_TYPE wait_for)
{
	auto& pending_queue_waits = GetCommandList(cmd).wait_queues;
	pending_queue_waits.emplace_back(wait_for, new_semaphore());
}
|
||||
void GraphicsDevice_Vulkan::RenderPassBegin(const SwapChain* swapchain, CommandList cmd)
|
||||
{
|
||||
@@ -8435,7 +8486,7 @@ using namespace vulkan_internal;
|
||||
commandlist.GetCommandBuffer(),
|
||||
internal_state_src->staging_resource,
|
||||
internal_state_dst->resource,
|
||||
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||
_ConvertImageLayout(ResourceState::COPY_DST),
|
||||
1,
|
||||
©
|
||||
);
|
||||
@@ -8473,7 +8524,7 @@ using namespace vulkan_internal;
|
||||
vkCmdCopyImageToBuffer(
|
||||
commandlist.GetCommandBuffer(),
|
||||
internal_state_src->resource,
|
||||
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||
_ConvertImageLayout(ResourceState::COPY_SRC),
|
||||
internal_state_dst->staging_resource,
|
||||
1,
|
||||
©
|
||||
@@ -8536,8 +8587,8 @@ using namespace vulkan_internal;
|
||||
copy.dstSubresource.mipLevel = 0;
|
||||
|
||||
vkCmdCopyImage(commandlist.GetCommandBuffer(),
|
||||
internal_state_src->resource, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||
internal_state_dst->resource, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||
internal_state_src->resource, _ConvertImageLayout(ResourceState::COPY_SRC),
|
||||
internal_state_dst->resource, _ConvertImageLayout(ResourceState::COPY_DST),
|
||||
1, ©
|
||||
);
|
||||
}
|
||||
@@ -8633,9 +8684,9 @@ using namespace vulkan_internal;
|
||||
vkCmdCopyImage(
|
||||
commandlist.GetCommandBuffer(),
|
||||
src_internal->resource,
|
||||
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||
_ConvertImageLayout(ResourceState::COPY_SRC),
|
||||
dst_internal->resource,
|
||||
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||
_ConvertImageLayout(ResourceState::COPY_DST),
|
||||
1,
|
||||
©
|
||||
);
|
||||
|
||||
@@ -120,6 +120,8 @@ namespace wi::graphics
|
||||
bool sparse_binding_supported = false;
|
||||
std::shared_ptr<std::mutex> locker;
|
||||
|
||||
void signal(VkSemaphore semaphore);
|
||||
void wait(VkSemaphore semaphore);
|
||||
void submit(GraphicsDevice_Vulkan* device, VkFence fence);
|
||||
|
||||
} queues[QUEUE_COUNT];
|
||||
@@ -193,17 +195,40 @@ namespace wi::graphics
|
||||
void reset();
|
||||
};
|
||||
|
||||
wi::vector<VkSemaphore> semaphore_pool;
|
||||
std::mutex semaphore_pool_locker;
|
||||
// Takes a semaphore out of the pool (guarded by semaphore_pool_locker).
//	If the pool is empty, a new semaphore is created into the pool first.
VkSemaphore new_semaphore()
{
	std::scoped_lock pool_guard(semaphore_pool_locker);

	if (semaphore_pool.empty())
	{
		// Nothing to reuse, grow the pool by one semaphore:
		VkSemaphoreCreateInfo create_info = {};
		create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
		VkSemaphore& fresh = semaphore_pool.emplace_back();
		VkResult res = vkCreateSemaphore(device, &create_info, nullptr, &fresh);
		assert(res == VK_SUCCESS);
	}

	VkSemaphore taken = semaphore_pool.back();
	semaphore_pool.pop_back();
	return taken;
}
|
||||
// Puts the semaphore handle back into the pool (guarded by semaphore_pool_locker)
//	so that a later new_semaphore() call can hand it out again.
void free_semaphore(VkSemaphore semaphore)
{
	std::scoped_lock pool_guard(semaphore_pool_locker);
	semaphore_pool.emplace_back(semaphore);
}
|
||||
|
||||
struct CommandList_Vulkan
|
||||
{
|
||||
VkSemaphore semaphore = VK_NULL_HANDLE;
|
||||
VkCommandPool commandPools[BUFFERCOUNT][QUEUE_COUNT] = {};
|
||||
VkCommandBuffer commandBuffers[BUFFERCOUNT][QUEUE_COUNT] = {};
|
||||
uint32_t buffer_index = 0;
|
||||
|
||||
QUEUE_TYPE queue = {};
|
||||
uint32_t id = 0;
|
||||
wi::vector<CommandList> waits;
|
||||
std::atomic_bool waited_on{ false };
|
||||
wi::vector<std::pair<QUEUE_TYPE, VkSemaphore>> wait_queues;
|
||||
wi::vector<VkSemaphore> waits;
|
||||
wi::vector<VkSemaphore> signals;
|
||||
|
||||
DescriptorBinder binder;
|
||||
DescriptorBinderPool binder_pools[BUFFERCOUNT];
|
||||
@@ -229,7 +254,9 @@ namespace wi::graphics
|
||||
void reset(uint32_t bufferindex)
|
||||
{
|
||||
buffer_index = bufferindex;
|
||||
wait_queues.clear();
|
||||
waits.clear();
|
||||
signals.clear();
|
||||
binder_pools[buffer_index].reset();
|
||||
binder.reset();
|
||||
frame_allocators[buffer_index].reset();
|
||||
@@ -370,6 +397,7 @@ namespace wi::graphics
|
||||
///////////////Thread-sensitive////////////////////////
|
||||
|
||||
void WaitCommandList(CommandList cmd, CommandList wait_for) override;
|
||||
void WaitQueue(CommandList cmd, QUEUE_TYPE wait_for) override;
|
||||
void RenderPassBegin(const SwapChain* swapchain, CommandList cmd) override;
|
||||
void RenderPassBegin(const RenderPassImage* images, uint32_t image_count, CommandList cmd, RenderPassFlags flags = RenderPassFlags::NONE) override;
|
||||
void RenderPassEnd(CommandList cmd) override;
|
||||
|
||||
@@ -187,6 +187,7 @@ namespace wi
|
||||
SubresourceData initdata;
|
||||
initdata.data_ptr = displacementdata.data();
|
||||
initdata.row_pitch = tex_desc.width * sizeof(XMFLOAT4);
|
||||
tex_desc.layout = ResourceState::COPY_SRC | ResourceState::SHADER_RESOURCE_COMPUTE;
|
||||
device->CreateTexture(&tex_desc, &initdata, &displacementMap);
|
||||
device->SetName(&displacementMap, "displacementMap");
|
||||
|
||||
@@ -429,16 +430,18 @@ namespace wi
|
||||
|
||||
wi::renderer::GenerateMipChain(gradientMap, wi::renderer::MIPGENFILTER_LINEAR, cmd);
|
||||
|
||||
// Copy displacement map to readback:
|
||||
device->Barrier(GPUBarrier::Image(&displacementMap, displacementMap.desc.layout, ResourceState::COPY_SRC), cmd);
|
||||
device->CopyResource(&displacementMap_readback[displacement_readback_index], &displacementMap, cmd);
|
||||
displacement_readback_valid[displacement_readback_index] = true;
|
||||
displacement_readback_index = (displacement_readback_index + 1) % device->GetBufferCount();
|
||||
device->Barrier(GPUBarrier::Image(&displacementMap, ResourceState::COPY_SRC, displacementMap.desc.layout), cmd);
|
||||
|
||||
device->EventEnd(cmd);
|
||||
}
|
||||
|
||||
// Records a copy of the displacement map into the current readback texture,
//	marks that readback slot as valid and advances the readback index,
//	wrapping around at the device's buffer count.
//	The copy is wrapped in an "Ocean Readback Copy" event on the command list.
void Ocean::CopyDisplacementMapReadback(wi::graphics::CommandList cmd) const
{
	GraphicsDevice* gfx = wi::graphics::GetDevice();
	const auto slot = displacement_readback_index;

	gfx->EventBegin("Ocean Readback Copy", cmd);

	gfx->CopyResource(&displacementMap_readback[slot], &displacementMap, cmd);
	displacement_readback_valid[slot] = true;

	// Next call will write into the following readback slot:
	displacement_readback_index = (slot + 1) % gfx->GetBufferCount();

	gfx->EventEnd(cmd);
}
|
||||
|
||||
void Ocean::Render(const CameraComponent& camera, CommandList cmd) const
|
||||
{
|
||||
|
||||
@@ -43,6 +43,8 @@ namespace wi
|
||||
void UpdateDisplacementMap(wi::graphics::CommandList cmd) const;
|
||||
void Render(const wi::scene::CameraComponent& camera, wi::graphics::CommandList cmd) const;
|
||||
|
||||
void CopyDisplacementMapReadback(wi::graphics::CommandList cmd) const;
|
||||
|
||||
const wi::graphics::Texture* getDisplacementMap() const;
|
||||
const wi::graphics::Texture* getGradientMap() const;
|
||||
|
||||
|
||||
@@ -800,6 +800,7 @@ namespace wi
|
||||
if (scene->terrains.GetCount() > 0)
|
||||
{
|
||||
cmd_copypages = device->BeginCommandList(QUEUE_COPY);
|
||||
device->WaitQueue(cmd_copypages, QUEUE_GRAPHICS); // sync to prev frame graphics
|
||||
wi::jobsystem::Execute(ctx, [this, cmd_copypages](wi::jobsystem::JobArgs args) {
|
||||
for (size_t i = 0; i < scene->terrains.GetCount(); ++i)
|
||||
{
|
||||
@@ -810,6 +811,7 @@ namespace wi
|
||||
|
||||
// Preparing the frame:
|
||||
CommandList cmd = device->BeginCommandList();
|
||||
device->WaitQueue(cmd, QUEUE_COMPUTE); // sync to prev frame compute (disallow prev frame overlapping a compute task into updating global scene resources for this frame)
|
||||
CommandList cmd_prepareframe = cmd;
|
||||
wi::renderer::ProcessDeferredTextureRequests(cmd); // Execute it first thing in the frame here, on main thread, to not allow other thread steal it and execute on different command list!
|
||||
wi::jobsystem::Execute(ctx, [this, cmd](wi::jobsystem::JobArgs args) {
|
||||
@@ -837,7 +839,7 @@ namespace wi
|
||||
|
||||
});
|
||||
|
||||
// async compute parallel with depth prepass
|
||||
// async compute parallel with depth prepass
|
||||
cmd = device->BeginCommandList(QUEUE_COMPUTE);
|
||||
CommandList cmd_prepareframe_async = cmd;
|
||||
device->WaitCommandList(cmd, cmd_prepareframe);
|
||||
@@ -1111,13 +1113,26 @@ namespace wi
|
||||
|
||||
});
|
||||
|
||||
CommandList cmd_ocean;
|
||||
if (scene->weather.IsOceanEnabled() && scene->ocean.IsValid())
|
||||
{
|
||||
// Ocean simulation can be updated async to opaque passes:
|
||||
cmd_ocean = device->BeginCommandList(QUEUE_COMPUTE);
|
||||
wi::renderer::UpdateOcean(visibility_main, cmd_ocean);
|
||||
|
||||
// Copying to readback is done on copy queue to use DMA instead of compute warps:
|
||||
CommandList cmd_oceancopy = device->BeginCommandList(QUEUE_COPY);
|
||||
device->WaitCommandList(cmd_oceancopy, cmd_ocean);
|
||||
wi::renderer::ReadbackOcean(visibility_main, cmd_oceancopy);
|
||||
}
|
||||
|
||||
// Shadow maps:
|
||||
if (getShadowsEnabled())
|
||||
{
|
||||
cmd = device->BeginCommandList();
|
||||
wi::jobsystem::Execute(ctx, [this, cmd](wi::jobsystem::JobArgs args) {
|
||||
wi::renderer::DrawShadowmaps(visibility_main, cmd);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
if (wi::renderer::GetVXGIEnabled() && getSceneUpdateEnabled())
|
||||
@@ -1331,12 +1346,49 @@ namespace wi
|
||||
});
|
||||
}
|
||||
|
||||
if (scene->weather.IsOceanEnabled())
|
||||
// Main camera weather compute effects depending on shadow maps, envmaps, etc, but don't depend on async surface pass:
|
||||
if (scene->weather.IsRealisticSky() || scene->weather.IsVolumetricClouds())
|
||||
{
|
||||
// Ocean simulation can be updated async to opaque passes:
|
||||
CommandList cmd_ocean = device->BeginCommandList(QUEUE_COMPUTE);
|
||||
device->WaitCommandList(cmd_ocean, cmd);
|
||||
wi::renderer::UpdateOcean(visibility_main, cmd_ocean);
|
||||
cmd = device->BeginCommandList();
|
||||
wi::jobsystem::Execute(ctx, [this, cmd](wi::jobsystem::JobArgs args) {
|
||||
|
||||
wi::renderer::BindCameraCB(
|
||||
*camera,
|
||||
camera_previous,
|
||||
camera_reflection,
|
||||
cmd
|
||||
);
|
||||
|
||||
if (scene->weather.IsRealisticSky())
|
||||
{
|
||||
wi::renderer::ComputeSkyAtmosphereSkyViewLut(cmd);
|
||||
|
||||
if (scene->weather.IsRealisticSkyAerialPerspective())
|
||||
{
|
||||
wi::renderer::ComputeSkyAtmosphereCameraVolumeLut(cmd);
|
||||
}
|
||||
}
|
||||
if (scene->weather.IsRealisticSky() && scene->weather.IsRealisticSkyAerialPerspective())
|
||||
{
|
||||
wi::renderer::Postprocess_AerialPerspective(
|
||||
aerialperspectiveResources,
|
||||
cmd
|
||||
);
|
||||
}
|
||||
if (scene->weather.IsVolumetricClouds())
|
||||
{
|
||||
wi::renderer::Postprocess_VolumetricClouds(
|
||||
volumetriccloudResources,
|
||||
cmd,
|
||||
*camera,
|
||||
camera_previous,
|
||||
camera_reflection,
|
||||
wi::renderer::GetTemporalAAEnabled() || getFSR2Enabled(),
|
||||
scene->weather.volumetricCloudsWeatherMapFirst.IsValid() ? &scene->weather.volumetricCloudsWeatherMapFirst.GetTexture() : nullptr,
|
||||
scene->weather.volumetricCloudsWeatherMapSecond.IsValid() ? &scene->weather.volumetricCloudsWeatherMapSecond.GetTexture() : nullptr
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Main camera opaque color pass:
|
||||
@@ -1354,17 +1406,6 @@ namespace wi
|
||||
cmd
|
||||
);
|
||||
|
||||
// This can't run in "main camera compute effects" async compute,
|
||||
// because it depends on shadow maps, and envmaps
|
||||
if (scene->weather.IsRealisticSky())
|
||||
{
|
||||
wi::renderer::ComputeSkyAtmosphereSkyViewLut(cmd);
|
||||
|
||||
if (scene->weather.IsRealisticSkyAerialPerspective())
|
||||
{
|
||||
wi::renderer::ComputeSkyAtmosphereCameraVolumeLut(cmd);
|
||||
}
|
||||
}
|
||||
if (getRaytracedReflectionEnabled())
|
||||
{
|
||||
wi::renderer::Postprocess_RTReflection(
|
||||
@@ -1395,26 +1436,6 @@ namespace wi
|
||||
cmd
|
||||
);
|
||||
}
|
||||
if (scene->weather.IsRealisticSky() && scene->weather.IsRealisticSkyAerialPerspective())
|
||||
{
|
||||
wi::renderer::Postprocess_AerialPerspective(
|
||||
aerialperspectiveResources,
|
||||
cmd
|
||||
);
|
||||
}
|
||||
if (scene->weather.IsVolumetricClouds())
|
||||
{
|
||||
wi::renderer::Postprocess_VolumetricClouds(
|
||||
volumetriccloudResources,
|
||||
cmd,
|
||||
*camera,
|
||||
camera_previous,
|
||||
camera_reflection,
|
||||
wi::renderer::GetTemporalAAEnabled() || getFSR2Enabled(),
|
||||
scene->weather.volumetricCloudsWeatherMapFirst.IsValid() ? &scene->weather.volumetricCloudsWeatherMapFirst.GetTexture() : nullptr,
|
||||
scene->weather.volumetricCloudsWeatherMapSecond.IsValid() ? &scene->weather.volumetricCloudsWeatherMapSecond.GetTexture() : nullptr
|
||||
);
|
||||
}
|
||||
|
||||
// Depth buffers were created on COMPUTE queue, so make them available for pixel shaders here:
|
||||
{
|
||||
@@ -1593,6 +1614,10 @@ namespace wi
|
||||
|
||||
// Transparents, post processes, etc:
|
||||
cmd = device->BeginCommandList();
|
||||
if (cmd_ocean.IsValid())
|
||||
{
|
||||
device->WaitCommandList(cmd, cmd_ocean);
|
||||
}
|
||||
wi::jobsystem::Execute(ctx, [this, cmd](wi::jobsystem::JobArgs args) {
|
||||
|
||||
GraphicsDevice* device = wi::graphics::GetDevice();
|
||||
@@ -1613,8 +1638,6 @@ namespace wi
|
||||
|
||||
RenderTransparents(cmd);
|
||||
|
||||
RenderPostprocessChain(cmd);
|
||||
|
||||
// Depth buffers expect a non-pixel shader resource state as they are generated on compute queue:
|
||||
{
|
||||
GPUBarrier barriers[] = {
|
||||
@@ -1624,20 +1647,24 @@ namespace wi
|
||||
};
|
||||
device->Barrier(barriers, arraysize(barriers), cmd);
|
||||
}
|
||||
|
||||
wi::renderer::TextureStreamingReadbackCopy(*scene, cmd);
|
||||
});
|
||||
|
||||
if (scene->IsWetmapProcessingRequired())
|
||||
{
|
||||
CommandList cmd_wetmaps = device->BeginCommandList(QUEUE_COMPUTE);
|
||||
device->WaitCommandList(cmd_wetmaps, cmd); // wait for transparents, it will be scheduled with late frame (GUI, etc)
|
||||
CommandList wetmap_cmd = device->BeginCommandList(QUEUE_COMPUTE);
|
||||
device->WaitCommandList(wetmap_cmd, cmd); // wait for transparents, it will be scheduled with late frame (GUI, etc)
|
||||
// Note: GPU processing of this compute task can overlap with beginning of the next frame because no one is waiting for it
|
||||
wi::jobsystem::Execute(ctx, [this, cmd_wetmaps](wi::jobsystem::JobArgs args) {
|
||||
wi::renderer::RefreshWetmaps(*scene, cmd_wetmaps);
|
||||
wi::jobsystem::Execute(ctx, [this, wetmap_cmd](wi::jobsystem::JobArgs args) {
|
||||
wi::renderer::RefreshWetmaps(visibility_main, wetmap_cmd);
|
||||
});
|
||||
}
|
||||
|
||||
cmd = device->BeginCommandList();
|
||||
wi::jobsystem::Execute(ctx, [this, cmd](wi::jobsystem::JobArgs args) {
|
||||
RenderPostprocessChain(cmd);
|
||||
wi::renderer::TextureStreamingReadbackCopy(*scene, cmd);
|
||||
});
|
||||
|
||||
RenderPath2D::Render();
|
||||
|
||||
wi::jobsystem::Wait(ctx);
|
||||
@@ -1995,6 +2022,7 @@ namespace wi
|
||||
);
|
||||
|
||||
// Note: volumetrics and light shafts are blended before transparent scene, because they used depth of the opaques
|
||||
// But the ocean is special, because it does have depth for them implicitly computed from ocean plane
|
||||
|
||||
if (getVolumeLightsEnabled() && visibility_main.IsRequestedVolumetricLights())
|
||||
{
|
||||
@@ -2131,6 +2159,9 @@ namespace wi
|
||||
{
|
||||
GraphicsDevice* device = wi::graphics::GetDevice();
|
||||
|
||||
wi::renderer::BindCommonResources(cmd);
|
||||
wi::renderer::BindCameraCB(*camera, camera_previous, camera_reflection, cmd);
|
||||
|
||||
const Texture* rt_first = nullptr; // not ping-ponged with read / write
|
||||
const Texture* rt_read = &rtMain;
|
||||
const Texture* rt_write = &rtPostprocess;
|
||||
|
||||
+44
-13
@@ -4570,6 +4570,18 @@ void UpdateRenderDataAsync(
|
||||
|
||||
BindCommonResources(cmd);
|
||||
|
||||
// Wetmaps will be initialized:
|
||||
for (uint32_t objectIndex = 0; objectIndex < vis.scene->objects.GetCount(); ++objectIndex)
|
||||
{
|
||||
const ObjectComponent& object = vis.scene->objects[objectIndex];
|
||||
if (!object.wetmap.IsValid() || object.wetmap_cleared)
|
||||
continue;
|
||||
device->ClearUAV(&object.wetmap, 0, cmd);
|
||||
object.wetmap_cleared = true;
|
||||
barrier_stack.push_back(GPUBarrier::Buffer(&object.wetmap, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE_COMPUTE));
|
||||
}
|
||||
barrier_stack_flush(cmd);
|
||||
|
||||
// Precompute static volumetric cloud textures:
|
||||
if (!volumetric_clouds_precomputed && vis.scene->weather.IsVolumetricClouds())
|
||||
{
|
||||
@@ -4766,6 +4778,8 @@ void UpdateOcean(
|
||||
CommandList cmd
|
||||
)
|
||||
{
|
||||
if (!vis.scene->weather.IsOceanEnabled() || !vis.scene->ocean.IsValid())
|
||||
return;
|
||||
bool occluded = false;
|
||||
if (vis.flags & wi::renderer::Visibility::ALLOW_OCCLUSION_CULLING)
|
||||
{
|
||||
@@ -4779,6 +4793,23 @@ void UpdateOcean(
|
||||
wi::profiler::EndRange(range);
|
||||
}
|
||||
}
|
||||
// Issues the ocean displacement map readback copy for the scene referenced by vis.
//	vis : visibility whose scene (weather, ocean) and flags are inspected
//	cmd : command list handed to Ocean::CopyDisplacementMapReadback
// Returns without doing anything when the ocean is disabled in the weather
// or when the scene's ocean object is not valid.
void ReadbackOcean(
|
||||
const Visibility& vis,
|
||||
CommandList cmd
|
||||
)
|
||||
{
|
||||
// Nothing to read back without an enabled and valid ocean
if (!vis.scene->weather.IsOceanEnabled() || !vis.scene->ocean.IsValid())
|
||||
return;
|
||||
// The occlusion state is only consulted when this visibility allows occlusion culling
bool occluded = false;
|
||||
if (vis.flags & wi::renderer::Visibility::ALLOW_OCCLUSION_CULLING)
|
||||
{
|
||||
occluded = vis.scene->ocean.IsOccluded();
|
||||
}
|
||||
// The copy is skipped when the ocean is reported as occluded
if (!occluded)
|
||||
{
|
||||
vis.scene->ocean.CopyDisplacementMapReadback(cmd);
|
||||
}
|
||||
}
|
||||
|
||||
void UpdateRaytracingAccelerationStructures(const Scene& scene, CommandList cmd)
|
||||
{
|
||||
@@ -6193,7 +6224,7 @@ void DrawScene(
|
||||
|
||||
BindCommonResources(cmd);
|
||||
|
||||
if (ocean && !skip_planar_reflection_objects && vis.scene->weather.IsOceanEnabled())
|
||||
if (ocean && !skip_planar_reflection_objects && vis.scene->weather.IsOceanEnabled() && vis.scene->ocean.IsValid())
|
||||
{
|
||||
if (!occlusion || !vis.scene->ocean.IsOccluded())
|
||||
{
|
||||
@@ -9906,9 +9937,9 @@ void RefreshLightmaps(const Scene& scene, CommandList cmd)
|
||||
}
|
||||
}
|
||||
|
||||
void RefreshWetmaps(const Scene& scene, CommandList cmd)
|
||||
void RefreshWetmaps(const Visibility& vis, CommandList cmd)
|
||||
{
|
||||
if (!scene.IsWetmapProcessingRequired())
|
||||
if (!vis.scene->IsWetmapProcessingRequired())
|
||||
return;
|
||||
|
||||
device->EventBegin("RefreshWetmaps", cmd);
|
||||
@@ -9916,43 +9947,44 @@ void RefreshWetmaps(const Scene& scene, CommandList cmd)
|
||||
BindCommonResources(cmd);
|
||||
device->BindComputeShader(&shaders[CSTYPE_WETMAP_UPDATE], cmd);
|
||||
|
||||
for (uint32_t objectIndex = 0; objectIndex < scene.objects.GetCount(); ++objectIndex)
|
||||
WetmapPush push = {};
|
||||
push.rain_amount = vis.scene->weather.rain_amount;
|
||||
|
||||
// Note: every object wetmap is updated, not just visible
|
||||
for (uint32_t objectIndex = 0; objectIndex < vis.scene->objects.GetCount(); ++objectIndex)
|
||||
{
|
||||
const ObjectComponent& object = scene.objects[objectIndex];
|
||||
const ObjectComponent& object = vis.scene->objects[objectIndex];
|
||||
if (!object.wetmap.IsValid())
|
||||
continue;
|
||||
|
||||
uint32_t vertexCount = uint32_t(object.wetmap.desc.size / GetFormatStride(object.wetmap.desc.format));
|
||||
|
||||
WetmapPush push = {};
|
||||
push.wetmap = device->GetDescriptorIndex(&object.wetmap, SubresourceType::UAV);
|
||||
|
||||
if (push.wetmap < 0)
|
||||
continue;
|
||||
|
||||
push.instanceID = objectIndex;
|
||||
push.rain_amount = scene.weather.rain_amount;
|
||||
device->PushConstants(&push, sizeof(push), cmd);
|
||||
|
||||
device->Dispatch((vertexCount + 63u) / 64u, 1, 1, cmd);
|
||||
}
|
||||
|
||||
for (uint32_t hairIndex = 0; hairIndex < scene.hairs.GetCount(); ++hairIndex)
|
||||
// Note: only visible hair particles will be updated, because invisible ones will not have valid vertices
|
||||
for (uint32_t hairIndex : vis.visibleHairs)
|
||||
{
|
||||
const wi::HairParticleSystem& hair = scene.hairs[hairIndex];
|
||||
const wi::HairParticleSystem& hair = vis.scene->hairs[hairIndex];
|
||||
if (!hair.wetmap.IsValid())
|
||||
continue;
|
||||
|
||||
uint32_t vertexCount = uint32_t(hair.wetmap.size / sizeof(uint16_t));
|
||||
|
||||
WetmapPush push = {};
|
||||
push.wetmap = hair.wetmap.descriptor_uav;
|
||||
|
||||
if (push.wetmap < 0)
|
||||
continue;
|
||||
|
||||
push.instanceID = uint32_t(scene.objects.GetCount() + hairIndex);
|
||||
push.rain_amount = scene.weather.rain_amount;
|
||||
push.instanceID = uint32_t(vis.scene->objects.GetCount() + hairIndex);
|
||||
device->PushConstants(&push, sizeof(push), cmd);
|
||||
|
||||
device->Dispatch((vertexCount + 63u) / 64u, 1, 1, cmd);
|
||||
@@ -16908,7 +16940,6 @@ void Postprocess_Downsample4x(
|
||||
|
||||
{
|
||||
GPUBarrier barriers[] = {
|
||||
GPUBarrier::Memory(),
|
||||
GPUBarrier::Image(&output, ResourceState::UNORDERED_ACCESS, output.desc.layout),
|
||||
};
|
||||
device->Barrier(barriers, arraysize(barriers), cmd);
|
||||
|
||||
@@ -205,6 +205,11 @@ namespace wi::renderer
|
||||
const Visibility& vis,
|
||||
wi::graphics::CommandList cmd
|
||||
);
|
||||
// Readback the ocean, can be on async compute or async copy
|
||||
void ReadbackOcean(
|
||||
const Visibility& vis,
|
||||
wi::graphics::CommandList cmd
|
||||
);
|
||||
|
||||
void UpdateRaytracingAccelerationStructures(const wi::scene::Scene& scene, wi::graphics::CommandList cmd);
|
||||
|
||||
@@ -312,7 +317,7 @@ namespace wi::renderer
|
||||
// Call once per frame to render lightmaps
|
||||
void RefreshLightmaps(const wi::scene::Scene& scene, wi::graphics::CommandList cmd);
|
||||
// Call once per frame to render wetmaps
|
||||
void RefreshWetmaps(const wi::scene::Scene& scene, wi::graphics::CommandList cmd);
|
||||
void RefreshWetmaps(const Visibility& vis, wi::graphics::CommandList cmd);
|
||||
// Run a compute shader that will resolve a MSAA depth buffer to a single-sample texture
|
||||
void ResolveMSAADepthBuffer(const wi::graphics::Texture& dst, const wi::graphics::Texture& src, wi::graphics::CommandList cmd);
|
||||
void DownsampleDepthBuffer(const wi::graphics::Texture& src, wi::graphics::CommandList cmd);
|
||||
|
||||
@@ -1036,6 +1036,7 @@ namespace wi::scene
|
||||
|
||||
surfelgi = {};
|
||||
ddgi = {};
|
||||
ocean = {};
|
||||
|
||||
aabb_objects.clear();
|
||||
aabb_lights.clear();
|
||||
@@ -4053,10 +4054,9 @@ namespace wi::scene
|
||||
desc.size = mesh.vertex_positions.size() * sizeof(uint16_t);
|
||||
desc.format = Format::R16_UNORM;
|
||||
desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
|
||||
wi::vector<uint8_t> zeroes(desc.size);
|
||||
std::fill(zeroes.begin(), zeroes.end(), 0);
|
||||
device->CreateBuffer(&desc, zeroes.data(), &object.wetmap);
|
||||
device->CreateBuffer(&desc, nullptr, &object.wetmap);
|
||||
device->SetName(&object.wetmap, "wetmap");
|
||||
object.wetmap_cleared = false;
|
||||
}
|
||||
else if(!object.IsWetmapEnabled() && object.wetmap.IsValid())
|
||||
{
|
||||
@@ -4759,6 +4759,10 @@ namespace wi::scene
|
||||
{
|
||||
OceanRegenerate();
|
||||
}
|
||||
if (!weather.IsOceanEnabled())
|
||||
{
|
||||
ocean = {};
|
||||
}
|
||||
|
||||
// Ocean occlusion status:
|
||||
if (!wi::renderer::GetFreezeCullingCameraEnabled() && weather.IsOceanEnabled())
|
||||
|
||||
@@ -850,6 +850,7 @@ namespace wi::scene
|
||||
wi::graphics::GPUBuffer vb_ao;
|
||||
int vb_ao_srv = -1;
|
||||
wi::graphics::GPUBuffer wetmap;
|
||||
mutable bool wetmap_cleared = false;
|
||||
|
||||
XMFLOAT3 center = XMFLOAT3(0, 0, 0);
|
||||
float radius = 0;
|
||||
|
||||
Reference in New Issue
Block a user