diff --git a/WickedEngine/wiGraphicsDevice_DX12.cpp b/WickedEngine/wiGraphicsDevice_DX12.cpp index fddf12b2f..4766f8021 100644 --- a/WickedEngine/wiGraphicsDevice_DX12.cpp +++ b/WickedEngine/wiGraphicsDevice_DX12.cpp @@ -1642,6 +1642,7 @@ std::mutex queue_locker; { this->device = device; #ifdef PLATFORM_XBOX + // Xbox only has 1 copy queue queue = device->queues[QUEUE_COPY].queue; #else // On PC we can create secondary copy queue for background uploading tasks: @@ -1690,6 +1691,7 @@ std::mutex queue_locker; dx12_check(cmd.commandList->Close()); dx12_check(device->device->CreateFence(0, D3D12_FENCE_FLAG_NONE, PPV_ARGS(cmd.fence))); + dx12_check(cmd.fence->SetName(L"CopyAllocator::fence")); GPUBufferDesc uploadBufferDesc; uploadBufferDesc.size = wi::math::GetNextPowerOfTwo(staging_size); @@ -1724,13 +1726,22 @@ std::mutex queue_locker; queue->ExecuteCommandLists(1, commandlists); dx12_check(queue->Signal(cmd.fence.Get(), cmd.fenceValueSignaled)); +#if 1 + // Wait on CPU: + dx12_check(cmd.fence->SetEventOnCompletion(cmd.fenceValueSignaled, nullptr)); +#else + // Wait on GPU: dx12_check(device->queues[QUEUE_GRAPHICS].queue->Wait(cmd.fence.Get(), cmd.fenceValueSignaled)); dx12_check(device->queues[QUEUE_COMPUTE].queue->Wait(cmd.fence.Get(), cmd.fenceValueSignaled)); +#ifndef PLATFORM_XBOX + // Xbox only has 1 copy queue, so it doesn't need to wait for itself dx12_check(device->queues[QUEUE_COPY].queue->Wait(cmd.fence.Get(), cmd.fenceValueSignaled)); +#endif // PLATFORM_XBOX if (device->queues[QUEUE_VIDEO_DECODE].queue) { dx12_check(device->queues[QUEUE_VIDEO_DECODE].queue->Wait(cmd.fence.Get(), cmd.fenceValueSignaled)); } +#endif } void GraphicsDevice_DX12::DescriptorBinder::init(GraphicsDevice_DX12* device) @@ -2531,6 +2542,7 @@ std::mutex queue_locker; wilog_messagebox("ID3D12Device::CreateFence[CBV_SRV_UAV] failed! ERROR: %s", wi::helper::GetPlatformErrorString(hr).c_str()); wi::platform::Exit(); } + dx12_check(descriptorheap_res.fence->SetName(L"DescriptorHeapGPU[CBV_SRV_UAV]::fence")); descriptorheap_res.fenceValue = descriptorheap_res.fence->GetCompletedValue(); allocationhandler->free_bindless_res.reserve(BINDLESS_RESOURCE_CAPACITY); @@ -2562,6 +2574,7 @@ std::mutex queue_locker; wilog_messagebox("ID3D12Device::CreateFence[SAMPLER] failed! ERROR: %s", wi::helper::GetPlatformErrorString(hr).c_str()); wi::platform::Exit(); } + dx12_check(descriptorheap_sam.fence->SetName(L"DescriptorHeapGPU[SAMPLER]::fence")); descriptorheap_sam.fenceValue = descriptorheap_sam.fence->GetCompletedValue(); allocationhandler->free_bindless_sam.reserve(BINDLESS_SAMPLER_CAPACITY); @@ -2582,6 +2595,21 @@ std::mutex queue_locker; wilog_messagebox("ID3D12Device::CreateFence[FRAME] failed! ERROR: %s", wi::helper::GetPlatformErrorString(hr).c_str()); wi::platform::Exit(); } + switch (queue) + { + case QUEUE_GRAPHICS: + dx12_check(frame_fence[buffer][queue]->SetName(L"frame_fence[QUEUE_GRAPHICS]")); + break; + case QUEUE_COMPUTE: + dx12_check(frame_fence[buffer][queue]->SetName(L"frame_fence[QUEUE_COMPUTE]")); + break; + case QUEUE_COPY: + dx12_check(frame_fence[buffer][queue]->SetName(L"frame_fence[QUEUE_COPY]")); + break; + case QUEUE_VIDEO_DECODE: + dx12_check(frame_fence[buffer][queue]->SetName(L"frame_fence[QUEUE_VIDEO_DECODE]")); + break; + }; } } @@ -2793,6 +2821,7 @@ std::mutex queue_locker; // Create fence to detect device removal { dx12_check(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(deviceRemovedFence.GetAddressOf()))); + dx12_check(deviceRemovedFence->SetName(L"deviceRemovedFence")); HANDLE deviceRemovedEvent = CreateEventW(NULL, FALSE, FALSE, NULL); dx12_check(deviceRemovedFence->SetEventOnCompletion(UINT64_MAX, deviceRemovedEvent)); @@ -5365,6 +5394,23 @@ std::mutex queue_locker; } } + // Sync up every queue to every other queue at the end of the frame: + // Note: it disables overlapping queues into the next frame + for (int queue1 = 0; queue1 < QUEUE_COUNT; ++queue1) + { + if (queues[queue1].queue == nullptr) + continue; + for (int queue2 = 0; queue2 < QUEUE_COUNT; ++queue2) + { + if (queue1 == queue2) + continue; + if (queues[queue2].queue == nullptr) + continue; + ID3D12Fence* fence = frame_fence[GetBufferIndex()][queue2].Get(); + queues[queue1].queue->Wait(fence, 1); + } + } + descriptorheap_res.SignalGPU(queues[QUEUE_GRAPHICS].queue.Get()); descriptorheap_sam.SignalGPU(queues[QUEUE_GRAPHICS].queue.Get()); @@ -5377,13 +5423,17 @@ std::mutex queue_locker; { if (queues[queue].queue == nullptr) continue; - if (FRAMECOUNT >= BUFFERCOUNT && frame_fence[bufferindex][queue]->GetCompletedValue() < 1) + ID3D12Fence* fence = frame_fence[bufferindex][queue].Get(); + if (FRAMECOUNT >= BUFFERCOUNT) { - // NULL event handle will simply wait immediately: - // https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12fence-seteventoncompletion#remarks - dx12_check(frame_fence[bufferindex][queue]->SetEventOnCompletion(1, NULL)); + if (fence->GetCompletedValue() < 1) + { + // nullptr event handle will simply wait immediately: + // https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12fence-seteventoncompletion#remarks + dx12_check(fence->SetEventOnCompletion(1, nullptr)); + } } - dx12_check(frame_fence[bufferindex][queue]->Signal(0)); + dx12_check(fence->Signal(0)); } allocationhandler->Update(FRAMECOUNT, BUFFERCOUNT); @@ -5653,7 +5703,7 @@ std::mutex queue_locker; dx12_check(queue.queue->Signal(fence.Get(), 1)); if (fence->GetCompletedValue() < 1) { - dx12_check(fence->SetEventOnCompletion(1, NULL)); + dx12_check(fence->SetEventOnCompletion(1, nullptr)); } fence->Signal(0); } @@ -7567,7 +7617,7 @@ std::mutex queue_locker; dx12_check(queue.queue->Signal(fence.Get(), 1)); if (fence->GetCompletedValue() < 1) { - dx12_check(fence->SetEventOnCompletion(1, NULL)); + dx12_check(fence->SetEventOnCompletion(1, nullptr)); } fence->Signal(0); diff --git a/WickedEngine/wiGraphicsDevice_DX12.h b/WickedEngine/wiGraphicsDevice_DX12.h index 158b63a78..8f3adf5d1 100644 --- a/WickedEngine/wiGraphicsDevice_DX12.h +++ b/WickedEngine/wiGraphicsDevice_DX12.h @@ -142,6 +142,7 @@ namespace wi::graphics { Semaphore& dependency = semaphore_pool.emplace_back(); dx12_check(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, PPV_ARGS(dependency.fence))); + dx12_check(dependency.fence.Get()->SetName(L"DependencySemaphore")); } Semaphore semaphore = std::move(semaphore_pool.back()); semaphore_pool.pop_back(); diff --git a/WickedEngine/wiGraphicsDevice_Vulkan.cpp b/WickedEngine/wiGraphicsDevice_Vulkan.cpp index 95cecb232..cf598df16 100644 --- a/WickedEngine/wiGraphicsDevice_Vulkan.cpp +++ b/WickedEngine/wiGraphicsDevice_Vulkan.cpp @@ -1298,7 +1298,34 @@ namespace vulkan_internal } using namespace vulkan_internal; + void GraphicsDevice_Vulkan::set_fence_name(VkFence fence, const char* name) + { + if (!debugUtils) + return; + if (fence == VK_NULL_HANDLE) + return; + VkDebugUtilsObjectNameInfoEXT info{ VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT }; + info.pObjectName = name; + info.objectType = VK_OBJECT_TYPE_FENCE; + info.objectHandle = (uint64_t)fence; + + vulkan_check(vkSetDebugUtilsObjectNameEXT(device, &info)); + } + void GraphicsDevice_Vulkan::set_semaphore_name(VkSemaphore semaphore, const char* name) + { + if (!debugUtils) + return; + if (semaphore == VK_NULL_HANDLE) + return; + + VkDebugUtilsObjectNameInfoEXT info{ VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT }; + info.pObjectName = name; + info.objectType = VK_OBJECT_TYPE_SEMAPHORE; + info.objectHandle = (uint64_t)semaphore; + + vulkan_check(vkSetDebugUtilsObjectNameEXT(device, &info)); + } void GraphicsDevice_Vulkan::CommandQueue::signal(VkSemaphore semaphore) { @@ -1326,6 +1353,17 @@ using namespace vulkan_internal; return; std::scoped_lock lock(*locker); + if (fence != VK_NULL_HANDLE) + { + // end of frame mark: + for (int q = 0; q < QUEUE_COUNT; ++q) + { + if (frame_semaphores[q] == VK_NULL_HANDLE) + continue; + signal(frame_semaphores[q]); + } + } + VkSubmitInfo2 submitInfo = {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2; submitInfo.commandBufferInfoCount = (uint32_t)submit_cmds.size(); @@ -1388,10 +1426,7 @@ using namespace vulkan_internal; { vkDestroyCommandPool(device->device, x.transferCommandPool, nullptr); vkDestroyCommandPool(device->device, x.transitionCommandPool, nullptr); - for (auto& sema : x.semaphores) - { - vkDestroySemaphore(device->device, sema, nullptr); - } + vkDestroySemaphore(device->device, x.semaphore, nullptr); vkDestroyFence(device->device, x.fence, nullptr); } } @@ -1439,12 +1474,12 @@ using namespace vulkan_internal; VkFenceCreateInfo fenceInfo = {}; fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; vulkan_check(vkCreateFence(device->device, &fenceInfo, nullptr, &cmd.fence)); + device->set_fence_name(cmd.fence, "CopyAllocator::fence"); VkSemaphoreCreateInfo semaphoreInfo = {}; semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - vulkan_check(vkCreateSemaphore(device->device, &semaphoreInfo, nullptr, &cmd.semaphores[0])); - vulkan_check(vkCreateSemaphore(device->device, &semaphoreInfo, nullptr, &cmd.semaphores[1])); - vulkan_check(vkCreateSemaphore(device->device, &semaphoreInfo, nullptr, &cmd.semaphores[2])); + vulkan_check(vkCreateSemaphore(device->device, &semaphoreInfo, nullptr, &cmd.semaphore)); + device->set_semaphore_name(cmd.semaphore, "CopyAllocator::semaphore"); GPUBufferDesc uploaddesc; uploaddesc.size = wi::math::GetNextPowerOfTwo(staging_size); @@ -1490,7 +1525,7 @@ using namespace vulkan_internal; { cbSubmitInfo.commandBuffer = cmd.transferCommandBuffer; - signalSemaphoreInfos[0].semaphore = cmd.semaphores[0]; // signal for graphics queue + signalSemaphoreInfos[0].semaphore = cmd.semaphore; // signal for graphics queue signalSemaphoreInfos[0].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; submitInfo.commandBufferInfoCount = 1; @@ -1503,64 +1538,55 @@ using namespace vulkan_internal; } { - waitSemaphoreInfo.semaphore = cmd.semaphores[0]; // wait for copy queue + waitSemaphoreInfo.semaphore = cmd.semaphore; // wait for copy queue waitSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; cbSubmitInfo.commandBuffer = cmd.transitionCommandBuffer; - signalSemaphoreInfos[0].semaphore = cmd.semaphores[1]; // signal for compute queue - signalSemaphoreInfos[0].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; // signal for compute queue submitInfo.waitSemaphoreInfoCount = 1; submitInfo.pWaitSemaphoreInfos = &waitSemaphoreInfo; submitInfo.commandBufferInfoCount = 1; submitInfo.pCommandBufferInfos = &cbSubmitInfo; - if (device->queues[QUEUE_VIDEO_DECODE].queue != VK_NULL_HANDLE) - { - signalSemaphoreInfos[1].semaphore = cmd.semaphores[2]; // signal for video decode queue - signalSemaphoreInfos[1].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; // signal for video decode queue - submitInfo.signalSemaphoreInfoCount = 2; - } - else - { - submitInfo.signalSemaphoreInfoCount = 1; - } - submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos; + submitInfo.signalSemaphoreInfoCount = 0; + submitInfo.pSignalSemaphoreInfos = nullptr; std::scoped_lock lock(*device->queues[QUEUE_GRAPHICS].locker); - vulkan_check(vkQueueSubmit2(device->queues[QUEUE_GRAPHICS].queue, 1, &submitInfo, VK_NULL_HANDLE)); + vulkan_check(vkQueueSubmit2(device->queues[QUEUE_GRAPHICS].queue, 1, &submitInfo, cmd.fence)); } - if (device->queues[QUEUE_VIDEO_DECODE].queue != VK_NULL_HANDLE) - { - waitSemaphoreInfo.semaphore = cmd.semaphores[2]; // wait for graphics queue - waitSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + vulkan_check(vkWaitForFences(device->device, 1, &cmd.fence, VK_TRUE, ~0ull)); - submitInfo.waitSemaphoreInfoCount = 1; - submitInfo.pWaitSemaphoreInfos = &waitSemaphoreInfo; - submitInfo.commandBufferInfoCount = 0; - submitInfo.pCommandBufferInfos = nullptr; - submitInfo.signalSemaphoreInfoCount = 0; - submitInfo.pSignalSemaphoreInfos = nullptr; + //if (device->queues[QUEUE_VIDEO_DECODE].queue != VK_NULL_HANDLE) + //{ + // waitSemaphoreInfo.semaphore = cmd.semaphores[2]; // wait for graphics queue + // waitSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; - std::scoped_lock lock(*device->queues[QUEUE_VIDEO_DECODE].locker); - vulkan_check(vkQueueSubmit2(device->queues[QUEUE_VIDEO_DECODE].queue, 1, &submitInfo, VK_NULL_HANDLE)); - } + // submitInfo.waitSemaphoreInfoCount = 1; + // submitInfo.pWaitSemaphoreInfos = &waitSemaphoreInfo; + // submitInfo.commandBufferInfoCount = 0; + // submitInfo.pCommandBufferInfos = nullptr; + // submitInfo.signalSemaphoreInfoCount = 0; + // submitInfo.pSignalSemaphoreInfos = nullptr; - // This must be final submit in this function because it will also signal a fence for state tracking by CPU! - { - waitSemaphoreInfo.semaphore = cmd.semaphores[1]; // wait for graphics queue - waitSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + // std::scoped_lock lock(*device->queues[QUEUE_VIDEO_DECODE].locker); + // vulkan_check(vkQueueSubmit2(device->queues[QUEUE_VIDEO_DECODE].queue, 1, &submitInfo, VK_NULL_HANDLE)); + //} - submitInfo.waitSemaphoreInfoCount = 1; - submitInfo.pWaitSemaphoreInfos = &waitSemaphoreInfo; - submitInfo.commandBufferInfoCount = 0; - submitInfo.pCommandBufferInfos = nullptr; - submitInfo.signalSemaphoreInfoCount = 0; - submitInfo.pSignalSemaphoreInfos = nullptr; + //// This must be final submit in this function because it will also signal a fence for state tracking by CPU! + //{ + // waitSemaphoreInfo.semaphore = cmd.semaphores[1]; // wait for graphics queue + // waitSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; - std::scoped_lock lock(*device->queues[QUEUE_COMPUTE].locker); - vulkan_check(vkQueueSubmit2(device->queues[QUEUE_COMPUTE].queue, 1, &submitInfo, cmd.fence)); // final submit also signals fence! - } + // submitInfo.waitSemaphoreInfoCount = 1; + // submitInfo.pWaitSemaphoreInfos = &waitSemaphoreInfo; + // submitInfo.commandBufferInfoCount = 0; + // submitInfo.pCommandBufferInfos = nullptr; + // submitInfo.signalSemaphoreInfoCount = 0; + // submitInfo.pSignalSemaphoreInfos = nullptr; + + // std::scoped_lock lock(*device->queues[QUEUE_COMPUTE].locker); + // vulkan_check(vkQueueSubmit2(device->queues[QUEUE_COMPUTE].queue, 1, &submitInfo, cmd.fence)); // final submit also signals fence! + //} std::scoped_lock lock(locker); freelist.push_back(cmd); @@ -3164,6 +3190,40 @@ using namespace vulkan_internal; wi::helper::messageBox("vkCreateFence[FRAME] failed! ERROR: " + std::string(string_VkResult(res)), "Error!"); wi::platform::Exit(); } + switch (queue) + { + case QUEUE_GRAPHICS: + set_fence_name(frame_fence[fr][queue], "frame_fence[QUEUE_GRAPHICS]"); + break; + case QUEUE_COMPUTE: + set_fence_name(frame_fence[fr][queue], "frame_fence[QUEUE_COMPUTE]"); + break; + case QUEUE_COPY: + set_fence_name(frame_fence[fr][queue], "frame_fence[QUEUE_COPY]"); + break; + case QUEUE_VIDEO_DECODE: + set_fence_name(frame_fence[fr][queue], "frame_fence[QUEUE_VIDEO_DECODE]"); + break; + }; + } + } + + // Frame end semaphores: + for (int queue1 = 0; queue1 < QUEUE_COUNT; ++queue1) + { + if (queues[queue1].queue == nullptr) + continue; + for (int queue2 = 0; queue2 < QUEUE_COUNT; ++queue2) + { + if (queue1 == queue2) + continue; + if (queues[queue2].queue == nullptr) + continue; + + VkSemaphoreCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + vulkan_check(vkCreateSemaphore(device, &info, nullptr, &queues[queue1].frame_semaphores[queue2])); + set_semaphore_name(queues[queue1].frame_semaphores[queue2], "CommandQueue::frame_semaphores"); } } @@ -3559,7 +3619,10 @@ using namespace vulkan_internal; { for (int queue = 0; queue < QUEUE_COUNT; ++queue) { - vkDestroyFence(device, frame_fence[fr][queue], nullptr); + VkFence fence = frame_fence[fr][queue]; + if (fence == VK_NULL_HANDLE) + continue; + vkDestroyFence(device, fence, nullptr); } } @@ -7155,6 +7218,7 @@ using namespace vulkan_internal; VkSemaphoreSubmitInfo& signalSemaphore = queue.submit_signalSemaphoreInfos.emplace_back(); signalSemaphore.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO; signalSemaphore.semaphore = internal_state->swapchainReleaseSemaphore; + signalSemaphore.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; signalSemaphore.value = 0; // not a timeline semaphore } @@ -7205,6 +7269,24 @@ using namespace vulkan_internal; } } + // Sync up every queue to every other queue at the end of the frame: + // Note: it disables overlapping queues into the next frame + for (int queue1 = 0; queue1 < QUEUE_COUNT; ++queue1) + { + if (queues[queue1].queue == nullptr) + continue; + for (int queue2 = 0; queue2 < QUEUE_COUNT; ++queue2) + { + if (queue1 == queue2) + continue; + VkSemaphore semaphore = queues[queue2].frame_semaphores[queue1]; + if (semaphore == VK_NULL_HANDLE) + continue; + queues[queue1].wait(semaphore); + } + queues[queue1].submit(this, VK_NULL_HANDLE); + } + // From here, we begin a new frame, this affects GetBufferIndex()! FRAMECOUNT++; @@ -7212,17 +7294,28 @@ using namespace vulkan_internal; if (FRAMECOUNT >= BUFFERCOUNT) { const uint32_t bufferindex = GetBufferIndex(); - VkFence fences[QUEUE_COUNT] = {}; - uint32_t fenceCount = 0; + VkFence waitFences[QUEUE_COUNT] = {}; + uint32_t waitFenceCount = 0; + VkFence resetFences[QUEUE_COUNT] = {}; + uint32_t resetFenceCount = 0; for (int queue = 0; queue < QUEUE_COUNT; ++queue) { - if (frame_fence[bufferindex][queue] == VK_NULL_HANDLE) + VkFence fence = frame_fence[bufferindex][queue]; + if (fence == VK_NULL_HANDLE) continue; - fences[fenceCount++] = frame_fence[bufferindex][queue]; + resetFences[resetFenceCount++] = fence; + if (vkGetFenceStatus(device, fence) == VK_SUCCESS) + continue; + waitFences[waitFenceCount++] = fence; + } + if (waitFenceCount > 0) + { + vulkan_check(vkWaitForFences(device, waitFenceCount, waitFences, VK_TRUE, ~0ull)); + } + if (resetFenceCount > 0) + { + vulkan_check(vkResetFences(device, resetFenceCount, resetFences)); } - - vulkan_check(vkWaitForFences(device, fenceCount, fences, VK_TRUE, ~0ull)); - vulkan_check(vkResetFences(device, fenceCount, fences)); } allocationhandler->Update(FRAMECOUNT, BUFFERCOUNT); diff --git a/WickedEngine/wiGraphicsDevice_Vulkan.h b/WickedEngine/wiGraphicsDevice_Vulkan.h index 6e2be34f2..1cd43c6b6 100644 --- a/WickedEngine/wiGraphicsDevice_Vulkan.h +++ b/WickedEngine/wiGraphicsDevice_Vulkan.h @@ -192,6 +192,7 @@ namespace wi::graphics struct CommandQueue { VkQueue queue = VK_NULL_HANDLE; + VkSemaphore frame_semaphores[QUEUE_COUNT] = {}; wi::vector swapchain_updates; wi::vector submit_swapchains; wi::vector submit_swapChainImageIndices; @@ -223,7 +224,7 @@ namespace wi::graphics VkCommandPool transitionCommandPool = VK_NULL_HANDLE; VkCommandBuffer transitionCommandBuffer = VK_NULL_HANDLE; VkFence fence = VK_NULL_HANDLE; - VkSemaphore semaphores[3] = { VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE }; // graphics, compute, video + VkSemaphore semaphore = VK_NULL_HANDLE; GPUBuffer uploadbuffer; constexpr bool IsValid() const { return transferCommandBuffer != VK_NULL_HANDLE; } }; @@ -291,6 +292,7 @@ namespace wi::graphics VkSemaphoreCreateInfo info = {}; info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; vulkan_check(vkCreateSemaphore(device, &info, nullptr, &sema)); + set_semaphore_name(sema, "DependencySemaphore"); } VkSemaphore semaphore = semaphore_pool.back(); semaphore_pool.pop_back(); @@ -394,6 +396,9 @@ namespace wi::graphics static constexpr uint32_t immutable_sampler_slot_begin = 100; wi::vector immutable_samplers; + void set_fence_name(VkFence fence, const char* name); + void set_semaphore_name(VkSemaphore semaphore, const char* name); + public: GraphicsDevice_Vulkan(wi::platform::window_type window, ValidationMode validationMode = ValidationMode::Disabled, GPUPreference preference = GPUPreference::Discrete); ~GraphicsDevice_Vulkan() override; diff --git a/WickedEngine/wiScene.cpp b/WickedEngine/wiScene.cpp index 3176bd82a..adf908f78 100644 --- a/WickedEngine/wiScene.cpp +++ b/WickedEngine/wiScene.cpp @@ -3811,9 +3811,10 @@ namespace wi::scene ShaderTransform& shadertransform = armature.boneData[boneIndex]; shadertransform.Create(mat); - if (skinningDataMapped != nullptr) + uint8_t* gpu_bone_dst = (uint8_t*)(gpu_dst + boneIndex); + if (skinningDataMapped != nullptr && ((size_t)gpu_bone_dst - size_t(skinningDataMapped) + sizeof(ShaderTransform)) <= skinningDataSize) { - std::memcpy(gpu_dst + boneIndex, &shadertransform, sizeof(shadertransform)); + std::memcpy(gpu_bone_dst, &shadertransform, sizeof(shadertransform)); } const float bone_radius = 1; @@ -3833,7 +3834,10 @@ namespace wi::scene const uint32_t dataSize = uint32_t(softbody.boneData.size() * sizeof(ShaderTransform)); softbody.gpuBoneOffset = skinningAllocator.fetch_add(dataSize); ShaderTransform* gpu_dst = (ShaderTransform*)((uint8_t*)skinningDataMapped + softbody.gpuBoneOffset); - std::memcpy(gpu_dst, softbody.boneData.data(), dataSize); + if (((size_t)gpu_dst - (size_t)skinningDataMapped + (size_t)dataSize) <= skinningDataSize) + { + std::memcpy(gpu_dst, softbody.boneData.data(), dataSize); + } }); } void Scene::RunMeshUpdateSystem(wi::jobsystem::context& ctx) diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index 2f33fb439..ccd3bf3bd 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wi::version // minor features, major updates, breaking compatibility changes const int minor = 71; // minor bug fixes, alterations, refactors, updates - const int revision = 706; + const int revision = 707; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);