diff --git a/Documentation/WickedEngine-Documentation.md b/Documentation/WickedEngine-Documentation.md index 9af6975db..81cd64e20 100644 --- a/Documentation/WickedEngine-Documentation.md +++ b/Documentation/WickedEngine-Documentation.md @@ -509,12 +509,14 @@ Unordered Access Views, in other words resources with read-write access. `GPUBuf - Constant buffers
Only `GPUBuffer`s can be set as constant buffers if they were created with a `BindFlags` in their description that has the `BIND_CONSTANT_BUFFER` bit set. The resource can't be a constant buffer at the same time when it is also a shader resource or a UAV or a vertex buffer or an index buffer. Use the `GraphicsDevice::BindConstantBuffer()` function to bind constant buffers. - Samplers
-Only `Sampler` can be bound as sampler. Use the `GraphicsDevice::BindSampler()` function to bind samplers. +Only `Sampler` can be bound as sampler. Use the `GraphicsDevice::BindSampler()` function to bind samplers. Additionally, you can specify auto samplers and common samplers and avoid binding them every time. There are some limitations on the maximum value of slots that can be used, these are defined as compile time constants in [Graphics device SharedInternals](../WickedEngine/wiGraphicsDevice_SharedInternals.h). The user can modify these and recompile the engine if the predefined slots are not enough. This could slightly affect performance. Remarks: - Vulkan and DX12 devices make an effort to combine descriptors across shader stages, so overlapping descriptors will not be supported with those APIs to some extent. For example it is OK, to have a constant buffer on slot 0 (b0) in a vertex shader while having a Texture2D on slot 0 (t0) in pixel shader. However, having a StructuredBuffer on vertex shader slot 0 (t0) and a Texture2D in pixel shader slot 0 (t0) will not work correctly, as only one of them will be bound to a pipeline state. This is made for performance reasons and to retain compatibility with the [advanced binding model](#resource-binding-advanced). +- Auto samplers can be added to `Shader`s. These samplers will always be bound to the shader stage as static samplers. The user doesn't need to use the `BindSampler()` function for these. +- Common samplers can be set on the graphics device. These samplers will be bound to all shaders that are created after the common samplers have been set. The user doesn't need to use the `BindSampler()` function for these. ##### Resource Binding (Advanced) This resource binding model is based on a combination of DirectX 12 and Vulkan resource binding model and allows the developer to use a more fine grained resource management that can be fitted for specific use cases more optimally. 
For example, a bindless descriptor model could be implemented with descriptor arrays, or a system where descriptors are grouped by update frequency into tables. The developer can query whether the `GraphicsDevice` supports advanced binding model, by querying the `GRAPHICSDEVICE_CAPABILITY_DESCRIPTOR_MANAGEMENT` capability with `GraphicsDevice::CheckCapability()`. diff --git a/Editor/RendererWindow.cpp b/Editor/RendererWindow.cpp index 3f9238b68..0915bc2a6 100644 --- a/Editor/RendererWindow.cpp +++ b/Editor/RendererWindow.cpp @@ -434,7 +434,7 @@ void RendererWindow::Create(EditorComponent* editor) break; } - wiRenderer::ModifySampler(desc, SSLOT_OBJECTSHADER); + wiRenderer::ModifyObjectSampler(desc); }); textureQualityComboBox.SetSelected(3); @@ -448,7 +448,7 @@ void RendererWindow::Create(EditorComponent* editor) mipLodBiasSlider.OnSlide([&](wiEventArgs args) { wiGraphics::SamplerDesc desc = wiRenderer::GetSampler(SSLOT_OBJECTSHADER)->GetDesc(); desc.MipLODBias = wiMath::Clamp(args.fValue, -15.9f, 15.9f); - wiRenderer::ModifySampler(desc, SSLOT_OBJECTSHADER); + wiRenderer::ModifyObjectSampler(desc); }); AddWidget(&mipLodBiasSlider); diff --git a/WickedEngine/wiFont.cpp b/WickedEngine/wiFont.cpp index b085e2092..1219ede35 100644 --- a/WickedEngine/wiFont.cpp +++ b/WickedEngine/wiFont.cpp @@ -239,6 +239,11 @@ void LoadShaders() wiRenderer::LoadShader(VS, vertexShader, "fontVS.cso"); + + pixelShader.auto_samplers.emplace_back(); + pixelShader.auto_samplers.back().sampler = sampler; + pixelShader.auto_samplers.back().slot = SSLOT_ONDEMAND1; + wiRenderer::LoadShader(PS, pixelShader, "fontPS.cso"); @@ -593,7 +598,6 @@ void Draw_internal(const T* text, size_t text_length, const wiFontParams& params device->BindConstantBuffer(VS, &constantBuffer, CB_GETBINDSLOT(FontCB), cmd); device->BindConstantBuffer(PS, &constantBuffer, CB_GETBINDSLOT(FontCB), cmd); device->BindResource(PS, &texture, TEXSLOT_FONTATLAS, cmd); - device->BindSampler(PS, &sampler, SSLOT_ONDEMAND1, cmd); 
device->BindResource(VS, mem.buffer, 0, cmd); diff --git a/WickedEngine/wiGraphics.h b/WickedEngine/wiGraphics.h index 8ad478ef5..a0ee40016 100644 --- a/WickedEngine/wiGraphics.h +++ b/WickedEngine/wiGraphics.h @@ -731,19 +731,25 @@ namespace wiGraphics inline bool IsValid() const { return internal_state.get() != nullptr; } }; - struct Shader : public GraphicsDeviceChild - { - SHADERSTAGE stage = SHADERSTAGE_COUNT; - std::vector code; - const RootSignature* rootSignature = nullptr; - }; - struct Sampler : public GraphicsDeviceChild { SamplerDesc desc; const SamplerDesc& GetDesc() const { return desc; } }; + struct StaticSampler + { + Sampler sampler; + uint32_t slot = 0; + }; + + struct Shader : public GraphicsDeviceChild + { + SHADERSTAGE stage = SHADERSTAGE_COUNT; + std::vector code; + const RootSignature* rootSignature = nullptr; + std::vector auto_samplers; // ability to set static samplers without explicit root signature + }; struct GPUResource : public GraphicsDeviceChild { @@ -993,11 +999,6 @@ namespace wiGraphics uint32_t slot = 0; uint32_t count = 1; }; - struct StaticSampler - { - Sampler sampler; - uint32_t slot = 0; - }; struct DescriptorTable : public GraphicsDeviceChild { SHADERSTAGE stage = SHADERSTAGE_COUNT; diff --git a/WickedEngine/wiGraphicsDevice.h b/WickedEngine/wiGraphicsDevice.h index 8d0b256ab..ed298638b 100644 --- a/WickedEngine/wiGraphicsDevice.h +++ b/WickedEngine/wiGraphicsDevice.h @@ -51,6 +51,8 @@ namespace wiGraphics virtual void Unmap(const GPUResource* resource) = 0; virtual bool QueryRead(const GPUQuery* query, GPUQueryResult* result) = 0; + virtual void SetCommonSampler(const StaticSampler* sam) = 0; + virtual void SetName(GPUResource* pResource, const char* name) = 0; virtual void PresentBegin(CommandList cmd) = 0; diff --git a/WickedEngine/wiGraphicsDevice_DX11.cpp b/WickedEngine/wiGraphicsDevice_DX11.cpp index accb959d9..82effa833 100644 --- a/WickedEngine/wiGraphicsDevice_DX11.cpp +++ b/WickedEngine/wiGraphicsDevice_DX11.cpp 
@@ -6,14 +6,6 @@ #include "ResourceMapping.h" #include "wiBackLog.h" -#ifdef PLATFORM_UWP -// UWP will use static link + /DELAYLOAD linker feature for the dlls (optionally) -#pragma comment(lib,"d3d11.lib") -#define dll_D3D11CreateDevice D3D11CreateDevice -#else -static PFN_D3D11_CREATE_DEVICE dll_D3D11CreateDevice = nullptr; -#endif // PLATFORM_UWP - #pragma comment(lib,"dxguid.lib") #include @@ -32,6 +24,14 @@ namespace wiGraphics namespace DX11_Internal { + +#ifdef PLATFORM_UWP + // UWP will use static link + /DELAYLOAD linker feature for the dlls (optionally) +#pragma comment(lib,"d3d11.lib") +#else + static PFN_D3D11_CREATE_DEVICE D3D11CreateDevice = nullptr; +#endif // PLATFORM_UWP + // Engine -> Native converters constexpr uint32_t _ParseBindFlags(uint32_t value) @@ -1188,30 +1188,70 @@ void GraphicsDevice_DX11::pso_validate(CommandList cmd) { deviceContexts[cmd]->VSSetShader(vs, nullptr, 0); prev_vs[cmd] = vs; + + if (desc.vs != nullptr) + { + for (auto& x : desc.vs->auto_samplers) + { + BindSampler(VS, &x.sampler, x.slot, cmd); + } + } } ID3D11PixelShader* ps = desc.ps == nullptr ? nullptr : static_cast(desc.ps->internal_state.get())->resource.Get(); if (ps != prev_ps[cmd]) { deviceContexts[cmd]->PSSetShader(ps, nullptr, 0); prev_ps[cmd] = ps; + + if (desc.ps != nullptr) + { + for (auto& x : desc.ps->auto_samplers) + { + BindSampler(PS, &x.sampler, x.slot, cmd); + } + } } ID3D11HullShader* hs = desc.hs == nullptr ? nullptr : static_cast(desc.hs->internal_state.get())->resource.Get(); if (hs != prev_hs[cmd]) { deviceContexts[cmd]->HSSetShader(hs, nullptr, 0); prev_hs[cmd] = hs; + + if (desc.hs != nullptr) + { + for (auto& x : desc.hs->auto_samplers) + { + BindSampler(HS, &x.sampler, x.slot, cmd); + } + } } ID3D11DomainShader* ds = desc.ds == nullptr ? 
nullptr : static_cast(desc.ds->internal_state.get())->resource.Get(); if (ds != prev_ds[cmd]) { deviceContexts[cmd]->DSSetShader(ds, nullptr, 0); prev_ds[cmd] = ds; + + if (desc.ds != nullptr) + { + for (auto& x : desc.ds->auto_samplers) + { + BindSampler(DS, &x.sampler, x.slot, cmd); + } + } } ID3D11GeometryShader* gs = desc.gs == nullptr ? nullptr : static_cast(desc.gs->internal_state.get())->resource.Get(); if (gs != prev_gs[cmd]) { deviceContexts[cmd]->GSSetShader(gs, nullptr, 0); prev_gs[cmd] = gs; + + if (desc.gs != nullptr) + { + for (auto& x : desc.gs->auto_samplers) + { + BindSampler(GS, &x.sampler, x.slot, cmd); + } + } } ID3D11BlendState* bs = desc.bs == nullptr ? nullptr : internal_state->bs.Get(); @@ -1305,8 +1345,8 @@ GraphicsDevice_DX11::GraphicsDevice_DX11(wiPlatform::window_type window, bool fu #ifndef PLATFORM_UWP HMODULE dx11 = LoadLibraryEx(L"d3d11.dll", nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32); - dll_D3D11CreateDevice = (PFN_D3D11_CREATE_DEVICE)GetProcAddress(dx11, "D3D11CreateDevice"); - assert(dll_D3D11CreateDevice != nullptr); + D3D11CreateDevice = (PFN_D3D11_CREATE_DEVICE)GetProcAddress(dx11, "D3D11CreateDevice"); + assert(D3D11CreateDevice != nullptr); #endif // PLATFORM_UWP HRESULT hr = E_FAIL; @@ -1336,7 +1376,7 @@ GraphicsDevice_DX11::GraphicsDevice_DX11(wiPlatform::window_type window, bool fu for (uint32_t driverTypeIndex = 0; driverTypeIndex < numDriverTypes; driverTypeIndex++) { driverType = driverTypes[driverTypeIndex]; - hr = dll_D3D11CreateDevice(nullptr, driverType, nullptr, createDeviceFlags, featureLevels, numFeatureLevels, D3D11_SDK_VERSION, &device + hr = D3D11CreateDevice(nullptr, driverType, nullptr, createDeviceFlags, featureLevels, numFeatureLevels, D3D11_SDK_VERSION, &device , &featureLevel, &immediateContext); if (SUCCEEDED(hr)) @@ -2515,6 +2555,11 @@ bool GraphicsDevice_DX11::QueryRead(const GPUQuery* query, GPUQueryResult* resul return hr != S_FALSE; } +void GraphicsDevice_DX11::SetCommonSampler(const StaticSampler* 
sam) +{ + common_samplers.push_back(*sam); +} + void GraphicsDevice_DX11::SetName(GPUResource* pResource, const char* name) { auto internal_state = to_internal(pResource); @@ -2566,6 +2611,14 @@ CommandList GraphicsDevice_DX11::BeginCommandList() BindPipelineState(nullptr, cmd); BindComputeShader(nullptr, cmd); + for (int stage = 0; stage < SHADERSTAGE_COUNT; ++stage) + { + for (auto& sam : common_samplers) + { + BindSampler((SHADERSTAGE)stage, &sam.sampler, sam.slot, cmd); + } + } + D3D11_VIEWPORT vp = {}; vp.Width = (float)RESOLUTIONWIDTH; vp.Height = (float)RESOLUTIONHEIGHT; @@ -3048,6 +3101,14 @@ void GraphicsDevice_DX11::BindComputeShader(const Shader* cs, CommandList cmd) { deviceContexts[cmd]->CSSetShader(_cs, nullptr, 0); prev_cs[cmd] = _cs; + + if (cs != nullptr) + { + for (auto& x : cs->auto_samplers) + { + BindSampler(CS, &x.sampler, x.slot, cmd); + } + } } } void GraphicsDevice_DX11::Draw(uint32_t vertexCount, uint32_t startVertexLocation, CommandList cmd) diff --git a/WickedEngine/wiGraphicsDevice_DX11.h b/WickedEngine/wiGraphicsDevice_DX11.h index 4dfb7a7b6..f064ab239 100644 --- a/WickedEngine/wiGraphicsDevice_DX11.h +++ b/WickedEngine/wiGraphicsDevice_DX11.h @@ -20,7 +20,7 @@ namespace wiGraphics class GraphicsDevice_DX11 : public GraphicsDevice { - private: + protected: D3D_DRIVER_TYPE driverType; D3D_FEATURE_LEVEL featureLevel; Microsoft::WRL::ComPtr device; @@ -74,6 +74,8 @@ namespace wiGraphics std::atomic cmd_count{ 0 }; + std::vector common_samplers; + struct EmptyResourceHandle {}; // only care about control-block std::shared_ptr emptyresource; @@ -95,6 +97,8 @@ namespace wiGraphics void Unmap(const GPUResource* resource) override; bool QueryRead(const GPUQuery* query, GPUQueryResult* result) override; + void SetCommonSampler(const StaticSampler* sam) override; + void SetName(GPUResource* pResource, const char* name) override; void PresentBegin(CommandList cmd) override; diff --git a/WickedEngine/wiGraphicsDevice_DX12.cpp 
b/WickedEngine/wiGraphicsDevice_DX12.cpp index 8ee0c557d..544aab3bf 100644 --- a/WickedEngine/wiGraphicsDevice_DX12.cpp +++ b/WickedEngine/wiGraphicsDevice_DX12.cpp @@ -23,20 +23,9 @@ #include #include -#ifdef PLATFORM_UWP -// UWP will use static link + /DELAYLOAD linker feature for the dlls (optionally) -#pragma comment(lib,"d3d12.lib") -#pragma comment(lib,"dxgi.lib") -#define dll_CreateDXGIFactory2 CreateDXGIFactory2 -#define dll_D3D12CreateDevice D3D12CreateDevice -#define dll_D3D12SerializeVersionedRootSignature D3D12SerializeVersionedRootSignature -#else -static decltype(&CreateDXGIFactory2) dll_CreateDXGIFactory2 = nullptr; -static PFN_D3D12_CREATE_DEVICE dll_D3D12CreateDevice = nullptr; -static PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE dll_D3D12SerializeVersionedRootSignature = nullptr; -#endif // PLATFORM_UWP - -static DxcCreateInstanceProc dll_DxcCreateInstance = nullptr; +// Choose how many constant buffers will be placed in root in auto root signature: +#define CONSTANT_BUFFER_AUTO_PLACEMENT_IN_ROOT 4 +static_assert(GPU_RESOURCE_HEAP_CBV_COUNT < 32, "cbv root mask must fit into uint32_t!"); using namespace Microsoft::WRL; @@ -45,6 +34,20 @@ namespace wiGraphics namespace DX12_Internal { + +#ifdef PLATFORM_UWP + // UWP will use static link + /DELAYLOAD linker feature for the dlls (optionally) +#pragma comment(lib,"d3d12.lib") +#pragma comment(lib,"dxgi.lib") +#else + using PFN_CREATE_DXGI_FACTORY_2 = decltype(&CreateDXGIFactory2); + static PFN_CREATE_DXGI_FACTORY_2 CreateDXGIFactory2 = nullptr; + static PFN_D3D12_CREATE_DEVICE D3D12CreateDevice = nullptr; + static PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE D3D12SerializeVersionedRootSignature = nullptr; +#endif // PLATFORM_UWP + + static DxcCreateInstanceProc DxcCreateInstance = nullptr; + // Engine -> Native converters inline uint32_t _ParseColorWriteMask(uint32_t value) @@ -736,6 +739,23 @@ namespace DX12_Internal } return D3D12_SHADING_RATE_1X1; } + constexpr D3D12_STATIC_SAMPLER_DESC 
_ConvertStaticSampler(const StaticSampler& x) + { + D3D12_STATIC_SAMPLER_DESC desc = {}; + desc.ShaderRegister = x.slot; + desc.Filter = _ConvertFilter(x.sampler.desc.Filter); + desc.AddressU = _ConvertTextureAddressMode(x.sampler.desc.AddressU); + desc.AddressV = _ConvertTextureAddressMode(x.sampler.desc.AddressV); + desc.AddressW = _ConvertTextureAddressMode(x.sampler.desc.AddressW); + desc.MipLODBias = x.sampler.desc.MipLODBias; + desc.MaxAnisotropy = x.sampler.desc.MaxAnisotropy; + desc.ComparisonFunc = _ConvertComparisonFunc(x.sampler.desc.ComparisonFunc); + desc.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + desc.MinLOD = x.sampler.desc.MinLOD; + desc.MaxLOD = x.sampler.desc.MaxLOD; + desc.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + return desc; + } // Native -> Engine converters @@ -1008,7 +1028,7 @@ namespace DX12_Internal this->allocationhandler = device->allocationhandler; type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; handle = allocationhandler->descriptors_res.allocate(); - device->device->CreateConstantBufferView(&cbv, handle); + allocationhandler->device->CreateConstantBufferView(&cbv, handle); } void init(GraphicsDevice_DX12* device, const D3D12_SHADER_RESOURCE_VIEW_DESC& srv, ID3D12Resource* res) { @@ -1016,7 +1036,7 @@ namespace DX12_Internal this->allocationhandler = device->allocationhandler; type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; handle = allocationhandler->descriptors_res.allocate(); - device->device->CreateShaderResourceView(res, &srv, handle); + allocationhandler->device->CreateShaderResourceView(res, &srv, handle); } void init(GraphicsDevice_DX12* device, const D3D12_UNORDERED_ACCESS_VIEW_DESC& uav, ID3D12Resource* res) { @@ -1024,7 +1044,7 @@ namespace DX12_Internal this->allocationhandler = device->allocationhandler; type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; handle = allocationhandler->descriptors_res.allocate(); - device->device->CreateUnorderedAccessView(res, nullptr, &uav, handle); + 
allocationhandler->device->CreateUnorderedAccessView(res, nullptr, &uav, handle); } void init(GraphicsDevice_DX12* device, const D3D12_SAMPLER_DESC& sam) { @@ -1032,7 +1052,7 @@ namespace DX12_Internal this->allocationhandler = device->allocationhandler; type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; handle = allocationhandler->descriptors_sam.allocate(); - device->device->CreateSampler(&sam, handle); + allocationhandler->device->CreateSampler(&sam, handle); } void init(GraphicsDevice_DX12* device, const D3D12_RENDER_TARGET_VIEW_DESC& rtv, ID3D12Resource* res) { @@ -1040,7 +1060,7 @@ namespace DX12_Internal this->allocationhandler = device->allocationhandler; type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; handle = allocationhandler->descriptors_rtv.allocate(); - device->device->CreateRenderTargetView(res, &rtv, handle); + allocationhandler->device->CreateRenderTargetView(res, &rtv, handle); } void init(GraphicsDevice_DX12* device, const D3D12_DEPTH_STENCIL_VIEW_DESC& dsv, ID3D12Resource* res) { @@ -1048,7 +1068,7 @@ namespace DX12_Internal this->allocationhandler = device->allocationhandler; type = D3D12_DESCRIPTOR_HEAP_TYPE_DSV; handle = allocationhandler->descriptors_dsv.allocate(); - device->device->CreateDepthStencilView(res, &dsv, handle); + allocationhandler->device->CreateDepthStencilView(res, &dsv, handle); } void destroy() { @@ -1094,6 +1114,10 @@ namespace DX12_Internal GraphicsDevice::GPUAllocation dynamic[COMMANDLIST_COUNT]; + uint64_t cbv_mask_frame[COMMANDLIST_COUNT] = {}; + uint32_t cbv_mask_gfx[COMMANDLIST_COUNT] = {}; + uint32_t cbv_mask_compute[COMMANDLIST_COUNT] = {}; + virtual ~Resource_DX12() { allocationhandler->destroylocker.lock(); @@ -1186,15 +1210,21 @@ namespace DX12_Internal ComPtr resource; ComPtr rootSignature; + std::vector root_cbvs; std::vector resources; std::vector samplers; + std::vector resource_bindings; + uint32_t bindpoint_res = 0; uint32_t bindpoint_sam = 0; + size_t root_binding_hash = 0; size_t resource_binding_hash = 0; size_t 
sampler_binding_hash = 0; + std::vector staticsamplers; + ~PipelineState_DX12() { allocationhandler->destroylocker.lock(); @@ -1467,19 +1497,18 @@ using namespace DX12_Internal; const uint32_t wrap_effective_size = device->descriptorheap_res.heapDesc.NumDescriptors - wrap_reservation; assert(wrap_reservation > resources); // for correct lockless wrap behaviour - uint64_t offset = device->descriptorheap_res.allocationOffset.fetch_add(resources); - uint64_t wrapped_offset = offset % wrap_effective_size; + const uint64_t offset = device->descriptorheap_res.allocationOffset.fetch_add(resources); + const uint64_t wrapped_offset = offset % wrap_effective_size; ringOffset_res = (uint32_t)wrapped_offset; - uint64_t wrapped_offset_end = wrapped_offset + resources; - - uint64_t gpu_offset = device->descriptorheap_res.fence->GetCompletedValue(); + const uint64_t wrapped_offset_end = wrapped_offset + resources; + + uint64_t gpu_offset = device->descriptorheap_res.cached_completedValue; uint64_t wrapped_gpu_offset = gpu_offset % wrap_effective_size; - if (wrapped_offset < wrapped_gpu_offset && wrapped_offset_end > wrapped_gpu_offset) + while (wrapped_offset < wrapped_gpu_offset && wrapped_offset_end > wrapped_gpu_offset) { assert(device->descriptorheap_res.fenceValue > wrapped_offset_end); // simply not enough space, even with GPU drain - HRESULT hr = device->descriptorheap_res.fence->SetEventOnCompletion(device->descriptorheap_res.fenceValue, device->descriptorheap_res.fenceEvent); - assert(SUCCEEDED(hr)); - WaitForSingleObject(device->descriptorheap_res.fenceEvent, INFINITE); + gpu_offset = device->descriptorheap_res.fence->GetCompletedValue(); + wrapped_gpu_offset = gpu_offset % wrap_effective_size; } } @@ -1491,29 +1520,91 @@ using namespace DX12_Internal; const uint32_t wrap_effective_size = device->descriptorheap_sam.heapDesc.NumDescriptors - wrap_reservation; assert(wrap_reservation > samplers); // for correct lockless wrap behaviour - uint64_t offset = 
device->descriptorheap_sam.allocationOffset.fetch_add(samplers); - uint64_t wrapped_offset = offset % wrap_effective_size; + const uint64_t offset = device->descriptorheap_sam.allocationOffset.fetch_add(samplers); + const uint64_t wrapped_offset = offset % wrap_effective_size; ringOffset_sam = (uint32_t)wrapped_offset; - uint64_t wrapped_offset_end = wrapped_offset + samplers; + const uint64_t wrapped_offset_end = wrapped_offset + samplers; - uint64_t gpu_offset = device->descriptorheap_sam.fence->GetCompletedValue(); + uint64_t gpu_offset = device->descriptorheap_sam.cached_completedValue; uint64_t wrapped_gpu_offset = gpu_offset % wrap_effective_size; - if (wrapped_offset < wrapped_gpu_offset && wrapped_offset_end > wrapped_gpu_offset) + while (wrapped_offset < wrapped_gpu_offset && wrapped_offset_end > wrapped_gpu_offset) { assert(device->descriptorheap_sam.fenceValue > wrapped_offset_end); // simply not enough space, even with GPU drain - HRESULT hr = device->descriptorheap_sam.fence->SetEventOnCompletion(device->descriptorheap_sam.fenceValue, device->descriptorheap_sam.fenceEvent); - assert(SUCCEEDED(hr)); - WaitForSingleObject(device->descriptorheap_sam.fenceEvent, INFINITE); + gpu_offset = device->descriptorheap_sam.fence->GetCompletedValue(); + wrapped_gpu_offset = gpu_offset % wrap_effective_size; } } } void GraphicsDevice_DX12::FrameResources::DescriptorTableFrameAllocator::validate(bool graphics, CommandList cmd) { - if (!dirty_res && !dirty_sam) - return; - auto pso_internal = graphics ? 
to_internal(device->active_pso[cmd]) : to_internal(device->active_cs[cmd]); + // Bind root descriptors: + if ((dirty_root_cbvs_gfx != 0 && graphics) || (dirty_root_cbvs_compute != 0 && !graphics)) + { + uint32_t root_param = 0; + for (auto& x : pso_internal->root_cbvs) + { + bool dirty; + if (graphics) + { + dirty = dirty_root_cbvs_gfx & (1 << x.ShaderRegister); + } + else + { + dirty = dirty_root_cbvs_compute & (1 << x.ShaderRegister); + } + if (!dirty) + { + root_param++; + continue; + } + + const GPUBuffer* buffer = CBV[x.ShaderRegister]; + + D3D12_GPU_VIRTUAL_ADDRESS address; + + if (buffer == nullptr || !buffer->IsValid()) + { + address = 0; + } + else + { + auto internal_state = to_internal(buffer); + + if (buffer->desc.Usage == USAGE_DYNAMIC) + { + GraphicsDevice::GPUAllocation allocation = internal_state->dynamic[cmd]; + address = to_internal(allocation.buffer)->gpu_address; + address += (D3D12_GPU_VIRTUAL_ADDRESS)allocation.offset; + } + else + { + address = internal_state->cbv.cbv.BufferLocation; + } + } + + if (graphics) + { + device->GetDirectCommandList(cmd)->SetGraphicsRootConstantBufferView(root_param, address); + } + else + { + device->GetDirectCommandList(cmd)->SetComputeRootConstantBufferView(root_param, address); + } + root_param++; + } + + if (graphics) + { + dirty_root_cbvs_gfx = 0; + } + else + { + dirty_root_cbvs_compute = 0; + } + } + uint32_t request_res = dirty_res ? (uint32_t)pso_internal->resources.size() : 0; uint32_t request_sam = dirty_sam ? 
(uint32_t)pso_internal->samplers.size() : 0; request_heaps(request_res, request_sam, cmd); @@ -1526,12 +1617,15 @@ using namespace DX12_Internal; D3D12_GPU_DESCRIPTOR_HANDLE binding_table = heap.start_gpu; binding_table.ptr += (UINT64)ringOffset_res * (UINT64)device->resource_descriptor_size; + int i = 0; for (auto& x : pso_internal->resources) { D3D12_CPU_DESCRIPTOR_HANDLE dst = heap.start_cpu; uint32_t ringOffset = ringOffset_res++; dst.ptr += ringOffset * device->resource_descriptor_size; + RESOURCEBINDING binding = pso_internal->resource_bindings[i++]; + switch (x.RangeType) { default: @@ -1541,7 +1635,41 @@ using namespace DX12_Internal; const int subresource = SRV_index[x.BaseShaderRegister]; if (resource == nullptr || !resource->IsValid()) { - device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_buffer, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + switch (binding) + { + case RAWBUFFER: + case STRUCTUREDBUFFER: + case TYPEDBUFFER: + device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_buffer, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + break; + case TEXTURE1D: + device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_texture1d, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + break; + case TEXTURE1DARRAY: + device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_texture1darray, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + break; + case TEXTURE2D: + device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_texture2d, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + break; + case TEXTURE2DARRAY: + device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_texture2darray, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + break; + case TEXTURECUBE: + device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_texturecube, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + break; + case TEXTURECUBEARRAY: + device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_texturecubearray, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + break; + case TEXTURE3D: + 
device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_texture3d, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + break; + case ACCELERATIONSTRUCTURE: + device->device->CopyDescriptorsSimple(1, dst, device->nullSRV_accelerationstructure, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + break; + default: + assert(0); + break; + } } else { @@ -1572,7 +1700,32 @@ using namespace DX12_Internal; const int subresource = UAV_index[x.BaseShaderRegister]; if (resource == nullptr || !resource->IsValid()) { - device->device->CopyDescriptorsSimple(1, dst, device->nullUAV_buffer, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + switch (binding) + { + case RWRAWBUFFER: + case RWSTRUCTUREDBUFFER: + case RWTYPEDBUFFER: + device->device->CopyDescriptorsSimple(1, dst, device->nullUAV_buffer, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + break; + case RWTEXTURE1D: + device->device->CopyDescriptorsSimple(1, dst, device->nullUAV_texture1d, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + break; + case RWTEXTURE1DARRAY: + device->device->CopyDescriptorsSimple(1, dst, device->nullUAV_texture1darray, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + break; + case RWTEXTURE2D: + device->device->CopyDescriptorsSimple(1, dst, device->nullUAV_texture2d, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + break; + case RWTEXTURE2DARRAY: + device->device->CopyDescriptorsSimple(1, dst, device->nullUAV_texture2darray, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + break; + case RWTEXTURE3D: + device->device->CopyDescriptorsSimple(1, dst, device->nullUAV_texture3d, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + break; + default: + assert(0); + break; + } } else { @@ -1593,6 +1746,7 @@ using namespace DX12_Internal; case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: { const GPUBuffer* buffer = CBV[x.BaseShaderRegister]; + if (buffer == nullptr || !buffer->IsValid()) { device->device->CopyDescriptorsSimple(1, dst, device->nullCBV, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); @@ -2122,18 +2276,18 @@ using namespace DX12_Internal; HMODULE dx12 = 
LoadLibraryEx(L"d3d12.dll", nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32); HMODULE dxcompiler = LoadLibrary(L"dxcompiler.dll"); - dll_CreateDXGIFactory2 = (decltype(&CreateDXGIFactory2))GetProcAddress(dxgi, "CreateDXGIFactory2"); - assert(dll_CreateDXGIFactory2 != nullptr); + CreateDXGIFactory2 = (PFN_CREATE_DXGI_FACTORY_2)GetProcAddress(dxgi, "CreateDXGIFactory2"); + assert(CreateDXGIFactory2 != nullptr); - dll_D3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)GetProcAddress(dx12, "D3D12CreateDevice"); - assert(dll_D3D12CreateDevice != nullptr); + D3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)GetProcAddress(dx12, "D3D12CreateDevice"); + assert(D3D12CreateDevice != nullptr); - dll_D3D12SerializeVersionedRootSignature = (PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE)GetProcAddress(dx12, "D3D12SerializeVersionedRootSignature"); - assert(dll_D3D12SerializeVersionedRootSignature != nullptr); + D3D12SerializeVersionedRootSignature = (PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE)GetProcAddress(dx12, "D3D12SerializeVersionedRootSignature"); + assert(D3D12SerializeVersionedRootSignature != nullptr); #endif // PLATFORM_UWP - dll_DxcCreateInstance = (DxcCreateInstanceProc)GetProcAddress(dxcompiler, "DxcCreateInstance"); - assert(dll_DxcCreateInstance != nullptr); + DxcCreateInstance = (DxcCreateInstanceProc)GetProcAddress(dxcompiler, "DxcCreateInstance"); + assert(DxcCreateInstance != nullptr); HRESULT hr = E_FAIL; @@ -2155,7 +2309,7 @@ using namespace DX12_Internal; } #endif - hr = dll_CreateDXGIFactory2(debuglayer ? DXGI_CREATE_FACTORY_DEBUG : 0, IID_PPV_ARGS(&factory)); + hr = CreateDXGIFactory2(debuglayer ? 
DXGI_CREATE_FACTORY_DEBUG : 0, IID_PPV_ARGS(&factory)); if (FAILED(hr)) { std::stringstream ss(""); @@ -2175,7 +2329,7 @@ using namespace DX12_Internal; // ignore software adapter and check device creation succeeds if (!(adapterDesc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) && - SUCCEEDED(dll_D3D12CreateDevice(candidateAdapter.Get(), D3D_FEATURE_LEVEL_12_1, __uuidof(ID3D12Device), nullptr))) + SUCCEEDED(D3D12CreateDevice(candidateAdapter.Get(), D3D_FEATURE_LEVEL_12_1, __uuidof(ID3D12Device), nullptr))) { candidateAdapter.As(&adapter); break; @@ -2188,7 +2342,7 @@ using namespace DX12_Internal; wiPlatform::Exit(); } - hr = dll_D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_12_1, IID_PPV_ARGS(&device)); + hr = D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_12_1, IID_PPV_ARGS(&device)); if (FAILED(hr)) { std::stringstream ss(""); @@ -2308,7 +2462,6 @@ using namespace DX12_Internal; hr = device->CreateFence(0, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&descriptorheap_res.fence)); assert(SUCCEEDED(hr)); - descriptorheap_res.fenceEvent = CreateEventEx(NULL, FALSE, FALSE, EVENT_ALL_ACCESS); descriptorheap_res.fenceValue = descriptorheap_res.fence->GetCompletedValue(); } @@ -2326,7 +2479,6 @@ using namespace DX12_Internal; hr = device->CreateFence(0, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&descriptorheap_sam.fence)); assert(SUCCEEDED(hr)); - descriptorheap_sam.fenceEvent = CreateEventEx(NULL, FALSE, FALSE, EVENT_ALL_ACCESS); descriptorheap_sam.fenceValue = descriptorheap_sam.fence->GetCompletedValue(); } @@ -2839,6 +2991,10 @@ using namespace DX12_Internal; if (pDesc->BindFlags & BIND_UNORDERED_ACCESS) { desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + if (pInitialData == nullptr) + { + allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED; + } } switch (pTexture->desc.type) @@ -3036,7 +3192,7 @@ using namespace DX12_Internal; blob.size = BytecodeLength; ComPtr container_reflection; - hr = dll_DxcCreateInstance(CLSID_DxcContainerReflection, 
__uuidof(IDxcContainerReflection), (void**)&container_reflection); + hr = DxcCreateInstance(CLSID_DxcContainerReflection, __uuidof(IDxcContainerReflection), (void**)&container_reflection); assert(SUCCEEDED(hr)); hr = container_reflection->Load(&blob); assert(SUCCEEDED(hr)); @@ -3049,11 +3205,27 @@ using namespace DX12_Internal; { if (desc.Type == D3D_SIT_SAMPLER) { + for (auto& sam : pShader->auto_samplers) + { + if (desc.BindPoint == sam.slot) + { + internal_state->staticsamplers.push_back(_ConvertStaticSampler(sam)); + return; // static sampler will be used instead + } + } + for (auto& sam : common_samplers) + { + if (desc.BindPoint == sam.ShaderRegister) + { + internal_state->staticsamplers.push_back(sam); + return; // static sampler will be used instead + } + } + internal_state->samplers.emplace_back(); D3D12_DESCRIPTOR_RANGE1& descriptor = internal_state->samplers.back(); descriptor.Flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE; - //descriptor.Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE; descriptor.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; descriptor.BaseShaderRegister = desc.BindPoint; @@ -3104,6 +3276,101 @@ using namespace DX12_Internal; descriptor.NumDescriptors = desc.BindCount; descriptor.RegisterSpace = desc.Space; descriptor.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + internal_state->resource_bindings.emplace_back(); + RESOURCEBINDING& binding = internal_state->resource_bindings.back(); + + switch (desc.Type) + { + default: + case D3D_SIT_CBUFFER: + binding = CONSTANTBUFFER; + break; + case D3D_SIT_TBUFFER: + binding = TYPEDBUFFER; + break; + case D3D_SIT_STRUCTURED: + binding = STRUCTUREDBUFFER; + break; + case D3D_SIT_BYTEADDRESS: + binding = RAWBUFFER; + break; + case D3D_SIT_TEXTURE: + switch (desc.Dimension) + { + case D3D_SRV_DIMENSION_BUFFER: + binding = TYPEDBUFFER; + break; + case D3D_SRV_DIMENSION_TEXTURE1D: + binding = TEXTURE1D; + break; + case D3D_SRV_DIMENSION_TEXTURE1DARRAY: + binding = 
TEXTURE1DARRAY; + break; + case D3D_SRV_DIMENSION_TEXTURE2D: + case D3D_SRV_DIMENSION_TEXTURE2DMS: + binding = TEXTURE2D; + break; + case D3D_SRV_DIMENSION_TEXTURE2DARRAY: + case D3D_SRV_DIMENSION_TEXTURE2DMSARRAY: + binding = TEXTURE2DARRAY; + break; + case D3D_SRV_DIMENSION_TEXTURE3D: + binding = TEXTURE3D; + break; + case D3D_SRV_DIMENSION_TEXTURECUBE: + binding = TEXTURECUBE; + break; + case D3D_SRV_DIMENSION_TEXTURECUBEARRAY: + binding = TEXTURECUBEARRAY; + break; + default: + assert(0); + break; + } + break; + case D3D_SIT_RTACCELERATIONSTRUCTURE: + binding = ACCELERATIONSTRUCTURE; + break; + case D3D_SIT_UAV_RWSTRUCTURED: + case D3D_SIT_UAV_APPEND_STRUCTURED: + case D3D_SIT_UAV_CONSUME_STRUCTURED: + case D3D_SIT_UAV_RWSTRUCTURED_WITH_COUNTER: + binding = RWSTRUCTUREDBUFFER; + break; + case D3D_SIT_UAV_RWBYTEADDRESS: + binding = RWRAWBUFFER; + break; + case D3D_SIT_UAV_RWTYPED: + binding = RWTYPEDBUFFER; + break; + case D3D_SIT_UAV_FEEDBACKTEXTURE: + switch (desc.Dimension) + { + case D3D_SRV_DIMENSION_BUFFER: + binding = RWTYPEDBUFFER; + break; + case D3D_SRV_DIMENSION_TEXTURE1D: + binding = RWTEXTURE1D; + break; + case D3D_SRV_DIMENSION_TEXTURE1DARRAY: + binding = RWTEXTURE1DARRAY; + break; + case D3D_SRV_DIMENSION_TEXTURE2D: + binding = RWTEXTURE2D; + break; + case D3D_SRV_DIMENSION_TEXTURE2DARRAY: + binding = RWTEXTURE2DARRAY; + break; + case D3D_SRV_DIMENSION_TEXTURE3D: + binding = RWTEXTURE3D; + break; + default: + assert(0); + break; + } + break; + } } }; @@ -3153,11 +3420,79 @@ using namespace DX12_Internal; } } + for (auto& sam : internal_state->staticsamplers) + { + switch (stage) + { + case MS: + sam.ShaderVisibility = D3D12_SHADER_VISIBILITY_MESH; + break; + case AS: + sam.ShaderVisibility = D3D12_SHADER_VISIBILITY_AMPLIFICATION; + break; + case VS: + sam.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; + break; + case HS: + sam.ShaderVisibility = D3D12_SHADER_VISIBILITY_HULL; + break; + case DS: + sam.ShaderVisibility = 
D3D12_SHADER_VISIBILITY_DOMAIN; + break; + case GS: + sam.ShaderVisibility = D3D12_SHADER_VISIBILITY_GEOMETRY; + break; + case PS: + sam.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + break; + case CS: + case SHADERSTAGE_COUNT: + default: + sam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + break; + } + } + if (stage == CS || stage == SHADERSTAGE_COUNT) { std::vector params; + // Split resources into root descriptors and tables: + { + std::vector resources; + std::vector bindings; + int i = 0; + for (auto& x : internal_state->resources) + { + RESOURCEBINDING binding = internal_state->resource_bindings[i++]; + if (x.NumDescriptors == 1 && binding == CONSTANTBUFFER && internal_state->root_cbvs.size() < CONSTANT_BUFFER_AUTO_PLACEMENT_IN_ROOT) + { + internal_state->root_cbvs.emplace_back(); + D3D12_ROOT_DESCRIPTOR1& descriptor = internal_state->root_cbvs.back(); + descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE; + descriptor.ShaderRegister = x.BaseShaderRegister; + descriptor.RegisterSpace = x.RegisterSpace; + } + else + { + resources.push_back(x); + bindings.push_back(binding); + } + } + internal_state->resources = resources; + internal_state->resource_bindings = bindings; + } + + for (auto& x : internal_state->root_cbvs) + { + params.emplace_back(); + D3D12_ROOT_PARAMETER1& param = params.back(); + param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + param.Descriptor = x; + } + if (!internal_state->resources.empty()) { internal_state->bindpoint_res = (uint32_t)params.size(); @@ -3181,7 +3516,8 @@ using namespace DX12_Internal; } D3D12_ROOT_SIGNATURE_DESC1 rootSigDesc = {}; - rootSigDesc.NumStaticSamplers = 0; + rootSigDesc.NumStaticSamplers = (UINT)internal_state->staticsamplers.size(); + rootSigDesc.pStaticSamplers = internal_state->staticsamplers.data(); rootSigDesc.NumParameters = (UINT)params.size(); rootSigDesc.pParameters = params.data(); rootSigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; @@ 
-3192,7 +3528,7 @@ using namespace DX12_Internal; ID3DBlob* rootSigBlob; ID3DBlob* rootSigError; - hr = dll_D3D12SerializeVersionedRootSignature(&versioned_rs, &rootSigBlob, &rootSigError); + hr = D3D12SerializeVersionedRootSignature(&versioned_rs, &rootSigBlob, &rootSigError); if (FAILED(hr)) { OutputDebugStringA((char*)rootSigError->GetBufferPointer()); @@ -3200,47 +3536,62 @@ using namespace DX12_Internal; } hr = device->CreateRootSignature(0, rootSigBlob->GetBufferPointer(), rootSigBlob->GetBufferSize(), IID_PPV_ARGS(&internal_state->rootSignature)); assert(SUCCEEDED(hr)); + } + } - if (stage == CS) - { - struct PSO_STREAM - { - CD3DX12_PIPELINE_STATE_STREAM_ROOT_SIGNATURE pRootSignature; - CD3DX12_PIPELINE_STATE_STREAM_CS CS; - } stream; + if (stage == CS) + { + struct PSO_STREAM + { + CD3DX12_PIPELINE_STATE_STREAM_ROOT_SIGNATURE pRootSignature; + CD3DX12_PIPELINE_STATE_STREAM_CS CS; + } stream; - stream.pRootSignature = internal_state->rootSignature.Get(); - stream.CS = { pShader->code.data(), pShader->code.size() }; + if (pShader->rootSignature == nullptr) + { + stream.pRootSignature = internal_state->rootSignature.Get(); + } + else + { + stream.pRootSignature = to_internal(pShader->rootSignature)->resource.Get(); + } + stream.CS = { pShader->code.data(), pShader->code.size() }; - D3D12_PIPELINE_STATE_STREAM_DESC streamDesc = {}; - streamDesc.pPipelineStateSubobjectStream = &stream; - streamDesc.SizeInBytes = sizeof(stream); + D3D12_PIPELINE_STATE_STREAM_DESC streamDesc = {}; + streamDesc.pPipelineStateSubobjectStream = &stream; + streamDesc.SizeInBytes = sizeof(stream); - hr = device->CreatePipelineState(&streamDesc, IID_PPV_ARGS(&internal_state->resource)); - assert(SUCCEEDED(hr)); + hr = device->CreatePipelineState(&streamDesc, IID_PPV_ARGS(&internal_state->resource)); + assert(SUCCEEDED(hr)); - internal_state->resource_binding_hash = 0; - for (auto& x : internal_state->resources) - { - wiHelper::hash_combine(internal_state->resource_binding_hash, 
x.BaseShaderRegister); - wiHelper::hash_combine(internal_state->resource_binding_hash, x.NumDescriptors); - wiHelper::hash_combine(internal_state->resource_binding_hash, x.Flags); - wiHelper::hash_combine(internal_state->resource_binding_hash, x.OffsetInDescriptorsFromTableStart); - wiHelper::hash_combine(internal_state->resource_binding_hash, x.RangeType); - wiHelper::hash_combine(internal_state->resource_binding_hash, x.RegisterSpace); - } + internal_state->root_binding_hash = 0; + for (auto& x : internal_state->root_cbvs) + { + wiHelper::hash_combine(internal_state->root_binding_hash, x.Flags); + wiHelper::hash_combine(internal_state->root_binding_hash, x.ShaderRegister); + wiHelper::hash_combine(internal_state->root_binding_hash, x.RegisterSpace); + } - internal_state->sampler_binding_hash = 0; - for (auto& x : internal_state->samplers) - { - wiHelper::hash_combine(internal_state->sampler_binding_hash, x.BaseShaderRegister); - wiHelper::hash_combine(internal_state->sampler_binding_hash, x.NumDescriptors); - wiHelper::hash_combine(internal_state->sampler_binding_hash, x.Flags); - wiHelper::hash_combine(internal_state->sampler_binding_hash, x.OffsetInDescriptorsFromTableStart); - wiHelper::hash_combine(internal_state->sampler_binding_hash, x.RangeType); - wiHelper::hash_combine(internal_state->sampler_binding_hash, x.RegisterSpace); - } - } + internal_state->resource_binding_hash = 0; + for (auto& x : internal_state->resources) + { + wiHelper::hash_combine(internal_state->resource_binding_hash, x.BaseShaderRegister); + wiHelper::hash_combine(internal_state->resource_binding_hash, x.NumDescriptors); + wiHelper::hash_combine(internal_state->resource_binding_hash, x.Flags); + wiHelper::hash_combine(internal_state->resource_binding_hash, x.OffsetInDescriptorsFromTableStart); + wiHelper::hash_combine(internal_state->resource_binding_hash, x.RangeType); + wiHelper::hash_combine(internal_state->resource_binding_hash, x.RegisterSpace); + } + + 
internal_state->sampler_binding_hash = 0; + for (auto& x : internal_state->samplers) + { + wiHelper::hash_combine(internal_state->sampler_binding_hash, x.BaseShaderRegister); + wiHelper::hash_combine(internal_state->sampler_binding_hash, x.NumDescriptors); + wiHelper::hash_combine(internal_state->sampler_binding_hash, x.Flags); + wiHelper::hash_combine(internal_state->sampler_binding_hash, x.OffsetInDescriptorsFromTableStart); + wiHelper::hash_combine(internal_state->sampler_binding_hash, x.RangeType); + wiHelper::hash_combine(internal_state->sampler_binding_hash, x.RegisterSpace); } } @@ -3335,8 +3686,11 @@ using namespace DX12_Internal; return; size_t check_max = internal_state->resources.size(); // dont't check for duplicates within self table + int b = 0; for (auto& x : shader_internal->resources) { + RESOURCEBINDING binding = shader_internal->resource_bindings[b++]; + bool found = false; size_t i = 0; for (auto& y : internal_state->resources) @@ -3353,6 +3707,7 @@ using namespace DX12_Internal; if (!found) { internal_state->resources.push_back(x); + internal_state->resource_bindings.push_back(binding); } } @@ -3377,24 +3732,63 @@ using namespace DX12_Internal; internal_state->samplers.push_back(x); } } + + for (auto& x : shader_internal->staticsamplers) + { + internal_state->staticsamplers.push_back(x); + } }; + insert_shader(pDesc->ps); // prioritize ps root descriptor assignment insert_shader(pDesc->ms); insert_shader(pDesc->as); insert_shader(pDesc->vs); insert_shader(pDesc->hs); insert_shader(pDesc->ds); insert_shader(pDesc->gs); - insert_shader(pDesc->ps); std::vector params; + // Split resources into root descriptors and tables: + { + std::vector resources; + std::vector bindings; + int i = 0; + for (auto& x : internal_state->resources) + { + RESOURCEBINDING binding = internal_state->resource_bindings[i++]; + if (x.NumDescriptors == 1 && binding == CONSTANTBUFFER && internal_state->root_cbvs.size() < CONSTANT_BUFFER_AUTO_PLACEMENT_IN_ROOT) + { + 
internal_state->root_cbvs.emplace_back(); + D3D12_ROOT_DESCRIPTOR1& descriptor = internal_state->root_cbvs.back(); + descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE; + descriptor.ShaderRegister = x.BaseShaderRegister; + descriptor.RegisterSpace = x.RegisterSpace; + } + else + { + resources.push_back(x); + bindings.push_back(binding); + } + } + internal_state->resources = resources; + internal_state->resource_bindings = bindings; + } + + for (auto& x : internal_state->root_cbvs) + { + params.emplace_back(); + D3D12_ROOT_PARAMETER1& param = params.back(); + param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + param.Descriptor = x; + } + if (!internal_state->resources.empty()) { internal_state->bindpoint_res = (uint32_t)params.size(); params.emplace_back(); D3D12_ROOT_PARAMETER1& param = params.back(); - param = {}; param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; param.DescriptorTable.NumDescriptorRanges = (UINT)internal_state->resources.size(); @@ -3414,7 +3808,8 @@ using namespace DX12_Internal; } D3D12_ROOT_SIGNATURE_DESC1 rootSigDesc = {}; - rootSigDesc.NumStaticSamplers = 0; + rootSigDesc.NumStaticSamplers = (UINT)internal_state->staticsamplers.size(); + rootSigDesc.pStaticSamplers = internal_state->staticsamplers.data(); rootSigDesc.NumParameters = (UINT)params.size(); rootSigDesc.pParameters = params.data(); rootSigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; @@ -3425,15 +3820,23 @@ using namespace DX12_Internal; ID3DBlob* rootSigBlob; ID3DBlob* rootSigError; - HRESULT hr = dll_D3D12SerializeVersionedRootSignature(&versioned_rs, &rootSigBlob, &rootSigError); + HRESULT hr = D3D12SerializeVersionedRootSignature(&versioned_rs, &rootSigBlob, &rootSigError); if (FAILED(hr)) { - assert(0); OutputDebugStringA((char*)rootSigError->GetBufferPointer()); + assert(0); } hr = device->CreateRootSignature(0, 
rootSigBlob->GetBufferPointer(), rootSigBlob->GetBufferSize(), IID_PPV_ARGS(&internal_state->rootSignature)); assert(SUCCEEDED(hr)); + internal_state->root_binding_hash = 0; + for (auto& x : internal_state->root_cbvs) + { + wiHelper::hash_combine(internal_state->root_binding_hash, x.Flags); + wiHelper::hash_combine(internal_state->root_binding_hash, x.ShaderRegister); + wiHelper::hash_combine(internal_state->root_binding_hash, x.RegisterSpace); + } + internal_state->resource_binding_hash = 0; for (auto& x : internal_state->resources) { @@ -4042,20 +4445,7 @@ using namespace DX12_Internal; for (auto& x : table->staticsamplers) { - internal_state->staticsamplers.emplace_back(); - auto& desc = internal_state->staticsamplers.back(); - desc = {}; - desc.ShaderRegister = x.slot; - desc.Filter = _ConvertFilter(x.sampler.desc.Filter); - desc.AddressU = _ConvertTextureAddressMode(x.sampler.desc.AddressU); - desc.AddressV = _ConvertTextureAddressMode(x.sampler.desc.AddressV); - desc.AddressW = _ConvertTextureAddressMode(x.sampler.desc.AddressW); - desc.MipLODBias = x.sampler.desc.MipLODBias; - desc.MaxAnisotropy = x.sampler.desc.MaxAnisotropy; - desc.ComparisonFunc = _ConvertComparisonFunc(x.sampler.desc.ComparisonFunc); - desc.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; - desc.MinLOD = x.sampler.desc.MinLOD; - desc.MaxLOD = x.sampler.desc.MaxLOD; + internal_state->staticsamplers.push_back(_ConvertStaticSampler(x)); } HRESULT hr = S_OK; @@ -4258,7 +4648,7 @@ using namespace DX12_Internal; ID3DBlob* rootSigBlob; ID3DBlob* rootSigError; - HRESULT hr = dll_D3D12SerializeVersionedRootSignature(&versioned_rs, &rootSigBlob, &rootSigError); + HRESULT hr = D3D12SerializeVersionedRootSignature(&versioned_rs, &rootSigBlob, &rootSigError); if (FAILED(hr)) { OutputDebugStringA((char*)rootSigError->GetBufferPointer()); @@ -5081,6 +5471,11 @@ using namespace DX12_Internal; return true; } + void GraphicsDevice_DX12::SetCommonSampler(const StaticSampler* sam) + { + 
common_samplers.push_back(_ConvertStaticSampler(*sam)); + } + void GraphicsDevice_DX12::SetName(GPUResource* pResource, const char* name) { wchar_t text[256]; @@ -5283,9 +5678,11 @@ using namespace DX12_Internal; descriptorheap_res.fenceValue = descriptorheap_res.allocationOffset.load(); hr = directQueue->Signal(descriptorheap_res.fence.Get(), descriptorheap_res.fenceValue); assert(SUCCEEDED(hr)); + descriptorheap_res.cached_completedValue = descriptorheap_res.fence->GetCompletedValue(); descriptorheap_sam.fenceValue = descriptorheap_sam.allocationOffset.load(); hr = directQueue->Signal(descriptorheap_sam.fence.Get(), descriptorheap_sam.fenceValue); assert(SUCCEEDED(hr)); + descriptorheap_sam.cached_completedValue = descriptorheap_sam.fence->GetCompletedValue(); // Determine the last frame that we should not wait on: const uint64_t lastFrameToAllowLatency = std::max(uint64_t(BACKBUFFER_COUNT - 1u), FRAMECOUNT) - (BACKBUFFER_COUNT - 1); @@ -5297,11 +5694,6 @@ using namespace DX12_Internal; WaitForSingleObject(frameFenceEvent, INFINITE); } - - //WaitForGPU(); - //descriptorheap_res.allocationOffset.store(0); - //descriptorheap_sam.allocationOffset.store(0); - allocationhandler->Update(FRAMECOUNT, BACKBUFFER_COUNT); copyQueueLock.unlock(); @@ -5487,6 +5879,30 @@ using namespace DX12_Internal; { descriptors.CBV[slot] = buffer; descriptors.dirty_res = true; + + // Root constant buffer root signature state tracking: + auto internal_state = to_internal(buffer); + if (internal_state->cbv_mask_frame[cmd] != FRAMECOUNT) + { + // This is the first binding as constant buffer in this frame for this resource, + // so clear the cbv flags completely + internal_state->cbv_mask_gfx[cmd] = 0; + internal_state->cbv_mask_compute[cmd] = 0; + internal_state->cbv_mask_frame[cmd] = FRAMECOUNT; + } + + // CBV flag marked as bound for this slot: + // Also, the corresponding slot is marked dirty + if (stage == CS) + { + internal_state->cbv_mask_compute[cmd] |= 1 << slot; + 
descriptors.dirty_root_cbvs_compute |= 1 << slot; + } + else + { + internal_state->cbv_mask_gfx[cmd] |= 1 << slot; + descriptors.dirty_root_cbvs_gfx |= 1 << slot; + } } } void GraphicsDevice_DX12::BindVertexBuffers(const GPUBuffer* const* vertexBuffers, uint32_t slot, uint32_t count, const uint32_t* strides, const uint32_t* offsets, CommandList cmd) @@ -5577,11 +5993,16 @@ using namespace DX12_Internal; { GetFrameResources().descriptors[cmd].dirty_res = true; GetFrameResources().descriptors[cmd].dirty_sam = true; + GetFrameResources().descriptors[cmd].dirty_root_cbvs_gfx = ~0; } else { auto internal_state = to_internal(pso); auto active_internal = to_internal(active_pso[cmd]); + if (internal_state->root_binding_hash != active_internal->root_binding_hash) + { + GetFrameResources().descriptors[cmd].dirty_root_cbvs_gfx = ~0; + } if (internal_state->resource_binding_hash != active_internal->resource_binding_hash) { GetFrameResources().descriptors[cmd].dirty_res = true; @@ -5606,11 +6027,16 @@ using namespace DX12_Internal; { GetFrameResources().descriptors[cmd].dirty_res = true; GetFrameResources().descriptors[cmd].dirty_sam = true; + GetFrameResources().descriptors[cmd].dirty_root_cbvs_compute = ~0; } else { auto internal_state = to_internal(cs); auto active_internal = to_internal(active_cs[cmd]); + if (internal_state->root_binding_hash != active_internal->root_binding_hash) + { + GetFrameResources().descriptors[cmd].dirty_root_cbvs_compute = ~0; + } if (internal_state->resource_binding_hash != active_internal->resource_binding_hash) { GetFrameResources().descriptors[cmd].dirty_res = true; @@ -5736,7 +6162,10 @@ using namespace DX12_Internal; memcpy(allocation.data, data, dataSize); internal_state->dynamic[cmd] = allocation; + // The proper binding slot is not tracked properly, but instead all the previous bindings are invalidated: GetFrameResources().descriptors[cmd].dirty_res = true; + GetFrameResources().descriptors[cmd].dirty_root_cbvs_gfx |= 
internal_state->cbv_mask_gfx[cmd]; + GetFrameResources().descriptors[cmd].dirty_root_cbvs_compute |= internal_state->cbv_mask_compute[cmd]; } else { diff --git a/WickedEngine/wiGraphicsDevice_DX12.h b/WickedEngine/wiGraphicsDevice_DX12.h index a773b3f85..9d1d69bfe 100644 --- a/WickedEngine/wiGraphicsDevice_DX12.h +++ b/WickedEngine/wiGraphicsDevice_DX12.h @@ -28,7 +28,7 @@ namespace wiGraphics { class GraphicsDevice_DX12 : public GraphicsDevice { - public: + protected: Microsoft::WRL::ComPtr device; Microsoft::WRL::ComPtr adapter; Microsoft::WRL::ComPtr factory; @@ -74,6 +74,8 @@ namespace wiGraphics D3D12_CPU_DESCRIPTOR_HANDLE nullUAV_texture2darray = {}; D3D12_CPU_DESCRIPTOR_HANDLE nullUAV_texture3d = {}; + std::vector common_samplers; + Microsoft::WRL::ComPtr copyQueue; std::mutex copyQueueLock; bool copyQueueUse = false; @@ -99,8 +101,8 @@ namespace wiGraphics // GPU status: Microsoft::WRL::ComPtr fence; - HANDLE fenceEvent; uint64_t fenceValue = 0; + uint64_t cached_completedValue = 0; }; DescriptorHeap descriptorheap_res; DescriptorHeap descriptorheap_sam; @@ -128,6 +130,9 @@ namespace wiGraphics int UAV_index[GPU_RESOURCE_HEAP_UAV_COUNT]; const Sampler* SAM[GPU_SAMPLER_HEAP_COUNT]; + uint32_t dirty_root_cbvs_gfx = 0; // bitmask + uint32_t dirty_root_cbvs_compute = 0; // bitmask + struct DescriptorHandles { D3D12_GPU_DESCRIPTOR_HANDLE sampler_handle = {}; @@ -226,6 +231,8 @@ namespace wiGraphics void Unmap(const GPUResource* resource) override; bool QueryRead(const GPUQuery* query, GPUQueryResult* result) override; + void SetCommonSampler(const StaticSampler* sam) override; + void SetName(GPUResource* pResource, const char* name) override; void PresentBegin(CommandList cmd) override; diff --git a/WickedEngine/wiGraphicsDevice_Vulkan.cpp b/WickedEngine/wiGraphicsDevice_Vulkan.cpp index 8c3fd864e..c916ce3e6 100644 --- a/WickedEngine/wiGraphicsDevice_Vulkan.cpp +++ b/WickedEngine/wiGraphicsDevice_Vulkan.cpp @@ -571,17 +571,15 @@ namespace Vulkan_Internal return 
flags; } - bool checkDeviceExtensionSupport(const char* checkExtension, - const std::vector& available_deviceExtensions) { - - for (const auto& x : available_deviceExtensions) + bool checkExtensionSupport(const char* checkExtension, const std::vector& available_extensions) + { + for (const auto& x : available_extensions) { if (strcmp(x.extensionName, checkExtension) == 0) { return true; } } - return false; } @@ -589,7 +587,8 @@ namespace Vulkan_Internal const std::vector validationLayers = { "VK_LAYER_KHRONOS_validation" }; - bool checkValidationLayerSupport() { + bool checkValidationLayerSupport() + { uint32_t layerCount; VkResult res = vkEnumerateInstanceLayerProperties(&layerCount, nullptr); assert(res == VK_SUCCESS); @@ -642,62 +641,6 @@ namespace Vulkan_Internal return VK_FALSE; } - static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback( - VkDebugReportFlagsEXT flags, - VkDebugReportObjectTypeEXT objType, - uint64_t obj, - size_t location, - int32_t code, - const char* layerPrefix, - const char* msg, - void* userData) { - - std::stringstream ss(""); - ss << "[VULKAN validation layer]: " << msg << std::endl; - - std::clog << ss.str(); -#ifdef _WIN32 - OutputDebugStringA(ss.str().c_str()); -#endif - - return VK_FALSE; - } - VkResult CreateDebugUtilsMessengerEXT(VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugUtilsMessengerEXT* pMessenger) - { - auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(instance, "vkCreateDebugUtilsMessengerEXT"); - if (func != nullptr) - { - return func(instance, pCreateInfo, pAllocator, pMessenger); - } - - return VK_ERROR_EXTENSION_NOT_PRESENT; - } - void DestroyDebugUtilsMessengerEXT(VkInstance instance, VkDebugUtilsMessengerEXT messenger, const VkAllocationCallbacks* pAllocator) - { - auto func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(instance, "vkDestroyDebugUtilsMessengerEXT"); - if (func != nullptr) - { - func(instance, 
messenger, pAllocator); - } - } - - VkResult CreateDebugReportCallbackEXT(VkInstance instance, const VkDebugReportCallbackCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugReportCallbackEXT* pCallback) { - auto func = (PFN_vkCreateDebugReportCallbackEXT)vkGetInstanceProcAddr(instance, "vkCreateDebugReportCallbackEXT"); - if (func != nullptr) { - return func(instance, pCreateInfo, pAllocator, pCallback); - } - else { - return VK_ERROR_EXTENSION_NOT_PRESENT; - } - } - void DestroyDebugReportCallbackEXT(VkInstance instance, VkDebugReportCallbackEXT callback, const VkAllocationCallbacks* pAllocator) { - auto func = (PFN_vkDestroyDebugReportCallbackEXT)vkGetInstanceProcAddr(instance, "vkDestroyDebugReportCallbackEXT"); - if (func != nullptr) { - func(instance, callback, pAllocator); - } - } - - // Memory tools: inline size_t Align(size_t uLocation, size_t uAlign) @@ -1266,6 +1209,12 @@ using namespace Vulkan_Internal; int i = 0; for (auto& x : layoutBindings) { + if (x.pImmutableSamplers != nullptr) + { + i++; + continue; + } + descriptorWrites.emplace_back(); auto& write = descriptorWrites.back(); write = {}; @@ -2156,27 +2105,14 @@ using namespace Vulkan_Internal; std::vector extensionNames; - // Check if VK_EXT_debug_utils is supported, which supersedes VK_EXT_Debug_Report - bool debugUtils = false; - if (debuglayer) + if (checkExtensionSupport(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, availableInstanceExtensions)) { - for (auto& available_extension : availableInstanceExtensions) - { - if (strcmp(available_extension.extensionName, VK_EXT_DEBUG_UTILS_EXTENSION_NAME) == 0) - { - debugUtils = true; - extensionNames.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); - break; - } - } - - if (!debugUtils) - { - extensionNames.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME); - } + // This is needed for not only debug layer, but also debug markers, object naming, etc: + extensionNames.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); } 
extensionNames.push_back(VK_KHR_SURFACE_EXTENSION_NAME); + #ifdef _WIN32 extensionNames.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); #elif SDL2 @@ -2219,24 +2155,12 @@ using namespace Vulkan_Internal; // Register validation layer callback: if (debuglayer) { - if(debugUtils) - { - VkDebugUtilsMessengerCreateInfoEXT createInfo = {VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT}; - createInfo.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT; - createInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; - createInfo.pfnUserCallback = debugUtilsMessengerCallback; - res = CreateDebugUtilsMessengerEXT(instance, &createInfo, nullptr, &debugUtilsMessenger); - assert(res == VK_SUCCESS); - } - else - { - VkDebugReportCallbackCreateInfoEXT createInfo = {}; - createInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT; - createInfo.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT; - createInfo.pfnCallback = debugCallback; - res = CreateDebugReportCallbackEXT(instance, &createInfo, nullptr, &debugReportCallback); - assert(res == VK_SUCCESS); - } + VkDebugUtilsMessengerCreateInfoEXT createInfo = {VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT}; + createInfo.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT; + createInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; + createInfo.pfnUserCallback = debugUtilsMessengerCallback; + res = vkCreateDebugUtilsMessengerEXT(instance, &createInfo, nullptr, &debugUtilsMessenger); + assert(res == VK_SUCCESS); } @@ -2312,7 +2236,7 @@ using namespace Vulkan_Internal; for (auto& x : required_deviceExtensions) { - if (!checkDeviceExtensionSupport(x, available)) + if (!checkExtensionSupport(x, available)) { suitable = false; // device doesn't have a required extension } @@ -2419,7 +2343,7 @@ using namespace Vulkan_Internal; res = 
vkEnumerateDeviceExtensionProperties(physicalDevice, nullptr, &extensionCount, available_deviceExtensions.data()); assert(res == VK_SUCCESS); - if (checkDeviceExtensionSupport(VK_KHR_SPIRV_1_4_EXTENSION_NAME, available_deviceExtensions)) + if (checkExtensionSupport(VK_KHR_SPIRV_1_4_EXTENSION_NAME, available_deviceExtensions)) { enabled_deviceExtensions.push_back(VK_KHR_SPIRV_1_4_EXTENSION_NAME); } @@ -2433,14 +2357,14 @@ using namespace Vulkan_Internal; void** features_chain = &features_1_2.pNext; - if (checkDeviceExtensionSupport(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, available_deviceExtensions)) + if (checkExtensionSupport(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, available_deviceExtensions)) { enabled_deviceExtensions.push_back(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME); acceleration_structure_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR; *features_chain = &acceleration_structure_features; features_chain = &acceleration_structure_features.pNext; - if (checkDeviceExtensionSupport(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME, available_deviceExtensions)) + if (checkExtensionSupport(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME, available_deviceExtensions)) { SHADER_IDENTIFIER_SIZE = raytracing_properties.shaderGroupHandleSize; enabled_deviceExtensions.push_back(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME); @@ -2450,7 +2374,7 @@ using namespace Vulkan_Internal; features_chain = &raytracing_features.pNext; } - if (checkDeviceExtensionSupport(VK_KHR_RAY_QUERY_EXTENSION_NAME, available_deviceExtensions)) + if (checkExtensionSupport(VK_KHR_RAY_QUERY_EXTENSION_NAME, available_deviceExtensions)) { enabled_deviceExtensions.push_back(VK_KHR_RAY_QUERY_EXTENSION_NAME); enabled_deviceExtensions.push_back(VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME); @@ -2460,7 +2384,7 @@ using namespace Vulkan_Internal; } } - if (checkDeviceExtensionSupport(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, available_deviceExtensions)) + if 
(checkExtensionSupport(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, available_deviceExtensions)) { VARIABLE_RATE_SHADING_TILE_SIZE = std::min(fragment_shading_rate_properties.maxFragmentShadingRateAttachmentTexelSize.width, fragment_shading_rate_properties.maxFragmentShadingRateAttachmentTexelSize.height); enabled_deviceExtensions.push_back(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME); @@ -2469,7 +2393,7 @@ using namespace Vulkan_Internal; features_chain = &fragment_shading_rate_features.pNext; } - if (checkDeviceExtensionSupport(VK_NV_MESH_SHADER_EXTENSION_NAME, available_deviceExtensions)) + if (checkExtensionSupport(VK_NV_MESH_SHADER_EXTENSION_NAME, available_deviceExtensions)) { enabled_deviceExtensions.push_back(VK_NV_MESH_SHADER_EXTENSION_NAME); mesh_shader_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV; @@ -2586,12 +2510,6 @@ using namespace Vulkan_Internal; res = vmaCreateAllocator(&allocatorInfo, &allocationhandler->allocator); assert(res == VK_SUCCESS); - // looks like volk doesn't get these properly: - vkSetDebugUtilsObjectNameEXT = (PFN_vkSetDebugUtilsObjectNameEXT)vkGetDeviceProcAddr(device, "vkSetDebugUtilsObjectNameEXT"); - vkCmdBeginDebugUtilsLabelEXT = (PFN_vkCmdBeginDebugUtilsLabelEXT)vkGetDeviceProcAddr(device, "vkCmdBeginDebugUtilsLabelEXT"); - vkCmdEndDebugUtilsLabelEXT = (PFN_vkCmdEndDebugUtilsLabelEXT)vkGetDeviceProcAddr(device, "vkCmdEndDebugUtilsLabelEXT"); - vkCmdInsertDebugUtilsLabelEXT = (PFN_vkCmdInsertDebugUtilsLabelEXT)vkGetDeviceProcAddr(device, "vkCmdInsertDebugUtilsLabelEXT"); - CreateBackBufferResources(); vkGetDeviceQueue(device, copyFamily, 0, ©Queue); @@ -2914,12 +2832,7 @@ using namespace Vulkan_Internal; if (debugUtilsMessenger != VK_NULL_HANDLE) { - DestroyDebugUtilsMessengerEXT(instance, debugUtilsMessenger, nullptr); - } - - if (debugReportCallback != VK_NULL_HANDLE) - { - DestroyDebugReportCallbackEXT(instance, debugReportCallback, nullptr); + vkDestroyDebugUtilsMessengerEXT(instance, 
debugUtilsMessenger, nullptr); } vkDestroySurfaceKHR(instance, surface, nullptr); @@ -3013,16 +2926,19 @@ using namespace Vulkan_Internal; assert(res == VK_SUCCESS); swapChainImageFormat = surfaceFormat.format; - VkDebugUtilsObjectNameInfoEXT info = {}; - info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT; - info.pObjectName = "SWAPCHAIN"; - info.objectType = VK_OBJECT_TYPE_IMAGE; - for (auto& x : swapChainImages) + if (vkSetDebugUtilsObjectNameEXT != nullptr) { - info.objectHandle = (uint64_t)x; + VkDebugUtilsObjectNameInfoEXT info = {}; + info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT; + info.pObjectName = "SWAPCHAIN"; + info.objectType = VK_OBJECT_TYPE_IMAGE; + for (auto& x : swapChainImages) + { + info.objectHandle = (uint64_t)x; - res = vkSetDebugUtilsObjectNameEXT(device, &info); - assert(res == VK_SUCCESS); + res = vkSetDebugUtilsObjectNameEXT(device, &info); + assert(res == VK_SUCCESS); + } } // Create default render pass: @@ -3758,6 +3674,8 @@ using namespace Vulkan_Internal; std::vector& layoutBindings = internal_state->layoutBindings; std::vector& imageViewTypes = internal_state->imageViewTypes; + std::vector staticsamplers; + for (auto& x : bindings) { imageViewTypes.push_back(VK_IMAGE_VIEW_TYPE_MAX_ENUM); @@ -3766,6 +3684,36 @@ using namespace Vulkan_Internal; layoutBindings.back().binding = x->binding; layoutBindings.back().descriptorCount = 1; + if (x->descriptor_type == SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLER) + { + bool staticsampler = false; + for (auto& sam : pShader->auto_samplers) + { + if (x->binding == sam.slot + VULKAN_BINDING_SHIFT_S) + { + layoutBindings.back().pImmutableSamplers = &to_internal(&sam.sampler)->resource; + staticsampler = true; + break; // static sampler will be used instead + } + } + if (!staticsampler) + { + for (auto& sam : common_samplers) + { + if (x->binding == sam.slot + VULKAN_BINDING_SHIFT_S) + { + layoutBindings.back().pImmutableSamplers = &to_internal(&sam.sampler)->resource; + 
staticsampler = true; + break; // static sampler will be used instead + } + } + } + if (staticsampler) + { + continue; + } + } + switch (x->descriptor_type) { default: @@ -5692,34 +5640,42 @@ using namespace Vulkan_Internal; return res == VK_SUCCESS; } + void GraphicsDevice_Vulkan::SetCommonSampler(const StaticSampler* sam) + { + common_samplers.push_back(*sam); + } + void GraphicsDevice_Vulkan::SetName(GPUResource* pResource, const char* name) { - VkDebugUtilsObjectNameInfoEXT info = {}; - info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT; - info.pObjectName = name; - if (pResource->IsTexture()) + if (vkSetDebugUtilsObjectNameEXT != nullptr) { - info.objectType = VK_OBJECT_TYPE_IMAGE; - info.objectHandle = (uint64_t)to_internal((const Texture*)pResource)->resource; - } - else if (pResource->IsBuffer()) - { - info.objectType = VK_OBJECT_TYPE_BUFFER; - info.objectHandle = (uint64_t)to_internal((const GPUBuffer*)pResource)->resource; - } - else if (pResource->IsAccelerationStructure()) - { - info.objectType = VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR; - info.objectHandle = (uint64_t)to_internal((const RaytracingAccelerationStructure*)pResource)->resource; - } + VkDebugUtilsObjectNameInfoEXT info = {}; + info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT; + info.pObjectName = name; + if (pResource->IsTexture()) + { + info.objectType = VK_OBJECT_TYPE_IMAGE; + info.objectHandle = (uint64_t)to_internal((const Texture*)pResource)->resource; + } + else if (pResource->IsBuffer()) + { + info.objectType = VK_OBJECT_TYPE_BUFFER; + info.objectHandle = (uint64_t)to_internal((const GPUBuffer*)pResource)->resource; + } + else if (pResource->IsAccelerationStructure()) + { + info.objectType = VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR; + info.objectHandle = (uint64_t)to_internal((const RaytracingAccelerationStructure*)pResource)->resource; + } - if (info.objectHandle == VK_NULL_HANDLE) - { - return; - } + if (info.objectHandle == VK_NULL_HANDLE) + { + 
return; + } - VkResult res = vkSetDebugUtilsObjectNameEXT(device, &info); - assert(res == VK_SUCCESS); + VkResult res = vkSetDebugUtilsObjectNameEXT(device, &info); + assert(res == VK_SUCCESS); + } } void GraphicsDevice_Vulkan::PresentBegin(CommandList cmd) diff --git a/WickedEngine/wiGraphicsDevice_Vulkan.h b/WickedEngine/wiGraphicsDevice_Vulkan.h index 7a04fa7c0..80b09ce5f 100644 --- a/WickedEngine/wiGraphicsDevice_Vulkan.h +++ b/WickedEngine/wiGraphicsDevice_Vulkan.h @@ -30,10 +30,9 @@ namespace wiGraphics { class GraphicsDevice_Vulkan : public GraphicsDevice { - private: + protected: VkInstance instance = VK_NULL_HANDLE; VkDebugUtilsMessengerEXT debugUtilsMessenger = VK_NULL_HANDLE; - VkDebugReportCallbackEXT debugReportCallback = VK_NULL_HANDLE; // Deprecated VkSurfaceKHR surface = VK_NULL_HANDLE; VkPhysicalDevice physicalDevice = VK_NULL_HANDLE; VkDevice device = VK_NULL_HANDLE; @@ -186,6 +185,8 @@ namespace wiGraphics std::atomic cmd_count{ 0 }; + std::vector common_samplers; + public: GraphicsDevice_Vulkan(wiPlatform::window_type window, bool fullscreen = false, bool debuglayer = false); virtual ~GraphicsDevice_Vulkan(); @@ -215,6 +216,8 @@ namespace wiGraphics void Unmap(const GPUResource* resource) override; bool QueryRead(const GPUQuery* query, GPUQueryResult* result) override; + void SetCommonSampler(const StaticSampler* sam) override; + void SetName(GPUResource* pResource, const char* name) override; void PresentBegin(CommandList cmd) override; diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp index 7dc272ebd..75390ed75 100644 --- a/WickedEngine/wiRenderer.cpp +++ b/WickedEngine/wiRenderer.cpp @@ -156,6 +156,133 @@ unordered_map packedLightmaps; void SetDevice(std::shared_ptr newDevice) { device = newDevice; + + SamplerDesc samplerDesc; + samplerDesc.Filter = FILTER_MIN_MAG_MIP_LINEAR; + samplerDesc.AddressU = TEXTURE_ADDRESS_MIRROR; + samplerDesc.AddressV = TEXTURE_ADDRESS_MIRROR; + samplerDesc.AddressW = TEXTURE_ADDRESS_MIRROR; + 
samplerDesc.MipLODBias = 0.0f; + samplerDesc.MaxAnisotropy = 0; + samplerDesc.ComparisonFunc = COMPARISON_NEVER; + samplerDesc.BorderColor[0] = 0; + samplerDesc.BorderColor[1] = 0; + samplerDesc.BorderColor[2] = 0; + samplerDesc.BorderColor[3] = 0; + samplerDesc.MinLOD = 0; + samplerDesc.MaxLOD = FLT_MAX; + device->CreateSampler(&samplerDesc, &samplers[SSLOT_LINEAR_MIRROR]); + + samplerDesc.Filter = FILTER_MIN_MAG_MIP_LINEAR; + samplerDesc.AddressU = TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressV = TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressW = TEXTURE_ADDRESS_CLAMP; + device->CreateSampler(&samplerDesc, &samplers[SSLOT_LINEAR_CLAMP]); + + samplerDesc.Filter = FILTER_MIN_MAG_MIP_LINEAR; + samplerDesc.AddressU = TEXTURE_ADDRESS_WRAP; + samplerDesc.AddressV = TEXTURE_ADDRESS_WRAP; + samplerDesc.AddressW = TEXTURE_ADDRESS_WRAP; + device->CreateSampler(&samplerDesc, &samplers[SSLOT_LINEAR_WRAP]); + + samplerDesc.Filter = FILTER_MIN_MAG_MIP_POINT; + samplerDesc.AddressU = TEXTURE_ADDRESS_MIRROR; + samplerDesc.AddressV = TEXTURE_ADDRESS_MIRROR; + samplerDesc.AddressW = TEXTURE_ADDRESS_MIRROR; + device->CreateSampler(&samplerDesc, &samplers[SSLOT_POINT_MIRROR]); + + samplerDesc.Filter = FILTER_MIN_MAG_MIP_POINT; + samplerDesc.AddressU = TEXTURE_ADDRESS_WRAP; + samplerDesc.AddressV = TEXTURE_ADDRESS_WRAP; + samplerDesc.AddressW = TEXTURE_ADDRESS_WRAP; + device->CreateSampler(&samplerDesc, &samplers[SSLOT_POINT_WRAP]); + + + samplerDesc.Filter = FILTER_MIN_MAG_MIP_POINT; + samplerDesc.AddressU = TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressV = TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressW = TEXTURE_ADDRESS_CLAMP; + device->CreateSampler(&samplerDesc, &samplers[SSLOT_POINT_CLAMP]); + + samplerDesc.Filter = FILTER_ANISOTROPIC; + samplerDesc.AddressU = TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressV = TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressW = TEXTURE_ADDRESS_CLAMP; + samplerDesc.MaxAnisotropy = 16; + device->CreateSampler(&samplerDesc, &samplers[SSLOT_ANISO_CLAMP]); + + 
samplerDesc.Filter = FILTER_ANISOTROPIC; + samplerDesc.AddressU = TEXTURE_ADDRESS_WRAP; + samplerDesc.AddressV = TEXTURE_ADDRESS_WRAP; + samplerDesc.AddressW = TEXTURE_ADDRESS_WRAP; + samplerDesc.MaxAnisotropy = 16; + device->CreateSampler(&samplerDesc, &samplers[SSLOT_ANISO_WRAP]); + + samplerDesc.Filter = FILTER_ANISOTROPIC; + samplerDesc.AddressU = TEXTURE_ADDRESS_MIRROR; + samplerDesc.AddressV = TEXTURE_ADDRESS_MIRROR; + samplerDesc.AddressW = TEXTURE_ADDRESS_MIRROR; + samplerDesc.MaxAnisotropy = 16; + device->CreateSampler(&samplerDesc, &samplers[SSLOT_ANISO_MIRROR]); + + samplerDesc.Filter = FILTER_ANISOTROPIC; + samplerDesc.AddressU = TEXTURE_ADDRESS_WRAP; + samplerDesc.AddressV = TEXTURE_ADDRESS_WRAP; + samplerDesc.AddressW = TEXTURE_ADDRESS_WRAP; + samplerDesc.MaxAnisotropy = 16; + device->CreateSampler(&samplerDesc, &samplers[SSLOT_OBJECTSHADER]); + + samplerDesc.Filter = FILTER_COMPARISON_MIN_MAG_LINEAR_MIP_POINT; + samplerDesc.AddressU = TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressV = TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressW = TEXTURE_ADDRESS_CLAMP; + samplerDesc.MipLODBias = 0.0f; + samplerDesc.MaxAnisotropy = 0; + samplerDesc.ComparisonFunc = COMPARISON_GREATER_EQUAL; + device->CreateSampler(&samplerDesc, &samplers[SSLOT_CMP_DEPTH]); + + + StaticSampler sam; + + sam.sampler = samplers[SSLOT_CMP_DEPTH]; + sam.slot = SSLOT_CMP_DEPTH; + device->SetCommonSampler(&sam); + + sam.sampler = samplers[SSLOT_LINEAR_MIRROR]; + sam.slot = SSLOT_LINEAR_MIRROR; + device->SetCommonSampler(&sam); + + sam.sampler = samplers[SSLOT_LINEAR_CLAMP]; + sam.slot = SSLOT_LINEAR_CLAMP; + device->SetCommonSampler(&sam); + + sam.sampler = samplers[SSLOT_LINEAR_WRAP]; + sam.slot = SSLOT_LINEAR_WRAP; + device->SetCommonSampler(&sam); + + sam.sampler = samplers[SSLOT_POINT_MIRROR]; + sam.slot = SSLOT_POINT_MIRROR; + device->SetCommonSampler(&sam); + + sam.sampler = samplers[SSLOT_POINT_WRAP]; + sam.slot = SSLOT_POINT_WRAP; + device->SetCommonSampler(&sam); + + sam.sampler = 
samplers[SSLOT_POINT_CLAMP]; + sam.slot = SSLOT_POINT_CLAMP; + device->SetCommonSampler(&sam); + + sam.sampler = samplers[SSLOT_ANISO_CLAMP]; + sam.slot = SSLOT_ANISO_CLAMP; + device->SetCommonSampler(&sam); + + sam.sampler = samplers[SSLOT_ANISO_WRAP]; + sam.slot = SSLOT_ANISO_WRAP; + device->SetCommonSampler(&sam); + + sam.sampler = samplers[SSLOT_ANISO_MIRROR]; + sam.slot = SSLOT_ANISO_MIRROR; + device->SetCommonSampler(&sam); } GraphicsDevice* GetDevice() { @@ -1834,92 +1961,6 @@ void LoadBuffers() } void SetUpStates() { - SamplerDesc samplerDesc; - samplerDesc.Filter = FILTER_MIN_MAG_MIP_LINEAR; - samplerDesc.AddressU = TEXTURE_ADDRESS_MIRROR; - samplerDesc.AddressV = TEXTURE_ADDRESS_MIRROR; - samplerDesc.AddressW = TEXTURE_ADDRESS_MIRROR; - samplerDesc.MipLODBias = 0.0f; - samplerDesc.MaxAnisotropy = 0; - samplerDesc.ComparisonFunc = COMPARISON_NEVER; - samplerDesc.BorderColor[0] = 0; - samplerDesc.BorderColor[1] = 0; - samplerDesc.BorderColor[2] = 0; - samplerDesc.BorderColor[3] = 0; - samplerDesc.MinLOD = 0; - samplerDesc.MaxLOD = FLT_MAX; - device->CreateSampler(&samplerDesc, &samplers[SSLOT_LINEAR_MIRROR]); - - samplerDesc.Filter = FILTER_MIN_MAG_MIP_LINEAR; - samplerDesc.AddressU = TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressV = TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressW = TEXTURE_ADDRESS_CLAMP; - device->CreateSampler(&samplerDesc, &samplers[SSLOT_LINEAR_CLAMP]); - - samplerDesc.Filter = FILTER_MIN_MAG_MIP_LINEAR; - samplerDesc.AddressU = TEXTURE_ADDRESS_WRAP; - samplerDesc.AddressV = TEXTURE_ADDRESS_WRAP; - samplerDesc.AddressW = TEXTURE_ADDRESS_WRAP; - device->CreateSampler(&samplerDesc, &samplers[SSLOT_LINEAR_WRAP]); - - samplerDesc.Filter = FILTER_MIN_MAG_MIP_POINT; - samplerDesc.AddressU = TEXTURE_ADDRESS_MIRROR; - samplerDesc.AddressV = TEXTURE_ADDRESS_MIRROR; - samplerDesc.AddressW = TEXTURE_ADDRESS_MIRROR; - device->CreateSampler(&samplerDesc, &samplers[SSLOT_POINT_MIRROR]); - - samplerDesc.Filter = FILTER_MIN_MAG_MIP_POINT; - 
samplerDesc.AddressU = TEXTURE_ADDRESS_WRAP; - samplerDesc.AddressV = TEXTURE_ADDRESS_WRAP; - samplerDesc.AddressW = TEXTURE_ADDRESS_WRAP; - device->CreateSampler(&samplerDesc, &samplers[SSLOT_POINT_WRAP]); - - - samplerDesc.Filter = FILTER_MIN_MAG_MIP_POINT; - samplerDesc.AddressU = TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressV = TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressW = TEXTURE_ADDRESS_CLAMP; - device->CreateSampler(&samplerDesc, &samplers[SSLOT_POINT_CLAMP]); - - samplerDesc.Filter = FILTER_ANISOTROPIC; - samplerDesc.AddressU = TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressV = TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressW = TEXTURE_ADDRESS_CLAMP; - samplerDesc.MaxAnisotropy = 16; - device->CreateSampler(&samplerDesc, &samplers[SSLOT_ANISO_CLAMP]); - - samplerDesc.Filter = FILTER_ANISOTROPIC; - samplerDesc.AddressU = TEXTURE_ADDRESS_WRAP; - samplerDesc.AddressV = TEXTURE_ADDRESS_WRAP; - samplerDesc.AddressW = TEXTURE_ADDRESS_WRAP; - samplerDesc.MaxAnisotropy = 16; - device->CreateSampler(&samplerDesc, &samplers[SSLOT_ANISO_WRAP]); - - samplerDesc.Filter = FILTER_ANISOTROPIC; - samplerDesc.AddressU = TEXTURE_ADDRESS_MIRROR; - samplerDesc.AddressV = TEXTURE_ADDRESS_MIRROR; - samplerDesc.AddressW = TEXTURE_ADDRESS_MIRROR; - samplerDesc.MaxAnisotropy = 16; - device->CreateSampler(&samplerDesc, &samplers[SSLOT_ANISO_MIRROR]); - - samplerDesc.Filter = FILTER_ANISOTROPIC; - samplerDesc.AddressU = TEXTURE_ADDRESS_WRAP; - samplerDesc.AddressV = TEXTURE_ADDRESS_WRAP; - samplerDesc.AddressW = TEXTURE_ADDRESS_WRAP; - samplerDesc.MaxAnisotropy = 16; - device->CreateSampler(&samplerDesc, &samplers[SSLOT_OBJECTSHADER]); - - samplerDesc.Filter = FILTER_COMPARISON_MIN_MAG_LINEAR_MIP_POINT; - samplerDesc.AddressU = TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressV = TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressW = TEXTURE_ADDRESS_CLAMP; - samplerDesc.MipLODBias = 0.0f; - samplerDesc.MaxAnisotropy = 0; - samplerDesc.ComparisonFunc = COMPARISON_GREATER_EQUAL; - 
device->CreateSampler(&samplerDesc, &samplers[SSLOT_CMP_DEPTH]); - - - RasterizerState rs; rs.FillMode = FILL_SOLID; rs.CullMode = CULL_BACK; @@ -2237,9 +2278,9 @@ void SetUpStates() blendStates[BSTYPE_TRANSPARENTSHADOW] = bd; } -void ModifySampler(const SamplerDesc& desc, int slot) +void ModifyObjectSampler(const SamplerDesc& desc) { - device->CreateSampler(&desc, &samplers[slot]); + device->CreateSampler(&desc, &samplers[SSLOT_OBJECTSHADER]); } const std::string& GetShaderPath() @@ -8402,10 +8443,7 @@ void BindCommonResources(CommandList cmd) { SHADERSTAGE stage = (SHADERSTAGE)i; - for (int i = 0; i < SSLOT_COUNT; ++i) - { - device->BindSampler(stage, &samplers[i], i, cmd); - } + device->BindSampler(stage, &samplers[SSLOT_OBJECTSHADER], SSLOT_OBJECTSHADER, cmd); BindConstantBuffers(stage, cmd); } @@ -11477,6 +11515,14 @@ void Postprocess_Lineardepth( ) { device->EventBegin("Postprocess_Lineardepth", cmd); + auto range = wiProfiler::BeginRangeGPU("Linear Depth Pyramid", cmd); + + { + GPUBarrier barriers[] = { + GPUBarrier::Image(&output, IMAGE_LAYOUT_SHADER_RESOURCE, IMAGE_LAYOUT_UNORDERED_ACCESS) + }; + device->Barrier(barriers, arraysize(barriers), cmd); + } const TextureDesc& desc = output.GetDesc(); @@ -11509,13 +11555,17 @@ void Postprocess_Lineardepth( cmd ); - GPUBarrier barriers[] = { - GPUBarrier::Memory(), - }; - device->Barrier(barriers, arraysize(barriers), cmd); + { + GPUBarrier barriers[] = { + GPUBarrier::Memory(), + GPUBarrier::Image(&output, IMAGE_LAYOUT_UNORDERED_ACCESS, IMAGE_LAYOUT_SHADER_RESOURCE) + }; + device->Barrier(barriers, arraysize(barriers), cmd); + } device->UnbindUAVs(0, 6, cmd); + wiProfiler::EndRange(range); device->EventEnd(cmd); } void Postprocess_Sharpen( diff --git a/WickedEngine/wiRenderer.h b/WickedEngine/wiRenderer.h index b421a2dff..9136aeb45 100644 --- a/WickedEngine/wiRenderer.h +++ b/WickedEngine/wiRenderer.h @@ -37,7 +37,7 @@ namespace wiRenderer const wiGraphics::GPUBuffer* GetConstantBuffer(CBTYPES id); const 
wiGraphics::Texture* GetTexture(TEXTYPES id); - void ModifySampler(const wiGraphics::SamplerDesc& desc, int slot); + void ModifyObjectSampler(const wiGraphics::SamplerDesc& desc); void Initialize(); diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index 513d3a192..a49ddc6de 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wiVersion // minor features, major updates, breaking compatibility changes const int minor = 51; // minor bug fixes, alterations, refactors, updates - const int revision = 49; + const int revision = 50; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);