diff --git a/Editor/Editor.cpp b/Editor/Editor.cpp index 20875bbf9..1ca398dc6 100644 --- a/Editor/Editor.cpp +++ b/Editor/Editor.cpp @@ -1760,7 +1760,6 @@ void EditorComponent::Update(float dt) save_text_message = txt; save_text_alpha = 1.0f; main->config.GetSection("camera").Set("move_speed", cameraWnd.movespeedSlider.GetValue()); - main->config.Commit(); } } else diff --git a/Editor/GraphicsWindow.cpp b/Editor/GraphicsWindow.cpp index a6f27f681..44b3863e1 100644 --- a/Editor/GraphicsWindow.cpp +++ b/Editor/GraphicsWindow.cpp @@ -1571,6 +1571,7 @@ void GraphicsWindow::UpdateSwapChainFormats(wi::graphics::SwapChain* swapChain) break; } swapChain->desc.allow_hdr = editor->main->allow_hdr; + editor->main->config.Set("allow_hdr", editor->main->allow_hdr); bool success = wi::graphics::GetDevice()->CreateSwapChain(&swapChain->desc, nullptr, swapChain); assert(success); diff --git a/WickedEngine/offlineshadercompiler.cpp b/WickedEngine/offlineshadercompiler.cpp index 913cd4011..cc7163790 100644 --- a/WickedEngine/offlineshadercompiler.cpp +++ b/WickedEngine/offlineshadercompiler.cpp @@ -485,6 +485,10 @@ int main(int argc, char* argv[]) shaders.push_back({ "copyStencilBitPS", wi::graphics::ShaderStage::PS }); shaders.back().permutations.emplace_back().defines = { "MSAA" }; + // permutations for yuv_to_rgbCS: + shaders.push_back({ "yuv_to_rgbCS", wi::graphics::ShaderStage::CS }); + shaders.back().permutations.emplace_back().defines = { "ARRAY" }; + wi::jobsystem::Initialize(); wi::jobsystem::context ctx; diff --git a/WickedEngine/shaders/yuv_to_rgbCS.hlsl b/WickedEngine/shaders/yuv_to_rgbCS.hlsl index fbbd2a526..af4dabb8d 100644 --- a/WickedEngine/shaders/yuv_to_rgbCS.hlsl +++ b/WickedEngine/shaders/yuv_to_rgbCS.hlsl @@ -3,15 +3,24 @@ PUSHCONSTANT(postprocess, PostProcess); +#ifdef ARRAY Texture2DArray input_luminance : register(t0); Texture2DArray input_chrominance : register(t1); +#else +Texture2D input_luminance : register(t0); +Texture2D input_chrominance : 
register(t1); +#endif // ARRAY RWTexture2D output : register(u0); [numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)] void main(uint3 DTid : SV_DispatchThreadID) { +#ifdef ARRAY const float3 uv = float3((DTid.xy + 0.5f) * postprocess.resolution_rcp, 0); +#else + const float2 uv = float2((DTid.xy + 0.5f) * postprocess.resolution_rcp); +#endif // ARRAY float luminance = input_luminance.SampleLevel(sampler_linear_clamp, uv, 0); float2 chrominance = input_chrominance.SampleLevel(sampler_linear_clamp, uv, 0); diff --git a/WickedEngine/wiEnums.h b/WickedEngine/wiEnums.h index 9a3a97f6d..24e39b2d0 100644 --- a/WickedEngine/wiEnums.h +++ b/WickedEngine/wiEnums.h @@ -390,6 +390,7 @@ namespace wi::enums CSTYPE_VIRTUALTEXTURE_RESIDENCYUPDATE, CSTYPE_WIND, CSTYPE_YUV_TO_RGB, + CSTYPE_YUV_TO_RGB_ARRAY, CSTYPE_WETMAP_UPDATE, CSTYPE_CAUSTICS, CSTYPE_DEPTH_REPROJECT, diff --git a/WickedEngine/wiGraphics.h b/WickedEngine/wiGraphics.h index 9709b6366..09c1a0c1a 100644 --- a/WickedEngine/wiGraphics.h +++ b/WickedEngine/wiGraphics.h @@ -408,10 +408,12 @@ namespace wi::graphics ALIASING = ALIASING_BUFFER | ALIASING_TEXTURE_NON_RT_DS | ALIASING_TEXTURE_RT_DS, // memory allocation will be suitable for all kinds of resources. Requires GraphicsDeviceCapability::ALIASING_GENERIC to be supported TYPED_FORMAT_CASTING = 1 << 11, // enable casting formats between same type and different modifiers: eg. UNORM -> SRGB TYPELESS_FORMAT_CASTING = 1 << 12, // enable casting formats to other formats that have the same bit-width and channel layout: eg. 
R32_FLOAT -> R32_UINT - VIDEO_DECODE = 1 << 13, // resource is usabe in video decoding operations - NO_DEFAULT_DESCRIPTORS = 1 << 14, // skips creation of default descriptors for resources - TEXTURE_COMPATIBLE_COMPRESSION = 1 << 15, // optimization that can enable sampling from compressed textures - SHARED = 1 << 16, // shared texture + VIDEO_DECODE = 1 << 13, // resource is usable in video decoding operations (for buffers it is indicating a bitstream buffer, for textures it is a DPB and output texture if DPB_AND_OUTPUT_COINCIDE is supported) + VIDEO_DECODE_OUTPUT_ONLY = 1 << 14, // resource is usable in video decoding operations but as output only and not as DPB (used for output textures when DPB_AND_OUTPUT_COINCIDE is NOT supported) + VIDEO_DECODE_DPB_ONLY = 1 << 15, // resource is usable in video decoding operations but as strictly DPB only (used for DPB textures when DPB_AND_OUTPUT_COINCIDE is NOT supported) + NO_DEFAULT_DESCRIPTORS = 1 << 16, // skips creation of default descriptors for resources + TEXTURE_COMPATIBLE_COMPRESSION = 1 << 17, // optimization that can enable sampling from compressed textures (console only) + SHARED = 1 << 18, // shared texture // Compat: SPARSE_TILE_POOL_BUFFER = ALIASING_BUFFER, @@ -478,16 +480,25 @@ namespace wi::graphics // Other: VIDEO_DECODE_SRC = 1 << 15, // video decode operation source (bitstream buffer or DPB texture) - VIDEO_DECODE_DST = 1 << 16, // video decode operation destination DPB texture - SWAPCHAIN = 1 << 17, // resource state of swap chain's back buffer texture when it's not rendering + VIDEO_DECODE_DST = 1 << 16, // video decode operation destination output texture + VIDEO_DECODE_DPB = 1 << 17, // video decode operation destination DPB texture + SWAPCHAIN = 1 << 18, // resource state of swap chain's back buffer texture when it's not rendering }; enum class RenderPassFlags { NONE = 0, - ALLOW_UAV_WRITES = 1 << 0, - SUSPENDING = 1 << 1, - RESUMING = 1 << 2, + ALLOW_UAV_WRITES = 1 << 0, // allows UAV writes to
happen within render pass + SUSPENDING = 1 << 1, // suspends the renderpass to be continued in the next submitted command list + RESUMING = 1 << 2, // resumes the renderpass that was suspended in the previously submitted command list + }; + + enum class VideoDecoderSupportFlags + { + NONE = 0, + DPB_AND_OUTPUT_COINCIDE = 1 << 0, // the video decoder supports using the DPB texture as output shader resource. If not supported, then DPB_AND_OUTPUT_DISTINCT must be supported. + DPB_AND_OUTPUT_DISTINCT = 1 << 1, // the video decoder supports outputting to a texture that is not part of the DPB as part of the decode operation. If not supported, then DPB_AND_OUTPUT_COINCIDE must be supported. + DPB_INDIVIDUAL_TEXTURES_SUPPORTED = 1 << 2, // the video decoder supports using a DPB that is not an array texture, so each slot can be an individually allocated texture }; @@ -890,6 +901,7 @@ namespace wi::graphics { VideoDesc desc; constexpr const VideoDesc& GetDesc() const { return desc; } + VideoDecoderSupportFlags support = VideoDecoderSupportFlags::NONE; }; struct VideoDecodeOperation @@ -916,6 +928,7 @@ namespace wi::graphics const int* dpb_poc = nullptr; // for each DPB reference slot, indicate the PictureOrderCount const int* dpb_framenum = nullptr; // for each DPB reference slot, indicate the framenum value const Texture* DPB = nullptr; // DPB texture with arraysize = num_references + 1 + const Texture* output = nullptr; // output of the operation, it should be nullptr if DPB_AND_OUTPUT_COINCIDE is used (because in that case the DPB will be used as output instead of a separate output) }; struct RenderPassImage @@ -2130,6 +2143,10 @@ template<> struct enable_bitmask_operators { static const bool enable = true; }; +template<> +struct enable_bitmask_operators { + static const bool enable = true; +}; namespace std { diff --git a/WickedEngine/wiGraphicsDevice_DX12.cpp b/WickedEngine/wiGraphicsDevice_DX12.cpp index 6cf43e2f9..f88a78fcc 100644 --- 
a/WickedEngine/wiGraphicsDevice_DX12.cpp +++ b/WickedEngine/wiGraphicsDevice_DX12.cpp @@ -126,6 +126,8 @@ namespace dx12_internal ret |= D3D12_RESOURCE_STATE_VIDEO_DECODE_READ; if (has_flag(value, ResourceState::VIDEO_DECODE_DST)) ret |= D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE; + if (has_flag(value, ResourceState::VIDEO_DECODE_DPB)) + ret |= D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE; if (has_flag(value, ResourceState::SWAPCHAIN)) ret |= D3D12_RESOURCE_STATE_PRESENT; @@ -3534,6 +3536,11 @@ std::mutex queue_locker; resourcedesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS; } + if (has_flag(texture->desc.misc_flags, ResourceMiscFlag::VIDEO_DECODE_DPB_ONLY)) + { + resourcedesc.Flags = D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + } + switch (texture->desc.type) { case TextureDesc::Type::TEXTURE_1D: @@ -4558,12 +4565,24 @@ std::mutex queue_locker; video_decode_support.BitRate = desc->bit_rate; video_decode_support.FrameRate = { 0, 1 }; dx12_check(video_device->CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_SUPPORT, &video_decode_support, sizeof(video_decode_support))); - bool reference_only = video_decode_support.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED; - assert(!reference_only); // Not supported currently, will need to use resource flags: D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE, and do output decode conversion if (video_decode_support.DecodeTier < D3D12_VIDEO_DECODE_TIER_1) return false; + video_decoder->support = {}; + if (video_decode_support.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) + { + video_decoder->support |= VideoDecoderSupportFlags::DPB_AND_OUTPUT_DISTINCT; + } + else + { + video_decoder->support |= VideoDecoderSupportFlags::DPB_AND_OUTPUT_COINCIDE; + } + if (video_decode_support.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2) + { + 
video_decoder->support |= VideoDecoderSupportFlags::DPB_INDIVIDUAL_TEXTURES_SUPPORTED; + } + auto internal_state = std::make_shared(); internal_state->allocationhandler = allocationhandler; video_decoder->internal_state = internal_state; @@ -7440,8 +7459,17 @@ std::mutex queue_locker; D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS output = {}; D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS input = {}; - output.pOutputTexture2D = dpb_internal->resource.Get(); - output.OutputSubresource = D3D12CalcSubresource(0, op->current_dpb, 0, op->DPB->desc.mip_levels, op->DPB->desc.array_size); + if (op->output == nullptr) + { + output.pOutputTexture2D = dpb_internal->resource.Get(); + output.OutputSubresource = D3D12CalcSubresource(0, op->current_dpb, 0, op->DPB->desc.mip_levels, op->DPB->desc.array_size); + } + else + { + auto output_internal = to_internal(op->output); + output.pOutputTexture2D = output_internal->resource.Get(); + output.OutputSubresource = 0; + } ID3D12Resource* reference_frames[16] = {}; UINT reference_subresources[16] = {}; diff --git a/WickedEngine/wiGraphicsDevice_Vulkan.cpp b/WickedEngine/wiGraphicsDevice_Vulkan.cpp index ee9cd2a08..f543deaae 100644 --- a/WickedEngine/wiGraphicsDevice_Vulkan.cpp +++ b/WickedEngine/wiGraphicsDevice_Vulkan.cpp @@ -354,9 +354,12 @@ namespace vulkan_internal return VK_IMAGE_LAYOUT_GENERAL; case ResourceState::SHADING_RATE_SOURCE: return VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR; - case ResourceState::VIDEO_DECODE_SRC: - case ResourceState::VIDEO_DECODE_DST: + case ResourceState::VIDEO_DECODE_DPB: return VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR; + case ResourceState::VIDEO_DECODE_SRC: + return VK_IMAGE_LAYOUT_VIDEO_DECODE_SRC_KHR; + case ResourceState::VIDEO_DECODE_DST: + return VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR; case ResourceState::SWAPCHAIN: return VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; default: @@ -457,7 +460,8 @@ namespace vulkan_internal flags |= VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT; } if 
(has_flag(value, ResourceState::VIDEO_DECODE_DST) || - has_flag(value, ResourceState::VIDEO_DECODE_SRC)) + has_flag(value, ResourceState::VIDEO_DECODE_SRC) || + has_flag(value, ResourceState::VIDEO_DECODE_DPB)) { flags |= VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR; } @@ -535,6 +539,10 @@ namespace vulkan_internal { flags |= VK_ACCESS_2_VIDEO_DECODE_READ_BIT_KHR; } + if (has_flag(value, ResourceState::VIDEO_DECODE_DPB)) + { + flags |= VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR; + } return flags; } @@ -4138,12 +4146,21 @@ using namespace vulkan_internal; { imageInfo.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; } + if (has_flag(texture->desc.misc_flags, ResourceMiscFlag::VIDEO_DECODE)) { imageInfo.usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; imageInfo.usage |= VK_IMAGE_USAGE_VIDEO_DECODE_SRC_BIT_KHR; imageInfo.usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; } + if (has_flag(texture->desc.misc_flags, ResourceMiscFlag::VIDEO_DECODE_OUTPUT_ONLY)) + { + imageInfo.usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; + } + if (has_flag(texture->desc.misc_flags, ResourceMiscFlag::VIDEO_DECODE_DPB_ONLY)) + { + imageInfo.usage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; // Note: this is not a combination of flags, but complete assignment! 
+ } if (desc->format == Format::NV12 && has_flag(texture->desc.bind_flags, BindFlag::SHADER_RESOURCE)) { @@ -4610,7 +4627,7 @@ using namespace vulkan_internal; } } - if (has_flag(texture->desc.misc_flags, ResourceMiscFlag::VIDEO_DECODE)) + if (has_flag(texture->desc.misc_flags, ResourceMiscFlag::VIDEO_DECODE) || has_flag(texture->desc.misc_flags, ResourceMiscFlag::VIDEO_DECODE_OUTPUT_ONLY) || has_flag(texture->desc.misc_flags, ResourceMiscFlag::VIDEO_DECODE_DPB_ONLY)) { VkImageViewCreateInfo view_desc = {}; view_desc.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; @@ -4626,7 +4643,18 @@ using namespace vulkan_internal; VkImageViewUsageCreateInfo viewUsageInfo = {}; viewUsageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO; - viewUsageInfo.usage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_SRC_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; + if (has_flag(texture->desc.misc_flags, ResourceMiscFlag::VIDEO_DECODE_DPB_ONLY)) + { + viewUsageInfo.usage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; + } + else if (has_flag(texture->desc.misc_flags, ResourceMiscFlag::VIDEO_DECODE_OUTPUT_ONLY)) + { + viewUsageInfo.usage = VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; + } + else + { + viewUsageInfo.usage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_SRC_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; + } view_desc.pNext = &viewUsageInfo; res = vulkan_check(vkCreateImageView(device, &view_desc, nullptr, &internal_state->video_decode_view)); @@ -6413,7 +6441,16 @@ using namespace vulkan_internal; session_parameters_info.pNext = &session_parameters_info_h264; vulkan_check(vkCreateVideoSessionParametersKHR(device, &session_parameters_info, nullptr, &internal_state->session_parameters)); - assert(video_capability_h264.decode_capabilities.flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR); // Currently the only method supported + video_decoder->support = {}; + if 
(video_capability_h264.decode_capabilities.flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR) + { + video_decoder->support |= VideoDecoderSupportFlags::DPB_AND_OUTPUT_COINCIDE; + } + if (video_capability_h264.decode_capabilities.flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR) + { + video_decoder->support |= VideoDecoderSupportFlags::DPB_AND_OUTPUT_DISTINCT; + } + video_decoder->support |= VideoDecoderSupportFlags::DPB_INDIVIDUAL_TEXTURES_SUPPORTED; return true; } @@ -9333,7 +9370,21 @@ using namespace vulkan_internal; decode_info.srcBuffer = stream_internal->resource; decode_info.srcBufferOffset = (VkDeviceSize)op->stream_offset; decode_info.srcBufferRange = (VkDeviceSize)AlignTo(op->stream_size, VIDEO_DECODE_BITSTREAM_ALIGNMENT); - decode_info.dstPictureResource = *reference_slot_infos[op->current_dpb].pPictureResource; + if (op->output == nullptr) + { + decode_info.dstPictureResource = *reference_slot_infos[op->current_dpb].pPictureResource; + } + else + { + auto output_internal = to_internal(op->output); + decode_info.dstPictureResource.sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR; + decode_info.dstPictureResource.codedOffset.x = 0; + decode_info.dstPictureResource.codedOffset.y = 0; + decode_info.dstPictureResource.codedExtent.width = op->DPB->desc.width; + decode_info.dstPictureResource.codedExtent.height = op->DPB->desc.height; + decode_info.dstPictureResource.baseArrayLayer = 0; + decode_info.dstPictureResource.imageViewBinding = output_internal->video_decode_view; + } decode_info.referenceSlotCount = op->dpb_reference_count; decode_info.pReferenceSlots = decode_info.referenceSlotCount == 0 ? 
nullptr : reference_slots; decode_info.pSetupReferenceSlot = &reference_slot_infos[op->current_dpb]; diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp index 0f0f567da..bf4feaf86 100644 --- a/WickedEngine/wiRenderer.cpp +++ b/WickedEngine/wiRenderer.cpp @@ -1153,6 +1153,7 @@ void LoadShaders() wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_VIRTUALTEXTURE_RESIDENCYUPDATE], "virtualTextureResidencyUpdateCS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_WIND], "windCS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_YUV_TO_RGB], "yuv_to_rgbCS.cso"); }); + wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_YUV_TO_RGB_ARRAY], "yuv_to_rgbCS.cso", ShaderModel::SM_6_0, {"ARRAY"}); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_WETMAP_UPDATE], "wetmap_updateCS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_CAUSTICS], "causticsCS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_DEPTH_REPROJECT], "depth_reprojectCS.cso"); }); @@ -17835,7 +17836,7 @@ void YUV_to_RGB( { device->EventBegin("YUV_to_RGB", cmd); - device->BindComputeShader(&shaders[CSTYPE_YUV_TO_RGB], cmd); + device->BindComputeShader(&shaders[input.desc.array_size > 1 ? 
CSTYPE_YUV_TO_RGB_ARRAY : CSTYPE_YUV_TO_RGB], cmd); const TextureDesc& input_desc = input.GetDesc(); const TextureDesc& output_desc = output.GetDesc(); diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index b943b322a..1a3823ba0 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wi::version // minor features, major updates, breaking compatibility changes const int minor = 71; // minor bug fixes, alterations, refactors, updates - const int revision = 758; + const int revision = 759; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision); diff --git a/WickedEngine/wiVideo.cpp b/WickedEngine/wiVideo.cpp index bd6f913b1..2792fe004 100644 --- a/WickedEngine/wiVideo.cpp +++ b/WickedEngine/wiVideo.cpp @@ -21,6 +21,7 @@ namespace wi::video } bool CreateVideo(const uint8_t* filedata, size_t filesize, Video* video) { + wilog("CreateVideo: Video decoding is still very experimental, use at your own risk!"); bool success = false; const uint8_t* input_buf = filedata; struct INPUT_BUFFER @@ -349,7 +350,7 @@ namespace wi::video GPUBufferDesc bd; bd.size = aligned_size; - bd.usage = Usage::UPLOAD; + bd.usage = Usage::UPLOAD; // DEFAULT doesn't work on Nvidia bd.misc_flags = ResourceMiscFlag::VIDEO_DECODE; success = device->CreateBuffer2(&bd, copy_video_track, &video->data_stream); assert(success); @@ -378,7 +379,7 @@ namespace wi::video GraphicsDevice* device = GetDevice(); if (!device->CheckCapability(GraphicsDeviceCapability::VIDEO_DECODE_H264)) { - wi::helper::messageBox("Video decoding is not supported by your GPU!\nYou can attempt to update graphics driver.\nThere is no CPU decoding implemented yet, video will be disabled!", "Warning!"); + wi::helper::messageBox("The video decoding implementation is not supported by your GPU!\nYou can attempt to update graphics driver.\nThere is no CPU decoding implemented yet, video will be disabled!", 
"Warning!"); return false; } @@ -401,31 +402,74 @@ namespace wi::video td.height = vd.height; td.format = vd.format; td.array_size = video->num_dpb_slots; - td.bind_flags = BindFlag::SHADER_RESOURCE; - td.misc_flags = ResourceMiscFlag::VIDEO_DECODE; - td.layout = ResourceState::VIDEO_DECODE_DST; + if (has_flag(instance->decoder.support, VideoDecoderSupportFlags::DPB_AND_OUTPUT_COINCIDE)) + { + td.bind_flags = BindFlag::SHADER_RESOURCE; + td.misc_flags = ResourceMiscFlag::VIDEO_DECODE; + td.layout = ResourceState::SHADER_RESOURCE_COMPUTE; + } + else + { + td.misc_flags = ResourceMiscFlag::VIDEO_DECODE_DPB_ONLY; + td.layout = ResourceState::VIDEO_DECODE_DPB; + } success = device->CreateTexture(&td, nullptr, &instance->dpb.texture); assert(success); - device->SetName(&instance->dpb.texture, "VideoInstance::DPB"); + device->SetName(&instance->dpb.texture, "VideoInstance::DPB::texture"); - for (uint32_t i = 0; i < td.array_size; ++i) + if (has_flag(instance->decoder.support, VideoDecoderSupportFlags::DPB_AND_OUTPUT_COINCIDE)) { - instance->dpb.resource_states[i] = td.layout; + // DPB_AND_OUTPUT_COINCIDE so DPB can be used for output: + instance->dpb.output = {}; + + for (uint32_t i = 0; i < td.array_size; ++i) + { + instance->dpb.resource_states[i] = td.layout; + Format luminance_format = Format::R8_UNORM; + ImageAspect luminance_aspect = ImageAspect::LUMINANCE; + instance->dpb.subresources_luminance[i] = device->CreateSubresource( + &instance->dpb.texture, + SubresourceType::SRV, + i, 1, 0, 1, + &luminance_format, &luminance_aspect + ); + + Format chrominance_format = Format::R8G8_UNORM; + ImageAspect chrominance_aspect = ImageAspect::CHROMINANCE; + instance->dpb.subresources_chrominance[i] = device->CreateSubresource( + &instance->dpb.texture, + SubresourceType::SRV, + i, 1, 0, 1, + &chrominance_format, &chrominance_aspect + ); + } + } + else + { + // DPB_AND_OUTPUT_COINCIDE NOT supported so DPB MUST NOT be used for output: + td.array_size = 1; + td.bind_flags = 
BindFlag::SHADER_RESOURCE; + td.misc_flags = ResourceMiscFlag::VIDEO_DECODE_OUTPUT_ONLY; + td.layout = ResourceState::SHADER_RESOURCE_COMPUTE; + success = device->CreateTexture(&td, nullptr, &instance->dpb.output); + assert(success); + device->SetName(&instance->dpb.output, "VideoInstance::DPB::output"); + Format luminance_format = Format::R8_UNORM; ImageAspect luminance_aspect = ImageAspect::LUMINANCE; - instance->dpb.subresources_luminance[i] = device->CreateSubresource( - &instance->dpb.texture, + instance->dpb.subresources_luminance[0] = device->CreateSubresource( + &instance->dpb.output, SubresourceType::SRV, - i, 1, 0, 1, + 0, 1, 0, 1, &luminance_format, &luminance_aspect ); Format chrominance_format = Format::R8G8_UNORM; ImageAspect chrominance_aspect = ImageAspect::CHROMINANCE; - instance->dpb.subresources_chrominance[i] = device->CreateSubresource( - &instance->dpb.texture, + instance->dpb.subresources_chrominance[0] = device->CreateSubresource( + &instance->dpb.output, SubresourceType::SRV, - i, 1, 0, 1, + 0, 1, 0, 1, &chrominance_format, &chrominance_aspect ); } @@ -526,23 +570,34 @@ namespace wi::video ImageAspect aspect_luma = ImageAspect::LUMINANCE; ImageAspect aspect_chroma = ImageAspect::CHROMINANCE; - // Ensure that current DPB slot is in DST state: - if (instance->dpb.resource_states[instance->dpb.current_slot] != ResourceState::VIDEO_DECODE_DST) + if (has_flag(instance->decoder.support, VideoDecoderSupportFlags::DPB_AND_OUTPUT_COINCIDE)) { - instance->barriers.push_back(GPUBarrier::Image(&instance->dpb.texture, instance->dpb.resource_states[instance->dpb.current_slot], ResourceState::VIDEO_DECODE_DST, 0, instance->dpb.current_slot, &aspect_luma)); - instance->barriers.push_back(GPUBarrier::Image(&instance->dpb.texture, instance->dpb.resource_states[instance->dpb.current_slot], ResourceState::VIDEO_DECODE_DST, 0, instance->dpb.current_slot, &aspect_chroma)); - instance->dpb.resource_states[instance->dpb.current_slot] = 
ResourceState::VIDEO_DECODE_DST; - } - // Ensure that reference frame DPB slots are in SRC state: - for (size_t i = 0; i < instance->dpb.reference_usage.size(); ++i) - { - uint8_t ref = instance->dpb.reference_usage[i]; - if (instance->dpb.resource_states[ref] != ResourceState::VIDEO_DECODE_SRC) + decode_operation.output = nullptr; + // Ensure that current DPB slot is in DPB state: + if (instance->dpb.resource_states[instance->dpb.current_slot] != ResourceState::VIDEO_DECODE_DPB) { - instance->barriers.push_back(GPUBarrier::Image(&instance->dpb.texture, instance->dpb.resource_states[ref], ResourceState::VIDEO_DECODE_SRC, 0, ref, &aspect_luma)); - instance->barriers.push_back(GPUBarrier::Image(&instance->dpb.texture, instance->dpb.resource_states[ref], ResourceState::VIDEO_DECODE_SRC, 0, ref, &aspect_chroma)); - instance->dpb.resource_states[ref] = ResourceState::VIDEO_DECODE_SRC; + instance->barriers.push_back(GPUBarrier::Image(&instance->dpb.texture, instance->dpb.resource_states[instance->dpb.current_slot], ResourceState::VIDEO_DECODE_DPB, 0, instance->dpb.current_slot, &aspect_luma)); + instance->barriers.push_back(GPUBarrier::Image(&instance->dpb.texture, instance->dpb.resource_states[instance->dpb.current_slot], ResourceState::VIDEO_DECODE_DPB, 0, instance->dpb.current_slot, &aspect_chroma)); + instance->dpb.resource_states[instance->dpb.current_slot] = ResourceState::VIDEO_DECODE_DPB; } + // Ensure that reference frame DPB slots are in SRC state: + for (size_t i = 0; i < instance->dpb.reference_usage.size(); ++i) + { + uint8_t ref = instance->dpb.reference_usage[i]; + if (instance->dpb.resource_states[ref] != ResourceState::VIDEO_DECODE_SRC) + { + instance->barriers.push_back(GPUBarrier::Image(&instance->dpb.texture, instance->dpb.resource_states[ref], ResourceState::VIDEO_DECODE_SRC, 0, ref, &aspect_luma)); + instance->barriers.push_back(GPUBarrier::Image(&instance->dpb.texture, instance->dpb.resource_states[ref], ResourceState::VIDEO_DECODE_SRC, 0, ref,
&aspect_chroma)); + instance->dpb.resource_states[ref] = ResourceState::VIDEO_DECODE_SRC; + } + } + } + else + { + // if DPB_AND_OUTPUT_COINCIDE is NOT supported, then DPB is kept always in DPB state, and only the output tex is ever a shader resource: + decode_operation.output = &instance->dpb.output; + instance->barriers.push_back(GPUBarrier::Image(&instance->dpb.output, ResourceState::SHADER_RESOURCE_COMPUTE, ResourceState::VIDEO_DECODE_DST, -1, -1, &aspect_luma)); + instance->barriers.push_back(GPUBarrier::Image(&instance->dpb.output, ResourceState::SHADER_RESOURCE_COMPUTE, ResourceState::VIDEO_DECODE_DST, -1, -1, &aspect_chroma)); } if (!instance->barriers.empty()) { @@ -552,16 +607,28 @@ namespace wi::video device->VideoDecode(&instance->decoder, &decode_operation, cmd); - // The current DPB slot is transitioned into a shader readable state because it will need to be resolved into RGB on a different GPU queue: - // The video queue must be used to transition from video states - if (instance->dpb.resource_states[instance->dpb.current_slot] != ResourceState::SHADER_RESOURCE_COMPUTE) + if (has_flag(instance->decoder.support, VideoDecoderSupportFlags::DPB_AND_OUTPUT_COINCIDE)) { + // The current DPB slot is transitioned into a shader readable state because it will need to be resolved into RGB on a different GPU queue: + // The video queue must be used to transition from video states + if (instance->dpb.resource_states[instance->dpb.current_slot] != ResourceState::SHADER_RESOURCE_COMPUTE) + { + GPUBarrier barriers[] = { + GPUBarrier::Image(&instance->dpb.texture, instance->dpb.resource_states[instance->dpb.current_slot], ResourceState::SHADER_RESOURCE_COMPUTE, 0, instance->dpb.current_slot, &aspect_luma), + GPUBarrier::Image(&instance->dpb.texture, instance->dpb.resource_states[instance->dpb.current_slot], ResourceState::SHADER_RESOURCE_COMPUTE, 0, instance->dpb.current_slot, &aspect_chroma), + }; + device->Barrier(barriers, arraysize(barriers), cmd); + 
instance->dpb.resource_states[instance->dpb.current_slot] = ResourceState::SHADER_RESOURCE_COMPUTE; + } + } + else + { + // if DPB_AND_OUTPUT_COINCIDE is NOT supported, then DPB is kept always in DPB state, and only the output tex is ever a shader resource: GPUBarrier barriers[] = { - GPUBarrier::Image(&instance->dpb.texture, instance->dpb.resource_states[instance->dpb.current_slot], ResourceState::SHADER_RESOURCE_COMPUTE, 0, instance->dpb.current_slot, &aspect_luma), - GPUBarrier::Image(&instance->dpb.texture, instance->dpb.resource_states[instance->dpb.current_slot], ResourceState::SHADER_RESOURCE_COMPUTE, 0, instance->dpb.current_slot, &aspect_chroma), + GPUBarrier::Image(&instance->dpb.output, ResourceState::VIDEO_DECODE_DST, ResourceState::SHADER_RESOURCE_COMPUTE, -1, -1, &aspect_luma), + GPUBarrier::Image(&instance->dpb.output, ResourceState::VIDEO_DECODE_DST, ResourceState::SHADER_RESOURCE_COMPUTE, -1, -1, &aspect_chroma), }; device->Barrier(barriers, arraysize(barriers), cmd); - instance->dpb.resource_states[instance->dpb.current_slot] = ResourceState::SHADER_RESOURCE_COMPUTE; } // DPB slot management: @@ -638,13 +705,26 @@ namespace wi::video instance->output_textures_free.pop_back(); output.display_order = video->frames_infos[std::max(instance->current_frame - 1, 0)].display_order; - wi::renderer::YUV_to_RGB( - instance->dpb.texture, - instance->dpb.subresources_luminance[instance->dpb.current_slot], - instance->dpb.subresources_chrominance[instance->dpb.current_slot], - output.texture, - cmd - ); + if (has_flag(instance->decoder.support, VideoDecoderSupportFlags::DPB_AND_OUTPUT_COINCIDE)) + { + wi::renderer::YUV_to_RGB( + instance->dpb.texture, + instance->dpb.subresources_luminance[instance->dpb.current_slot], + instance->dpb.subresources_chrominance[instance->dpb.current_slot], + output.texture, + cmd + ); + } + else + { + wi::renderer::YUV_to_RGB( + instance->dpb.output, + instance->dpb.subresources_luminance[0], + 
instance->dpb.subresources_chrominance[0], + output.texture, + cmd + ); + } if (has_flag(instance->flags, VideoInstance::Flags::Mipmapped)) { diff --git a/WickedEngine/wiVideo.h b/WickedEngine/wiVideo.h index 475caca2b..2a9759101 100644 --- a/WickedEngine/wiVideo.h +++ b/WickedEngine/wiVideo.h @@ -57,6 +57,7 @@ namespace wi::video struct DPB { wi::graphics::Texture texture; + wi::graphics::Texture output; int subresources_luminance[17] = {}; int subresources_chrominance[17] = {}; int poc_status[17] = {};