diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index e3ef26634cd..d61c1362706 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -432,6 +432,11 @@ Comment: meshoptimizer Copyright: 2016-2024, Arseny Kapoulkine License: Expat +Files: thirdparty/metal-cpp/* +Comment: metal-cpp +Copyright: 2024, Apple Inc. +License: Apache-2.0 + Files: thirdparty/mingw-std-threads/* Comment: mingw-std-threads Copyright: 2016, Mega Limited diff --git a/drivers/apple/foundation_helpers.h b/drivers/apple/foundation_helpers.h index db87fba96c6..f07b4d50739 100644 --- a/drivers/apple/foundation_helpers.h +++ b/drivers/apple/foundation_helpers.h @@ -30,7 +30,11 @@ #pragma once +#ifdef __OBJC__ #import +#else +#include +#endif class String; template @@ -38,12 +42,20 @@ class CharStringT; using CharString = CharStringT; +template +class Span; + namespace conv { +#ifdef __OBJC__ /** * Converts a Godot String to an NSString without allocating an intermediate UTF-8 buffer. * */ NSString *to_nsstring(const String &p_str); +/** + * Converts a Span of ASCII bytes to an NSString. + * */ +NSString *to_nsstring(Span p_str); /** * Converts a Godot CharString to an NSString without allocating an intermediate UTF-8 buffer. * */ @@ -52,5 +64,24 @@ NSString *to_nsstring(const CharString &p_str); * Converts an NSString to a Godot String without allocating intermediate buffers. * */ String to_string(NSString *p_str); +#else +/** + * Converts a Godot String to an NSString without allocating an intermediate UTF-8 buffer. + * */ +NS::String *to_nsstring(const String &p_str); +/** + * Converts a Span of ASCII bytes to an NSString. + * */ +NS::String *to_nsstring(Span p_str); +/** + * Converts a Godot CharString to an NSString without allocating an intermediate UTF-8 buffer. + * */ +NS::String *to_nsstring(const CharString &p_str); +/** + * Converts an NSString to a Godot String without allocating intermediate buffers.
+ * */ +String to_string(NS::String *p_str); + +#endif } //namespace conv diff --git a/drivers/apple/foundation_helpers.mm b/drivers/apple/foundation_helpers.mm index 0453011b1d3..df02ea2a1e2 100644 --- a/drivers/apple/foundation_helpers.mm +++ b/drivers/apple/foundation_helpers.mm @@ -31,6 +31,7 @@ #import "foundation_helpers.h" #import "core/string/ustring.h" +#import "core/templates/span.h" #import @@ -42,6 +43,12 @@ NSString *to_nsstring(const String &p_str) { encoding:NSUTF32LittleEndianStringEncoding]; } +NSString *to_nsstring(Span p_str) { + return [[NSString alloc] initWithBytes:(const void *)p_str.ptr() + length:p_str.size() + encoding:NSASCIIStringEncoding]; +} + NSString *to_nsstring(const CharString &p_str) { return [[NSString alloc] initWithBytes:(const void *)p_str.ptr() length:p_str.length() diff --git a/drivers/apple_embedded/SCsub b/drivers/apple_embedded/SCsub index 7cf6599d60d..59ade51e7ac 100644 --- a/drivers/apple_embedded/SCsub +++ b/drivers/apple_embedded/SCsub @@ -27,6 +27,9 @@ setup_swift_builder( vulkan_dir = "#thirdparty/vulkan" env_apple_embedded.Prepend(CPPPATH=[vulkan_dir, vulkan_dir + "/include"]) +# Use bundled metal-cpp headers +env_apple_embedded.Prepend(CPPPATH=["#thirdparty/metal-cpp"]) + # Driver source files env_apple_embedded.add_source_files(env_apple_embedded.drivers_sources, "*.mm") env_apple_embedded.add_source_files(env_apple_embedded.drivers_sources, "*.swift") diff --git a/drivers/apple_embedded/display_server_apple_embedded.mm b/drivers/apple_embedded/display_server_apple_embedded.mm index b41b66e22bb..0cd817541af 100644 --- a/drivers/apple_embedded/display_server_apple_embedded.mm +++ b/drivers/apple_embedded/display_server_apple_embedded.mm @@ -96,7 +96,7 @@ DisplayServerAppleEmbedded::DisplayServerAppleEmbedded(const String &p_rendering if (rendering_driver == "metal") { if (@available(iOS 14.0, *)) { layer = [GDTAppDelegateService.viewController.godotView initializeRenderingForDriver:@"metal"]; - wpd.metal.layer = 
(CAMetalLayer *)layer; + wpd.metal.layer = (__bridge CA::MetalLayer *)layer; rendering_context = memnew(RenderingContextDriverMetal); } else { OS::get_singleton()->alert("Metal is only supported on iOS 14.0 and later."); diff --git a/drivers/apple_embedded/os_apple_embedded.mm b/drivers/apple_embedded/os_apple_embedded.mm index 53800b6b6d5..fbc610d1594 100644 --- a/drivers/apple_embedded/os_apple_embedded.mm +++ b/drivers/apple_embedded/os_apple_embedded.mm @@ -51,6 +51,7 @@ #import #import #include +#include #if defined(RD_ENABLED) #include "servers/rendering/renderer_rd/renderer_compositor_rd.h" diff --git a/drivers/metal/SCsub b/drivers/metal/SCsub index 1f431b39bbd..8f85363072c 100644 --- a/drivers/metal/SCsub +++ b/drivers/metal/SCsub @@ -3,6 +3,9 @@ from misc.utility.scons_hints import * Import("env") +# If we're using Metal, we need metal-cpp. +env.Prepend(CPPPATH=["#thirdparty/metal-cpp/"]) + env_metal = env.Clone() # Thirdparty source files @@ -23,6 +26,11 @@ thirdparty_sources = [ ] thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] +# Include metal-cpp +thirdparty_sources += [ + "#thirdparty/metal-cpp/metal_cpp.cpp", +] + env_metal.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "/include"]) env_metal.Prepend(CPPPATH=[thirdparty_spirv_headers_dir + "include/spirv/unified1"]) @@ -48,7 +56,6 @@ env_metal.Append(CCFLAGS=["-fmodules", "-fcxx-modules"]) driver_obj = [] -env_metal.add_source_files(driver_obj, "*.mm") env_metal.add_source_files(driver_obj, "*.cpp") env.drivers_sources += driver_obj diff --git a/drivers/metal/metal3_objects.cpp b/drivers/metal/metal3_objects.cpp new file mode 100644 index 00000000000..f96ead2caaa --- /dev/null +++ b/drivers/metal/metal3_objects.cpp @@ -0,0 +1,1806 @@ +/**************************************************************************/ +/* metal3_objects.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* 
https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +/**************************************************************************/ +/* */ +/* Portions of this code were derived from MoltenVK. */ +/* */ +/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */ +/* (http://www.brenwill.com) */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. 
*/ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/**************************************************************************/ + +#include "metal3_objects.h" + +#include "metal_utils.h" +#include "pixel_formats.h" +#include "rendering_device_driver_metal3.h" +#include "rendering_shader_container_metal.h" + +#include + +using namespace MTL3; + +MDCommandBuffer::MDCommandBuffer(MTL::CommandQueue *p_queue, ::RenderingDeviceDriverMetal *p_device_driver) : + _scratch(p_queue->device()), queue(p_queue) { + device_driver = p_device_driver; + type = MDCommandBufferStateType::None; + use_barriers = device_driver->use_barriers; + if (use_barriers) { + // Already validated availability if use_barriers is true. 
+ MTL::Device *device = p_queue->device(); + NS::SharedPtr rs_desc = NS::TransferPtr(MTL::ResidencySetDescriptor::alloc()->init()); + rs_desc->setInitialCapacity(10); + rs_desc->setLabel(MTLSTR("Command Residency Set")); + NS::Error *error = nullptr; + _frame_state.rs = NS::TransferPtr(device->newResidencySet(rs_desc.get(), &error)); + CRASH_COND_MSG(error != nullptr, vformat("Failed to create residency set: %s", String(error->localizedDescription()->utf8String()))); + } +} + +void MDCommandBuffer::begin_label(const char *p_label_name, const Color &p_color) { + NS::SharedPtr s = NS::TransferPtr(NS::String::alloc()->init(p_label_name, NS::UTF8StringEncoding)); + command_buffer()->pushDebugGroup(s.get()); +} + +void MDCommandBuffer::end_label() { + command_buffer()->popDebugGroup(); +} + +void MDCommandBuffer::begin() { + DEV_ASSERT(commandBuffer.get() == nullptr && !state_begin); + state_begin = true; + bzero(pending_after_stages, sizeof(pending_after_stages)); + bzero(pending_before_queue_stages, sizeof(pending_before_queue_stages)); + binding_cache.clear(); + _scratch.reset(); + release_resources(); +} + +MDCommandBuffer::Alloc MDCommandBuffer::allocate_arg_buffer(uint32_t p_size) { + return _scratch.allocate(p_size); +} + +void MDCommandBuffer::end() { + switch (type) { + case MDCommandBufferStateType::None: + return; + case MDCommandBufferStateType::Render: + return render_end_pass(); + case MDCommandBufferStateType::Compute: + return _end_compute_dispatch(); + case MDCommandBufferStateType::Blit: + return _end_blit(); + } +} + +void MDCommandBuffer::commit() { + end(); + if (use_barriers) { + if (_scratch.is_changed()) { + Span bufs = _scratch.get_buffers(); + _frame_state.rs->addAllocations(reinterpret_cast(bufs.ptr()), bufs.size()); + _scratch.clear_changed(); + _frame_state.rs->commit(); + } + } + commandBuffer->commit(); + commandBuffer.reset(); + state_begin = false; +} + +MTL::CommandBuffer *MDCommandBuffer::command_buffer() { + DEV_ASSERT(state_begin); + 
if (commandBuffer.get() == nullptr) { + commandBuffer = NS::RetainPtr(queue->commandBuffer()); + if (use_barriers) { + commandBuffer->useResidencySet(_frame_state.rs.get()); + } + } + return commandBuffer.get(); +} + +void MDCommandBuffer::_encode_barrier(MTL::CommandEncoder *p_enc) { + DEV_ASSERT(p_enc); + + static const MTL::Stages empty_stages[STAGE_MAX] = { 0, 0, 0 }; + if (memcmp(&pending_before_queue_stages, empty_stages, sizeof(pending_before_queue_stages)) == 0) { + return; + } + + int stage = STAGE_MAX; + // Determine encoder type by checking if it's the current active encoder. + if (render.encoder.get() == p_enc && pending_after_stages[STAGE_RENDER] != 0) { + stage = STAGE_RENDER; + } else if (compute.encoder.get() == p_enc && pending_after_stages[STAGE_COMPUTE] != 0) { + stage = STAGE_COMPUTE; + } else if (blit.encoder.get() == p_enc && pending_after_stages[STAGE_BLIT] != 0) { + stage = STAGE_BLIT; + } + + if (stage == STAGE_MAX) { + return; + } + + p_enc->barrierAfterQueueStages(pending_after_stages[stage], pending_before_queue_stages[stage]); + pending_before_queue_stages[stage] = 0; + pending_after_stages[stage] = 0; +} + +void MDCommandBuffer::pipeline_barrier(BitField p_src_stages, + BitField p_dst_stages, + VectorView p_memory_barriers, + VectorView p_buffer_barriers, + VectorView p_texture_barriers, + VectorView p_acceleration_structure_barriers) { + MTL::Stages after_stages = convert_src_pipeline_stages_to_metal(p_src_stages); + if (after_stages == 0) { + return; + } + + MTL::Stages before_stages = convert_dst_pipeline_stages_to_metal(p_dst_stages); + if (before_stages == 0) { + return; + } + + // Encode intra-encoder memory barrier if an encoder is active for matching stages. 
+ if (render.encoder.get() != nullptr) { + MTL::RenderStages render_after = static_cast(after_stages & (MTL::StageVertex | MTL::StageFragment)); + MTL::RenderStages render_before = static_cast(before_stages & (MTL::StageVertex | MTL::StageFragment)); + if (render_after != 0 && render_before != 0) { + render.encoder->memoryBarrier(MTL::BarrierScopeBuffers | MTL::BarrierScopeTextures, render_after, render_before); + } + } else if (compute.encoder.get() != nullptr) { + if (after_stages & MTL::StageDispatch) { + compute.encoder->memoryBarrier(MTL::BarrierScopeBuffers | MTL::BarrierScopeTextures); + } + } + // Blit encoder has no memory barrier API. + + // Also cache for inter-pass barriers based on DESTINATION stages, + // since barrierAfterQueueStages is called on the encoder that must wait. + if (before_stages & (MTL::StageVertex | MTL::StageFragment)) { + pending_after_stages[STAGE_RENDER] |= after_stages; + pending_before_queue_stages[STAGE_RENDER] |= before_stages; + } + + if (before_stages & MTL::StageDispatch) { + pending_after_stages[STAGE_COMPUTE] |= after_stages; + pending_before_queue_stages[STAGE_COMPUTE] |= before_stages; + } + + if (before_stages & MTL::StageBlit) { + pending_after_stages[STAGE_BLIT] |= after_stages; + pending_before_queue_stages[STAGE_BLIT] |= before_stages; + } +} + +void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) { + MDPipeline *p = (MDPipeline *)(p_pipeline.id); + + // End current encoder if it is a compute encoder or blit encoder, + // as they do not have a defined end boundary in the RDD like render. + if (type == MDCommandBufferStateType::Compute) { + _end_compute_dispatch(); + } else if (type == MDCommandBufferStateType::Blit) { + _end_blit(); + } + + if (p->type == MDPipelineType::Render) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + MDRenderPipeline *rp = (MDRenderPipeline *)p; + + if (render.encoder.get() == nullptr) { + // This error would happen if the render pass failed. 
+ ERR_FAIL_NULL_MSG(render.desc.get(), "Render pass descriptor is null."); + + // This condition occurs when there are no attachments when calling render_next_subpass() + // and is due to the SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS flag. + render.desc->setDefaultRasterSampleCount(static_cast(rp->sample_count)); + + render.encoder = NS::RetainPtr(command_buffer()->renderCommandEncoder(render.desc.get())); + _encode_barrier(render.encoder.get()); + } + + if (render.pipeline != rp) { + render.dirty.set_flag((RenderState::DirtyFlag)(RenderState::DIRTY_PIPELINE | RenderState::DIRTY_RASTER)); + // Mark all uniforms as dirty, as variants of a shader pipeline may have a different entry point ABI, + // due to setting force_active_argument_buffer_resources = true for spirv_cross::CompilerMSL::Options. + // As a result, uniform sets with the same layout will generate redundant binding warnings when + // capturing a Metal frame in Xcode. + // + // If we don't mark as dirty, then some bindings will generate a validation error. 
+ // binding_cache.clear(); + render.mark_uniforms_dirty(); + + if (render.pipeline != nullptr && render.pipeline->depth_stencil != rp->depth_stencil) { + render.dirty.set_flag(RenderState::DIRTY_DEPTH); + } + if (rp->raster_state.blend.enabled) { + render.dirty.set_flag(RenderState::DIRTY_BLEND); + } + render.pipeline = rp; + } + } else if (p->type == MDPipelineType::Compute) { + DEV_ASSERT(type == MDCommandBufferStateType::None); + type = MDCommandBufferStateType::Compute; + + if (compute.pipeline != p) { + compute.dirty.set_flag(ComputeState::DIRTY_PIPELINE); + binding_cache.clear(); + compute.mark_uniforms_dirty(); + compute.pipeline = (MDComputePipeline *)p; + } + } +} + +void MDCommandBuffer::mark_push_constants_dirty() { + switch (type) { + case MDCommandBufferStateType::Render: + render.dirty.set_flag(RenderState::DirtyFlag::DIRTY_PUSH); + break; + case MDCommandBufferStateType::Compute: + compute.dirty.set_flag(ComputeState::DirtyFlag::DIRTY_PUSH); + break; + default: + break; + } +} + +MTL::BlitCommandEncoder *MDCommandBuffer::_ensure_blit_encoder() { + switch (type) { + case MDCommandBufferStateType::None: + break; + case MDCommandBufferStateType::Render: + render_end_pass(); + break; + case MDCommandBufferStateType::Compute: + _end_compute_dispatch(); + break; + case MDCommandBufferStateType::Blit: + return blit.encoder.get(); + } + + type = MDCommandBufferStateType::Blit; + blit.encoder = NS::RetainPtr(command_buffer()->blitCommandEncoder()); + _encode_barrier(blit.encoder.get()); + + return blit.encoder.get(); +} + +void MDCommandBuffer::resolve_texture(RDD::TextureID p_src_texture, RDD::TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, RDD::TextureID p_dst_texture, RDD::TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) { + MTL::Texture *src_tex = rid::get(p_src_texture); + MTL::Texture *dst_tex = rid::get(p_dst_texture); + + NS::SharedPtr mtlRPD = 
NS::TransferPtr(MTL::RenderPassDescriptor::alloc()->init()); + MTL::RenderPassColorAttachmentDescriptor *mtlColorAttDesc = mtlRPD->colorAttachments()->object(0); + mtlColorAttDesc->setLoadAction(MTL::LoadActionLoad); + mtlColorAttDesc->setStoreAction(MTL::StoreActionMultisampleResolve); + + mtlColorAttDesc->setTexture(src_tex); + mtlColorAttDesc->setResolveTexture(dst_tex); + mtlColorAttDesc->setLevel(p_src_mipmap); + mtlColorAttDesc->setSlice(p_src_layer); + mtlColorAttDesc->setResolveLevel(p_dst_mipmap); + mtlColorAttDesc->setResolveSlice(p_dst_layer); + MTL::RenderCommandEncoder *enc = get_new_render_encoder_with_descriptor(mtlRPD.get()); + enc->setLabel(MTLSTR("Resolve Image")); + enc->endEncoding(); +} + +void MDCommandBuffer::clear_color_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, const Color &p_color, const RDD::TextureSubresourceRange &p_subresources) { + MTL::Texture *src_tex = rid::get(p_texture); + + if (src_tex->parentTexture()) { + // Clear via the parent texture rather than the view. + src_tex = src_tex->parentTexture(); + } + + PixelFormats &pf = device_driver->get_pixel_formats(); + + if (pf.isDepthFormat(src_tex->pixelFormat()) || pf.isStencilFormat(src_tex->pixelFormat())) { + ERR_FAIL_MSG("invalid: depth or stencil texture format"); + } + + NS::SharedPtr desc = NS::TransferPtr(MTL::RenderPassDescriptor::alloc()->init()); + + if (p_subresources.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) { + MTL::RenderPassColorAttachmentDescriptor *caDesc = desc->colorAttachments()->object(0); + caDesc->setTexture(src_tex); + caDesc->setLoadAction(MTL::LoadActionClear); + caDesc->setStoreAction(MTL::StoreActionStore); + caDesc->setClearColor(MTL::ClearColor(p_color.r, p_color.g, p_color.b, p_color.a)); + + // Extract the mipmap levels that are to be updated. 
+ uint32_t mipLvlStart = p_subresources.base_mipmap; + uint32_t mipLvlCnt = p_subresources.mipmap_count; + uint32_t mipLvlEnd = mipLvlStart + mipLvlCnt; + + uint32_t levelCount = src_tex->mipmapLevelCount(); + + // Extract the cube or array layers (slices) that are to be updated. + bool is3D = src_tex->textureType() == MTL::TextureType3D; + uint32_t layerStart = is3D ? 0 : p_subresources.base_layer; + uint32_t layerCnt = p_subresources.layer_count; + uint32_t layerEnd = layerStart + layerCnt; + + MetalFeatures const &features = device_driver->get_device_properties().features; + + // Iterate across mipmap levels and layers, and perform and empty render to clear each. + for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) { + ERR_FAIL_INDEX_MSG(mipLvl, levelCount, "mip level out of range"); + + caDesc->setLevel(mipLvl); + + // If a 3D image, we need to get the depth for each level. + if (is3D) { + layerCnt = mipmapLevelSizeFromTexture(src_tex, mipLvl).depth; + layerEnd = layerStart + layerCnt; + } + + if ((features.layeredRendering && src_tex->sampleCount() == 1) || features.multisampleLayeredRendering) { + // We can clear all layers at once. 
+ if (is3D) { + caDesc->setDepthPlane(layerStart); + } else { + caDesc->setSlice(layerStart); + } + desc->setRenderTargetArrayLength(layerCnt); + MTL::RenderCommandEncoder *enc = get_new_render_encoder_with_descriptor(desc.get()); + enc->setLabel(MTLSTR("Clear Image")); + enc->endEncoding(); + } else { + for (uint32_t layer = layerStart; layer < layerEnd; layer++) { + if (is3D) { + caDesc->setDepthPlane(layer); + } else { + caDesc->setSlice(layer); + } + MTL::RenderCommandEncoder *enc = get_new_render_encoder_with_descriptor(desc.get()); + enc->setLabel(MTLSTR("Clear Image")); + enc->endEncoding(); + } + } + } + } +} + +void MDCommandBuffer::clear_buffer(RDD::BufferID p_buffer, uint64_t p_offset, uint64_t p_size) { + MTL::BlitCommandEncoder *blit_enc = _ensure_blit_encoder(); + const RDM::BufferInfo *buffer = (const RDM::BufferInfo *)p_buffer.id; + + blit_enc->fillBuffer(buffer->metal_buffer.get(), NS::Range(p_offset, p_size), 0); +} + +void MDCommandBuffer::clear_depth_stencil_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, float p_depth, uint8_t p_stencil, const RDD::TextureSubresourceRange &p_subresources) { + MTL::Texture *src_tex = rid::get(p_texture); + + if (src_tex->parentTexture()) { + // Clear via the parent texture rather than the view. 
+ src_tex = src_tex->parentTexture(); + } + + PixelFormats &pf = device_driver->get_pixel_formats(); + + bool is_depth_format = pf.isDepthFormat(src_tex->pixelFormat()); + bool is_stencil_format = pf.isStencilFormat(src_tex->pixelFormat()); + + if (!is_depth_format && !is_stencil_format) { + ERR_FAIL_MSG("invalid: color texture format"); + } + + bool clear_depth = is_depth_format && p_subresources.aspect.has_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT); + bool clear_stencil = is_stencil_format && p_subresources.aspect.has_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT); + + if (clear_depth || clear_stencil) { + NS::SharedPtr desc = NS::TransferPtr(MTL::RenderPassDescriptor::alloc()->init()); + + MTL::RenderPassDepthAttachmentDescriptor *daDesc = desc->depthAttachment(); + if (clear_depth) { + daDesc->setTexture(src_tex); + daDesc->setLoadAction(MTL::LoadActionClear); + daDesc->setStoreAction(MTL::StoreActionStore); + daDesc->setClearDepth(p_depth); + } + + MTL::RenderPassStencilAttachmentDescriptor *saDesc = desc->stencilAttachment(); + if (clear_stencil) { + saDesc->setTexture(src_tex); + saDesc->setLoadAction(MTL::LoadActionClear); + saDesc->setStoreAction(MTL::StoreActionStore); + saDesc->setClearStencil(p_stencil); + } + + // Extract the mipmap levels that are to be updated. + uint32_t mipLvlStart = p_subresources.base_mipmap; + uint32_t mipLvlCnt = p_subresources.mipmap_count; + uint32_t mipLvlEnd = mipLvlStart + mipLvlCnt; + + uint32_t levelCount = src_tex->mipmapLevelCount(); + + // Extract the cube or array layers (slices) that are to be updated. + bool is3D = src_tex->textureType() == MTL::TextureType3D; + uint32_t layerStart = is3D ? 0 : p_subresources.base_layer; + uint32_t layerCnt = p_subresources.layer_count; + uint32_t layerEnd = layerStart + layerCnt; + + MetalFeatures const &features = device_driver->get_device_properties().features; + + // Iterate across mipmap levels and layers, and perform and empty render to clear each. 
+ for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) { + ERR_FAIL_INDEX_MSG(mipLvl, levelCount, "mip level out of range"); + + if (clear_depth) { + daDesc->setLevel(mipLvl); + } + if (clear_stencil) { + saDesc->setLevel(mipLvl); + } + + // If a 3D image, we need to get the depth for each level. + if (is3D) { + layerCnt = mipmapLevelSizeFromTexture(src_tex, mipLvl).depth; + layerEnd = layerStart + layerCnt; + } + + if ((features.layeredRendering && src_tex->sampleCount() == 1) || features.multisampleLayeredRendering) { + // We can clear all layers at once. + if (is3D) { + if (clear_depth) { + daDesc->setDepthPlane(layerStart); + } + if (clear_stencil) { + saDesc->setDepthPlane(layerStart); + } + } else { + if (clear_depth) { + daDesc->setSlice(layerStart); + } + if (clear_stencil) { + saDesc->setSlice(layerStart); + } + } + desc->setRenderTargetArrayLength(layerCnt); + MTL::RenderCommandEncoder *enc = get_new_render_encoder_with_descriptor(desc.get()); + enc->setLabel(MTLSTR("Clear Image")); + enc->endEncoding(); + } else { + for (uint32_t layer = layerStart; layer < layerEnd; layer++) { + if (is3D) { + if (clear_depth) { + daDesc->setDepthPlane(layer); + } + if (clear_stencil) { + saDesc->setDepthPlane(layer); + } + } else { + if (clear_depth) { + daDesc->setSlice(layer); + } + if (clear_stencil) { + saDesc->setSlice(layer); + } + } + MTL::RenderCommandEncoder *enc = get_new_render_encoder_with_descriptor(desc.get()); + enc->setLabel(MTLSTR("Clear Image")); + enc->endEncoding(); + } + } + } + } +} + +void MDCommandBuffer::copy_buffer(RDD::BufferID p_src_buffer, RDD::BufferID p_dst_buffer, VectorView p_regions) { + const RDM::BufferInfo *src = (const RDM::BufferInfo *)p_src_buffer.id; + const RDM::BufferInfo *dst = (const RDM::BufferInfo *)p_dst_buffer.id; + + MTL::BlitCommandEncoder *enc = _ensure_blit_encoder(); + + for (uint32_t i = 0; i < p_regions.size(); i++) { + RDD::BufferCopyRegion region = p_regions[i]; + 
enc->copyFromBuffer(src->metal_buffer.get(), region.src_offset, + dst->metal_buffer.get(), region.dst_offset, region.size); + } +} + +void MDCommandBuffer::copy_texture(RDD::TextureID p_src_texture, RDD::TextureID p_dst_texture, VectorView p_regions) { + MTL::Texture *src = rid::get(p_src_texture); + MTL::Texture *dst = rid::get(p_dst_texture); + + MTL::BlitCommandEncoder *enc = _ensure_blit_encoder(); + PixelFormats &pf = device_driver->get_pixel_formats(); + + MTL::PixelFormat src_fmt = src->pixelFormat(); + bool src_is_compressed = pf.getFormatType(src_fmt) == MTLFormatType::Compressed; + MTL::PixelFormat dst_fmt = dst->pixelFormat(); + bool dst_is_compressed = pf.getFormatType(dst_fmt) == MTLFormatType::Compressed; + + // Validate copy. + if (src->sampleCount() != dst->sampleCount() || pf.getBytesPerBlock(src_fmt) != pf.getBytesPerBlock(dst_fmt)) { + ERR_FAIL_MSG("Cannot copy between incompatible pixel formats, such as formats of different pixel sizes, or between images with different sample counts."); + } + + // If source and destination have different formats and at least one is compressed, a temporary buffer is required. + bool need_tmp_buffer = (src_fmt != dst_fmt) && (src_is_compressed || dst_is_compressed); + if (need_tmp_buffer) { + ERR_FAIL_MSG("not implemented: copy with intermediate buffer"); + } + + if (src_fmt != dst_fmt) { + // Map the source pixel format to the dst through a texture view on the source texture. + src = src->newTextureView(dst_fmt); + } + + for (uint32_t i = 0; i < p_regions.size(); i++) { + RDD::TextureCopyRegion region = p_regions[i]; + + MTL::Size extent = MTLSizeFromVector3i(region.size); + + // If copies can be performed using direct texture-texture copying, do so. 
+ uint32_t src_level = region.src_subresources.mipmap; + uint32_t src_base_layer = region.src_subresources.base_layer; + MTL::Size src_extent = mipmapLevelSizeFromTexture(src, src_level); + uint32_t dst_level = region.dst_subresources.mipmap; + uint32_t dst_base_layer = region.dst_subresources.base_layer; + MTL::Size dst_extent = mipmapLevelSizeFromTexture(dst, dst_level); + + // All layers may be copied at once, if the extent completely covers both images. + if (src_extent == extent && dst_extent == extent) { + enc->copyFromTexture(src, src_base_layer, src_level, + dst, dst_base_layer, dst_level, + region.src_subresources.layer_count, 1); + } else { + MTL::Origin src_origin = MTLOriginFromVector3i(region.src_offset); + MTL::Size src_size = clampMTLSize(extent, src_origin, src_extent); + uint32_t layer_count = 0; + if ((src->textureType() == MTL::TextureType3D) != (dst->textureType() == MTL::TextureType3D)) { + // In the case, the number of layers to copy is in extent.depth. Use that value, + // then clamp the depth, so we don't try to copy more than Metal will allow. + layer_count = extent.depth; + src_size.depth = 1; + } else { + layer_count = region.src_subresources.layer_count; + } + MTL::Origin dst_origin = MTLOriginFromVector3i(region.dst_offset); + + for (uint32_t layer = 0; layer < layer_count; layer++) { + // We can copy between a 3D and a 2D image easily. Just copy between + // one slice of the 2D image and one plane of the 3D image at a time. 
+ if ((src->textureType() == MTL::TextureType3D) == (dst->textureType() == MTL::TextureType3D)) { + enc->copyFromTexture(src, src_base_layer + layer, src_level, src_origin, src_size, + dst, dst_base_layer + layer, dst_level, dst_origin); + } else if (src->textureType() == MTL::TextureType3D) { + enc->copyFromTexture(src, src_base_layer, src_level, + MTL::Origin(src_origin.x, src_origin.y, src_origin.z + layer), src_size, + dst, dst_base_layer + layer, dst_level, dst_origin); + } else { + DEV_ASSERT(dst->textureType() == MTL::TextureType3D); + enc->copyFromTexture(src, src_base_layer + layer, src_level, src_origin, src_size, + dst, dst_base_layer, dst_level, + MTL::Origin(dst_origin.x, dst_origin.y, dst_origin.z + layer)); + } + } + } + } +} + +void MDCommandBuffer::copy_buffer_to_texture(RDD::BufferID p_src_buffer, RDD::TextureID p_dst_texture, VectorView p_regions) { + _copy_texture_buffer(CopySource::Buffer, p_dst_texture, p_src_buffer, p_regions); +} + +void MDCommandBuffer::copy_texture_to_buffer(RDD::TextureID p_src_texture, RDD::BufferID p_dst_buffer, VectorView p_regions) { + _copy_texture_buffer(CopySource::Texture, p_src_texture, p_dst_buffer, p_regions); +} + +void MDCommandBuffer::_copy_texture_buffer(CopySource p_source, + RDD::TextureID p_texture, + RDD::BufferID p_buffer, + VectorView p_regions) { + const RDM::BufferInfo *buffer = (const RDM::BufferInfo *)p_buffer.id; + MTL::Texture *texture = rid::get(p_texture); + + MTL::BlitCommandEncoder *enc = _ensure_blit_encoder(); + + PixelFormats &pf = device_driver->get_pixel_formats(); + MTL::PixelFormat mtlPixFmt = texture->pixelFormat(); + + MTL::BlitOption options = MTL::BlitOptionNone; + if (pf.isPVRTCFormat(mtlPixFmt)) { + options |= MTL::BlitOptionRowLinearPVRTC; + } + + for (uint32_t i = 0; i < p_regions.size(); i++) { + RDD::BufferTextureCopyRegion region = p_regions[i]; + + uint32_t mip_level = region.texture_subresource.mipmap; + MTL::Origin txt_origin = MTL::Origin(region.texture_offset.x, 
region.texture_offset.y, region.texture_offset.z); + MTL::Size src_extent = mipmapLevelSizeFromTexture(texture, mip_level); + MTL::Size txt_size = clampMTLSize(MTL::Size(region.texture_region_size.x, region.texture_region_size.y, region.texture_region_size.z), + txt_origin, + src_extent); + + uint32_t buffImgWd = region.texture_region_size.x; + uint32_t buffImgHt = region.texture_region_size.y; + + NS::UInteger bytesPerRow = pf.getBytesPerRow(mtlPixFmt, buffImgWd); + NS::UInteger bytesPerImg = pf.getBytesPerLayer(mtlPixFmt, bytesPerRow, buffImgHt); + + MTL::BlitOption blit_options = options; + + if (pf.isDepthFormat(mtlPixFmt) && pf.isStencilFormat(mtlPixFmt)) { + // Don't reduce depths of 32-bit depth/stencil formats. + if (region.texture_subresource.aspect == RDD::TEXTURE_ASPECT_DEPTH) { + if (pf.getBytesPerTexel(mtlPixFmt) != 4) { + bytesPerRow -= buffImgWd; + bytesPerImg -= buffImgWd * buffImgHt; + } + blit_options |= MTL::BlitOptionDepthFromDepthStencil; + } else if (region.texture_subresource.aspect == RDD::TEXTURE_ASPECT_STENCIL) { + // The stencil component is always 1 byte per pixel. 
+ bytesPerRow = buffImgWd; + bytesPerImg = buffImgWd * buffImgHt; + blit_options |= MTL::BlitOptionStencilFromDepthStencil; + } + } + + if (!isArrayTexture(texture->textureType())) { + bytesPerImg = 0; + } + + if (p_source == CopySource::Buffer) { + enc->copyFromBuffer(buffer->metal_buffer.get(), region.buffer_offset, bytesPerRow, bytesPerImg, txt_size, + texture, region.texture_subresource.layer, mip_level, txt_origin, blit_options); + } else { + enc->copyFromTexture(texture, region.texture_subresource.layer, mip_level, txt_origin, txt_size, + buffer->metal_buffer.get(), region.buffer_offset, bytesPerRow, bytesPerImg, blit_options); + } + } +} + +MTL::RenderCommandEncoder *MDCommandBuffer::get_new_render_encoder_with_descriptor(MTL::RenderPassDescriptor *p_desc) { + switch (type) { + case MDCommandBufferStateType::None: + break; + case MDCommandBufferStateType::Render: + render_end_pass(); + break; + case MDCommandBufferStateType::Compute: + _end_compute_dispatch(); + break; + case MDCommandBufferStateType::Blit: + _end_blit(); + break; + } + + MTL::RenderCommandEncoder *enc = command_buffer()->renderCommandEncoder(p_desc); + _encode_barrier(enc); + return enc; +} + +#pragma mark - Render Commands + +void MDCommandBuffer::render_bind_uniform_sets(VectorView p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + + if (uint32_t new_size = p_first_set_index + p_set_count; render.uniform_sets.size() < new_size) { + uint32_t s = render.uniform_sets.size(); + render.uniform_sets.resize(new_size); + // Set intermediate values to null. + std::fill(&render.uniform_sets[s], render.uniform_sets.end().operator->(), nullptr); + } + + const MDShader *shader = (const MDShader *)p_shader.id; + DynamicOffsetLayout layout = shader->dynamic_offset_layout; + + // Clear bits for sets being bound, then OR new values. 
+ for (uint32_t i = 0; i < p_set_count && render.dynamic_offsets != 0; i++) { + uint32_t set_index = p_first_set_index + i; + uint32_t count = layout.get_count(set_index); + if (count > 0) { + uint32_t shift = layout.get_offset_index_shift(set_index); + uint32_t mask = ((1u << (count * 4u)) - 1u) << shift; + render.dynamic_offsets &= ~mask; // Clear this set's bits + } + } + render.dynamic_offsets |= p_dynamic_offsets; + + for (size_t i = 0; i < p_set_count; ++i) { + MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id); + + uint32_t index = p_first_set_index + i; + if (render.uniform_sets[index] != set || layout.get_count(index) > 0) { + render.dirty.set_flag(RenderState::DIRTY_UNIFORMS); + render.uniform_set_mask |= 1ULL << index; + render.uniform_sets[index] = set; + } + } +} + +void MDCommandBuffer::render_clear_attachments(VectorView p_attachment_clears, VectorView p_rects) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + + const MDSubpass &subpass = render.get_subpass(); + + uint32_t vertex_count = p_rects.size() * 6 * subpass.view_count; + simd::float4 *vertices = ALLOCA_ARRAY(simd::float4, vertex_count); + simd::float4 clear_colors[ClearAttKey::ATTACHMENT_COUNT]; + + Size2i size = render.frameBuffer->size; + Rect2i render_area = render.clip_to_render_area({ { 0, 0 }, size }); + size = Size2i(render_area.position.x + render_area.size.width, render_area.position.y + render_area.size.height); + _populate_vertices(vertices, size, p_rects); + + ClearAttKey key; + key.sample_count = render.pass->get_sample_count(); + if (subpass.view_count > 1) { + key.enable_layered_rendering(); + } + + float depth_value = 0; + uint32_t stencil_value = 0; + + for (uint32_t i = 0; i < p_attachment_clears.size(); i++) { + RDD::AttachmentClear const &attClear = p_attachment_clears[i]; + uint32_t attachment_index; + if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) { + attachment_index = attClear.color_attachment; + } else { + attachment_index = 
subpass.depth_stencil_reference.attachment; + } + + MDAttachment const &mda = render.pass->attachments[attachment_index]; + if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) { + key.set_color_format(attachment_index, mda.format); + clear_colors[attachment_index] = { + attClear.value.color.r, + attClear.value.color.g, + attClear.value.color.b, + attClear.value.color.a + }; + } + + if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT)) { + key.set_depth_format(mda.format); + depth_value = attClear.value.depth; + } + + if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT)) { + key.set_stencil_format(mda.format); + stencil_value = attClear.value.stencil; + } + } + clear_colors[ClearAttKey::DEPTH_INDEX] = { + depth_value, + depth_value, + depth_value, + depth_value + }; + + MTL::RenderCommandEncoder *enc = render.encoder.get(); + + MDResourceCache &cache = device_driver->get_resource_cache(); + + enc->pushDebugGroup(MTLSTR("ClearAttachments")); + enc->setRenderPipelineState(cache.get_clear_render_pipeline_state(key, nullptr)); + enc->setDepthStencilState(cache.get_depth_stencil_state( + key.is_depth_enabled(), + key.is_stencil_enabled())); + enc->setStencilReferenceValue(stencil_value); + enc->setCullMode(MTL::CullModeNone); + enc->setTriangleFillMode(MTL::TriangleFillModeFill); + enc->setDepthBias(0, 0, 0); + enc->setViewport(MTL::Viewport{ 0, 0, (double)size.width, (double)size.height, 0.0, 1.0 }); + enc->setScissorRect(MTL::ScissorRect{ 0, 0, (NS::UInteger)size.width, (NS::UInteger)size.height }); + + enc->setVertexBytes(clear_colors, sizeof(clear_colors), 0); + enc->setFragmentBytes(clear_colors, sizeof(clear_colors), 0); + enc->setVertexBytes(vertices, vertex_count * sizeof(vertices[0]), device_driver->get_metal_buffer_index_for_vertex_attribute_binding(VERT_CONTENT_BUFFER_INDEX)); + + enc->drawPrimitives(MTL::PrimitiveTypeTriangle, (NS::UInteger)0, vertex_count); + enc->popDebugGroup(); + + 
render.dirty.set_flag((RenderState::DirtyFlag)(RenderState::DIRTY_PIPELINE | RenderState::DIRTY_DEPTH | RenderState::DIRTY_RASTER)); + binding_cache.clear(); + render.mark_uniforms_dirty({ 0 }); // Mark index 0 dirty, if there is already a binding for index 0. + render.mark_viewport_dirty(); + render.mark_scissors_dirty(); + render.mark_vertex_dirty(); + render.mark_blend_dirty(); +} + +void MDCommandBuffer::_render_set_dirty_state() { + _render_bind_uniform_sets(); + + if (render.dirty.has_flag(RenderState::DIRTY_PUSH)) { + if (push_constant_binding != UINT32_MAX) { + render.encoder->setVertexBytes(push_constant_data, push_constant_data_len, push_constant_binding); + render.encoder->setFragmentBytes(push_constant_data, push_constant_data_len, push_constant_binding); + } + } + + MDSubpass const &subpass = render.get_subpass(); + if (subpass.view_count > 1) { + uint32_t view_range[2] = { 0, subpass.view_count }; + render.encoder->setVertexBytes(view_range, sizeof(view_range), VIEW_MASK_BUFFER_INDEX); + render.encoder->setFragmentBytes(view_range, sizeof(view_range), VIEW_MASK_BUFFER_INDEX); + } + + if (render.dirty.has_flag(RenderState::DIRTY_PIPELINE)) { + render.encoder->setRenderPipelineState(render.pipeline->state.get()); + } + + if (render.dirty.has_flag(RenderState::DIRTY_VIEWPORT)) { + render.encoder->setViewports(reinterpret_cast(render.viewports.ptr()), render.viewports.size()); + } + + if (render.dirty.has_flag(RenderState::DIRTY_DEPTH)) { + render.encoder->setDepthStencilState(render.pipeline->depth_stencil.get()); + } + + if (render.dirty.has_flag(RenderState::DIRTY_RASTER)) { + render.pipeline->raster_state.apply(render.encoder.get()); + } + + if (render.dirty.has_flag(RenderState::DIRTY_SCISSOR) && !render.scissors.is_empty()) { + size_t len = render.scissors.size(); + MTL::ScissorRect *rects = ALLOCA_ARRAY(MTL::ScissorRect, len); + for (size_t i = 0; i < len; i++) { + rects[i] = render.clip_to_render_area(render.scissors[i]); + } + 
render.encoder->setScissorRects(rects, len); + } + + if (render.dirty.has_flag(RenderState::DIRTY_BLEND) && render.blend_constants.has_value()) { + render.encoder->setBlendColor(render.blend_constants->r, render.blend_constants->g, render.blend_constants->b, render.blend_constants->a); + } + + if (render.dirty.has_flag(RenderState::DIRTY_VERTEX)) { + uint32_t p_binding_count = render.vertex_buffers.size(); + if (p_binding_count > 0) { + uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1); + render.encoder->setVertexBuffers(render.vertex_buffers.ptr(), render.vertex_offsets.ptr(), NS::Range(first, p_binding_count)); + } + } + + if (!use_barriers) { + render.resource_tracker.encode(render.encoder.get()); + } + + render.dirty.clear(); +} + +void ResourceTracker::merge_from(const ::ResourceUsageMap &p_from) { + for (KeyValue const &keyval : p_from) { + ResourceVector *resources = _current.getptr(keyval.key); + if (resources == nullptr) { + resources = &_current.insert(keyval.key, ResourceVector())->value; + } + resources->reserve(resources->size() + keyval.value.size()); + + MTL::Resource *const *keyval_ptr = (MTL::Resource *const *)(void *)keyval.value.ptr(); + + // Helper to check if a resource needs to be added based on previous usage. + auto should_add_resource = [this, usage = keyval.key](MTL::Resource *res) -> bool { + ResourceUsageEntry *existing = _previous.getptr(res); + if (existing == nullptr) { + _previous.insert(res, usage); + return true; + } + if (existing->usage != usage) { + existing->usage |= usage; + return true; + } + return false; + }; + + // 2-way merge of sorted resource lists. 
+ uint32_t i = 0, j = 0; + while (i < resources->size() && j < keyval.value.size()) { + MTL::Resource *current_res = resources->ptr()[i]; + MTL::Resource *new_res = keyval_ptr[j]; + + if (current_res < new_res) { + i++; + } else if (current_res > new_res) { + if (should_add_resource(new_res)) { + resources->insert(i, new_res); + } + i++; + j++; + } else { + i++; + j++; + } + } + + // Append any remaining resources from the input. + for (; j < keyval.value.size(); j++) { + if (should_add_resource(keyval_ptr[j])) { + resources->push_back(keyval_ptr[j]); + } + } + } +} + +void ResourceTracker::encode(MTL::RenderCommandEncoder *p_enc) { + for (KeyValue const &keyval : _current) { + if (keyval.value.is_empty()) { + continue; + } + + MTL::ResourceUsage vert_usage = (MTL::ResourceUsage)resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_VERTEX); + MTL::ResourceUsage frag_usage = (MTL::ResourceUsage)resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_FRAGMENT); + const MTL::Resource **resources = (const MTL::Resource **)(void *)keyval.value.ptr(); + NS::UInteger count = keyval.value.size(); + if (vert_usage == frag_usage) { + p_enc->useResources(resources, count, vert_usage, MTL::RenderStageVertex | MTL::RenderStageFragment); + } else { + if (vert_usage != 0) { + p_enc->useResources(resources, count, vert_usage, MTL::RenderStageVertex); + } + if (frag_usage != 0) { + p_enc->useResources(resources, count, frag_usage, MTL::RenderStageFragment); + } + } + } + + // Keep the keys for now and clear the vectors to reduce churn. 
+ for (KeyValue &v : _current) { + v.value.clear(); + } +} + +void ResourceTracker::encode(MTL::ComputeCommandEncoder *p_enc) { + for (KeyValue const &keyval : _current) { + if (keyval.value.is_empty()) { + continue; + } + MTL::ResourceUsage usage = (MTL::ResourceUsage)resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_COMPUTE); + if (usage != 0) { + const MTL::Resource **resources = (const MTL::Resource **)(void *)keyval.value.ptr(); + p_enc->useResources(resources, keyval.value.size(), usage); + } + } + + // Keep the keys for now and clear the vectors to reduce churn. + for (KeyValue &v : _current) { + v.value.clear(); + } +} + +void ResourceTracker::reset() { + // Keep the keys for now, as they are likely to be used repeatedly. + for (KeyValue &v : _previous) { + if (v.value.usage == ResourceUnused) { + v.value.unused++; + if (v.value.unused >= RESOURCE_UNUSED_CLEANUP_COUNT) { + _scratch.push_back(v.key); + } + } else { + v.value = ResourceUnused; + v.value.unused = 0; + } + } + + // Clear up resources that weren't used for the last pass. + for (MTL::Resource *res : _scratch) { + _previous.erase(res); + } + _scratch.clear(); +} + +void MDCommandBuffer::_render_bind_uniform_sets() { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + if (!render.dirty.has_flag(RenderState::DIRTY_UNIFORMS)) { + return; + } + + render.dirty.clear_flag(RenderState::DIRTY_UNIFORMS); + uint64_t set_uniforms = render.uniform_set_mask; + render.uniform_set_mask = 0; + + MDRenderShader *shader = render.pipeline->shader; + const uint32_t dynamic_offsets = render.dynamic_offsets; + + while (set_uniforms != 0) { + // Find the index of the next set bit. + uint32_t index = (uint32_t)__builtin_ctzll(set_uniforms); + // Clear the set bit. 
+ set_uniforms &= (set_uniforms - 1); + MDUniformSet *set = render.uniform_sets[index]; + if (set == nullptr || index >= (uint32_t)shader->sets.size()) { + continue; + } + if (shader->uses_argument_buffers) { + _bind_uniforms_argument_buffers(set, shader, index, dynamic_offsets); + } else { + DirectEncoder de(render.encoder.get(), binding_cache, DirectEncoder::RENDER); + _bind_uniforms_direct(set, shader, de, index, dynamic_offsets); + } + } +} + +void MDCommandBuffer::render_begin_pass(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_frameBuffer, RDD::CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView p_clear_values) { + DEV_ASSERT(command_buffer() != nullptr); + end(); + + MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id); + MDFrameBuffer *fb = (MDFrameBuffer *)(p_frameBuffer.id); + + type = MDCommandBufferStateType::Render; + render.pass = pass; + render.current_subpass = UINT32_MAX; + render.render_area = p_rect; + render.clear_values.resize(p_clear_values.size()); + for (uint32_t i = 0; i < p_clear_values.size(); i++) { + render.clear_values[i] = p_clear_values[i]; + } + render.is_rendering_entire_area = (p_rect.position == Point2i(0, 0)) && p_rect.size == fb->size; + render.frameBuffer = fb; + render_next_subpass(); +} + +void MDCommandBuffer::render_next_subpass() { + DEV_ASSERT(command_buffer() != nullptr); + + if (render.current_subpass == UINT32_MAX) { + render.current_subpass = 0; + } else { + _end_render_pass(); + render.current_subpass++; + } + + MDFrameBuffer const &fb = *render.frameBuffer; + MDRenderPass const &pass = *render.pass; + MDSubpass const &subpass = render.get_subpass(); + + NS::SharedPtr desc = NS::TransferPtr(MTL::RenderPassDescriptor::alloc()->init()); + + if (subpass.view_count > 1) { + desc->setRenderTargetArrayLength(subpass.view_count); + } + + PixelFormats &pf = device_driver->get_pixel_formats(); + + uint32_t attachmentCount = 0; + for (uint32_t i = 0; i < subpass.color_references.size(); i++) { + 
uint32_t idx = subpass.color_references[i].attachment; + if (idx == RDD::AttachmentReference::UNUSED) { + continue; + } + + attachmentCount += 1; + MTL::RenderPassColorAttachmentDescriptor *ca = desc->colorAttachments()->object(i); + + uint32_t resolveIdx = subpass.resolve_references.is_empty() ? RDD::AttachmentReference::UNUSED : subpass.resolve_references[i].attachment; + bool has_resolve = resolveIdx != RDD::AttachmentReference::UNUSED; + bool can_resolve = true; + if (resolveIdx != RDD::AttachmentReference::UNUSED) { + MTL::Texture *resolve_tex = fb.get_texture(resolveIdx); + can_resolve = flags::all(pf.getCapabilities(resolve_tex->pixelFormat()), kMTLFmtCapsResolve); + if (can_resolve) { + ca->setResolveTexture(resolve_tex); + } else { + CRASH_NOW_MSG("unimplemented: using a texture format that is not supported for resolve"); + } + } + + MDAttachment const &attachment = pass.attachments[idx]; + + MTL::Texture *tex = fb.get_texture(idx); + ERR_FAIL_NULL_MSG(tex, "Frame buffer color texture is null."); + + if ((attachment.type & MDAttachmentType::Color)) { + if (attachment.configureDescriptor(ca, pf, subpass, tex, render.is_rendering_entire_area, has_resolve, can_resolve, false)) { + Color clearColor = render.clear_values[idx].color; + ca->setClearColor(MTL::ClearColor(clearColor.r, clearColor.g, clearColor.b, clearColor.a)); + } + } + } + + if (subpass.depth_stencil_reference.attachment != RDD::AttachmentReference::UNUSED) { + attachmentCount += 1; + uint32_t idx = subpass.depth_stencil_reference.attachment; + MDAttachment const &attachment = pass.attachments[idx]; + MTL::Texture *tex = fb.get_texture(idx); + ERR_FAIL_NULL_MSG(tex, "Frame buffer depth / stencil texture is null."); + if (attachment.type & MDAttachmentType::Depth) { + MTL::RenderPassDepthAttachmentDescriptor *da = desc->depthAttachment(); + if (attachment.configureDescriptor(da, pf, subpass, tex, render.is_rendering_entire_area, false, false, false)) { + 
da->setClearDepth(render.clear_values[idx].depth); + } + } + + if (attachment.type & MDAttachmentType::Stencil) { + MTL::RenderPassStencilAttachmentDescriptor *sa = desc->stencilAttachment(); + if (attachment.configureDescriptor(sa, pf, subpass, tex, render.is_rendering_entire_area, false, false, true)) { + sa->setClearStencil(render.clear_values[idx].stencil); + } + } + } + + desc->setRenderTargetWidth(MAX((NS::UInteger)MIN(render.render_area.position.x + render.render_area.size.width, fb.size.width), 1u)); + desc->setRenderTargetHeight(MAX((NS::UInteger)MIN(render.render_area.position.y + render.render_area.size.height, fb.size.height), 1u)); + + if (attachmentCount == 0) { + // If there are no attachments, delay the creation of the encoder, + // so we can use a matching sample count for the pipeline, by setting + // the defaultRasterSampleCount from the pipeline's sample count. + render.desc = desc; + } else { + render.encoder = NS::RetainPtr(command_buffer()->renderCommandEncoder(desc.get())); + _encode_barrier(render.encoder.get()); + + if (!render.is_rendering_entire_area) { + _render_clear_render_area(); + } + // With a new encoder, all state is dirty. 
+ render.dirty.set_flag(RenderState::DIRTY_ALL); + } +} + +void MDCommandBuffer::render_draw(uint32_t p_vertex_count, + uint32_t p_instance_count, + uint32_t p_base_vertex, + uint32_t p_first_instance) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer."); + + _render_set_dirty_state(); + + MDSubpass const &subpass = render.get_subpass(); + if (subpass.view_count > 1) { + p_instance_count *= subpass.view_count; + } + + DEV_ASSERT(render.dirty == 0); + + MTL::RenderCommandEncoder *enc = render.encoder.get(); + enc->drawPrimitives(render.pipeline->raster_state.render_primitive, p_base_vertex, p_vertex_count, p_instance_count, p_first_instance); +} + +void MDCommandBuffer::render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + + render.vertex_buffers.resize(p_binding_count); + render.vertex_offsets.resize(p_binding_count); + + // Are the existing buffer bindings the same? + bool same = true; + + // Reverse the buffers, as their bindings are assigned in descending order. 
+ for (uint32_t i = 0; i < p_binding_count; i += 1) { + const RenderingDeviceDriverMetal::BufferInfo *buf_info = (const RenderingDeviceDriverMetal::BufferInfo *)p_buffers[p_binding_count - i - 1].id; + + NS::UInteger dynamic_offset = 0; + if (buf_info->is_dynamic()) { + const MetalBufferDynamicInfo *dyn_buf = (const MetalBufferDynamicInfo *)buf_info; + uint64_t frame_idx = p_dynamic_offsets & 0x3; + p_dynamic_offsets >>= 2; + dynamic_offset = frame_idx * dyn_buf->size_bytes; + } + if (render.vertex_buffers[i] != buf_info->metal_buffer.get()) { + render.vertex_buffers[i] = buf_info->metal_buffer.get(); + same = false; + } + + render.vertex_offsets[i] = dynamic_offset + p_offsets[p_binding_count - i - 1]; + } + + if (render.encoder.get() != nullptr) { + uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1); + if (same) { + NS::UInteger *offset_ptr = render.vertex_offsets.ptr(); + for (uint32_t i = first; i < first + p_binding_count; i++) { + render.encoder->setVertexBufferOffset(*offset_ptr, i); + offset_ptr++; + } + } else { + render.encoder->setVertexBuffers(render.vertex_buffers.ptr(), render.vertex_offsets.ptr(), NS::Range(first, p_binding_count)); + } + render.dirty.clear_flag(RenderState::DIRTY_VERTEX); + } else { + render.dirty.set_flag(RenderState::DIRTY_VERTEX); + } +} + +void MDCommandBuffer::render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + + const RenderingDeviceDriverMetal::BufferInfo *buffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_buffer.id; + + render.index_buffer = buffer->metal_buffer.get(); + render.index_type = p_format == RDD::IndexBufferFormat::INDEX_BUFFER_FORMAT_UINT16 ? 
MTL::IndexTypeUInt16 : MTL::IndexTypeUInt32;
+	render.index_offset = p_offset;
+}
+
+// Issues an indexed draw using the index buffer, index type and byte offset recorded by
+// render_bind_index_buffer(). For multiview subpasses the instance count is multiplied by
+// the view count.
+void MDCommandBuffer::render_draw_indexed(uint32_t p_index_count,
+		uint32_t p_instance_count,
+		uint32_t p_first_index,
+		int32_t p_vertex_offset,
+		uint32_t p_first_instance) {
+	DEV_ASSERT(type == MDCommandBufferStateType::Render);
+	ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer.");
+
+	_render_set_dirty_state();
+
+	MDSubpass const &subpass = render.get_subpass();
+	if (subpass.view_count > 1) {
+		p_instance_count *= subpass.view_count;
+	}
+
+	MTL::RenderCommandEncoder *enc = render.encoder.get();
+
+	// Accumulate the byte offset in NS::UInteger (the type Metal expects) so that
+	// base offset + first_index * index_size is not truncated to 32 bits.
+	NS::UInteger index_offset = render.index_offset;
+	index_offset += (NS::UInteger)p_first_index * (render.index_type == MTL::IndexTypeUInt16 ? sizeof(uint16_t) : sizeof(uint32_t));
+
+	enc->drawIndexedPrimitives(render.pipeline->raster_state.render_primitive, p_index_count, render.index_type, render.index_buffer, index_offset, p_instance_count, p_vertex_offset, p_first_instance);
+}
+
+// Issues p_draw_count indexed indirect draws, reading one argument record every p_stride bytes
+// from p_indirect_buffer starting at p_offset.
+void MDCommandBuffer::render_draw_indexed_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
+	DEV_ASSERT(type == MDCommandBufferStateType::Render);
+	ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer.");
+
+	_render_set_dirty_state();
+
+	MTL::RenderCommandEncoder *enc = render.encoder.get();
+
+	const RenderingDeviceDriverMetal::BufferInfo *indirect_buffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_indirect_buffer.id;
+	NS::UInteger indirect_offset = p_offset;
+
+	for (uint32_t i = 0; i < p_draw_count; i++) {
+		// Honour the offset the index buffer was bound with, as the direct draw path does;
+		// the indirect arguments' index start is applied relative to this offset.
+		enc->drawIndexedPrimitives(render.pipeline->raster_state.render_primitive, render.index_type, render.index_buffer, render.index_offset, indirect_buffer->metal_buffer.get(), indirect_offset);
+		indirect_offset += p_stride;
+	}
+}
+
+void MDCommandBuffer::render_draw_indexed_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, 
uint32_t p_max_draw_count, uint32_t p_stride) { + ERR_FAIL_MSG("not implemented"); +} + +void MDCommandBuffer::render_draw_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer."); + + _render_set_dirty_state(); + + MTL::RenderCommandEncoder *enc = render.encoder.get(); + + const RenderingDeviceDriverMetal::BufferInfo *indirect_buffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_indirect_buffer.id; + NS::UInteger indirect_offset = p_offset; + + for (uint32_t i = 0; i < p_draw_count; i++) { + enc->drawPrimitives(render.pipeline->raster_state.render_primitive, indirect_buffer->metal_buffer.get(), indirect_offset); + indirect_offset += p_stride; + } +} + +void MDCommandBuffer::render_draw_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { + ERR_FAIL_MSG("not implemented"); +} + +void MDCommandBuffer::render_end_pass() { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + + render.end_encoding(); + render.reset(); + reset(); +} + +#pragma mark - RenderState + +void MDCommandBuffer::RenderState::reset() { + pass = nullptr; + frameBuffer = nullptr; + pipeline = nullptr; + current_subpass = UINT32_MAX; + render_area = {}; + is_rendering_entire_area = false; + desc.reset(); + encoder.reset(); + index_buffer = nullptr; + index_type = MTL::IndexTypeUInt16; + dirty = DIRTY_NONE; + uniform_sets.clear(); + dynamic_offsets = 0; + uniform_set_mask = 0; + clear_values.clear(); + viewports.clear(); + scissors.clear(); + blend_constants.reset(); + bzero(vertex_buffers.ptr(), sizeof(MTL::Buffer *) * vertex_buffers.size()); + vertex_buffers.clear(); + bzero(vertex_offsets.ptr(), sizeof(NS::UInteger) * vertex_offsets.size()); + vertex_offsets.clear(); + 
resource_tracker.reset(); +} + +void MDCommandBuffer::RenderState::end_encoding() { + if (encoder.get() == nullptr) { + return; + } + + encoder->endEncoding(); + encoder.reset(); +} + +#pragma mark - ComputeState + +void MDCommandBuffer::ComputeState::end_encoding() { + if (encoder.get() == nullptr) { + return; + } + + encoder->endEncoding(); + encoder.reset(); +} + +#pragma mark - Compute + +void MDCommandBuffer::_compute_set_dirty_state() { + if (compute.dirty.has_flag(ComputeState::DIRTY_PIPELINE)) { + compute.encoder = NS::RetainPtr(command_buffer()->computeCommandEncoder(MTL::DispatchTypeConcurrent)); + _encode_barrier(compute.encoder.get()); + compute.encoder->setComputePipelineState(compute.pipeline->state.get()); + } + + _compute_bind_uniform_sets(); + + if (compute.dirty.has_flag(ComputeState::DIRTY_PUSH)) { + if (push_constant_binding != UINT32_MAX) { + compute.encoder->setBytes(push_constant_data, push_constant_data_len, push_constant_binding); + } + } + + if (!use_barriers) { + compute.resource_tracker.encode(compute.encoder.get()); + } + + compute.dirty.clear(); +} + +void MDCommandBuffer::_compute_bind_uniform_sets() { + DEV_ASSERT(type == MDCommandBufferStateType::Compute); + if (!compute.dirty.has_flag(ComputeState::DIRTY_UNIFORMS)) { + return; + } + + compute.dirty.clear_flag(ComputeState::DIRTY_UNIFORMS); + uint64_t set_uniforms = compute.uniform_set_mask; + compute.uniform_set_mask = 0; + + MDComputeShader *shader = compute.pipeline->shader; + const uint32_t dynamic_offsets = compute.dynamic_offsets; + + while (set_uniforms != 0) { + // Find the index of the next set bit. + uint32_t index = (uint32_t)__builtin_ctzll(set_uniforms); + // Clear the set bit. 
+ set_uniforms &= (set_uniforms - 1); + MDUniformSet *set = compute.uniform_sets[index]; + if (set == nullptr || index >= (uint32_t)shader->sets.size()) { + continue; + } + if (shader->uses_argument_buffers) { + _bind_uniforms_argument_buffers_compute(set, shader, index, dynamic_offsets); + } else { + DirectEncoder de(compute.encoder.get(), binding_cache, DirectEncoder::COMPUTE); + _bind_uniforms_direct(set, shader, de, index, dynamic_offsets); + } + } +} + +void MDCommandBuffer::ComputeState::reset() { + pipeline = nullptr; + encoder.reset(); + dirty = DIRTY_NONE; + uniform_sets.clear(); + dynamic_offsets = 0; + uniform_set_mask = 0; + resource_tracker.reset(); +} + +void MDCommandBuffer::compute_bind_uniform_sets(VectorView p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) { + DEV_ASSERT(type == MDCommandBufferStateType::Compute); + + if (uint32_t new_size = p_first_set_index + p_set_count; compute.uniform_sets.size() < new_size) { + uint32_t s = compute.uniform_sets.size(); + compute.uniform_sets.resize(new_size); + // Set intermediate values to null. + std::fill(&compute.uniform_sets[s], compute.uniform_sets.end().operator->(), nullptr); + } + + const MDShader *shader = (const MDShader *)p_shader.id; + DynamicOffsetLayout layout = shader->dynamic_offset_layout; + + // Clear bits for sets being bound, then OR new values. 
+ for (uint32_t i = 0; i < p_set_count && compute.dynamic_offsets != 0; i++) { + uint32_t set_index = p_first_set_index + i; + uint32_t count = layout.get_count(set_index); + if (count > 0) { + uint32_t shift = layout.get_offset_index_shift(set_index); + uint32_t mask = ((1u << (count * 4u)) - 1u) << shift; + compute.dynamic_offsets &= ~mask; // Clear this set's bits + } + } + compute.dynamic_offsets |= p_dynamic_offsets; + + for (size_t i = 0; i < p_set_count; ++i) { + MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id); + + uint32_t index = p_first_set_index + i; + if (compute.uniform_sets[index] != set || layout.get_count(index) > 0) { + compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS); + compute.uniform_set_mask |= 1ULL << index; + compute.uniform_sets[index] = set; + } + } +} + +void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { + DEV_ASSERT(type == MDCommandBufferStateType::Compute); + + _compute_set_dirty_state(); + + MTL::Size size = MTL::Size(p_x_groups, p_y_groups, p_z_groups); + + MTL::ComputeCommandEncoder *enc = compute.encoder.get(); + enc->dispatchThreadgroups(size, compute.pipeline->compute_state.local); +} + +void MDCommandBuffer::compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset) { + DEV_ASSERT(type == MDCommandBufferStateType::Compute); + + _compute_set_dirty_state(); + + const RenderingDeviceDriverMetal::BufferInfo *indirectBuffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_indirect_buffer.id; + + MTL::ComputeCommandEncoder *enc = compute.encoder.get(); + enc->dispatchThreadgroups(indirectBuffer->metal_buffer.get(), p_offset, compute.pipeline->compute_state.local); +} + +void MDCommandBuffer::reset() { + push_constant_binding = UINT32_MAX; + push_constant_data_len = 0; + type = MDCommandBufferStateType::None; + binding_cache.clear(); +} + +void MDCommandBuffer::_end_compute_dispatch() { + DEV_ASSERT(type == MDCommandBufferStateType::Compute); + + 
compute.end_encoding(); + compute.reset(); + reset(); +} + +void MDCommandBuffer::_end_blit() { + DEV_ASSERT(type == MDCommandBufferStateType::Blit); + + blit.encoder->endEncoding(); + blit.reset(); + reset(); +} + +MDComputeShader::MDComputeShader(CharString p_name, + Vector p_sets, + bool p_uses_argument_buffers, + std::shared_ptr p_kernel) : + MDShader(p_name, p_sets, p_uses_argument_buffers), kernel(std::move(p_kernel)) { +} + +MDRenderShader::MDRenderShader(CharString p_name, + Vector p_sets, + bool p_needs_view_mask_buffer, + bool p_uses_argument_buffers, + std::shared_ptr p_vert, std::shared_ptr p_frag) : + MDShader(p_name, p_sets, p_uses_argument_buffers), + needs_view_mask_buffer(p_needs_view_mask_buffer), + vert(std::move(p_vert)), + frag(std::move(p_frag)) { +} + +void DirectEncoder::set(MTL::Texture **p_textures, NS::Range p_range) { + if (cache.update(p_range, p_textures)) { + switch (mode) { + case RENDER: { + MTL::RenderCommandEncoder *enc = static_cast(encoder); + enc->setVertexTextures(p_textures, p_range); + enc->setFragmentTextures(p_textures, p_range); + } break; + case COMPUTE: { + MTL::ComputeCommandEncoder *enc = static_cast(encoder); + enc->setTextures(p_textures, p_range); + } break; + } + } +} + +void DirectEncoder::set(MTL::Buffer **p_buffers, const NS::UInteger *p_offsets, NS::Range p_range) { + if (cache.update(p_range, p_buffers, p_offsets)) { + switch (mode) { + case RENDER: { + MTL::RenderCommandEncoder *enc = static_cast(encoder); + enc->setVertexBuffers(p_buffers, p_offsets, p_range); + enc->setFragmentBuffers(p_buffers, p_offsets, p_range); + } break; + case COMPUTE: { + MTL::ComputeCommandEncoder *enc = static_cast(encoder); + enc->setBuffers(p_buffers, p_offsets, p_range); + } break; + } + } +} + +void DirectEncoder::set(MTL::Buffer *p_buffer, NS::UInteger p_offset, uint32_t p_index) { + if (cache.update(p_buffer, p_offset, p_index)) { + switch (mode) { + case RENDER: { + MTL::RenderCommandEncoder *enc = static_cast(encoder); + 
enc->setVertexBuffer(p_buffer, p_offset, p_index); + enc->setFragmentBuffer(p_buffer, p_offset, p_index); + } break; + case COMPUTE: { + MTL::ComputeCommandEncoder *enc = static_cast(encoder); + enc->setBuffer(p_buffer, p_offset, p_index); + } break; + } + } +} + +void DirectEncoder::set(MTL::SamplerState **p_samplers, NS::Range p_range) { + if (cache.update(p_range, p_samplers)) { + switch (mode) { + case RENDER: { + MTL::RenderCommandEncoder *enc = static_cast(encoder); + enc->setVertexSamplerStates(p_samplers, p_range); + enc->setFragmentSamplerStates(p_samplers, p_range); + } break; + case COMPUTE: { + MTL::ComputeCommandEncoder *enc = static_cast(encoder); + enc->setSamplerStates(p_samplers, p_range); + } break; + } + } +} + +GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability-new") + +void MDCommandBuffer::_bind_uniforms_argument_buffers(MDUniformSet *p_set, MDShader *p_shader, uint32_t p_set_index, uint32_t p_dynamic_offsets) { + DEV_ASSERT(p_shader->uses_argument_buffers); + DEV_ASSERT(render.encoder.get() != nullptr); + + MTL::RenderCommandEncoder *enc = render.encoder.get(); + render.resource_tracker.merge_from(p_set->usage_to_resources); + + const UniformSet &shader_set = p_shader->sets[p_set_index]; + + // Check if this set has dynamic uniforms. + if (!shader_set.dynamic_uniforms.is_empty()) { + // Allocate from the ring buffer. + uint32_t buffer_size = p_set->arg_buffer_data.size(); + MDRingBuffer::Allocation alloc = allocate_arg_buffer(buffer_size); + + // Copy the base argument buffer data. + memcpy(alloc.ptr, p_set->arg_buffer_data.ptr(), buffer_size); + + // Update dynamic buffer GPU addresses. 
+ uint64_t *ptr = (uint64_t *)alloc.ptr; + DynamicOffsetLayout layout = p_shader->dynamic_offset_layout; + uint32_t dynamic_index = 0; + + for (uint32_t i : shader_set.dynamic_uniforms) { + RDD::BoundUniform const &uniform = p_set->uniforms[i]; + const UniformInfo &ui = shader_set.uniforms[i]; + const UniformInfo::Indexes &idx = ui.arg_buffer; + + uint32_t shift = layout.get_offset_index_shift(p_set_index, dynamic_index); + dynamic_index++; + uint32_t frame_idx = (p_dynamic_offsets >> shift) & 0xf; + + const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id; + uint64_t gpu_address = buf_info->metal_buffer.get()->gpuAddress() + frame_idx * buf_info->size_bytes; + *(uint64_t *)(ptr + idx.buffer) = gpu_address; + } + + enc->setVertexBuffer(alloc.buffer, alloc.offset, p_set_index); + enc->setFragmentBuffer(alloc.buffer, alloc.offset, p_set_index); + } else { + enc->setVertexBuffer(p_set->arg_buffer.get(), 0, p_set_index); + enc->setFragmentBuffer(p_set->arg_buffer.get(), 0, p_set_index); + } +} + +void MDCommandBuffer::_bind_uniforms_direct(MDUniformSet *p_set, MDShader *p_shader, DirectEncoder p_enc, uint32_t p_set_index, uint32_t p_dynamic_offsets) { + DEV_ASSERT(!p_shader->uses_argument_buffers); + + UniformSet const &set = p_shader->sets[p_set_index]; + DynamicOffsetLayout layout = p_shader->dynamic_offset_layout; + uint32_t dynamic_index = 0; + + for (uint32_t i = 0; i < MIN(p_set->uniforms.size(), set.uniforms.size()); i++) { + RDD::BoundUniform const &uniform = p_set->uniforms[i]; + const UniformInfo &ui = set.uniforms[i]; + const UniformInfo::Indexes &indexes = ui.slot; + + uint32_t frame_idx; + if (uniform.is_dynamic()) { + uint32_t shift = layout.get_offset_index_shift(p_set_index, dynamic_index); + dynamic_index++; + frame_idx = (p_dynamic_offsets >> shift) & 0xf; + } else { + frame_idx = 0; + } + + switch (uniform.type) { + case RDD::UNIFORM_TYPE_SAMPLER: { + size_t count = uniform.ids.size(); + MTL::SamplerState **objects 
= ALLOCA_ARRAY(MTL::SamplerState *, count); + for (size_t j = 0; j < count; j += 1) { + objects[j] = rid::get(uniform.ids[j]); + } + NS::Range sampler_range = { indexes.sampler, count }; + p_enc.set(objects, sampler_range); + } break; + case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: { + size_t count = uniform.ids.size() / 2; + MTL::Texture **textures = ALLOCA_ARRAY(MTL::Texture *, count); + MTL::SamplerState **samplers = ALLOCA_ARRAY(MTL::SamplerState *, count); + for (uint32_t j = 0; j < count; j += 1) { + samplers[j] = rid::get(uniform.ids[j * 2 + 0]); + textures[j] = rid::get(uniform.ids[j * 2 + 1]); + } + NS::Range sampler_range = { indexes.sampler, count }; + NS::Range texture_range = { indexes.texture, count }; + p_enc.set(samplers, sampler_range); + p_enc.set(textures, texture_range); + } break; + case RDD::UNIFORM_TYPE_TEXTURE: { + size_t count = uniform.ids.size(); + MTL::Texture **objects = ALLOCA_ARRAY(MTL::Texture *, count); + for (size_t j = 0; j < count; j += 1) { + objects[j] = rid::get(uniform.ids[j]); + } + NS::Range texture_range = { indexes.texture, count }; + p_enc.set(objects, texture_range); + } break; + case RDD::UNIFORM_TYPE_IMAGE: { + size_t count = uniform.ids.size(); + MTL::Texture **objects = ALLOCA_ARRAY(MTL::Texture *, count); + for (size_t j = 0; j < count; j += 1) { + objects[j] = rid::get(uniform.ids[j]); + } + NS::Range texture_range = { indexes.texture, count }; + p_enc.set(objects, texture_range); + + if (indexes.buffer != UINT32_MAX) { + // Emulated atomic image access. + MTL::Buffer **bufs = ALLOCA_ARRAY(MTL::Buffer *, count); + for (size_t j = 0; j < count; j += 1) { + MTL::Texture *obj = objects[j]; + MTL::Texture *tex = obj->parentTexture() ? 
obj->parentTexture() : obj; + bufs[j] = tex->buffer(); + } + NS::UInteger *offs = ALLOCA_ARRAY(NS::UInteger, count); + bzero(offs, sizeof(NS::UInteger) * count); + NS::Range buffer_range = { indexes.buffer, count }; + p_enc.set(bufs, offs, buffer_range); + } + } break; + case RDD::UNIFORM_TYPE_TEXTURE_BUFFER: { + ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER"); + } break; + case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: { + ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER"); + } break; + case RDD::UNIFORM_TYPE_IMAGE_BUFFER: { + CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER"); + } break; + case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: + case RDD::UNIFORM_TYPE_STORAGE_BUFFER: { + const RDM::BufferInfo *buf_info = (const RDM::BufferInfo *)uniform.ids[0].id; + p_enc.set(buf_info->metal_buffer.get(), 0, indexes.buffer); + } break; + case RDD::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: + case RDD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: { + const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id; + p_enc.set(buf_info->metal_buffer.get(), frame_idx * buf_info->size_bytes, indexes.buffer); + } break; + case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: { + size_t count = uniform.ids.size(); + MTL::Texture **objects = ALLOCA_ARRAY(MTL::Texture *, count); + for (size_t j = 0; j < count; j += 1) { + objects[j] = rid::get(uniform.ids[j]); + } + NS::Range texture_range = { indexes.texture, count }; + p_enc.set(objects, texture_range); + } break; + default: { + DEV_ASSERT(false); + } + } + } +} + +void MDCommandBuffer::_bind_uniforms_argument_buffers_compute(MDUniformSet *p_set, MDShader *p_shader, uint32_t p_set_index, uint32_t p_dynamic_offsets) { + DEV_ASSERT(p_shader->uses_argument_buffers); + DEV_ASSERT(compute.encoder.get() != nullptr); + + MTL::ComputeCommandEncoder *enc = compute.encoder.get(); + compute.resource_tracker.merge_from(p_set->usage_to_resources); + + const UniformSet &shader_set = 
p_shader->sets[p_set_index]; + + // Check if this set has dynamic uniforms. + if (!shader_set.dynamic_uniforms.is_empty()) { + // Allocate from the ring buffer. + uint32_t buffer_size = p_set->arg_buffer_data.size(); + MDRingBuffer::Allocation alloc = allocate_arg_buffer(buffer_size); + + // Copy the base argument buffer data. + memcpy(alloc.ptr, p_set->arg_buffer_data.ptr(), buffer_size); + + // Update dynamic buffer GPU addresses. + uint64_t *ptr = (uint64_t *)alloc.ptr; + DynamicOffsetLayout layout = p_shader->dynamic_offset_layout; + uint32_t dynamic_index = 0; + + for (uint32_t i : shader_set.dynamic_uniforms) { + RDD::BoundUniform const &uniform = p_set->uniforms[i]; + const UniformInfo &ui = shader_set.uniforms[i]; + const UniformInfo::Indexes &idx = ui.arg_buffer; + + uint32_t shift = layout.get_offset_index_shift(p_set_index, dynamic_index); + dynamic_index++; + uint32_t frame_idx = (p_dynamic_offsets >> shift) & 0xf; + + const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id; + uint64_t gpu_address = buf_info->metal_buffer.get()->gpuAddress() + frame_idx * buf_info->size_bytes; + *(uint64_t *)(ptr + idx.buffer) = gpu_address; + } + + enc->setBuffer(alloc.buffer, alloc.offset, p_set_index); + } else { + enc->setBuffer(p_set->arg_buffer.get(), 0, p_set_index); + } +} + +GODOT_CLANG_WARNING_POP diff --git a/drivers/metal/metal3_objects.h b/drivers/metal/metal3_objects.h new file mode 100644 index 00000000000..cde410dc5eb --- /dev/null +++ b/drivers/metal/metal3_objects.h @@ -0,0 +1,590 @@ +/**************************************************************************/ +/* metal3_objects.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). 
*/ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +/**************************************************************************/ +/* */ +/* Portions of this code were derived from MoltenVK. */ +/* */ +/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */ +/* (http://www.brenwill.com) */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. 
*/ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/**************************************************************************/ + +#include "metal_objects_shared.h" + +#include "servers/rendering/rendering_device_driver.h" + +#include + +#include +#include + +namespace MTL3 { + +// These types are defined in the global namespace (metal_objects_shared.h / rendering_device_driver_metal.h) +using ::MDAttachment; +using ::MDAttachmentType; +using ::MDCommandBufferBase; +using ::MDCommandBufferStateType; +using ::MDFrameBuffer; +using ::MDRenderPass; +using ::MDRingBuffer; +using ::MDSubpass; +using ::RenderStateBase; + +using ::DynamicOffsetLayout; +using ::MDComputePipeline; +using ::MDComputeShader; +using ::MDLibrary; +using ::MDPipeline; +using ::MDPipelineType; +using ::MDRenderPipeline; +using ::MDRenderShader; +using ::MDShader; +using ::MDUniformSet; +using ::ShaderCacheEntry; +using ::ShaderLoadStrategy; +using ::UniformInfo; +using ::UniformSet; + +using RDM = ::RenderingDeviceDriverMetal; + +struct ResourceUsageEntry { + StageResourceUsage usage = ResourceUnused; + uint32_t unused = 0; + + ResourceUsageEntry() {} + ResourceUsageEntry(StageResourceUsage p_usage) : + usage(p_usage) {} +}; + +} // namespace MTL3 + +template <> +struct is_zero_constructible : std::true_type {}; + +namespace MTL3 { + +/*! Track the cumulative usage for a resource during a render or compute pass */ +typedef HashMap ResourceToStageUsage; + +/*! Track resource and ensure they are resident prior to dispatch or draw commands. 
+ *
+ * The primary purpose of this data structure is to track all the resources that must be made resident prior
+ * to issuing the next dispatch or draw command. It aggregates all resources used from argument buffers.
+ *
+ * As an optimization, this data structure also tracks previous usage for resources, so that
+ * it may avoid binding them again in later commands if the resource is already resident and its usage flagged.
+ */
+struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) ResourceTracker {
+	// A constant specifying how many iterations a resource can remain in
+	// the _previous HashMap before it will be removed permanently.
+	//
+	// Keeping them in the _previous HashMap reduces churn if resources are regularly
+	// bound. 256 is arbitrary, but if an object remains unused for 256 encoders,
+	// it will be released.
+	static constexpr uint32_t RESOURCE_UNUSED_CLEANUP_COUNT = 256;
+
+	// Used as a scratch buffer to periodically clean up resources from _previous.
+	ResourceVector _scratch;
+	// Tracks all resources and their prior usage for the duration of the encoder.
+ ResourceToStageUsage _previous; + // Tracks resources for the current command that must be made resident + ResourceUsageMap _current; + + void merge_from(const ::ResourceUsageMap &p_from); + void encode(MTL::RenderCommandEncoder *p_enc); + void encode(MTL::ComputeCommandEncoder *p_enc); + void reset(); +}; + +struct BindingCache { + struct BufferBinding { + MTL::Buffer *buffer = nullptr; + NS::UInteger offset = 0; + + bool operator!=(const BufferBinding &p_other) const { + return buffer != p_other.buffer || offset != p_other.offset; + } + }; + + LocalVector textures; + LocalVector samplers; + LocalVector buffers; + + _FORCE_INLINE_ void clear() { + textures.clear(); + samplers.clear(); + buffers.clear(); + } + +private: + template + _FORCE_INLINE_ void ensure_size(LocalVector &p_vec, uint32_t p_required) { + if (p_vec.size() < p_required) { + p_vec.resize_initialized(p_required); + } + } + +public: + _FORCE_INLINE_ bool update(NS::Range p_range, MTL::Texture *const *p_values) { + if (p_range.length == 0) { + return false; + } + uint32_t required = (uint32_t)(p_range.location + p_range.length); + ensure_size(textures, required); + bool changed = false; + for (NS::UInteger i = 0; i < p_range.length; ++i) { + uint32_t slot = (uint32_t)(p_range.location + i); + MTL::Texture *value = p_values[i]; + if (textures[slot] != value) { + textures[slot] = value; + changed = true; + } + } + return changed; + } + + _FORCE_INLINE_ bool update(NS::Range p_range, MTL::SamplerState *const *p_values) { + if (p_range.length == 0) { + return false; + } + uint32_t required = (uint32_t)(p_range.location + p_range.length); + ensure_size(samplers, required); + bool changed = false; + for (NS::UInteger i = 0; i < p_range.length; ++i) { + uint32_t slot = (uint32_t)(p_range.location + i); + MTL::SamplerState *value = p_values[i]; + if (samplers[slot] != value) { + samplers[slot] = value; + changed = true; + } + } + return changed; + } + + _FORCE_INLINE_ bool update(NS::Range p_range, 
MTL::Buffer *const *p_values, const NS::UInteger *p_offsets) { + if (p_range.length == 0) { + return false; + } + uint32_t required = (uint32_t)(p_range.location + p_range.length); + ensure_size(buffers, required); + BufferBinding *buffers_ptr = buffers.ptr() + p_range.location; + bool changed = false; + for (NS::UInteger i = 0; i < p_range.length; ++i) { + BufferBinding &binding = *buffers_ptr; + BufferBinding new_binding = { + .buffer = p_values[i], + .offset = p_offsets[i], + }; + if (binding != new_binding) { + binding = new_binding; + changed = true; + } + ++buffers_ptr; + } + return changed; + } + + _FORCE_INLINE_ bool update(MTL::Buffer *p_buffer, NS::UInteger p_offset, uint32_t p_index) { + uint32_t required = p_index + 1; + ensure_size(buffers, required); + BufferBinding &binding = buffers.ptr()[p_index]; + BufferBinding new_binding = { + .buffer = p_buffer, + .offset = p_offset, + }; + if (binding != new_binding) { + binding = new_binding; + return true; + } + return false; + } +}; + +// A type used to encode resources directly to a MTLCommandEncoder +struct DirectEncoder { + MTL::CommandEncoder *encoder; + BindingCache &cache; + enum Mode { + RENDER, + COMPUTE + }; + Mode mode; + + void set(MTL::Buffer **p_buffers, const NS::UInteger *p_offsets, NS::Range p_range); + void set(MTL::Buffer *p_buffer, NS::UInteger p_offset, uint32_t p_index); + void set(MTL::Texture **p_textures, NS::Range p_range); + void set(MTL::SamplerState **p_samplers, NS::Range p_range); + + DirectEncoder(MTL::CommandEncoder *p_encoder, BindingCache &p_cache, Mode p_mode) : + encoder(p_encoder), cache(p_cache), mode(p_mode) {} +}; + +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer : public MDCommandBufferBase { + friend class MDUniformSet; + +private: +#pragma mark - Common State + + BindingCache binding_cache; + +#pragma mark - Argument Buffer Ring Allocator + + using Alloc = MDRingBuffer::Allocation; + + // Used for argument buffers that contain dynamic 
uniforms. + MDRingBuffer _scratch; + + /// Allocates from the ring buffer for dynamic argument buffers. + Alloc allocate_arg_buffer(uint32_t p_size); + + struct { + NS::SharedPtr rs; + } _frame_state; + +#pragma mark - Synchronization + + enum { + STAGE_RENDER, + STAGE_COMPUTE, + STAGE_BLIT, + STAGE_MAX, + }; + bool use_barriers = false; + MTL::Stages pending_after_stages[STAGE_MAX] = { 0, 0, 0 }; + MTL::Stages pending_before_queue_stages[STAGE_MAX] = { 0, 0, 0 }; + void _encode_barrier(MTL::CommandEncoder *p_enc); + + void reset(); + + MTL::CommandQueue *queue = nullptr; + NS::SharedPtr commandBuffer; + bool state_begin = false; + + MTL::CommandBuffer *command_buffer(); + + void _end_compute_dispatch(); + void _end_blit(); + MTL::BlitCommandEncoder *_ensure_blit_encoder(); + + enum class CopySource { + Buffer, + Texture, + }; + void _copy_texture_buffer(CopySource p_source, + RDD::TextureID p_texture, + RDD::BufferID p_buffer, + VectorView p_regions); + +#pragma mark - Render + + void _render_set_dirty_state(); + void _render_bind_uniform_sets(); + void _bind_uniforms_argument_buffers(MDUniformSet *p_set, MDShader *p_shader, uint32_t p_set_index, uint32_t p_dynamic_offsets); + void _bind_uniforms_direct(MDUniformSet *p_set, MDShader *p_shader, DirectEncoder p_enc, uint32_t p_set_index, uint32_t p_dynamic_offsets); + +#pragma mark - Compute + + void _compute_set_dirty_state(); + void _compute_bind_uniform_sets(); + void _bind_uniforms_argument_buffers_compute(MDUniformSet *p_set, MDShader *p_shader, uint32_t p_set_index, uint32_t p_dynamic_offsets); + +protected: + void mark_push_constants_dirty() override; + RenderStateBase &get_render_state_base() override { return render; } + uint32_t get_current_view_count() const override { return render.get_subpass().view_count; } + MDRenderPass *get_render_pass() const override { return render.pass; } + MDFrameBuffer *get_frame_buffer() const override { return render.frameBuffer; } + const MDSubpass &get_current_subpass() 
const override { return render.get_subpass(); } + LocalVector &get_clear_values() override { return render.clear_values; } + const Rect2i &get_render_area() const override { return render.render_area; } + void end_render_encoding() override { render.end_encoding(); } + +public: + struct RenderState : public RenderStateBase { + MDRenderPass *pass = nullptr; + MDFrameBuffer *frameBuffer = nullptr; + MDRenderPipeline *pipeline = nullptr; + LocalVector clear_values; + uint32_t current_subpass = UINT32_MAX; + Rect2i render_area = {}; + bool is_rendering_entire_area = false; + NS::SharedPtr desc; + NS::SharedPtr encoder; + MTL::Buffer *index_buffer = nullptr; // Buffer is owned by RDD. + MTL::IndexType index_type = MTL::IndexTypeUInt16; + uint32_t index_offset = 0; + LocalVector vertex_buffers; + LocalVector vertex_offsets; + ResourceTracker resource_tracker; + + LocalVector uniform_sets; + uint32_t dynamic_offsets = 0; + // Bit mask of the uniform sets that are dirty, to prevent redundant binding. 
+ uint64_t uniform_set_mask = 0; + + _FORCE_INLINE_ void reset(); + void end_encoding(); + + _ALWAYS_INLINE_ const MDSubpass &get_subpass() const { + DEV_ASSERT(pass != nullptr); + return pass->subpasses[current_subpass]; + } + + _FORCE_INLINE_ void mark_viewport_dirty() { + if (viewports.is_empty()) { + return; + } + dirty.set_flag(DirtyFlag::DIRTY_VIEWPORT); + } + + _FORCE_INLINE_ void mark_scissors_dirty() { + if (scissors.is_empty()) { + return; + } + dirty.set_flag(DirtyFlag::DIRTY_SCISSOR); + } + + _FORCE_INLINE_ void mark_vertex_dirty() { + if (vertex_buffers.is_empty()) { + return; + } + dirty.set_flag(DirtyFlag::DIRTY_VERTEX); + } + + _FORCE_INLINE_ void mark_uniforms_dirty(std::initializer_list l) { + if (uniform_sets.is_empty()) { + return; + } + for (uint32_t i : l) { + if (i < uniform_sets.size() && uniform_sets[i] != nullptr) { + uniform_set_mask |= 1 << i; + } + } + dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS); + } + + _FORCE_INLINE_ void mark_uniforms_dirty(void) { + if (uniform_sets.is_empty()) { + return; + } + for (uint32_t i = 0; i < uniform_sets.size(); i++) { + if (uniform_sets[i] != nullptr) { + uniform_set_mask |= 1 << i; + } + } + dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS); + } + + _FORCE_INLINE_ void mark_blend_dirty() { + if (!blend_constants.has_value()) { + return; + } + dirty.set_flag(DirtyFlag::DIRTY_BLEND); + } + + MTL::ScissorRect clip_to_render_area(MTL::ScissorRect p_rect) const { + uint32_t raLeft = render_area.position.x; + uint32_t raRight = raLeft + render_area.size.width; + uint32_t raBottom = render_area.position.y; + uint32_t raTop = raBottom + render_area.size.height; + + p_rect.x = CLAMP(p_rect.x, raLeft, MAX(raRight - 1, raLeft)); + p_rect.y = CLAMP(p_rect.y, raBottom, MAX(raTop - 1, raBottom)); + p_rect.width = MIN(p_rect.width, raRight - p_rect.x); + p_rect.height = MIN(p_rect.height, raTop - p_rect.y); + + return p_rect; + } + + Rect2i clip_to_render_area(Rect2i p_rect) const { + int32_t raLeft = render_area.position.x; + 
int32_t raRight = raLeft + render_area.size.width; + int32_t raBottom = render_area.position.y; + int32_t raTop = raBottom + render_area.size.height; + + p_rect.position.x = CLAMP(p_rect.position.x, raLeft, MAX(raRight - 1, raLeft)); + p_rect.position.y = CLAMP(p_rect.position.y, raBottom, MAX(raTop - 1, raBottom)); + p_rect.size.width = MIN(p_rect.size.width, raRight - p_rect.position.x); + p_rect.size.height = MIN(p_rect.size.height, raTop - p_rect.position.y); + + return p_rect; + } + + } render; + + // State specific for a compute pass. + struct ComputeState { + MDComputePipeline *pipeline = nullptr; + NS::SharedPtr encoder; + ResourceTracker resource_tracker; + // clang-format off + enum DirtyFlag: uint16_t { + DIRTY_NONE = 0, + DIRTY_PIPELINE = 1 << 0, //! pipeline state + DIRTY_UNIFORMS = 1 << 1, //! uniform sets + DIRTY_PUSH = 1 << 2, //! push constants + DIRTY_ALL = (1 << 3) - 1, + }; + // clang-format on + BitField dirty = DIRTY_NONE; + + LocalVector uniform_sets; + uint32_t dynamic_offsets = 0; + // Bit mask of the uniform sets that are dirty, to prevent redundant binding. + uint64_t uniform_set_mask = 0; + + _FORCE_INLINE_ void reset(); + void end_encoding(); + + _FORCE_INLINE_ void mark_uniforms_dirty(void) { + if (uniform_sets.is_empty()) { + return; + } + for (uint32_t i = 0; i < uniform_sets.size(); i++) { + if (uniform_sets[i] != nullptr) { + uniform_set_mask |= 1 << i; + } + } + dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS); + } + } compute; + + // State specific to a blit pass. 
+ struct { + NS::SharedPtr encoder; + _FORCE_INLINE_ void reset() { + encoder.reset(); + } + } blit; + + _FORCE_INLINE_ MTL::CommandBuffer *get_command_buffer() const { + return commandBuffer.get(); + } + + void begin() override; + void commit() override; + void end() override; + + void bind_pipeline(RDD::PipelineID p_pipeline) override; + +#pragma mark - Render Commands + + void render_bind_uniform_sets(VectorView p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) override; + void render_clear_attachments(VectorView p_attachment_clears, VectorView p_rects) override; + void render_begin_pass(RDD::RenderPassID p_render_pass, + RDD::FramebufferID p_frameBuffer, + RDD::CommandBufferType p_cmd_buffer_type, + const Rect2i &p_rect, + VectorView p_clear_values) override; + void render_next_subpass() override; + void render_draw(uint32_t p_vertex_count, + uint32_t p_instance_count, + uint32_t p_base_vertex, + uint32_t p_first_instance) override; + void render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) override; + void render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset) override; + + void render_draw_indexed(uint32_t p_index_count, + uint32_t p_instance_count, + uint32_t p_first_index, + int32_t p_vertex_offset, + uint32_t p_first_instance) override; + + void render_draw_indexed_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override; + void render_draw_indexed_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override; + void render_draw_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override; + void 
render_draw_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override; + + void render_end_pass() override; + +#pragma mark - Compute Commands + + void compute_bind_uniform_sets(VectorView p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) override; + void compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) override; + void compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset) override; + +#pragma mark - Transfer + +private: + MTL::RenderCommandEncoder *get_new_render_encoder_with_descriptor(MTL::RenderPassDescriptor *p_desc); + +public: + void resolve_texture(RDD::TextureID p_src_texture, RDD::TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, RDD::TextureID p_dst_texture, RDD::TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) override; + void clear_color_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, const Color &p_color, const RDD::TextureSubresourceRange &p_subresources) override; + void clear_depth_stencil_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, float p_depth, uint8_t p_stencil, const RDD::TextureSubresourceRange &p_subresources) override; + void clear_buffer(RDD::BufferID p_buffer, uint64_t p_offset, uint64_t p_size) override; + void copy_buffer(RDD::BufferID p_src_buffer, RDD::BufferID p_dst_buffer, VectorView p_regions) override; + void copy_texture(RDD::TextureID p_src_texture, RDD::TextureID p_dst_texture, VectorView p_regions) override; + void copy_buffer_to_texture(RDD::BufferID p_src_buffer, RDD::TextureID p_dst_texture, VectorView p_regions) override; + void copy_texture_to_buffer(RDD::TextureID p_src_texture, RDD::BufferID p_dst_buffer, VectorView p_regions) override; + +#pragma mark - 
Synchronization + + void pipeline_barrier(BitField p_src_stages, + BitField p_dst_stages, + VectorView p_memory_barriers, + VectorView p_buffer_barriers, + VectorView p_texture_barriers, + VectorView p_acceleration_structure_barriers) override; + +#pragma mark - Debugging + + void begin_label(const char *p_label_name, const Color &p_color) override; + void end_label() override; + + MDCommandBuffer(MTL::CommandQueue *p_queue, ::RenderingDeviceDriverMetal *p_device_driver); + MDCommandBuffer() = default; +}; + +} // namespace MTL3 + +// C++ helper to get mipmap level size from texture +_FORCE_INLINE_ static MTL::Size mipmapLevelSizeFromTexture(MTL::Texture *p_tex, NS::UInteger p_level) { + MTL::Size lvlSize; + lvlSize.width = MAX(p_tex->width() >> p_level, 1UL); + lvlSize.height = MAX(p_tex->height() >> p_level, 1UL); + lvlSize.depth = MAX(p_tex->depth() >> p_level, 1UL); + return lvlSize; +} diff --git a/drivers/metal/metal_device_properties.mm b/drivers/metal/metal_device_properties.cpp similarity index 73% rename from drivers/metal/metal_device_properties.mm rename to drivers/metal/metal_device_properties.cpp index 2abec4b4013..9806c1c84d8 100644 --- a/drivers/metal/metal_device_properties.mm +++ b/drivers/metal/metal_device_properties.cpp @@ -1,5 +1,5 @@ /**************************************************************************/ -/* metal_device_properties.mm */ +/* metal_device_properties.cpp */ /**************************************************************************/ /* This file is part of: */ /* GODOT ENGINE */ @@ -48,41 +48,46 @@ /* permissions and limitations under the License. 
*/ /**************************************************************************/ -#import "metal_device_properties.h" +#include "metal_device_properties.h" -#import "metal_utils.h" +#include "metal_utils.h" -#import "servers/rendering/renderer_rd/effects/metal_fx.h" +#include "servers/rendering/renderer_rd/effects/metal_fx.h" -#import -#import -#import -#import +#include +#include +#include + +#include // Common scaling multipliers. #define KIBI (1024) #define MEBI (KIBI * KIBI) #if (TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 140000) || (TARGET_OS_IPHONE && __IPHONE_OS_VERSION_MAX_ALLOWED < 170000) -#define MTLGPUFamilyApple9 (MTLGPUFamily)1009 +constexpr MTL::GPUFamily GPUFamilyApple9 = static_cast(1009); +#else +constexpr MTL::GPUFamily GPUFamilyApple9 = MTL::GPUFamilyApple9; #endif API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(1.0)) -MTLGPUFamily &operator--(MTLGPUFamily &p_family) { - p_family = static_cast(static_cast(p_family) - 1); - if (p_family < MTLGPUFamilyApple1) { - p_family = MTLGPUFamilyApple9; +MTL::GPUFamily &operator--(MTL::GPUFamily &p_family) { + p_family = static_cast(static_cast(p_family) - 1); + if (p_family < MTL::GPUFamilyApple1) { + p_family = GPUFamilyApple9; } return p_family; } -void MetalDeviceProperties::init_features(id p_device) { +void MetalDeviceProperties::init_features(MTL::Device *p_device) { features = {}; - MTLCompileOptions *opts = [MTLCompileOptions new]; - features.msl_max_version = make_msl_version((opts.languageVersion >> 0x10) & 0xff, (opts.languageVersion >> 0x00) & 0xff); + MTL::CompileOptions *opts = MTL::CompileOptions::alloc()->init(); + MTL::LanguageVersion lang_version = opts->languageVersion(); + features.msl_max_version = make_msl_version((static_cast(lang_version) >> 0x10) & 0xff, (static_cast(lang_version) >> 0x00) & 0xff); features.msl_target_version = features.msl_max_version; + opts->release(); if (String version = OS::get_singleton()->get_environment("GODOT_MTL_TARGET_VERSION"); 
!version.is_empty()) { if (version != "max") { Vector parts = version.split(".", true, 2); @@ -102,55 +107,55 @@ void MetalDeviceProperties::init_features(id p_device) { } } - features.highestFamily = MTLGPUFamilyApple1; - for (MTLGPUFamily family = MTLGPUFamilyApple9; family >= MTLGPUFamilyApple1; --family) { - if ([p_device supportsFamily:family]) { + features.highestFamily = MTL::GPUFamilyApple1; + for (MTL::GPUFamily family = GPUFamilyApple9; family >= MTL::GPUFamilyApple1; --family) { + if (p_device->supportsFamily(family)) { features.highestFamily = family; break; } } - if (@available(macOS 11, iOS 16.4, tvOS 16.4, *)) { - features.supportsBCTextureCompression = p_device.supportsBCTextureCompression; + if (__builtin_available(macOS 11, iOS 16.4, tvOS 16.4, *)) { + features.supportsBCTextureCompression = p_device->supportsBCTextureCompression(); } else { features.supportsBCTextureCompression = false; } #if TARGET_OS_OSX - features.supportsDepth24Stencil8 = p_device.isDepth24Stencil8PixelFormatSupported; + features.supportsDepth24Stencil8 = p_device->isDepth24Stencil8PixelFormatSupported(); #endif - if (@available(macOS 11.0, iOS 14.0, tvOS 14.0, *)) { - features.supports32BitFloatFiltering = p_device.supports32BitFloatFiltering; - features.supports32BitMSAA = p_device.supports32BitMSAA; + if (__builtin_available(macOS 11.0, iOS 14.0, tvOS 14.0, *)) { + features.supports32BitFloatFiltering = p_device->supports32BitFloatFiltering(); + features.supports32BitMSAA = p_device->supports32BitMSAA(); } - if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) { + if (__builtin_available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) { features.supports_gpu_address = true; } features.hostMemoryPageSize = sysconf(_SC_PAGESIZE); for (SampleCount sc = SampleCount1; sc <= SampleCount64; sc <<= 1) { - if ([p_device supportsTextureSampleCount:sc]) { + if (p_device->supportsTextureSampleCount(sc)) { features.supportedSampleCounts |= sc; } } - features.layeredRendering = [p_device 
supportsFamily:MTLGPUFamilyApple5]; - features.multisampleLayeredRendering = [p_device supportsFamily:MTLGPUFamilyApple7]; - features.tessellationShader = [p_device supportsFamily:MTLGPUFamilyApple3]; - features.imageCubeArray = [p_device supportsFamily:MTLGPUFamilyApple3]; - features.quadPermute = [p_device supportsFamily:MTLGPUFamilyApple4]; - features.simdPermute = [p_device supportsFamily:MTLGPUFamilyApple6]; - features.simdReduction = [p_device supportsFamily:MTLGPUFamilyApple7]; - features.argument_buffers_tier = p_device.argumentBuffersSupport; - features.supports_image_atomic_32_bit = [p_device supportsFamily:MTLGPUFamilyApple6]; - features.supports_image_atomic_64_bit = [p_device supportsFamily:MTLGPUFamilyApple9] || ([p_device supportsFamily:MTLGPUFamilyApple8] && [p_device supportsFamily:MTLGPUFamilyMac2]); + features.layeredRendering = p_device->supportsFamily(MTL::GPUFamilyApple5); + features.multisampleLayeredRendering = p_device->supportsFamily(MTL::GPUFamilyApple7); + features.tessellationShader = p_device->supportsFamily(MTL::GPUFamilyApple3); + features.imageCubeArray = p_device->supportsFamily(MTL::GPUFamilyApple3); + features.quadPermute = p_device->supportsFamily(MTL::GPUFamilyApple4); + features.simdPermute = p_device->supportsFamily(MTL::GPUFamilyApple6); + features.simdReduction = p_device->supportsFamily(MTL::GPUFamilyApple7); + features.argument_buffers_tier = p_device->argumentBuffersSupport(); + features.supports_image_atomic_32_bit = p_device->supportsFamily(MTL::GPUFamilyApple6); + features.supports_image_atomic_64_bit = p_device->supportsFamily(GPUFamilyApple9) || (p_device->supportsFamily(MTL::GPUFamilyApple8) && p_device->supportsFamily(MTL::GPUFamilyMac2)); if (features.msl_target_version >= MSL_VERSION_31) { // Native atomics are only supported on 3.1 and above. 
- if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, visionOS 1.0, *)) { + if (__builtin_available(macOS 14.0, iOS 17.0, tvOS 17.0, visionOS 1.0, *)) { features.supports_native_image_atomics = true; } } @@ -159,31 +164,31 @@ void MetalDeviceProperties::init_features(id p_device) { features.supports_native_image_atomics = false; } - if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) { + if (__builtin_available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) { features.supports_residency_sets = true; } else { features.supports_residency_sets = false; } - if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) { - features.needs_arg_encoders = !([p_device supportsFamily:MTLGPUFamilyMetal3] && features.argument_buffers_tier == MTLArgumentBuffersTier2); + if (__builtin_available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) { + features.needs_arg_encoders = !(p_device->supportsFamily(MTL::GPUFamilyMetal3) && features.argument_buffers_tier == MTL::ArgumentBuffersTier2); } if (String v = OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_ARGUMENT_BUFFERS"); v == "1") { features.use_argument_buffers = false; } - if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) { - features.metal_fx_spatial = [MTLFXSpatialScalerDescriptor supportsDevice:p_device]; + if (__builtin_available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) { + features.metal_fx_spatial = MTLFX::SpatialScalerDescriptor::supportsDevice(p_device); #ifdef METAL_MFXTEMPORAL_ENABLED - features.metal_fx_temporal = [MTLFXTemporalScalerDescriptor supportsDevice:p_device]; + features.metal_fx_temporal = MTLFX::TemporalScalerDescriptor::supportsDevice(p_device); #else features.metal_fx_temporal = false; #endif } } -void MetalDeviceProperties::init_limits(id p_device) { +void MetalDeviceProperties::init_limits(MTL::Device *p_device) { using std::max; using std::min; @@ -191,7 +196,7 @@ void MetalDeviceProperties::init_limits(id p_device) { // FST: Maximum number of layers per 1D texture array, 2D texture array, or 3D 
texture. limits.maxImageArrayLayers = 2048; - if ([p_device supportsFamily:MTLGPUFamilyApple3]) { + if (p_device->supportsFamily(MTL::GPUFamilyApple3)) { // FST: Maximum 2D texture width and height. limits.maxFramebufferWidth = 16384; limits.maxFramebufferHeight = 16384; @@ -219,7 +224,7 @@ void MetalDeviceProperties::init_limits(id p_device) { // FST: Maximum 3D texture width, height, and depth. limits.maxImageDimension3D = 2048; - limits.maxThreadsPerThreadGroup = p_device.maxThreadsPerThreadgroup; + limits.maxThreadsPerThreadGroup = p_device->maxThreadsPerThreadgroup(); // No effective limits. limits.maxComputeWorkGroupCount = { std::numeric_limits::max(), std::numeric_limits::max(), std::numeric_limits::max() }; // https://github.com/KhronosGroup/MoltenVK/blob/568cc3acc0e2299931fdaecaaa1fc3ec5b4af281/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h#L85 @@ -228,25 +233,25 @@ void MetalDeviceProperties::init_limits(id p_device) { limits.maxColorAttachments = 8; // Maximum number of textures the device can access, per stage, from an argument buffer. - if ([p_device supportsFamily:MTLGPUFamilyApple6]) { + if (p_device->supportsFamily(MTL::GPUFamilyApple6)) { limits.maxTexturesPerArgumentBuffer = 1'000'000; - } else if ([p_device supportsFamily:MTLGPUFamilyApple4]) { + } else if (p_device->supportsFamily(MTL::GPUFamilyApple4)) { limits.maxTexturesPerArgumentBuffer = 96; } else { limits.maxTexturesPerArgumentBuffer = 31; } // Maximum number of samplers the device can access, per stage, from an argument buffer. - if ([p_device supportsFamily:MTLGPUFamilyApple6]) { + if (p_device->supportsFamily(MTL::GPUFamilyApple6)) { limits.maxSamplersPerArgumentBuffer = 1024; } else { limits.maxSamplersPerArgumentBuffer = 16; } // Maximum number of buffers the device can access, per stage, from an argument buffer. 
- if ([p_device supportsFamily:MTLGPUFamilyApple6]) { + if (p_device->supportsFamily(MTL::GPUFamilyApple6)) { limits.maxBuffersPerArgumentBuffer = std::numeric_limits::max(); - } else if ([p_device supportsFamily:MTLGPUFamilyApple4]) { + } else if (p_device->supportsFamily(MTL::GPUFamilyApple4)) { limits.maxBuffersPerArgumentBuffer = 96; } else { limits.maxBuffersPerArgumentBuffer = 31; @@ -283,13 +288,13 @@ void MetalDeviceProperties::init_limits(id p_device) { limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_QUAD_BIT); } - limits.maxBufferLength = p_device.maxBufferLength; + limits.maxBufferLength = p_device->maxBufferLength(); // FST: Maximum size of vertex descriptor layout stride. limits.maxVertexDescriptorLayoutStride = std::numeric_limits::max(); // Maximum number of viewports. - if ([p_device supportsFamily:MTLGPUFamilyApple5]) { + if (p_device->supportsFamily(MTL::GPUFamilyApple5)) { limits.maxViewports = 16; } else { limits.maxViewports = 1; @@ -297,9 +302,9 @@ void MetalDeviceProperties::init_limits(id p_device) { limits.maxPerStageBufferCount = 31; limits.maxPerStageSamplerCount = 16; - if ([p_device supportsFamily:MTLGPUFamilyApple6]) { + if (p_device->supportsFamily(MTL::GPUFamilyApple6)) { limits.maxPerStageTextureCount = 128; - } else if ([p_device supportsFamily:MTLGPUFamilyApple4]) { + } else if (p_device->supportsFamily(MTL::GPUFamilyApple4)) { limits.maxPerStageTextureCount = 96; } else { limits.maxPerStageTextureCount = 31; @@ -310,9 +315,9 @@ void MetalDeviceProperties::init_limits(id p_device) { limits.maxVertexInputBindingStride = (2 * KIBI); limits.maxShaderVaryings = 31; // Accurate on Apple4 and above. 
See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf - if ([p_device supportsFamily:MTLGPUFamilyApple4]) { + if (p_device->supportsFamily(MTL::GPUFamilyApple4)) { limits.maxThreadGroupMemoryAllocation = 32768; - } else if ([p_device supportsFamily:MTLGPUFamilyApple3]) { + } else if (p_device->supportsFamily(MTL::GPUFamilyApple3)) { limits.maxThreadGroupMemoryAllocation = 16384; } else { limits.maxThreadGroupMemoryAllocation = 16352; @@ -330,9 +335,9 @@ void MetalDeviceProperties::init_limits(id p_device) { limits.maxDrawIndexedIndexValue = std::numeric_limits::max() - 1; #ifdef METAL_MFXTEMPORAL_ENABLED - if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) { - limits.temporalScalerInputContentMinScale = (double)[MTLFXTemporalScalerDescriptor supportedInputContentMinScaleForDevice:p_device]; - limits.temporalScalerInputContentMaxScale = (double)[MTLFXTemporalScalerDescriptor supportedInputContentMaxScaleForDevice:p_device]; + if (__builtin_available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) { + limits.temporalScalerInputContentMinScale = MTLFX::TemporalScalerDescriptor::supportedInputContentMinScale(p_device); + limits.temporalScalerInputContentMaxScale = MTLFX::TemporalScalerDescriptor::supportedInputContentMaxScale(p_device); } else { // Defaults taken from macOS 14+ limits.temporalScalerInputContentMinScale = 1.0; @@ -346,11 +351,11 @@ void MetalDeviceProperties::init_limits(id p_device) { } void MetalDeviceProperties::init_os_props() { - NSOperatingSystemVersion ver = NSProcessInfo.processInfo.operatingSystemVersion; + NS::OperatingSystemVersion ver = NS::ProcessInfo::processInfo()->operatingSystemVersion(); os_version = (uint32_t)ver.majorVersion * 10000 + (uint32_t)ver.minorVersion * 100 + (uint32_t)ver.patchVersion; } -MetalDeviceProperties::MetalDeviceProperties(id p_device) { +MetalDeviceProperties::MetalDeviceProperties(MTL::Device *p_device) { init_features(p_device); init_limits(p_device); init_os_props(); diff --git 
a/drivers/metal/metal_device_properties.h b/drivers/metal/metal_device_properties.h index 447e485de6b..e4acc473468 100644 --- a/drivers/metal/metal_device_properties.h +++ b/drivers/metal/metal_device_properties.h @@ -50,16 +50,16 @@ /* permissions and limitations under the License. */ /**************************************************************************/ -#import "servers/rendering/rendering_device.h" +#include "servers/rendering/rendering_device.h" -#import -#import +#include +#include /** The buffer index to use for vertex content. */ const static uint32_t VERT_CONTENT_BUFFER_INDEX = 0; const static uint32_t MAX_COLOR_ATTACHMENT_COUNT = 8; -typedef NS_OPTIONS(NSUInteger, SampleCount) { +enum SampleCount : NS::UInteger { SampleCount1 = (1UL << 0), SampleCount2 = (1UL << 1), SampleCount4 = (1UL << 2), @@ -69,6 +69,22 @@ typedef NS_OPTIONS(NSUInteger, SampleCount) { SampleCount64 = (1UL << 6), }; +_FORCE_INLINE_ SampleCount operator|(SampleCount a, SampleCount b) { + return static_cast(static_cast(a) | static_cast(b)); +} + +_FORCE_INLINE_ SampleCount &operator|=(SampleCount &a, SampleCount b) { + return a = a | b; +} + +_FORCE_INLINE_ SampleCount operator<<(SampleCount a, int shift) { + return static_cast(static_cast(a) << shift); +} + +_FORCE_INLINE_ SampleCount &operator<<=(SampleCount &a, int shift) { + return a = a << shift; +} + struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures { /// Maximum version of the Metal Shading Language version available. uint32_t msl_max_version = 0; @@ -78,7 +94,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures { * for engine developers for testing. 
*/ uint32_t msl_target_version = 0; - MTLGPUFamily highestFamily = MTLGPUFamilyApple4; + MTL::GPUFamily highestFamily = MTL::GPUFamilyApple4; bool supportsBCTextureCompression = false; bool supportsDepth24Stencil8 = false; bool supports32BitFloatFiltering = false; @@ -93,7 +109,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures { bool simdReduction = false; /**< If true, SIMD-group reduction functions (arithmetic) are supported in shaders. */ bool tessellationShader = false; /**< If true, tessellation shaders are supported. */ bool imageCubeArray = false; /**< If true, image cube arrays are supported. */ - MTLArgumentBuffersTier argument_buffers_tier = MTLArgumentBuffersTier1; + MTL::ArgumentBuffersTier argument_buffers_tier = MTL::ArgumentBuffersTier1; bool needs_arg_encoders = true; /**< If true, argument encoders are required to encode arguments into an argument buffer. */ bool use_argument_buffers = true; /**< If true, argument buffers are can be used instead of slot binding, if available. */ bool metal_fx_spatial = false; /**< If true, Metal FX spatial functions are supported. */ @@ -108,7 +124,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures { * Check if argument buffers are fully supported, which requires tier 2 support and no need for argument encoders. */ _FORCE_INLINE_ bool argument_buffers_supported() const { - return argument_buffers_tier == MTLArgumentBuffersTier2 && needs_arg_encoders == false; + return argument_buffers_tier == MTL::ArgumentBuffersTier2 && needs_arg_encoders == false; } /*! 
@@ -129,8 +145,8 @@ struct MetalLimits { uint64_t maxImageDimensionCube; uint64_t maxViewportDimensionX; uint64_t maxViewportDimensionY; - MTLSize maxThreadsPerThreadGroup; - MTLSize maxComputeWorkGroupCount; + MTL::Size maxThreadsPerThreadGroup; + MTL::Size maxComputeWorkGroupCount; uint64_t maxBoundDescriptorSets; uint64_t maxColorAttachments; uint64_t maxTexturesPerArgumentBuffer; @@ -161,8 +177,8 @@ struct MetalLimits { class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalDeviceProperties { private: - void init_features(id p_device); - void init_limits(id p_device); + void init_features(MTL::Device *p_device); + void init_limits(MTL::Device *p_device); void init_os_props(); public: @@ -174,7 +190,7 @@ public: SampleCount find_nearest_supported_sample_count(RenderingDevice::TextureSamples p_samples) const; - MetalDeviceProperties(id p_device); + MetalDeviceProperties(MTL::Device *p_device); ~MetalDeviceProperties(); private: diff --git a/drivers/metal/metal_objects.h b/drivers/metal/metal_objects.h deleted file mode 100644 index 1d3d512f557..00000000000 --- a/drivers/metal/metal_objects.h +++ /dev/null @@ -1,1086 +0,0 @@ -/**************************************************************************/ -/* metal_objects.h */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/**************************************************************************/ - -#pragma once - -/**************************************************************************/ -/* */ -/* Portions of this code were derived from MoltenVK. */ -/* */ -/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */ -/* (http://www.brenwill.com) */ -/* */ -/* Licensed under the Apache License, Version 2.0 (the "License"); */ -/* you may not use this file except in compliance with the License. */ -/* You may obtain a copy of the License at */ -/* */ -/* http://www.apache.org/licenses/LICENSE-2.0 */ -/* */ -/* Unless required by applicable law or agreed to in writing, software */ -/* distributed under the License is distributed on an "AS IS" BASIS, */ -/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ -/* implied. 
See the License for the specific language governing */ -/* permissions and limitations under the License. */ -/**************************************************************************/ - -#import "metal_device_properties.h" -#import "metal_objects_shared.h" -#import "metal_utils.h" -#import "pixel_formats.h" -#import "sha256_digest.h" - -#include "servers/rendering/rendering_device_driver.h" - -#import -#import -#import -#import -#import -#import -#import -#import -#import - -enum StageResourceUsage : uint32_t { - ResourceUnused = 0, - VertexRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_VERTEX * 2), - VertexWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_VERTEX * 2), - FragmentRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_FRAGMENT * 2), - FragmentWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_FRAGMENT * 2), - TesselationControlRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2), - TesselationControlWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2), - TesselationEvaluationRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2), - TesselationEvaluationWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2), - ComputeRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_COMPUTE * 2), - ComputeWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_COMPUTE * 2), -}; - -typedef id __unsafe_unretained MTLResourceUnsafe; - -template <> -struct HashMapHasherDefaultImpl { - static _FORCE_INLINE_ uint32_t hash(const MTLResourceUnsafe p_pointer) { return hash_one_uint64((uint64_t)p_pointer); } -}; - -typedef LocalVector ResourceVector; -typedef HashMap ResourceUsageMap; - -struct ResourceUsageEntry { - StageResourceUsage usage = ResourceUnused; - uint32_t unused = 0; - - ResourceUsageEntry() {} - ResourceUsageEntry(StageResourceUsage p_usage) : - usage(p_usage) {} -}; - -template <> -struct is_zero_constructible : std::true_type {}; - -/*! 
Track the cumulative usage for a resource during a render or compute pass */ -typedef HashMap ResourceToStageUsage; - -/*! Track resource and ensure they are resident prior to dispatch or draw commands. - * - * The primary purpose of this data structure is to track all the resources that must be made resident prior - * to issuing the next dispatch or draw command. It aggregates all resources used from argument buffers. - * - * As an optimization, this data structure also tracks previous usage for resources, so that - * it may avoid binding them again in later commands if the resource is already resident and its usage flagged. - */ -struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) ResourceTracker { - // A constant specifying how many iterations a resource can remain in - // the _previous HashSet before it will be removed permanently. - // - // Keeping them in the _previous HashMap reduces churn if resources are regularly - // bound. 256 is arbitrary, but if an object remains unused for 256 encoders, - // it will be released. - static constexpr uint32_t RESOURCE_UNUSED_CLEANUP_COUNT = 256; - - // Used as a scratch buffer to periodically clean up resources from _previous. - ResourceVector _scratch; - // Tracks all resources and their prior usage for the duration of the encoder. 
- ResourceToStageUsage _previous; - // Tracks resources for the current command that must be made resident - ResourceUsageMap _current; - - void merge_from(const ResourceUsageMap &p_from); - void encode(id __unsafe_unretained p_enc); - void encode(id __unsafe_unretained p_enc); - void reset(); -}; - -enum class MDCommandBufferStateType { - None, - Render, - Compute, - Blit, -}; - -enum class MDPipelineType { - None, - Render, - Compute, -}; - -class MDRenderPass; -class MDPipeline; -class MDRenderPipeline; -class MDComputePipeline; -class RenderingDeviceDriverMetal; -class MDUniformSet; -class MDShader; - -struct MetalBufferDynamicInfo; - -using RDM = RenderingDeviceDriverMetal; - -#pragma mark - Resource Factory - -class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDResourceFactory { -private: - RenderingDeviceDriverMetal *device_driver; - - id new_func(NSString *p_source, NSString *p_name, NSError **p_error); - id new_clear_vert_func(ClearAttKey &p_key); - id new_clear_frag_func(ClearAttKey &p_key); - NSString *get_format_type_string(MTLPixelFormat p_fmt); - -public: - id new_clear_pipeline_state(ClearAttKey &p_key, NSError **p_error); - id new_depth_stencil_state(bool p_use_depth, bool p_use_stencil); - - MDResourceFactory(RenderingDeviceDriverMetal *p_device_driver) : - device_driver(p_device_driver) {} - ~MDResourceFactory() = default; -}; - -class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDResourceCache { -private: - typedef HashMap> HashMap; - std::unique_ptr resource_factory; - HashMap clear_states; - - struct { - id all; - id depth_only; - id stencil_only; - id none; - } clear_depth_stencil_state; - -public: - id get_clear_render_pipeline_state(ClearAttKey &p_key, NSError **p_error); - id get_depth_stencil_state(bool p_use_depth, bool p_use_stencil); - - explicit MDResourceCache(RenderingDeviceDriverMetal *p_device_driver) : - resource_factory(new MDResourceFactory(p_device_driver)) {} - ~MDResourceCache() = default; -}; - -enum class 
MDAttachmentType : uint8_t { - None = 0, - Color = 1 << 0, - Depth = 1 << 1, - Stencil = 1 << 2, -}; - -_FORCE_INLINE_ MDAttachmentType &operator|=(MDAttachmentType &p_a, MDAttachmentType p_b) { - flags::set(p_a, p_b); - return p_a; -} - -_FORCE_INLINE_ bool operator&(MDAttachmentType p_a, MDAttachmentType p_b) { - return uint8_t(p_a) & uint8_t(p_b); -} - -struct MDSubpass { - uint32_t subpass_index = 0; - uint32_t view_count = 0; - LocalVector input_references; - LocalVector color_references; - RDD::AttachmentReference depth_stencil_reference; - LocalVector resolve_references; - - MTLFmtCaps getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const; -}; - -struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDAttachment { -private: - uint32_t index = 0; - uint32_t firstUseSubpassIndex = 0; - uint32_t lastUseSubpassIndex = 0; - -public: - MTLPixelFormat format = MTLPixelFormatInvalid; - MDAttachmentType type = MDAttachmentType::None; - MTLLoadAction loadAction = MTLLoadActionDontCare; - MTLStoreAction storeAction = MTLStoreActionDontCare; - MTLLoadAction stencilLoadAction = MTLLoadActionDontCare; - MTLStoreAction stencilStoreAction = MTLStoreActionDontCare; - uint32_t samples = 1; - - /*! - * @brief Returns true if this attachment is first used in the given subpass. - * @param p_subpass - * @return - */ - _FORCE_INLINE_ bool isFirstUseOf(MDSubpass const &p_subpass) const { - return p_subpass.subpass_index == firstUseSubpassIndex; - } - - /*! - * @brief Returns true if this attachment is last used in the given subpass. 
- * @param p_subpass - * @return - */ - _FORCE_INLINE_ bool isLastUseOf(MDSubpass const &p_subpass) const { - return p_subpass.subpass_index == lastUseSubpassIndex; - } - - void linkToSubpass(MDRenderPass const &p_pass); - - MTLStoreAction getMTLStoreAction(MDSubpass const &p_subpass, - bool p_is_rendering_entire_area, - bool p_has_resolve, - bool p_can_resolve, - bool p_is_stencil) const; - bool configureDescriptor(MTLRenderPassAttachmentDescriptor *p_desc, - PixelFormats &p_pf, - MDSubpass const &p_subpass, - id p_attachment, - bool p_is_rendering_entire_area, - bool p_has_resolve, - bool p_can_resolve, - bool p_is_stencil) const; - /** Returns whether this attachment should be cleared in the subpass. */ - bool shouldClear(MDSubpass const &p_subpass, bool p_is_stencil) const; -}; - -class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDRenderPass { -public: - Vector attachments; - Vector subpasses; - - uint32_t get_sample_count() const { - return attachments.is_empty() ? 1 : attachments[0].samples; - } - - MDRenderPass(Vector &p_attachments, Vector &p_subpasses); -}; - -struct BindingCache { - struct BufferBinding { - id __unsafe_unretained buffer = nil; - NSUInteger offset = 0; - - bool operator!=(const BufferBinding &p_other) const { - return buffer != p_other.buffer || offset != p_other.offset; - } - }; - - LocalVector __unsafe_unretained> textures; - LocalVector __unsafe_unretained> samplers; - LocalVector buffers; - - _FORCE_INLINE_ void clear() { - textures.clear(); - samplers.clear(); - buffers.clear(); - } - -private: - template - _FORCE_INLINE_ void ensure_size(LocalVector &p_vec, uint32_t p_required) { - if (p_vec.size() < p_required) { - p_vec.resize_initialized(p_required); - } - } - -public: - _FORCE_INLINE_ bool update(NSRange p_range, id __unsafe_unretained const *p_values) { - if (p_range.length == 0) { - return false; - } - uint32_t required = (uint32_t)(p_range.location + p_range.length); - ensure_size(textures, required); - bool changed = 
false; - for (NSUInteger i = 0; i < p_range.length; ++i) { - uint32_t slot = (uint32_t)(p_range.location + i); - id value = p_values[i]; - if (textures[slot] != value) { - textures[slot] = value; - changed = true; - } - } - return changed; - } - - _FORCE_INLINE_ bool update(NSRange p_range, id __unsafe_unretained const *p_values) { - if (p_range.length == 0) { - return false; - } - uint32_t required = (uint32_t)(p_range.location + p_range.length); - ensure_size(samplers, required); - bool changed = false; - for (NSUInteger i = 0; i < p_range.length; ++i) { - uint32_t slot = (uint32_t)(p_range.location + i); - id __unsafe_unretained value = p_values[i]; - if (samplers[slot] != value) { - samplers[slot] = value; - changed = true; - } - } - return changed; - } - - _FORCE_INLINE_ bool update(NSRange p_range, id __unsafe_unretained const *p_values, const NSUInteger *p_offsets) { - if (p_range.length == 0) { - return false; - } - uint32_t required = (uint32_t)(p_range.location + p_range.length); - ensure_size(buffers, required); - BufferBinding *buffers_ptr = buffers.ptr() + p_range.location; - bool changed = false; - for (NSUInteger i = 0; i < p_range.length; ++i) { - BufferBinding &binding = *buffers_ptr; - BufferBinding new_binding = { - .buffer = p_values[i], - .offset = p_offsets[i], - }; - if (binding != new_binding) { - binding = new_binding; - changed = true; - } - ++buffers_ptr; - } - return changed; - } - - _FORCE_INLINE_ bool update(id __unsafe_unretained p_buffer, NSUInteger p_offset, uint32_t p_index) { - uint32_t required = p_index + 1; - ensure_size(buffers, required); - BufferBinding &binding = buffers.ptr()[p_index]; - BufferBinding new_binding = { - .buffer = p_buffer, - .offset = p_offset, - }; - if (binding != new_binding) { - binding = new_binding; - return true; - } - return false; - } -}; - -class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer { - friend class MDUniformSet; - -private: -#pragma mark - Common State - - // From 
RenderingDevice - static constexpr uint32_t MAX_PUSH_CONSTANT_SIZE = 128; - - uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE]; - uint32_t push_constant_data_len = 0; - uint32_t push_constant_binding = UINT32_MAX; - - BindingCache binding_cache; - - void reset(); - - RenderingDeviceDriverMetal *device_driver = nullptr; - id queue = nil; - id commandBuffer = nil; - bool state_begin = false; - - _FORCE_INLINE_ id command_buffer() { - DEV_ASSERT(state_begin); - if (commandBuffer == nil) { - commandBuffer = queue.commandBuffer; - } - return commandBuffer; - } - - void _end_compute_dispatch(); - void _end_blit(); - id _ensure_blit_encoder(); - - enum class CopySource { - Buffer, - Texture, - }; - void _copy_texture_buffer(CopySource p_source, - RDD::TextureID p_texture, - RDD::BufferID p_buffer, - VectorView p_regions); - -#pragma mark - Render - - void _render_set_dirty_state(); - void _render_bind_uniform_sets(); - - void _populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView p_rects); - uint32_t _populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size); - void _end_render_pass(); - void _render_clear_render_area(); - -#pragma mark - Compute - - void _compute_set_dirty_state(); - void _compute_bind_uniform_sets(); - -public: - MDCommandBufferStateType type = MDCommandBufferStateType::None; - - struct RenderState { - MDRenderPass *pass = nullptr; - MDFrameBuffer *frameBuffer = nullptr; - MDRenderPipeline *pipeline = nullptr; - LocalVector clear_values; - LocalVector viewports; - LocalVector scissors; - std::optional blend_constants; - uint32_t current_subpass = UINT32_MAX; - Rect2i render_area = {}; - bool is_rendering_entire_area = false; - MTLRenderPassDescriptor *desc = nil; - id encoder = nil; - id __unsafe_unretained index_buffer = nil; // Buffer is owned by RDD. 
- MTLIndexType index_type = MTLIndexTypeUInt16; - uint32_t index_offset = 0; - LocalVector __unsafe_unretained> vertex_buffers; - LocalVector vertex_offsets; - ResourceTracker resource_tracker; - // clang-format off - enum DirtyFlag: uint16_t { - DIRTY_NONE = 0, - DIRTY_PIPELINE = 1 << 0, //! pipeline state - DIRTY_UNIFORMS = 1 << 1, //! uniform sets - DIRTY_PUSH = 1 << 2, //! push constants - DIRTY_DEPTH = 1 << 3, //! depth / stencil state - DIRTY_VERTEX = 1 << 4, //! vertex buffers - DIRTY_VIEWPORT = 1 << 5, //! viewport rectangles - DIRTY_SCISSOR = 1 << 6, //! scissor rectangles - DIRTY_BLEND = 1 << 7, //! blend state - DIRTY_RASTER = 1 << 8, //! encoder state like cull mode - DIRTY_ALL = (1 << 9) - 1, - }; - // clang-format on - BitField dirty = DIRTY_NONE; - - LocalVector uniform_sets; - uint32_t dynamic_offsets = 0; - // Bit mask of the uniform sets that are dirty, to prevent redundant binding. - uint64_t uniform_set_mask = 0; - - _FORCE_INLINE_ void reset(); - void end_encoding(); - - _ALWAYS_INLINE_ const MDSubpass &get_subpass() const { - DEV_ASSERT(pass != nullptr); - return pass->subpasses[current_subpass]; - } - - _FORCE_INLINE_ void mark_viewport_dirty() { - if (viewports.is_empty()) { - return; - } - dirty.set_flag(DirtyFlag::DIRTY_VIEWPORT); - } - - _FORCE_INLINE_ void mark_scissors_dirty() { - if (scissors.is_empty()) { - return; - } - dirty.set_flag(DirtyFlag::DIRTY_SCISSOR); - } - - _FORCE_INLINE_ void mark_vertex_dirty() { - if (vertex_buffers.is_empty()) { - return; - } - dirty.set_flag(DirtyFlag::DIRTY_VERTEX); - } - - _FORCE_INLINE_ void mark_uniforms_dirty(std::initializer_list l) { - if (uniform_sets.is_empty()) { - return; - } - for (uint32_t i : l) { - if (i < uniform_sets.size() && uniform_sets[i] != nullptr) { - uniform_set_mask |= 1 << i; - } - } - dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS); - } - - _FORCE_INLINE_ void mark_uniforms_dirty(void) { - if (uniform_sets.is_empty()) { - return; - } - for (uint32_t i = 0; i < 
uniform_sets.size(); i++) { - if (uniform_sets[i] != nullptr) { - uniform_set_mask |= 1 << i; - } - } - dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS); - } - - _FORCE_INLINE_ void mark_blend_dirty() { - if (!blend_constants.has_value()) { - return; - } - dirty.set_flag(DirtyFlag::DIRTY_BLEND); - } - - MTLScissorRect clip_to_render_area(MTLScissorRect p_rect) const { - uint32_t raLeft = render_area.position.x; - uint32_t raRight = raLeft + render_area.size.width; - uint32_t raBottom = render_area.position.y; - uint32_t raTop = raBottom + render_area.size.height; - - p_rect.x = CLAMP(p_rect.x, raLeft, MAX(raRight - 1, raLeft)); - p_rect.y = CLAMP(p_rect.y, raBottom, MAX(raTop - 1, raBottom)); - p_rect.width = MIN(p_rect.width, raRight - p_rect.x); - p_rect.height = MIN(p_rect.height, raTop - p_rect.y); - - return p_rect; - } - - Rect2i clip_to_render_area(Rect2i p_rect) const { - int32_t raLeft = render_area.position.x; - int32_t raRight = raLeft + render_area.size.width; - int32_t raBottom = render_area.position.y; - int32_t raTop = raBottom + render_area.size.height; - - p_rect.position.x = CLAMP(p_rect.position.x, raLeft, MAX(raRight - 1, raLeft)); - p_rect.position.y = CLAMP(p_rect.position.y, raBottom, MAX(raTop - 1, raBottom)); - p_rect.size.width = MIN(p_rect.size.width, raRight - p_rect.position.x); - p_rect.size.height = MIN(p_rect.size.height, raTop - p_rect.position.y); - - return p_rect; - } - - } render; - - // State specific for a compute pass. - struct ComputeState { - MDComputePipeline *pipeline = nullptr; - id encoder = nil; - ResourceTracker resource_tracker; - // clang-format off - enum DirtyFlag: uint16_t { - DIRTY_NONE = 0, - DIRTY_PIPELINE = 1 << 0, //! pipeline state - DIRTY_UNIFORMS = 1 << 1, //! uniform sets - DIRTY_PUSH = 1 << 2, //! 
push constants - DIRTY_ALL = (1 << 3) - 1, - }; - // clang-format on - BitField dirty = DIRTY_NONE; - - LocalVector uniform_sets; - uint32_t dynamic_offsets = 0; - // Bit mask of the uniform sets that are dirty, to prevent redundant binding. - uint64_t uniform_set_mask = 0; - - _FORCE_INLINE_ void reset(); - void end_encoding(); - - _FORCE_INLINE_ void mark_uniforms_dirty(void) { - if (uniform_sets.is_empty()) { - return; - } - for (uint32_t i = 0; i < uniform_sets.size(); i++) { - if (uniform_sets[i] != nullptr) { - uniform_set_mask |= 1 << i; - } - } - dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS); - } - } compute; - - // State specific to a blit pass. - struct { - id encoder = nil; - _FORCE_INLINE_ void reset() { - encoder = nil; - } - } blit; - - _FORCE_INLINE_ id get_command_buffer() const { - return commandBuffer; - } - - void begin(); - void commit(); - void end(); - - void bind_pipeline(RDD::PipelineID p_pipeline); - void encode_push_constant_data(RDD::ShaderID p_shader, VectorView p_data); - -#pragma mark - Render Commands - - void render_bind_uniform_sets(VectorView p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets); - void render_clear_attachments(VectorView p_attachment_clears, VectorView p_rects); - void render_set_viewport(VectorView p_viewports); - void render_set_scissor(VectorView p_scissors); - void render_set_blend_constants(const Color &p_constants); - void render_begin_pass(RDD::RenderPassID p_render_pass, - RDD::FramebufferID p_frameBuffer, - RDD::CommandBufferType p_cmd_buffer_type, - const Rect2i &p_rect, - VectorView p_clear_values); - void render_next_subpass(); - void render_draw(uint32_t p_vertex_count, - uint32_t p_instance_count, - uint32_t p_base_vertex, - uint32_t p_first_instance); - void render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets); - void 
render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset); - - void render_draw_indexed(uint32_t p_index_count, - uint32_t p_instance_count, - uint32_t p_first_index, - int32_t p_vertex_offset, - uint32_t p_first_instance); - - void render_draw_indexed_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride); - void render_draw_indexed_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride); - void render_draw_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride); - void render_draw_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride); - - void render_end_pass(); - -#pragma mark - Compute Commands - - void compute_bind_uniform_sets(VectorView p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets); - void compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); - void compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset); - -#pragma mark - Transfer - -private: - void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label); - -public: - void resolve_texture(RDD::TextureID p_src_texture, RDD::TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, RDD::TextureID p_dst_texture, RDD::TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap); - void clear_color_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, const Color &p_color, const RDD::TextureSubresourceRange &p_subresources); - void clear_depth_stencil_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, float 
p_depth, uint8_t p_stencil, const RDD::TextureSubresourceRange &p_subresources); - void clear_buffer(RDD::BufferID p_buffer, uint64_t p_offset, uint64_t p_size); - void copy_buffer(RDD::BufferID p_src_buffer, RDD::BufferID p_dst_buffer, VectorView p_regions); - void copy_texture(RDD::TextureID p_src_texture, RDD::TextureID p_dst_texture, VectorView p_regions); - void copy_buffer_to_texture(RDD::BufferID p_src_buffer, RDD::TextureID p_dst_texture, VectorView p_regions); - void copy_texture_to_buffer(RDD::TextureID p_src_texture, RDD::BufferID p_dst_buffer, VectorView p_regions); - -#pragma mark - Debugging - - void begin_label(const char *p_label_name, const Color &p_color); - void end_label(); - - MDCommandBuffer(id p_queue, RenderingDeviceDriverMetal *p_device_driver) : - device_driver(p_device_driver), queue(p_queue) { - type = MDCommandBufferStateType::None; - } - - MDCommandBuffer() = default; -}; - -#if (TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 140000) || (TARGET_OS_IOS && __IPHONE_OS_VERSION_MAX_ALLOWED < 170000) -#define MTLBindingAccess MTLArgumentAccess -#define MTLBindingAccessReadOnly MTLArgumentAccessReadOnly -#define MTLBindingAccessReadWrite MTLArgumentAccessReadWrite -#define MTLBindingAccessWriteOnly MTLArgumentAccessWriteOnly -#endif - -struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformInfo { - uint32_t binding; - BitField active_stages; - MTLDataType dataType = MTLDataTypeNone; - MTLBindingAccess access = MTLBindingAccessReadOnly; - MTLResourceUsage usage = 0; - MTLTextureType textureType = MTLTextureType2D; - uint32_t imageFormat = 0; - uint32_t arrayLength = 0; - bool isMultisampled = 0; - - struct Indexes { - uint32_t buffer = UINT32_MAX; - uint32_t texture = UINT32_MAX; - uint32_t sampler = UINT32_MAX; - }; - Indexes slot; - Indexes arg_buffer; - - enum class IndexType { - SLOT, - ARG, - }; - - _FORCE_INLINE_ Indexes &get_indexes(IndexType p_type) { - switch (p_type) { - case IndexType::SLOT: - return slot; - case 
IndexType::ARG: - return arg_buffer; - } - } -}; - -struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformSet { - LocalVector uniforms; - LocalVector dynamic_uniforms; - uint32_t buffer_size = 0; -}; - -struct ShaderCacheEntry; - -enum class ShaderLoadStrategy { - IMMEDIATE, - LAZY, - - /// The default strategy is to load the shader immediately. - DEFAULT = IMMEDIATE, -}; - -/// A Metal shader library. -@interface MDLibrary : NSObject { - ShaderCacheEntry *_entry; - NSString *_original_source; -}; -- (id)library; -- (NSError *)error; -- (void)setLabel:(NSString *)label; -#ifdef DEV_ENABLED -- (NSString *)originalSource; -#endif - -+ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry - device:(id)device - source:(NSString *)source - options:(MTLCompileOptions *)options - strategy:(ShaderLoadStrategy)strategy; - -+ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry - device:(id)device -#ifdef DEV_ENABLED - source:(NSString *)source -#endif - data:(dispatch_data_t)data; -@end - -/// A cache entry for a Metal shader library. -struct ShaderCacheEntry { - RenderingDeviceDriverMetal &owner; - /// A hash of the Metal shader source code. - SHA256Digest key; - CharString name; - RD::ShaderStage stage = RD::SHADER_STAGE_VERTEX; - /// This reference must be weak, to ensure that when the last strong reference to the library - /// is released, the cache entry is freed. - MDLibrary *__weak library = nil; - - /// Notify the cache that this entry is no longer needed. 
- void notify_free() const; - - ShaderCacheEntry(RenderingDeviceDriverMetal &p_owner, SHA256Digest p_key) : - owner(p_owner), key(p_key) { - } - ~ShaderCacheEntry() = default; -}; - -class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) DynamicOffsetLayout { - struct Data { - uint8_t offset : 4; - uint8_t count : 4; - }; - - union { - Data data[MAX_DYNAMIC_BUFFERS]; - uint64_t _val = 0; - }; - -public: - _FORCE_INLINE_ bool is_empty() const { return _val == 0; } - - _FORCE_INLINE_ uint32_t get_count(uint32_t p_set_index) const { - return data[p_set_index].count; - } - - _FORCE_INLINE_ uint32_t get_offset(uint32_t p_set_index) const { - return data[p_set_index].offset; - } - - _FORCE_INLINE_ void set_offset_count(uint32_t p_set_index, uint8_t p_offset, uint8_t p_count) { - data[p_set_index].offset = p_offset; - data[p_set_index].count = p_count; - } - - _FORCE_INLINE_ uint32_t get_offset_index_shift(uint32_t p_set_index, uint32_t p_dynamic_index = 0) const { - return (data[p_set_index].offset + p_dynamic_index) * 4u; - } -}; - -class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDShader { -public: - CharString name; - Vector sets; - struct { - BitField stages = {}; - uint32_t binding = UINT32_MAX; - uint32_t size = 0; - } push_constants; - DynamicOffsetLayout dynamic_offset_layout; - bool uses_argument_buffers = true; - - MDShader(CharString p_name, Vector p_sets, bool p_uses_argument_buffers) : - name(p_name), sets(p_sets), uses_argument_buffers(p_uses_argument_buffers) {} - virtual ~MDShader() = default; -}; - -class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputeShader final : public MDShader { -public: - MTLSize local = {}; - - MDLibrary *kernel; - - MDComputeShader(CharString p_name, Vector p_sets, bool p_uses_argument_buffers, MDLibrary *p_kernel); -}; - -class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDRenderShader final : public MDShader { -public: - bool needs_view_mask_buffer = false; - - MDLibrary *vert; - MDLibrary *frag; - - 
MDRenderShader(CharString p_name, - Vector p_sets, - bool p_needs_view_mask_buffer, - bool p_uses_argument_buffers, - MDLibrary *p_vert, MDLibrary *p_frag); -}; - -_FORCE_INLINE_ StageResourceUsage &operator|=(StageResourceUsage &p_a, uint32_t p_b) { - p_a = StageResourceUsage(uint32_t(p_a) | p_b); - return p_a; -} - -_FORCE_INLINE_ StageResourceUsage stage_resource_usage(RDC::ShaderStage p_stage, MTLResourceUsage p_usage) { - return StageResourceUsage(p_usage << (p_stage * 2)); -} - -_FORCE_INLINE_ MTLResourceUsage resource_usage_for_stage(StageResourceUsage p_usage, RDC::ShaderStage p_stage) { - return MTLResourceUsage((p_usage >> (p_stage * 2)) & 0b11); -} - -template <> -struct HashMapComparatorDefault { - static bool compare(const RDD::ShaderID &p_lhs, const RDD::ShaderID &p_rhs) { - return p_lhs.id == p_rhs.id; - } -}; - -template <> -struct HashMapComparatorDefault { - static bool compare(const RDD::BufferID &p_lhs, const RDD::BufferID &p_rhs) { - return p_lhs.id == p_rhs.id; - } -}; - -template <> -struct HashMapComparatorDefault { - static bool compare(const RDD::TextureID &p_lhs, const RDD::TextureID &p_rhs) { - return p_lhs.id == p_rhs.id; - } -}; - -template <> -struct HashMapHasherDefaultImpl { - static _FORCE_INLINE_ uint32_t hash(const RDD::BufferID &p_value) { - return HashMapHasherDefaultImpl::hash(p_value.id); - } -}; - -template <> -struct HashMapHasherDefaultImpl { - static _FORCE_INLINE_ uint32_t hash(const RDD::TextureID &p_value) { - return HashMapHasherDefaultImpl::hash(p_value.id); - } -}; - -// A type used to encode resources directly to a MTLCommandEncoder -struct DirectEncoder { - id __unsafe_unretained encoder; - BindingCache &cache; - enum Mode { - RENDER, - COMPUTE - }; - Mode mode; - - void set(id __unsafe_unretained *p_buffers, const NSUInteger *p_offsets, NSRange p_range); - void set(id __unsafe_unretained p_buffer, const NSUInteger p_offset, uint32_t p_index); - void set(id __unsafe_unretained *p_textures, NSRange p_range); - void 
set(id __unsafe_unretained *p_samplers, NSRange p_range); - - DirectEncoder(id __unsafe_unretained p_encoder, BindingCache &p_cache) : - encoder(p_encoder), cache(p_cache) { - if ([p_encoder conformsToProtocol:@protocol(MTLRenderCommandEncoder)]) { - mode = RENDER; - } else { - mode = COMPUTE; - } - } -}; - -class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDUniformSet { -public: - uint32_t index = 0; - id arg_buffer = nil; - ResourceUsageMap usage_to_resources; - LocalVector uniforms; - - void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count); - void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count); - void bind_uniforms_direct(MDShader *p_shader, DirectEncoder p_enc, uint32_t p_set_index, uint32_t p_dynamic_offsets); -}; - -class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDPipeline { -public: - MDPipelineType type; - - explicit MDPipeline(MDPipelineType p_type) : - type(p_type) {} - virtual ~MDPipeline() = default; -}; - -class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDRenderPipeline final : public MDPipeline { -public: - id state = nil; - id depth_stencil = nil; - uint32_t push_constant_size = 0; - uint32_t push_constant_stages_mask = 0; - SampleCount sample_count = SampleCount1; - - struct { - MTLCullMode cull_mode = MTLCullModeNone; - MTLTriangleFillMode fill_mode = MTLTriangleFillModeFill; - MTLDepthClipMode clip_mode = MTLDepthClipModeClip; - MTLWinding winding = MTLWindingClockwise; - MTLPrimitiveType render_primitive = MTLPrimitiveTypePoint; - - struct { - bool enabled = false; - } depth_test; - - struct { - bool enabled = false; - float depth_bias = 0.0; - float slope_scale = 0.0; - float clamp = 0.0; - _FORCE_INLINE_ void apply(id __unsafe_unretained p_enc) 
const { - if (!enabled) { - return; - } - [p_enc setDepthBias:depth_bias slopeScale:slope_scale clamp:clamp]; - } - } depth_bias; - - struct { - bool enabled = false; - uint32_t front_reference = 0; - uint32_t back_reference = 0; - _FORCE_INLINE_ void apply(id __unsafe_unretained p_enc) const { - if (!enabled) { - return; - } - [p_enc setStencilFrontReferenceValue:front_reference backReferenceValue:back_reference]; - } - } stencil; - - struct { - bool enabled = false; - float r = 0.0; - float g = 0.0; - float b = 0.0; - float a = 0.0; - - _FORCE_INLINE_ void apply(id __unsafe_unretained p_enc) const { - //if (!enabled) - // return; - [p_enc setBlendColorRed:r green:g blue:b alpha:a]; - } - } blend; - - _FORCE_INLINE_ void apply(id __unsafe_unretained p_enc) const { - [p_enc setCullMode:cull_mode]; - [p_enc setTriangleFillMode:fill_mode]; - [p_enc setDepthClipMode:clip_mode]; - [p_enc setFrontFacingWinding:winding]; - depth_bias.apply(p_enc); - stencil.apply(p_enc); - blend.apply(p_enc); - } - - } raster_state; - - MDRenderShader *shader = nil; - - MDRenderPipeline() : - MDPipeline(MDPipelineType::Render) {} - ~MDRenderPipeline() final = default; -}; - -class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputePipeline final : public MDPipeline { -public: - id state = nil; - struct { - MTLSize local = {}; - } compute_state; - - MDComputeShader *shader = nil; - - explicit MDComputePipeline(id p_state) : - MDPipeline(MDPipelineType::Compute), state(p_state) {} - ~MDComputePipeline() final = default; -}; - -namespace rid { -#define MAKE_ID(FROM, TO) \ - _FORCE_INLINE_ TO make(FROM p_obj) { \ - return TO(owned(p_obj)); \ - } - -MAKE_ID(id, RDD::CommandPoolID) - -#undef MAKE_ID -} //namespace rid diff --git a/drivers/metal/metal_objects.mm b/drivers/metal/metal_objects.mm deleted file mode 100644 index 76a3f67d0de..00000000000 --- a/drivers/metal/metal_objects.mm +++ /dev/null @@ -1,2548 +0,0 @@ 
-/**************************************************************************/ -/* metal_objects.mm */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/**************************************************************************/ - -/**************************************************************************/ -/* */ -/* Portions of this code were derived from MoltenVK. */ -/* */ -/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. 
*/ -/* (http://www.brenwill.com) */ -/* */ -/* Licensed under the Apache License, Version 2.0 (the "License"); */ -/* you may not use this file except in compliance with the License. */ -/* You may obtain a copy of the License at */ -/* */ -/* http://www.apache.org/licenses/LICENSE-2.0 */ -/* */ -/* Unless required by applicable law or agreed to in writing, software */ -/* distributed under the License is distributed on an "AS IS" BASIS, */ -/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ -/* implied. See the License for the specific language governing */ -/* permissions and limitations under the License. */ -/**************************************************************************/ - -#import "metal_objects.h" - -#import "metal_utils.h" -#import "pixel_formats.h" -#import "rendering_device_driver_metal.h" -#import "rendering_shader_container_metal.h" - -#import -#import - -// We have to undefine these macros because they are defined in NSObjCRuntime.h. -#undef MIN -#undef MAX - -void MDCommandBuffer::begin_label(const char *p_label_name, const Color &p_color) { - NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)p_label_name length:strlen(p_label_name) encoding:NSUTF8StringEncoding freeWhenDone:NO]; - [commandBuffer pushDebugGroup:s]; -} - -void MDCommandBuffer::end_label() { - [commandBuffer popDebugGroup]; -} - -void MDCommandBuffer::begin() { - DEV_ASSERT(commandBuffer == nil && !state_begin); - state_begin = true; - binding_cache.clear(); -} - -void MDCommandBuffer::end() { - switch (type) { - case MDCommandBufferStateType::None: - return; - case MDCommandBufferStateType::Render: - return render_end_pass(); - case MDCommandBufferStateType::Compute: - return _end_compute_dispatch(); - case MDCommandBufferStateType::Blit: - return _end_blit(); - } -} - -void MDCommandBuffer::commit() { - end(); - [commandBuffer commit]; - commandBuffer = nil; - state_begin = false; -} - -void MDCommandBuffer::bind_pipeline(RDD::PipelineID 
p_pipeline) { - MDPipeline *p = (MDPipeline *)(p_pipeline.id); - - // End current encoder if it is a compute encoder or blit encoder, - // as they do not have a defined end boundary in the RDD like render. - if (type == MDCommandBufferStateType::Compute) { - _end_compute_dispatch(); - } else if (type == MDCommandBufferStateType::Blit) { - _end_blit(); - } - - if (p->type == MDPipelineType::Render) { - DEV_ASSERT(type == MDCommandBufferStateType::Render); - MDRenderPipeline *rp = (MDRenderPipeline *)p; - - if (render.encoder == nil) { - // This error would happen if the render pass failed. - ERR_FAIL_NULL_MSG(render.desc, "Render pass descriptor is null."); - - // This condition occurs when there are no attachments when calling render_next_subpass() - // and is due to the SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS flag. - render.desc.defaultRasterSampleCount = static_cast(rp->sample_count); - -// NOTE(sgc): This is to test rdar://FB13605547 and will be deleted once fix is confirmed. 
-#if 0 - if (render.pipeline->sample_count == 4) { - static id tex = nil; - static id res_tex = nil; - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - Size2i sz = render.frameBuffer->size; - MTLTextureDescriptor *td = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm width:sz.width height:sz.height mipmapped:NO]; - td.textureType = MTLTextureType2DMultisample; - td.storageMode = MTLStorageModeMemoryless; - td.usage = MTLTextureUsageRenderTarget; - td.sampleCount = render.pipeline->sample_count; - tex = [device_driver->get_device() newTextureWithDescriptor:td]; - - td.textureType = MTLTextureType2D; - td.storageMode = MTLStorageModePrivate; - td.usage = MTLTextureUsageShaderWrite; - td.sampleCount = 1; - res_tex = [device_driver->get_device() newTextureWithDescriptor:td]; - }); - render.desc.colorAttachments[0].texture = tex; - render.desc.colorAttachments[0].loadAction = MTLLoadActionClear; - render.desc.colorAttachments[0].storeAction = MTLStoreActionMultisampleResolve; - - render.desc.colorAttachments[0].resolveTexture = res_tex; - } -#endif - render.encoder = [command_buffer() renderCommandEncoderWithDescriptor:render.desc]; - } - - if (render.pipeline != rp) { - render.dirty.set_flag((RenderState::DirtyFlag)(RenderState::DIRTY_PIPELINE | RenderState::DIRTY_RASTER)); - // Mark all uniforms as dirty, as variants of a shader pipeline may have a different entry point ABI, - // due to setting force_active_argument_buffer_resources = true for spirv_cross::CompilerMSL::Options. - // As a result, uniform sets with the same layout will generate redundant binding warnings when - // capturing a Metal frame in Xcode. - // - // If we don't mark as dirty, then some bindings will generate a validation error. 
- binding_cache.clear(); - render.mark_uniforms_dirty(); - if (render.pipeline != nullptr && render.pipeline->depth_stencil != rp->depth_stencil) { - render.dirty.set_flag(RenderState::DIRTY_DEPTH); - } - if (rp->raster_state.blend.enabled) { - render.dirty.set_flag(RenderState::DIRTY_BLEND); - } - render.pipeline = rp; - } - } else if (p->type == MDPipelineType::Compute) { - DEV_ASSERT(type == MDCommandBufferStateType::None); - type = MDCommandBufferStateType::Compute; - - if (compute.pipeline != p) { - compute.dirty.set_flag(ComputeState::DIRTY_PIPELINE); - binding_cache.clear(); - compute.mark_uniforms_dirty(); - compute.pipeline = (MDComputePipeline *)p; - } - } -} - -void MDCommandBuffer::encode_push_constant_data(RDD::ShaderID p_shader, VectorView p_data) { - switch (type) { - case MDCommandBufferStateType::Render: - case MDCommandBufferStateType::Compute: { - MDShader *shader = (MDShader *)(p_shader.id); - if (shader->push_constants.binding == UINT32_MAX) { - return; - } - push_constant_binding = shader->push_constants.binding; - void const *ptr = p_data.ptr(); - push_constant_data_len = p_data.size() * sizeof(uint32_t); - DEV_ASSERT(push_constant_data_len <= sizeof(push_constant_data)); - memcpy(push_constant_data, ptr, push_constant_data_len); - if (push_constant_data_len > 0) { - switch (type) { - case MDCommandBufferStateType::Render: - render.dirty.set_flag(RenderState::DirtyFlag::DIRTY_PUSH); - break; - case MDCommandBufferStateType::Compute: - compute.dirty.set_flag(ComputeState::DirtyFlag::DIRTY_PUSH); - break; - default: - break; - } - } - } break; - case MDCommandBufferStateType::Blit: - case MDCommandBufferStateType::None: - return; - } -} - -id MDCommandBuffer::_ensure_blit_encoder() { - switch (type) { - case MDCommandBufferStateType::None: - break; - case MDCommandBufferStateType::Render: - render_end_pass(); - break; - case MDCommandBufferStateType::Compute: - _end_compute_dispatch(); - break; - case MDCommandBufferStateType::Blit: - return 
blit.encoder; - } - - type = MDCommandBufferStateType::Blit; - blit.encoder = command_buffer().blitCommandEncoder; - return blit.encoder; -} - -_FORCE_INLINE_ static MTLSize mipmapLevelSizeFromTexture(id p_tex, NSUInteger p_level) { - MTLSize lvlSize; - lvlSize.width = MAX(p_tex.width >> p_level, 1UL); - lvlSize.height = MAX(p_tex.height >> p_level, 1UL); - lvlSize.depth = MAX(p_tex.depth >> p_level, 1UL); - return lvlSize; -} - -void MDCommandBuffer::resolve_texture(RDD::TextureID p_src_texture, RDD::TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, RDD::TextureID p_dst_texture, RDD::TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) { - id src_tex = rid::get(p_src_texture); - id dst_tex = rid::get(p_dst_texture); - - MTLRenderPassDescriptor *mtlRPD = [MTLRenderPassDescriptor renderPassDescriptor]; - MTLRenderPassColorAttachmentDescriptor *mtlColorAttDesc = mtlRPD.colorAttachments[0]; - mtlColorAttDesc.loadAction = MTLLoadActionLoad; - mtlColorAttDesc.storeAction = MTLStoreActionMultisampleResolve; - - mtlColorAttDesc.texture = src_tex; - mtlColorAttDesc.resolveTexture = dst_tex; - mtlColorAttDesc.level = p_src_mipmap; - mtlColorAttDesc.slice = p_src_layer; - mtlColorAttDesc.resolveLevel = p_dst_mipmap; - mtlColorAttDesc.resolveSlice = p_dst_layer; - encodeRenderCommandEncoderWithDescriptor(mtlRPD, @"Resolve Image"); -} - -void MDCommandBuffer::clear_color_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, const Color &p_color, const RDD::TextureSubresourceRange &p_subresources) { - id src_tex = rid::get(p_texture); - - if (src_tex.parentTexture) { - // Clear via the parent texture rather than the view. 
- src_tex = src_tex.parentTexture; - } - - PixelFormats &pf = device_driver->get_pixel_formats(); - - if (pf.isDepthFormat(src_tex.pixelFormat) || pf.isStencilFormat(src_tex.pixelFormat)) { - ERR_FAIL_MSG("invalid: depth or stencil texture format"); - } - - MTLRenderPassDescriptor *desc = MTLRenderPassDescriptor.renderPassDescriptor; - - if (p_subresources.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) { - MTLRenderPassColorAttachmentDescriptor *caDesc = desc.colorAttachments[0]; - caDesc.texture = src_tex; - caDesc.loadAction = MTLLoadActionClear; - caDesc.storeAction = MTLStoreActionStore; - caDesc.clearColor = MTLClearColorMake(p_color.r, p_color.g, p_color.b, p_color.a); - - // Extract the mipmap levels that are to be updated. - uint32_t mipLvlStart = p_subresources.base_mipmap; - uint32_t mipLvlCnt = p_subresources.mipmap_count; - uint32_t mipLvlEnd = mipLvlStart + mipLvlCnt; - - uint32_t levelCount = src_tex.mipmapLevelCount; - - // Extract the cube or array layers (slices) that are to be updated. - bool is3D = src_tex.textureType == MTLTextureType3D; - uint32_t layerStart = is3D ? 0 : p_subresources.base_layer; - uint32_t layerCnt = p_subresources.layer_count; - uint32_t layerEnd = layerStart + layerCnt; - - MetalFeatures const &features = device_driver->get_device_properties().features; - - // Iterate across mipmap levels and layers, and perform and empty render to clear each. - for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) { - ERR_FAIL_INDEX_MSG(mipLvl, levelCount, "mip level out of range"); - - caDesc.level = mipLvl; - - // If a 3D image, we need to get the depth for each level. - if (is3D) { - layerCnt = mipmapLevelSizeFromTexture(src_tex, mipLvl).depth; - layerEnd = layerStart + layerCnt; - } - - if ((features.layeredRendering && src_tex.sampleCount == 1) || features.multisampleLayeredRendering) { - // We can clear all layers at once. 
- if (is3D) { - caDesc.depthPlane = layerStart; - } else { - caDesc.slice = layerStart; - } - desc.renderTargetArrayLength = layerCnt; - encodeRenderCommandEncoderWithDescriptor(desc, @"Clear Image"); - } else { - for (uint32_t layer = layerStart; layer < layerEnd; layer++) { - if (is3D) { - caDesc.depthPlane = layer; - } else { - caDesc.slice = layer; - } - encodeRenderCommandEncoderWithDescriptor(desc, @"Clear Image"); - } - } - } - } -} - -void MDCommandBuffer::clear_depth_stencil_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, float p_depth, uint8_t p_stencil, const RDD::TextureSubresourceRange &p_subresources) { - id src_tex = rid::get(p_texture); - - if (src_tex.parentTexture) { - // Clear via the parent texture rather than the view. - src_tex = src_tex.parentTexture; - } - - PixelFormats &pf = device_driver->get_pixel_formats(); - - bool is_depth_format = pf.isDepthFormat(src_tex.pixelFormat); - bool is_stencil_format = pf.isStencilFormat(src_tex.pixelFormat); - - if (!is_depth_format && !is_stencil_format) { - ERR_FAIL_MSG("invalid: color texture format"); - } - - bool clear_depth = is_depth_format && p_subresources.aspect.has_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT); - bool clear_stencil = is_stencil_format && p_subresources.aspect.has_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT); - - if (clear_depth || clear_stencil) { - MTLRenderPassDescriptor *desc = MTLRenderPassDescriptor.renderPassDescriptor; - - MTLRenderPassDepthAttachmentDescriptor *daDesc = desc.depthAttachment; - if (clear_depth) { - daDesc.texture = src_tex; - daDesc.loadAction = MTLLoadActionClear; - daDesc.storeAction = MTLStoreActionStore; - daDesc.clearDepth = p_depth; - } - - MTLRenderPassStencilAttachmentDescriptor *saDesc = desc.stencilAttachment; - if (clear_stencil) { - saDesc.texture = src_tex; - saDesc.loadAction = MTLLoadActionClear; - saDesc.storeAction = MTLStoreActionStore; - saDesc.clearStencil = p_stencil; - } - - // Extract the mipmap levels that are to be updated. 
- uint32_t mipLvlStart = p_subresources.base_mipmap; - uint32_t mipLvlCnt = p_subresources.mipmap_count; - uint32_t mipLvlEnd = mipLvlStart + mipLvlCnt; - - uint32_t levelCount = src_tex.mipmapLevelCount; - - // Extract the cube or array layers (slices) that are to be updated. - bool is3D = src_tex.textureType == MTLTextureType3D; - uint32_t layerStart = is3D ? 0 : p_subresources.base_layer; - uint32_t layerCnt = p_subresources.layer_count; - uint32_t layerEnd = layerStart + layerCnt; - - MetalFeatures const &features = device_driver->get_device_properties().features; - - // Iterate across mipmap levels and layers, and perform and empty render to clear each. - for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) { - ERR_FAIL_INDEX_MSG(mipLvl, levelCount, "mip level out of range"); - - if (clear_depth) { - daDesc.level = mipLvl; - } - if (clear_stencil) { - saDesc.level = mipLvl; - } - - // If a 3D image, we need to get the depth for each level. - if (is3D) { - layerCnt = mipmapLevelSizeFromTexture(src_tex, mipLvl).depth; - layerEnd = layerStart + layerCnt; - } - - if ((features.layeredRendering && src_tex.sampleCount == 1) || features.multisampleLayeredRendering) { - // We can clear all layers at once. 
- if (is3D) { - if (clear_depth) { - daDesc.depthPlane = layerStart; - } - if (clear_stencil) { - saDesc.depthPlane = layerStart; - } - } else { - if (clear_depth) { - daDesc.slice = layerStart; - } - if (clear_stencil) { - saDesc.slice = layerStart; - } - } - desc.renderTargetArrayLength = layerCnt; - encodeRenderCommandEncoderWithDescriptor(desc, @"Clear Image"); - } else { - for (uint32_t layer = layerStart; layer < layerEnd; layer++) { - if (is3D) { - if (clear_depth) { - daDesc.depthPlane = layer; - } - if (clear_stencil) { - saDesc.depthPlane = layer; - } - } else { - if (clear_depth) { - daDesc.slice = layer; - } - if (clear_stencil) { - saDesc.slice = layer; - } - } - encodeRenderCommandEncoderWithDescriptor(desc, @"Clear Image"); - } - } - } - } -} - -void MDCommandBuffer::clear_buffer(RDD::BufferID p_buffer, uint64_t p_offset, uint64_t p_size) { - id blit_enc = _ensure_blit_encoder(); - const RDM::BufferInfo *buffer = (const RDM::BufferInfo *)p_buffer.id; - - [blit_enc fillBuffer:buffer->metal_buffer - range:NSMakeRange(p_offset, p_size) - value:0]; -} - -void MDCommandBuffer::copy_buffer(RDD::BufferID p_src_buffer, RDD::BufferID p_dst_buffer, VectorView p_regions) { - const RDM::BufferInfo *src = (const RDM::BufferInfo *)p_src_buffer.id; - const RDM::BufferInfo *dst = (const RDM::BufferInfo *)p_dst_buffer.id; - - id enc = _ensure_blit_encoder(); - - for (uint32_t i = 0; i < p_regions.size(); i++) { - RDD::BufferCopyRegion region = p_regions[i]; - [enc copyFromBuffer:src->metal_buffer - sourceOffset:region.src_offset - toBuffer:dst->metal_buffer - destinationOffset:region.dst_offset - size:region.size]; - } -} - -static MTLSize MTLSizeFromVector3i(Vector3i p_size) { - return MTLSizeMake(p_size.x, p_size.y, p_size.z); -} - -static MTLOrigin MTLOriginFromVector3i(Vector3i p_origin) { - return MTLOriginMake(p_origin.x, p_origin.y, p_origin.z); -} - -// Clamps the size so that the sum of the origin and size do not exceed the maximum size. 
-static inline MTLSize clampMTLSize(MTLSize p_size, MTLOrigin p_origin, MTLSize p_max_size) { - MTLSize clamped; - clamped.width = MIN(p_size.width, p_max_size.width - p_origin.x); - clamped.height = MIN(p_size.height, p_max_size.height - p_origin.y); - clamped.depth = MIN(p_size.depth, p_max_size.depth - p_origin.z); - return clamped; -} - -API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) -static bool isArrayTexture(MTLTextureType p_type) { - return (p_type == MTLTextureType3D || - p_type == MTLTextureType2DArray || - p_type == MTLTextureType2DMultisampleArray || - p_type == MTLTextureType1DArray); -} - -_FORCE_INLINE_ static bool operator==(MTLSize p_a, MTLSize p_b) { - return p_a.width == p_b.width && p_a.height == p_b.height && p_a.depth == p_b.depth; -} - -void MDCommandBuffer::copy_texture(RDD::TextureID p_src_texture, RDD::TextureID p_dst_texture, VectorView p_regions) { - id src = rid::get(p_src_texture); - id dst = rid::get(p_dst_texture); - - id enc = _ensure_blit_encoder(); - PixelFormats &pf = device_driver->get_pixel_formats(); - - MTLPixelFormat src_fmt = src.pixelFormat; - bool src_is_compressed = pf.getFormatType(src_fmt) == MTLFormatType::Compressed; - MTLPixelFormat dst_fmt = dst.pixelFormat; - bool dst_is_compressed = pf.getFormatType(dst_fmt) == MTLFormatType::Compressed; - - // Validate copy. - if (src.sampleCount != dst.sampleCount || pf.getBytesPerBlock(src_fmt) != pf.getBytesPerBlock(dst_fmt)) { - ERR_FAIL_MSG("Cannot copy between incompatible pixel formats, such as formats of different pixel sizes, or between images with different sample counts."); - } - - // If source and destination have different formats and at least one is compressed, a temporary buffer is required. 
- bool need_tmp_buffer = (src_fmt != dst_fmt) && (src_is_compressed || dst_is_compressed); - if (need_tmp_buffer) { - ERR_FAIL_MSG("not implemented: copy with intermediate buffer"); - } - - if (src_fmt != dst_fmt) { - // Map the source pixel format to the dst through a texture view on the source texture. - src = [src newTextureViewWithPixelFormat:dst_fmt]; - } - - for (uint32_t i = 0; i < p_regions.size(); i++) { - RDD::TextureCopyRegion region = p_regions[i]; - - MTLSize extent = MTLSizeFromVector3i(region.size); - - // If copies can be performed using direct texture-texture copying, do so. - uint32_t src_level = region.src_subresources.mipmap; - uint32_t src_base_layer = region.src_subresources.base_layer; - MTLSize src_extent = mipmapLevelSizeFromTexture(src, src_level); - uint32_t dst_level = region.dst_subresources.mipmap; - uint32_t dst_base_layer = region.dst_subresources.base_layer; - MTLSize dst_extent = mipmapLevelSizeFromTexture(dst, dst_level); - - // All layers may be copied at once, if the extent completely covers both images. - if (src_extent == extent && dst_extent == extent) { - [enc copyFromTexture:src - sourceSlice:src_base_layer - sourceLevel:src_level - toTexture:dst - destinationSlice:dst_base_layer - destinationLevel:dst_level - sliceCount:region.src_subresources.layer_count - levelCount:1]; - } else { - MTLOrigin src_origin = MTLOriginFromVector3i(region.src_offset); - MTLSize src_size = clampMTLSize(extent, src_origin, src_extent); - uint32_t layer_count = 0; - if ((src.textureType == MTLTextureType3D) != (dst.textureType == MTLTextureType3D)) { - // In the case, the number of layers to copy is in extent.depth. Use that value, - // then clamp the depth, so we don't try to copy more than Metal will allow. 
- layer_count = extent.depth; - src_size.depth = 1; - } else { - layer_count = region.src_subresources.layer_count; - } - MTLOrigin dst_origin = MTLOriginFromVector3i(region.dst_offset); - - for (uint32_t layer = 0; layer < layer_count; layer++) { - // We can copy between a 3D and a 2D image easily. Just copy between - // one slice of the 2D image and one plane of the 3D image at a time. - if ((src.textureType == MTLTextureType3D) == (dst.textureType == MTLTextureType3D)) { - [enc copyFromTexture:src - sourceSlice:src_base_layer + layer - sourceLevel:src_level - sourceOrigin:src_origin - sourceSize:src_size - toTexture:dst - destinationSlice:dst_base_layer + layer - destinationLevel:dst_level - destinationOrigin:dst_origin]; - } else if (src.textureType == MTLTextureType3D) { - [enc copyFromTexture:src - sourceSlice:src_base_layer - sourceLevel:src_level - sourceOrigin:MTLOriginMake(src_origin.x, src_origin.y, src_origin.z + layer) - sourceSize:src_size - toTexture:dst - destinationSlice:dst_base_layer + layer - destinationLevel:dst_level - destinationOrigin:dst_origin]; - } else { - DEV_ASSERT(dst.textureType == MTLTextureType3D); - [enc copyFromTexture:src - sourceSlice:src_base_layer + layer - sourceLevel:src_level - sourceOrigin:src_origin - sourceSize:src_size - toTexture:dst - destinationSlice:dst_base_layer - destinationLevel:dst_level - destinationOrigin:MTLOriginMake(dst_origin.x, dst_origin.y, dst_origin.z + layer)]; - } - } - } - } -} - -void MDCommandBuffer::copy_buffer_to_texture(RDD::BufferID p_src_buffer, RDD::TextureID p_dst_texture, VectorView p_regions) { - _copy_texture_buffer(CopySource::Buffer, p_dst_texture, p_src_buffer, p_regions); -} - -void MDCommandBuffer::copy_texture_to_buffer(RDD::TextureID p_src_texture, RDD::BufferID p_dst_buffer, VectorView p_regions) { - _copy_texture_buffer(CopySource::Texture, p_src_texture, p_dst_buffer, p_regions); -} - -void MDCommandBuffer::_copy_texture_buffer(CopySource p_source, - RDD::TextureID p_texture, 
- RDD::BufferID p_buffer, - VectorView p_regions) { - const RDM::BufferInfo *buffer = (const RDM::BufferInfo *)p_buffer.id; - id texture = rid::get(p_texture); - - id enc = _ensure_blit_encoder(); - - PixelFormats &pf = device_driver->get_pixel_formats(); - MTLPixelFormat mtlPixFmt = texture.pixelFormat; - - MTLBlitOption options = MTLBlitOptionNone; - if (pf.isPVRTCFormat(mtlPixFmt)) { - options |= MTLBlitOptionRowLinearPVRTC; - } - - for (uint32_t i = 0; i < p_regions.size(); i++) { - RDD::BufferTextureCopyRegion region = p_regions[i]; - - uint32_t mip_level = region.texture_subresource.mipmap; - MTLOrigin txt_origin = MTLOriginMake(region.texture_offset.x, region.texture_offset.y, region.texture_offset.z); - MTLSize src_extent = mipmapLevelSizeFromTexture(texture, mip_level); - MTLSize txt_size = clampMTLSize(MTLSizeMake(region.texture_region_size.x, region.texture_region_size.y, region.texture_region_size.z), - txt_origin, - src_extent); - - uint32_t buffImgWd = region.texture_region_size.x; - uint32_t buffImgHt = region.texture_region_size.y; - - NSUInteger bytesPerRow = pf.getBytesPerRow(mtlPixFmt, buffImgWd); - NSUInteger bytesPerImg = pf.getBytesPerLayer(mtlPixFmt, bytesPerRow, buffImgHt); - - MTLBlitOption blit_options = options; - - if (pf.isDepthFormat(mtlPixFmt) && pf.isStencilFormat(mtlPixFmt)) { - // Don't reduce depths of 32-bit depth/stencil formats. - if (region.texture_subresource.aspect == RDD::TEXTURE_ASPECT_DEPTH) { - if (pf.getBytesPerTexel(mtlPixFmt) != 4) { - bytesPerRow -= buffImgWd; - bytesPerImg -= buffImgWd * buffImgHt; - } - blit_options |= MTLBlitOptionDepthFromDepthStencil; - } else if (region.texture_subresource.aspect == RDD::TEXTURE_ASPECT_STENCIL) { - // The stencil component is always 1 byte per pixel. 
- bytesPerRow = buffImgWd; - bytesPerImg = buffImgWd * buffImgHt; - blit_options |= MTLBlitOptionStencilFromDepthStencil; - } - } - - if (!isArrayTexture(texture.textureType)) { - bytesPerImg = 0; - } - - if (p_source == CopySource::Buffer) { - [enc copyFromBuffer:buffer->metal_buffer - sourceOffset:region.buffer_offset - sourceBytesPerRow:bytesPerRow - sourceBytesPerImage:bytesPerImg - sourceSize:txt_size - toTexture:texture - destinationSlice:region.texture_subresource.layer - destinationLevel:mip_level - destinationOrigin:txt_origin - options:blit_options]; - } else { - [enc copyFromTexture:texture - sourceSlice:region.texture_subresource.layer - sourceLevel:mip_level - sourceOrigin:txt_origin - sourceSize:txt_size - toBuffer:buffer->metal_buffer - destinationOffset:region.buffer_offset - destinationBytesPerRow:bytesPerRow - destinationBytesPerImage:bytesPerImg - options:blit_options]; - } - } -} - -void MDCommandBuffer::encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label) { - switch (type) { - case MDCommandBufferStateType::None: - break; - case MDCommandBufferStateType::Render: - render_end_pass(); - break; - case MDCommandBufferStateType::Compute: - _end_compute_dispatch(); - break; - case MDCommandBufferStateType::Blit: - _end_blit(); - break; - } - - id enc = [command_buffer() renderCommandEncoderWithDescriptor:p_desc]; - if (p_label != nil) { - [enc pushDebugGroup:p_label]; - [enc popDebugGroup]; - } - [enc endEncoding]; -} - -#pragma mark - Render Commands - -void MDCommandBuffer::render_bind_uniform_sets(VectorView p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) { - DEV_ASSERT(type == MDCommandBufferStateType::Render); - - if (uint32_t new_size = p_first_set_index + p_set_count; render.uniform_sets.size() < new_size) { - uint32_t s = render.uniform_sets.size(); - render.uniform_sets.resize(new_size); - // Set intermediate values to null. 
- std::fill(&render.uniform_sets[s], render.uniform_sets.end().operator->(), nullptr); - } - - const MDShader *shader = (const MDShader *)p_shader.id; - DynamicOffsetLayout layout = shader->dynamic_offset_layout; - - // Clear bits for sets being rebound before OR'ing new values. - // This prevents corruption when the same set is bound multiple times - // with different frame indices (e.g., OPAQUE pass then ALPHA pass). - for (uint32_t i = 0; i < p_set_count && render.dynamic_offsets != 0; i++) { - uint32_t set_index = p_first_set_index + i; - uint32_t count = layout.get_count(set_index); - if (count > 0) { - uint32_t shift = layout.get_offset_index_shift(set_index); - uint32_t mask = ((1u << (count * 4u)) - 1u) << shift; - render.dynamic_offsets &= ~mask; - } - } - render.dynamic_offsets |= p_dynamic_offsets; - - for (size_t i = 0; i < p_set_count; ++i) { - MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id); - - uint32_t index = p_first_set_index + i; - if (render.uniform_sets[index] != set || layout.get_count(index) > 0) { - render.dirty.set_flag(RenderState::DIRTY_UNIFORMS); - render.uniform_set_mask |= 1ULL << index; - render.uniform_sets[index] = set; - } - } -} - -void MDCommandBuffer::render_clear_attachments(VectorView p_attachment_clears, VectorView p_rects) { - DEV_ASSERT(type == MDCommandBufferStateType::Render); - - const MDSubpass &subpass = render.get_subpass(); - - uint32_t vertex_count = p_rects.size() * 6 * subpass.view_count; - simd::float4 *vertices = ALLOCA_ARRAY(simd::float4, vertex_count); - simd::float4 clear_colors[ClearAttKey::ATTACHMENT_COUNT]; - - Size2i size = render.frameBuffer->size; - Rect2i render_area = render.clip_to_render_area({ { 0, 0 }, size }); - size = Size2i(render_area.position.x + render_area.size.width, render_area.position.y + render_area.size.height); - _populate_vertices(vertices, size, p_rects); - - ClearAttKey key; - key.sample_count = render.pass->get_sample_count(); - if (subpass.view_count > 1) { - 
key.enable_layered_rendering(); - } - - float depth_value = 0; - uint32_t stencil_value = 0; - - for (uint32_t i = 0; i < p_attachment_clears.size(); i++) { - RDD::AttachmentClear const &attClear = p_attachment_clears[i]; - uint32_t attachment_index; - if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) { - attachment_index = attClear.color_attachment; - } else { - attachment_index = subpass.depth_stencil_reference.attachment; - } - - MDAttachment const &mda = render.pass->attachments[attachment_index]; - if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) { - key.set_color_format(attachment_index, mda.format); - clear_colors[attachment_index] = { - attClear.value.color.r, - attClear.value.color.g, - attClear.value.color.b, - attClear.value.color.a - }; - } - - if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT)) { - key.set_depth_format(mda.format); - depth_value = attClear.value.depth; - } - - if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT)) { - key.set_stencil_format(mda.format); - stencil_value = attClear.value.stencil; - } - } - clear_colors[ClearAttKey::DEPTH_INDEX] = { - depth_value, - depth_value, - depth_value, - depth_value - }; - - id enc = render.encoder; - - MDResourceCache &cache = device_driver->get_resource_cache(); - - [enc pushDebugGroup:@"ClearAttachments"]; - [enc setRenderPipelineState:cache.get_clear_render_pipeline_state(key, nil)]; - [enc setDepthStencilState:cache.get_depth_stencil_state( - key.is_depth_enabled(), - key.is_stencil_enabled())]; - [enc setStencilReferenceValue:stencil_value]; - [enc setCullMode:MTLCullModeNone]; - [enc setTriangleFillMode:MTLTriangleFillModeFill]; - [enc setDepthBias:0 slopeScale:0 clamp:0]; - [enc setViewport:{ 0, 0, (double)size.width, (double)size.height, 0.0, 1.0 }]; - [enc setScissorRect:{ 0, 0, (NSUInteger)size.width, (NSUInteger)size.height }]; - - [enc setVertexBytes:clear_colors length:sizeof(clear_colors) atIndex:0]; - [enc setFragmentBytes:clear_colors 
length:sizeof(clear_colors) atIndex:0]; - [enc setVertexBytes:vertices length:vertex_count * sizeof(vertices[0]) atIndex:device_driver->get_metal_buffer_index_for_vertex_attribute_binding(VERT_CONTENT_BUFFER_INDEX)]; - - [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:vertex_count]; - [enc popDebugGroup]; - - render.dirty.set_flag((RenderState::DirtyFlag)(RenderState::DIRTY_PIPELINE | RenderState::DIRTY_DEPTH | RenderState::DIRTY_RASTER)); - binding_cache.clear(); - render.mark_uniforms_dirty({ 0 }); // Mark index 0 dirty, if there is already a binding for index 0. - render.mark_viewport_dirty(); - render.mark_scissors_dirty(); - render.mark_vertex_dirty(); - render.mark_blend_dirty(); -} - -void MDCommandBuffer::_render_set_dirty_state() { - _render_bind_uniform_sets(); - - if (render.dirty.has_flag(RenderState::DIRTY_PUSH)) { - if (push_constant_binding != UINT32_MAX) { - [render.encoder setVertexBytes:push_constant_data - length:push_constant_data_len - atIndex:push_constant_binding]; - [render.encoder setFragmentBytes:push_constant_data - length:push_constant_data_len - atIndex:push_constant_binding]; - } - } - - MDSubpass const &subpass = render.get_subpass(); - if (subpass.view_count > 1) { - uint32_t view_range[2] = { 0, subpass.view_count }; - [render.encoder setVertexBytes:view_range length:sizeof(view_range) atIndex:VIEW_MASK_BUFFER_INDEX]; - [render.encoder setFragmentBytes:view_range length:sizeof(view_range) atIndex:VIEW_MASK_BUFFER_INDEX]; - } - - if (render.dirty.has_flag(RenderState::DIRTY_PIPELINE)) { - [render.encoder setRenderPipelineState:render.pipeline->state]; - } - - if (render.dirty.has_flag(RenderState::DIRTY_VIEWPORT)) { - [render.encoder setViewports:render.viewports.ptr() count:render.viewports.size()]; - } - - if (render.dirty.has_flag(RenderState::DIRTY_DEPTH)) { - [render.encoder setDepthStencilState:render.pipeline->depth_stencil]; - } - - if (render.dirty.has_flag(RenderState::DIRTY_RASTER)) { - 
render.pipeline->raster_state.apply(render.encoder); - } - - if (render.dirty.has_flag(RenderState::DIRTY_SCISSOR) && !render.scissors.is_empty()) { - size_t len = render.scissors.size(); - MTLScissorRect *rects = ALLOCA_ARRAY(MTLScissorRect, len); - for (size_t i = 0; i < len; i++) { - rects[i] = render.clip_to_render_area(render.scissors[i]); - } - [render.encoder setScissorRects:rects count:len]; - } - - if (render.dirty.has_flag(RenderState::DIRTY_BLEND) && render.blend_constants.has_value()) { - [render.encoder setBlendColorRed:render.blend_constants->r green:render.blend_constants->g blue:render.blend_constants->b alpha:render.blend_constants->a]; - } - - if (render.dirty.has_flag(RenderState::DIRTY_VERTEX)) { - uint32_t p_binding_count = render.vertex_buffers.size(); - if (p_binding_count > 0) { - uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1); - [render.encoder setVertexBuffers:render.vertex_buffers.ptr() - offsets:render.vertex_offsets.ptr() - withRange:NSMakeRange(first, p_binding_count)]; - } - } - - render.resource_tracker.encode(render.encoder); - - render.dirty.clear(); -} - -void MDCommandBuffer::render_set_viewport(VectorView p_viewports) { - render.viewports.resize(p_viewports.size()); - for (uint32_t i = 0; i < p_viewports.size(); i += 1) { - Rect2i const &vp = p_viewports[i]; - render.viewports[i] = { - .originX = static_cast(vp.position.x), - .originY = static_cast(vp.position.y), - .width = static_cast(vp.size.width), - .height = static_cast(vp.size.height), - .znear = 0.0, - .zfar = 1.0, - }; - } - - render.dirty.set_flag(RenderState::DIRTY_VIEWPORT); -} - -void MDCommandBuffer::render_set_scissor(VectorView p_scissors) { - render.scissors.resize(p_scissors.size()); - for (uint32_t i = 0; i < p_scissors.size(); i += 1) { - Rect2i const &vp = p_scissors[i]; - render.scissors[i] = { - .x = static_cast(vp.position.x), - .y = static_cast(vp.position.y), - .width = 
static_cast(vp.size.width), - .height = static_cast(vp.size.height), - }; - } - - render.dirty.set_flag(RenderState::DIRTY_SCISSOR); -} - -void MDCommandBuffer::render_set_blend_constants(const Color &p_constants) { - DEV_ASSERT(type == MDCommandBufferStateType::Render); - if (render.blend_constants != p_constants) { - render.blend_constants = p_constants; - render.dirty.set_flag(RenderState::DIRTY_BLEND); - } -} - -void ResourceTracker::merge_from(const ResourceUsageMap &p_from) { - for (KeyValue const &keyval : p_from) { - ResourceVector *resources = _current.getptr(keyval.key); - if (resources == nullptr) { - resources = &_current.insert(keyval.key, ResourceVector())->value; - } - // Reserve space for the new resources, assuming they are all added. - resources->reserve(resources->size() + keyval.value.size()); - - uint32_t i = 0, j = 0; - MTLResourceUnsafe *resources_ptr = resources->ptr(); - const MTLResourceUnsafe *keyval_ptr = keyval.value.ptr(); - // 2-way merge. - while (i < resources->size() && j < keyval.value.size()) { - if (resources_ptr[i] < keyval_ptr[j]) { - i++; - } else if (resources_ptr[i] > keyval_ptr[j]) { - ResourceUsageEntry *existing = nullptr; - if ((existing = _previous.getptr(keyval_ptr[j])) == nullptr) { - existing = &_previous.insert(keyval_ptr[j], keyval.key)->value; - resources->insert(i, keyval_ptr[j]); - } else { - if (existing->usage != keyval.key) { - existing->usage |= keyval.key; - resources->insert(i, keyval_ptr[j]); - } - } - i++; - j++; - } else { - i++; - j++; - } - } - // Append the remaining resources. 
- for (; j < keyval.value.size(); j++) { - ResourceUsageEntry *existing = nullptr; - if ((existing = _previous.getptr(keyval_ptr[j])) == nullptr) { - existing = &_previous.insert(keyval_ptr[j], keyval.key)->value; - resources->push_back(keyval_ptr[j]); - } else { - if (existing->usage != keyval.key) { - existing->usage |= keyval.key; - resources->push_back(keyval_ptr[j]); - } - } - } - } -} - -void ResourceTracker::encode(id __unsafe_unretained p_enc) { - for (KeyValue const &keyval : _current) { - if (keyval.value.is_empty()) { - continue; - } - - MTLResourceUsage vert_usage = resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_VERTEX); - MTLResourceUsage frag_usage = resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_FRAGMENT); - if (vert_usage == frag_usage) { - [p_enc useResources:keyval.value.ptr() count:keyval.value.size() usage:vert_usage stages:MTLRenderStageVertex | MTLRenderStageFragment]; - } else { - if (vert_usage != 0) { - [p_enc useResources:keyval.value.ptr() count:keyval.value.size() usage:vert_usage stages:MTLRenderStageVertex]; - } - if (frag_usage != 0) { - [p_enc useResources:keyval.value.ptr() count:keyval.value.size() usage:frag_usage stages:MTLRenderStageFragment]; - } - } - } - - // Keep the keys for now and clear the vectors to reduce churn. - for (KeyValue &v : _current) { - v.value.clear(); - } -} - -void ResourceTracker::encode(id __unsafe_unretained p_enc) { - for (KeyValue const &keyval : _current) { - if (keyval.value.is_empty()) { - continue; - } - MTLResourceUsage usage = resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_COMPUTE); - if (usage != 0) { - [p_enc useResources:keyval.value.ptr() count:keyval.value.size() usage:usage]; - } - } - - // Keep the keys for now and clear the vectors to reduce churn. - for (KeyValue &v : _current) { - v.value.clear(); - } -} - -void ResourceTracker::reset() { - // Keep the keys for now, as they are likely to be used repeatedly. 
- for (KeyValue &v : _previous) { - if (v.value.usage == ResourceUnused) { - v.value.unused++; - if (v.value.unused >= RESOURCE_UNUSED_CLEANUP_COUNT) { - _scratch.push_back(v.key); - } - } else { - v.value = ResourceUnused; - v.value.unused = 0; - } - } - - // Clear up resources that weren't used for the last pass. - for (const MTLResourceUnsafe &res : _scratch) { - _previous.erase(res); - } - _scratch.clear(); -} - -void MDCommandBuffer::_render_bind_uniform_sets() { - DEV_ASSERT(type == MDCommandBufferStateType::Render); - if (!render.dirty.has_flag(RenderState::DIRTY_UNIFORMS)) { - return; - } - - render.dirty.clear_flag(RenderState::DIRTY_UNIFORMS); - uint64_t set_uniforms = render.uniform_set_mask; - render.uniform_set_mask = 0; - - MDRenderShader *shader = render.pipeline->shader; - const uint32_t dynamic_offsets = render.dynamic_offsets; - - while (set_uniforms != 0) { - // Find the index of the next set bit. - uint32_t index = (uint32_t)__builtin_ctzll(set_uniforms); - // Clear the set bit. 
- set_uniforms &= (set_uniforms - 1); - MDUniformSet *set = render.uniform_sets[index]; - if (set == nullptr || index >= (uint32_t)shader->sets.size()) { - continue; - } - if (shader->uses_argument_buffers) { - set->bind_uniforms_argument_buffers(shader, render, index, dynamic_offsets, device_driver->frame_index(), device_driver->frame_count()); - } else { - DirectEncoder de(render.encoder, binding_cache); - set->bind_uniforms_direct(shader, de, index, dynamic_offsets); - } - } -} - -void MDCommandBuffer::_populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView p_rects) { - uint32_t idx = 0; - for (uint32_t i = 0; i < p_rects.size(); i++) { - Rect2i const &rect = p_rects[i]; - idx = _populate_vertices(p_vertices, idx, rect, p_fb_size); - } -} - -uint32_t MDCommandBuffer::_populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size) { - // Determine the positions of the four edges of the - // clear rectangle as a fraction of the attachment size. - float leftPos = (float)(p_rect.position.x) / (float)p_fb_size.width; - float rightPos = (float)(p_rect.size.width) / (float)p_fb_size.width + leftPos; - float bottomPos = (float)(p_rect.position.y) / (float)p_fb_size.height; - float topPos = (float)(p_rect.size.height) / (float)p_fb_size.height + bottomPos; - - // Transform to clip-space coordinates, which are bounded by (-1.0 < p < 1.0) in clip-space. - leftPos = (leftPos * 2.0f) - 1.0f; - rightPos = (rightPos * 2.0f) - 1.0f; - bottomPos = (bottomPos * 2.0f) - 1.0f; - topPos = (topPos * 2.0f) - 1.0f; - - simd::float4 vtx; - - uint32_t idx = p_index; - uint32_t endLayer = render.get_subpass().view_count; - - for (uint32_t layer = 0; layer < endLayer; layer++) { - vtx.z = 0.0; - vtx.w = (float)layer; - - // Top left vertex - First triangle. - vtx.y = topPos; - vtx.x = leftPos; - p_vertices[idx++] = vtx; - - // Bottom left vertex. 
- vtx.y = bottomPos; - vtx.x = leftPos; - p_vertices[idx++] = vtx; - - // Bottom right vertex. - vtx.y = bottomPos; - vtx.x = rightPos; - p_vertices[idx++] = vtx; - - // Bottom right vertex - Second triangle. - p_vertices[idx++] = vtx; - - // Top right vertex. - vtx.y = topPos; - vtx.x = rightPos; - p_vertices[idx++] = vtx; - - // Top left vertex. - vtx.y = topPos; - vtx.x = leftPos; - p_vertices[idx++] = vtx; - } - - return idx; -} - -void MDCommandBuffer::render_begin_pass(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_frameBuffer, RDD::CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView p_clear_values) { - DEV_ASSERT(command_buffer() != nil); - end(); - - MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id); - MDFrameBuffer *fb = (MDFrameBuffer *)(p_frameBuffer.id); - - type = MDCommandBufferStateType::Render; - render.pass = pass; - render.current_subpass = UINT32_MAX; - render.render_area = p_rect; - render.clear_values.resize(p_clear_values.size()); - for (uint32_t i = 0; i < p_clear_values.size(); i++) { - render.clear_values[i] = p_clear_values[i]; - } - render.is_rendering_entire_area = (p_rect.position == Point2i(0, 0)) && p_rect.size == fb->size; - render.frameBuffer = fb; - render_next_subpass(); -} - -void MDCommandBuffer::_end_render_pass() { - MDFrameBuffer const &fb_info = *render.frameBuffer; - MDSubpass const &subpass = render.get_subpass(); - - PixelFormats &pf = device_driver->get_pixel_formats(); - - for (uint32_t i = 0; i < subpass.resolve_references.size(); i++) { - uint32_t color_index = subpass.color_references[i].attachment; - uint32_t resolve_index = subpass.resolve_references[i].attachment; - DEV_ASSERT((color_index == RDD::AttachmentReference::UNUSED) == (resolve_index == RDD::AttachmentReference::UNUSED)); - if (color_index == RDD::AttachmentReference::UNUSED || !fb_info.has_texture(color_index)) { - continue; - } - - id resolve_tex = fb_info.get_texture(resolve_index); - - 
CRASH_COND_MSG(!flags::all(pf.getCapabilities(resolve_tex.pixelFormat), kMTLFmtCapsResolve), "not implemented: unresolvable texture types"); - // see: https://github.com/KhronosGroup/MoltenVK/blob/d20d13fe2735adb845636a81522df1b9d89c0fba/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm#L407 - } - - render.end_encoding(); -} - -void MDCommandBuffer::_render_clear_render_area() { - MDRenderPass const &pass = *render.pass; - MDSubpass const &subpass = render.get_subpass(); - - uint32_t ds_index = subpass.depth_stencil_reference.attachment; - bool clear_depth = (ds_index != RDD::AttachmentReference::UNUSED && pass.attachments[ds_index].shouldClear(subpass, false)); - bool clear_stencil = (ds_index != RDD::AttachmentReference::UNUSED && pass.attachments[ds_index].shouldClear(subpass, true)); - - uint32_t color_count = subpass.color_references.size(); - uint32_t clears_size = color_count + (clear_depth || clear_stencil ? 1 : 0); - if (clears_size == 0) { - return; - } - - RDD::AttachmentClear *clears = ALLOCA_ARRAY(RDD::AttachmentClear, clears_size); - uint32_t clears_count = 0; - - for (uint32_t i = 0; i < color_count; i++) { - uint32_t idx = subpass.color_references[i].attachment; - if (idx != RDD::AttachmentReference::UNUSED && pass.attachments[idx].shouldClear(subpass, false)) { - clears[clears_count++] = { .aspect = RDD::TEXTURE_ASPECT_COLOR_BIT, .color_attachment = idx, .value = render.clear_values[idx] }; - } - } - - if (clear_depth || clear_stencil) { - MDAttachment const &attachment = pass.attachments[ds_index]; - BitField bits = {}; - if (clear_depth && attachment.type & MDAttachmentType::Depth) { - bits.set_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT); - } - if (clear_stencil && attachment.type & MDAttachmentType::Stencil) { - bits.set_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT); - } - - clears[clears_count++] = { .aspect = bits, .color_attachment = ds_index, .value = render.clear_values[ds_index] }; - } - - if (clears_count == 0) { - return; - } - - 
render_clear_attachments(VectorView(clears, clears_count), { render.render_area }); -} - -void MDCommandBuffer::render_next_subpass() { - DEV_ASSERT(command_buffer() != nil); - - if (render.current_subpass == UINT32_MAX) { - render.current_subpass = 0; - } else { - _end_render_pass(); - render.current_subpass++; - } - - MDFrameBuffer const &fb = *render.frameBuffer; - MDRenderPass const &pass = *render.pass; - MDSubpass const &subpass = render.get_subpass(); - - MTLRenderPassDescriptor *desc = MTLRenderPassDescriptor.renderPassDescriptor; - - if (subpass.view_count > 1) { - desc.renderTargetArrayLength = subpass.view_count; - } - - PixelFormats &pf = device_driver->get_pixel_formats(); - - uint32_t attachmentCount = 0; - for (uint32_t i = 0; i < subpass.color_references.size(); i++) { - uint32_t idx = subpass.color_references[i].attachment; - if (idx == RDD::AttachmentReference::UNUSED) { - continue; - } - - attachmentCount += 1; - MTLRenderPassColorAttachmentDescriptor *ca = desc.colorAttachments[i]; - - uint32_t resolveIdx = subpass.resolve_references.is_empty() ? 
RDD::AttachmentReference::UNUSED : subpass.resolve_references[i].attachment; - bool has_resolve = resolveIdx != RDD::AttachmentReference::UNUSED; - bool can_resolve = true; - if (resolveIdx != RDD::AttachmentReference::UNUSED) { - id resolve_tex = fb.get_texture(resolveIdx); - can_resolve = flags::all(pf.getCapabilities(resolve_tex.pixelFormat), kMTLFmtCapsResolve); - if (can_resolve) { - ca.resolveTexture = resolve_tex; - } else { - CRASH_NOW_MSG("unimplemented: using a texture format that is not supported for resolve"); - } - } - - MDAttachment const &attachment = pass.attachments[idx]; - - id tex = fb.get_texture(idx); - ERR_FAIL_NULL_MSG(tex, "Frame buffer color texture is null."); - - if ((attachment.type & MDAttachmentType::Color)) { - if (attachment.configureDescriptor(ca, pf, subpass, tex, render.is_rendering_entire_area, has_resolve, can_resolve, false)) { - Color clearColor = render.clear_values[idx].color; - ca.clearColor = MTLClearColorMake(clearColor.r, clearColor.g, clearColor.b, clearColor.a); - } - } - } - - if (subpass.depth_stencil_reference.attachment != RDD::AttachmentReference::UNUSED) { - attachmentCount += 1; - uint32_t idx = subpass.depth_stencil_reference.attachment; - MDAttachment const &attachment = pass.attachments[idx]; - id tex = fb.get_texture(idx); - ERR_FAIL_NULL_MSG(tex, "Frame buffer depth / stencil texture is null."); - if (attachment.type & MDAttachmentType::Depth) { - MTLRenderPassDepthAttachmentDescriptor *da = desc.depthAttachment; - if (attachment.configureDescriptor(da, pf, subpass, tex, render.is_rendering_entire_area, false, false, false)) { - da.clearDepth = render.clear_values[idx].depth; - } - } - - if (attachment.type & MDAttachmentType::Stencil) { - MTLRenderPassStencilAttachmentDescriptor *sa = desc.stencilAttachment; - if (attachment.configureDescriptor(sa, pf, subpass, tex, render.is_rendering_entire_area, false, false, true)) { - sa.clearStencil = render.clear_values[idx].stencil; - } - } - } - - 
desc.renderTargetWidth = MAX((NSUInteger)MIN(render.render_area.position.x + render.render_area.size.width, fb.size.width), 1u); - desc.renderTargetHeight = MAX((NSUInteger)MIN(render.render_area.position.y + render.render_area.size.height, fb.size.height), 1u); - - if (attachmentCount == 0) { - // If there are no attachments, delay the creation of the encoder, - // so we can use a matching sample count for the pipeline, by setting - // the defaultRasterSampleCount from the pipeline's sample count. - render.desc = desc; - } else { - render.encoder = [command_buffer() renderCommandEncoderWithDescriptor:desc]; - - if (!render.is_rendering_entire_area) { - _render_clear_render_area(); - } - // With a new encoder, all state is dirty. - render.dirty.set_flag(RenderState::DIRTY_ALL); - } -} - -void MDCommandBuffer::render_draw(uint32_t p_vertex_count, - uint32_t p_instance_count, - uint32_t p_base_vertex, - uint32_t p_first_instance) { - DEV_ASSERT(type == MDCommandBufferStateType::Render); - ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer."); - - _render_set_dirty_state(); - - MDSubpass const &subpass = render.get_subpass(); - if (subpass.view_count > 1) { - p_instance_count *= subpass.view_count; - } - - DEV_ASSERT(render.dirty == 0); - - id enc = render.encoder; - - [enc drawPrimitives:render.pipeline->raster_state.render_primitive - vertexStart:p_base_vertex - vertexCount:p_vertex_count - instanceCount:p_instance_count - baseInstance:p_first_instance]; -} - -void MDCommandBuffer::render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) { - DEV_ASSERT(type == MDCommandBufferStateType::Render); - - render.vertex_buffers.resize(p_binding_count); - render.vertex_offsets.resize(p_binding_count); - - // Are the existing buffer bindings the same? - bool same = true; - - // Reverse the buffers, as their bindings are assigned in descending order. 
- for (uint32_t i = 0; i < p_binding_count; i += 1) { - const RenderingDeviceDriverMetal::BufferInfo *buf_info = (const RenderingDeviceDriverMetal::BufferInfo *)p_buffers[p_binding_count - i - 1].id; - - NSUInteger dynamic_offset = 0; - if (buf_info->is_dynamic()) { - const MetalBufferDynamicInfo *dyn_buf = (const MetalBufferDynamicInfo *)buf_info; - uint64_t frame_idx = p_dynamic_offsets & 0x3; - p_dynamic_offsets >>= 2; - dynamic_offset = frame_idx * dyn_buf->size_bytes; - } - if (render.vertex_buffers[i] != buf_info->metal_buffer) { - render.vertex_buffers[i] = buf_info->metal_buffer; - same = false; - } - - render.vertex_offsets[i] = dynamic_offset + p_offsets[p_binding_count - i - 1]; - } - - if (render.encoder) { - uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1); - if (same) { - NSUInteger *offset_ptr = render.vertex_offsets.ptr(); - for (uint32_t i = first; i < first + p_binding_count; i++) { - [render.encoder setVertexBufferOffset:*offset_ptr atIndex:i]; - offset_ptr++; - } - } else { - [render.encoder setVertexBuffers:render.vertex_buffers.ptr() - offsets:render.vertex_offsets.ptr() - withRange:NSMakeRange(first, p_binding_count)]; - } - render.dirty.clear_flag(RenderState::DIRTY_VERTEX); - } else { - render.dirty.set_flag(RenderState::DIRTY_VERTEX); - } -} - -void MDCommandBuffer::render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset) { - DEV_ASSERT(type == MDCommandBufferStateType::Render); - - const RenderingDeviceDriverMetal::BufferInfo *buffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_buffer.id; - - render.index_buffer = buffer->metal_buffer; - render.index_type = p_format == RDD::IndexBufferFormat::INDEX_BUFFER_FORMAT_UINT16 ? 
MTLIndexTypeUInt16 : MTLIndexTypeUInt32; - render.index_offset = p_offset; -} - -void MDCommandBuffer::render_draw_indexed(uint32_t p_index_count, - uint32_t p_instance_count, - uint32_t p_first_index, - int32_t p_vertex_offset, - uint32_t p_first_instance) { - DEV_ASSERT(type == MDCommandBufferStateType::Render); - ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer."); - - _render_set_dirty_state(); - - MDSubpass const &subpass = render.get_subpass(); - if (subpass.view_count > 1) { - p_instance_count *= subpass.view_count; - } - - id enc = render.encoder; - - uint32_t index_offset = render.index_offset; - index_offset += p_first_index * (render.index_type == MTLIndexTypeUInt16 ? sizeof(uint16_t) : sizeof(uint32_t)); - - [enc drawIndexedPrimitives:render.pipeline->raster_state.render_primitive - indexCount:p_index_count - indexType:render.index_type - indexBuffer:render.index_buffer - indexBufferOffset:index_offset - instanceCount:p_instance_count - baseVertex:p_vertex_offset - baseInstance:p_first_instance]; -} - -void MDCommandBuffer::render_draw_indexed_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { - DEV_ASSERT(type == MDCommandBufferStateType::Render); - ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer."); - - _render_set_dirty_state(); - - id enc = render.encoder; - - const RenderingDeviceDriverMetal::BufferInfo *indirect_buffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_indirect_buffer.id; - NSUInteger indirect_offset = p_offset; - - for (uint32_t i = 0; i < p_draw_count; i++) { - [enc drawIndexedPrimitives:render.pipeline->raster_state.render_primitive - indexType:render.index_type - indexBuffer:render.index_buffer - indexBufferOffset:0 - indirectBuffer:indirect_buffer->metal_buffer - indirectBufferOffset:indirect_offset]; - indirect_offset += p_stride; - } -} - -void MDCommandBuffer::render_draw_indexed_indirect_count(RDD::BufferID 
p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { - ERR_FAIL_MSG("not implemented"); -} - -void MDCommandBuffer::render_draw_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { - DEV_ASSERT(type == MDCommandBufferStateType::Render); - ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer."); - - _render_set_dirty_state(); - - id enc = render.encoder; - - const RenderingDeviceDriverMetal::BufferInfo *indirect_buffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_indirect_buffer.id; - NSUInteger indirect_offset = p_offset; - - for (uint32_t i = 0; i < p_draw_count; i++) { - [enc drawPrimitives:render.pipeline->raster_state.render_primitive - indirectBuffer:indirect_buffer->metal_buffer - indirectBufferOffset:indirect_offset]; - indirect_offset += p_stride; - } -} - -void MDCommandBuffer::render_draw_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { - ERR_FAIL_MSG("not implemented"); -} - -void MDCommandBuffer::render_end_pass() { - DEV_ASSERT(type == MDCommandBufferStateType::Render); - - render.end_encoding(); - render.reset(); - reset(); -} - -#pragma mark - RenderState - -void MDCommandBuffer::RenderState::reset() { - pass = nil; - frameBuffer = nil; - pipeline = nil; - current_subpass = UINT32_MAX; - render_area = {}; - is_rendering_entire_area = false; - desc = nil; - encoder = nil; - index_buffer = nil; - index_type = MTLIndexTypeUInt16; - dirty = DIRTY_NONE; - uniform_sets.clear(); - dynamic_offsets = 0; - uniform_set_mask = 0; - clear_values.clear(); - viewports.clear(); - scissors.clear(); - blend_constants.reset(); - bzero(vertex_buffers.ptr(), sizeof(id __unsafe_unretained) * vertex_buffers.size()); - vertex_buffers.clear(); - bzero(vertex_offsets.ptr(), 
sizeof(NSUInteger) * vertex_offsets.size()); - vertex_offsets.clear(); - resource_tracker.reset(); -} - -void MDCommandBuffer::RenderState::end_encoding() { - if (encoder == nil) { - return; - } - - [encoder endEncoding]; - encoder = nil; -} - -#pragma mark - ComputeState - -void MDCommandBuffer::ComputeState::end_encoding() { - if (encoder == nil) { - return; - } - - [encoder endEncoding]; - encoder = nil; -} - -#pragma mark - Compute - -void MDCommandBuffer::_compute_set_dirty_state() { - if (compute.dirty.has_flag(ComputeState::DIRTY_PIPELINE)) { - compute.encoder = [command_buffer() computeCommandEncoderWithDispatchType:MTLDispatchTypeConcurrent]; - [compute.encoder setComputePipelineState:compute.pipeline->state]; - } - - _compute_bind_uniform_sets(); - - if (compute.dirty.has_flag(ComputeState::DIRTY_PUSH)) { - if (push_constant_binding != UINT32_MAX) { - [compute.encoder setBytes:push_constant_data - length:push_constant_data_len - atIndex:push_constant_binding]; - } - } - - compute.resource_tracker.encode(compute.encoder); - - compute.dirty.clear(); -} - -void MDCommandBuffer::_compute_bind_uniform_sets() { - DEV_ASSERT(type == MDCommandBufferStateType::Compute); - if (!compute.dirty.has_flag(ComputeState::DIRTY_UNIFORMS)) { - return; - } - - compute.dirty.clear_flag(ComputeState::DIRTY_UNIFORMS); - uint64_t set_uniforms = compute.uniform_set_mask; - compute.uniform_set_mask = 0; - - MDComputeShader *shader = compute.pipeline->shader; - const uint32_t dynamic_offsets = compute.dynamic_offsets; - - while (set_uniforms != 0) { - // Find the index of the next set bit. - uint32_t index = (uint32_t)__builtin_ctzll(set_uniforms); - // Clear the set bit. 
- set_uniforms &= (set_uniforms - 1); - MDUniformSet *set = compute.uniform_sets[index]; - if (set == nullptr || index >= (uint32_t)shader->sets.size()) { - continue; - } - if (shader->uses_argument_buffers) { - set->bind_uniforms_argument_buffers(shader, compute, index, dynamic_offsets, device_driver->frame_index(), device_driver->frame_count()); - } else { - DirectEncoder de(compute.encoder, binding_cache); - set->bind_uniforms_direct(shader, de, index, dynamic_offsets); - } - } -} - -void MDCommandBuffer::ComputeState::reset() { - pipeline = nil; - encoder = nil; - dirty = DIRTY_NONE; - uniform_sets.clear(); - dynamic_offsets = 0; - uniform_set_mask = 0; - resource_tracker.reset(); -} - -void MDCommandBuffer::compute_bind_uniform_sets(VectorView p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) { - DEV_ASSERT(type == MDCommandBufferStateType::Compute); - - if (uint32_t new_size = p_first_set_index + p_set_count; compute.uniform_sets.size() < new_size) { - uint32_t s = compute.uniform_sets.size(); - compute.uniform_sets.resize(new_size); - // Set intermediate values to null. - std::fill(&compute.uniform_sets[s], compute.uniform_sets.end().operator->(), nullptr); - } - - const MDShader *shader = (const MDShader *)p_shader.id; - DynamicOffsetLayout layout = shader->dynamic_offset_layout; - - // Clear bits for sets being rebound before OR'ing new values. - // This prevents corruption when the same set is bound multiple times - // with different frame indices. 
- for (uint32_t i = 0; i < p_set_count && compute.dynamic_offsets != 0; i++) { - uint32_t set_index = p_first_set_index + i; - uint32_t count = layout.get_count(set_index); - if (count > 0) { - uint32_t shift = layout.get_offset_index_shift(set_index); - uint32_t mask = ((1u << (count * 4u)) - 1u) << shift; - compute.dynamic_offsets &= ~mask; - } - } - compute.dynamic_offsets |= p_dynamic_offsets; - - for (size_t i = 0; i < p_set_count; ++i) { - MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id); - - uint32_t index = p_first_set_index + i; - if (compute.uniform_sets[index] != set || layout.get_count(index) > 0) { - compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS); - compute.uniform_set_mask |= 1ULL << index; - compute.uniform_sets[index] = set; - } - } -} - -void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { - DEV_ASSERT(type == MDCommandBufferStateType::Compute); - - _compute_set_dirty_state(); - - MTLRegion region = MTLRegionMake3D(0, 0, 0, p_x_groups, p_y_groups, p_z_groups); - - id enc = compute.encoder; - [enc dispatchThreadgroups:region.size threadsPerThreadgroup:compute.pipeline->compute_state.local]; -} - -void MDCommandBuffer::compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset) { - DEV_ASSERT(type == MDCommandBufferStateType::Compute); - - _compute_set_dirty_state(); - - const RenderingDeviceDriverMetal::BufferInfo *indirectBuffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_indirect_buffer.id; - - id enc = compute.encoder; - [enc dispatchThreadgroupsWithIndirectBuffer:indirectBuffer->metal_buffer indirectBufferOffset:p_offset threadsPerThreadgroup:compute.pipeline->compute_state.local]; -} - -void MDCommandBuffer::reset() { - push_constant_data_len = 0; - type = MDCommandBufferStateType::None; -} - -void MDCommandBuffer::_end_compute_dispatch() { - DEV_ASSERT(type == MDCommandBufferStateType::Compute); - - compute.end_encoding(); - compute.reset(); - reset(); 
-} - -void MDCommandBuffer::_end_blit() { - DEV_ASSERT(type == MDCommandBufferStateType::Blit); - - [blit.encoder endEncoding]; - blit.reset(); - reset(); -} - -MDComputeShader::MDComputeShader(CharString p_name, - Vector p_sets, - bool p_uses_argument_buffers, - MDLibrary *p_kernel) : - MDShader(p_name, p_sets, p_uses_argument_buffers), kernel(p_kernel) { -} - -MDRenderShader::MDRenderShader(CharString p_name, - Vector p_sets, - bool p_needs_view_mask_buffer, - bool p_uses_argument_buffers, - MDLibrary *_Nonnull p_vert, MDLibrary *_Nonnull p_frag) : - MDShader(p_name, p_sets, p_uses_argument_buffers), - needs_view_mask_buffer(p_needs_view_mask_buffer), - vert(p_vert), - frag(p_frag) { -} - -void DirectEncoder::set(__unsafe_unretained id *p_textures, NSRange p_range) { - if (cache.update(p_range, p_textures)) { - switch (mode) { - case RENDER: { - id __unsafe_unretained enc = (id)encoder; - [enc setVertexTextures:p_textures withRange:p_range]; - [enc setFragmentTextures:p_textures withRange:p_range]; - } break; - case COMPUTE: { - id __unsafe_unretained enc = (id)encoder; - [enc setTextures:p_textures withRange:p_range]; - } break; - } - } -} - -void DirectEncoder::set(__unsafe_unretained id *p_buffers, const NSUInteger *p_offsets, NSRange p_range) { - if (cache.update(p_range, p_buffers, p_offsets)) { - switch (mode) { - case RENDER: { - id __unsafe_unretained enc = (id)encoder; - [enc setVertexBuffers:p_buffers offsets:p_offsets withRange:p_range]; - [enc setFragmentBuffers:p_buffers offsets:p_offsets withRange:p_range]; - } break; - case COMPUTE: { - id __unsafe_unretained enc = (id)encoder; - [enc setBuffers:p_buffers offsets:p_offsets withRange:p_range]; - } break; - } - } -} - -void DirectEncoder::set(id __unsafe_unretained p_buffer, const NSUInteger p_offset, uint32_t p_index) { - if (cache.update(p_buffer, p_offset, p_index)) { - switch (mode) { - case RENDER: { - id __unsafe_unretained enc = (id)encoder; - [enc setVertexBuffer:p_buffer offset:p_offset 
atIndex:p_index]; - [enc setFragmentBuffer:p_buffer offset:p_offset atIndex:p_index]; - } break; - case COMPUTE: { - id __unsafe_unretained enc = (id)encoder; - [enc setBuffer:p_buffer offset:p_offset atIndex:p_index]; - } break; - } - } -} - -void DirectEncoder::set(__unsafe_unretained id *p_samplers, NSRange p_range) { - if (cache.update(p_range, p_samplers)) { - switch (mode) { - case RENDER: { - id __unsafe_unretained enc = (id)encoder; - [enc setVertexSamplerStates:p_samplers withRange:p_range]; - [enc setFragmentSamplerStates:p_samplers withRange:p_range]; - } break; - case COMPUTE: { - id __unsafe_unretained enc = (id)encoder; - [enc setSamplerStates:p_samplers withRange:p_range]; - } break; - } - } -} - -void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count) { - DEV_ASSERT(p_shader->uses_argument_buffers); - DEV_ASSERT(p_state.encoder != nil); - DEV_ASSERT(p_shader->dynamic_offset_layout.is_empty()); // Argument buffers do not support dynamic offsets. 
- - id __unsafe_unretained enc = p_state.encoder; - - p_state.resource_tracker.merge_from(usage_to_resources); - - [enc setVertexBuffer:arg_buffer - offset:0 - atIndex:p_set_index]; - [enc setFragmentBuffer:arg_buffer offset:0 atIndex:p_set_index]; -} - -void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, DirectEncoder p_enc, uint32_t p_set_index, uint32_t p_dynamic_offsets) { - DEV_ASSERT(!p_shader->uses_argument_buffers); - - UniformSet const &set = p_shader->sets[p_set_index]; - DynamicOffsetLayout layout = p_shader->dynamic_offset_layout; - uint32_t dynamic_index = 0; - - for (uint32_t i = 0; i < MIN(uniforms.size(), set.uniforms.size()); i++) { - RDD::BoundUniform const &uniform = uniforms[i]; - const UniformInfo &ui = set.uniforms[i]; - const UniformInfo::Indexes &indexes = ui.slot; - - uint32_t frame_idx; - if (uniform.is_dynamic()) { - uint32_t shift = layout.get_offset_index_shift(p_set_index, dynamic_index); - dynamic_index++; - frame_idx = (p_dynamic_offsets >> shift) & 0xf; - } else { - frame_idx = 0; - } - - switch (uniform.type) { - case RDD::UNIFORM_TYPE_SAMPLER: { - size_t count = uniform.ids.size(); - id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (size_t j = 0; j < count; j += 1) { - objects[j] = rid::get(uniform.ids[j].id); - } - NSRange sampler_range = NSMakeRange(indexes.sampler, count); - p_enc.set(objects, sampler_range); - } break; - case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: { - size_t count = uniform.ids.size() / 2; - id __unsafe_unretained *textures = ALLOCA_ARRAY(id __unsafe_unretained, count); - id __unsafe_unretained *samplers = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (uint32_t j = 0; j < count; j += 1) { - id sampler = rid::get(uniform.ids[j * 2 + 0]); - id texture = rid::get(uniform.ids[j * 2 + 1]); - samplers[j] = sampler; - textures[j] = texture; - } - NSRange sampler_range = NSMakeRange(indexes.sampler, count); - NSRange texture_range = NSMakeRange(indexes.texture, count); 
- p_enc.set(samplers, sampler_range); - p_enc.set(textures, texture_range); - } break; - case RDD::UNIFORM_TYPE_TEXTURE: { - size_t count = uniform.ids.size(); - id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (size_t j = 0; j < count; j += 1) { - id obj = rid::get(uniform.ids[j]); - objects[j] = obj; - } - NSRange texture_range = NSMakeRange(indexes.texture, count); - p_enc.set(objects, texture_range); - } break; - case RDD::UNIFORM_TYPE_IMAGE: { - size_t count = uniform.ids.size(); - id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (size_t j = 0; j < count; j += 1) { - id obj = rid::get(uniform.ids[j]); - objects[j] = obj; - } - NSRange texture_range = NSMakeRange(indexes.texture, count); - p_enc.set(objects, texture_range); - - if (indexes.buffer != UINT32_MAX) { - // Emulated atomic image access. - id __unsafe_unretained *bufs = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (size_t j = 0; j < count; j += 1) { - id obj = rid::get(uniform.ids[j]); - id tex = obj.parentTexture ? 
obj.parentTexture : obj; - id buf = tex.buffer; - bufs[j] = buf; - } - NSUInteger *offs = ALLOCA_ARRAY(NSUInteger, count); - bzero(offs, sizeof(NSUInteger) * count); - NSRange buffer_range = NSMakeRange(indexes.buffer, count); - p_enc.set(bufs, offs, buffer_range); - } - } break; - case RDD::UNIFORM_TYPE_TEXTURE_BUFFER: { - ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER"); - } break; - case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: { - ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER"); - } break; - case RDD::UNIFORM_TYPE_IMAGE_BUFFER: { - CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER"); - } break; - case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: - case RDD::UNIFORM_TYPE_STORAGE_BUFFER: { - const RDM::BufferInfo *buf_info = (const RDM::BufferInfo *)uniform.ids[0].id; - p_enc.set(buf_info->metal_buffer, 0, indexes.buffer); - } break; - case RDD::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: - case RDD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: { - const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id; - p_enc.set(buf_info->metal_buffer, frame_idx * buf_info->size_bytes, indexes.buffer); - } break; - case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: { - size_t count = uniform.ids.size(); - id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (size_t j = 0; j < count; j += 1) { - id obj = rid::get(uniform.ids[j]); - objects[j] = obj; - } - NSRange texture_range = NSMakeRange(indexes.texture, count); - p_enc.set(objects, texture_range); - } break; - default: { - DEV_ASSERT(false); - } - } - } -} - -void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count) { - DEV_ASSERT(p_shader->uses_argument_buffers); - DEV_ASSERT(p_state.encoder != nil); - - id enc = p_state.encoder; - - p_state.resource_tracker.merge_from(usage_to_resources); - [enc 
setBuffer:arg_buffer - offset:0 - atIndex:p_set_index]; -} - -MTLFmtCaps MDSubpass::getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const { - MTLFmtCaps caps = kMTLFmtCapsNone; - - for (RDD::AttachmentReference const &ar : input_references) { - if (ar.attachment == p_index) { - flags::set(caps, kMTLFmtCapsRead); - break; - } - } - - for (RDD::AttachmentReference const &ar : color_references) { - if (ar.attachment == p_index) { - flags::set(caps, kMTLFmtCapsColorAtt); - break; - } - } - - for (RDD::AttachmentReference const &ar : resolve_references) { - if (ar.attachment == p_index) { - flags::set(caps, kMTLFmtCapsResolve); - break; - } - } - - if (depth_stencil_reference.attachment == p_index) { - flags::set(caps, kMTLFmtCapsDSAtt); - } - - return caps; -} - -void MDAttachment::linkToSubpass(const MDRenderPass &p_pass) { - firstUseSubpassIndex = UINT32_MAX; - lastUseSubpassIndex = 0; - - for (MDSubpass const &subpass : p_pass.subpasses) { - MTLFmtCaps reqCaps = subpass.getRequiredFmtCapsForAttachmentAt(index); - if (reqCaps) { - firstUseSubpassIndex = MIN(subpass.subpass_index, firstUseSubpassIndex); - lastUseSubpassIndex = MAX(subpass.subpass_index, lastUseSubpassIndex); - } - } -} - -MTLStoreAction MDAttachment::getMTLStoreAction(MDSubpass const &p_subpass, - bool p_is_rendering_entire_area, - bool p_has_resolve, - bool p_can_resolve, - bool p_is_stencil) const { - if (!p_is_rendering_entire_area || !isLastUseOf(p_subpass)) { - return p_has_resolve && p_can_resolve ? MTLStoreActionStoreAndMultisampleResolve : MTLStoreActionStore; - } - - switch (p_is_stencil ? stencilStoreAction : storeAction) { - case MTLStoreActionStore: - return p_has_resolve && p_can_resolve ? MTLStoreActionStoreAndMultisampleResolve : MTLStoreActionStore; - case MTLStoreActionDontCare: - return p_has_resolve ? (p_can_resolve ? 
MTLStoreActionMultisampleResolve : MTLStoreActionStore) : MTLStoreActionDontCare; - - default: - return MTLStoreActionStore; - } -} - -bool MDAttachment::configureDescriptor(MTLRenderPassAttachmentDescriptor *p_desc, - PixelFormats &p_pf, - MDSubpass const &p_subpass, - id p_attachment, - bool p_is_rendering_entire_area, - bool p_has_resolve, - bool p_can_resolve, - bool p_is_stencil) const { - p_desc.texture = p_attachment; - - MTLLoadAction load; - if (!p_is_rendering_entire_area || !isFirstUseOf(p_subpass)) { - load = MTLLoadActionLoad; - } else { - load = p_is_stencil ? stencilLoadAction : loadAction; - } - - p_desc.loadAction = load; - - MTLPixelFormat mtlFmt = p_attachment.pixelFormat; - bool isDepthFormat = p_pf.isDepthFormat(mtlFmt); - bool isStencilFormat = p_pf.isStencilFormat(mtlFmt); - if (isStencilFormat && !p_is_stencil && !isDepthFormat) { - p_desc.storeAction = MTLStoreActionDontCare; - } else { - p_desc.storeAction = getMTLStoreAction(p_subpass, p_is_rendering_entire_area, p_has_resolve, p_can_resolve, p_is_stencil); - } - - return load == MTLLoadActionClear; -} - -bool MDAttachment::shouldClear(const MDSubpass &p_subpass, bool p_is_stencil) const { - // If the subpass is not the first subpass to use this attachment, don't clear this attachment. - if (p_subpass.subpass_index != firstUseSubpassIndex) { - return false; - } - return (p_is_stencil ? 
stencilLoadAction : loadAction) == MTLLoadActionClear; -} - -MDRenderPass::MDRenderPass(Vector &p_attachments, Vector &p_subpasses) : - attachments(p_attachments), subpasses(p_subpasses) { - for (MDAttachment &att : attachments) { - att.linkToSubpass(*this); - } -} - -#pragma mark - Resource Factory - -id MDResourceFactory::new_func(NSString *p_source, NSString *p_name, NSError **p_error) { - @autoreleasepool { - NSError *err = nil; - MTLCompileOptions *options = [MTLCompileOptions new]; - id device = device_driver->get_device(); - id mtlLib = [device newLibraryWithSource:p_source - options:options - error:&err]; - if (err) { - if (p_error != nil) { - *p_error = err; - } - } - return [mtlLib newFunctionWithName:p_name]; - } -} - -id MDResourceFactory::new_clear_vert_func(ClearAttKey &p_key) { - @autoreleasepool { - NSString *msl = [NSString stringWithFormat:@R"( -#include -using namespace metal; - -typedef struct { - float4 a_position [[attribute(0)]]; -} AttributesPos; - -typedef struct { - float4 colors[9]; -} ClearColorsIn; - -typedef struct { - float4 v_position [[position]]; - uint layer%s; -} VaryingsPos; - -vertex VaryingsPos vertClear(AttributesPos attributes [[stage_in]], constant ClearColorsIn& ccIn [[buffer(0)]]) { - VaryingsPos varyings; - varyings.v_position = float4(attributes.a_position.x, -attributes.a_position.y, ccIn.colors[%d].r, 1.0); - varyings.layer = uint(attributes.a_position.w); - return varyings; -} -)", p_key.is_layered_rendering_enabled() ? 
" [[render_target_array_index]]" : "", ClearAttKey::DEPTH_INDEX]; - - return new_func(msl, @"vertClear", nil); - } -} - -id MDResourceFactory::new_clear_frag_func(ClearAttKey &p_key) { - @autoreleasepool { - NSMutableString *msl = [NSMutableString stringWithCapacity:2048]; - - [msl appendFormat:@R"( -#include -using namespace metal; - -typedef struct { - float4 v_position [[position]]; -} VaryingsPos; - -typedef struct { - float4 colors[9]; -} ClearColorsIn; - -typedef struct { -)"]; - - for (uint32_t caIdx = 0; caIdx < ClearAttKey::COLOR_COUNT; caIdx++) { - if (p_key.is_enabled(caIdx)) { - NSString *typeStr = get_format_type_string((MTLPixelFormat)p_key.pixel_formats[caIdx]); - [msl appendFormat:@" %@4 color%u [[color(%u)]];\n", typeStr, caIdx, caIdx]; - } - } - [msl appendFormat:@R"(} ClearColorsOut; - -fragment ClearColorsOut fragClear(VaryingsPos varyings [[stage_in]], constant ClearColorsIn& ccIn [[buffer(0)]]) { - - ClearColorsOut ccOut; -)"]; - for (uint32_t caIdx = 0; caIdx < ClearAttKey::COLOR_COUNT; caIdx++) { - if (p_key.is_enabled(caIdx)) { - NSString *typeStr = get_format_type_string((MTLPixelFormat)p_key.pixel_formats[caIdx]); - [msl appendFormat:@" ccOut.color%u = %@4(ccIn.colors[%u]);\n", caIdx, typeStr, caIdx]; - } - } - [msl appendString:@R"( return ccOut; -})"]; - - return new_func(msl, @"fragClear", nil); - } -} - -NSString *MDResourceFactory::get_format_type_string(MTLPixelFormat p_fmt) { - switch (device_driver->get_pixel_formats().getFormatType(p_fmt)) { - case MTLFormatType::ColorInt8: - case MTLFormatType::ColorInt16: - return @"short"; - case MTLFormatType::ColorUInt8: - case MTLFormatType::ColorUInt16: - return @"ushort"; - case MTLFormatType::ColorInt32: - return @"int"; - case MTLFormatType::ColorUInt32: - return @"uint"; - case MTLFormatType::ColorHalf: - return @"half"; - case MTLFormatType::ColorFloat: - case MTLFormatType::DepthStencil: - case MTLFormatType::Compressed: - return @"float"; - case MTLFormatType::None: - return 
@"unexpected_MTLPixelFormatInvalid"; - } -} - -id MDResourceFactory::new_depth_stencil_state(bool p_use_depth, bool p_use_stencil) { - MTLDepthStencilDescriptor *dsDesc = [MTLDepthStencilDescriptor new]; - dsDesc.depthCompareFunction = MTLCompareFunctionAlways; - dsDesc.depthWriteEnabled = p_use_depth; - - if (p_use_stencil) { - MTLStencilDescriptor *sDesc = [MTLStencilDescriptor new]; - sDesc.stencilCompareFunction = MTLCompareFunctionAlways; - sDesc.stencilFailureOperation = MTLStencilOperationReplace; - sDesc.depthFailureOperation = MTLStencilOperationReplace; - sDesc.depthStencilPassOperation = MTLStencilOperationReplace; - - dsDesc.frontFaceStencil = sDesc; - dsDesc.backFaceStencil = sDesc; - } else { - dsDesc.frontFaceStencil = nil; - dsDesc.backFaceStencil = nil; - } - - return [device_driver->get_device() newDepthStencilStateWithDescriptor:dsDesc]; -} - -id MDResourceFactory::new_clear_pipeline_state(ClearAttKey &p_key, NSError **p_error) { - PixelFormats &pixFmts = device_driver->get_pixel_formats(); - - id vtxFunc = new_clear_vert_func(p_key); - id fragFunc = new_clear_frag_func(p_key); - MTLRenderPipelineDescriptor *plDesc = [MTLRenderPipelineDescriptor new]; - plDesc.label = @"ClearRenderAttachments"; - plDesc.vertexFunction = vtxFunc; - plDesc.fragmentFunction = fragFunc; - plDesc.rasterSampleCount = p_key.sample_count; - plDesc.inputPrimitiveTopology = MTLPrimitiveTopologyClassTriangle; - - for (uint32_t caIdx = 0; caIdx < ClearAttKey::COLOR_COUNT; caIdx++) { - MTLRenderPipelineColorAttachmentDescriptor *colorDesc = plDesc.colorAttachments[caIdx]; - colorDesc.pixelFormat = (MTLPixelFormat)p_key.pixel_formats[caIdx]; - colorDesc.writeMask = p_key.is_enabled(caIdx) ? 
MTLColorWriteMaskAll : MTLColorWriteMaskNone; - } - - MTLPixelFormat mtlDepthFormat = p_key.depth_format(); - if (pixFmts.isDepthFormat(mtlDepthFormat)) { - plDesc.depthAttachmentPixelFormat = mtlDepthFormat; - } - - MTLPixelFormat mtlStencilFormat = p_key.stencil_format(); - if (pixFmts.isStencilFormat(mtlStencilFormat)) { - plDesc.stencilAttachmentPixelFormat = mtlStencilFormat; - } - - MTLVertexDescriptor *vtxDesc = plDesc.vertexDescriptor; - - // Vertex attribute descriptors. - MTLVertexAttributeDescriptorArray *vaDescArray = vtxDesc.attributes; - MTLVertexAttributeDescriptor *vaDesc; - NSUInteger vtxBuffIdx = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(VERT_CONTENT_BUFFER_INDEX); - NSUInteger vtxStride = 0; - - // Vertex location. - vaDesc = vaDescArray[0]; - vaDesc.format = MTLVertexFormatFloat4; - vaDesc.bufferIndex = vtxBuffIdx; - vaDesc.offset = vtxStride; - vtxStride += sizeof(simd::float4); - - // Vertex attribute buffer. - MTLVertexBufferLayoutDescriptorArray *vbDescArray = vtxDesc.layouts; - MTLVertexBufferLayoutDescriptor *vbDesc = vbDescArray[vtxBuffIdx]; - vbDesc.stepFunction = MTLVertexStepFunctionPerVertex; - vbDesc.stepRate = 1; - vbDesc.stride = vtxStride; - - return [device_driver->get_device() newRenderPipelineStateWithDescriptor:plDesc error:p_error]; -} - -id MDResourceCache::get_clear_render_pipeline_state(ClearAttKey &p_key, NSError **p_error) { - HashMap::ConstIterator it = clear_states.find(p_key); - if (it != clear_states.end()) { - return it->value; - } - - id state = resource_factory->new_clear_pipeline_state(p_key, p_error); - clear_states[p_key] = state; - return state; -} - -id MDResourceCache::get_depth_stencil_state(bool p_use_depth, bool p_use_stencil) { - id __strong *val; - if (p_use_depth && p_use_stencil) { - val = &clear_depth_stencil_state.all; - } else if (p_use_depth) { - val = &clear_depth_stencil_state.depth_only; - } else if (p_use_stencil) { - val = &clear_depth_stencil_state.stencil_only; - } 
else { - val = &clear_depth_stencil_state.none; - } - DEV_ASSERT(val != nullptr); - - if (*val == nil) { - *val = resource_factory->new_depth_stencil_state(p_use_depth, p_use_stencil); - } - return *val; -} - -static const char *SHADER_STAGE_NAMES[] = { - [RD::SHADER_STAGE_VERTEX] = "vert", - [RD::SHADER_STAGE_FRAGMENT] = "frag", - [RD::SHADER_STAGE_TESSELATION_CONTROL] = "tess_ctrl", - [RD::SHADER_STAGE_TESSELATION_EVALUATION] = "tess_eval", - [RD::SHADER_STAGE_COMPUTE] = "comp", -}; - -void ShaderCacheEntry::notify_free() const { - owner.shader_cache_free_entry(key); -} - -@interface MDLibrary () -- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry -#ifdef DEV_ENABLED - source:(NSString *)source; -#endif -; -@end - -/// Loads the MTLLibrary when the library is first accessed. -@interface MDLazyLibrary : MDLibrary { - id _library; - NSError *_error; - std::shared_mutex _mu; - bool _loaded; - id _device; - NSString *_source; - MTLCompileOptions *_options; -} -- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry - device:(id)device - source:(NSString *)source - options:(MTLCompileOptions *)options; -@end - -/// Loads the MTLLibrary immediately on initialization, using an asynchronous API. 
-@interface MDImmediateLibrary : MDLibrary { - id _library; - NSError *_error; - std::mutex _cv_mutex; - std::condition_variable _cv; - std::atomic _complete; - bool _ready; -} -- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry - device:(id)device - source:(NSString *)source - options:(MTLCompileOptions *)options; -@end - -@interface MDBinaryLibrary : MDLibrary { - id _library; - NSError *_error; -} -- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry - device:(id)device -#ifdef DEV_ENABLED - source:(NSString *)source -#endif - data:(dispatch_data_t)data; -@end - -@implementation MDLibrary - -+ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry - device:(id)device - source:(NSString *)source - options:(MTLCompileOptions *)options - strategy:(ShaderLoadStrategy)strategy { - switch (strategy) { - case ShaderLoadStrategy::IMMEDIATE: - [[fallthrough]]; - default: - return [[MDImmediateLibrary alloc] initWithCacheEntry:entry device:device source:source options:options]; - case ShaderLoadStrategy::LAZY: - return [[MDLazyLibrary alloc] initWithCacheEntry:entry device:device source:source options:options]; - } -} - -+ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry - device:(id)device -#ifdef DEV_ENABLED - source:(NSString *)source -#endif - data:(dispatch_data_t)data { - return [[MDBinaryLibrary alloc] initWithCacheEntry:entry - device:device -#ifdef DEV_ENABLED - source:source -#endif - data:data]; -} - -#ifdef DEV_ENABLED -- (NSString *)originalSource { - return _original_source; -} -#endif - -- (id)library { - CRASH_NOW_MSG("Not implemented"); - return nil; -} - -- (NSError *)error { - CRASH_NOW_MSG("Not implemented"); - return nil; -} - -- (void)setLabel:(NSString *)label { -} - -- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry -#ifdef DEV_ENABLED - source:(NSString *)source -#endif -{ - self = [super init]; - _entry = entry; - _entry->library = self; -#ifdef DEV_ENABLED - _original_source = source; 
-#endif - return self; -} - -- (void)dealloc { - _entry->notify_free(); -} - -@end - -@implementation MDImmediateLibrary - -- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry - device:(id)device - source:(NSString *)source - options:(MTLCompileOptions *)options { - self = [super initWithCacheEntry:entry -#ifdef DEV_ENABLED - source:source -#endif - ]; - _complete = false; - _ready = false; - - __block os_signpost_id_t compile_id = (os_signpost_id_t)(uintptr_t)self; - os_signpost_interval_begin(LOG_INTERVALS, compile_id, "shader_compile", - "shader_name=%{public}s stage=%{public}s hash=%X", - entry->name.get_data(), SHADER_STAGE_NAMES[entry->stage], entry->key.short_sha()); - - [device newLibraryWithSource:source - options:options - completionHandler:^(id library, NSError *error) { - os_signpost_interval_end(LOG_INTERVALS, compile_id, "shader_compile"); - self->_library = library; - self->_error = error; - if (error) { - ERR_PRINT(vformat(U"Error compiling shader %s: %s", entry->name.get_data(), error.localizedDescription.UTF8String)); - } - - { - std::lock_guard lock(self->_cv_mutex); - _ready = true; - } - _cv.notify_all(); - _complete = true; - }]; - return self; -} - -- (id)library { - if (!_complete) { - std::unique_lock lock(_cv_mutex); - _cv.wait(lock, [&] { return _ready; }); - } - return _library; -} - -- (NSError *)error { - if (!_complete) { - std::unique_lock lock(_cv_mutex); - _cv.wait(lock, [&] { return _ready; }); - } - return _error; -} - -@end - -@implementation MDLazyLibrary -- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry - device:(id)device - source:(NSString *)source - options:(MTLCompileOptions *)options { - self = [super initWithCacheEntry:entry -#ifdef DEV_ENABLED - source:source -#endif - ]; - _device = device; - _source = source; - _options = options; - - return self; -} - -- (void)load { - { - std::shared_lock lock(_mu); - if (_loaded) { - return; - } - } - - std::unique_lock lock(_mu); - if (_loaded) { - return; - } 
- - __block os_signpost_id_t compile_id = (os_signpost_id_t)(uintptr_t)self; - os_signpost_interval_begin(LOG_INTERVALS, compile_id, "shader_compile", - "shader_name=%{public}s stage=%{public}s hash=%X", - _entry->name.get_data(), SHADER_STAGE_NAMES[_entry->stage], _entry->key.short_sha()); - NSError *error; - _library = [_device newLibraryWithSource:_source options:_options error:&error]; - os_signpost_interval_end(LOG_INTERVALS, compile_id, "shader_compile"); - _device = nil; - _source = nil; - _options = nil; - _loaded = true; -} - -- (id)library { - [self load]; - return _library; -} - -- (NSError *)error { - [self load]; - return _error; -} - -@end - -@implementation MDBinaryLibrary - -- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry - device:(id)device -#ifdef DEV_ENABLED - source:(NSString *)source -#endif - data:(dispatch_data_t)data { - self = [super initWithCacheEntry:entry -#ifdef DEV_ENABLED - source:source -#endif - ]; - NSError *error = nil; - _library = [device newLibraryWithData:data error:&error]; - if (error != nil) { - _error = error; - NSString *desc = [error description]; - ERR_PRINT(vformat("Unable to load shader library: %s", desc.UTF8String)); - } - return self; -} - -- (id)library { - return _library; -} - -- (NSError *)error { - return _error; -} - -@end diff --git a/drivers/metal/metal_objects_shared.cpp b/drivers/metal/metal_objects_shared.cpp new file mode 100644 index 00000000000..5cafa067fca --- /dev/null +++ b/drivers/metal/metal_objects_shared.cpp @@ -0,0 +1,897 @@ +/**************************************************************************/ +/* metal_objects_shared.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). 
*/ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#include "metal_objects_shared.h" + +#include "rendering_device_driver_metal.h" + +#include +#include +#include + +#pragma mark - Resource Factory + +NS::SharedPtr MDResourceFactory::new_func(NS::String *p_source, NS::String *p_name, NS::Error **p_error) { + NS::SharedPtr pool = NS::TransferPtr(NS::AutoreleasePool::alloc()->init()); + NS::SharedPtr options = NS::TransferPtr(MTL::CompileOptions::alloc()->init()); + NS::Error *err = nullptr; + NS::SharedPtr mtlLib = NS::TransferPtr(device->newLibrary(p_source, options.get(), &err)); + if (err) { + if (p_error != nullptr) { + *p_error = err; + } + } + return NS::TransferPtr(mtlLib->newFunction(p_name)); +} + +NS::SharedPtr MDResourceFactory::new_clear_vert_func(ClearAttKey &p_key) { + NS::SharedPtr pool = NS::TransferPtr(NS::AutoreleasePool::alloc()->init()); + char msl[1024]; + snprintf(msl, sizeof(msl), R"( +#include +using namespace metal; + +typedef struct { + float4 a_position [[attribute(0)]]; +} AttributesPos; + +typedef struct { + float4 colors[9]; +} ClearColorsIn; + +typedef struct { + float4 v_position [[position]]; + uint layer%s; +} VaryingsPos; + +vertex VaryingsPos vertClear(AttributesPos attributes [[stage_in]], constant ClearColorsIn& ccIn [[buffer(0)]]) { + VaryingsPos varyings; + varyings.v_position = float4(attributes.a_position.x, -attributes.a_position.y, ccIn.colors[%d].r, 1.0); + varyings.layer = uint(attributes.a_position.w); + return varyings; +} +)", + p_key.is_layered_rendering_enabled() ? 
" [[render_target_array_index]]" : "", ClearAttKey::DEPTH_INDEX); + + return new_func(NS::String::string(msl, NS::UTF8StringEncoding), MTLSTR("vertClear"), nullptr); +} + +NS::SharedPtr MDResourceFactory::new_clear_frag_func(ClearAttKey &p_key) { + NS::SharedPtr pool = NS::TransferPtr(NS::AutoreleasePool::alloc()->init()); + std::string msl; + msl.reserve(2048); + + msl += R"( +#include +using namespace metal; + +typedef struct { + float4 v_position [[position]]; +} VaryingsPos; + +typedef struct { + float4 colors[9]; +} ClearColorsIn; + +typedef struct { +)"; + + char line[128]; + for (uint32_t caIdx = 0; caIdx < ClearAttKey::COLOR_COUNT; caIdx++) { + if (p_key.is_enabled(caIdx)) { + const char *typeStr = get_format_type_string((MTL::PixelFormat)p_key.pixel_formats[caIdx]); + snprintf(line, sizeof(line), " %s4 color%u [[color(%u)]];\n", typeStr, caIdx, caIdx); + msl += line; + } + } + msl += R"(} ClearColorsOut; + +fragment ClearColorsOut fragClear(VaryingsPos varyings [[stage_in]], constant ClearColorsIn& ccIn [[buffer(0)]]) { + + ClearColorsOut ccOut; +)"; + for (uint32_t caIdx = 0; caIdx < ClearAttKey::COLOR_COUNT; caIdx++) { + if (p_key.is_enabled(caIdx)) { + const char *typeStr = get_format_type_string((MTL::PixelFormat)p_key.pixel_formats[caIdx]); + snprintf(line, sizeof(line), " ccOut.color%u = %s4(ccIn.colors[%u]);\n", caIdx, typeStr, caIdx); + msl += line; + } + } + msl += R"( return ccOut; +})"; + + return new_func(NS::String::string(msl.c_str(), NS::UTF8StringEncoding), MTLSTR("fragClear"), nullptr); +} + +const char *MDResourceFactory::get_format_type_string(MTL::PixelFormat p_fmt) const { + switch (pixel_formats.getFormatType(p_fmt)) { + case MTLFormatType::ColorInt8: + case MTLFormatType::ColorInt16: + return "short"; + case MTLFormatType::ColorUInt8: + case MTLFormatType::ColorUInt16: + return "ushort"; + case MTLFormatType::ColorInt32: + return "int"; + case MTLFormatType::ColorUInt32: + return "uint"; + case MTLFormatType::ColorHalf: + return 
"half"; + case MTLFormatType::ColorFloat: + case MTLFormatType::DepthStencil: + case MTLFormatType::Compressed: + return "float"; + case MTLFormatType::None: + default: + return "unexpected_MTLPixelFormatInvalid"; + } +} + +NS::SharedPtr MDResourceFactory::new_depth_stencil_state(bool p_use_depth, bool p_use_stencil) { + NS::SharedPtr dsDesc = NS::TransferPtr(MTL::DepthStencilDescriptor::alloc()->init()); + dsDesc->setDepthCompareFunction(MTL::CompareFunctionAlways); + dsDesc->setDepthWriteEnabled(p_use_depth); + + if (p_use_stencil) { + NS::SharedPtr sDesc = NS::TransferPtr(MTL::StencilDescriptor::alloc()->init()); + sDesc->setStencilCompareFunction(MTL::CompareFunctionAlways); + sDesc->setStencilFailureOperation(MTL::StencilOperationReplace); + sDesc->setDepthFailureOperation(MTL::StencilOperationReplace); + sDesc->setDepthStencilPassOperation(MTL::StencilOperationReplace); + + dsDesc->setFrontFaceStencil(sDesc.get()); + dsDesc->setBackFaceStencil(sDesc.get()); + } else { + dsDesc->setFrontFaceStencil(nullptr); + dsDesc->setBackFaceStencil(nullptr); + } + + return NS::TransferPtr(device->newDepthStencilState(dsDesc.get())); +} + +NS::SharedPtr MDResourceFactory::new_clear_pipeline_state(ClearAttKey &p_key, NS::Error **p_error) { + NS::SharedPtr vtxFunc = new_clear_vert_func(p_key); + NS::SharedPtr fragFunc = new_clear_frag_func(p_key); + NS::SharedPtr plDesc = NS::TransferPtr(MTL::RenderPipelineDescriptor::alloc()->init()); + plDesc->setLabel(MTLSTR("ClearRenderAttachments")); + plDesc->setVertexFunction(vtxFunc.get()); + plDesc->setFragmentFunction(fragFunc.get()); + plDesc->setRasterSampleCount(p_key.sample_count); + plDesc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassTriangle); + + for (uint32_t caIdx = 0; caIdx < ClearAttKey::COLOR_COUNT; caIdx++) { + MTL::RenderPipelineColorAttachmentDescriptor *colorDesc = plDesc->colorAttachments()->object(caIdx); + colorDesc->setPixelFormat((MTL::PixelFormat)p_key.pixel_formats[caIdx]); + 
colorDesc->setWriteMask(p_key.is_enabled(caIdx) ? MTL::ColorWriteMaskAll : MTL::ColorWriteMaskNone); + } + + MTL::PixelFormat mtlDepthFormat = (MTL::PixelFormat)p_key.depth_format(); + if (pixel_formats.isDepthFormat(mtlDepthFormat)) { + plDesc->setDepthAttachmentPixelFormat(mtlDepthFormat); + } + + MTL::PixelFormat mtlStencilFormat = (MTL::PixelFormat)p_key.stencil_format(); + if (pixel_formats.isStencilFormat(mtlStencilFormat)) { + plDesc->setStencilAttachmentPixelFormat(mtlStencilFormat); + } + + MTL::VertexDescriptor *vtxDesc = plDesc->vertexDescriptor(); + + // Vertex attribute descriptors. + NS::UInteger vtxBuffIdx = get_vertex_buffer_index(VERT_CONTENT_BUFFER_INDEX); + NS::UInteger vtxStride = 0; + + // Vertex location. + MTL::VertexAttributeDescriptor *vaDesc = vtxDesc->attributes()->object(0); + vaDesc->setFormat(MTL::VertexFormatFloat4); + vaDesc->setBufferIndex(vtxBuffIdx); + vaDesc->setOffset(vtxStride); + vtxStride += sizeof(simd::float4); + + // Vertex attribute buffer. + MTL::VertexBufferLayoutDescriptor *vbDesc = vtxDesc->layouts()->object(vtxBuffIdx); + vbDesc->setStepFunction(MTL::VertexStepFunctionPerVertex); + vbDesc->setStepRate(1); + vbDesc->setStride(vtxStride); + + NS::Error *err = nullptr; + NS::SharedPtr state = NS::TransferPtr(device->newRenderPipelineState(plDesc.get(), &err)); + if (p_error != nullptr) { + *p_error = err; + } + return state; +} + +NS::SharedPtr MDResourceFactory::new_empty_draw_pipeline_state(ClearAttKey &p_key, NS::Error **p_error) { + DEV_ASSERT(!p_key.is_layered_rendering_enabled()); + DEV_ASSERT(p_key.is_enabled(0)); + DEV_ASSERT(!p_key.is_depth_enabled()); + DEV_ASSERT(!p_key.is_stencil_enabled()); + + NS::SharedPtr pool = NS::TransferPtr(NS::AutoreleasePool::alloc()->init()); + static const char *msl = R"(#include +using namespace metal; + +struct FullscreenNoopOut { + float4 position [[position]]; +}; + +vertex FullscreenNoopOut fullscreenNoopVert(uint vid [[vertex_id]]) { + float2 positions[3] = { float2(-1.0, 
-1.0), float2(3.0, -1.0), float2(-1.0, 3.0) }; + float2 pos = positions[vid]; + + FullscreenNoopOut out; + out.position = float4(pos, 0.0, 1.0); + return out; +} + +fragment void fullscreenNoopFrag(float4 gl_FragCoord [[position]]) { +} +)"; + + NS::Error *err = nullptr; + NS::SharedPtr options = NS::TransferPtr(MTL::CompileOptions::alloc()->init()); + NS::SharedPtr mtlLib = NS::TransferPtr(device->newLibrary(NS::String::string(msl, NS::UTF8StringEncoding), options.get(), &err)); + if (err && p_error != nullptr) { + *p_error = err; + } + + if (mtlLib.get() == nullptr) { + return {}; + } + + NS::SharedPtr vtxFunc = NS::TransferPtr(mtlLib->newFunction(MTLSTR("fullscreenNoopVert"))); + NS::SharedPtr fragFunc = NS::TransferPtr(mtlLib->newFunction(MTLSTR("fullscreenNoopFrag"))); + + NS::SharedPtr plDesc = NS::TransferPtr(MTL::RenderPipelineDescriptor::alloc()->init()); + plDesc->setLabel(MTLSTR("EmptyDrawFullscreenTriangle")); + plDesc->setVertexFunction(vtxFunc.get()); + plDesc->setFragmentFunction(fragFunc.get()); + plDesc->setRasterSampleCount(p_key.sample_count ? 
p_key.sample_count : 1); + plDesc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassTriangle); + + MTL::RenderPipelineColorAttachmentDescriptor *colorDesc = plDesc->colorAttachments()->object(0); + colorDesc->setPixelFormat((MTL::PixelFormat)p_key.pixel_formats[0]); + colorDesc->setWriteMask(MTL::ColorWriteMaskNone); + + err = nullptr; + NS::SharedPtr state = NS::TransferPtr(device->newRenderPipelineState(plDesc.get(), &err)); + if (p_error != nullptr && err != nullptr) { + *p_error = err; + } + return state; +} + +#pragma mark - Resource Cache + +MTL::RenderPipelineState *MDResourceCache::get_clear_render_pipeline_state(ClearAttKey &p_key, NS::Error **p_error) { + HashMap::ConstIterator it = clear_states.find(p_key); + if (it != clear_states.end()) { + return it->value.get(); + } + + NS::SharedPtr state = resource_factory->new_clear_pipeline_state(p_key, p_error); + MTL::RenderPipelineState *result = state.get(); + clear_states[p_key] = std::move(state); + return result; +} + +MTL::RenderPipelineState *MDResourceCache::get_empty_draw_pipeline_state(ClearAttKey &p_key, NS::Error **p_error) { + HashMap::ConstIterator it = empty_draw_states.find(p_key); + if (it != empty_draw_states.end()) { + return it->value.get(); + } + + NS::SharedPtr state = resource_factory->new_empty_draw_pipeline_state(p_key, p_error); + MTL::RenderPipelineState *result = state.get(); + empty_draw_states[p_key] = std::move(state); + return result; +} + +MTL::DepthStencilState *MDResourceCache::get_depth_stencil_state(bool p_use_depth, bool p_use_stencil) { + if (p_use_depth && p_use_stencil) { + if (!clear_depth_stencil_state.all) { + clear_depth_stencil_state.all = resource_factory->new_depth_stencil_state(true, true); + } + return clear_depth_stencil_state.all.get(); + } else if (p_use_depth) { + if (!clear_depth_stencil_state.depth_only) { + clear_depth_stencil_state.depth_only = resource_factory->new_depth_stencil_state(true, false); + } + return 
clear_depth_stencil_state.depth_only.get(); + } else if (p_use_stencil) { + if (!clear_depth_stencil_state.stencil_only) { + clear_depth_stencil_state.stencil_only = resource_factory->new_depth_stencil_state(false, true); + } + return clear_depth_stencil_state.stencil_only.get(); + } else { + if (!clear_depth_stencil_state.none) { + clear_depth_stencil_state.none = resource_factory->new_depth_stencil_state(false, false); + } + return clear_depth_stencil_state.none.get(); + } +} + +#pragma mark - Render Pass Types + +MTLFmtCaps MDSubpass::getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const { + MTLFmtCaps caps = kMTLFmtCapsNone; + + for (RDD::AttachmentReference const &ar : input_references) { + if (ar.attachment == p_index) { + flags::set(caps, kMTLFmtCapsRead); + break; + } + } + + for (RDD::AttachmentReference const &ar : color_references) { + if (ar.attachment == p_index) { + flags::set(caps, kMTLFmtCapsColorAtt); + break; + } + } + + for (RDD::AttachmentReference const &ar : resolve_references) { + if (ar.attachment == p_index) { + flags::set(caps, kMTLFmtCapsResolve); + break; + } + } + + if (depth_stencil_reference.attachment == p_index) { + flags::set(caps, kMTLFmtCapsDSAtt); + } + + return caps; +} + +void MDAttachment::linkToSubpass(const MDRenderPass &p_pass) { + firstUseSubpassIndex = UINT32_MAX; + lastUseSubpassIndex = 0; + + for (MDSubpass const &subpass : p_pass.subpasses) { + MTLFmtCaps reqCaps = subpass.getRequiredFmtCapsForAttachmentAt(index); + if (reqCaps) { + firstUseSubpassIndex = MIN(subpass.subpass_index, firstUseSubpassIndex); + lastUseSubpassIndex = MAX(subpass.subpass_index, lastUseSubpassIndex); + } + } +} + +MTL::StoreAction MDAttachment::getMTLStoreAction(MDSubpass const &p_subpass, + bool p_is_rendering_entire_area, + bool p_has_resolve, + bool p_can_resolve, + bool p_is_stencil) const { + if (!p_is_rendering_entire_area || !isLastUseOf(p_subpass)) { + return p_has_resolve && p_can_resolve ? 
MTL::StoreActionStoreAndMultisampleResolve : MTL::StoreActionStore; + } + + switch (p_is_stencil ? stencilStoreAction : storeAction) { + case MTL::StoreActionStore: + return p_has_resolve && p_can_resolve ? MTL::StoreActionStoreAndMultisampleResolve : MTL::StoreActionStore; + case MTL::StoreActionDontCare: + return p_has_resolve ? (p_can_resolve ? MTL::StoreActionMultisampleResolve : MTL::StoreActionStore) : MTL::StoreActionDontCare; + + default: + return MTL::StoreActionStore; + } +} + +bool MDAttachment::shouldClear(const MDSubpass &p_subpass, bool p_is_stencil) const { + // If the subpass is not the first subpass to use this attachment, don't clear this attachment. + if (p_subpass.subpass_index != firstUseSubpassIndex) { + return false; + } + return (p_is_stencil ? stencilLoadAction : loadAction) == MTL::LoadActionClear; +} + +MDRenderPass::MDRenderPass(Vector &p_attachments, Vector &p_subpasses) : + attachments(p_attachments), subpasses(p_subpasses) { + for (MDAttachment &att : attachments) { + att.linkToSubpass(*this); + } +} + +#pragma mark - Command Buffer Base + +void MDCommandBufferBase::retain_resource(CFTypeRef p_resource) { + CFRetain(p_resource); + _retained_resources.push_back(p_resource); +} + +void MDCommandBufferBase::release_resources() { + for (CFTypeRef r : _retained_resources) { + CFRelease(r); + } + _retained_resources.clear(); +} + +void MDCommandBufferBase::render_set_viewport(VectorView p_viewports) { + RenderStateBase &state = get_render_state_base(); + state.viewports.resize(p_viewports.size()); + for (uint32_t i = 0; i < p_viewports.size(); i += 1) { + Rect2i const &vp = p_viewports[i]; + state.viewports[i] = { + .originX = static_cast(vp.position.x), + .originY = static_cast(vp.position.y), + .width = static_cast(vp.size.width), + .height = static_cast(vp.size.height), + .znear = 0.0, + .zfar = 1.0, + }; + } + state.dirty.set_flag(RenderStateBase::DIRTY_VIEWPORT); +} + +void MDCommandBufferBase::render_set_scissor(VectorView p_scissors) 
{ + RenderStateBase &state = get_render_state_base(); + state.scissors.resize(p_scissors.size()); + for (uint32_t i = 0; i < p_scissors.size(); i += 1) { + Rect2i const &vp = p_scissors[i]; + state.scissors[i] = { + .x = static_cast(vp.position.x), + .y = static_cast(vp.position.y), + .width = static_cast(vp.size.width), + .height = static_cast(vp.size.height), + }; + } + state.dirty.set_flag(RenderStateBase::DIRTY_SCISSOR); +} + +void MDCommandBufferBase::render_set_blend_constants(const Color &p_constants) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + RenderStateBase &state = get_render_state_base(); + if (state.blend_constants != p_constants) { + state.blend_constants = p_constants; + state.dirty.set_flag(RenderStateBase::DIRTY_BLEND); + } +} + +void MDCommandBufferBase::_populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView p_rects) { + uint32_t idx = 0; + for (uint32_t i = 0; i < p_rects.size(); i++) { + Rect2i const &rect = p_rects[i]; + idx = _populate_vertices(p_vertices, idx, rect, p_fb_size); + } +} + +uint32_t MDCommandBufferBase::_populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size) { + // Determine the positions of the four edges of the + // clear rectangle as a fraction of the attachment size. + float leftPos = (float)(p_rect.position.x) / (float)p_fb_size.width; + float rightPos = (float)(p_rect.size.width) / (float)p_fb_size.width + leftPos; + float bottomPos = (float)(p_rect.position.y) / (float)p_fb_size.height; + float topPos = (float)(p_rect.size.height) / (float)p_fb_size.height + bottomPos; + + // Transform to clip-space coordinates, which are bounded by (-1.0 < p < 1.0) in clip-space. 
+ leftPos = (leftPos * 2.0f) - 1.0f; + rightPos = (rightPos * 2.0f) - 1.0f; + bottomPos = (bottomPos * 2.0f) - 1.0f; + topPos = (topPos * 2.0f) - 1.0f; + + simd::float4 vtx; + + uint32_t idx = p_index; + uint32_t endLayer = get_current_view_count(); + + for (uint32_t layer = 0; layer < endLayer; layer++) { + vtx.z = 0.0; + vtx.w = (float)layer; + + // Top left vertex - First triangle. + vtx.y = topPos; + vtx.x = leftPos; + p_vertices[idx++] = vtx; + + // Bottom left vertex. + vtx.y = bottomPos; + vtx.x = leftPos; + p_vertices[idx++] = vtx; + + // Bottom right vertex. + vtx.y = bottomPos; + vtx.x = rightPos; + p_vertices[idx++] = vtx; + + // Bottom right vertex - Second triangle. + p_vertices[idx++] = vtx; + + // Top right vertex. + vtx.y = topPos; + vtx.x = rightPos; + p_vertices[idx++] = vtx; + + // Top left vertex. + vtx.y = topPos; + vtx.x = leftPos; + p_vertices[idx++] = vtx; + } + + return idx; +} + +void MDCommandBufferBase::_end_render_pass() { + MDFrameBuffer const &fb_info = *get_frame_buffer(); + MDSubpass const &subpass = get_current_subpass(); + + PixelFormats &pf = device_driver->get_pixel_formats(); + + for (uint32_t i = 0; i < subpass.resolve_references.size(); i++) { + uint32_t color_index = subpass.color_references[i].attachment; + uint32_t resolve_index = subpass.resolve_references[i].attachment; + DEV_ASSERT((color_index == RDD::AttachmentReference::UNUSED) == (resolve_index == RDD::AttachmentReference::UNUSED)); + if (color_index == RDD::AttachmentReference::UNUSED || !fb_info.has_texture(color_index)) { + continue; + } + + MTL::Texture *resolve_tex = fb_info.get_texture(resolve_index); + + CRASH_COND_MSG(!flags::all(pf.getCapabilities(resolve_tex->pixelFormat()), kMTLFmtCapsResolve), "not implemented: unresolvable texture types"); + // see: https://github.com/KhronosGroup/MoltenVK/blob/d20d13fe2735adb845636a81522df1b9d89c0fba/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm#L407 + } + + end_render_encoding(); +} + +void 
MDCommandBufferBase::_render_clear_render_area() { + MDRenderPass const &pass = *get_render_pass(); + MDSubpass const &subpass = get_current_subpass(); + LocalVector &clear_values = get_clear_values(); + + uint32_t ds_index = subpass.depth_stencil_reference.attachment; + bool clear_depth = (ds_index != RDD::AttachmentReference::UNUSED && pass.attachments[ds_index].shouldClear(subpass, false)); + bool clear_stencil = (ds_index != RDD::AttachmentReference::UNUSED && pass.attachments[ds_index].shouldClear(subpass, true)); + + uint32_t color_count = subpass.color_references.size(); + uint32_t clears_size = color_count + (clear_depth || clear_stencil ? 1 : 0); + if (clears_size == 0) { + return; + } + + RDD::AttachmentClear *clears = ALLOCA_ARRAY(RDD::AttachmentClear, clears_size); + uint32_t clears_count = 0; + + for (uint32_t i = 0; i < color_count; i++) { + uint32_t idx = subpass.color_references[i].attachment; + if (idx != RDD::AttachmentReference::UNUSED && pass.attachments[idx].shouldClear(subpass, false)) { + clears[clears_count++] = { .aspect = RDD::TEXTURE_ASPECT_COLOR_BIT, .color_attachment = idx, .value = clear_values[idx] }; + } + } + + if (clear_depth || clear_stencil) { + MDAttachment const &attachment = pass.attachments[ds_index]; + BitField bits = {}; + if (clear_depth && attachment.type & MDAttachmentType::Depth) { + bits.set_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT); + } + if (clear_stencil && attachment.type & MDAttachmentType::Stencil) { + bits.set_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT); + } + + clears[clears_count++] = { .aspect = bits, .color_attachment = ds_index, .value = clear_values[ds_index] }; + } + + if (clears_count == 0) { + return; + } + + render_clear_attachments(VectorView(clears, clears_count), { get_render_area() }); +} + +void MDCommandBufferBase::encode_push_constant_data(RDD::ShaderID p_shader, VectorView p_data) { + switch (type) { + case MDCommandBufferStateType::Render: + case MDCommandBufferStateType::Compute: { + MDShader *shader = 
(MDShader *)(p_shader.id); + if (shader->push_constants.binding == UINT32_MAX) { + return; + } + push_constant_binding = shader->push_constants.binding; + void const *ptr = p_data.ptr(); + push_constant_data_len = p_data.size() * sizeof(uint32_t); + DEV_ASSERT(push_constant_data_len <= sizeof(push_constant_data)); + memcpy(push_constant_data, ptr, push_constant_data_len); + if (push_constant_data_len > 0) { + mark_push_constants_dirty(); + } + } break; + case MDCommandBufferStateType::Blit: + case MDCommandBufferStateType::None: + return; + } +} + +#pragma mark - Metal Library + +static const char *SHADER_STAGE_NAMES[] = { + [RD::SHADER_STAGE_VERTEX] = "vert", + [RD::SHADER_STAGE_FRAGMENT] = "frag", + [RD::SHADER_STAGE_TESSELATION_CONTROL] = "tess_ctrl", + [RD::SHADER_STAGE_TESSELATION_EVALUATION] = "tess_eval", + [RD::SHADER_STAGE_COMPUTE] = "comp", +}; + +void ShaderCacheEntry::notify_free() const { + owner.shader_cache_free_entry(key); +} + +#pragma mark - MDLibrary + +MDLibrary::MDLibrary(ShaderCacheEntry *p_entry +#ifdef DEV_ENABLED + , + NS::String *p_source +#endif + ) : + _entry(p_entry) { +#ifdef DEV_ENABLED + _original_source = NS::RetainPtr(p_source); +#endif +} + +MDLibrary::~MDLibrary() { + _entry->notify_free(); +} + +void MDLibrary::set_label(NS::String *p_label) { +} + +#pragma mark - MDLazyLibrary + +/// Loads the MTLLibrary when the library is first accessed. 
+class MDLazyLibrary final : public MDLibrary { + NS::SharedPtr _library; + NS::Error *_error = nullptr; + std::shared_mutex _mu; + bool _loaded = false; + MTL::Device *_device = nullptr; + NS::SharedPtr _source; + NS::SharedPtr _options; + + void _load(); + +public: + MDLazyLibrary(ShaderCacheEntry *p_entry, + MTL::Device *p_device, + NS::String *p_source, + MTL::CompileOptions *p_options); + + MTL::Library *get_library() override; + NS::Error *get_error() override; +}; + +MDLazyLibrary::MDLazyLibrary(ShaderCacheEntry *p_entry, + MTL::Device *p_device, + NS::String *p_source, + MTL::CompileOptions *p_options) : + MDLibrary(p_entry +#ifdef DEV_ENABLED + , + p_source +#endif + ), + _device(p_device), + _source(NS::RetainPtr(p_source)), + _options(NS::RetainPtr(p_options)) { +} + +void MDLazyLibrary::_load() { + { + std::shared_lock lock(_mu); + if (_loaded) { + return; + } + } + + std::unique_lock lock(_mu); + if (_loaded) { + return; + } + + os_signpost_id_t compile_id = (os_signpost_id_t)(uintptr_t)this; + os_signpost_interval_begin(LOG_INTERVALS, compile_id, "shader_compile", + "shader_name=%{public}s stage=%{public}s hash=%X", + _entry->name.get_data(), SHADER_STAGE_NAMES[_entry->stage], _entry->key.short_sha()); + NS::Error *error = nullptr; + _library = NS::TransferPtr(_device->newLibrary(_source.get(), _options.get(), &error)); + os_signpost_interval_end(LOG_INTERVALS, compile_id, "shader_compile"); + _error = error; + _device = nullptr; + _source.reset(); + _options.reset(); + _loaded = true; +} + +MTL::Library *MDLazyLibrary::get_library() { + _load(); + return _library.get(); +} + +NS::Error *MDLazyLibrary::get_error() { + _load(); + return _error; +} + +#pragma mark - MDImmediateLibrary + +/// Loads the MTLLibrary immediately on initialization, using Metal's async compilation API. 
+class MDImmediateLibrary final : public MDLibrary {
+	// Result of the asynchronous compilation. Both are written by the completion handler
+	// and held as owning references: the objects passed to the handler are only borrowed
+	// for the duration of the call.
+	NS::SharedPtr<MTL::Library> _library;
+	NS::SharedPtr<NS::Error> _error;
+	// `_ready` is the condition-variable predicate and is only accessed under `_cv_mutex`.
+	// `_complete` mirrors it as an atomic so accessors can skip the mutex once done.
+	std::mutex _cv_mutex;
+	std::condition_variable _cv;
+	std::atomic<bool> _complete{ false };
+	bool _ready = false;
+
+	void _wait_until_ready();
+
+public:
+	MDImmediateLibrary(ShaderCacheEntry *p_entry,
+			MTL::Device *p_device,
+			NS::String *p_source,
+			MTL::CompileOptions *p_options);
+	~MDImmediateLibrary();
+
+	MTL::Library *get_library() override;
+	NS::Error *get_error() override;
+};
+
+/// Starts the asynchronous compilation. The result is published by the completion handler.
+MDImmediateLibrary::MDImmediateLibrary(ShaderCacheEntry *p_entry,
+		MTL::Device *p_device,
+		NS::String *p_source,
+		MTL::CompileOptions *p_options) :
+		MDLibrary(p_entry
+#ifdef DEV_ENABLED
+				,
+				p_source
+#endif
+		) {
+	os_signpost_id_t compile_id = (os_signpost_id_t)(uintptr_t)this;
+	os_signpost_interval_begin(LOG_INTERVALS, compile_id, "shader_compile",
+			"shader_name=%{public}s stage=%{public}s hash=%X",
+			p_entry->name.get_data(), SHADER_STAGE_NAMES[p_entry->stage], p_entry->key.short_sha());
+
+	// Use Metal's async compilation API with std::function callback.
+	// The handler captures `this`; the destructor waits for it, so the capture stays valid.
+	p_device->newLibrary(p_source, p_options, [this, compile_id, p_entry](MTL::Library *library, NS::Error *error) {
+		os_signpost_interval_end(LOG_INTERVALS, compile_id, "shader_compile");
+		if (error) {
+			ERR_PRINT(vformat(U"Error compiling shader %s: %s", p_entry->name.get_data(), error->localizedDescription()->utf8String()));
+		}
+
+		// Publish the result and wake waiters while holding the mutex. Releasing the
+		// mutex is the last thing the handler does with `this`, so neither a woken
+		// waiter nor the destructor can observe a half-finished handler.
+		std::lock_guard<std::mutex> lock(_cv_mutex);
+		if (library) {
+			_library = NS::RetainPtr(library);
+		}
+		if (error) {
+			_error = NS::RetainPtr(error);
+		}
+		_ready = true;
+		_complete = true;
+		_cv.notify_all();
+	});
+}
+
+/// Blocks destruction until the completion handler has finished, because the handler
+/// writes to members of this object.
+MDImmediateLibrary::~MDImmediateLibrary() {
+	// Deliberately no `_complete` fast path here: taking the mutex orders destruction
+	// after the handler has released it.
+	std::unique_lock<std::mutex> lock(_cv_mutex);
+	_cv.wait(lock, [this] { return _ready; });
+}
+
+/// Blocks the caller until the completion handler has published its result.
+void MDImmediateLibrary::_wait_until_ready() {
+	if (!_complete) {
+		std::unique_lock<std::mutex> lock(_cv_mutex);
+		_cv.wait(lock, [this] { return _ready; });
+	}
+}
+
+/// Waits for compilation to finish and returns the library. May return null on failure.
+MTL::Library *MDImmediateLibrary::get_library() {
+	_wait_until_ready();
+	return _library.get();
+}
+
+/// Waits for compilation to finish and returns the error, or null if there was none.
+/// The returned pointer is owned by this object.
+NS::Error *MDImmediateLibrary::get_error() {
+	_wait_until_ready();
+	return _error.get();
+}
+
+#pragma mark - MDBinaryLibrary
+
+/// Loads the MTLLibrary from pre-compiled binary data.
+class MDBinaryLibrary final : public MDLibrary { + NS::SharedPtr _library; + NS::Error *_error = nullptr; + +public: + MDBinaryLibrary(ShaderCacheEntry *p_entry, + MTL::Device *p_device, +#ifdef DEV_ENABLED + NS::String *p_source, +#endif + dispatch_data_t p_data); + + MTL::Library *get_library() override; + NS::Error *get_error() override; +}; + +MDBinaryLibrary::MDBinaryLibrary(ShaderCacheEntry *p_entry, + MTL::Device *p_device, +#ifdef DEV_ENABLED + NS::String *p_source, +#endif + dispatch_data_t p_data) : + MDLibrary(p_entry +#ifdef DEV_ENABLED + , + p_source +#endif + ) { + NS::Error *error = nullptr; + _library = NS::TransferPtr(p_device->newLibrary(p_data, &error)); + if (error != nullptr) { + _error = error; + ERR_PRINT(vformat("Unable to load shader library: %s", error->localizedDescription()->utf8String())); + } +} + +MTL::Library *MDBinaryLibrary::get_library() { + return _library.get(); +} + +NS::Error *MDBinaryLibrary::get_error() { + return _error; +} + +#pragma mark - MDLibrary Factory Methods + +std::shared_ptr MDLibrary::create(ShaderCacheEntry *p_entry, + MTL::Device *p_device, + NS::String *p_source, + MTL::CompileOptions *p_options, + ShaderLoadStrategy p_strategy) { + std::shared_ptr lib; + switch (p_strategy) { + case ShaderLoadStrategy::IMMEDIATE: + [[fallthrough]]; + default: + lib = std::make_shared(p_entry, p_device, p_source, p_options); + break; + case ShaderLoadStrategy::LAZY: + lib = std::make_shared(p_entry, p_device, p_source, p_options); + break; + } + p_entry->library = lib; + return lib; +} + +std::shared_ptr MDLibrary::create(ShaderCacheEntry *p_entry, + MTL::Device *p_device, +#ifdef DEV_ENABLED + NS::String *p_source, +#endif + dispatch_data_t p_data) { + std::shared_ptr lib = std::make_shared(p_entry, p_device, +#ifdef DEV_ENABLED + p_source, +#endif + p_data); + p_entry->library = lib; + return lib; +} diff --git a/drivers/metal/metal_objects_shared.h b/drivers/metal/metal_objects_shared.h index ce7993a6b8d..62f3159b874 
100644 --- a/drivers/metal/metal_objects_shared.h +++ b/drivers/metal/metal_objects_shared.h @@ -30,28 +30,19 @@ #pragma once -#import "metal_device_properties.h" -#import "metal_utils.h" +#include "metal_device_properties.h" +#include "metal_utils.h" +#include "pixel_formats.h" +#include "sha256_digest.h" + +#include +#include +#include + +class RenderingDeviceDriverMetal; using RDC = RenderingDeviceCommons; -// These types can be used in Vector and other containers that use -// pointer operations not supported by ARC. -namespace MTL { -#define MTL_CLASS(name) \ - class name { \ - public: \ - name(id obj = nil) : m_obj(obj) {} \ - operator id() const { \ - return m_obj; \ - } \ - id m_obj; \ - }; - -MTL_CLASS(Texture) - -} //namespace MTL - enum ShaderStageUsage : uint32_t { None = 0, Vertex = RDD::SHADER_STAGE_VERTEX_BIT, @@ -81,11 +72,11 @@ struct ClearAttKey { uint16_t sample_count = 0; uint16_t pixel_formats[ATTACHMENT_COUNT] = { 0 }; - _FORCE_INLINE_ void set_color_format(uint32_t p_idx, MTLPixelFormat p_fmt) { pixel_formats[p_idx] = p_fmt; } - _FORCE_INLINE_ void set_depth_format(MTLPixelFormat p_fmt) { pixel_formats[DEPTH_INDEX] = p_fmt; } - _FORCE_INLINE_ void set_stencil_format(MTLPixelFormat p_fmt) { pixel_formats[STENCIL_INDEX] = p_fmt; } - _FORCE_INLINE_ MTLPixelFormat depth_format() const { return (MTLPixelFormat)pixel_formats[DEPTH_INDEX]; } - _FORCE_INLINE_ MTLPixelFormat stencil_format() const { return (MTLPixelFormat)pixel_formats[STENCIL_INDEX]; } + _FORCE_INLINE_ void set_color_format(uint32_t p_idx, MTL::PixelFormat p_fmt) { pixel_formats[p_idx] = p_fmt; } + _FORCE_INLINE_ void set_depth_format(MTL::PixelFormat p_fmt) { pixel_formats[DEPTH_INDEX] = p_fmt; } + _FORCE_INLINE_ void set_stencil_format(MTL::PixelFormat p_fmt) { pixel_formats[STENCIL_INDEX] = p_fmt; } + _FORCE_INLINE_ MTL::PixelFormat depth_format() const { return (MTL::PixelFormat)pixel_formats[DEPTH_INDEX]; } + _FORCE_INLINE_ MTL::PixelFormat stencil_format() const { return 
(MTL::PixelFormat)pixel_formats[STENCIL_INDEX]; } _FORCE_INLINE_ void enable_layered_rendering() { flags::set(flags, CLEAR_FLAGS_LAYERED); } _FORCE_INLINE_ bool is_enabled(uint32_t p_idx) const { return pixel_formats[p_idx] != 0; } @@ -105,6 +96,175 @@ struct ClearAttKey { } }; +#pragma mark - Ring Buffer + +/// A ring buffer backed by MTLBuffer instances for transient GPU allocations. +/// Allocations are 16-byte aligned with a minimum size of 16 bytes. +/// When the current buffer is exhausted, a new buffer is allocated. +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDRingBuffer { +public: + static constexpr uint32_t DEFAULT_BUFFER_SIZE = 512 * 1024; + static constexpr uint32_t MIN_BLOCK_SIZE = 16; + static constexpr uint32_t ALIGNMENT = 16; + + struct Allocation { + void *ptr = nullptr; + MTL::Buffer *buffer = nullptr; + uint64_t gpu_address = 0; + uint32_t offset = 0; + + _FORCE_INLINE_ bool is_valid() const { return ptr != nullptr; } + }; + +private: + MTL::Device *device = nullptr; + LocalVector buffers; + LocalVector heads; + uint32_t current_segment = 0; + uint32_t buffer_size = DEFAULT_BUFFER_SIZE; + bool changed = false; + + _FORCE_INLINE_ uint32_t alloc_segment() { + MTL::Buffer *buffer = device->newBuffer(buffer_size, MTL::ResourceStorageModeShared | MTL::ResourceHazardTrackingModeUntracked); + buffers.push_back(buffer); + heads.push_back(0); + changed = true; + + return buffers.size() - 1; + } + +public: + MDRingBuffer() = default; + + MDRingBuffer(MTL::Device *p_device, uint32_t p_buffer_size = DEFAULT_BUFFER_SIZE) : + device(p_device), buffer_size(p_buffer_size) {} + + ~MDRingBuffer() { + for (MTL::Buffer *buffer : buffers) { + buffer->release(); + } + } + + /// Allocates a block of memory from the ring buffer. + /// Returns an Allocation with the pointer, buffer, and offset. 
+ _FORCE_INLINE_ Allocation allocate(uint32_t p_size) { + p_size = MAX(p_size, MIN_BLOCK_SIZE); + p_size = (p_size + ALIGNMENT - 1) & ~(ALIGNMENT - 1); + + if (buffers.is_empty()) { + alloc_segment(); + } + + uint32_t aligned_head = (heads[current_segment] + ALIGNMENT - 1) & ~(ALIGNMENT - 1); + + if (aligned_head + p_size > buffer_size) { + // Current segment exhausted, try to find one with space or allocate new. + bool found = false; + for (uint32_t i = 0; i < buffers.size(); i++) { + uint32_t ah = (heads[i] + ALIGNMENT - 1) & ~(ALIGNMENT - 1); + if (ah + p_size <= buffer_size) { + current_segment = i; + aligned_head = ah; + found = true; + break; + } + } + + if (!found) { + current_segment = alloc_segment(); + aligned_head = 0; + } + } + + MTL::Buffer *buffer = buffers[current_segment]; + Allocation alloc; + alloc.buffer = buffer; + alloc.offset = aligned_head; + alloc.ptr = static_cast(buffer->contents()) + aligned_head; + if (__builtin_available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) { + alloc.gpu_address = buffer->gpuAddress() + aligned_head; + } + heads[current_segment] = aligned_head + p_size; + + return alloc; + } + + /// Resets all segments for reuse. Call at frame boundaries when GPU work is complete. + _FORCE_INLINE_ void reset() { + for (uint32_t &head : heads) { + head = 0; + } + current_segment = 0; + } + + /// Returns true if buffers were added or removed since last clear_changed(). + _FORCE_INLINE_ bool is_changed() const { return changed; } + + /// Clears the changed flag. + _FORCE_INLINE_ void clear_changed() { changed = false; } + + /// Returns a Span of all backing buffers. + _FORCE_INLINE_ Span get_buffers() const { + return Span(buffers.ptr(), buffers.size()); + } + + /// Returns the number of buffer segments currently allocated. 
+ _FORCE_INLINE_ uint32_t get_segment_count() const { + return buffers.size(); + } +}; + +#pragma mark - Resource Factory + +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDResourceFactory { +private: + MTL::Device *device; + PixelFormats &pixel_formats; + uint32_t max_buffer_count; + + NS::SharedPtr new_func(NS::String *p_source, NS::String *p_name, NS::Error **p_error); + NS::SharedPtr new_clear_vert_func(ClearAttKey &p_key); + NS::SharedPtr new_clear_frag_func(ClearAttKey &p_key); + const char *get_format_type_string(MTL::PixelFormat p_fmt) const; + + _FORCE_INLINE_ uint32_t get_vertex_buffer_index(uint32_t p_binding) { + return (max_buffer_count - 1) - p_binding; + } + +public: + NS::SharedPtr new_clear_pipeline_state(ClearAttKey &p_key, NS::Error **p_error); + NS::SharedPtr new_empty_draw_pipeline_state(ClearAttKey &p_key, NS::Error **p_error); + NS::SharedPtr new_depth_stencil_state(bool p_use_depth, bool p_use_stencil); + + MDResourceFactory(MTL::Device *p_device, PixelFormats &p_pixel_formats, uint32_t p_max_buffer_count) : + device(p_device), pixel_formats(p_pixel_formats), max_buffer_count(p_max_buffer_count) {} + ~MDResourceFactory() = default; +}; + +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDResourceCache { +private: + typedef HashMap> HashMap; + std::unique_ptr resource_factory; + HashMap clear_states; + HashMap empty_draw_states; + + struct { + NS::SharedPtr all; + NS::SharedPtr depth_only; + NS::SharedPtr stencil_only; + NS::SharedPtr none; + } clear_depth_stencil_state; + +public: + MTL::RenderPipelineState *get_clear_render_pipeline_state(ClearAttKey &p_key, NS::Error **p_error); + MTL::RenderPipelineState *get_empty_draw_pipeline_state(ClearAttKey &p_key, NS::Error **p_error); + MTL::DepthStencilState *get_depth_stencil_state(bool p_use_depth, bool p_use_stencil); + + explicit MDResourceCache(MTL::Device *p_device, PixelFormats &p_pixel_formats, uint32_t p_max_buffer_count) : + 
resource_factory(new MDResourceFactory(p_device, p_pixel_formats, p_max_buffer_count)) {} + ~MDResourceCache() = default; +}; + /** * Returns an index that can be used to map a shader stage to an index in a fixed-size array that is used for * a single pipeline type. @@ -122,33 +282,33 @@ _FORCE_INLINE_ static uint32_t to_index(RDD::ShaderStage p_s) { } } -class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDFrameBuffer { - Vector textures; +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDFrameBuffer { + Vector textures; public: Size2i size; - MDFrameBuffer(Vector p_textures, Size2i p_size) : + MDFrameBuffer(Vector p_textures, Size2i p_size) : textures(p_textures), size(p_size) {} MDFrameBuffer() {} /// Returns the texture at the given index. - _ALWAYS_INLINE_ MTL::Texture get_texture(uint32_t p_idx) const { + _ALWAYS_INLINE_ MTL::Texture *get_texture(uint32_t p_idx) const { return textures[p_idx]; } /// Returns true if the texture at the given index is not nil. _ALWAYS_INLINE_ bool has_texture(uint32_t p_idx) const { - return textures[p_idx] != nil; + return textures[p_idx] != nullptr; } /// Set the texture at the given index. - _ALWAYS_INLINE_ void set_texture(uint32_t p_idx, MTL::Texture p_texture) { + _ALWAYS_INLINE_ void set_texture(uint32_t p_idx, MTL::Texture *p_texture) { textures.write[p_idx] = p_texture; } /// Unset or nil the texture at the given index. _ALWAYS_INLINE_ void unset_texture(uint32_t p_idx) { - textures.write[p_idx] = nil; + textures.write[p_idx] = nullptr; } /// Resizes buffers to the specified size. @@ -159,38 +319,771 @@ public: virtual ~MDFrameBuffer() = default; }; -// These functions are used to convert between Objective-C objects and -// the RIDs used by Godot, respecting automatic reference counting. 
+template <> +struct HashMapComparatorDefault { + static bool compare(const RDD::ShaderID &p_lhs, const RDD::ShaderID &p_rhs) { + return p_lhs.id == p_rhs.id; + } +}; + +template <> +struct HashMapComparatorDefault { + static bool compare(const RDD::BufferID &p_lhs, const RDD::BufferID &p_rhs) { + return p_lhs.id == p_rhs.id; + } +}; + +template <> +struct HashMapComparatorDefault { + static bool compare(const RDD::TextureID &p_lhs, const RDD::TextureID &p_rhs) { + return p_lhs.id == p_rhs.id; + } +}; + +template <> +struct HashMapHasherDefaultImpl { + static _FORCE_INLINE_ uint32_t hash(const RDD::BufferID &p_value) { + return HashMapHasherDefaultImpl::hash(p_value.id); + } +}; + +template <> +struct HashMapHasherDefaultImpl { + static _FORCE_INLINE_ uint32_t hash(const RDD::TextureID &p_value) { + return HashMapHasherDefaultImpl::hash(p_value.id); + } +}; + namespace rid { -// Converts an Objective-C object to a pointer, and incrementing the -// reference count. -_FORCE_INLINE_ void *owned(id p_id) { - return (__bridge_retained void *)p_id; +template +_FORCE_INLINE_ T *get(RDD::ID p_id) { + return reinterpret_cast(p_id.id); } -#define MAKE_ID(FROM, TO) \ - _FORCE_INLINE_ TO make(FROM p_obj) { \ - return TO(owned(p_obj)); \ - } - -// These are shared for Metal and Metal 4 drivers - -MAKE_ID(id, RDD::TextureID) -MAKE_ID(id, RDD::BufferID) -MAKE_ID(id, RDD::SamplerID) -MAKE_ID(MTLVertexDescriptor *, RDD::VertexFormatID) - -#undef MAKE_ID - -// Converts a pointer to an Objective-C object without changing the reference count. -_FORCE_INLINE_ auto get(RDD::ID p_id) { - return (p_id.id) ? (__bridge ::id)(void *)p_id.id : nil; -} - -// Converts a pointer to an Objective-C object, and decrements the reference count. 
-_FORCE_INLINE_ auto release(RDD::ID p_id) { - return (__bridge_transfer ::id)(void *)p_id.id; +template +_FORCE_INLINE_ T *get(uint64_t p_id) { + return reinterpret_cast(p_id); } } // namespace rid + +#pragma mark - Render Pass Types + +class MDRenderPass; + +enum class MDAttachmentType : uint8_t { + None = 0, + Color = 1 << 0, + Depth = 1 << 1, + Stencil = 1 << 2, +}; + +_FORCE_INLINE_ MDAttachmentType &operator|=(MDAttachmentType &p_a, MDAttachmentType p_b) { + flags::set(p_a, p_b); + return p_a; +} + +_FORCE_INLINE_ bool operator&(MDAttachmentType p_a, MDAttachmentType p_b) { + return uint8_t(p_a) & uint8_t(p_b); +} + +struct MDSubpass { + uint32_t subpass_index = 0; + uint32_t view_count = 0; + LocalVector input_references; + LocalVector color_references; + RDD::AttachmentReference depth_stencil_reference; + LocalVector resolve_references; + + MTLFmtCaps getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const; +}; + +struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDAttachment { +private: + uint32_t index = 0; + uint32_t firstUseSubpassIndex = 0; + uint32_t lastUseSubpassIndex = 0; + +public: + MTL::PixelFormat format = MTL::PixelFormatInvalid; + MDAttachmentType type = MDAttachmentType::None; + MTL::LoadAction loadAction = MTL::LoadActionDontCare; + MTL::StoreAction storeAction = MTL::StoreActionDontCare; + MTL::LoadAction stencilLoadAction = MTL::LoadActionDontCare; + MTL::StoreAction stencilStoreAction = MTL::StoreActionDontCare; + uint32_t samples = 1; + + /*! + * @brief Returns true if this attachment is first used in the given subpass. + * @param p_subpass + * @return + */ + _FORCE_INLINE_ bool isFirstUseOf(MDSubpass const &p_subpass) const { + return p_subpass.subpass_index == firstUseSubpassIndex; + } + + /*! + * @brief Returns true if this attachment is last used in the given subpass. 
+ * @param p_subpass + * @return + */ + _FORCE_INLINE_ bool isLastUseOf(MDSubpass const &p_subpass) const { + return p_subpass.subpass_index == lastUseSubpassIndex; + } + + void linkToSubpass(MDRenderPass const &p_pass); + + MTL::StoreAction getMTLStoreAction(MDSubpass const &p_subpass, + bool p_is_rendering_entire_area, + bool p_has_resolve, + bool p_can_resolve, + bool p_is_stencil) const; + bool configureDescriptor(MTL::RenderPassAttachmentDescriptor *p_desc, + PixelFormats &p_pf, + MDSubpass const &p_subpass, + MTL::Texture *p_attachment, + bool p_is_rendering_entire_area, + bool p_has_resolve, + bool p_can_resolve, + bool p_is_stencil) const { + p_desc->setTexture(p_attachment); + + MTL::LoadAction load; + if (!p_is_rendering_entire_area || !isFirstUseOf(p_subpass)) { + load = MTL::LoadActionLoad; + } else { + load = p_is_stencil ? (MTL::LoadAction)stencilLoadAction : (MTL::LoadAction)loadAction; + } + + p_desc->setLoadAction(load); + + MTL::PixelFormat mtlFmt = p_attachment->pixelFormat(); + bool isDepthFormat = p_pf.isDepthFormat(mtlFmt); + bool isStencilFormat = p_pf.isStencilFormat(mtlFmt); + if (isStencilFormat && !p_is_stencil && !isDepthFormat) { + p_desc->setStoreAction(MTL::StoreActionDontCare); + } else { + p_desc->setStoreAction(getMTLStoreAction(p_subpass, p_is_rendering_entire_area, p_has_resolve, p_can_resolve, p_is_stencil)); + } + + return load == MTL::LoadActionClear; + } + + /** Returns whether this attachment should be cleared in the subpass. */ + bool shouldClear(MDSubpass const &p_subpass, bool p_is_stencil) const; +}; + +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDRenderPass { +public: + Vector attachments; + Vector subpasses; + + uint32_t get_sample_count() const { + return attachments.is_empty() ? 
1 : attachments[0].samples; + } + + MDRenderPass(Vector &p_attachments, Vector &p_subpasses); +}; + +#pragma mark - Command Buffer Helpers + +_FORCE_INLINE_ static MTL::Size MTLSizeFromVector3i(Vector3i p_size) { + return MTL::Size{ (NS::UInteger)p_size.x, (NS::UInteger)p_size.y, (NS::UInteger)p_size.z }; +} + +_FORCE_INLINE_ static MTL::Origin MTLOriginFromVector3i(Vector3i p_origin) { + return MTL::Origin{ (NS::UInteger)p_origin.x, (NS::UInteger)p_origin.y, (NS::UInteger)p_origin.z }; +} + +// Clamps the size so that the sum of the origin and size do not exceed the maximum size. +_FORCE_INLINE_ static MTL::Size clampMTLSize(MTL::Size p_size, MTL::Origin p_origin, MTL::Size p_max_size) { + MTL::Size clamped; + clamped.width = MIN(p_size.width, p_max_size.width - p_origin.x); + clamped.height = MIN(p_size.height, p_max_size.height - p_origin.y); + clamped.depth = MIN(p_size.depth, p_max_size.depth - p_origin.z); + return clamped; +} + +API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) +_FORCE_INLINE_ static bool isArrayTexture(MTL::TextureType p_type) { + return (p_type == MTL::TextureType3D || + p_type == MTL::TextureType2DArray || + p_type == MTL::TextureType2DMultisampleArray || + p_type == MTL::TextureType1DArray); +} + +_FORCE_INLINE_ static bool operator==(MTL::Size p_a, MTL::Size p_b) { + return p_a.width == p_b.width && p_a.height == p_b.height && p_a.depth == p_b.depth; +} + +#pragma mark - Pipeline Stage Conversion + +GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability") + +_FORCE_INLINE_ static MTL::Stages convert_src_pipeline_stages_to_metal(BitField p_stages) { + p_stages.clear_flag(RDD::PIPELINE_STAGE_TOP_OF_PIPE_BIT); + + // BOTTOM_OF_PIPE or ALL_COMMANDS means "all prior work must complete". + if (p_stages & (RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT)) { + return MTL::StageAll; + } + + MTL::Stages mtlStages = 0; + + // Vertex stage mappings. 
+ if (p_stages & (RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT | RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT | RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT | RDD::PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | RDD::PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | RDD::PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) { + mtlStages |= MTL::StageVertex; + } + + // Fragment stage mappings. + // Includes resolve and clear_storage, which on Metal use the render pipeline. + if (p_stages & (RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT | RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | RDD::PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT | RDD::PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT | RDD::PIPELINE_STAGE_RESOLVE_BIT | RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT)) { + mtlStages |= MTL::StageFragment; + } + + // Compute stage. + if (p_stages & RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT) { + mtlStages |= MTL::StageDispatch; + } + + // Blit stage (transfer operations). + if (p_stages & RDD::PIPELINE_STAGE_COPY_BIT) { + mtlStages |= MTL::StageBlit; + } + + // ALL_GRAPHICS_BIT special case. + if (p_stages & RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT) { + mtlStages |= (MTL::StageVertex | MTL::StageFragment); + } + + return mtlStages; +} + +_FORCE_INLINE_ static MTL::Stages convert_dst_pipeline_stages_to_metal(BitField p_stages) { + p_stages.clear_flag(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); + + // TOP_OF_PIPE or ALL_COMMANDS means "wait before any work starts". + if (p_stages & (RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT | RDD::PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { + return MTL::StageAll; + } + + MTL::Stages mtlStages = 0; + + // Vertex stage mappings. 
+ if (p_stages & (RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT | RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT | RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT | RDD::PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | RDD::PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | RDD::PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) { + mtlStages |= MTL::StageVertex; + } + + // Fragment stage mappings. + // Includes resolve and clear_storage, which on Metal use the render pipeline. + if (p_stages & (RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT | RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | RDD::PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT | RDD::PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT | RDD::PIPELINE_STAGE_RESOLVE_BIT | RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT)) { + mtlStages |= MTL::StageFragment; + } + + // Compute stage. + if (p_stages & RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT) { + mtlStages |= MTL::StageDispatch; + } + + // Blit stage (transfer operations). + if (p_stages & RDD::PIPELINE_STAGE_COPY_BIT) { + mtlStages |= MTL::StageBlit; + } + + // ALL_GRAPHICS_BIT special case. + if (p_stages & RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT) { + mtlStages |= (MTL::StageVertex | MTL::StageFragment); + } + + return mtlStages; +} + +GODOT_CLANG_WARNING_POP + +#pragma mark - Command Buffer Base + +enum class MDCommandBufferStateType { + None, + Render, + Compute, + Blit, // Only used by Metal 3 +}; + +/// Base struct for render state shared between MTL3 and MTL4 implementations. 
+struct RenderStateBase { + LocalVector viewports; + LocalVector scissors; + std::optional blend_constants; + + // clang-format off + enum DirtyFlag : uint16_t { + DIRTY_NONE = 0, + DIRTY_PIPELINE = 1 << 0, + DIRTY_UNIFORMS = 1 << 1, + DIRTY_PUSH = 1 << 2, + DIRTY_DEPTH = 1 << 3, + DIRTY_VERTEX = 1 << 4, + DIRTY_VIEWPORT = 1 << 5, + DIRTY_SCISSOR = 1 << 6, + DIRTY_BLEND = 1 << 7, + DIRTY_RASTER = 1 << 8, + DIRTY_ALL = (1 << 9) - 1, + }; + // clang-format on + BitField dirty = DIRTY_NONE; +}; + +/// Abstract base class for Metal command buffers, shared between MTL3 and MTL4 implementations. +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDCommandBufferBase { + LocalVector _retained_resources; + +protected: + // From RenderingDevice + static constexpr uint32_t MAX_PUSH_CONSTANT_SIZE = 128; + + MDCommandBufferStateType type = MDCommandBufferStateType::None; + + uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE]; + uint32_t push_constant_data_len = 0; + uint32_t push_constant_binding = UINT32_MAX; + + ::RenderingDeviceDriverMetal *device_driver = nullptr; + + void release_resources(); + + /// Called when push constants are modified to mark the appropriate dirty flags. + virtual void mark_push_constants_dirty() = 0; + + /// Returns a reference to the render state base for viewport/scissor/blend operations. + virtual RenderStateBase &get_render_state_base() = 0; + + /// Returns the view count for the current subpass. + virtual uint32_t get_current_view_count() const = 0; + + /// Accessors for render pass state. 
+ virtual MDRenderPass *get_render_pass() const = 0; + virtual MDFrameBuffer *get_frame_buffer() const = 0; + virtual const MDSubpass &get_current_subpass() const = 0; + virtual LocalVector &get_clear_values() = 0; + virtual const Rect2i &get_render_area() const = 0; + virtual void end_render_encoding() = 0; + + void _populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView p_rects); + uint32_t _populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size); + void _end_render_pass(); + void _render_clear_render_area(); + +public: + virtual ~MDCommandBufferBase() { release_resources(); } + + virtual void begin() = 0; + virtual void commit() = 0; + virtual void end() = 0; + + virtual void bind_pipeline(RDD::PipelineID p_pipeline) = 0; + void encode_push_constant_data(RDD::ShaderID p_shader, VectorView p_data); + + void retain_resource(CFTypeRef p_resource); + +#pragma mark - Render Commands + + virtual void render_bind_uniform_sets(VectorView p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) = 0; + virtual void render_clear_attachments(VectorView p_attachment_clears, VectorView p_rects) = 0; + void render_set_viewport(VectorView p_viewports); + void render_set_scissor(VectorView p_scissors); + void render_set_blend_constants(const Color &p_constants); + virtual void render_begin_pass(RDD::RenderPassID p_render_pass, + RDD::FramebufferID p_frameBuffer, + RDD::CommandBufferType p_cmd_buffer_type, + const Rect2i &p_rect, + VectorView p_clear_values) = 0; + virtual void render_next_subpass() = 0; + virtual void render_draw(uint32_t p_vertex_count, + uint32_t p_instance_count, + uint32_t p_base_vertex, + uint32_t p_first_instance) = 0; + virtual void render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) = 0; + virtual void render_bind_index_buffer(RDD::BufferID 
p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset) = 0; + + virtual void render_draw_indexed(uint32_t p_index_count, + uint32_t p_instance_count, + uint32_t p_first_index, + int32_t p_vertex_offset, + uint32_t p_first_instance) = 0; + + virtual void render_draw_indexed_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) = 0; + virtual void render_draw_indexed_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) = 0; + virtual void render_draw_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) = 0; + virtual void render_draw_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) = 0; + + virtual void render_end_pass() = 0; + +#pragma mark - Compute Commands + + virtual void compute_bind_uniform_sets(VectorView p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) = 0; + virtual void compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) = 0; + virtual void compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset) = 0; + +#pragma mark - Transfer + + virtual void resolve_texture(RDD::TextureID p_src_texture, RDD::TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, RDD::TextureID p_dst_texture, RDD::TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) = 0; + virtual void clear_color_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, const Color &p_color, const RDD::TextureSubresourceRange &p_subresources) = 0; + virtual void clear_depth_stencil_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, float p_depth, uint8_t 
p_stencil, const RDD::TextureSubresourceRange &p_subresources) = 0; + virtual void clear_buffer(RDD::BufferID p_buffer, uint64_t p_offset, uint64_t p_size) = 0; + virtual void copy_buffer(RDD::BufferID p_src_buffer, RDD::BufferID p_dst_buffer, VectorView p_regions) = 0; + virtual void copy_texture(RDD::TextureID p_src_texture, RDD::TextureID p_dst_texture, VectorView p_regions) = 0; + virtual void copy_buffer_to_texture(RDD::BufferID p_src_buffer, RDD::TextureID p_dst_texture, VectorView p_regions) = 0; + virtual void copy_texture_to_buffer(RDD::TextureID p_src_texture, RDD::BufferID p_dst_buffer, VectorView p_regions) = 0; + +#pragma mark - Synchronization + + virtual void pipeline_barrier(BitField p_src_stages, + BitField p_dst_stages, + VectorView p_memory_barriers, + VectorView p_buffer_barriers, + VectorView p_texture_barriers, + VectorView p_acceleration_structure_barriers) = 0; + +#pragma mark - Debugging + + virtual void begin_label(const char *p_label_name, const Color &p_color) = 0; + virtual void end_label() = 0; +}; + +#pragma mark - Uniform Types + +struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) UniformInfo { + uint32_t binding; + BitField active_stages; + MTL::DataType dataType = MTL::DataTypeNone; + MTL::BindingAccess access = MTL::BindingAccessReadOnly; + MTL::ResourceUsage usage = 0; + MTL::TextureType textureType = MTL::TextureType2D; + uint32_t imageFormat = 0; + uint32_t arrayLength = 0; + bool isMultisampled = false; + + struct Indexes { + uint32_t buffer = UINT32_MAX; + uint32_t texture = UINT32_MAX; + uint32_t sampler = UINT32_MAX; + }; + Indexes slot; + Indexes arg_buffer; + + enum class IndexType { + SLOT, + ARG, + }; + + _FORCE_INLINE_ Indexes &get_indexes(IndexType p_type) { + // Returns the index table selected by p_type; written so that every path returns a reference. + if (p_type == IndexType::ARG) { + return arg_buffer; + } + // IndexType::SLOT, and any unexpected value, resolves to `slot`. + return slot; + } +}; + +struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) UniformSet { + LocalVector uniforms; + LocalVector
dynamic_uniforms; + uint32_t buffer_size = 0; +}; + +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) DynamicOffsetLayout { + struct Data { + uint8_t offset : 4; + uint8_t count : 4; + }; + + union { + Data data[MAX_DYNAMIC_BUFFERS]; + uint64_t _val = 0; + }; + +public: + _FORCE_INLINE_ bool is_empty() const { return _val == 0; } + + _FORCE_INLINE_ uint32_t get_count(uint32_t p_set_index) const { + return data[p_set_index].count; + } + + _FORCE_INLINE_ uint32_t get_offset(uint32_t p_set_index) const { + return data[p_set_index].offset; + } + + _FORCE_INLINE_ void set_offset_count(uint32_t p_set_index, uint8_t p_offset, uint8_t p_count) { + data[p_set_index].offset = p_offset; + data[p_set_index].count = p_count; + } + + _FORCE_INLINE_ uint32_t get_offset_index_shift(uint32_t p_set_index, uint32_t p_dynamic_index = 0) const { + return (data[p_set_index].offset + p_dynamic_index) * 4u; + } +}; + +#pragma mark - Shader Types + +class MDLibrary; // Forward declaration for C++ code +struct ShaderCacheEntry; // Forward declaration for C++ code + +enum class ShaderLoadStrategy { + IMMEDIATE, + LAZY, + + /// The default strategy is to load the shader immediately. + DEFAULT = IMMEDIATE, +}; + +/// A Metal shader library. 
+class MDLibrary : public std::enable_shared_from_this { +protected: + ShaderCacheEntry *_entry = nullptr; +#ifdef DEV_ENABLED + NS::SharedPtr _original_source = nullptr; +#endif + + MDLibrary(ShaderCacheEntry *p_entry +#ifdef DEV_ENABLED + , + NS::String *p_source +#endif + ); + +public: + virtual ~MDLibrary(); + + virtual MTL::Library *get_library() = 0; + virtual NS::Error *get_error() = 0; + virtual void set_label(NS::String *p_label); +#ifdef DEV_ENABLED + NS::String *get_original_source() const { return _original_source.get(); } +#endif + + static std::shared_ptr create(ShaderCacheEntry *p_entry, + MTL::Device *p_device, + NS::String *p_source, + MTL::CompileOptions *p_options, + ShaderLoadStrategy p_strategy); + + static std::shared_ptr create(ShaderCacheEntry *p_entry, + MTL::Device *p_device, +#ifdef DEV_ENABLED + NS::String *p_source, +#endif + dispatch_data_t p_data); +}; + +/// A cache entry for a Metal shader library. +struct ShaderCacheEntry { + RenderingDeviceDriverMetal &owner; + /// A hash of the Metal shader source code. + SHA256Digest key; + CharString name; + RD::ShaderStage stage = RD::SHADER_STAGE_VERTEX; + /// Weak reference to the library; allows cache lookup without preventing cleanup. + std::weak_ptr library; + + /// Notify the cache that this entry is no longer needed. 
+ void notify_free() const; + + ShaderCacheEntry(RenderingDeviceDriverMetal &p_owner, SHA256Digest p_key) : + owner(p_owner), key(p_key) { + } + ~ShaderCacheEntry() = default; +}; + +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDShader { +public: + CharString name; + Vector sets; + struct { + BitField stages = {}; + uint32_t binding = UINT32_MAX; + uint32_t size = 0; + } push_constants; + DynamicOffsetLayout dynamic_offset_layout; + bool uses_argument_buffers = true; + + MDShader(CharString p_name, Vector p_sets, bool p_uses_argument_buffers) : + name(p_name), sets(p_sets), uses_argument_buffers(p_uses_argument_buffers) {} + virtual ~MDShader() = default; +}; + +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDComputeShader final : public MDShader { +public: + MTL::Size local = {}; + + std::shared_ptr kernel; + + MDComputeShader(CharString p_name, Vector p_sets, bool p_uses_argument_buffers, std::shared_ptr p_kernel); +}; + +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDRenderShader final : public MDShader { +public: + bool needs_view_mask_buffer = false; + + std::shared_ptr vert; + std::shared_ptr frag; + + MDRenderShader(CharString p_name, + Vector p_sets, + bool p_needs_view_mask_buffer, + bool p_uses_argument_buffers, + std::shared_ptr p_vert, std::shared_ptr p_frag); +}; + +#pragma mark - Uniform Set + +enum StageResourceUsage : uint32_t { + ResourceUnused = 0, + VertexRead = (MTL::ResourceUsageRead << RDD::SHADER_STAGE_VERTEX * 2), + VertexWrite = (MTL::ResourceUsageWrite << RDD::SHADER_STAGE_VERTEX * 2), + FragmentRead = (MTL::ResourceUsageRead << RDD::SHADER_STAGE_FRAGMENT * 2), + FragmentWrite = (MTL::ResourceUsageWrite << RDD::SHADER_STAGE_FRAGMENT * 2), + TesselationControlRead = (MTL::ResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2), + TesselationControlWrite = (MTL::ResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2), + TesselationEvaluationRead = 
(MTL::ResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2), + TesselationEvaluationWrite = (MTL::ResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2), + ComputeRead = (MTL::ResourceUsageRead << RDD::SHADER_STAGE_COMPUTE * 2), + ComputeWrite = (MTL::ResourceUsageWrite << RDD::SHADER_STAGE_COMPUTE * 2), +}; + +typedef LocalVector ResourceVector; +typedef HashMap ResourceUsageMap; + +_FORCE_INLINE_ StageResourceUsage &operator|=(StageResourceUsage &p_a, uint32_t p_b) { + p_a = StageResourceUsage(uint32_t(p_a) | p_b); + return p_a; +} + +_FORCE_INLINE_ StageResourceUsage stage_resource_usage(RDC::ShaderStage p_stage, MTL::ResourceUsage p_usage) { + return StageResourceUsage(p_usage << (p_stage * 2)); +} + +_FORCE_INLINE_ MTL::ResourceUsage resource_usage_for_stage(StageResourceUsage p_usage, RDC::ShaderStage p_stage) { + return MTL::ResourceUsage((p_usage >> (p_stage * 2)) & 0b11); +} + +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDUniformSet { +public: + NS::SharedPtr arg_buffer; + Vector arg_buffer_data; // Stored for dynamic uniform sets. + ResourceUsageMap usage_to_resources; // Used by Metal 3 for resource tracking. 
+ Vector uniforms; +}; + +#pragma mark - Pipeline Types + +enum class MDPipelineType { + None, + Render, + Compute, +}; + +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDPipeline { +public: + MDPipelineType type; + + explicit MDPipeline(MDPipelineType p_type) : + type(p_type) {} + virtual ~MDPipeline() = default; +}; + +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDRenderPipeline final : public MDPipeline { +public: + NS::SharedPtr state; + NS::SharedPtr depth_stencil; + uint32_t push_constant_size = 0; + uint32_t push_constant_stages_mask = 0; + SampleCount sample_count = SampleCount1; + + struct { + MTL::CullMode cull_mode = MTL::CullModeNone; + MTL::TriangleFillMode fill_mode = MTL::TriangleFillModeFill; + MTL::DepthClipMode clip_mode = MTL::DepthClipModeClip; + MTL::Winding winding = MTL::WindingClockwise; + MTL::PrimitiveType render_primitive = MTL::PrimitiveTypePoint; + + struct { + bool enabled = false; + } depth_test; + + struct { + bool enabled = false; + float depth_bias = 0.0; + float slope_scale = 0.0; + float clamp = 0.0; + + template + _FORCE_INLINE_ void apply(T *p_enc) const { + if (!enabled) { + return; + } + p_enc->setDepthBias(depth_bias, slope_scale, clamp); + } + } depth_bias; + + struct { + bool enabled = false; + uint32_t front_reference = 0; + uint32_t back_reference = 0; + + template + _FORCE_INLINE_ void apply(T *p_enc) const { + if (!enabled) { + return; + } + p_enc->setStencilReferenceValues(front_reference, back_reference); + } + } stencil; + + struct { + bool enabled = false; + float r = 0.0; + float g = 0.0; + float b = 0.0; + float a = 0.0; + + template + _FORCE_INLINE_ void apply(T *p_enc) const { + p_enc->setBlendColor(r, g, b, a); + } + } blend; + + template + _FORCE_INLINE_ void apply(T *p_enc) const { + p_enc->setCullMode(cull_mode); + p_enc->setTriangleFillMode(fill_mode); + p_enc->setDepthClipMode(clip_mode); + p_enc->setFrontFacingWinding(winding); + 
depth_bias.apply(p_enc); + stencil.apply(p_enc); + blend.apply(p_enc); + } + + } raster_state; + + MDRenderShader *shader = nullptr; + + MDRenderPipeline() : + MDPipeline(MDPipelineType::Render) {} + ~MDRenderPipeline() final = default; +}; + +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDComputePipeline final : public MDPipeline { +public: + NS::SharedPtr state; + struct { + MTL::Size local = {}; + } compute_state; + + MDComputeShader *shader = nullptr; + + explicit MDComputePipeline(NS::SharedPtr p_state) : + MDPipeline(MDPipelineType::Compute), state(std::move(p_state)) {} + ~MDComputePipeline() final = default; +}; diff --git a/drivers/metal/metal_utils.h b/drivers/metal/metal_utils.h index 54c3509984d..33a31aec4e5 100644 --- a/drivers/metal/metal_utils.h +++ b/drivers/metal/metal_utils.h @@ -30,9 +30,9 @@ #pragma once -#import +#include -#import +#include /// Godot limits the number of dynamic buffers to 8. /// @@ -93,19 +93,32 @@ static constexpr uint64_t round_up_to_alignment(uint64_t p_value, uint64_t p_ali return aligned_value; } +template class Defer { public: - Defer(std::function func) : - func_(func) {} + explicit Defer(F &&f) : + func_(std::forward(f)) {} ~Defer() { func_(); } + // Non-copyable (correct RAII semantics) + Defer(const Defer &) = delete; + Defer &operator=(const Defer &) = delete; + + // Non-movable: ~Defer() always calls func_, so a moved-from Defer would run the action a second time. + Defer(Defer &&) = delete; + Defer &operator=(Defer &&) = delete; + private: - std::function func_; + F func_; }; +// C++17 class template argument deduction. +template +Defer(F &&) -> Defer>; + #define CONCAT_INTERNAL(x, y) x##y #define CONCAT(x, y) CONCAT_INTERNAL(x, y) -#define DEFER const Defer &CONCAT(defer__, __LINE__) = Defer +#define DEFER const auto &CONCAT(defer__, __LINE__) = Defer extern os_log_t LOG_DRIVER; // Used for dynamic tracing.
diff --git a/drivers/metal/pixel_formats.mm b/drivers/metal/pixel_formats.cpp similarity index 89% rename from drivers/metal/pixel_formats.mm rename to drivers/metal/pixel_formats.cpp index 386f1090408..ae1f0cf97c7 100644 --- a/drivers/metal/pixel_formats.mm +++ b/drivers/metal/pixel_formats.cpp @@ -1,5 +1,5 @@ /**************************************************************************/ -/* pixel_formats.mm */ +/* pixel_formats.cpp */ /**************************************************************************/ /* This file is part of: */ /* GODOT ENGINE */ @@ -48,53 +48,53 @@ /* permissions and limitations under the License. */ /**************************************************************************/ -#import "pixel_formats.h" +#include "pixel_formats.h" -#import "metal_utils.h" +#include "metal_utils.h" #if TARGET_OS_IPHONE || TARGET_OS_TV #if !(__IPHONE_OS_VERSION_MAX_ALLOWED >= 160400) // iOS/tvOS 16.4 -#define MTLPixelFormatBC1_RGBA MTLPixelFormatInvalid -#define MTLPixelFormatBC1_RGBA_sRGB MTLPixelFormatInvalid -#define MTLPixelFormatBC2_RGBA MTLPixelFormatInvalid -#define MTLPixelFormatBC2_RGBA_sRGB MTLPixelFormatInvalid -#define MTLPixelFormatBC3_RGBA MTLPixelFormatInvalid -#define MTLPixelFormatBC3_RGBA_sRGB MTLPixelFormatInvalid -#define MTLPixelFormatBC4_RUnorm MTLPixelFormatInvalid -#define MTLPixelFormatBC4_RSnorm MTLPixelFormatInvalid -#define MTLPixelFormatBC5_RGUnorm MTLPixelFormatInvalid -#define MTLPixelFormatBC5_RGSnorm MTLPixelFormatInvalid -#define MTLPixelFormatBC6H_RGBUfloat MTLPixelFormatInvalid -#define MTLPixelFormatBC6H_RGBFloat MTLPixelFormatInvalid -#define MTLPixelFormatBC7_RGBAUnorm MTLPixelFormatInvalid -#define MTLPixelFormatBC7_RGBAUnorm_sRGB MTLPixelFormatInvalid +#define PixelFormatBC1_RGBA PixelFormatInvalid +#define PixelFormatBC1_RGBA_sRGB PixelFormatInvalid +#define PixelFormatBC2_RGBA PixelFormatInvalid +#define PixelFormatBC2_RGBA_sRGB PixelFormatInvalid +#define PixelFormatBC3_RGBA PixelFormatInvalid +#define 
PixelFormatBC3_RGBA_sRGB PixelFormatInvalid +#define PixelFormatBC4_RUnorm PixelFormatInvalid +#define PixelFormatBC4_RSnorm PixelFormatInvalid +#define PixelFormatBC5_RGUnorm PixelFormatInvalid +#define PixelFormatBC5_RGSnorm PixelFormatInvalid +#define PixelFormatBC6H_RGBUfloat PixelFormatInvalid +#define PixelFormatBC6H_RGBFloat PixelFormatInvalid +#define PixelFormatBC7_RGBAUnorm PixelFormatInvalid +#define PixelFormatBC7_RGBAUnorm_sRGB PixelFormatInvalid #endif -#define MTLPixelFormatDepth16Unorm_Stencil8 MTLPixelFormatDepth32Float_Stencil8 -#define MTLPixelFormatDepth24Unorm_Stencil8 MTLPixelFormatInvalid -#define MTLPixelFormatX24_Stencil8 MTLPixelFormatInvalid +#define PixelFormatDepth16Unorm_Stencil8 PixelFormatDepth32Float_Stencil8 +#define PixelFormatDepth24Unorm_Stencil8 PixelFormatInvalid +#define PixelFormatX24_Stencil8 PixelFormatInvalid #endif #if TARGET_OS_TV -#define MTLPixelFormatASTC_4x4_HDR MTLPixelFormatInvalid -#define MTLPixelFormatASTC_5x4_HDR MTLPixelFormatInvalid -#define MTLPixelFormatASTC_5x5_HDR MTLPixelFormatInvalid -#define MTLPixelFormatASTC_6x5_HDR MTLPixelFormatInvalid -#define MTLPixelFormatASTC_6x6_HDR MTLPixelFormatInvalid -#define MTLPixelFormatASTC_8x5_HDR MTLPixelFormatInvalid -#define MTLPixelFormatASTC_8x6_HDR MTLPixelFormatInvalid -#define MTLPixelFormatASTC_8x8_HDR MTLPixelFormatInvalid -#define MTLPixelFormatASTC_10x5_HDR MTLPixelFormatInvalid -#define MTLPixelFormatASTC_10x6_HDR MTLPixelFormatInvalid -#define MTLPixelFormatASTC_10x8_HDR MTLPixelFormatInvalid -#define MTLPixelFormatASTC_10x10_HDR MTLPixelFormatInvalid -#define MTLPixelFormatASTC_12x10_HDR MTLPixelFormatInvalid -#define MTLPixelFormatASTC_12x12_HDR MTLPixelFormatInvalid +#define PixelFormatASTC_4x4_HDR PixelFormatInvalid +#define PixelFormatASTC_5x4_HDR PixelFormatInvalid +#define PixelFormatASTC_5x5_HDR PixelFormatInvalid +#define PixelFormatASTC_6x5_HDR PixelFormatInvalid +#define PixelFormatASTC_6x6_HDR PixelFormatInvalid +#define 
PixelFormatASTC_8x5_HDR PixelFormatInvalid +#define PixelFormatASTC_8x6_HDR PixelFormatInvalid +#define PixelFormatASTC_8x8_HDR PixelFormatInvalid +#define PixelFormatASTC_10x5_HDR PixelFormatInvalid +#define PixelFormatASTC_10x6_HDR PixelFormatInvalid +#define PixelFormatASTC_10x8_HDR PixelFormatInvalid +#define PixelFormatASTC_10x10_HDR PixelFormatInvalid +#define PixelFormatASTC_12x10_HDR PixelFormatInvalid +#define PixelFormatASTC_12x12_HDR PixelFormatInvalid #endif #if !((__MAC_OS_X_VERSION_MAX_ALLOWED >= 140000) || (__IPHONE_OS_VERSION_MAX_ALLOWED >= 170000)) // Xcode 15 -#define MTLVertexFormatFloatRG11B10 MTLVertexFormatInvalid -#define MTLVertexFormatFloatRGB9E5 MTLVertexFormatInvalid +#define VertexFormatFloatRG11B10 VertexFormatInvalid +#define VertexFormatFloatRGB9E5 VertexFormatInvalid #endif template @@ -113,21 +113,21 @@ bool PixelFormats::isSupportedOrSubstitutable(DataFormat p_format) { return getDataFormatDesc(p_format).isSupportedOrSubstitutable(); } -bool PixelFormats::isPVRTCFormat(MTLPixelFormat p_format) { +bool PixelFormats::isPVRTCFormat(MTL::PixelFormat p_format) { #if defined(VISIONOS_ENABLED) return false; #else // Deprecated in SDK 26.0 GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wdeprecated-declarations") switch (p_format) { - case MTLPixelFormatPVRTC_RGBA_2BPP: - case MTLPixelFormatPVRTC_RGBA_2BPP_sRGB: - case MTLPixelFormatPVRTC_RGBA_4BPP: - case MTLPixelFormatPVRTC_RGBA_4BPP_sRGB: - case MTLPixelFormatPVRTC_RGB_2BPP: - case MTLPixelFormatPVRTC_RGB_2BPP_sRGB: - case MTLPixelFormatPVRTC_RGB_4BPP: - case MTLPixelFormatPVRTC_RGB_4BPP_sRGB: + case MTL::PixelFormatPVRTC_RGBA_2BPP: + case MTL::PixelFormatPVRTC_RGBA_2BPP_sRGB: + case MTL::PixelFormatPVRTC_RGBA_4BPP: + case MTL::PixelFormatPVRTC_RGBA_4BPP_sRGB: + case MTL::PixelFormatPVRTC_RGB_2BPP: + case MTL::PixelFormatPVRTC_RGB_2BPP_sRGB: + case MTL::PixelFormatPVRTC_RGB_4BPP: + case MTL::PixelFormatPVRTC_RGB_4BPP_sRGB: return true; default: return false; @@ -140,24 +140,24 @@ MTLFormatType 
PixelFormats::getFormatType(DataFormat p_format) { return getDataFormatDesc(p_format).formatType; } -MTLFormatType PixelFormats::getFormatType(MTLPixelFormat p_format) { +MTLFormatType PixelFormats::getFormatType(MTL::PixelFormat p_format) { return getDataFormatDesc(p_format).formatType; } -MTLPixelFormat PixelFormats::getMTLPixelFormat(DataFormat p_format) { +MTL::PixelFormat PixelFormats::getMTLPixelFormat(DataFormat p_format) { DataFormatDesc &dfDesc = getDataFormatDesc(p_format); - MTLPixelFormat mtlPixFmt = dfDesc.mtlPixelFormat; + MTL::PixelFormat mtlPixFmt = dfDesc.mtlPixelFormat; - // If the MTLPixelFormat is not supported but DataFormat is valid, + // If the MTL::PixelFormat is not supported but DataFormat is valid, // attempt to substitute a different format. - if (mtlPixFmt == MTLPixelFormatInvalid && p_format != RD::DATA_FORMAT_MAX && dfDesc.chromaSubsamplingPlaneCount <= 1) { + if (mtlPixFmt == MTL::PixelFormatInvalid && p_format != RD::DATA_FORMAT_MAX && dfDesc.chromaSubsamplingPlaneCount <= 1) { mtlPixFmt = dfDesc.mtlPixelFormatSubstitute; } return mtlPixFmt; } -RD::DataFormat PixelFormats::getDataFormat(MTLPixelFormat p_format) { +RD::DataFormat PixelFormats::getDataFormat(MTL::PixelFormat p_format) { return getMTLPixelFormatDesc(p_format).dataFormat; } @@ -165,7 +165,7 @@ uint32_t PixelFormats::getBytesPerBlock(DataFormat p_format) { return getDataFormatDesc(p_format).bytesPerBlock; } -uint32_t PixelFormats::getBytesPerBlock(MTLPixelFormat p_format) { +uint32_t PixelFormats::getBytesPerBlock(MTL::PixelFormat p_format) { return getDataFormatDesc(p_format).bytesPerBlock; } @@ -181,7 +181,7 @@ float PixelFormats::getBytesPerTexel(DataFormat p_format) { return getDataFormatDesc(p_format).bytesPerTexel(); } -float PixelFormats::getBytesPerTexel(MTLPixelFormat p_format) { +float PixelFormats::getBytesPerTexel(MTL::PixelFormat p_format) { return getDataFormatDesc(p_format).bytesPerTexel(); } @@ -190,7 +190,7 @@ size_t 
PixelFormats::getBytesPerRow(DataFormat p_format, uint32_t p_texels_per_r return Math::division_round_up(p_texels_per_row, dfDesc.blockTexelSize.width) * dfDesc.bytesPerBlock; } -size_t PixelFormats::getBytesPerRow(MTLPixelFormat p_format, uint32_t p_texels_per_row) { +size_t PixelFormats::getBytesPerRow(MTL::PixelFormat p_format, uint32_t p_texels_per_row) { DataFormatDesc &dfDesc = getDataFormatDesc(p_format); return Math::division_round_up(p_texels_per_row, dfDesc.blockTexelSize.width) * dfDesc.bytesPerBlock; } @@ -199,7 +199,7 @@ size_t PixelFormats::getBytesPerLayer(DataFormat p_format, size_t p_bytes_per_ro return Math::division_round_up(p_texel_rows_per_layer, getDataFormatDesc(p_format).blockTexelSize.height) * p_bytes_per_row; } -size_t PixelFormats::getBytesPerLayer(MTLPixelFormat p_format, size_t p_bytes_per_row, uint32_t p_texel_rows_per_layer) { +size_t PixelFormats::getBytesPerLayer(MTL::PixelFormat p_format, size_t p_bytes_per_row, uint32_t p_texel_rows_per_layer) { return Math::division_round_up(p_texel_rows_per_layer, getDataFormatDesc(p_format).blockTexelSize.height) * p_bytes_per_row; } @@ -211,7 +211,7 @@ MTLFmtCaps PixelFormats::getCapabilities(DataFormat p_format, bool p_extended) { return getCapabilities(getDataFormatDesc(p_format).mtlPixelFormat, p_extended); } -MTLFmtCaps PixelFormats::getCapabilities(MTLPixelFormat p_format, bool p_extended) { +MTLFmtCaps PixelFormats::getCapabilities(MTL::PixelFormat p_format, bool p_extended) { MTLFormatDesc &mtlDesc = getMTLPixelFormatDesc(p_format); MTLFmtCaps caps = mtlDesc.mtlFmtCaps; if (!p_extended || mtlDesc.mtlViewClass == MTLViewClass::None) { @@ -226,11 +226,11 @@ MTLFmtCaps PixelFormats::getCapabilities(MTLPixelFormat p_format, bool p_extende return caps; } -MTLVertexFormat PixelFormats::getMTLVertexFormat(DataFormat p_format) { +MTL::VertexFormat PixelFormats::getMTLVertexFormat(DataFormat p_format) { DataFormatDesc &dfDesc = getDataFormatDesc(p_format); - MTLVertexFormat format = 
dfDesc.mtlVertexFormat; + MTL::VertexFormat format = dfDesc.mtlVertexFormat; - if (format == MTLVertexFormatInvalid) { + if (format == MTL::VertexFormatInvalid) { String errMsg; errMsg += "DataFormat "; errMsg += dfDesc.name; @@ -254,22 +254,22 @@ DataFormatDesc &PixelFormats::getDataFormatDesc(DataFormat p_format) { return _data_format_descs[p_format]; } -DataFormatDesc &PixelFormats::getDataFormatDesc(MTLPixelFormat p_format) { +DataFormatDesc &PixelFormats::getDataFormatDesc(MTL::PixelFormat p_format) { return getDataFormatDesc(getMTLPixelFormatDesc(p_format).dataFormat); } -// Return a reference to the Metal format descriptor corresponding to the MTLPixelFormat. -MTLFormatDesc &PixelFormats::getMTLPixelFormatDesc(MTLPixelFormat p_format) { +// Return a reference to the Metal format descriptor corresponding to the MTL::PixelFormat. +MTLFormatDesc &PixelFormats::getMTLPixelFormatDesc(MTL::PixelFormat p_format) { return _mtl_pixel_format_descs[p_format]; } -// Return a reference to the Metal format descriptor corresponding to the MTLVertexFormat. -MTLFormatDesc &PixelFormats::getMTLVertexFormatDesc(MTLVertexFormat p_format) { +// Return a reference to the Metal format descriptor corresponding to the MTL::VertexFormat. 
+MTLFormatDesc &PixelFormats::getMTLVertexFormatDesc(MTL::VertexFormat p_format) { return _mtl_vertex_format_descs[p_format]; } -PixelFormats::PixelFormats(id p_device, const MetalFeatures &p_feat) : - device(p_device) { +PixelFormats::PixelFormats(MTL::Device *p_device, const MetalFeatures &p_feat) : + device(p_device->retain()) { initMTLPixelFormatCapabilities(); initMTLVertexFormatCapabilities(p_feat); modifyMTLFormatCapabilities(p_feat); @@ -278,9 +278,13 @@ PixelFormats::PixelFormats(id p_device, const MetalFeatures &p_feat) buildDFFormatMaps(); } +PixelFormats::~PixelFormats() { + device->release(); +} + #define addDataFormatDescFull(DATA_FMT, MTL_FMT, MTL_FMT_ALT, MTL_VTX_FMT, MTL_VTX_FMT_ALT, CSPC, CSCB, BLK_W, BLK_H, BLK_BYTE_CNT, MVK_FMT_TYPE, SWIZ_R, SWIZ_G, SWIZ_B, SWIZ_A) \ dfFmt = RD::DATA_FORMAT_##DATA_FMT; \ - _data_format_descs[dfFmt] = { dfFmt, MTLPixelFormat##MTL_FMT, MTLPixelFormat##MTL_FMT_ALT, MTLVertexFormat##MTL_VTX_FMT, MTLVertexFormat##MTL_VTX_FMT_ALT, \ + _data_format_descs[dfFmt] = { dfFmt, MTL::PixelFormat##MTL_FMT, MTL::PixelFormat##MTL_FMT_ALT, MTL::VertexFormat##MTL_VTX_FMT, MTL::VertexFormat##MTL_VTX_FMT_ALT, \ CSPC, CSCB, { BLK_W, BLK_H }, BLK_BYTE_CNT, MTLFormatType::MVK_FMT_TYPE, \ { RD::TEXTURE_SWIZZLE_##SWIZ_R, RD::TEXTURE_SWIZZLE_##SWIZ_G, RD::TEXTURE_SWIZZLE_##SWIZ_B, RD::TEXTURE_SWIZZLE_##SWIZ_A }, \ "DATA_FORMAT_" #DATA_FMT, false } @@ -577,14 +581,14 @@ void PixelFormats::initDataFormatCapabilities() { addDfFormatDescChromaSubsampling(G16_B16_R16_3PLANE_444_UNORM, Invalid, 3, 16, 1, 1, 6); } -void PixelFormats::addMTLPixelFormatDescImpl(MTLPixelFormat p_pix_fmt, MTLPixelFormat p_pix_fmt_linear, +void PixelFormats::addMTLPixelFormatDescImpl(MTL::PixelFormat p_pix_fmt, MTL::PixelFormat p_pix_fmt_linear, MTLViewClass p_view_class, MTLFmtCaps p_fmt_caps, const char *p_name) { _mtl_pixel_format_descs[p_pix_fmt] = { .mtlPixelFormat = p_pix_fmt, DataFormat::DATA_FORMAT_MAX, p_fmt_caps, p_view_class, p_pix_fmt_linear, p_name }; } 
#define addMTLPixelFormatDescFull(mtlFmt, mtlFmtLinear, viewClass, appleGPUCaps) \ - addMTLPixelFormatDescImpl(MTLPixelFormat##mtlFmt, MTLPixelFormat##mtlFmtLinear, MTLViewClass::viewClass, \ - appleGPUCaps, "MTLPixelFormat" #mtlFmt) + addMTLPixelFormatDescImpl(MTL::PixelFormat##mtlFmt, MTL::PixelFormat##mtlFmtLinear, MTLViewClass::viewClass, \ + appleGPUCaps, "MTL::PixelFormat" #mtlFmt) #define addMTLPixelFormatDesc(mtlFmt, viewClass, appleGPUCaps) \ addMTLPixelFormatDescFull(mtlFmt, mtlFmt, viewClass, kMTLFmtCaps##appleGPUCaps) @@ -602,8 +606,8 @@ void PixelFormats::addMTLPixelFormatDescImpl(MTLPixelFormat p_pix_fmt, MTLPixelF void PixelFormats::initMTLPixelFormatCapabilities() { _mtl_pixel_format_descs.reserve(1024); - // MTLPixelFormatInvalid must come first. Use addMTLPixelFormatDescImpl to avoid guard code. - addMTLPixelFormatDescImpl(MTLPixelFormatInvalid, MTLPixelFormatInvalid, MTLViewClass::None, kMTLFmtCapsNone, "MTLPixelFormatInvalid"); + // MTL::PixelFormatInvalid must come first. Use addMTLPixelFormatDescImpl to avoid guard code. + addMTLPixelFormatDescImpl(MTL::PixelFormatInvalid, MTL::PixelFormatInvalid, MTLViewClass::None, kMTLFmtCapsNone, "MTL::PixelFormatInvalid"); // Ordinary 8-bit pixel formats. addMTLPixelFormatDesc(A8Unorm, Color8, All); @@ -779,23 +783,23 @@ void PixelFormats::initMTLPixelFormatCapabilities() { } // If necessary, resize vector with empty elements. 
-void PixelFormats::addMTLVertexFormatDescImpl(MTLVertexFormat mtlVtxFmt, MTLFmtCaps vtxCap, const char *name) { +void PixelFormats::addMTLVertexFormatDescImpl(MTL::VertexFormat mtlVtxFmt, MTLFmtCaps vtxCap, const char *name) { if (mtlVtxFmt >= _mtl_vertex_format_descs.size()) { _mtl_vertex_format_descs.resize(mtlVtxFmt + 1); } - _mtl_vertex_format_descs[mtlVtxFmt] = { .mtlVertexFormat = mtlVtxFmt, RD::DATA_FORMAT_MAX, vtxCap, MTLViewClass::None, MTLPixelFormatInvalid, name }; + _mtl_vertex_format_descs[mtlVtxFmt] = { .mtlVertexFormat = mtlVtxFmt, RD::DATA_FORMAT_MAX, vtxCap, MTLViewClass::None, MTL::PixelFormatInvalid, name }; } -// Check mtlVtx exists on platform, to avoid overwriting the MTLVertexFormatInvalid entry. +// Check mtlVtx exists on platform, to avoid overwriting the MTL::VertexFormatInvalid entry. #define addMTLVertexFormatDesc(mtlVtx) \ - if (MTLVertexFormat##mtlVtx) { \ - addMTLVertexFormatDescImpl(MTLVertexFormat##mtlVtx, kMTLFmtCapsVertex, "MTLVertexFormat" #mtlVtx); \ + if (MTL::VertexFormat##mtlVtx) { \ + addMTLVertexFormatDescImpl(MTL::VertexFormat##mtlVtx, kMTLFmtCapsVertex, "MTL::VertexFormat" #mtlVtx); \ } void PixelFormats::initMTLVertexFormatCapabilities(const MetalFeatures &p_feat) { - _mtl_vertex_format_descs.resize(MTLVertexFormatHalf + 3); - // MTLVertexFormatInvalid must come first. Use addMTLVertexFormatDescImpl to avoid guard code. - addMTLVertexFormatDescImpl(MTLVertexFormatInvalid, kMTLFmtCapsNone, "MTLVertexFormatInvalid"); + _mtl_vertex_format_descs.resize(MTL::VertexFormatHalf + 3); + // MTL::VertexFormatInvalid must come first. Use addMTLVertexFormatDescImpl to avoid guard code. 
+ addMTLVertexFormatDescImpl(MTL::VertexFormatInvalid, kMTLFmtCapsNone, "MTL::VertexFormatInvalid"); addMTLVertexFormatDesc(UChar2Normalized); addMTLVertexFormatDesc(Char2Normalized); @@ -862,8 +866,8 @@ void PixelFormats::initMTLVertexFormatCapabilities(const MetalFeatures &p_feat) addMTLVertexFormatDesc(UChar4Normalized_BGRA); - if (@available(macos 14.0, ios 17.0, tvos 17.0, *)) { - if (p_feat.highestFamily >= MTLGPUFamilyApple5) { + if (__builtin_available(macos 14.0, ios 17.0, tvos 17.0, *)) { + if (p_feat.highestFamily >= MTL::GPUFamilyApple5) { addMTLVertexFormatDesc(FloatRG11B10); addMTLVertexFormatDesc(FloatRGB9E5); } @@ -871,9 +875,9 @@ void PixelFormats::initMTLVertexFormatCapabilities(const MetalFeatures &p_feat) } // Return a reference to the format capabilities, so the caller can manipulate them. -// Check mtlPixFmt exists on platform, to avoid overwriting the MTLPixelFormatInvalid entry. +// Check mtlPixFmt exists on platform, to avoid overwriting the MTL::PixelFormatInvalid entry. // When returning the dummy, reset it on each access because it can be written to by caller. 
-MTLFmtCaps &PixelFormats::getMTLPixelFormatCapsIf(MTLPixelFormat mtlPixFmt, bool cond) { +MTLFmtCaps &PixelFormats::getMTLPixelFormatCapsIf(MTL::PixelFormat mtlPixFmt, bool cond) { static MTLFmtCaps dummyFmtCaps; if (mtlPixFmt && cond) { return getMTLPixelFormatDesc(mtlPixFmt).mtlFmtCaps; @@ -883,22 +887,22 @@ MTLFmtCaps &PixelFormats::getMTLPixelFormatCapsIf(MTLPixelFormat mtlPixFmt, bool } } -#define setMTLPixFmtCapsIf(cond, mtlFmt, caps) getMTLPixelFormatCapsIf(MTLPixelFormat##mtlFmt, cond) = kMTLFmtCaps##caps; +#define setMTLPixFmtCapsIf(cond, mtlFmt, caps) getMTLPixelFormatCapsIf(MTL::PixelFormat##mtlFmt, cond) = kMTLFmtCaps##caps; #define setMTLPixFmtCapsIfGPU(gpuFam, mtlFmt, caps) setMTLPixFmtCapsIf(gpuCaps.supports##gpuFam, mtlFmt, caps) -#define enableMTLPixFmtCapsIf(cond, mtlFmt, caps) flags::set(getMTLPixelFormatCapsIf(MTLPixelFormat##mtlFmt, cond), kMTLFmtCaps##caps); -#define enableMTLPixFmtCapsIfGPU(gpuFam, mtlFmt, caps) enableMTLPixFmtCapsIf(p_feat.highestFamily >= MTLGPUFamily##gpuFam, mtlFmt, caps) +#define enableMTLPixFmtCapsIf(cond, mtlFmt, caps) flags::set(getMTLPixelFormatCapsIf(MTL::PixelFormat##mtlFmt, cond), kMTLFmtCaps##caps); +#define enableMTLPixFmtCapsIfGPU(gpuFam, mtlFmt, caps) enableMTLPixFmtCapsIf(p_feat.highestFamily >= MTL::GPUFamily##gpuFam, mtlFmt, caps) -#define disableMTLPixFmtCapsIf(cond, mtlFmt, caps) flags::clear(getMTLPixelFormatCapsIf(MTLPixelFormat##mtlFmt, cond), kMTLFmtCaps##caps); +#define disableMTLPixFmtCapsIf(cond, mtlFmt, caps) flags::clear(getMTLPixelFormatCapsIf(MTL::PixelFormat##mtlFmt, cond), kMTLFmtCaps##caps); // Modifies the format capability tables based on the capabilities of the specific MTLDevice. void PixelFormats::modifyMTLFormatCapabilities(const MetalFeatures &p_feat) { bool noVulkanSupport = false; // Indicated supported in Metal but not Vulkan or SPIR-V. 
bool notMac = !p_feat.supportsMac; - bool iosOnly1 = notMac && p_feat.highestFamily < MTLGPUFamilyApple2; - bool iosOnly2 = notMac && p_feat.highestFamily < MTLGPUFamilyApple3; - bool iosOnly6 = notMac && p_feat.highestFamily < MTLGPUFamilyApple7; - bool iosOnly8 = notMac && p_feat.highestFamily < MTLGPUFamilyApple9; + bool iosOnly1 = notMac && p_feat.highestFamily < MTL::GPUFamilyApple2; + bool iosOnly2 = notMac && p_feat.highestFamily < MTL::GPUFamilyApple3; + bool iosOnly6 = notMac && p_feat.highestFamily < MTL::GPUFamilyApple7; + bool iosOnly8 = notMac && p_feat.highestFamily < MTL::GPUFamilyApple9; setMTLPixFmtCapsIf(iosOnly2, A8Unorm, RF); setMTLPixFmtCapsIf(iosOnly1, R8Unorm_sRGB, RFCMRB); @@ -934,7 +938,7 @@ void PixelFormats::modifyMTLFormatCapabilities(const MetalFeatures &p_feat) { // Metal supports reading both R&G into as one 64-bit atomic operation, but Vulkan and SPIR-V do not. // Including this here so we remember to update this if support is added to Vulkan in the future. - bool atomic64 = noVulkanSupport && (p_feat.highestFamily >= MTLGPUFamilyApple9 || (p_feat.highestFamily >= MTLGPUFamilyApple8 && p_feat.supportsMac)); + bool atomic64 = noVulkanSupport && (p_feat.highestFamily >= MTL::GPUFamilyApple9 || (p_feat.highestFamily >= MTL::GPUFamilyApple8 && p_feat.supportsMac)); enableMTLPixFmtCapsIf(atomic64, RG32Uint, Atomic); enableMTLPixFmtCapsIf(atomic64, RG32Sint, Atomic); @@ -961,7 +965,7 @@ void PixelFormats::modifyMTLFormatCapabilities(const MetalFeatures &p_feat) { enableMTLPixFmtCapsIf(floatFB, RGBA32Float, Filter); enableMTLPixFmtCapsIf(floatFB, RGBA32Float, Blend); // Undocumented by confirmed through testing. 
- bool noHDR_ASTC = p_feat.highestFamily < MTLGPUFamilyApple6; + bool noHDR_ASTC = p_feat.highestFamily < MTL::GPUFamilyApple6; setMTLPixFmtCapsIf(noHDR_ASTC, ASTC_4x4_HDR, None); setMTLPixFmtCapsIf(noHDR_ASTC, ASTC_5x4_HDR, None); setMTLPixFmtCapsIf(noHDR_ASTC, ASTC_5x5_HDR, None); @@ -1021,13 +1025,13 @@ void PixelFormats::buildDFFormatMaps() { mtlDesc.dataFormat = dfDesc.dataFormat; } if (!mtlDesc.isSupported()) { - dfDesc.mtlPixelFormat = MTLPixelFormatInvalid; + dfDesc.mtlPixelFormat = MTL::PixelFormatInvalid; } } if (dfDesc.mtlPixelFormatSubstitute) { MTLFormatDesc &mtlDesc = getMTLPixelFormatDesc(dfDesc.mtlPixelFormatSubstitute); if (!mtlDesc.isSupported()) { - dfDesc.mtlPixelFormatSubstitute = MTLPixelFormatInvalid; + dfDesc.mtlPixelFormatSubstitute = MTL::PixelFormatInvalid; } } if (dfDesc.mtlVertexFormat) { @@ -1036,13 +1040,13 @@ void PixelFormats::buildDFFormatMaps() { mtlDesc.dataFormat = dfDesc.dataFormat; } if (!mtlDesc.isSupported()) { - dfDesc.mtlVertexFormat = MTLVertexFormatInvalid; + dfDesc.mtlVertexFormat = MTL::VertexFormatInvalid; } } if (dfDesc.mtlVertexFormatSubstitute) { MTLFormatDesc &mtlDesc = getMTLVertexFormatDesc(dfDesc.mtlVertexFormatSubstitute); if (!mtlDesc.isSupported()) { - dfDesc.mtlVertexFormatSubstitute = MTLVertexFormatInvalid; + dfDesc.mtlVertexFormatSubstitute = MTL::VertexFormatInvalid; } } } diff --git a/drivers/metal/pixel_formats.h b/drivers/metal/pixel_formats.h index 04f1d9350db..0f1fb19d1d3 100644 --- a/drivers/metal/pixel_formats.h +++ b/drivers/metal/pixel_formats.h @@ -54,12 +54,15 @@ GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wdeprecated-declarations") -#import "inflection_map.h" -#import "metal_device_properties.h" +#include "inflection_map.h" +#include "metal_device_properties.h" #include "servers/rendering/rendering_device.h" -#import +#ifdef __OBJC__ +#include +#endif +#include #include #pragma mark - @@ -197,10 +200,10 @@ struct ComponentMapping { /** Describes the properties of a DataFormat, including the 
corresponding Metal pixel and vertex format. */ struct DataFormatDesc { RD::DataFormat dataFormat; - MTLPixelFormat mtlPixelFormat; - MTLPixelFormat mtlPixelFormatSubstitute; - MTLVertexFormat mtlVertexFormat; - MTLVertexFormat mtlVertexFormatSubstitute; + MTL::PixelFormat mtlPixelFormat; + MTL::PixelFormat mtlPixelFormatSubstitute; + MTL::VertexFormat mtlVertexFormat; + MTL::VertexFormat mtlVertexFormatSubstitute; uint8_t chromaSubsamplingPlaneCount; uint8_t chromaSubsamplingComponentBits; Extent2D blockTexelSize; @@ -212,11 +215,11 @@ struct DataFormatDesc { inline double bytesPerTexel() const { return (double)bytesPerBlock / (double)(blockTexelSize.width * blockTexelSize.height); } - inline bool isSupported() const { return (mtlPixelFormat != MTLPixelFormatInvalid || chromaSubsamplingPlaneCount > 1); } - inline bool isSupportedOrSubstitutable() const { return isSupported() || (mtlPixelFormatSubstitute != MTLPixelFormatInvalid); } + inline bool isSupported() const { return (mtlPixelFormat != MTL::PixelFormatInvalid || chromaSubsamplingPlaneCount > 1); } + inline bool isSupportedOrSubstitutable() const { return isSupported() || (mtlPixelFormatSubstitute != MTL::PixelFormatInvalid); } - inline bool vertexIsSupported() const { return (mtlVertexFormat != MTLVertexFormatInvalid); } - inline bool vertexIsSupportedOrSubstitutable() const { return vertexIsSupported() || (mtlVertexFormatSubstitute != MTLVertexFormatInvalid); } + inline bool vertexIsSupported() const { return (mtlVertexFormat != MTL::VertexFormatInvalid); } + inline bool vertexIsSupportedOrSubstitutable() const { return vertexIsSupported() || (mtlVertexFormatSubstitute != MTL::VertexFormatInvalid); } bool needsSwizzle() const { return (componentMapping.r != RD::TEXTURE_SWIZZLE_IDENTITY || @@ -226,19 +229,19 @@ struct DataFormatDesc { } }; -/** Describes the properties of a MTLPixelFormat or MTLVertexFormat. */ +/** Describes the properties of a MTL::PixelFormat or MTL::VertexFormat. 
*/ struct MTLFormatDesc { union { - MTLPixelFormat mtlPixelFormat; - MTLVertexFormat mtlVertexFormat; + MTL::PixelFormat mtlPixelFormat; + MTL::VertexFormat mtlVertexFormat; }; RD::DataFormat dataFormat = RD::DATA_FORMAT_MAX; MTLFmtCaps mtlFmtCaps; MTLViewClass mtlViewClass; - MTLPixelFormat mtlPixelFormatLinear; + MTL::PixelFormat mtlPixelFormatLinear; const char *name = nullptr; - inline bool isSupported() const { return (mtlPixelFormat != MTLPixelFormatInvalid) && (mtlFmtCaps != kMTLFmtCapsNone); } + inline bool isSupported() const { return (mtlPixelFormat != MTL::PixelFormatInvalid) && (mtlFmtCaps != kMTLFmtCapsNone); } }; class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) PixelFormats { @@ -251,14 +254,14 @@ public: /** Returns whether the DataFormat is supported by this implementation, or can be substituted by one that is. */ bool isSupportedOrSubstitutable(DataFormat p_format); - /** Returns whether the specified Metal MTLPixelFormat can be used as a depth format. */ - _FORCE_INLINE_ bool isDepthFormat(MTLPixelFormat p_format) { + /** Returns whether the specified Metal MTL::PixelFormat can be used as a depth format. */ + _FORCE_INLINE_ bool isDepthFormat(MTL::PixelFormat p_format) { switch (p_format) { - case MTLPixelFormatDepth32Float: - case MTLPixelFormatDepth16Unorm: - case MTLPixelFormatDepth32Float_Stencil8: + case MTL::PixelFormatDepth32Float: + case MTL::PixelFormatDepth16Unorm: + case MTL::PixelFormatDepth32Float_Stencil8: #if TARGET_OS_OSX - case MTLPixelFormatDepth24Unorm_Stencil8: + case MTL::PixelFormatDepth24Unorm_Stencil8: #endif return true; default: @@ -266,42 +269,42 @@ public: } } - /** Returns whether the specified Metal MTLPixelFormat can be used as a stencil format. */ - _FORCE_INLINE_ bool isStencilFormat(MTLPixelFormat p_format) { + /** Returns whether the specified Metal MTL::PixelFormat can be used as a stencil format. 
*/ + _FORCE_INLINE_ bool isStencilFormat(MTL::PixelFormat p_format) { switch (p_format) { - case MTLPixelFormatStencil8: + case MTL::PixelFormatStencil8: #if TARGET_OS_OSX - case MTLPixelFormatDepth24Unorm_Stencil8: - case MTLPixelFormatX24_Stencil8: + case MTL::PixelFormatDepth24Unorm_Stencil8: + case MTL::PixelFormatX24_Stencil8: #endif - case MTLPixelFormatDepth32Float_Stencil8: - case MTLPixelFormatX32_Stencil8: + case MTL::PixelFormatDepth32Float_Stencil8: + case MTL::PixelFormatX32_Stencil8: return true; default: return false; } } - /** Returns whether the specified Metal MTLPixelFormat is a PVRTC format. */ - bool isPVRTCFormat(MTLPixelFormat p_format); + /** Returns whether the specified Metal MTL::PixelFormat is a PVRTC format. */ + bool isPVRTCFormat(MTL::PixelFormat p_format); /** Returns the format type corresponding to the specified Godot pixel format, */ MTLFormatType getFormatType(DataFormat p_format); - /** Returns the format type corresponding to the specified Metal MTLPixelFormat, */ - MTLFormatType getFormatType(MTLPixelFormat p_format); + /** Returns the format type corresponding to the specified Metal MTL::PixelFormat, */ + MTLFormatType getFormatType(MTL::PixelFormat p_format); /** - * Returns the Metal MTLPixelFormat corresponding to the specified Godot pixel - * or returns MTLPixelFormatInvalid if no corresponding MTLPixelFormat exists. + * Returns the Metal MTL::PixelFormat corresponding to the specified Godot pixel + * or returns MTL::PixelFormatInvalid if no corresponding MTL::PixelFormat exists. */ - MTLPixelFormat getMTLPixelFormat(DataFormat p_format); + MTL::PixelFormat getMTLPixelFormat(DataFormat p_format); /** - * Returns the DataFormat corresponding to the specified Metal MTLPixelFormat, + * Returns the DataFormat corresponding to the specified Metal MTL::PixelFormat, * or returns DATA_FORMAT_MAX if no corresponding DataFormat exists. 
*/ - DataFormat getDataFormat(MTLPixelFormat p_format); + DataFormat getDataFormat(MTL::PixelFormat p_format); /** * Returns the size, in bytes, of a texel block of the specified Godot pixel. @@ -313,7 +316,7 @@ public: * Returns the size, in bytes, of a texel block of the specified Metal format. * For uncompressed formats, the returned value corresponds to the size in bytes of a single texel. */ - uint32_t getBytesPerBlock(MTLPixelFormat p_format); + uint32_t getBytesPerBlock(MTL::PixelFormat p_format); /** Returns the number of planes of the specified chroma-subsampling (YCbCr) DataFormat */ uint8_t getChromaSubsamplingPlaneCount(DataFormat p_format); @@ -331,7 +334,7 @@ public: * Returns the size, in bytes, of a texel of the specified Metal format. * The returned value may be fractional for certain compressed formats. */ - float getBytesPerTexel(MTLPixelFormat p_format); + float getBytesPerTexel(MTL::PixelFormat p_format); /** * Returns the size, in bytes, of a row of texels of the specified Godot pixel format. @@ -349,7 +352,7 @@ public: * and texelsPerRow should specify the width in texels, not blocks. The result is rounded * up if texelsPerRow is not an integer multiple of the compression block width. */ - size_t getBytesPerRow(MTLPixelFormat p_format, uint32_t p_texels_per_row); + size_t getBytesPerRow(MTL::PixelFormat p_format, uint32_t p_texels_per_row); /** * Returns the size, in bytes, of a texture layer of the specified Godot pixel format. @@ -366,7 +369,7 @@ public: * and p_texel_rows_per_layer should specify the height in texels, not blocks. The result is * rounded up if p_texel_rows_per_layer is not an integer multiple of the compression block height. 
*/ - size_t getBytesPerLayer(MTLPixelFormat p_format, size_t p_bytes_per_row, uint32_t p_texel_rows_per_layer); + size_t getBytesPerLayer(MTL::PixelFormat p_format, size_t p_bytes_per_row, uint32_t p_texel_rows_per_layer); /** Returns whether or not the specified Godot format requires swizzling to use with Metal. */ bool needsSwizzle(DataFormat p_format); @@ -375,37 +378,38 @@ public: MTLFmtCaps getCapabilities(DataFormat p_format, bool p_extended = false); /** Returns the Metal format capabilities supported by the specified Metal format. */ - MTLFmtCaps getCapabilities(MTLPixelFormat p_format, bool p_extended = false); + MTLFmtCaps getCapabilities(MTL::PixelFormat p_format, bool p_extended = false); /** - * Returns the Metal MTLVertexFormat corresponding to the specified + * Returns the Metal MTL::VertexFormat corresponding to the specified * DataFormat as used as a vertex attribute format. */ - MTLVertexFormat getMTLVertexFormat(DataFormat p_format); + MTL::VertexFormat getMTLVertexFormat(DataFormat p_format); #pragma mark Construction - explicit PixelFormats(id p_device, const MetalFeatures &p_feat); + explicit PixelFormats(MTL::Device *p_device, const MetalFeatures &p_feat); + ~PixelFormats(); protected: DataFormatDesc &getDataFormatDesc(DataFormat p_format); - DataFormatDesc &getDataFormatDesc(MTLPixelFormat p_format); - MTLFormatDesc &getMTLPixelFormatDesc(MTLPixelFormat p_format); - MTLFmtCaps &getMTLPixelFormatCapsIf(MTLPixelFormat mtlPixFmt, bool cond); - MTLFormatDesc &getMTLVertexFormatDesc(MTLVertexFormat p_format); + DataFormatDesc &getDataFormatDesc(MTL::PixelFormat p_format); + MTLFormatDesc &getMTLPixelFormatDesc(MTL::PixelFormat p_format); + MTLFmtCaps &getMTLPixelFormatCapsIf(MTL::PixelFormat mtlPixFmt, bool cond); + MTLFormatDesc &getMTLVertexFormatDesc(MTL::VertexFormat p_format); void initDataFormatCapabilities(); void initMTLPixelFormatCapabilities(); void initMTLVertexFormatCapabilities(const MetalFeatures &p_feat); void 
modifyMTLFormatCapabilities(const MetalFeatures &p_feat); void buildDFFormatMaps(); - void addMTLPixelFormatDescImpl(MTLPixelFormat p_pix_fmt, MTLPixelFormat p_pix_fmt_linear, + void addMTLPixelFormatDescImpl(MTL::PixelFormat p_pix_fmt, MTL::PixelFormat p_pix_fmt_linear, MTLViewClass p_view_class, MTLFmtCaps p_fmt_caps, const char *p_name); - void addMTLVertexFormatDescImpl(MTLVertexFormat p_vert_fmt, MTLFmtCaps p_vert_caps, const char *name); + void addMTLVertexFormatDescImpl(MTL::VertexFormat p_vert_fmt, MTLFmtCaps p_vert_caps, const char *name); - id device; + MTL::Device *device; InflectionMap _data_format_descs; - InflectionMap _mtl_pixel_format_descs; // The actual last enum value is not available on iOS. + InflectionMap _mtl_pixel_format_descs; // The actual last enum value is not available on iOS. TightLocalVector _mtl_vertex_format_descs; }; diff --git a/drivers/metal/rendering_context_driver_metal.mm b/drivers/metal/rendering_context_driver_metal.cpp similarity index 65% rename from drivers/metal/rendering_context_driver_metal.mm rename to drivers/metal/rendering_context_driver_metal.cpp index 51b752f2a04..2f2769fe318 100644 --- a/drivers/metal/rendering_context_driver_metal.mm +++ b/drivers/metal/rendering_context_driver_metal.cpp @@ -1,5 +1,5 @@ /**************************************************************************/ -/* rendering_context_driver_metal.mm */ +/* rendering_context_driver_metal.cpp */ /**************************************************************************/ /* This file is part of: */ /* GODOT ENGINE */ @@ -28,14 +28,35 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /**************************************************************************/ -#import "rendering_context_driver_metal.h" +#include "rendering_context_driver_metal.h" -#import "rendering_device_driver_metal.h" +#include "metal3_objects.h" +#include "metal_objects_shared.h" +#include "rendering_device_driver_metal3.h" #include "core/templates/sort_array.h" -#import -#import +#include +#include + +#include + +// Selector helper for calling ObjC methods from C++ +#define _APPLE_PRIVATE_DEF_SEL(accessor, symbol) static SEL s_k##accessor = sel_registerName(symbol) +#define _APPLE_PRIVATE_SEL(accessor) (Private::Selector::s_k##accessor) + +namespace Private::Selector { + +_APPLE_PRIVATE_DEF_SEL(setOpaque_, "setOpaque:"); + +template +_NS_INLINE _Ret sendMessage(const void *pObj, SEL selector, _Args... args) { + using SendMessageProc = _Ret (*)(const void *, SEL, _Args...); + const SendMessageProc pProc = reinterpret_cast(&objc_msgSend); + return (*pProc)(pObj, selector, args...); +} + +} // namespace Private::Selector #pragma mark - Logging @@ -48,12 +69,6 @@ __attribute__((constructor)) static void InitializeLogging(void) { LOG_INTERVALS = os_log_create("org.godotengine.godot.metal", "events"); } -@protocol MTLDeviceEx -#if TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 130300 -- (void)setShouldMaximizeConcurrentCompilation:(BOOL)v; -#endif -@end - RenderingContextDriverMetal::RenderingContextDriverMetal() { } @@ -61,14 +76,14 @@ RenderingContextDriverMetal::~RenderingContextDriverMetal() { } Error RenderingContextDriverMetal::initialize() { - if (OS::get_singleton()->get_environment("MTL_CAPTURE_ENABLED") == "1") { + if (OS::get_singleton()->get_environment("MTL_CAPTURE_ENABLED") == "1" || OS::get_singleton()->get_environment("MTLCAPTURE_DESTINATION_DEVELOPER_TOOLS_ENABLE") == "1") { capture_available = true; } - metal_device = MTLCreateSystemDefaultDevice(); + metal_device = MTL::CreateSystemDefaultDevice(); #if TARGET_OS_OSX - if (@available(macOS 13.3, *)) { - 
[id(metal_device) setShouldMaximizeConcurrentCompilation:YES]; + if (__builtin_available(macOS 13.3, *)) { + metal_device->setShouldMaximizeConcurrentCompilation(true); } #endif device.type = DEVICE_TYPE_INTEGRATED_GPU; @@ -76,8 +91,8 @@ Error RenderingContextDriverMetal::initialize() { device.workarounds = Workarounds(); MetalDeviceProperties props(metal_device); - int version = (int)props.features.highestFamily - (int)MTLGPUFamilyApple1 + 1; - device.name = vformat("%s (Apple%d)", metal_device.name.UTF8String, version); + int version = (int)props.features.highestFamily - (int)MTL::GPUFamilyApple1 + 1; + device.name = vformat("%s (Apple%d)", metal_device->name()->utf8String(), version); return OK; } @@ -92,7 +107,7 @@ uint32_t RenderingContextDriverMetal::device_get_count() const { } RenderingDeviceDriver *RenderingContextDriverMetal::driver_create() { - return memnew(RenderingDeviceDriverMetal(this)); + return memnew(MTL3::RenderingDeviceDriverMetal(this)); } void RenderingContextDriverMetal::driver_free(RenderingDeviceDriver *p_driver) { @@ -100,25 +115,25 @@ void RenderingContextDriverMetal::driver_free(RenderingDeviceDriver *p_driver) { } class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) SurfaceLayer : public RenderingContextDriverMetal::Surface { - CAMetalLayer *__unsafe_unretained layer = nil; + CA::MetalLayer *layer = nullptr; LocalVector frame_buffers; - LocalVector> drawables; + LocalVector drawables; uint32_t rear = -1; uint32_t front = 0; uint32_t count = 0; public: - SurfaceLayer(CAMetalLayer *p_layer, id p_device) : + SurfaceLayer(CA::MetalLayer *p_layer, MTL::Device *p_device) : Surface(p_device), layer(p_layer) { - layer.allowsNextDrawableTimeout = YES; - layer.framebufferOnly = YES; - layer.opaque = OS::get_singleton()->is_layered_allowed() ? 
NO : YES; - layer.pixelFormat = get_pixel_format(); - layer.device = p_device; + layer->setAllowsNextDrawableTimeout(true); + layer->setFramebufferOnly(true); + Private::Selector::sendMessage(layer, _APPLE_PRIVATE_SEL(setOpaque_), !OS::get_singleton()->is_layered_allowed()); + layer->setPixelFormat(get_pixel_format()); + layer->setDevice(p_device); } ~SurfaceLayer() override { - layer = nil; + layer = nullptr; } Error resize(uint32_t p_desired_framebuffer_count) override final { @@ -128,14 +143,14 @@ public: } CGSize drawableSize = CGSizeMake(width, height); - CGSize current = layer.drawableSize; + CGSize current = layer->drawableSize(); if (!CGSizeEqualToSize(current, drawableSize)) { - layer.drawableSize = drawableSize; + layer->setDrawableSize(drawableSize); } // Metal supports a maximum of 3 drawables. p_desired_framebuffer_count = MIN(3U, p_desired_framebuffer_count); - layer.maximumDrawableCount = p_desired_framebuffer_count; + layer->setMaximumDrawableCount(p_desired_framebuffer_count); #if TARGET_OS_OSX // Display sync is only supported on macOS. 
@@ -143,10 +158,10 @@ public: case DisplayServer::VSYNC_MAILBOX: case DisplayServer::VSYNC_ADAPTIVE: case DisplayServer::VSYNC_ENABLED: - layer.displaySyncEnabled = YES; + layer->setDisplaySyncEnabled(true); break; case DisplayServer::VSYNC_DISABLED: - layer.displaySyncEnabled = NO; + layer->setDisplaySyncEnabled(false); break; } #endif @@ -171,56 +186,77 @@ public: MDFrameBuffer &frame_buffer = frame_buffers[rear]; frame_buffer.size = Size2i(width, height); - id drawable = layer.nextDrawable; + CA::MetalDrawable *drawable = layer->nextDrawable(); ERR_FAIL_NULL_V_MSG(drawable, RDD::FramebufferID(), "no drawable available"); drawables[rear] = drawable; - frame_buffer.set_texture(0, drawable.texture); + frame_buffer.set_texture(0, drawable->texture()); return RDD::FramebufferID(&frame_buffer); } - void present(MDCommandBuffer *p_cmd_buffer) override final { + void present(MTL3::MDCommandBuffer *p_cmd_buffer) override final { if (count == 0) { return; } // Release texture and drawable. frame_buffers[front].unset_texture(0); - id drawable = drawables[front]; - drawables[front] = nil; + MTL::Drawable *drawable = drawables[front]; + drawables[front] = nullptr; count--; front = (front + 1) % frame_buffers.size(); if (vsync_mode != DisplayServer::VSYNC_DISABLED) { - [p_cmd_buffer->get_command_buffer() presentDrawable:drawable afterMinimumDuration:present_minimum_duration]; + p_cmd_buffer->get_command_buffer()->presentDrawableAfterMinimumDuration(drawable, present_minimum_duration); } else { - [p_cmd_buffer->get_command_buffer() presentDrawable:drawable]; + p_cmd_buffer->get_command_buffer()->presentDrawable(drawable); } } + + MTL::Drawable *next_drawable() override final { + if (count == 0) { + return nullptr; + } + + // Release texture and drawable. 
+ frame_buffers[front].unset_texture(0); + MTL::Drawable *drawable = drawables[front]; + drawables[front] = nullptr; + + count--; + front = (front + 1) % frame_buffers.size(); + + return drawable; + } + + API_AVAILABLE(macos(26.0), ios(26.0)) + MTL::ResidencySet *get_residency_set() const override final { + return layer->residencySet(); + } }; class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) SurfaceOffscreen : public RenderingContextDriverMetal::Surface { int frame_buffer_size = 3; MDFrameBuffer *frame_buffers; - LocalVector> textures; - LocalVector> drawables; + LocalVector textures; + LocalVector drawables; int32_t rear = -1; std::atomic_int count; uint64_t target_time = 0; - CAMetalLayer *layer; + CA::MetalLayer *layer; public: - SurfaceOffscreen(CAMetalLayer *p_layer, id p_device) : + SurfaceOffscreen(CA::MetalLayer *p_layer, MTL::Device *p_device) : Surface(p_device), layer(p_layer) { - layer.allowsNextDrawableTimeout = YES; - layer.framebufferOnly = YES; - layer.opaque = OS::get_singleton()->is_layered_allowed() ? 
NO : YES; - layer.pixelFormat = get_pixel_format(); - layer.device = p_device; + layer->setAllowsNextDrawableTimeout(true); + layer->setFramebufferOnly(true); + Private::Selector::sendMessage(layer, _APPLE_PRIVATE_SEL(setOpaque_), !OS::get_singleton()->is_layered_allowed()); + layer->setPixelFormat(get_pixel_format()); + layer->setDevice(p_device); #if TARGET_OS_OSX - layer.displaySyncEnabled = NO; + layer->setDisplaySyncEnabled(false); #endif target_time = OS::get_singleton()->get_ticks_usec(); @@ -244,9 +280,9 @@ public: } CGSize drawableSize = CGSizeMake(width, height); - CGSize current = layer.drawableSize; + CGSize current = layer->drawableSize(); if (!CGSizeEqualToSize(current, drawableSize)) { - layer.drawableSize = drawableSize; + layer->setDrawableSize(drawableSize); } return OK; @@ -263,22 +299,22 @@ public: MDFrameBuffer &frame_buffer = frame_buffers[rear]; - if (textures[rear] == nil || textures[rear].width != width || textures[rear].height != height) { - MTLTextureDescriptor *texture_descriptor = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:get_pixel_format() width:width height:height mipmapped:NO]; - texture_descriptor.usage = MTLTextureUsageRenderTarget; - texture_descriptor.hazardTrackingMode = MTLHazardTrackingModeUntracked; - texture_descriptor.storageMode = MTLStorageModePrivate; - textures[rear] = [device newTextureWithDescriptor:texture_descriptor]; + if (textures[rear] == nullptr || textures[rear]->width() != width || textures[rear]->height() != height) { + MTL::TextureDescriptor *texture_descriptor = MTL::TextureDescriptor::texture2DDescriptor(get_pixel_format(), width, height, false); + texture_descriptor->setUsage(MTL::TextureUsageRenderTarget); + texture_descriptor->setHazardTrackingMode(MTL::HazardTrackingModeUntracked); + texture_descriptor->setStorageMode(MTL::StorageModePrivate); + textures[rear] = device->newTexture(texture_descriptor); } frame_buffer.size = Size2i(width, height); uint64_t now = 
OS::get_singleton()->get_ticks_usec(); if (now >= target_time) { target_time = now + 1'000'000; // 1 second into the future. - id drawable = layer.nextDrawable; + CA::MetalDrawable *drawable = layer->nextDrawable(); ERR_FAIL_NULL_V_MSG(drawable, RDD::FramebufferID(), "no drawable available"); drawables[rear] = drawable; - frame_buffer.set_texture(0, drawable.texture); + frame_buffer.set_texture(0, drawable->texture()); } else { frame_buffer.set_texture(0, textures[rear]); } @@ -286,18 +322,39 @@ public: return RDD::FramebufferID(&frame_buffers[rear]); } - void present(MDCommandBuffer *p_cmd_buffer) override final { + void present(MTL3::MDCommandBuffer *p_cmd_buffer) override final { MDFrameBuffer *frame_buffer = &frame_buffers[rear]; - if (drawables[rear] != nil) { - [p_cmd_buffer->get_command_buffer() presentDrawable:drawables[rear]]; - drawables[rear] = nil; + if (drawables[rear] != nullptr) { + p_cmd_buffer->get_command_buffer()->presentDrawable(drawables[rear]); + drawables[rear] = nullptr; } - [p_cmd_buffer->get_command_buffer() addScheduledHandler:^(id p_command_buffer) { + p_cmd_buffer->get_command_buffer()->addScheduledHandler([frame_buffer, this](MTL::CommandBuffer *) { frame_buffer->unset_texture(0); count.fetch_add(-1, std::memory_order_relaxed); - }]; + }); + } + + MTL::Drawable *next_drawable() override final { + if (count == 0) { + return nullptr; + } + + MDFrameBuffer *frame_buffer = &frame_buffers[rear]; + + MTL::Drawable *next = drawables[rear]; + drawables[rear] = nullptr; + + frame_buffer->unset_texture(0); + count--; + + return next; + } + + API_AVAILABLE(macos(26.0), ios(26.0)) + MTL::ResidencySet *get_residency_set() const override final { + return layer->residencySet(); } }; diff --git a/drivers/metal/rendering_context_driver_metal.h b/drivers/metal/rendering_context_driver_metal.h index 8e313d81f18..513b8d18f0d 100644 --- a/drivers/metal/rendering_context_driver_metal.h +++ b/drivers/metal/rendering_context_driver_metal.h @@ -35,42 +35,18 @@ 
#include "servers/rendering/rendering_context_driver.h" #include "servers/rendering/rendering_device_driver.h" -#import +#include +#include -#ifdef __OBJC__ -#import "metal_objects.h" - -#import -#import - -@class CAMetalLayer; -@protocol CAMetalDrawable; -#else -typedef enum MTLPixelFormat { - MTLPixelFormatBGRA8Unorm = 80, -} MTLPixelFormat; +namespace MTL3 { class MDCommandBuffer; -#endif - -class PixelFormats; - -#ifdef __OBJC__ -#define METAL_DEVICE id -#define METAL_DRAWABLE id -#define METAL_LAYER CAMetalLayer *__unsafe_unretained -#define METAL_RESIDENCY_SET id -#else -#define METAL_DEVICE void * -#define METAL_DRAWABLE void * -#define METAL_LAYER void * -#define METAL_RESIDENCY_SET void * -#endif +} class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingContextDriverMetal : public RenderingContextDriver { bool capture_available = false; protected: - METAL_DEVICE metal_device = nullptr; + MTL::Device *metal_device = nullptr; Device device; // There is only one device on Apple Silicon. public: @@ -95,12 +71,12 @@ public: // Platform-specific data for the Windows embedded in this driver. 
struct WindowPlatformData { - METAL_LAYER layer; + CA::MetalLayer *layer; }; class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) Surface { protected: - METAL_DEVICE device; + MTL::Device *device; public: uint32_t width = 0; @@ -109,18 +85,21 @@ public: bool needs_resize = false; double present_minimum_duration = 0.0; - Surface(METAL_DEVICE p_device) : + Surface(MTL::Device *p_device) : device(p_device) {} virtual ~Surface() = default; - MTLPixelFormat get_pixel_format() const { return MTLPixelFormatBGRA8Unorm; } + MTL::PixelFormat get_pixel_format() const { return MTL::PixelFormatBGRA8Unorm; } virtual Error resize(uint32_t p_desired_framebuffer_count) = 0; virtual RDD::FramebufferID acquire_next_frame_buffer() = 0; - virtual void present(MDCommandBuffer *p_cmd_buffer) = 0; + virtual void present(MTL3::MDCommandBuffer *p_cmd_buffer) = 0; + virtual MTL::Drawable *next_drawable() = 0; + API_AVAILABLE(macos(26.0), ios(26.0)) + virtual MTL::ResidencySet *get_residency_set() const = 0; void set_max_fps(int p_max_fps) { present_minimum_duration = p_max_fps ? 1.0 / p_max_fps : 0.0; } }; - METAL_DEVICE get_metal_device() const { + MTL::Device *get_metal_device() const { return metal_device; } diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.cpp similarity index 65% rename from drivers/metal/rendering_device_driver_metal.mm rename to drivers/metal/rendering_device_driver_metal.cpp index 654196d943c..0dc7915d486 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.cpp @@ -1,5 +1,5 @@ /**************************************************************************/ -/* rendering_device_driver_metal.mm */ +/* rendering_device_driver_metal.cpp */ /**************************************************************************/ /* This file is part of: */ /* GODOT ENGINE */ @@ -48,21 +48,21 @@ /* permissions and limitations under the License. 
*/ /**************************************************************************/ -#import "rendering_device_driver_metal.h" +#include "rendering_device_driver_metal.h" -#import "pixel_formats.h" -#import "rendering_context_driver_metal.h" -#import "rendering_shader_container_metal.h" +#include "pixel_formats.h" +#include "rendering_context_driver_metal.h" +#include "rendering_shader_container_metal.h" +#include "core/config/project_settings.h" #include "core/io/marshalls.h" #include "core/string/ustring.h" #include "core/templates/hash_map.h" #include "drivers/apple/foundation_helpers.h" -#import -#import -#import -#import +#include +#include +#include #include #ifndef MTLGPUAddress @@ -80,26 +80,14 @@ extern os_log_t LOG_INTERVALS; /*****************/ // RDD::CompareOperator == VkCompareOp. -static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NEVER, MTLCompareFunctionNever)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS, MTLCompareFunctionLess)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_EQUAL, MTLCompareFunctionEqual)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS_OR_EQUAL, MTLCompareFunctionLessEqual)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER, MTLCompareFunctionGreater)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NOT_EQUAL, MTLCompareFunctionNotEqual)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER_OR_EQUAL, MTLCompareFunctionGreaterEqual)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_ALWAYS, MTLCompareFunctionAlways)); - -_FORCE_INLINE_ MTLSize mipmapLevelSizeFromSize(MTLSize p_size, NSUInteger p_level) { - if (p_level == 0) { - return p_size; - } - - MTLSize lvlSize; - lvlSize.width = MAX(p_size.width >> p_level, 1UL); - lvlSize.height = MAX(p_size.height >> p_level, 1UL); - lvlSize.depth = MAX(p_size.depth >> p_level, 1UL); - return lvlSize; -} +static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NEVER, MTL::CompareFunctionNever)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS, 
MTL::CompareFunctionLess)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_EQUAL, MTL::CompareFunctionEqual)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS_OR_EQUAL, MTL::CompareFunctionLessEqual)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER, MTL::CompareFunctionGreater)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NOT_EQUAL, MTL::CompareFunctionNotEqual)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER_OR_EQUAL, MTL::CompareFunctionGreaterEqual)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_ALWAYS, MTL::CompareFunctionAlways)); /*****************/ /**** BUFFERS ****/ @@ -111,21 +99,21 @@ RDD::BufferID RenderingDeviceDriverMetal::buffer_create(uint64_t p_size, BitFiel p_size = round_up_to_alignment(p_size, 16u) * _frame_count; } - MTLResourceOptions options = 0; + MTL::ResourceOptions options = 0; switch (p_allocation_type) { case MEMORY_ALLOCATION_TYPE_CPU: - options = MTLResourceHazardTrackingModeTracked | MTLResourceStorageModeShared; + options = base_hazard_tracking | MTL::ResourceStorageModeShared; break; case MEMORY_ALLOCATION_TYPE_GPU: if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) { - options = MTLResourceHazardTrackingModeUntracked | MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; + options = MTL::ResourceHazardTrackingModeUntracked | MTL::ResourceStorageModeShared | MTL::ResourceCPUCacheModeWriteCombined; } else { - options = MTLResourceHazardTrackingModeTracked | MTLResourceStorageModePrivate; + options = base_hazard_tracking | MTL::ResourceStorageModePrivate; } break; } - id obj = [device newBufferWithLength:p_size options:options]; + MTL::Buffer *obj = device->newBuffer(p_size, options); ERR_FAIL_NULL_V_MSG(obj, BufferID(), "Can't create buffer of size: " + itos(p_size)); BufferInfo *buf_info; @@ -140,7 +128,9 @@ RDD::BufferID RenderingDeviceDriverMetal::buffer_create(uint64_t p_size, BitFiel } else { buf_info = memnew(BufferInfo); } - buf_info->metal_buffer = obj; + 
buf_info->metal_buffer = NS::TransferPtr(obj); + + _track_resource(buf_info->metal_buffer.get()); return BufferID(buf_info); } @@ -152,7 +142,8 @@ bool RenderingDeviceDriverMetal::buffer_set_texel_format(BufferID p_buffer, Data void RenderingDeviceDriverMetal::buffer_free(BufferID p_buffer) { BufferInfo *buf_info = (BufferInfo *)p_buffer.id; - buf_info->metal_buffer = nil; // Tell ARC to release. + + _untrack_resource(buf_info->metal_buffer.get()); if (buf_info->is_dynamic()) { memdelete((MetalBufferDynamicInfo *)buf_info); @@ -163,13 +154,13 @@ void RenderingDeviceDriverMetal::buffer_free(BufferID p_buffer) { uint64_t RenderingDeviceDriverMetal::buffer_get_allocation_size(BufferID p_buffer) { const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id; - return buf_info->metal_buffer.allocatedSize; + return buf_info->metal_buffer.get()->allocatedSize(); } uint8_t *RenderingDeviceDriverMetal::buffer_map(BufferID p_buffer) { const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id; - ERR_FAIL_COND_V_MSG(buf_info->metal_buffer.storageMode != MTLStorageModeShared, nullptr, "Unable to map private buffers"); - return (uint8_t *)buf_info->metal_buffer.contents; + ERR_FAIL_COND_V_MSG(buf_info->metal_buffer.get()->storageMode() != MTL::StorageModeShared, nullptr, "Unable to map private buffers"); + return (uint8_t *)buf_info->metal_buffer.get()->contents(); } void RenderingDeviceDriverMetal::buffer_unmap(BufferID p_buffer) { @@ -183,7 +174,7 @@ uint8_t *RenderingDeviceDriverMetal::buffer_persistent_map_advance(BufferID p_bu ERR_FAIL_COND_V_MSG(buf_info->last_frame_mapped == p_frames_drawn, nullptr, "Buffers with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT must only be mapped once per frame. Otherwise there could be race conditions with the GPU. 
Amalgamate all data uploading into one map(), use an extra buffer or remove the bit."); buf_info->last_frame_mapped = p_frames_drawn; #endif - return (uint8_t *)buf_info->metal_buffer.contents + buf_info->next_frame_index(_frame_count) * buf_info->size_bytes; + return (uint8_t *)buf_info->metal_buffer.get()->contents() + buf_info->next_frame_index(_frame_count) * buf_info->size_bytes; } uint64_t RenderingDeviceDriverMetal::buffer_get_dynamic_offsets(Span p_buffers) { @@ -203,14 +194,10 @@ uint64_t RenderingDeviceDriverMetal::buffer_get_dynamic_offsets(Span p return mask; } -void RenderingDeviceDriverMetal::buffer_flush(BufferID p_buffer) { - // Nothing to do. -} - uint64_t RenderingDeviceDriverMetal::buffer_get_device_address(BufferID p_buffer) { - if (@available(iOS 16.0, macOS 13.0, *)) { + if (__builtin_available(iOS 16.0, macOS 13.0, *)) { const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id; - return buf_info->metal_buffer.gpuAddress; + return buf_info->metal_buffer.get()->gpuAddress(); } else { #if DEV_ENABLED WARN_PRINT_ONCE("buffer_get_device_address is not supported on this OS version."); @@ -223,14 +210,14 @@ uint64_t RenderingDeviceDriverMetal::buffer_get_device_address(BufferID p_buffer #pragma mark - Format Conversions -static const MTLTextureType TEXTURE_TYPE[RD::TEXTURE_TYPE_MAX] = { - MTLTextureType1D, - MTLTextureType2D, - MTLTextureType3D, - MTLTextureTypeCube, - MTLTextureType1DArray, - MTLTextureType2DArray, - MTLTextureTypeCubeArray, +static const MTL::TextureType TEXTURE_TYPE[RD::TEXTURE_TYPE_MAX] = { + MTL::TextureType1D, + MTL::TextureType2D, + MTL::TextureType3D, + MTL::TextureTypeCube, + MTL::TextureType1DArray, + MTL::TextureType2DArray, + MTL::TextureTypeCubeArray, }; bool RenderingDeviceDriverMetal::is_valid_linear(TextureFormat const &p_format) const { @@ -244,28 +231,28 @@ bool RenderingDeviceDriverMetal::is_valid_linear(TextureFormat const &p_format) } RDD::TextureID RenderingDeviceDriverMetal::texture_create(const 
TextureFormat &p_format, const TextureView &p_view) { - MTLTextureDescriptor *desc = [MTLTextureDescriptor new]; - desc.textureType = TEXTURE_TYPE[p_format.texture_type]; + NS::SharedPtr desc = NS::TransferPtr(MTL::TextureDescriptor::alloc()->init()); + desc->setTextureType(TEXTURE_TYPE[p_format.texture_type]); PixelFormats &formats = *pixel_formats; - desc.pixelFormat = formats.getMTLPixelFormat(p_format.format); - MTLFmtCaps format_caps = formats.getCapabilities(desc.pixelFormat); + desc->setPixelFormat((MTL::PixelFormat)formats.getMTLPixelFormat(p_format.format)); + MTLFmtCaps format_caps = formats.getCapabilities(desc->pixelFormat()); - desc.width = p_format.width; - desc.height = p_format.height; - desc.depth = p_format.depth; - desc.mipmapLevelCount = p_format.mipmaps; + desc->setWidth(p_format.width); + desc->setHeight(p_format.height); + desc->setDepth(p_format.depth); + desc->setMipmapLevelCount(p_format.mipmaps); if (p_format.texture_type == TEXTURE_TYPE_1D_ARRAY || p_format.texture_type == TEXTURE_TYPE_2D_ARRAY) { - desc.arrayLength = p_format.array_layers; + desc->setArrayLength(p_format.array_layers); } else if (p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY) { - desc.arrayLength = p_format.array_layers / 6; + desc->setArrayLength(p_format.array_layers / 6); } // TODO(sgc): Evaluate lossy texture support (perhaps as a project option?) 
// https://developer.apple.com/videos/play/tech-talks/10876?time=459 - // desc.compressionType = MTLTextureCompressionTypeLossy; + // desc->setCompressionType(MTL::TextureCompressionTypeLossy); if (p_format.samples > TEXTURE_SAMPLES_1) { SampleCount supported = (*device_properties).find_nearest_supported_sample_count(p_format.samples); @@ -275,19 +262,19 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p if (ok) { switch (p_format.texture_type) { case TEXTURE_TYPE_2D: - desc.textureType = MTLTextureType2DMultisample; + desc->setTextureType(MTL::TextureType2DMultisample); break; case TEXTURE_TYPE_2D_ARRAY: - desc.textureType = MTLTextureType2DMultisampleArray; + desc->setTextureType(MTL::TextureType2DMultisampleArray); break; default: break; } - desc.sampleCount = (NSUInteger)supported; + desc->setSampleCount((NS::UInteger)supported); if (p_format.mipmaps > 1) { // For a buffer-backed or multi-sample texture, the value must be 1. WARN_PRINT("mipmaps == 1 for multi-sample textures"); - desc.mipmapLevelCount = 1; + desc->setMipmapLevelCount(1); } } else { WARN_PRINT("Unsupported multi-sample texture type; disabling multi-sample"); @@ -295,89 +282,85 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p } } - static const MTLTextureSwizzle COMPONENT_SWIZZLE[TEXTURE_SWIZZLE_MAX] = { - static_cast(255), // IDENTITY - MTLTextureSwizzleZero, - MTLTextureSwizzleOne, - MTLTextureSwizzleRed, - MTLTextureSwizzleGreen, - MTLTextureSwizzleBlue, - MTLTextureSwizzleAlpha, + static const MTL::TextureSwizzle COMPONENT_SWIZZLE[TEXTURE_SWIZZLE_MAX] = { + static_cast(255), // IDENTITY + MTL::TextureSwizzleZero, + MTL::TextureSwizzleOne, + MTL::TextureSwizzleRed, + MTL::TextureSwizzleGreen, + MTL::TextureSwizzleBlue, + MTL::TextureSwizzleAlpha, }; - MTLTextureSwizzleChannels swizzle = MTLTextureSwizzleChannelsMake( - p_view.swizzle_r != TEXTURE_SWIZZLE_IDENTITY ? 
COMPONENT_SWIZZLE[p_view.swizzle_r] : MTLTextureSwizzleRed, - p_view.swizzle_g != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_g] : MTLTextureSwizzleGreen, - p_view.swizzle_b != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_b] : MTLTextureSwizzleBlue, - p_view.swizzle_a != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_a] : MTLTextureSwizzleAlpha); + MTL::TextureSwizzleChannels swizzle = MTL::TextureSwizzleChannels::Make( + p_view.swizzle_r != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_r] : MTL::TextureSwizzleRed, + p_view.swizzle_g != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_g] : MTL::TextureSwizzleGreen, + p_view.swizzle_b != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_b] : MTL::TextureSwizzleBlue, + p_view.swizzle_a != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_a] : MTL::TextureSwizzleAlpha); // Represents a swizzle operation that is a no-op. - static MTLTextureSwizzleChannels IDENTITY_SWIZZLE = { - .red = MTLTextureSwizzleRed, - .green = MTLTextureSwizzleGreen, - .blue = MTLTextureSwizzleBlue, - .alpha = MTLTextureSwizzleAlpha, - }; + static MTL::TextureSwizzleChannels IDENTITY_SWIZZLE = MTL::TextureSwizzleChannels::Default(); - bool no_swizzle = memcmp(&IDENTITY_SWIZZLE, &swizzle, sizeof(MTLTextureSwizzleChannels)) == 0; + bool no_swizzle = memcmp(&IDENTITY_SWIZZLE, &swizzle, sizeof(MTL::TextureSwizzleChannels)) == 0; if (!no_swizzle) { - desc.swizzle = swizzle; + desc->setSwizzle(swizzle); } // Usage. 
- MTLResourceOptions options = 0; + MTL::ResourceOptions options = 0; bool is_linear = false; #if defined(VISIONOS_ENABLED) const bool supports_memoryless = true; #else GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wdeprecated-declarations") - const bool supports_memoryless = (*device_properties).features.highestFamily >= MTLGPUFamilyApple2 && (*device_properties).features.highestFamily < MTLGPUFamilyMac1; + const bool supports_memoryless = (*device_properties).features.highestFamily >= MTL::GPUFamilyApple2 && (*device_properties).features.highestFamily < MTL::GPUFamilyMac1; GODOT_CLANG_WARNING_POP #endif if (supports_memoryless && p_format.usage_bits & TEXTURE_USAGE_TRANSIENT_BIT) { - options = MTLResourceStorageModeMemoryless | MTLResourceHazardTrackingModeTracked; - desc.storageMode = MTLStorageModeMemoryless; + options = base_hazard_tracking | MTL::ResourceStorageModeMemoryless; + desc->setStorageMode(MTL::StorageModeMemoryless); } else { - options = MTLResourceCPUCacheModeDefaultCache | MTLResourceHazardTrackingModeTracked; + options = base_hazard_tracking | MTL::ResourceCPUCacheModeDefaultCache; if (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) { - options |= MTLResourceStorageModeShared; + options |= MTL::ResourceStorageModeShared; // The user has indicated they want to read from the texture on the CPU, // so we'll see if we can use a linear format. // A linear format is a texture that is backed by a buffer, // which allows for CPU access to the texture data via a pointer. 
is_linear = is_valid_linear(p_format); } else { - options |= MTLResourceStorageModePrivate; + options |= MTL::ResourceStorageModePrivate; } } - desc.resourceOptions = options; + desc->setResourceOptions(options); + MTL::TextureUsage usage = desc->usage(); if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) { - desc.usage |= MTLTextureUsageShaderRead; + usage |= MTL::TextureUsageShaderRead; } if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT) { - desc.usage |= MTLTextureUsageShaderWrite; + usage |= MTL::TextureUsageShaderWrite; } bool can_be_attachment = flags::any(format_caps, (kMTLFmtCapsColorAtt | kMTLFmtCapsDSAtt)); if (flags::any(p_format.usage_bits, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) && can_be_attachment) { - desc.usage |= MTLTextureUsageRenderTarget; + usage |= MTL::TextureUsageRenderTarget; } if (p_format.usage_bits & TEXTURE_USAGE_INPUT_ATTACHMENT_BIT) { - desc.usage |= MTLTextureUsageShaderRead; + usage |= MTL::TextureUsageShaderRead; } if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_ATOMIC_BIT) { ERR_FAIL_COND_V_MSG((format_caps & kMTLFmtCapsAtomic) == 0, RDD::TextureID(), "Atomic operations on this texture format are not supported."); ERR_FAIL_COND_V_MSG(!device_properties->features.supports_native_image_atomics, RDD::TextureID(), "Atomic operations on textures are not supported on this OS version. Check SUPPORTS_IMAGE_ATOMIC_32_BIT."); // If supports_native_image_atomics is true, this condition should always succeed, as it is set the same. 
- if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) { - desc.usage |= MTLTextureUsageShaderAtomic; + if (__builtin_available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) { + usage |= MTL::TextureUsageShaderAtomic; } } @@ -388,7 +371,7 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p if (flags::any(p_format.usage_bits, TEXTURE_USAGE_CAN_UPDATE_BIT | TEXTURE_USAGE_CAN_COPY_TO_BIT) && can_be_attachment && no_swizzle) { // Per MoltenVK, can be cleared as a render attachment. - desc.usage |= MTLTextureUsageRenderTarget; + usage |= MTL::TextureUsageRenderTarget; } if (p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_FROM_BIT) { // Covered by blits. @@ -396,180 +379,169 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p // Create texture views with a different component layout. if (!p_format.shareable_formats.is_empty()) { - desc.usage |= MTLTextureUsagePixelFormatView; + usage |= MTL::TextureUsagePixelFormatView; } + desc->setUsage(usage); + // Allocate memory. - id obj = nil; + MTL::Texture *obj = nullptr; if (is_linear) { // Linear textures are restricted to 2D textures, a single mipmap level and a single array layer. 
- MTLPixelFormat pixel_format = desc.pixelFormat; + MTL::PixelFormat pixel_format = desc->pixelFormat(); size_t row_alignment = get_texel_buffer_alignment_for_format(p_format.format); size_t bytes_per_row = formats.getBytesPerRow(pixel_format, p_format.width); bytes_per_row = round_up_to_alignment(bytes_per_row, row_alignment); size_t bytes_per_layer = formats.getBytesPerLayer(pixel_format, bytes_per_row, p_format.height); size_t byte_count = bytes_per_layer * p_format.depth * p_format.array_layers; - id buf = [device newBufferWithLength:byte_count options:options]; - obj = [buf newTextureWithDescriptor:desc offset:0 bytesPerRow:bytes_per_row]; + MTL::Buffer *buf = device->newBuffer(byte_count, options); + obj = buf->newTexture(desc.get(), 0, bytes_per_row); + buf->release(); + + _track_resource(buf); } else { - obj = [device newTextureWithDescriptor:desc]; + obj = device->newTexture(desc.get()); } ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create texture."); - return rid::make(obj); + _track_resource(obj); + + return TextureID(reinterpret_cast(obj)); } RDD::TextureID RenderingDeviceDriverMetal::texture_create_from_extension(uint64_t p_native_texture, TextureType p_type, DataFormat p_format, uint32_t p_array_layers, bool p_depth_stencil, uint32_t p_mipmaps) { - id res = (__bridge id)(void *)(uintptr_t)p_native_texture; + MTL::Texture *res = reinterpret_cast(p_native_texture); // If the requested format is different, we need to create a view. 
- MTLPixelFormat format = pixel_formats->getMTLPixelFormat(p_format); - if (res.pixelFormat != format) { - MTLTextureSwizzleChannels swizzle = MTLTextureSwizzleChannelsMake( - MTLTextureSwizzleRed, - MTLTextureSwizzleGreen, - MTLTextureSwizzleBlue, - MTLTextureSwizzleAlpha); - res = [res newTextureViewWithPixelFormat:format - textureType:res.textureType - levels:NSMakeRange(0, res.mipmapLevelCount) - slices:NSMakeRange(0, p_array_layers) - swizzle:swizzle]; + MTL::PixelFormat format = (MTL::PixelFormat)pixel_formats->getMTLPixelFormat(p_format); + if (res->pixelFormat() != format) { + MTL::TextureSwizzleChannels swizzle = MTL::TextureSwizzleChannels::Default(); + res = res->newTextureView(format, res->textureType(), NS::Range::Make(0, res->mipmapLevelCount()), NS::Range::Make(0, p_array_layers), swizzle); ERR_FAIL_NULL_V_MSG(res, TextureID(), "Unable to create texture view."); } - return rid::make(res); + _track_resource(res); + + return TextureID(reinterpret_cast(res)); } RDD::TextureID RenderingDeviceDriverMetal::texture_create_shared(TextureID p_original_texture, const TextureView &p_view) { - id src_texture = rid::get(p_original_texture); + MTL::Texture *src_texture = reinterpret_cast(p_original_texture.id); - NSUInteger slices = src_texture.arrayLength; - if (src_texture.textureType == MTLTextureTypeCube) { + NS::UInteger slices = src_texture->arrayLength(); + if (src_texture->textureType() == MTL::TextureTypeCube) { // Metal expects Cube textures to have a slice count of 6. slices = 6; - } else if (src_texture.textureType == MTLTextureTypeCubeArray) { + } else if (src_texture->textureType() == MTL::TextureTypeCubeArray) { // Metal expects Cube Array textures to have 6 slices per layer. slices *= 6; } #if DEV_ENABLED - if (src_texture.sampleCount > 1) { + if (src_texture->sampleCount() > 1) { // TODO(sgc): is it ok to create a shared texture from a multi-sample texture? 
WARN_PRINT("Is it safe to create a shared texture from multi-sample texture?"); } #endif - MTLPixelFormat format = pixel_formats->getMTLPixelFormat(p_view.format); + MTL::PixelFormat format = (MTL::PixelFormat)pixel_formats->getMTLPixelFormat(p_view.format); - static const MTLTextureSwizzle component_swizzle[TEXTURE_SWIZZLE_MAX] = { - static_cast(255), // IDENTITY - MTLTextureSwizzleZero, - MTLTextureSwizzleOne, - MTLTextureSwizzleRed, - MTLTextureSwizzleGreen, - MTLTextureSwizzleBlue, - MTLTextureSwizzleAlpha, + static const MTL::TextureSwizzle component_swizzle[TEXTURE_SWIZZLE_MAX] = { + static_cast(255), // IDENTITY + MTL::TextureSwizzleZero, + MTL::TextureSwizzleOne, + MTL::TextureSwizzleRed, + MTL::TextureSwizzleGreen, + MTL::TextureSwizzleBlue, + MTL::TextureSwizzleAlpha, }; -#define SWIZZLE(C, CHAN) (p_view.swizzle_##C != TEXTURE_SWIZZLE_IDENTITY ? component_swizzle[p_view.swizzle_##C] : MTLTextureSwizzle##CHAN) - MTLTextureSwizzleChannels swizzle = MTLTextureSwizzleChannelsMake( - SWIZZLE(r, Red), - SWIZZLE(g, Green), - SWIZZLE(b, Blue), - SWIZZLE(a, Alpha)); +#define SWIZZLE(C, CHAN) (p_view.swizzle_##C != TEXTURE_SWIZZLE_IDENTITY ? 
component_swizzle[p_view.swizzle_##C] : MTL::TextureSwizzle##CHAN) + MTL::TextureSwizzleChannels swizzle = MTL::TextureSwizzleChannels::Make(SWIZZLE(r, Red), SWIZZLE(g, Green), SWIZZLE(b, Blue), SWIZZLE(a, Alpha)); #undef SWIZZLE - id obj = [src_texture newTextureViewWithPixelFormat:format - textureType:src_texture.textureType - levels:NSMakeRange(0, src_texture.mipmapLevelCount) - slices:NSMakeRange(0, slices) - swizzle:swizzle]; + MTL::Texture *obj = src_texture->newTextureView(format, src_texture->textureType(), NS::Range::Make(0, src_texture->mipmapLevelCount()), NS::Range::Make(0, slices), swizzle); ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create shared texture"); - return rid::make(obj); + _track_resource(obj); + return TextureID(reinterpret_cast(obj)); } RDD::TextureID RenderingDeviceDriverMetal::texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) { - id src_texture = rid::get(p_original_texture); + MTL::Texture *src_texture = reinterpret_cast(p_original_texture.id); - static const MTLTextureType VIEW_TYPES[] = { - MTLTextureType1D, // MTLTextureType1D - MTLTextureType1D, // MTLTextureType1DArray - MTLTextureType2D, // MTLTextureType2D - MTLTextureType2D, // MTLTextureType2DArray - MTLTextureType2D, // MTLTextureType2DMultisample - MTLTextureType2D, // MTLTextureTypeCube - MTLTextureType2D, // MTLTextureTypeCubeArray - MTLTextureType2D, // MTLTextureType3D - MTLTextureType2D, // MTLTextureType2DMultisampleArray + static const MTL::TextureType VIEW_TYPES[] = { + MTL::TextureType1D, // MTLTextureType1D + MTL::TextureType1D, // MTLTextureType1DArray + MTL::TextureType2D, // MTLTextureType2D + MTL::TextureType2D, // MTLTextureType2DArray + MTL::TextureType2D, // MTLTextureType2DMultisample + MTL::TextureType2D, // MTLTextureTypeCube + MTL::TextureType2D, // MTLTextureTypeCubeArray + MTL::TextureType2D, // 
MTLTextureType3D + MTL::TextureType2D, // MTLTextureType2DMultisampleArray }; - MTLTextureType textureType = VIEW_TYPES[src_texture.textureType]; + MTL::TextureType textureType = VIEW_TYPES[src_texture->textureType()]; switch (p_slice_type) { case TEXTURE_SLICE_2D: { - textureType = MTLTextureType2D; + textureType = MTL::TextureType2D; } break; case TEXTURE_SLICE_3D: { - textureType = MTLTextureType3D; + textureType = MTL::TextureType3D; } break; case TEXTURE_SLICE_CUBEMAP: { - textureType = MTLTextureTypeCube; + textureType = MTL::TextureTypeCube; } break; case TEXTURE_SLICE_2D_ARRAY: { - textureType = MTLTextureType2DArray; + textureType = MTL::TextureType2DArray; } break; case TEXTURE_SLICE_MAX: { ERR_FAIL_V_MSG(TextureID(), "Invalid texture slice type"); } break; } - MTLPixelFormat format = pixel_formats->getMTLPixelFormat(p_view.format); + MTL::PixelFormat format = (MTL::PixelFormat)pixel_formats->getMTLPixelFormat(p_view.format); - static const MTLTextureSwizzle component_swizzle[TEXTURE_SWIZZLE_MAX] = { - static_cast(255), // IDENTITY - MTLTextureSwizzleZero, - MTLTextureSwizzleOne, - MTLTextureSwizzleRed, - MTLTextureSwizzleGreen, - MTLTextureSwizzleBlue, - MTLTextureSwizzleAlpha, + static const MTL::TextureSwizzle component_swizzle[TEXTURE_SWIZZLE_MAX] = { + static_cast(255), // IDENTITY + MTL::TextureSwizzleZero, + MTL::TextureSwizzleOne, + MTL::TextureSwizzleRed, + MTL::TextureSwizzleGreen, + MTL::TextureSwizzleBlue, + MTL::TextureSwizzleAlpha, }; -#define SWIZZLE(C, CHAN) (p_view.swizzle_##C != TEXTURE_SWIZZLE_IDENTITY ? component_swizzle[p_view.swizzle_##C] : MTLTextureSwizzle##CHAN) - MTLTextureSwizzleChannels swizzle = MTLTextureSwizzleChannelsMake( - SWIZZLE(r, Red), - SWIZZLE(g, Green), - SWIZZLE(b, Blue), - SWIZZLE(a, Alpha)); +#define SWIZZLE(C, CHAN) (p_view.swizzle_##C != TEXTURE_SWIZZLE_IDENTITY ? 
component_swizzle[p_view.swizzle_##C] : MTL::TextureSwizzle##CHAN) + MTL::TextureSwizzleChannels swizzle = MTL::TextureSwizzleChannels::Make(SWIZZLE(r, Red), SWIZZLE(g, Green), SWIZZLE(b, Blue), SWIZZLE(a, Alpha)); #undef SWIZZLE - id obj = [src_texture newTextureViewWithPixelFormat:format - textureType:textureType - levels:NSMakeRange(p_mipmap, p_mipmaps) - slices:NSMakeRange(p_layer, p_layers) - swizzle:swizzle]; + MTL::Texture *obj = src_texture->newTextureView(format, textureType, NS::Range::Make(p_mipmap, p_mipmaps), NS::Range::Make(p_layer, p_layers), swizzle); ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create shared texture"); - return rid::make(obj); + _track_resource(obj); + return TextureID(reinterpret_cast(obj)); } void RenderingDeviceDriverMetal::texture_free(TextureID p_texture) { - rid::release(p_texture); + MTL::Texture *obj = reinterpret_cast(p_texture.id); + _untrack_resource(obj); + obj->release(); } uint64_t RenderingDeviceDriverMetal::texture_get_allocation_size(TextureID p_texture) { - id __unsafe_unretained obj = rid::get(p_texture); - return obj.allocatedSize; + MTL::Texture *obj = reinterpret_cast(p_texture.id); + return obj->allocatedSize(); } void RenderingDeviceDriverMetal::texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) { - id __unsafe_unretained obj = rid::get(p_texture); + MTL::Texture *obj = reinterpret_cast(p_texture.id); PixelFormats &pf = *pixel_formats; - DataFormat format = pf.getDataFormat(obj.pixelFormat); + DataFormat format = pf.getDataFormat(obj->pixelFormat()); - uint32_t w = MAX(1u, obj.width >> p_subresource.mipmap); - uint32_t h = MAX(1u, obj.height >> p_subresource.mipmap); - uint32_t d = MAX(1u, obj.depth >> p_subresource.mipmap); + uint32_t w = MAX(1u, obj->width() >> p_subresource.mipmap); + uint32_t h = MAX(1u, obj->height() >> p_subresource.mipmap); + uint32_t d = MAX(1u, obj->depth() >> p_subresource.mipmap); uint32_t bw = 0, bh = 0; 
get_compressed_image_format_block_dimensions(format, bw, bh); @@ -581,23 +553,24 @@ void RenderingDeviceDriverMetal::texture_get_copyable_layout(TextureID p_texture } Vector RenderingDeviceDriverMetal::texture_get_data(TextureID p_texture, uint32_t p_layer) { - id obj = rid::get(p_texture); - ERR_FAIL_COND_V_MSG(obj.storageMode != MTLStorageModeShared, Vector(), "Texture must be created with TEXTURE_USAGE_CPU_READ_BIT set."); + MTL::Texture *obj = reinterpret_cast(p_texture.id); + ERR_FAIL_COND_V_MSG(obj->storageMode() != MTL::StorageModeShared, Vector(), "Texture must be created with TEXTURE_USAGE_CPU_READ_BIT set."); - if (obj.buffer) { + MTL::Buffer *buf = obj->buffer(); + if (buf) { ERR_FAIL_COND_V_MSG(p_layer > 0, Vector(), "A linear texture has a single layer."); - ERR_FAIL_COND_V_MSG(obj.mipmapLevelCount > 1, Vector(), "A linear texture has a single mipmap level."); + ERR_FAIL_COND_V_MSG(obj->mipmapLevelCount() > 1, Vector(), "A linear texture has a single mipmap level."); Vector image_data; - image_data.resize_uninitialized(obj.buffer.length); - memcpy(image_data.ptrw(), obj.buffer.contents, obj.buffer.length); + image_data.resize_uninitialized(buf->length()); + memcpy(image_data.ptrw(), buf->contents(), buf->length()); return image_data; } - DataFormat tex_format = pixel_formats->getDataFormat(obj.pixelFormat); - uint32_t tex_w = obj.width; - uint32_t tex_h = obj.height; - uint32_t tex_d = obj.depth; - uint32_t tex_mipmaps = obj.mipmapLevelCount; + DataFormat tex_format = pixel_formats->getDataFormat(obj->pixelFormat()); + uint32_t tex_w = obj->width(); + uint32_t tex_h = obj->height(); + uint32_t tex_d = obj->depth(); + uint32_t tex_mipmaps = obj->mipmapLevelCount(); // Must iteratively copy the texture data to a buffer. 
@@ -621,12 +594,7 @@ Vector RenderingDeviceDriverMetal::texture_get_data(TextureID p_texture uint32_t bytes_per_img = bytes_per_row * bh; uint32_t mip_size = bytes_per_img * tex_d; - [obj getBytes:(void *)dest_ptr - bytesPerRow:bytes_per_row - bytesPerImage:bytes_per_img - fromRegion:MTLRegionMake3D(0, 0, 0, bw, bh, tex_d) - mipmapLevel:mm_i - slice:p_layer]; + obj->getBytes(dest_ptr, bytes_per_row, bytes_per_img, MTL::Region(0, 0, 0, bw, bh, tex_d), mm_i, p_layer); dest_ptr += mip_size; @@ -644,7 +612,7 @@ Vector RenderingDeviceDriverMetal::texture_get_data(TextureID p_texture BitField RenderingDeviceDriverMetal::texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) { PixelFormats &pf = *pixel_formats; - if (pf.getMTLPixelFormat(p_format) == MTLPixelFormatInvalid) { + if (pf.getMTLPixelFormat(p_format) == MTL::PixelFormatInvalid) { return 0; } @@ -677,114 +645,115 @@ bool RenderingDeviceDriverMetal::texture_can_make_shared_with_format(TextureID p #pragma mark - Sampler -static const MTLCompareFunction COMPARE_OPERATORS[RD::COMPARE_OP_MAX] = { - MTLCompareFunctionNever, - MTLCompareFunctionLess, - MTLCompareFunctionEqual, - MTLCompareFunctionLessEqual, - MTLCompareFunctionGreater, - MTLCompareFunctionNotEqual, - MTLCompareFunctionGreaterEqual, - MTLCompareFunctionAlways, +static const MTL::CompareFunction COMPARE_OPERATORS[RD::COMPARE_OP_MAX] = { + MTL::CompareFunctionNever, + MTL::CompareFunctionLess, + MTL::CompareFunctionEqual, + MTL::CompareFunctionLessEqual, + MTL::CompareFunctionGreater, + MTL::CompareFunctionNotEqual, + MTL::CompareFunctionGreaterEqual, + MTL::CompareFunctionAlways, }; -static const MTLStencilOperation STENCIL_OPERATIONS[RD::STENCIL_OP_MAX] = { - MTLStencilOperationKeep, - MTLStencilOperationZero, - MTLStencilOperationReplace, - MTLStencilOperationIncrementClamp, - MTLStencilOperationDecrementClamp, - MTLStencilOperationInvert, - MTLStencilOperationIncrementWrap, - MTLStencilOperationDecrementWrap, +static const 
MTL::StencilOperation STENCIL_OPERATIONS[RD::STENCIL_OP_MAX] = { + MTL::StencilOperationKeep, + MTL::StencilOperationZero, + MTL::StencilOperationReplace, + MTL::StencilOperationIncrementClamp, + MTL::StencilOperationDecrementClamp, + MTL::StencilOperationInvert, + MTL::StencilOperationIncrementWrap, + MTL::StencilOperationDecrementWrap, }; -static const MTLBlendFactor BLEND_FACTORS[RD::BLEND_FACTOR_MAX] = { - MTLBlendFactorZero, - MTLBlendFactorOne, - MTLBlendFactorSourceColor, - MTLBlendFactorOneMinusSourceColor, - MTLBlendFactorDestinationColor, - MTLBlendFactorOneMinusDestinationColor, - MTLBlendFactorSourceAlpha, - MTLBlendFactorOneMinusSourceAlpha, - MTLBlendFactorDestinationAlpha, - MTLBlendFactorOneMinusDestinationAlpha, - MTLBlendFactorBlendColor, - MTLBlendFactorOneMinusBlendColor, - MTLBlendFactorBlendAlpha, - MTLBlendFactorOneMinusBlendAlpha, - MTLBlendFactorSourceAlphaSaturated, - MTLBlendFactorSource1Color, - MTLBlendFactorOneMinusSource1Color, - MTLBlendFactorSource1Alpha, - MTLBlendFactorOneMinusSource1Alpha, +static const MTL::BlendFactor BLEND_FACTORS[RD::BLEND_FACTOR_MAX] = { + MTL::BlendFactorZero, + MTL::BlendFactorOne, + MTL::BlendFactorSourceColor, + MTL::BlendFactorOneMinusSourceColor, + MTL::BlendFactorDestinationColor, + MTL::BlendFactorOneMinusDestinationColor, + MTL::BlendFactorSourceAlpha, + MTL::BlendFactorOneMinusSourceAlpha, + MTL::BlendFactorDestinationAlpha, + MTL::BlendFactorOneMinusDestinationAlpha, + MTL::BlendFactorBlendColor, + MTL::BlendFactorOneMinusBlendColor, + MTL::BlendFactorBlendAlpha, + MTL::BlendFactorOneMinusBlendAlpha, + MTL::BlendFactorSourceAlphaSaturated, + MTL::BlendFactorSource1Color, + MTL::BlendFactorOneMinusSource1Color, + MTL::BlendFactorSource1Alpha, + MTL::BlendFactorOneMinusSource1Alpha, }; -static const MTLBlendOperation BLEND_OPERATIONS[RD::BLEND_OP_MAX] = { - MTLBlendOperationAdd, - MTLBlendOperationSubtract, - MTLBlendOperationReverseSubtract, - MTLBlendOperationMin, - MTLBlendOperationMax, +static 
const MTL::BlendOperation BLEND_OPERATIONS[RD::BLEND_OP_MAX] = { + MTL::BlendOperationAdd, + MTL::BlendOperationSubtract, + MTL::BlendOperationReverseSubtract, + MTL::BlendOperationMin, + MTL::BlendOperationMax, }; -static const API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MTLSamplerAddressMode ADDRESS_MODES[RD::SAMPLER_REPEAT_MODE_MAX] = { - MTLSamplerAddressModeRepeat, - MTLSamplerAddressModeMirrorRepeat, - MTLSamplerAddressModeClampToEdge, - MTLSamplerAddressModeClampToBorderColor, - MTLSamplerAddressModeMirrorClampToEdge, +static const MTL::SamplerAddressMode ADDRESS_MODES[RD::SAMPLER_REPEAT_MODE_MAX] = { + MTL::SamplerAddressModeRepeat, + MTL::SamplerAddressModeMirrorRepeat, + MTL::SamplerAddressModeClampToEdge, + MTL::SamplerAddressModeClampToBorderColor, + MTL::SamplerAddressModeMirrorClampToEdge, }; -static const API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MTLSamplerBorderColor SAMPLER_BORDER_COLORS[RD::SAMPLER_BORDER_COLOR_MAX] = { - MTLSamplerBorderColorTransparentBlack, - MTLSamplerBorderColorTransparentBlack, - MTLSamplerBorderColorOpaqueBlack, - MTLSamplerBorderColorOpaqueBlack, - MTLSamplerBorderColorOpaqueWhite, - MTLSamplerBorderColorOpaqueWhite, +static const MTL::SamplerBorderColor SAMPLER_BORDER_COLORS[RD::SAMPLER_BORDER_COLOR_MAX] = { + MTL::SamplerBorderColorTransparentBlack, + MTL::SamplerBorderColorTransparentBlack, + MTL::SamplerBorderColorOpaqueBlack, + MTL::SamplerBorderColorOpaqueBlack, + MTL::SamplerBorderColorOpaqueWhite, + MTL::SamplerBorderColorOpaqueWhite, }; RDD::SamplerID RenderingDeviceDriverMetal::sampler_create(const SamplerState &p_state) { - MTLSamplerDescriptor *desc = [MTLSamplerDescriptor new]; - desc.supportArgumentBuffers = YES; + NS::SharedPtr desc = NS::TransferPtr(MTL::SamplerDescriptor::alloc()->init()); + desc->setSupportArgumentBuffers(true); - desc.magFilter = p_state.mag_filter == SAMPLER_FILTER_LINEAR ? 
MTLSamplerMinMagFilterLinear : MTLSamplerMinMagFilterNearest; - desc.minFilter = p_state.min_filter == SAMPLER_FILTER_LINEAR ? MTLSamplerMinMagFilterLinear : MTLSamplerMinMagFilterNearest; - desc.mipFilter = p_state.mip_filter == SAMPLER_FILTER_LINEAR ? MTLSamplerMipFilterLinear : MTLSamplerMipFilterNearest; + desc->setMagFilter(p_state.mag_filter == SAMPLER_FILTER_LINEAR ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest); + desc->setMinFilter(p_state.min_filter == SAMPLER_FILTER_LINEAR ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest); + desc->setMipFilter(p_state.mip_filter == SAMPLER_FILTER_LINEAR ? MTL::SamplerMipFilterLinear : MTL::SamplerMipFilterNearest); - desc.sAddressMode = ADDRESS_MODES[p_state.repeat_u]; - desc.tAddressMode = ADDRESS_MODES[p_state.repeat_v]; - desc.rAddressMode = ADDRESS_MODES[p_state.repeat_w]; + desc->setSAddressMode(ADDRESS_MODES[p_state.repeat_u]); + desc->setTAddressMode(ADDRESS_MODES[p_state.repeat_v]); + desc->setRAddressMode(ADDRESS_MODES[p_state.repeat_w]); if (p_state.use_anisotropy) { - desc.maxAnisotropy = p_state.anisotropy_max; + desc->setMaxAnisotropy(p_state.anisotropy_max); } - desc.compareFunction = COMPARE_OPERATORS[p_state.compare_op]; + desc->setCompareFunction(COMPARE_OPERATORS[p_state.compare_op]); - desc.lodMinClamp = p_state.min_lod; - desc.lodMaxClamp = p_state.max_lod; + desc->setLodMinClamp(p_state.min_lod); + desc->setLodMaxClamp(p_state.max_lod); - desc.borderColor = SAMPLER_BORDER_COLORS[p_state.border_color]; + desc->setBorderColor(SAMPLER_BORDER_COLORS[p_state.border_color]); - desc.normalizedCoordinates = !p_state.unnormalized_uvw; + desc->setNormalizedCoordinates(!p_state.unnormalized_uvw); #if __MAC_OS_X_VERSION_MAX_ALLOWED >= 260000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 260000 || __TV_OS_VERSION_MAX_ALLOWED >= 260000 || __VISION_OS_VERSION_MAX_ALLOWED >= 260000 if (p_state.lod_bias != 0.0) { - if (@available(macOS 26.0, iOS 26.0, tvOS 26.0, visionOS 26.0, *)) { - 
desc.lodBias = p_state.lod_bias; + if (__builtin_available(macOS 26.0, iOS 26.0, tvOS 26.0, visionOS 26.0, *)) { + desc->setLodBias(p_state.lod_bias); } } #endif - id obj = [device newSamplerStateWithDescriptor:desc]; - ERR_FAIL_NULL_V_MSG(obj, SamplerID(), "newSamplerStateWithDescriptor failed"); - return rid::make(obj); + MTL::SamplerState *obj = device->newSamplerState(desc.get()); + ERR_FAIL_NULL_V_MSG(obj, SamplerID(), "newSamplerState failed"); + return SamplerID(reinterpret_cast(obj)); } void RenderingDeviceDriverMetal::sampler_free(SamplerID p_sampler) { - rid::release(p_sampler); + MTL::SamplerState *obj = reinterpret_cast(p_sampler.id); + obj->release(); } bool RenderingDeviceDriverMetal::sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_filter) { @@ -801,40 +770,43 @@ bool RenderingDeviceDriverMetal::sampler_is_format_supported_for_filter(DataForm #pragma mark - Vertex Array RDD::VertexFormatID RenderingDeviceDriverMetal::vertex_format_create(Span p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) { - MTLVertexDescriptor *desc = MTLVertexDescriptor.vertexDescriptor; + MTL::VertexDescriptor *desc = MTL::VertexDescriptor::vertexDescriptor(); for (const VertexAttributeBindingsMap::KV &kv : p_vertex_bindings) { uint32_t idx = get_metal_buffer_index_for_vertex_attribute_binding(kv.key); - MTLVertexBufferLayoutDescriptor *ld = desc.layouts[idx]; + MTL::VertexBufferLayoutDescriptor *ld = desc->layouts()->object(idx); if (kv.value.stride != 0) { - ld.stepFunction = kv.value.frequency == VERTEX_FREQUENCY_VERTEX ? MTLVertexStepFunctionPerVertex : MTLVertexStepFunctionPerInstance; - ld.stepRate = 1; - ld.stride = kv.value.stride; + ld->setStepFunction(kv.value.frequency == VERTEX_FREQUENCY_VERTEX ? 
MTL::VertexStepFunctionPerVertex : MTL::VertexStepFunctionPerInstance); + ld->setStepRate(1); + ld->setStride(kv.value.stride); } else { - ld.stepFunction = MTLVertexStepFunctionConstant; - ld.stepRate = 0; - ld.stride = 0; + ld->setStepFunction(MTL::VertexStepFunctionConstant); + ld->setStepRate(0); + ld->setStride(0); } - DEV_ASSERT(ld.stride == desc.layouts[idx].stride); + DEV_ASSERT(ld->stride() == desc->layouts()->object(idx)->stride()); } for (const VertexAttribute &vf : p_vertex_attribs) { - desc.attributes[vf.location].format = pixel_formats->getMTLVertexFormat(vf.format); - desc.attributes[vf.location].offset = vf.offset; + MTL::VertexAttributeDescriptor *attr = desc->attributes()->object(vf.location); + attr->setFormat((MTL::VertexFormat)pixel_formats->getMTLVertexFormat(vf.format)); + attr->setOffset(vf.offset); uint32_t idx = get_metal_buffer_index_for_vertex_attribute_binding(vf.binding); - desc.attributes[vf.location].bufferIndex = idx; + attr->setBufferIndex(idx); if (vf.stride == 0) { // Constant attribute, so we must determine the stride to satisfy Metal API. 
- uint32_t stride = desc.layouts[idx].stride; - desc.layouts[idx].stride = std::max(stride, vf.offset + pixel_formats->getBytesPerBlock(vf.format)); + uint32_t stride = desc->layouts()->object(idx)->stride(); + desc->layouts()->object(idx)->setStride(std::max(stride, vf.offset + pixel_formats->getBytesPerBlock(vf.format))); } } - return rid::make(desc); + desc->retain(); + return VertexFormatID(reinterpret_cast(desc)); } void RenderingDeviceDriverMetal::vertex_format_free(VertexFormatID p_vertex_format) { - rid::release(p_vertex_format); + MTL::VertexDescriptor *obj = reinterpret_cast(p_vertex_format.id); + obj->release(); } #pragma mark - Barriers @@ -847,39 +819,8 @@ void RenderingDeviceDriverMetal::command_pipeline_barrier( VectorView p_buffer_barriers, VectorView p_texture_barriers, VectorView p_acceleration_structure_barriers) { - WARN_PRINT_ONCE("not implemented"); -} - -#pragma mark - Fences - -RDD::FenceID RenderingDeviceDriverMetal::fence_create() { - Fence *fence = nullptr; - if (@available(macOS 10.14, iOS 12.0, tvOS 12.0, visionOS 1.0, *)) { - fence = memnew(FenceEvent([device newSharedEvent])); - } else { - fence = memnew(FenceSemaphore()); - } - return FenceID(fence); -} - -Error RenderingDeviceDriverMetal::fence_wait(FenceID p_fence) { - Fence *fence = (Fence *)(p_fence.id); - return fence->wait(1000); -} - -void RenderingDeviceDriverMetal::fence_free(FenceID p_fence) { - Fence *fence = (Fence *)(p_fence.id); - memdelete(fence); -} - -#pragma mark - Semaphores - -RDD::SemaphoreID RenderingDeviceDriverMetal::semaphore_create() { - // Metal doesn't use semaphores, as their purpose within Godot is to ensure ordering of command buffer execution. 
- return SemaphoreID(1); -} - -void RenderingDeviceDriverMetal::semaphore_free(SemaphoreID p_semaphore) { + MDCommandBufferBase *obj = (MDCommandBufferBase *)(p_cmd_buffer.id); + obj->pipeline_barrier(p_src_stages, p_dst_stages, p_memory_barriers, p_buffer_barriers, p_texture_barriers, p_acceleration_structure_barriers); } #pragma mark - Queues @@ -896,78 +837,10 @@ RDD::CommandQueueFamilyID RenderingDeviceDriverMetal::command_queue_family_get(B } } -RDD::CommandQueueID RenderingDeviceDriverMetal::command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue) { - return CommandQueueID(1); -} - -Error RenderingDeviceDriverMetal::command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView, VectorView p_cmd_buffers, VectorView, FenceID p_cmd_fence, VectorView p_swap_chains) { - uint32_t size = p_cmd_buffers.size(); - if (size == 0) { - return OK; - } - - for (uint32_t i = 0; i < size - 1; i++) { - MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[i].id); - cmd_buffer->commit(); - } - - // The last command buffer will signal the fence and semaphores. - MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[size - 1].id); - Fence *fence = (Fence *)(p_cmd_fence.id); - if (fence != nullptr) { - cmd_buffer->end(); - id cb = cmd_buffer->get_command_buffer(); - fence->signal(cb); - } - - for (uint32_t i = 0; i < p_swap_chains.size(); i++) { - SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id); - RenderingContextDriverMetal::Surface *metal_surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface); - metal_surface->present(cmd_buffer); - } - - cmd_buffer->commit(); - - if (p_swap_chains.size() > 0) { - // Used as a signal that we're presenting, so this is the end of a frame. 
- [device_scope endScope]; - [device_scope beginScope]; - } - - return OK; -} - -void RenderingDeviceDriverMetal::command_queue_free(CommandQueueID p_cmd_queue) { -} - #pragma mark - Command Buffers -// ----- POOL ----- - -RDD::CommandPoolID RenderingDeviceDriverMetal::command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) { - DEV_ASSERT(p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY); - return rid::make(device_queue); -} - -bool RenderingDeviceDriverMetal::command_pool_reset(CommandPoolID p_cmd_pool) { - return true; -} - -void RenderingDeviceDriverMetal::command_pool_free(CommandPoolID p_cmd_pool) { - rid::release(p_cmd_pool); -} - -// ----- BUFFER ----- - -RDD::CommandBufferID RenderingDeviceDriverMetal::command_buffer_create(CommandPoolID p_cmd_pool) { - id queue = rid::get(p_cmd_pool); - MDCommandBuffer *obj = new MDCommandBuffer(queue, this); - command_buffers.push_back(obj); - return CommandBufferID(obj); -} - bool RenderingDeviceDriverMetal::command_buffer_begin(CommandBufferID p_cmd_buffer) { - MDCommandBuffer *obj = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *obj = (MDCommandBufferBase *)(p_cmd_buffer.id); obj->begin(); return true; } @@ -977,7 +850,7 @@ bool RenderingDeviceDriverMetal::command_buffer_begin_secondary(CommandBufferID } void RenderingDeviceDriverMetal::command_buffer_end(CommandBufferID p_cmd_buffer) { - MDCommandBuffer *obj = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *obj = (MDCommandBufferBase *)(p_cmd_buffer.id); obj->end(); } @@ -996,6 +869,11 @@ void RenderingDeviceDriverMetal::_swap_chain_release_buffers(SwapChain *p_swap_c RDD::SwapChainID RenderingDeviceDriverMetal::swap_chain_create(RenderingContextDriver::SurfaceID p_surface) { RenderingContextDriverMetal::Surface const *surface = (RenderingContextDriverMetal::Surface *)(p_surface); + if (use_barriers) { + GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability") + 
add_residency_set_to_main_queue(surface->get_residency_set()); + GODOT_CLANG_WARNING_POP + } // Create the render pass that will be used to draw to the swap chain's framebuffers. RDD::Attachment attachment; @@ -1067,6 +945,12 @@ void RenderingDeviceDriverMetal::swap_chain_set_max_fps(SwapChainID p_swap_chain void RenderingDeviceDriverMetal::swap_chain_free(SwapChainID p_swap_chain) { SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id); + if (use_barriers) { + GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability") + RenderingContextDriverMetal::Surface *surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface); + remove_residency_set_to_main_queue(surface->get_residency_set()); + GODOT_CLANG_WARNING_POP + } _swap_chain_release(swap_chain); render_pass_free(swap_chain->render_pass); memdelete(swap_chain); @@ -1077,34 +961,34 @@ void RenderingDeviceDriverMetal::swap_chain_free(SwapChainID p_swap_chain) { RDD::FramebufferID RenderingDeviceDriverMetal::framebuffer_create(RenderPassID p_render_pass, VectorView p_attachments, uint32_t p_width, uint32_t p_height) { MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id); - Vector textures; + Vector textures; textures.resize(p_attachments.size()); for (uint32_t i = 0; i < p_attachments.size(); i += 1) { MDAttachment const &a = pass->attachments[i]; - id tex = rid::get(p_attachments[i]); - if (tex == nil) { + MTL::Texture *tex = reinterpret_cast(p_attachments[i].id); + if (tex == nullptr) { #if DEV_ENABLED WARN_PRINT("Invalid texture for attachment " + itos(i)); #endif } if (a.samples > 1) { - if (tex.sampleCount != a.samples) { + if (tex->sampleCount() != a.samples) { #if DEV_ENABLED - WARN_PRINT("Mismatched sample count for attachment " + itos(i) + "; expected " + itos(a.samples) + ", got " + itos(tex.sampleCount)); + WARN_PRINT("Mismatched sample count for attachment " + itos(i) + "; expected " + itos(a.samples) + ", got " + itos(tex->sampleCount())); #endif } } textures.write[i] = tex; } - 
MDFrameBuffer *fb = new MDFrameBuffer(textures, Size2i(p_width, p_height)); + MDFrameBuffer *fb = memnew(MDFrameBuffer(textures, Size2i(p_width, p_height))); return FramebufferID(fb); } void RenderingDeviceDriverMetal::framebuffer_free(FramebufferID p_framebuffer) { MDFrameBuffer *obj = (MDFrameBuffer *)(p_framebuffer.id); - delete obj; + memdelete(obj); } #pragma mark - Shader @@ -1113,7 +997,7 @@ void RenderingDeviceDriverMetal::shader_cache_free_entry(const SHA256Digest &key if (ShaderCacheEntry **pentry = _shader_cache.getptr(key); pentry != nullptr) { ShaderCacheEntry *entry = *pentry; _shader_cache.erase(key); - entry->library = nil; + entry->library.reset(); memdelete(entry); } } @@ -1130,12 +1014,12 @@ static_assert(is_layout_compatible(p_data.data_type); + r_ui.dataType = static_cast(p_data.data_type); memcpy(&r_ui.slot, &p_data.slot, sizeof(UniformInfo::Indexes)); memcpy(&r_ui.arg_buffer, &p_data.arg_buffer, sizeof(UniformInfo::Indexes)); - r_ui.access = static_cast(p_data.access); - r_ui.usage = static_cast(p_data.usage); - r_ui.textureType = static_cast(p_data.texture_type); + r_ui.access = static_cast(p_data.access); + r_ui.usage = static_cast(p_data.usage); + r_ui.textureType = static_cast(p_data.texture_type); r_ui.imageFormat = p_data.image_format; r_ui.arrayLength = p_data.array_length; r_ui.isMultisampled = p_data.is_multisampled; @@ -1159,20 +1043,20 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref RDD::ShaderID(), "Shader was compiled for a newer version of Metal"); - MTLGPUFamily compiled_gpu_family = static_cast(mtl_reflection_data.profile.gpu); + MTL::GPUFamily compiled_gpu_family = static_cast(mtl_reflection_data.profile.gpu); ERR_FAIL_COND_V_MSG(device_properties->features.highestFamily < compiled_gpu_family, RDD::ShaderID(), "Shader was generated for a newer Apple GPU"); - MTLCompileOptions *options = [MTLCompileOptions new]; + NS::SharedPtr options = NS::TransferPtr(MTL::CompileOptions::alloc()->init()); 
uint32_t major = mtl_reflection_data.msl_version / 10000; uint32_t minor = (mtl_reflection_data.msl_version / 100) % 100; - options.languageVersion = MTLLanguageVersion((major << 0x10) + minor); - if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) { - options.enableLogging = mtl_reflection_data.needs_debug_logging(); + options->setLanguageVersion(MTL::LanguageVersion((major << 0x10) + minor)); + if (__builtin_available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) { + options->setEnableLogging(mtl_reflection_data.needs_debug_logging()); } - HashMap libraries; + HashMap> libraries; PipelineType pipeline_type = PIPELINE_TYPE_RASTERIZATION; Vector decompressed_code; @@ -1185,8 +1069,12 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref } if (ShaderCacheEntry **p = _shader_cache.getptr(shader_data.hash); p != nullptr) { - libraries[shader.shader_stage] = (*p)->library; - continue; + if (std::shared_ptr lib = (*p)->library.lock()) { + libraries[shader.shader_stage] = lib; + continue; + } + // Library was released; remove stale cache entry and recreate. 
+ _shader_cache.erase(shader_data.hash); } if (shader.code_decompressed_size > 0) { @@ -1201,34 +1089,27 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref cd->name = shader_name; cd->stage = shader.shader_stage; - NSString *source = [[NSString alloc] initWithBytes:(void *)decompressed_code.ptr() - length:shader_data.source_size - encoding:NSUTF8StringEncoding]; + NS::SharedPtr source = NS::TransferPtr(NS::String::alloc()->init((void *)decompressed_code.ptr(), shader_data.source_size, NS::UTF8StringEncoding)); - MDLibrary *library = nil; + std::shared_ptr library; if (shader_data.library_size > 0) { ERR_FAIL_COND_V_MSG(mtl_reflection_data.os_min_version > device_properties->os_version, RDD::ShaderID(), "Metal shader binary was generated for a newer target OS"); dispatch_data_t binary = dispatch_data_create(decompressed_code.ptr() + shader_data.source_size, shader_data.library_size, dispatch_get_main_queue(), DISPATCH_DATA_DESTRUCTOR_DEFAULT); - library = [MDLibrary newLibraryWithCacheEntry:cd - device:device + library = MDLibrary::create(cd, device, #if DEV_ENABLED - source:source + source.get(), #endif - data:binary]; + binary); } else { - options.preserveInvariance = shader_data.is_position_invariant; + options->setPreserveInvariance(shader_data.is_position_invariant); #if __MAC_OS_X_VERSION_MIN_REQUIRED >= 150000 || __IPHONE_OS_VERSION_MIN_REQUIRED >= 180000 || __TV_OS_VERSION_MIN_REQUIRED >= 180000 || defined(VISIONOS_ENABLED) - options.mathMode = MTLMathModeFast; + options->setMathMode(MTL::MathModeFast); #else - options.fastMathEnabled = YES; + options->setFastMathEnabled(true); #endif - library = [MDLibrary newLibraryWithCacheEntry:cd - device:device - source:source - options:options - strategy:_shader_load_strategy]; + library = MDLibrary::create(cd, device, source.get(), options.get(), _shader_load_strategy); } _shader_cache[shader_data.hash] = cd; @@ -1300,7 +1181,7 @@ RDD::ShaderID 
RenderingDeviceDriverMetal::shader_create_from_container(const Ref mtl_reflection_data.uses_argument_buffers(), libraries[RD::ShaderStage::SHADER_STAGE_COMPUTE]); - cs->local = MTLSizeMake(refl.compute_local_size[0], refl.compute_local_size[1], refl.compute_local_size[2]); + cs->local = MTL::Size(refl.compute_local_size[0], refl.compute_local_size[1], refl.compute_local_size[2]); shader = cs; } else { MDRenderShader *rs = new MDRenderShader( @@ -1341,16 +1222,20 @@ RDD::UniformSetID RenderingDeviceDriverMetal::uniform_set_create(VectorViewsets.size(), UniformSetID(), "Set index out of range"); const UniformSet &shader_set = shader->sets.get(p_set_index); MDUniformSet *set = memnew(MDUniformSet); + // Determine if there are any dynamic uniforms in this set. + bool is_dynamic = !shader_set.dynamic_uniforms.is_empty(); + + Vector arg_buffer_data; if (device_properties->features.argument_buffers_supported()) { + arg_buffer_data.resize(shader_set.buffer_size); + // If argument buffers are enabled, we have already verified availability, so we can skip the runtime check. 
GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability-new") + uint64_t *ptr = (uint64_t *)arg_buffer_data.ptrw(); - set->arg_buffer = [device newBufferWithLength:shader_set.buffer_size options:MTLResourceStorageModeShared]; - uint64_t *ptr = (uint64_t *)set->arg_buffer.contents; - - HashMap bound_resources; - auto add_usage = [&bound_resources](MTLResourceUnsafe res, BitField stage, MTLResourceUsage usage) { + HashMap bound_resources; + auto add_usage = [&bound_resources](MTL::Resource *res, BitField stage, MTL::ResourceUsage usage) { StageResourceUsage *sru = bound_resources.getptr(res); if (sru == nullptr) { sru = &bound_resources.insert(res, ResourceUnused)->value; @@ -1365,6 +1250,10 @@ RDD::UniformSetID RenderingDeviceDriverMetal::uniform_set_create(VectorView sampler = rid::get(uniform.ids[j]); - *(MTLResourceID *)(ptr + idx.sampler + j) = sampler.gpuResourceID; + MTL::SamplerState *sampler = reinterpret_cast(uniform.ids[j].id); + *(MTL::ResourceID *)(ptr + idx.sampler + j) = sampler->gpuResourceID(); } } break; case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: { uint32_t count = uniform.ids.size() / 2; for (uint32_t j = 0; j < count; j += 1) { - id sampler = rid::get(uniform.ids[j * 2 + 0]); - id texture = rid::get(uniform.ids[j * 2 + 1]); - *(MTLResourceID *)(ptr + idx.texture + j) = texture.gpuResourceID; - *(MTLResourceID *)(ptr + idx.sampler + j) = sampler.gpuResourceID; + MTL::SamplerState *sampler = reinterpret_cast(uniform.ids[j * 2 + 0].id); + MTL::Texture *texture = reinterpret_cast(uniform.ids[j * 2 + 1].id); + *(MTL::ResourceID *)(ptr + idx.texture + j) = texture->gpuResourceID(); + *(MTL::ResourceID *)(ptr + idx.sampler + j) = sampler->gpuResourceID(); - add_usage(texture, ui.active_stages, ui.usage); + ADD_USAGE(texture, ui.active_stages, ui.usage); } } break; case UNIFORM_TYPE_TEXTURE: { size_t count = uniform.ids.size(); for (size_t j = 0; j < count; j += 1) { - id texture = rid::get(uniform.ids[j]); - *(MTLResourceID *)(ptr + idx.texture + j) = 
texture.gpuResourceID; + MTL::Texture *texture = reinterpret_cast(uniform.ids[j].id); + *(MTL::ResourceID *)(ptr + idx.texture + j) = texture->gpuResourceID(); - add_usage(texture, ui.active_stages, ui.usage); + ADD_USAGE(texture, ui.active_stages, ui.usage); } } break; case UNIFORM_TYPE_IMAGE: { size_t count = uniform.ids.size(); for (size_t j = 0; j < count; j += 1) { - id texture = rid::get(uniform.ids[j]); - *(MTLResourceID *)(ptr + idx.texture + j) = texture.gpuResourceID; - add_usage(texture, ui.active_stages, ui.usage); + MTL::Texture *texture = reinterpret_cast(uniform.ids[j].id); + *(MTL::ResourceID *)(ptr + idx.texture + j) = texture->gpuResourceID(); + ADD_USAGE(texture, ui.active_stages, ui.usage); if (idx.buffer != UINT32_MAX) { // Emulated atomic image access. - id buffer = (texture.parentTexture ? texture.parentTexture : texture).buffer; - *(MTLGPUAddress *)(ptr + idx.buffer + j) = buffer.gpuAddress; + MTL::Texture *parent = texture->parentTexture(); + MTL::Buffer *buffer = (parent ? 
parent : texture)->buffer(); + *(MTLGPUAddress *)(ptr + idx.buffer + j) = buffer->gpuAddress(); - add_usage(buffer, ui.active_stages, ui.usage); + ADD_USAGE(buffer, ui.active_stages, ui.usage); } } } break; @@ -1429,23 +1319,26 @@ RDD::UniformSetID RenderingDeviceDriverMetal::uniform_set_create(VectorViewmetal_buffer.gpuAddress; + *(MTLGPUAddress *)(ptr + idx.buffer) = buffer->metal_buffer.get()->gpuAddress(); - add_usage(buffer->metal_buffer, ui.active_stages, ui.usage); + ADD_USAGE(buffer->metal_buffer.get(), ui.active_stages, ui.usage); } break; case UNIFORM_TYPE_INPUT_ATTACHMENT: { size_t count = uniform.ids.size(); for (size_t j = 0; j < count; j += 1) { - id texture = rid::get(uniform.ids[j]); - *(MTLResourceID *)(ptr + idx.texture + j) = texture.gpuResourceID; + MTL::Texture *texture = reinterpret_cast(uniform.ids[j].id); + *(MTL::ResourceID *)(ptr + idx.texture + j) = texture->gpuResourceID(); - add_usage(texture, ui.active_stages, ui.usage); + ADD_USAGE(texture, ui.active_stages, ui.usage); } } break; case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: { - // Dynamic buffers are not supported by argument buffers currently. - // so we do not encode them, as there shouldn't be any runtime shaders that used them. + // Encode the base GPU address (frame 0); it will be updated at bind time. 
+ const MetalBufferDynamicInfo *buffer = (const MetalBufferDynamicInfo *)uniform.ids[0].id; + *(MTLGPUAddress *)(ptr + idx.buffer) = buffer->metal_buffer.get()->gpuAddress(); + + ADD_USAGE(buffer->metal_buffer.get(), ui.active_stages, ui.usage); } break; default: { DEV_ASSERT(false); @@ -1453,17 +1346,36 @@ RDD::UniformSetID RenderingDeviceDriverMetal::uniform_set_create(VectorView const &keyval : bound_resources) { - ResourceVector *resources = set->usage_to_resources.getptr(keyval.value); - if (resources == nullptr) { - resources = &set->usage_to_resources.insert(keyval.value, ResourceVector())->value; - } - int64_t pos = resources->span().bisect(keyval.key, true); - if (pos == resources->size() || (*resources)[pos] != keyval.key) { - resources->insert(pos, keyval.key); +#undef ADD_USAGE + + if (!use_barriers) { + for (KeyValue const &keyval : bound_resources) { + ResourceVector *resources = set->usage_to_resources.getptr(keyval.value); + if (resources == nullptr) { + resources = &set->usage_to_resources.insert(keyval.value, ResourceVector())->value; + } + int64_t pos = resources->span().bisect(keyval.key, true); + if (pos == resources->size() || (*resources)[pos] != keyval.key) { + resources->insert(pos, keyval.key); + } } } + if (!is_dynamic) { + set->arg_buffer = NS::TransferPtr(device->newBuffer(shader_set.buffer_size, base_hazard_tracking | MTL::ResourceStorageModePrivate)); +#if DEV_ENABLED + char label[64]; + snprintf(label, sizeof(label), "Uniform Set %u", p_set_index); + set->arg_buffer->setLabel(NS::String::string(label, NS::UTF8StringEncoding)); +#endif + _track_resource(set->arg_buffer.get()); + _copy_queue_copy_to_buffer(arg_buffer_data, set->arg_buffer.get()); + } else { + // Store the arg buffer data for dynamic uniform sets. + // It will be copied and updated at bind time. 
+ set->arg_buffer_data = arg_buffer_data; + } + GODOT_CLANG_WARNING_POP } Vector bound_uniforms; @@ -1472,13 +1384,15 @@ RDD::UniformSetID RenderingDeviceDriverMetal::uniform_set_create(VectorViewuniforms = bound_uniforms; - set->index = p_set_index; return UniformSetID(set); } void RenderingDeviceDriverMetal::uniform_set_free(UniformSetID p_uniform_set) { MDUniformSet *obj = (MDUniformSet *)p_uniform_set.id; + if (obj->arg_buffer) { + _untrack_resource(obj->arg_buffer.get()); + } memdelete(obj); } @@ -1520,42 +1434,42 @@ void RenderingDeviceDriverMetal::command_uniform_set_prepare_for_use(CommandBuff #pragma mark - Transfer void RenderingDeviceDriverMetal::command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) { - MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cmd = (MDCommandBufferBase *)(p_cmd_buffer.id); cmd->clear_buffer(p_buffer, p_offset, p_size); } void RenderingDeviceDriverMetal::command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView p_regions) { - MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cmd = (MDCommandBufferBase *)(p_cmd_buffer.id); cmd->copy_buffer(p_src_buffer, p_dst_buffer, p_regions); } void RenderingDeviceDriverMetal::command_copy_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView p_regions) { - MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cmd = (MDCommandBufferBase *)(p_cmd_buffer.id); cmd->copy_texture(p_src_texture, p_dst_texture, p_regions); } void RenderingDeviceDriverMetal::command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t 
p_dst_layer, uint32_t p_dst_mipmap) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->resolve_texture(p_src_texture, p_src_texture_layout, p_src_layer, p_src_mipmap, p_dst_texture, p_dst_texture_layout, p_dst_layer, p_dst_mipmap); } void RenderingDeviceDriverMetal::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->clear_color_texture(p_texture, p_texture_layout, p_color, p_subresources); } void RenderingDeviceDriverMetal::command_clear_depth_stencil_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, float p_depth, uint8_t p_stencil, const TextureSubresourceRange &p_subresources) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->clear_depth_stencil_texture(p_texture, p_texture_layout, p_depth, p_stencil, p_subresources); } void RenderingDeviceDriverMetal::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView p_regions) { - MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cmd = (MDCommandBufferBase *)(p_cmd_buffer.id); cmd->copy_buffer_to_texture(p_src_buffer, p_dst_texture, p_regions); } void RenderingDeviceDriverMetal::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView p_regions) { - MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cmd = (MDCommandBufferBase *)(p_cmd_buffer.id); 
cmd->copy_texture_to_buffer(p_src_texture, p_dst_buffer, p_regions); } @@ -1569,7 +1483,7 @@ void RenderingDeviceDriverMetal::pipeline_free(PipelineID p_pipeline_id) { // ----- BINDING ----- void RenderingDeviceDriverMetal::command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_dst_first_index, VectorView p_data) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->encode_push_constant_data(p_shader, p_data); } @@ -1588,22 +1502,16 @@ String RenderingDeviceDriverMetal::_pipeline_get_cache_path() const { bool RenderingDeviceDriverMetal::pipeline_cache_create(const Vector &p_data) { return false; - CharString path = _pipeline_get_cache_path().utf8(); - NSString *nPath = [[NSString alloc] initWithBytesNoCopy:path.ptrw() - length:path.length() - encoding:NSUTF8StringEncoding - freeWhenDone:NO]; - MTLBinaryArchiveDescriptor *desc = [MTLBinaryArchiveDescriptor new]; - if ([[NSFileManager defaultManager] fileExistsAtPath:nPath]) { - desc.url = [NSURL fileURLWithPath:nPath]; - } - NSError *error = nil; - archive = [device newBinaryArchiveWithDescriptor:desc error:&error]; - return true; + // TODO: Convert to metal-cpp when pipeline caching is re-enabled + // CharString path = _pipeline_get_cache_path().utf8(); + // NS::SharedPtr desc = NS::TransferPtr(MTL::BinaryArchiveDescriptor::alloc()->init()); + // NS::Error *error = nullptr; + // archive = NS::TransferPtr(device->newBinaryArchive(desc.get(), &error)); + // return true; } void RenderingDeviceDriverMetal::pipeline_cache_free() { - archive = nil; + archive = nullptr; } size_t RenderingDeviceDriverMetal::pipeline_cache_query_size() { @@ -1615,20 +1523,17 @@ Vector RenderingDeviceDriverMetal::pipeline_cache_serialize() { return Vector(); } - CharString path = _pipeline_get_cache_path().utf8(); - - NSString *nPath = [[NSString alloc] initWithBytesNoCopy:path.ptrw() - length:path.length() - 
encoding:NSUTF8StringEncoding - freeWhenDone:NO]; - NSURL *target = [NSURL fileURLWithPath:nPath]; - NSError *error = nil; - if ([archive serializeToURL:target error:&error]) { - return Vector(); - } else { - print_line(error.localizedDescription.UTF8String); - return Vector(); - } + // TODO: Convert to metal-cpp when pipeline caching is re-enabled + // CharString path = _pipeline_get_cache_path().utf8(); + // NS::URL *target = NS::URL::fileURLWithPath(NS::String::string(path.get_data(), NS::UTF8StringEncoding)); + // NS::Error *error = nullptr; + // if (archive->serializeToURL(target, &error)) { + // return Vector(); + // } else { + // print_line(error->localizedDescription()->utf8String()); + // return Vector(); + // } + return Vector(); } #pragma mark - Rendering @@ -1652,15 +1557,15 @@ RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView attachments; @@ -1669,7 +1574,7 @@ RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView TEXTURE_SAMPLES_1) { mda.samples = (*device_properties).find_nearest_supported_sample_count(a.samples); @@ -1690,99 +1595,99 @@ RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView p_clear_values) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->render_begin_pass(p_render_pass, p_framebuffer, p_cmd_buffer_type, p_rect, p_clear_values); } void RenderingDeviceDriverMetal::command_end_render_pass(CommandBufferID p_cmd_buffer) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->render_end_pass(); } void RenderingDeviceDriverMetal::command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->render_next_subpass(); } void 
RenderingDeviceDriverMetal::command_render_set_viewport(CommandBufferID p_cmd_buffer, VectorView p_viewports) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->render_set_viewport(p_viewports); } void RenderingDeviceDriverMetal::command_render_set_scissor(CommandBufferID p_cmd_buffer, VectorView p_scissors) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->render_set_scissor(p_scissors); } void RenderingDeviceDriverMetal::command_render_clear_attachments(CommandBufferID p_cmd_buffer, VectorView p_attachment_clears, VectorView p_rects) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->render_clear_attachments(p_attachment_clears, p_rects); } void RenderingDeviceDriverMetal::command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->bind_pipeline(p_pipeline); } void RenderingDeviceDriverMetal::command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->render_bind_uniform_sets(p_uniform_sets, p_shader, p_first_set_index, p_set_count, p_dynamic_offsets); } void RenderingDeviceDriverMetal::command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); 
cb->render_draw(p_vertex_count, p_instance_count, p_base_vertex, p_first_instance); } void RenderingDeviceDriverMetal::command_render_draw_indexed(CommandBufferID p_cmd_buffer, uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index, int32_t p_vertex_offset, uint32_t p_first_instance) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->render_draw_indexed(p_index_count, p_instance_count, p_first_index, p_vertex_offset, p_first_instance); } void RenderingDeviceDriverMetal::command_render_draw_indexed_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->render_draw_indexed_indirect(p_indirect_buffer, p_offset, p_draw_count, p_stride); } void RenderingDeviceDriverMetal::command_render_draw_indexed_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->render_draw_indexed_indirect_count(p_indirect_buffer, p_offset, p_count_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); } void RenderingDeviceDriverMetal::command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->render_draw_indirect(p_indirect_buffer, p_offset, p_draw_count, p_stride); } void RenderingDeviceDriverMetal::command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID 
p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->render_draw_indirect_count(p_indirect_buffer, p_offset, p_count_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); } void RenderingDeviceDriverMetal::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->render_bind_vertex_buffers(p_binding_count, p_buffers, p_offsets, p_dynamic_offsets); } void RenderingDeviceDriverMetal::command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->render_bind_index_buffer(p_buffer, p_format, p_offset); } void RenderingDeviceDriverMetal::command_render_set_blend_constants(CommandBufferID p_cmd_buffer, const Color &p_constants) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->render_set_blend_constants(p_constants); } @@ -1794,119 +1699,123 @@ void RenderingDeviceDriverMetal::command_render_set_line_width(CommandBufferID p // ----- PIPELINE ----- -RenderingDeviceDriverMetal::Result> RenderingDeviceDriverMetal::_create_function(MDLibrary *p_library, NSString *p_name, VectorView &p_specialization_constants) { - id library = p_library.library; +RenderingDeviceDriverMetal::Result> RenderingDeviceDriverMetal::_create_function(MDLibrary *p_library, NS::String *p_name, VectorView &p_specialization_constants) { + 
MTL::Library *library = p_library->get_library(); if (!library) { ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Failed to compile Metal library"); } - id function = [library newFunctionWithName:p_name]; + MTL::Function *function = library->newFunction(p_name); ERR_FAIL_NULL_V_MSG(function, ERR_CANT_CREATE, "No function named main0"); - if (function.functionConstantsDictionary.count == 0) { - return function; + NS::Dictionary *constants_dict = function->functionConstantsDictionary(); + if (constants_dict->count() == 0) { + return NS::TransferPtr(function); } - NSArray *constants = function.functionConstantsDictionary.allValues; + LocalVector constants; + NS::Enumerator *keys = constants_dict->keyEnumerator(); + while (NS::String *key = keys->nextObject()) { + constants.push_back(constants_dict->object(key)); + } + + // Check if already sorted by index. bool is_sorted = true; - for (uint32_t i = 1; i < constants.count; i++) { - if (constants[i - 1].index > constants[i].index) { + for (NS::UInteger i = 1; i < constants.size(); i++) { + MTL::FunctionConstant *prev = constants[i - 1]; + MTL::FunctionConstant *curr = constants[i]; + if (prev->index() > curr->index()) { is_sorted = false; break; } } if (!is_sorted) { - constants = [constants sortedArrayUsingComparator:^NSComparisonResult(MTLFunctionConstant *a, MTLFunctionConstant *b) { - if (a.index < b.index) { - return NSOrderedAscending; - } else if (a.index > b.index) { - return NSOrderedDescending; - } else { - return NSOrderedSame; + struct Comparator { + bool operator()(const MTL::FunctionConstant *p, const MTL::FunctionConstant *q) const { + return p->index() < q->index(); } - }]; + }; + + constants.sort_custom(); } - // Initialize an array of integers representing the indexes of p_specialization_constants + // Build a sorted list of specialization constants by constant_id. 
uint32_t *indexes = (uint32_t *)alloca(p_specialization_constants.size() * sizeof(uint32_t)); for (uint32_t i = 0; i < p_specialization_constants.size(); i++) { indexes[i] = i; } - // Sort the array of integers based on the values in p_specialization_constants std::sort(indexes, &indexes[p_specialization_constants.size()], [&](int a, int b) { return p_specialization_constants[a].constant_id < p_specialization_constants[b].constant_id; }); - MTLFunctionConstantValues *constantValues = [MTLFunctionConstantValues new]; - uint32_t i = 0; + NS::SharedPtr constantValues = NS::TransferPtr(MTL::FunctionConstantValues::alloc()->init()); + + // Merge the sorted constants from the function with the sorted user constants. + NS::UInteger i = 0; uint32_t j = 0; - while (i < constants.count && j < p_specialization_constants.size()) { - MTLFunctionConstant *curr = constants[i]; + while (i < constants.size() && j < p_specialization_constants.size()) { + MTL::FunctionConstant *curr = (MTL::FunctionConstant *)constants[i]; PipelineSpecializationConstant const &sc = p_specialization_constants[indexes[j]]; - if (curr.index == sc.constant_id) { - switch (curr.type) { - case MTLDataTypeBool: - case MTLDataTypeFloat: - case MTLDataTypeInt: - case MTLDataTypeUInt: { - [constantValues setConstantValue:&sc.int_value - type:curr.type - atIndex:sc.constant_id]; + if (curr->index() == sc.constant_id) { + switch (curr->type()) { + case MTL::DataTypeBool: + case MTL::DataTypeFloat: + case MTL::DataTypeInt: + case MTL::DataTypeUInt: { + constantValues->setConstantValue(&sc.int_value, curr->type(), sc.constant_id); } break; default: - ERR_FAIL_V_MSG(function, "Invalid specialization constant type"); + ERR_FAIL_V_MSG(NS::TransferPtr(function), "Invalid specialization constant type"); } i++; j++; - } else if (curr.index < sc.constant_id) { + } else if (curr->index() < sc.constant_id) { i++; } else { j++; } } - if (i != constants.count) { - MTLFunctionConstant *curr = constants[i]; - if (curr.index == 
R32UI_ALIGNMENT_CONSTANT_ID) { + // Handle R32UI_ALIGNMENT_CONSTANT_ID if present. + if (i < constants.size()) { + MTL::FunctionConstant *curr = constants[i]; + if (curr->index() == R32UI_ALIGNMENT_CONSTANT_ID) { uint32_t alignment = 16; // TODO(sgc): is this always correct? - [constantValues setConstantValue:&alignment - type:curr.type - atIndex:curr.index]; + constantValues->setConstantValue(&alignment, curr->type(), curr->index()); i++; } } - NSError *err = nil; - function = [library newFunctionWithName:@"main0" - constantValues:constantValues - error:&err]; - ERR_FAIL_NULL_V_MSG(function, ERR_CANT_CREATE, String("specialized function failed: ") + err.localizedDescription.UTF8String); + NS::Error *err = nullptr; + function->release(); + function = library->newFunction(p_name, constantValues.get(), &err); + ERR_FAIL_NULL_V_MSG(function, ERR_CANT_CREATE, String("specialized function failed: ") + (err ? err->localizedDescription()->utf8String() : "unknown error")); - return function; + return NS::TransferPtr(function); } -// RDD::PolygonCullMode == MTLCullMode. -static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_DISABLED, MTLCullModeNone)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_FRONT, MTLCullModeFront)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_BACK, MTLCullModeBack)); +// RDD::PolygonCullMode == MTL::CullMode. +static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_DISABLED, MTL::CullModeNone)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_FRONT, MTL::CullModeFront)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_BACK, MTL::CullModeBack)); -// RDD::StencilOperation == MTLStencilOperation. 
-static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_KEEP, MTLStencilOperationKeep)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_ZERO, MTLStencilOperationZero)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_REPLACE, MTLStencilOperationReplace)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INCREMENT_AND_CLAMP, MTLStencilOperationIncrementClamp)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_DECREMENT_AND_CLAMP, MTLStencilOperationDecrementClamp)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INVERT, MTLStencilOperationInvert)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INCREMENT_AND_WRAP, MTLStencilOperationIncrementWrap)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_DECREMENT_AND_WRAP, MTLStencilOperationDecrementWrap)); +// RDD::StencilOperation == MTL::StencilOperation. +static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_KEEP, MTL::StencilOperationKeep)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_ZERO, MTL::StencilOperationZero)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_REPLACE, MTL::StencilOperationReplace)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INCREMENT_AND_CLAMP, MTL::StencilOperationIncrementClamp)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_DECREMENT_AND_CLAMP, MTL::StencilOperationDecrementClamp)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INVERT, MTL::StencilOperationInvert)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INCREMENT_AND_WRAP, MTL::StencilOperationIncrementWrap)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_DECREMENT_AND_WRAP, MTL::StencilOperationDecrementWrap)); -// RDD::BlendOperation == MTLBlendOperation. 
-static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_ADD, MTLBlendOperationAdd)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_SUBTRACT, MTLBlendOperationSubtract)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_REVERSE_SUBTRACT, MTLBlendOperationReverseSubtract)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_MINIMUM, MTLBlendOperationMin)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_MAXIMUM, MTLBlendOperationMax)); +// RDD::BlendOperation == MTL::BlendOperation. +static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_ADD, MTL::BlendOperationAdd)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_SUBTRACT, MTL::BlendOperationSubtract)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_REVERSE_SUBTRACT, MTL::BlendOperationReverseSubtract)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_MINIMUM, MTL::BlendOperationMin)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_MAXIMUM, MTL::BlendOperationMax)); RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create( ShaderID p_shader, @@ -1922,7 +1831,7 @@ RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create( uint32_t p_render_subpass, VectorView p_specialization_constants) { MDRenderShader *shader = (MDRenderShader *)(p_shader.id); - MTLVertexDescriptor *vert_desc = rid::get(p_vertex_format); + MTL::VertexDescriptor *vert_desc = reinterpret_cast(p_vertex_format.id); MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id); os_signpost_id_t reflect_id = os_signpost_id_make_with_pointer(LOG_INTERVALS, shader); @@ -1933,7 +1842,7 @@ RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create( os_signpost_event_emit(LOG_DRIVER, OS_SIGNPOST_ID_EXCLUSIVE, "create_pipeline"); - MTLRenderPipelineDescriptor *desc = [MTLRenderPipelineDescriptor new]; + NS::SharedPtr desc = NS::TransferPtr(MTL::RenderPipelineDescriptor::alloc()->init()); { MDSubpass const &subpass = pass->subpasses[p_render_subpass]; @@ -1941,7 +1850,7 @@ RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create( uint32_t 
attachment = subpass.color_references[i].attachment; if (attachment != AttachmentReference::UNUSED) { MDAttachment const &a = pass->attachments[attachment]; - desc.colorAttachments[i].pixelFormat = a.format; + desc->colorAttachments()->object(i)->setPixelFormat(a.format); } } @@ -1950,17 +1859,27 @@ RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create( MDAttachment const &a = pass->attachments[attachment]; if (a.type & MDAttachmentType::Depth) { - desc.depthAttachmentPixelFormat = a.format; + desc->setDepthAttachmentPixelFormat(a.format); } if (a.type & MDAttachmentType::Stencil) { - desc.stencilAttachmentPixelFormat = a.format; + desc->setStencilAttachmentPixelFormat(a.format); } } } - desc.vertexDescriptor = vert_desc; - desc.label = [NSString stringWithUTF8String:shader->name.get_data()]; + desc->setVertexDescriptor(vert_desc); + desc->setLabel(conv::to_nsstring(shader->name)); + + if (shader->uses_argument_buffers) { + // Set mutability of argument buffers. + for (uint32_t i = 0; i < shader->sets.size(); i++) { + const UniformSet &set = shader->sets[i]; + const MTL::Mutability mutability = set.dynamic_uniforms.is_empty() ? MTL::MutabilityImmutable : MTL::MutabilityMutable; + desc->vertexBuffers()->object(i)->setMutability(mutability); + desc->fragmentBuffers()->object(i)->setMutability(mutability); + } + } // Input assembly & tessellation. 
@@ -1968,69 +1887,69 @@ RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create( switch (p_render_primitive) { case RENDER_PRIMITIVE_POINTS: - desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassPoint; + desc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassPoint); break; case RENDER_PRIMITIVE_LINES: case RENDER_PRIMITIVE_LINES_WITH_ADJACENCY: case RENDER_PRIMITIVE_LINESTRIPS_WITH_ADJACENCY: case RENDER_PRIMITIVE_LINESTRIPS: - desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassLine; + desc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassLine); break; case RENDER_PRIMITIVE_TRIANGLES: case RENDER_PRIMITIVE_TRIANGLE_STRIPS: case RENDER_PRIMITIVE_TRIANGLES_WITH_ADJACENCY: case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_AJACENCY: case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX: - desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassTriangle; + desc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassTriangle); break; case RENDER_PRIMITIVE_TESSELATION_PATCH: - desc.maxTessellationFactor = p_rasterization_state.patch_control_points; - desc.tessellationPartitionMode = MTLTessellationPartitionModeInteger; + desc->setMaxTessellationFactor(p_rasterization_state.patch_control_points); + desc->setTessellationPartitionMode(MTL::TessellationPartitionModeInteger); ERR_FAIL_V_MSG(PipelineID(), "tessellation not implemented"); break; case RENDER_PRIMITIVE_MAX: default: - desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassUnspecified; + desc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassUnspecified); break; } switch (p_render_primitive) { case RENDER_PRIMITIVE_POINTS: - pipeline->raster_state.render_primitive = MTLPrimitiveTypePoint; + pipeline->raster_state.render_primitive = MTL::PrimitiveTypePoint; break; case RENDER_PRIMITIVE_LINES: case RENDER_PRIMITIVE_LINES_WITH_ADJACENCY: - pipeline->raster_state.render_primitive = MTLPrimitiveTypeLine; + pipeline->raster_state.render_primitive = MTL::PrimitiveTypeLine; break; case 
RENDER_PRIMITIVE_LINESTRIPS: case RENDER_PRIMITIVE_LINESTRIPS_WITH_ADJACENCY: - pipeline->raster_state.render_primitive = MTLPrimitiveTypeLineStrip; + pipeline->raster_state.render_primitive = MTL::PrimitiveTypeLineStrip; break; case RENDER_PRIMITIVE_TRIANGLES: case RENDER_PRIMITIVE_TRIANGLES_WITH_ADJACENCY: - pipeline->raster_state.render_primitive = MTLPrimitiveTypeTriangle; + pipeline->raster_state.render_primitive = MTL::PrimitiveTypeTriangle; break; case RENDER_PRIMITIVE_TRIANGLE_STRIPS: case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_AJACENCY: case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX: - pipeline->raster_state.render_primitive = MTLPrimitiveTypeTriangleStrip; + pipeline->raster_state.render_primitive = MTL::PrimitiveTypeTriangleStrip; break; default: break; } // Rasterization. - desc.rasterizationEnabled = !p_rasterization_state.discard_primitives; - pipeline->raster_state.clip_mode = p_rasterization_state.enable_depth_clamp ? MTLDepthClipModeClamp : MTLDepthClipModeClip; - pipeline->raster_state.fill_mode = p_rasterization_state.wireframe ? MTLTriangleFillModeLines : MTLTriangleFillModeFill; + desc->setRasterizationEnabled(!p_rasterization_state.discard_primitives); + pipeline->raster_state.clip_mode = p_rasterization_state.enable_depth_clamp ? MTL::DepthClipModeClamp : MTL::DepthClipModeClip; + pipeline->raster_state.fill_mode = p_rasterization_state.wireframe ? MTL::TriangleFillModeLines : MTL::TriangleFillModeFill; - static const MTLCullMode CULL_MODE[3] = { - MTLCullModeNone, - MTLCullModeFront, - MTLCullModeBack, + static const MTL::CullMode CULL_MODE[3] = { + MTL::CullModeNone, + MTL::CullModeFront, + MTL::CullModeBack, }; pipeline->raster_state.cull_mode = CULL_MODE[p_rasterization_state.cull_mode]; - pipeline->raster_state.winding = (p_rasterization_state.front_face == POLYGON_FRONT_FACE_CLOCKWISE) ? 
MTLWindingClockwise : MTLWindingCounterClockwise; + pipeline->raster_state.winding = (p_rasterization_state.front_face == POLYGON_FRONT_FACE_CLOCKWISE) ? MTL::WindingClockwise : MTL::WindingCounterClockwise; pipeline->raster_state.depth_bias.enabled = p_rasterization_state.depth_bias_enabled; pipeline->raster_state.depth_bias.depth_bias = p_rasterization_state.depth_bias_constant_factor; pipeline->raster_state.depth_bias.slope_scale = p_rasterization_state.depth_bias_slope_factor; @@ -2048,20 +1967,20 @@ RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create( if (p_multisample_state.sample_count > TEXTURE_SAMPLES_1) { pipeline->sample_count = (*device_properties).find_nearest_supported_sample_count(p_multisample_state.sample_count); } - desc.rasterSampleCount = static_cast(pipeline->sample_count); - desc.alphaToCoverageEnabled = p_multisample_state.enable_alpha_to_coverage; - desc.alphaToOneEnabled = p_multisample_state.enable_alpha_to_one; + desc->setRasterSampleCount(static_cast(pipeline->sample_count)); + desc->setAlphaToCoverageEnabled(p_multisample_state.enable_alpha_to_coverage); + desc->setAlphaToOneEnabled(p_multisample_state.enable_alpha_to_one); // Depth buffer. 
- bool depth_enabled = p_depth_stencil_state.enable_depth_test && desc.depthAttachmentPixelFormat != MTLPixelFormatInvalid; - bool stencil_enabled = p_depth_stencil_state.enable_stencil && desc.stencilAttachmentPixelFormat != MTLPixelFormatInvalid; + bool depth_enabled = p_depth_stencil_state.enable_depth_test && desc->depthAttachmentPixelFormat() != MTL::PixelFormatInvalid; + bool stencil_enabled = p_depth_stencil_state.enable_stencil && desc->stencilAttachmentPixelFormat() != MTL::PixelFormatInvalid; if (depth_enabled || stencil_enabled) { - MTLDepthStencilDescriptor *ds_desc = [MTLDepthStencilDescriptor new]; + NS::SharedPtr ds_desc = NS::TransferPtr(MTL::DepthStencilDescriptor::alloc()->init()); pipeline->raster_state.depth_test.enabled = depth_enabled; - ds_desc.depthWriteEnabled = p_depth_stencil_state.enable_depth_write; - ds_desc.depthCompareFunction = COMPARE_OPERATORS[p_depth_stencil_state.depth_compare_operator]; + ds_desc->setDepthWriteEnabled(p_depth_stencil_state.enable_depth_write); + ds_desc->setDepthCompareFunction(COMPARE_OPERATORS[p_depth_stencil_state.depth_compare_operator]); if (p_depth_stencil_state.enable_depth_range) { WARN_PRINT("unsupported: depth range"); } @@ -2073,33 +1992,33 @@ RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create( { // Front. 
- MTLStencilDescriptor *sd = [MTLStencilDescriptor new]; - sd.stencilFailureOperation = STENCIL_OPERATIONS[p_depth_stencil_state.front_op.fail]; - sd.depthStencilPassOperation = STENCIL_OPERATIONS[p_depth_stencil_state.front_op.pass]; - sd.depthFailureOperation = STENCIL_OPERATIONS[p_depth_stencil_state.front_op.depth_fail]; - sd.stencilCompareFunction = COMPARE_OPERATORS[p_depth_stencil_state.front_op.compare]; - sd.readMask = p_depth_stencil_state.front_op.compare_mask; - sd.writeMask = p_depth_stencil_state.front_op.write_mask; - ds_desc.frontFaceStencil = sd; + NS::SharedPtr sd = NS::TransferPtr(MTL::StencilDescriptor::alloc()->init()); + sd->setStencilFailureOperation(STENCIL_OPERATIONS[p_depth_stencil_state.front_op.fail]); + sd->setDepthStencilPassOperation(STENCIL_OPERATIONS[p_depth_stencil_state.front_op.pass]); + sd->setDepthFailureOperation(STENCIL_OPERATIONS[p_depth_stencil_state.front_op.depth_fail]); + sd->setStencilCompareFunction(COMPARE_OPERATORS[p_depth_stencil_state.front_op.compare]); + sd->setReadMask(p_depth_stencil_state.front_op.compare_mask); + sd->setWriteMask(p_depth_stencil_state.front_op.write_mask); + ds_desc->setFrontFaceStencil(sd.get()); } { // Back. 
- MTLStencilDescriptor *sd = [MTLStencilDescriptor new]; - sd.stencilFailureOperation = STENCIL_OPERATIONS[p_depth_stencil_state.back_op.fail]; - sd.depthStencilPassOperation = STENCIL_OPERATIONS[p_depth_stencil_state.back_op.pass]; - sd.depthFailureOperation = STENCIL_OPERATIONS[p_depth_stencil_state.back_op.depth_fail]; - sd.stencilCompareFunction = COMPARE_OPERATORS[p_depth_stencil_state.back_op.compare]; - sd.readMask = p_depth_stencil_state.back_op.compare_mask; - sd.writeMask = p_depth_stencil_state.back_op.write_mask; - ds_desc.backFaceStencil = sd; + NS::SharedPtr sd = NS::TransferPtr(MTL::StencilDescriptor::alloc()->init()); + sd->setStencilFailureOperation(STENCIL_OPERATIONS[p_depth_stencil_state.back_op.fail]); + sd->setDepthStencilPassOperation(STENCIL_OPERATIONS[p_depth_stencil_state.back_op.pass]); + sd->setDepthFailureOperation(STENCIL_OPERATIONS[p_depth_stencil_state.back_op.depth_fail]); + sd->setStencilCompareFunction(COMPARE_OPERATORS[p_depth_stencil_state.back_op.compare]); + sd->setReadMask(p_depth_stencil_state.back_op.compare_mask); + sd->setWriteMask(p_depth_stencil_state.back_op.write_mask); + ds_desc->setBackFaceStencil(sd.get()); } } - pipeline->depth_stencil = [device newDepthStencilStateWithDescriptor:ds_desc]; - ERR_FAIL_NULL_V_MSG(pipeline->depth_stencil, PipelineID(), "Failed to create depth stencil state"); + pipeline->depth_stencil = NS::TransferPtr(device->newDepthStencilState(ds_desc.get())); + ERR_FAIL_COND_V_MSG(!pipeline->depth_stencil, PipelineID(), "Failed to create depth stencil state"); } else { // TODO(sgc): FB13671991 raised as Apple docs state calling setDepthStencilState:nil is valid, but currently generates an exception - pipeline->depth_stencil = get_resource_cache().get_depth_stencil_state(false, false); + pipeline->depth_stencil = NS::RetainPtr(get_resource_cache().get_depth_stencil_state(false, false)); } // Blend state. 
@@ -2111,30 +2030,31 @@ RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create( const PipelineColorBlendState::Attachment &bs = p_blend_state.attachments[i]; - MTLRenderPipelineColorAttachmentDescriptor *ca_desc = desc.colorAttachments[p_color_attachments[i]]; - ca_desc.blendingEnabled = bs.enable_blend; + MTL::RenderPipelineColorAttachmentDescriptor *ca_desc = desc->colorAttachments()->object(p_color_attachments[i]); + ca_desc->setBlendingEnabled(bs.enable_blend); - ca_desc.sourceRGBBlendFactor = BLEND_FACTORS[bs.src_color_blend_factor]; - ca_desc.destinationRGBBlendFactor = BLEND_FACTORS[bs.dst_color_blend_factor]; - ca_desc.rgbBlendOperation = BLEND_OPERATIONS[bs.color_blend_op]; + ca_desc->setSourceRGBBlendFactor(BLEND_FACTORS[bs.src_color_blend_factor]); + ca_desc->setDestinationRGBBlendFactor(BLEND_FACTORS[bs.dst_color_blend_factor]); + ca_desc->setRgbBlendOperation(BLEND_OPERATIONS[bs.color_blend_op]); - ca_desc.sourceAlphaBlendFactor = BLEND_FACTORS[bs.src_alpha_blend_factor]; - ca_desc.destinationAlphaBlendFactor = BLEND_FACTORS[bs.dst_alpha_blend_factor]; - ca_desc.alphaBlendOperation = BLEND_OPERATIONS[bs.alpha_blend_op]; + ca_desc->setSourceAlphaBlendFactor(BLEND_FACTORS[bs.src_alpha_blend_factor]); + ca_desc->setDestinationAlphaBlendFactor(BLEND_FACTORS[bs.dst_alpha_blend_factor]); + ca_desc->setAlphaBlendOperation(BLEND_OPERATIONS[bs.alpha_blend_op]); - ca_desc.writeMask = MTLColorWriteMaskNone; + MTL::ColorWriteMask writeMask = MTL::ColorWriteMaskNone; if (bs.write_r) { - ca_desc.writeMask |= MTLColorWriteMaskRed; + writeMask |= MTL::ColorWriteMaskRed; } if (bs.write_g) { - ca_desc.writeMask |= MTLColorWriteMaskGreen; + writeMask |= MTL::ColorWriteMaskGreen; } if (bs.write_b) { - ca_desc.writeMask |= MTLColorWriteMaskBlue; + writeMask |= MTL::ColorWriteMaskBlue; } if (bs.write_a) { - ca_desc.writeMask |= MTLColorWriteMaskAlpha; + writeMask |= MTL::ColorWriteMaskAlpha; } + ca_desc->setWriteMask(writeMask); } pipeline->raster_state.blend.r 
= p_blend_state.blend_constant.r; @@ -2169,34 +2089,40 @@ RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create( pipeline->raster_state.stencil.enabled = true; } - if (shader->vert != nil) { - Result> function_or_err = _create_function(shader->vert, @"main0", p_specialization_constants); + if (shader->vert) { + Result> function_or_err = _create_function(shader->vert.get(), MTLSTR("main0"), p_specialization_constants); ERR_FAIL_COND_V(std::holds_alternative(function_or_err), PipelineID()); - desc.vertexFunction = std::get>(function_or_err); + desc->setVertexFunction(std::get>(function_or_err).get()); } - if (shader->frag != nil) { - Result> function_or_err = _create_function(shader->frag, @"main0", p_specialization_constants); + if (shader->frag) { + Result> function_or_err = _create_function(shader->frag.get(), MTLSTR("main0"), p_specialization_constants); ERR_FAIL_COND_V(std::holds_alternative(function_or_err), PipelineID()); - desc.fragmentFunction = std::get>(function_or_err); + desc->setFragmentFunction(std::get>(function_or_err).get()); } - if (archive) { - desc.binaryArchives = @[ archive ]; + MTL::PipelineOption options = MTL::PipelineOptionNone; + MTL::BinaryArchive *arc = archive.get(); + if (arc) { + NS::SharedPtr archives = NS::TransferPtr(NS::Array::array(reinterpret_cast(&arc), 1)->retain()); + desc->setBinaryArchives(archives.get()); + if (archive_fail_on_miss) { + options |= MTL::PipelineOptionFailOnBinaryArchiveMiss; + } } - NSError *error = nil; - pipeline->state = [device newRenderPipelineStateWithDescriptor:desc - error:&error]; + NS::Error *error = nullptr; + pipeline->state = NS::TransferPtr(device->newRenderPipelineState(desc.get(), options, nullptr, &error)); pipeline->shader = shader; - ERR_FAIL_COND_V_MSG(error != nil, PipelineID(), ([NSString stringWithFormat:@"error creating pipeline: %@", error.localizedDescription].UTF8String)); + ERR_FAIL_COND_V_MSG(error != nullptr, PipelineID(), String("error creating pipeline: ") + 
error->localizedDescription()->utf8String()); + ERR_FAIL_COND_V_MSG(!pipeline->state, PipelineID(), "Failed to create render pipeline state"); - if (archive) { - if ([archive addRenderPipelineFunctionsWithDescriptor:desc error:&error]) { + if (arc) { + if (arc->addRenderPipelineFunctions(desc.get(), &error)) { archive_count += 1; } else { - print_error(error.localizedDescription.UTF8String); + print_error(error->localizedDescription()->utf8String()); } } @@ -2208,22 +2134,22 @@ RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create( // ----- COMMANDS ----- void RenderingDeviceDriverMetal::command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->bind_pipeline(p_pipeline); } void RenderingDeviceDriverMetal::command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->compute_bind_uniform_sets(p_uniform_sets, p_shader, p_first_set_index, p_set_count, p_dynamic_offsets); } void RenderingDeviceDriverMetal::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->compute_dispatch(p_x_groups, p_y_groups, p_z_groups); } void RenderingDeviceDriverMetal::command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->compute_dispatch_indirect(p_indirect_buffer, 
p_offset); } @@ -2240,33 +2166,47 @@ RDD::PipelineID RenderingDeviceDriverMetal::compute_pipeline_create(ShaderID p_s os_signpost_event_emit(LOG_DRIVER, OS_SIGNPOST_ID_EXCLUSIVE, "create_pipeline"); - Result> function_or_err = _create_function(shader->kernel, @"main0", p_specialization_constants); + Result> function_or_err = _create_function(shader->kernel.get(), MTLSTR("main0"), p_specialization_constants); ERR_FAIL_COND_V(std::holds_alternative(function_or_err), PipelineID()); - id function = std::get>(function_or_err); + NS::SharedPtr function = std::get>(function_or_err); - MTLComputePipelineDescriptor *desc = [MTLComputePipelineDescriptor new]; - desc.computeFunction = function; - desc.label = conv::to_nsstring(shader->name); - if (archive) { - desc.binaryArchives = @[ archive ]; + NS::SharedPtr desc = NS::TransferPtr(MTL::ComputePipelineDescriptor::alloc()->init()); + desc->setComputeFunction(function.get()); + desc->setLabel(conv::to_nsstring(shader->name)); + + if (shader->uses_argument_buffers) { + // Set mutability of argument buffers. + for (uint32_t i = 0; i < shader->sets.size(); i++) { + const UniformSet &set = shader->sets[i]; + const MTL::Mutability mutability = set.dynamic_uniforms.is_empty() ? 
MTL::MutabilityImmutable : MTL::MutabilityMutable; + desc->buffers()->object(i)->setMutability(mutability); + } } - NSError *error; - id state = [device newComputePipelineStateWithDescriptor:desc - options:MTLPipelineOptionNone - reflection:nil - error:&error]; - ERR_FAIL_COND_V_MSG(error != nil, PipelineID(), ([NSString stringWithFormat:@"error creating pipeline: %@", error.localizedDescription].UTF8String)); + MTL::PipelineOption options = MTL::PipelineOptionNone; + MTL::BinaryArchive *arc = archive.get(); + if (arc) { + NS::SharedPtr archives = NS::TransferPtr(NS::Array::array(reinterpret_cast(&arc), 1)->retain()); + desc->setBinaryArchives(archives.get()); + if (archive_fail_on_miss) { + options |= MTL::PipelineOptionFailOnBinaryArchiveMiss; + } + } + + NS::Error *error = nullptr; + NS::SharedPtr state = NS::TransferPtr(device->newComputePipelineState(desc.get(), options, nullptr, &error)); + ERR_FAIL_COND_V_MSG(error != nullptr, PipelineID(), String("error creating pipeline: ") + error->localizedDescription()->utf8String()); + ERR_FAIL_COND_V_MSG(!state, PipelineID(), "Failed to create compute pipeline state"); MDComputePipeline *pipeline = new MDComputePipeline(state); pipeline->compute_state.local = shader->local; pipeline->shader = shader; - if (archive) { - if ([archive addComputePipelineFunctionsWithDescriptor:desc error:&error]) { + if (arc) { + if (arc->addComputePipelineFunctions(desc.get(), &error)) { archive_count += 1; } else { - print_error(error.localizedDescription.UTF8String); + print_error(error->localizedDescription()->utf8String()); } } @@ -2358,12 +2298,12 @@ void RenderingDeviceDriverMetal::command_timestamp_write(CommandBufferID p_cmd_b #pragma mark - Labels void RenderingDeviceDriverMetal::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); 
cb->begin_label(p_label_name, p_color); } void RenderingDeviceDriverMetal::command_end_label(CommandBufferID p_cmd_buffer) { - MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id); cb->end_label(); } @@ -2381,38 +2321,41 @@ void RenderingDeviceDriverMetal::begin_segment(uint32_t p_frame_index, uint32_t } void RenderingDeviceDriverMetal::end_segment() { + MutexLock lock(copy_queue_mutex); + _copy_queue_flush(); } #pragma mark - Misc void RenderingDeviceDriverMetal::set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) { + NS::String *label = conv::to_nsstring(p_name); + switch (p_type) { case OBJECT_TYPE_TEXTURE: { - id tex = rid::get(p_driver_id); - tex.label = [NSString stringWithUTF8String:p_name.utf8().get_data()]; + MTL::Texture *tex = reinterpret_cast(p_driver_id.id); + tex->setLabel(label); } break; case OBJECT_TYPE_SAMPLER: { // Can't set label after creation. } break; case OBJECT_TYPE_BUFFER: { const BufferInfo *buf_info = (const BufferInfo *)p_driver_id.id; - buf_info->metal_buffer.label = [NSString stringWithUTF8String:p_name.utf8().get_data()]; + buf_info->metal_buffer.get()->setLabel(label); } break; case OBJECT_TYPE_SHADER: { - NSString *label = [NSString stringWithUTF8String:p_name.utf8().get_data()]; MDShader *shader = (MDShader *)(p_driver_id.id); if (MDRenderShader *rs = dynamic_cast(shader); rs != nullptr) { - [rs->vert setLabel:label]; - [rs->frag setLabel:label]; + rs->vert->set_label(label); + rs->frag->set_label(label); } else if (MDComputeShader *cs = dynamic_cast(shader); cs != nullptr) { - [cs->kernel setLabel:label]; + cs->kernel->set_label(label); } else { DEV_ASSERT(false); } } break; case OBJECT_TYPE_UNIFORM_SET: { MDUniformSet *set = (MDUniformSet *)(p_driver_id.id); - set->arg_buffer.label = [NSString stringWithUTF8String:p_name.utf8().get_data()]; + set->arg_buffer->setLabel(label); } break; case OBJECT_TYPE_PIPELINE: { // Can't set label 
after creation. @@ -2426,7 +2369,7 @@ void RenderingDeviceDriverMetal::set_object_name(ObjectType p_type, ID p_driver_ uint64_t RenderingDeviceDriverMetal::get_resource_native_handle(DriverResource p_type, ID p_driver_id) { switch (p_type) { case DRIVER_RESOURCE_LOGICAL_DEVICE: { - return (uint64_t)(uintptr_t)(__bridge void *)device; + return (uint64_t)(uintptr_t)device; } case DRIVER_RESOURCE_PHYSICAL_DEVICE: { return 0; @@ -2435,7 +2378,7 @@ uint64_t RenderingDeviceDriverMetal::get_resource_native_handle(DriverResource p return 0; } case DRIVER_RESOURCE_COMMAND_QUEUE: { - return (uint64_t)(uintptr_t)(__bridge void *)device_queue; + return (uint64_t)(uintptr_t)get_command_queue(); } case DRIVER_RESOURCE_QUEUE_FAMILY: { return 0; @@ -2460,11 +2403,11 @@ uint64_t RenderingDeviceDriverMetal::get_resource_native_handle(DriverResource p } case DRIVER_RESOURCE_COMPUTE_PIPELINE: { MDComputePipeline *pipeline = (MDComputePipeline *)(p_driver_id.id); - return (uint64_t)(uintptr_t)(__bridge void *)pipeline->state; + return (uint64_t)(uintptr_t)pipeline->state.get(); } case DRIVER_RESOURCE_RENDER_PIPELINE: { MDRenderPipeline *pipeline = (MDRenderPipeline *)(p_driver_id.id); - return (uint64_t)(uintptr_t)(__bridge void *)pipeline->state; + return (uint64_t)(uintptr_t)pipeline->state.get(); } default: { return 0; @@ -2472,8 +2415,65 @@ uint64_t RenderingDeviceDriverMetal::get_resource_native_handle(DriverResource p } } +void RenderingDeviceDriverMetal::_copy_queue_copy_to_buffer(Span p_src_data, MTL::Buffer *p_dst_buffer, uint64_t p_dst_offset) { + MutexLock lock(copy_queue_mutex); + if (_copy_queue_buffer_available() < p_src_data.size()) { + _copy_queue_flush(); + } + + MTL::BlitCommandEncoder *blit_encoder = _copy_queue_blit_encoder(); + + memcpy(_copy_queue_buffer_ptr(), p_src_data.ptr(), p_src_data.size()); + + copy_queue_rs.get()->addAllocation(p_dst_buffer); + blit_encoder->copyFromBuffer(copy_queue_buffer.get(), copy_queue_buffer_offset, p_dst_buffer, p_dst_offset, 
p_src_data.size()); + + _copy_queue_buffer_consume(p_src_data.size()); +} + +void RenderingDeviceDriverMetal::_copy_queue_flush() { + if (!copy_queue_blit_encoder) { + return; + } + + copy_queue_rs.get()->addAllocation(copy_queue_buffer.get()); + copy_queue_rs.get()->commit(); + + copy_queue_blit_encoder.get()->endEncoding(); + copy_queue_blit_encoder.reset(); + copy_queue_command_buffer.get()->commit(); + copy_queue_command_buffer.get()->waitUntilCompleted(); + copy_queue_command_buffer.reset(); + copy_queue_buffer_offset = 0; + copy_queue_rs.get()->removeAllAllocations(); +} + +Error RenderingDeviceDriverMetal::_copy_queue_initialize() { + DEV_ASSERT(!copy_queue); + + copy_queue = NS::TransferPtr(device->newCommandQueue()); + ERR_FAIL_COND_V(!copy_queue, ERR_CANT_CREATE); // Validate the queue before it is used below. + copy_queue.get()->setLabel(MTLSTR("Copy Command Queue")); + + // Reserve 64 KiB for copy commands. If the buffer fills, it will be flushed automatically. + copy_queue_buffer = NS::TransferPtr(device->newBuffer(64 * 1024, MTL::ResourceStorageModeShared | MTL::ResourceHazardTrackingModeUntracked)); + ERR_FAIL_COND_V(!copy_queue_buffer, ERR_CANT_CREATE); // The copy path memcpy's into this buffer's contents. + copy_queue_buffer.get()->setLabel(MTLSTR("Copy Command Scratch Buffer")); + if (__builtin_available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) { // MTLResidencySet requires visionOS 2.0. + MTL::ResidencySetDescriptor *rs_desc = MTL::ResidencySetDescriptor::alloc()->init(); + rs_desc->setInitialCapacity(2); + rs_desc->setLabel(MTLSTR("Copy Queue Residency Set")); + NS::Error *error = nullptr; + copy_queue_rs = NS::TransferPtr(device->newResidencySet(rs_desc, &error)); + rs_desc->release(); + copy_queue.get()->addResidencySet(copy_queue_rs.get()); + } + + return OK; +} + uint64_t RenderingDeviceDriverMetal::get_total_memory_used() { - return device.currentAllocatedSize; + return device->currentAllocatedSize(); } uint64_t RenderingDeviceDriverMetal::get_lazily_memory_used() { @@ -2602,7 +2602,7 @@ uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) { uint64_t RenderingDeviceDriverMetal::api_trait_get(ApiTrait p_trait) { 
switch (p_trait) { case API_TRAIT_HONORS_PIPELINE_BARRIERS: - return false; + return use_barriers; case API_TRAIT_CLEARS_WITH_COPY_ENGINE: return false; default: @@ -2663,11 +2663,11 @@ bool RenderingDeviceDriverMetal::is_composite_alpha_supported(CommandQueueID p_q } size_t RenderingDeviceDriverMetal::get_texel_buffer_alignment_for_format(RDD::DataFormat p_format) const { - return [device minimumLinearTextureAlignmentForPixelFormat:pixel_formats->getMTLPixelFormat(p_format)]; + return device->minimumLinearTextureAlignmentForPixelFormat(pixel_formats->getMTLPixelFormat(p_format)); } -size_t RenderingDeviceDriverMetal::get_texel_buffer_alignment_for_format(MTLPixelFormat p_format) const { - return [device minimumLinearTextureAlignmentForPixelFormat:p_format]; +size_t RenderingDeviceDriverMetal::get_texel_buffer_alignment_for_format(MTL::PixelFormat p_format) const { + return device->minimumLinearTextureAlignmentForPixelFormat(p_format); } /******************/ @@ -2675,6 +2675,9 @@ size_t RenderingDeviceDriverMetal::get_texel_buffer_alignment_for_format(MTLPixe RenderingDeviceDriverMetal::RenderingDeviceDriverMetal(RenderingContextDriverMetal *p_context_driver) : context_driver(p_context_driver) { DEV_ASSERT(p_context_driver != nullptr); + if (String res = OS::get_singleton()->get_environment("GODOT_MTL_ARCHIVE_FAIL_ON_MISS"); res == "1") { + archive_fail_on_miss = true; + } #if TARGET_OS_OSX if (String res = OS::get_singleton()->get_environment("GODOT_MTL_SHADER_LOAD_STRATEGY"); res == U"lazy") { @@ -2687,10 +2690,6 @@ RenderingDeviceDriverMetal::RenderingDeviceDriverMetal(RenderingContextDriverMet } RenderingDeviceDriverMetal::~RenderingDeviceDriverMetal() { - for (MDCommandBuffer *cb : command_buffers) { - delete cb; - } - for (KeyValue &kv : _shader_cache) { memdelete(kv.value); } @@ -2713,18 +2712,25 @@ RenderingDeviceDriverMetal::~RenderingDeviceDriverMetal() { Error RenderingDeviceDriverMetal::_create_device() { device = context_driver->get_metal_device(); - 
device_queue = [device newCommandQueue]; - ERR_FAIL_NULL_V(device_queue, ERR_CANT_CREATE); - - device_scope = [MTLCaptureManager.sharedCaptureManager newCaptureScopeWithCommandQueue:device_queue]; - device_scope.label = @"Godot Frame"; - [device_scope beginScope]; // Allow Xcode to capture the first frame, if desired. - - resource_cache = std::make_unique(this); + device_scope = NS::TransferPtr(MTL::CaptureManager::sharedCaptureManager()->newCaptureScope(device)); + device_scope->setLabel(MTLSTR("Godot Frame")); + device_scope->beginScope(); // Allow Xcode to capture the first frame, if desired. return OK; } +void RenderingDeviceDriverMetal::_track_resource(MTL::Resource *p_resource) { + if (use_barriers) { + _residency_add.push_back(p_resource); + } +} + +void RenderingDeviceDriverMetal::_untrack_resource(MTL::Resource *p_resource) { + if (use_barriers) { + _residency_del.push_back(p_resource); + } +} + void RenderingDeviceDriverMetal::_check_capabilities() { capabilities.device_family = DEVICE_METAL; parse_msl_version(device_properties->features.msl_target_version, capabilities.version_major, capabilities.version_minor); @@ -2733,13 +2739,17 @@ void RenderingDeviceDriverMetal::_check_capabilities() { API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) static MetalDeviceProfile device_profile_from_properties(MetalDeviceProperties *p_device_properties) { using DP = MetalDeviceProfile; - NSOperatingSystemVersion os_version = NSProcessInfo.processInfo.operatingSystemVersion; + NS::OperatingSystemVersion os_version = NS::ProcessInfo::processInfo()->operatingSystemVersion(); MetalDeviceProfile res; res.min_os_version = MinOsVersion(os_version.majorVersion, os_version.minorVersion, os_version.patchVersion); #if TARGET_OS_OSX res.platform = DP::Platform::macOS; -#else +#elif TARGET_OS_VISION // Must be tested before TARGET_OS_IPHONE, which is also 1 on visionOS. + res.platform = DP::Platform::visionOS; +#elif TARGET_OS_IPHONE res.platform = DP::Platform::iOS; +#else +#error "Unsupported Apple platform" #endif res.features = { .msl_version = 
p_device_properties->features.msl_target_version, @@ -2749,31 +2759,31 @@ static MetalDeviceProfile device_profile_from_properties(MetalDeviceProperties * // highestFamily will only be set to an Apple GPU family switch (p_device_properties->features.highestFamily) { - case MTLGPUFamilyApple1: + case MTL::GPUFamilyApple1: res.gpu = DP::GPU::Apple1; break; - case MTLGPUFamilyApple2: + case MTL::GPUFamilyApple2: res.gpu = DP::GPU::Apple2; break; - case MTLGPUFamilyApple3: + case MTL::GPUFamilyApple3: res.gpu = DP::GPU::Apple3; break; - case MTLGPUFamilyApple4: + case MTL::GPUFamilyApple4: res.gpu = DP::GPU::Apple4; break; - case MTLGPUFamilyApple5: + case MTL::GPUFamilyApple5: res.gpu = DP::GPU::Apple5; break; - case MTLGPUFamilyApple6: + case MTL::GPUFamilyApple6: res.gpu = DP::GPU::Apple6; break; - case MTLGPUFamilyApple7: + case MTL::GPUFamilyApple7: res.gpu = DP::GPU::Apple7; break; - case MTLGPUFamilyApple8: + case MTL::GPUFamilyApple8: res.gpu = DP::GPU::Apple8; break; - case MTLGPUFamilyApple9: + case MTL::GPUFamilyApple9: res.gpu = DP::GPU::Apple9; break; default: { @@ -2785,17 +2795,21 @@ static MetalDeviceProfile device_profile_from_properties(MetalDeviceProperties * return res; } -Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p_frame_count) { +Error RenderingDeviceDriverMetal::_initialize(uint32_t p_device_index, uint32_t p_frame_count) { context_device = context_driver->device_get(p_device_index); Error err = _create_device(); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); device_properties = memnew(MetalDeviceProperties(device)); device_profile = device_profile_from_properties(device_properties); + resource_cache = std::make_unique(device, *pixel_formats, device_properties->limits.maxPerStageBufferCount); shader_container_format = memnew(RenderingShaderContainerFormatMetal(&device_profile)); _check_capabilities(); + err = _copy_queue_initialize(); + ERR_FAIL_COND_V(err, ERR_CANT_CREATE); + _frame_count = p_frame_count; // Set the 
pipeline cache ID based on the Metal version. @@ -2816,7 +2830,7 @@ Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p } // The Metal renderer requires Apple4 family. This is 2017 era A11 chips and newer. - if (device_properties->features.highestFamily < MTLGPUFamilyApple4) { + if (device_properties->features.highestFamily < MTL::GPUFamilyApple4) { String error_string = vformat("Your Apple GPU does not support the following features, which are required to use Metal-based renderers in Godot:\n\n"); if (!device_properties->features.imageCubeArray) { error_string += "- No support for image cube arrays.\n"; diff --git a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h index d501a1140ab..b6cdd84786c 100644 --- a/drivers/metal/rendering_device_driver_metal.h +++ b/drivers/metal/rendering_device_driver_metal.h @@ -30,13 +30,13 @@ #pragma once -#import "metal_device_profile.h" -#import "metal_objects.h" +#include "metal_device_profile.h" +#include "metal_objects_shared.h" #include "servers/rendering/rendering_device_driver.h" -#import -#import +#include +#include class RenderingShaderContainerFormatMetal; @@ -48,18 +48,28 @@ class RenderingShaderContainerFormatMetal; class RenderingContextDriverMetal; +namespace MTL3 { +class MDCommandBuffer; +} +namespace MTL4 { +class MDCommandBuffer; +} + class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMetal : public RenderingDeviceDriver { friend struct ShaderCacheEntry; - friend class MDCommandBuffer; + friend class MTL3::MDCommandBuffer; + friend class MTL4::MDCommandBuffer; + friend class MDUniformSet; template using Result = std::variant; #pragma mark - Generic +protected: RenderingContextDriverMetal *context_driver = nullptr; RenderingContextDriver::Device context_device; - id device = nil; + MTL::Device *device = nullptr; uint32_t _frame_count = 1; /// frame_index is a cyclic counter derived from the current frame number modulo 
frame_count, @@ -78,16 +88,84 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet RDD::FragmentShadingRateCapabilities fsr_capabilities; RDD::FragmentDensityMapCapabilities fdm_capabilities; - id archive = nil; + NS::SharedPtr archive; uint32_t archive_count = 0; + // DEV: When true, attempting to create a pipeline will fail if it cannot use the archive. + bool archive_fail_on_miss = false; - id device_queue = nil; - id device_scope = nil; + /// Resources to be added to the `main_residency_set`. + LocalVector _residency_add; + /// Resources to be removed from the `main_residency_set`. + LocalVector _residency_del; + +#pragma mark - Copy Queue + + Mutex copy_queue_mutex; + /// A command queue used for internal copy operations. + NS::SharedPtr copy_queue; + GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability") + NS::SharedPtr copy_queue_rs; + GODOT_CLANG_WARNING_POP + // If this is not nullptr, there are pending copy operations. + NS::SharedPtr copy_queue_command_buffer; + NS::SharedPtr copy_queue_blit_encoder; + NS::SharedPtr copy_queue_buffer; + NS::UInteger copy_queue_buffer_offset = 0; + + _FORCE_INLINE_ NS::UInteger _copy_queue_buffer_available() const { + return copy_queue_buffer.get()->length() - copy_queue_buffer_offset; + } + + /// Marks p_size bytes as consumed from the copy queue buffer, then rounds the offset up to 16 bytes so the next copy starts aligned. + _FORCE_INLINE_ void _copy_queue_buffer_consume(NS::UInteger p_size) { + // Round up after adding p_size: rounding first leaves the next start unaligned and can push the offset past the buffer length. + copy_queue_buffer_offset = round_up_to_alignment(copy_queue_buffer_offset + p_size, 16); + } + + /// Returns a pointer to the current position in the copy queue buffer. 
+ _FORCE_INLINE_ void *_copy_queue_buffer_ptr() const { + return static_cast(copy_queue_buffer.get()->contents()) + copy_queue_buffer_offset; + } + + _FORCE_INLINE_ MTL::CommandBuffer *_copy_queue_command_buffer() { + if (!copy_queue_command_buffer) { + DEV_ASSERT(!copy_queue_blit_encoder); + copy_queue_command_buffer = NS::RetainPtr(copy_queue.get()->commandBufferWithUnretainedReferences()); + } + return copy_queue_command_buffer.get(); + } + + _FORCE_INLINE_ MTL::BlitCommandEncoder *_copy_queue_blit_encoder() { + if (!copy_queue_blit_encoder) { + MTL::BlitCommandEncoder *enc = _copy_queue_command_buffer()->blitCommandEncoder(); + copy_queue_blit_encoder = NS::RetainPtr(enc); + } + return copy_queue_blit_encoder.get(); + } + + void _copy_queue_copy_to_buffer(Span p_src_data, MTL::Buffer *p_dst_buffer, uint64_t p_dst_offset = 0); + void _copy_queue_flush(); + Error _copy_queue_initialize(); + + NS::SharedPtr device_scope; String pipeline_cache_id; - Error _create_device(); + virtual MTL::CommandQueue *get_command_queue() const = 0; + GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability") + virtual void add_residency_set_to_main_queue(MTL::ResidencySet *p_set) = 0; + virtual void remove_residency_set_to_main_queue(MTL::ResidencySet *p_set) = 0; + NS::SharedPtr main_residency_set; + GODOT_CLANG_WARNING_POP + + bool use_barriers = false; + MTL::ResourceOptions base_hazard_tracking = MTL::ResourceHazardTrackingModeTracked; + + virtual Error _create_device(); + virtual void _track_resource(MTL::Resource *p_resource); + virtual void _untrack_resource(MTL::Resource *p_resource); void _check_capabilities(); + Error _initialize(uint32_t p_device_index, uint32_t p_frame_count); #pragma mark - Shader Cache @@ -103,7 +181,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet void shader_cache_free_entry(const SHA256Digest &key); public: - Error initialize(uint32_t p_device_index, uint32_t p_frame_count) override final; + virtual Error 
initialize(uint32_t p_device_index, uint32_t p_frame_count) override = 0; #pragma mark - Memory @@ -111,7 +189,7 @@ public: public: struct BufferInfo { - id metal_buffer; + NS::SharedPtr metal_buffer; _FORCE_INLINE_ bool is_dynamic() const { return _frame_idx != UINT32_MAX; } _FORCE_INLINE_ uint32_t frame_index() const { return _frame_idx; } @@ -131,7 +209,6 @@ public: virtual void buffer_unmap(BufferID p_buffer) override final; virtual uint8_t *buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) override final; virtual uint64_t buffer_get_dynamic_offsets(Span p_buffers) override final; - virtual void buffer_flush(BufferID p_buffer) override final; virtual uint64_t buffer_get_device_address(BufferID p_buffer) override final; #pragma mark - Texture @@ -168,6 +245,7 @@ public: #pragma mark - Barriers +public: virtual void command_pipeline_barrier( CommandBufferID p_cmd_buffer, BitField p_src_stages, @@ -179,78 +257,16 @@ public: #pragma mark - Fences -private: - struct Fence { - virtual void signal(id p_cmd_buffer) = 0; - virtual Error wait(uint32_t p_timeout_ms) = 0; - virtual ~Fence() = default; - }; - - struct FenceEvent : public Fence { - id event; - uint64_t value; - FenceEvent(id p_event) : - event(p_event), - value(0) {} - - virtual void signal(id p_cb) override { - if (p_cb) { - value++; - [p_cb encodeSignalEvent:event value:value]; - } - } - - virtual Error wait(uint32_t p_timeout_ms) override { - GODOT_CLANG_WARNING_PUSH - GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability") - BOOL signaled = [event waitUntilSignaledValue:value timeoutMS:p_timeout_ms]; - GODOT_CLANG_WARNING_POP - if (!signaled) { -#ifdef DEBUG_ENABLED - ERR_PRINT("timeout waiting for fence"); -#endif - return ERR_TIMEOUT; - } - - return OK; - } - }; - - struct FenceSemaphore : public Fence { - dispatch_semaphore_t semaphore; - FenceSemaphore() : - semaphore(dispatch_semaphore_create(0)) {} - - virtual void signal(id p_cb) override { - if (p_cb) { - [p_cb 
addCompletedHandler:^(id buffer) { - dispatch_semaphore_signal(semaphore); - }]; - } else { - dispatch_semaphore_signal(semaphore); - } - } - - virtual Error wait(uint32_t p_timeout_ms) override { - dispatch_time_t timeout = dispatch_time(DISPATCH_TIME_NOW, static_cast(p_timeout_ms) * 1000000); - long result = dispatch_semaphore_wait(semaphore, timeout); - if (result != 0) { - return ERR_TIMEOUT; - } - return OK; - } - }; - public: - virtual FenceID fence_create() override final; - virtual Error fence_wait(FenceID p_fence) override final; - virtual void fence_free(FenceID p_fence) override final; + virtual FenceID fence_create() override = 0; + virtual Error fence_wait(FenceID p_fence) override = 0; + virtual void fence_free(FenceID p_fence) override = 0; #pragma mark - Semaphores public: - virtual SemaphoreID semaphore_create() override final; - virtual void semaphore_free(SemaphoreID p_semaphore) override final; + virtual SemaphoreID semaphore_create() override = 0; + virtual void semaphore_free(SemaphoreID p_semaphore) override = 0; #pragma mark - Commands // ----- QUEUE FAMILY ----- @@ -258,25 +274,22 @@ public: virtual CommandQueueFamilyID command_queue_family_get(BitField p_cmd_queue_family_bits, RenderingContextDriver::SurfaceID p_surface = 0) override final; // ----- QUEUE ----- + public: - virtual CommandQueueID command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue = false) override final; - virtual Error command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView p_wait_semaphores, VectorView p_cmd_buffers, VectorView p_cmd_semaphores, FenceID p_cmd_fence, VectorView p_swap_chains) override final; - virtual void command_queue_free(CommandQueueID p_cmd_queue) override final; + virtual CommandQueueID command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue = false) override = 0; + virtual Error command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView 
p_wait_semaphores, VectorView p_cmd_buffers, VectorView p_cmd_semaphores, FenceID p_cmd_fence, VectorView p_swap_chains) override = 0; + virtual void command_queue_free(CommandQueueID p_cmd_queue) override = 0; // ----- POOL ----- - virtual CommandPoolID command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) override final; - virtual bool command_pool_reset(CommandPoolID p_cmd_pool) override final; - virtual void command_pool_free(CommandPoolID p_cmd_pool) override final; + virtual CommandPoolID command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) override = 0; + virtual bool command_pool_reset(CommandPoolID p_cmd_pool) override = 0; + virtual void command_pool_free(CommandPoolID p_cmd_pool) override = 0; // ----- BUFFER ----- -private: - // Used to maintain references. - Vector command_buffers; - public: - virtual CommandBufferID command_buffer_create(CommandPoolID p_cmd_pool) override final; + virtual CommandBufferID command_buffer_create(CommandPoolID p_cmd_pool) override = 0; virtual bool command_buffer_begin(CommandBufferID p_cmd_buffer) override final; virtual bool command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) override final; virtual void command_buffer_end(CommandBufferID p_cmd_buffer) override final; @@ -284,7 +297,7 @@ public: #pragma mark - Swapchain -private: +protected: struct SwapChain { RenderingContextDriver::SurfaceID surface = RenderingContextDriver::SurfaceID(); RenderPassID render_pass; @@ -355,7 +368,7 @@ public: #pragma mark Pipeline private: - Result> _create_function(MDLibrary *p_library, NSString *p_name, VectorView &p_specialization_constants); + Result> _create_function(MDLibrary *p_library, NS::String *p_name, VectorView &p_specialization_constants); public: virtual void pipeline_free(PipelineID p_pipeline_id) override final; @@ -506,14 +519,13 @@ public: 
virtual const MultiviewCapabilities &get_multiview_capabilities() override final; virtual const FragmentShadingRateCapabilities &get_fragment_shading_rate_capabilities() override final; virtual const FragmentDensityMapCapabilities &get_fragment_density_map_capabilities() override final; - virtual String get_api_name() const override final { return "Metal"; } virtual String get_api_version() const override final; virtual String get_pipeline_cache_uuid() const override final; virtual const Capabilities &get_capabilities() const override final; virtual bool is_composite_alpha_supported(CommandQueueID p_queue) const override final; // Metal-specific. - id get_device() const { return device; } + MTL::Device *get_device() const { return device; } PixelFormats &get_pixel_formats() const { return *pixel_formats; } MDResourceCache &get_resource_cache() const { return *resource_cache; } MetalDeviceProperties const &get_device_properties() const { return *device_properties; } @@ -523,7 +535,7 @@ public: } size_t get_texel_buffer_alignment_for_format(RDD::DataFormat p_format) const; - size_t get_texel_buffer_alignment_for_format(MTLPixelFormat p_format) const; + size_t get_texel_buffer_alignment_for_format(MTL::PixelFormat p_format) const; _FORCE_INLINE_ uint32_t frame_count() const { return _frame_count; } _FORCE_INLINE_ uint32_t frame_index() const { return _frame_index; } @@ -534,7 +546,7 @@ public: ~RenderingDeviceDriverMetal(); }; -// Defined outside because we need to forward declare it in metal_objects.h +// Defined outside because we need to forward declare it in metal3_objects.h struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalBufferDynamicInfo : public RenderingDeviceDriverMetal::BufferInfo { uint64_t size_bytes; // Contains the real buffer size / frame_count. 
uint32_t next_frame_index(uint32_t p_frame_count) { diff --git a/drivers/metal/rendering_device_driver_metal3.cpp b/drivers/metal/rendering_device_driver_metal3.cpp new file mode 100644 index 00000000000..e2a1e4020fb --- /dev/null +++ b/drivers/metal/rendering_device_driver_metal3.cpp @@ -0,0 +1,369 @@ +/**************************************************************************/ +/* rendering_device_driver_metal3.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#include "rendering_device_driver_metal3.h" + +#include "pixel_formats.h" +#include "rendering_context_driver_metal.h" + +#include "core/config/project_settings.h" +#include "core/string/ustring.h" + +namespace MTL3 { + +#pragma mark - FenceEvent / FenceSemaphore + +void RenderingDeviceDriverMetal::FenceEvent::signal(MTL::CommandBuffer *p_cb) { + if (p_cb) { + value++; + p_cb->encodeSignalEvent(event.get(), value); + } +} + +Error RenderingDeviceDriverMetal::FenceEvent::wait(uint32_t p_timeout_ms) { + bool signaled = event->waitUntilSignaledValue(value, p_timeout_ms); + if (!signaled) { +#ifdef DEBUG_ENABLED + ERR_PRINT("timeout waiting for fence"); +#endif + return ERR_TIMEOUT; + } + return OK; +} + +void RenderingDeviceDriverMetal::FenceSemaphore::signal(MTL::CommandBuffer *p_cb) { + if (p_cb) { + p_cb->addCompletedHandler([this](MTL::CommandBuffer *) { + dispatch_semaphore_signal(semaphore); + }); + } else { + dispatch_semaphore_signal(semaphore); + } +} + +Error RenderingDeviceDriverMetal::FenceSemaphore::wait(uint32_t p_timeout_ms) { + dispatch_time_t timeout = dispatch_time(DISPATCH_TIME_NOW, static_cast(p_timeout_ms) * 1000000); + long result = dispatch_semaphore_wait(semaphore, timeout); + if (result != 0) { + return ERR_TIMEOUT; + } + return OK; +} + +#pragma mark - Constructor / Destructor + +RenderingDeviceDriverMetal::RenderingDeviceDriverMetal(RenderingContextDriverMetal *p_context_driver) : + ::RenderingDeviceDriverMetal(p_context_driver) { +} + +RenderingDeviceDriverMetal::~RenderingDeviceDriverMetal() { + for (MDCommandBuffer *cb : command_buffers) { + memdelete(cb); + } +} + +#pragma mark - Initialization + +Error RenderingDeviceDriverMetal::_create_device() { + Error err = ::RenderingDeviceDriverMetal::_create_device(); + ERR_FAIL_COND_V(err, err); + + device_queue = NS::TransferPtr(device->newCommandQueue()); + ERR_FAIL_NULL_V(device_queue.get(), ERR_CANT_CREATE); 
+ device_queue->setLabel(MTLSTR("Godot Main Command Queue")); + + return OK; +} + +Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p_frame_count) { + Error err = _initialize(p_device_index, p_frame_count); + ERR_FAIL_COND_V(err, err); + + // Barriers are still experimental in Metal 3, so they are disabled by default + // and can only be enabled via an environment variable. + bool barriers_enabled = OS::get_singleton()->get_environment("GODOT_MTL_FORCE_BARRIERS") == "1"; + if (__builtin_available(macos 26.0, ios 26.0, tvos 26.0, visionos 26.0, *)) { + if (barriers_enabled) { + print_line("Metal 3: Resource barriers enabled."); + NS::SharedPtr rs_desc = NS::TransferPtr(MTL::ResidencySetDescriptor::alloc()->init()); + rs_desc->setInitialCapacity(250); + rs_desc->setLabel(MTLSTR("Main Residency Set")); + NS::Error *error = nullptr; + NS::SharedPtr mrs = NS::TransferPtr(device->newResidencySet(rs_desc.get(), &error)); + if (!mrs) { + String error_msg = error ? String(error->localizedDescription()->utf8String()) : "Unknown error"; + print_error(vformat("Resource barriers unavailable. Failed to create main residency set for explicit resource barriers: %s", error_msg)); + } else { + use_barriers = true; + base_hazard_tracking = MTL::ResourceHazardTrackingModeUntracked; + main_residency_set = mrs; + device_queue->addResidencySet(mrs.get()); + } + } + } else { + if (barriers_enabled) { + // Application or user has requested barriers, but the OS doesn't support them. 
+ print_verbose("Metal 3: Resource barriers are not supported on this OS version."); + barriers_enabled = false; + } + } + + return OK; +} + +#pragma mark - Residency + +void RenderingDeviceDriverMetal::add_residency_set_to_main_queue(MTL::ResidencySet *p_set) { + device_queue->addResidencySet(p_set); +} + +void RenderingDeviceDriverMetal::remove_residency_set_to_main_queue(MTL::ResidencySet *p_set) { + device_queue->removeResidencySet(p_set); +} + +#pragma mark - Fences + +RDD::FenceID RenderingDeviceDriverMetal::fence_create() { + Fence *fence = memnew(FenceEvent(NS::TransferPtr(device->newSharedEvent()))); + return FenceID(fence); +} + +Error RenderingDeviceDriverMetal::fence_wait(FenceID p_fence) { + Fence *fence = (Fence *)(p_fence.id); + return fence->wait(1000); +} + +void RenderingDeviceDriverMetal::fence_free(FenceID p_fence) { + Fence *fence = (Fence *)(p_fence.id); + memdelete(fence); +} + +#pragma mark - Semaphores + +RDD::SemaphoreID RenderingDeviceDriverMetal::semaphore_create() { + if (use_barriers) { + Semaphore *sem = memnew(Semaphore(NS::TransferPtr(device->newEvent()))); + return SemaphoreID(sem); + } + return SemaphoreID(1); +} + +void RenderingDeviceDriverMetal::semaphore_free(SemaphoreID p_semaphore) { + if (use_barriers) { + Semaphore *sem = (Semaphore *)(p_semaphore.id); + memdelete(sem); + } +} + +#pragma mark - Command Queues + +RDD::CommandQueueID RenderingDeviceDriverMetal::command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue) { + return CommandQueueID(1); +} + +Error RenderingDeviceDriverMetal::_execute_and_present_barriers(CommandQueueID p_cmd_queue, VectorView p_wait_sem, VectorView p_cmd_buffers, VectorView p_cmd_sem, FenceID p_cmd_fence, VectorView p_swap_chains) { + uint32_t size = p_cmd_buffers.size(); + if (size == 0) { + return OK; + } + + bool changed = false; + MTL::ResidencySet *mrs = main_residency_set.get(); + if (!_residency_add.is_empty()) { + 
mrs->addAllocations(reinterpret_cast(_residency_add.ptr()), _residency_add.size()); + _residency_add.clear(); + changed = true; + } + if (!_residency_del.is_empty()) { + mrs->removeAllocations(reinterpret_cast(_residency_del.ptr()), _residency_del.size()); + _residency_del.clear(); + changed = true; + } + if (changed) { + mrs->commit(); + } + + if (p_wait_sem.size() > 0) { + MTL::CommandBuffer *cb = device_queue->commandBuffer(); +#ifdef DEV_ENABLED + cb->setLabel(MTLSTR("Wait Command Buffer")); +#endif + for (uint32_t i = 0; i < p_wait_sem.size(); i++) { + Semaphore *sem = (Semaphore *)p_wait_sem[i].id; + cb->encodeWait(sem->event.get(), sem->value); + } + cb->commit(); + } + + for (uint32_t i = 0; i < size - 1; i++) { + MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[i].id); + cmd_buffer->commit(); + } + + // The last command buffer will signal the fence and semaphores. + MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[size - 1].id); + Fence *fence = (Fence *)(p_cmd_fence.id); + if (fence != nullptr) { + cmd_buffer->end(); + MTL::CommandBuffer *cb = cmd_buffer->get_command_buffer(); + fence->signal(cb); + } + + struct DrawRequest { + NS::SharedPtr drawable; + DisplayServer::VSyncMode vsync_mode; + double duration; + }; + + if (p_swap_chains.size() > 0) { + Vector drawables; + drawables.reserve(p_swap_chains.size()); + + for (uint32_t i = 0; i < p_swap_chains.size(); i++) { + SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id); + RenderingContextDriverMetal::Surface *metal_surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface); + MTL::Drawable *drawable = metal_surface->next_drawable(); + if (drawable) { + drawables.push_back(DrawRequest{ + .drawable = NS::RetainPtr(drawable), + .vsync_mode = metal_surface->vsync_mode, + .duration = metal_surface->present_minimum_duration, + }); + } + } + + MTL::CommandBuffer *cb = cmd_buffer->get_command_buffer(); + cb->addCompletedHandler([drawables = 
std::move(drawables)](MTL::CommandBuffer *) { + for (const DrawRequest &dr : drawables) { + switch (dr.vsync_mode) { + case DisplayServer::VSYNC_DISABLED: { + dr.drawable->present(); + } break; + default: { + dr.drawable->presentAfterMinimumDuration(dr.duration); + } break; + } + } + }); + } + + cmd_buffer->commit(); + + if (p_cmd_sem.size() > 0) { + MTL::CommandBuffer *cb = device_queue->commandBuffer(); + for (uint32_t i = 0; i < p_cmd_sem.size(); i++) { + Semaphore *sem = (Semaphore *)p_cmd_sem[i].id; + sem->value++; + cb->encodeSignalEvent(sem->event.get(), sem->value); + } + cb->commit(); + } + + return OK; +} + +Error RenderingDeviceDriverMetal::_execute_and_present(CommandQueueID p_cmd_queue, VectorView p_wait_sem, VectorView p_cmd_buffers, VectorView p_cmd_sem, FenceID p_cmd_fence, VectorView p_swap_chains) { + uint32_t size = p_cmd_buffers.size(); + if (size == 0) { + return OK; + } + + for (uint32_t i = 0; i < size - 1; i++) { + MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[i].id); + cmd_buffer->commit(); + } + + // The last command buffer will signal the fence and semaphores. 
+ MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[size - 1].id); + Fence *fence = (Fence *)(p_cmd_fence.id); + if (fence != nullptr) { + cmd_buffer->end(); + MTL::CommandBuffer *cb = cmd_buffer->get_command_buffer(); + fence->signal(cb); + } + + for (uint32_t i = 0; i < p_swap_chains.size(); i++) { + SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id); + RenderingContextDriverMetal::Surface *metal_surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface); + metal_surface->present(cmd_buffer); + } + + cmd_buffer->commit(); + + return OK; +} + +Error RenderingDeviceDriverMetal::command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView p_wait_sem, VectorView p_cmd_buffers, VectorView p_cmd_sem, FenceID p_cmd_fence, VectorView p_swap_chains) { + Error res; + if (use_barriers) { + res = _execute_and_present_barriers(p_cmd_queue, p_wait_sem, p_cmd_buffers, p_cmd_sem, p_cmd_fence, p_swap_chains); + } else { + res = _execute_and_present(p_cmd_queue, p_wait_sem, p_cmd_buffers, p_cmd_sem, p_cmd_fence, p_swap_chains); + } + ERR_FAIL_COND_V(res != OK, res); + + if (p_swap_chains.size() > 0) { + // Used as a signal that we're presenting, so this is the end of a frame. + MTL::CaptureScope *scope = device_scope.get(); + scope->endScope(); + scope->beginScope(); + } + + return OK; +} + +void RenderingDeviceDriverMetal::command_queue_free(CommandQueueID p_cmd_queue) { +} + +#pragma mark - Command Pools + +RDD::CommandPoolID RenderingDeviceDriverMetal::command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) { + DEV_ASSERT(p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY); + return CommandPoolID(reinterpret_cast(device_queue.get())); +} + +bool RenderingDeviceDriverMetal::command_pool_reset(CommandPoolID p_cmd_pool) { + return true; +} + +void RenderingDeviceDriverMetal::command_pool_free(CommandPoolID p_cmd_pool) { + // Nothing to free - the device_queue is managed by SharedPtr. 
+} + +#pragma mark - Command Buffers + +RDD::CommandBufferID RenderingDeviceDriverMetal::command_buffer_create(CommandPoolID p_cmd_pool) { + MTL::CommandQueue *queue = reinterpret_cast(p_cmd_pool.id); + MDCommandBuffer *obj = memnew(MDCommandBuffer(queue, this)); + command_buffers.push_back(obj); + return CommandBufferID(obj); +} + +} // namespace MTL3 diff --git a/drivers/metal/rendering_device_driver_metal3.h b/drivers/metal/rendering_device_driver_metal3.h new file mode 100644 index 00000000000..648fdd57a82 --- /dev/null +++ b/drivers/metal/rendering_device_driver_metal3.h @@ -0,0 +1,115 @@ +/**************************************************************************/ +/* rendering_device_driver_metal3.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +#include "metal3_objects.h" +#include "rendering_device_driver_metal.h" + +#include + +namespace MTL3 { + +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMetal final : public ::RenderingDeviceDriverMetal { + friend class MDCommandBuffer; +#pragma mark - Generic + + NS::SharedPtr device_queue; + + struct Fence { + virtual void signal(MTL::CommandBuffer *p_cmd_buffer) = 0; + virtual Error wait(uint32_t p_timeout_ms) = 0; + virtual ~Fence() = default; + }; + + struct FenceEvent : Fence { + NS::SharedPtr event; + uint64_t value = 0; + FenceEvent(NS::SharedPtr p_event) : + event(p_event) {} + void signal(MTL::CommandBuffer *p_cb) override; + Error wait(uint32_t p_timeout_ms) override; + }; + + struct FenceSemaphore : Fence { + dispatch_semaphore_t semaphore; + FenceSemaphore() : + semaphore(dispatch_semaphore_create(0)) {} + void signal(MTL::CommandBuffer *p_cb) override; + Error wait(uint32_t p_timeout_ms) override; + }; + + struct Semaphore { + NS::SharedPtr event; + uint64_t value = 0; + Semaphore(NS::SharedPtr p_event) : + event(p_event) {} + }; + + Vector command_buffers; + + Error _create_device() override; + Error _execute_and_present_barriers(CommandQueueID p_cmd_queue, VectorView p_wait_semaphores, VectorView p_cmd_buffers, VectorView p_cmd_semaphores, FenceID p_cmd_fence, VectorView p_swap_chains); + Error _execute_and_present(CommandQueueID p_cmd_queue, VectorView p_wait_semaphores, VectorView p_cmd_buffers, VectorView p_cmd_semaphores, FenceID p_cmd_fence, VectorView p_swap_chains); + +protected: + MTL::CommandQueue *get_command_queue() const 
override { return device_queue.get(); } + void add_residency_set_to_main_queue(MTL::ResidencySet *p_set) override; + void remove_residency_set_to_main_queue(MTL::ResidencySet *p_set) override; + +public: + Error initialize(uint32_t p_device_index, uint32_t p_frame_count) override; + + FenceID fence_create() override; + Error fence_wait(FenceID p_fence) override; + void fence_free(FenceID p_fence) override; + + SemaphoreID semaphore_create() override; + void semaphore_free(SemaphoreID p_semaphore) override; + + CommandQueueID command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue = false) override; + Error command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView p_wait_semaphores, VectorView p_cmd_buffers, VectorView p_cmd_semaphores, FenceID p_cmd_fence, VectorView p_swap_chains) override; + void command_queue_free(CommandQueueID p_cmd_queue) override; + + CommandPoolID command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) override; + bool command_pool_reset(CommandPoolID p_cmd_pool) override; + void command_pool_free(CommandPoolID p_cmd_pool) override; + + CommandBufferID command_buffer_create(CommandPoolID p_cmd_pool) override; + +#pragma mark - Miscellaneous + + String get_api_name() const override { return "Metal"; } + + RenderingDeviceDriverMetal(RenderingContextDriverMetal *p_context_driver); + ~RenderingDeviceDriverMetal(); +}; + +} // namespace MTL3 diff --git a/drivers/metal/rendering_shader_container_metal.mm b/drivers/metal/rendering_shader_container_metal.cpp similarity index 85% rename from drivers/metal/rendering_shader_container_metal.mm rename to drivers/metal/rendering_shader_container_metal.cpp index 6fc95e08f03..2c74d33bb3a 100644 --- a/drivers/metal/rendering_shader_container_metal.mm +++ b/drivers/metal/rendering_shader_container_metal.cpp @@ -1,5 +1,5 @@ /**************************************************************************/ -/* 
rendering_shader_container_metal.mm */ +/* rendering_shader_container_metal.cpp */ /**************************************************************************/ /* This file is part of: */ /* GODOT ENGINE */ @@ -28,21 +28,21 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /**************************************************************************/ -#import "rendering_shader_container_metal.h" +#include "rendering_shader_container_metal.h" -#import "metal_utils.h" +#include "metal_utils.h" -#import "core/io/file_access.h" -#import "core/io/marshalls.h" -#import "core/templates/fixed_vector.h" -#import "servers/rendering/rendering_device.h" +#include "core/io/file_access.h" +#include "core/io/marshalls.h" +#include "core/templates/fixed_vector.h" +#include "servers/rendering/rendering_device.h" #include "thirdparty/spirv-reflect/spirv_reflect.h" -#import -#import -#import -#import +#include +#include +#include +#include void RenderingShaderContainerMetal::_initialize_toolchain_properties() { if (compiler_props.is_valid()) { @@ -236,6 +236,74 @@ spv::ExecutionModel map_stage(RDD::ShaderStage p_stage) { return SHADER_STAGE_REMAP[p_stage]; } +Error RenderingShaderContainerMetal::reflect_spirv(const ReflectShader &p_shader) { + // const LocalVector &p_spirv = p_shader.shader_stages; + // + // using ShaderStage = RenderingDeviceCommons::ShaderStage; + // + // const uint32_t spirv_size = p_spirv.size(); + // + // HashSet atomic_spirv_ids; + // bool atomics_scanned = false; + // auto scan_atomic_accesses = [&atomic_spirv_ids, &p_spirv, spirv_size, &atomics_scanned]() { + // if (atomics_scanned) { + // return; + // } + // + // for (uint32_t i = 0; i < spirv_size + 0; i++) { + // const uint32_t STARTING_WORD_INDEX = 5; + // Span spirv = p_spirv[i].spirv(); + // const uint32_t *words = spirv.ptr() + STARTING_WORD_INDEX; + // while (words < spirv.end()) { + // uint32_t instruction = *words; + // uint16_t word_count = instruction >> 16; + // SpvOp opcode = 
(SpvOp)(instruction & 0xFFFF); + // if (opcode == SpvOpImageTexelPointer) { + // uint32_t image_var_id = words[3]; + // atomic_spirv_ids.insert(image_var_id); + // } + // words += word_count; + // } + // } + // + // atomics_scanned = true; + // }; + // + // for (uint32_t i = 0; i < spirv_size + 0; i++) { + // ShaderStage stage = p_spirv[i].shader_stage; + // ShaderStage stage_flag = (ShaderStage)(1 << p_spirv[i].shader_stage); + // SpvReflectResult result; + // + // const SpvReflectShaderModule &module = p_spirv[i].module(); + // + // uint32_t binding_count = 0; + // result = spvReflectEnumerateDescriptorBindings(&module, &binding_count, nullptr); + // CRASH_COND(result != SPV_REFLECT_RESULT_SUCCESS); + // + // if (binding_count > 0) { + // LocalVector bindings; + // bindings.resize_uninitialized(binding_count); + // result = spvReflectEnumerateDescriptorBindings(&module, &binding_count, bindings.ptr()); + // + // for (uint32_t j = 0; j < binding_count; j++) { + // const SpvReflectDescriptorBinding &binding = *bindings[j]; + // + // switch (binding.descriptor_type) { + // case SPV_REFLECT_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + // case SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + // case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_IMAGE: + // case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + // break; + // default: + // break; + // } + // } + // } + // } + // + return OK; +} + bool RenderingShaderContainerMetal::_set_code_from_spirv(const ReflectShader &p_shader) { using namespace spirv_cross; using spirv_cross::CompilerMSL; @@ -282,12 +350,7 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const ReflectShader &p_ msl_options.ios_support_base_vertex_instance = true; } - // We don't currently allow argument buffers when using dynamic buffers as - // the current implementation does not update the argument buffer each time - // the dynamic buffer changes. This is a future TODO. 
- bool argument_buffers_allowed = get_shader_reflection().has_dynamic_buffers == false; - - if (device_profile->features.use_argument_buffers && argument_buffers_allowed) { + if (device_profile->features.use_argument_buffers) { msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier2; msl_options.argument_buffers = true; mtl_reflection_data.set_uses_argument_buffers(true); @@ -384,9 +447,9 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const ReflectShader &p_ case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_IMAGE: { if (!(binding.decoration_flags & SPV_REFLECT_DECORATION_NON_WRITABLE)) { if (!(binding.decoration_flags & SPV_REFLECT_DECORATION_NON_READABLE)) { - found->access = MTLBindingAccessReadWrite; + found->access = MTL::BindingAccessReadWrite; } else { - found->access = MTLBindingAccessWriteOnly; + found->access = MTL::BindingAccessWriteOnly; } } } break; @@ -394,9 +457,9 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const ReflectShader &p_ case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_BUFFER: { if (!(binding.decoration_flags & SPV_REFLECT_DECORATION_NON_WRITABLE) && !(binding.block.decoration_flags & SPV_REFLECT_DECORATION_NON_WRITABLE)) { if (!(binding.decoration_flags & SPV_REFLECT_DECORATION_NON_READABLE) && !(binding.block.decoration_flags & SPV_REFLECT_DECORATION_NON_READABLE)) { - found->access = MTLBindingAccessReadWrite; + found->access = MTL::BindingAccessReadWrite; } else { - found->access = MTLBindingAccessWriteOnly; + found->access = MTL::BindingAccessWriteOnly; } } } break; @@ -405,14 +468,14 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const ReflectShader &p_ } switch (found->access) { - case MTLBindingAccessReadOnly: - found->usage = MTLResourceUsageRead; + case MTL::BindingAccessReadOnly: + found->usage = MTL::ResourceUsageRead; break; - case MTLBindingAccessWriteOnly: - found->usage = MTLResourceUsageWrite; + case MTL::BindingAccessWriteOnly: + found->usage = MTL::ResourceUsageWrite; 
break; - case MTLBindingAccessReadWrite: - found->usage = MTLResourceUsageRead | MTLResourceUsageWrite; + case MTL::BindingAccessReadWrite: + found->usage = MTL::ResourceUsageRead | MTL::ResourceUsageWrite; break; } @@ -424,7 +487,7 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const ReflectShader &p_ switch (type) { case RDC::UNIFORM_TYPE_SAMPLER: { - found->data_type = MTLDataTypeSampler; + found->data_type = MTL::DataTypeSampler; found->get_indexes(UniformData::IndexType::SLOT).sampler = next_index(Sampler, binding_stride); found->get_indexes(UniformData::IndexType::ARG).sampler = next_arg_index(binding_stride); @@ -433,7 +496,7 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const ReflectShader &p_ } break; case RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: case RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: { - found->data_type = MTLDataTypeTexture; + found->data_type = MTL::DataTypeTexture; found->get_indexes(UniformData::IndexType::SLOT).texture = next_index(Texture, binding_stride); found->get_indexes(UniformData::IndexType::SLOT).sampler = next_index(Sampler, binding_stride); found->get_indexes(UniformData::IndexType::ARG).texture = next_arg_index(binding_stride); @@ -443,7 +506,7 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const ReflectShader &p_ case RDC::UNIFORM_TYPE_TEXTURE: case RDC::UNIFORM_TYPE_IMAGE: case RDC::UNIFORM_TYPE_TEXTURE_BUFFER: { - found->data_type = MTLDataTypeTexture; + found->data_type = MTL::DataTypeTexture; found->get_indexes(UniformData::IndexType::SLOT).texture = next_index(Texture, binding_stride); found->get_indexes(UniformData::IndexType::ARG).texture = next_arg_index(binding_stride); rb.basetype = SPIRType::BaseType::Image; @@ -455,13 +518,13 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const ReflectShader &p_ case RDC::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: case RDC::UNIFORM_TYPE_UNIFORM_BUFFER: case RDC::UNIFORM_TYPE_STORAGE_BUFFER: { - found->data_type = MTLDataTypePointer; + 
found->data_type = MTL::DataTypePointer; found->get_indexes(UniformData::IndexType::SLOT).buffer = next_index(Buffer, binding_stride); found->get_indexes(UniformData::IndexType::ARG).buffer = next_arg_index(binding_stride); rb.basetype = SPIRType::BaseType::Void; } break; case RDC::UNIFORM_TYPE_INPUT_ATTACHMENT: { - found->data_type = MTLDataTypeTexture; + found->data_type = MTL::DataTypeTexture; found->get_indexes(UniformData::IndexType::SLOT).texture = next_index(Texture, binding_stride); found->get_indexes(UniformData::IndexType::ARG).texture = next_arg_index(binding_stride); rb.basetype = SPIRType::BaseType::Image; @@ -476,44 +539,53 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const ReflectShader &p_ rb.msl_texture = found->get_indexes(shader_index_type).texture; rb.msl_sampler = found->get_indexes(shader_index_type).sampler; - if (found->data_type == MTLDataTypeTexture) { + if (found->data_type == MTL::DataTypeTexture) { const SpvReflectImageTraits &image = uniform.get_spv_reflect().image; switch (image.dim) { case SpvDim1D: { if (image.arrayed) { - found->texture_type = MTLTextureType1DArray; + found->texture_type = MTL::TextureType1DArray; } else { - found->texture_type = MTLTextureType1D; + found->texture_type = MTL::TextureType1D; } } break; case SpvDimSubpassData: case SpvDim2D: { if (image.arrayed && image.ms) { - found->texture_type = MTLTextureType2DMultisampleArray; + found->texture_type = MTL::TextureType2DMultisampleArray; } else if (image.arrayed) { - found->texture_type = MTLTextureType2DArray; + found->texture_type = MTL::TextureType2DArray; } else if (image.ms) { - found->texture_type = MTLTextureType2DMultisample; + found->texture_type = MTL::TextureType2DMultisample; } else { - found->texture_type = MTLTextureType2D; + found->texture_type = MTL::TextureType2D; } } break; case SpvDim3D: { - found->texture_type = MTLTextureType3D; + found->texture_type = MTL::TextureType3D; } break; case SpvDimCube: { if (image.arrayed) { - 
found->texture_type = MTLTextureTypeCubeArray; + found->texture_type = MTL::TextureTypeCubeArray; } else { - found->texture_type = MTLTextureTypeCube; + found->texture_type = MTL::TextureTypeCube; } } break; case SpvDimRect: { // Ignored. } break; case SpvDimBuffer: { - found->texture_type = MTLTextureTypeTextureBuffer; + found->texture_type = MTL::TextureTypeTextureBuffer; + // If this is used with atomics, we need to use a read-write texture. + // scan_atomic_accesses(); + // if (atomic_spirv_ids.find(uniform.spirv_id) != atomic_spirv_ids.end()) { + // rb.access = MTLBindingAccessReadWrite; + // found->access = MTLBindingAccessReadWrite; + // } else { + // rb.access = MTLBindingAccessReadOnly; + // found->access = MTLBindingAccessReadOnly; + // } } break; case SpvDimTileImageDataEXT: { // Godot does not use this extension. diff --git a/drivers/metal/rendering_shader_container_metal.h b/drivers/metal/rendering_shader_container_metal.h index 24af5783db4..13097b4ef48 100644 --- a/drivers/metal/rendering_shader_container_metal.h +++ b/drivers/metal/rendering_shader_container_metal.h @@ -30,11 +30,11 @@ #pragma once -#import "metal_device_profile.h" -#import "sha256_digest.h" +#include "metal_device_profile.h" +#include "sha256_digest.h" -#import "servers/rendering/rendering_device_driver.h" -#import "servers/rendering/rendering_shader_container.h" +#include "servers/rendering/rendering_device_driver.h" +#include "servers/rendering/rendering_shader_container.h" constexpr uint32_t R32UI_ALIGNMENT_CONSTANT_ID = 65535; /// Metal buffer index for the view mask when rendering multi-view. 
@@ -177,6 +177,8 @@ private: Error compile_metal_source(const char *p_source, const StageData &p_stage_data, Vector &r_binary_data); + Error reflect_spirv(const ReflectShader &p_shader); + public: static constexpr uint32_t FORMAT_VERSION = 2; diff --git a/drivers/metal/sha256_digest.h b/drivers/metal/sha256_digest.h index 6b477c959da..f54a7451391 100644 --- a/drivers/metal/sha256_digest.h +++ b/drivers/metal/sha256_digest.h @@ -30,9 +30,9 @@ #pragma once -#import -#import -#import +#include +#include +#include #include "core/templates/hashfuncs.h" #include "core/templates/local_vector.h" diff --git a/platform/macos/display_server_embedded.mm b/platform/macos/display_server_embedded.mm index d0c9c6ab0e9..d85d48ca37e 100644 --- a/platform/macos/display_server_embedded.mm +++ b/platform/macos/display_server_embedded.mm @@ -160,7 +160,7 @@ DisplayServerEmbedded::DisplayServerEmbedded(const String &p_rendering_driver, W #endif #ifdef METAL_ENABLED if (rendering_driver == "metal") { - wpd.metal.layer = (CAMetalLayer *)layer; + wpd.metal.layer = (__bridge CA::MetalLayer *)layer; } #endif Error err = rendering_context->window_create(window_id_counter, &wpd); diff --git a/platform/macos/display_server_macos.mm b/platform/macos/display_server_macos.mm index 8c599eb8f3c..74b31fa14ca 100644 --- a/platform/macos/display_server_macos.mm +++ b/platform/macos/display_server_macos.mm @@ -73,6 +73,7 @@ #if defined(RD_ENABLED) #include "servers/rendering/renderer_rd/renderer_compositor_rd.h" +#include "servers/rendering/rendering_device.h" #endif #if defined(ACCESSKIT_ENABLED) @@ -185,7 +186,7 @@ DisplayServerMacOS::WindowID DisplayServerMacOS::_create_window(WindowMode p_mod #endif #ifdef METAL_ENABLED if (rendering_driver == "metal") { - wpd.metal.layer = (CAMetalLayer *)layer; + wpd.metal.layer = (__bridge CA::MetalLayer *)layer; } #endif Error err = rendering_context->window_create(window_id_counter, &wpd); diff --git a/servers/rendering/renderer_rd/effects/SCsub 
b/servers/rendering/renderer_rd/effects/SCsub index 30656a4225b..e76f258923a 100644 --- a/servers/rendering/renderer_rd/effects/SCsub +++ b/servers/rendering/renderer_rd/effects/SCsub @@ -7,6 +7,10 @@ Import("env") env_effects = env.Clone() +# metal-cpp headers for Metal FX +if env["metal"]: + env_effects.Prepend(CPPPATH=["#thirdparty/metal-cpp"]) + # Thirdparty source files thirdparty_obj = [] @@ -69,8 +73,6 @@ env.servers_sources += thirdparty_obj module_obj = [] env_effects.add_source_files(module_obj, "*.cpp") -if env["metal"]: - env_effects.add_source_files(module_obj, "metal_fx.mm") env.servers_sources += module_obj # Needed to force rebuilding the module files when the thirdparty library is updated. diff --git a/servers/rendering/renderer_rd/effects/metal_fx.mm b/servers/rendering/renderer_rd/effects/metal_fx.cpp similarity index 59% rename from servers/rendering/renderer_rd/effects/metal_fx.mm rename to servers/rendering/renderer_rd/effects/metal_fx.cpp index 0f6152d5261..11d23a77912 100644 --- a/servers/rendering/renderer_rd/effects/metal_fx.mm +++ b/servers/rendering/renderer_rd/effects/metal_fx.cpp @@ -1,5 +1,5 @@ /**************************************************************************/ -/* metal_fx.mm */ +/* metal_fx.cpp */ /**************************************************************************/ /* This file is part of: */ /* GODOT ENGINE */ @@ -28,20 +28,24 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /**************************************************************************/ -#import "metal_fx.h" +#ifdef METAL_ENABLED -#import "../storage_rd/render_scene_buffers_rd.h" -#import "drivers/metal/pixel_formats.h" -#import "drivers/metal/rendering_device_driver_metal.h" +#include "metal_fx.h" -#import -#import +#include "../storage_rd/render_scene_buffers_rd.h" +#include "drivers/metal/pixel_formats.h" +#include "drivers/metal/rendering_device_driver_metal3.h" + +#include using namespace RendererRD; #pragma mark - Spatial Scaler MFXSpatialContext::~MFXSpatialContext() { + if (scaler) { + scaler->release(); + } } MFXSpatialEffect::MFXSpatialEffect() { @@ -51,28 +55,21 @@ MFXSpatialEffect::~MFXSpatialEffect() { } void MFXSpatialEffect::callback(RDD *p_driver, RDD::CommandBufferID p_command_buffer, CallbackArgs *p_userdata) { - GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability") - - MDCommandBuffer *obj = (MDCommandBuffer *)(p_command_buffer.id); + MDCommandBufferBase *obj = (MDCommandBufferBase *)(p_command_buffer.id); obj->end(); - id src_texture = rid::get(p_userdata->src); - id dst_texture = rid::get(p_userdata->dst); + MTL::Texture *src_texture = reinterpret_cast(p_userdata->src.id); + MTL::Texture *dst_texture = reinterpret_cast(p_userdata->dst.id); - __block id scaler = p_userdata->ctx.scaler; - scaler.colorTexture = src_texture; - scaler.outputTexture = dst_texture; - [scaler encodeToCommandBuffer:obj->get_command_buffer()]; - // TODO(sgc): add API to retain objects until the command buffer completes - [obj->get_command_buffer() addCompletedHandler:^(id _Nonnull) { - // This block retains a reference to the scaler until the command buffer. - // completes. 
- scaler = nil; - }]; + MTLFX::SpatialScalerBase *scaler = p_userdata->scaler; + scaler->setColorTexture(src_texture); + scaler->setOutputTexture(dst_texture); + MTLFX::SpatialScaler *s = static_cast(scaler); + MTL3::MDCommandBuffer *cmd = (MTL3::MDCommandBuffer *)(p_command_buffer.id); + s->encodeToCommandBuffer(cmd->get_command_buffer()); + obj->retain_resource(scaler); CallbackArgs::free(&p_userdata); - - GODOT_CLANG_WARNING_POP } void MFXSpatialEffect::ensure_context(Ref p_render_buffers) { @@ -98,27 +95,23 @@ void MFXSpatialEffect::process(Ref p_render_buffers, RID p MFXSpatialContext *MFXSpatialEffect::create_context(CreateParams p_params) const { DEV_ASSERT(RD::get_singleton()->has_feature(RD::SUPPORTS_METALFX_SPATIAL)); - GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability") - RenderingDeviceDriverMetal *rdd = (RenderingDeviceDriverMetal *)RD::get_singleton()->get_device_driver(); PixelFormats &pf = rdd->get_pixel_formats(); - id dev = rdd->get_device(); + MTL::Device *dev = rdd->get_device(); - MTLFXSpatialScalerDescriptor *desc = [MTLFXSpatialScalerDescriptor new]; - desc.inputWidth = (NSUInteger)p_params.input_size.width; - desc.inputHeight = (NSUInteger)p_params.input_size.height; + NS::SharedPtr desc = NS::TransferPtr(MTLFX::SpatialScalerDescriptor::alloc()->init()); + desc->setInputWidth((NS::UInteger)p_params.input_size.width); + desc->setInputHeight((NS::UInteger)p_params.input_size.height); - desc.outputWidth = (NSUInteger)p_params.output_size.width; - desc.outputHeight = (NSUInteger)p_params.output_size.height; + desc->setOutputWidth((NS::UInteger)p_params.output_size.width); + desc->setOutputHeight((NS::UInteger)p_params.output_size.height); + + desc->setColorTextureFormat((MTL::PixelFormat)pf.getMTLPixelFormat(p_params.input_format)); + desc->setOutputTextureFormat((MTL::PixelFormat)pf.getMTLPixelFormat(p_params.output_format)); + desc->setColorProcessingMode(MTLFX::SpatialScalerColorProcessingModeLinear); - desc.colorTextureFormat = 
pf.getMTLPixelFormat(p_params.input_format); - desc.outputTextureFormat = pf.getMTLPixelFormat(p_params.output_format); - desc.colorProcessingMode = MTLFXSpatialScalerColorProcessingModeLinear; - id scaler = [desc newSpatialScalerWithDevice:dev]; MFXSpatialContext *context = memnew(MFXSpatialContext); - context->scaler = scaler; - - GODOT_CLANG_WARNING_POP + context->scaler = desc->newSpatialScaler(dev); return context; } @@ -127,7 +120,11 @@ MFXSpatialContext *MFXSpatialEffect::create_context(CreateParams p_params) const #pragma mark - Temporal Scaler -MFXTemporalContext::~MFXTemporalContext() {} +MFXTemporalContext::~MFXTemporalContext() { + if (scaler) { + scaler->release(); + } +} MFXTemporalEffect::MFXTemporalEffect() {} MFXTemporalEffect::~MFXTemporalEffect() {} @@ -135,35 +132,29 @@ MFXTemporalEffect::~MFXTemporalEffect() {} MFXTemporalContext *MFXTemporalEffect::create_context(CreateParams p_params) const { DEV_ASSERT(RD::get_singleton()->has_feature(RD::SUPPORTS_METALFX_TEMPORAL)); - GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability") - RenderingDeviceDriverMetal *rdd = (RenderingDeviceDriverMetal *)RD::get_singleton()->get_device_driver(); PixelFormats &pf = rdd->get_pixel_formats(); - id dev = rdd->get_device(); + MTL::Device *dev = rdd->get_device(); - MTLFXTemporalScalerDescriptor *desc = [MTLFXTemporalScalerDescriptor new]; - desc.inputWidth = (NSUInteger)p_params.input_size.width; - desc.inputHeight = (NSUInteger)p_params.input_size.height; + NS::SharedPtr desc = NS::TransferPtr(MTLFX::TemporalScalerDescriptor::alloc()->init()); + desc->setInputWidth((NS::UInteger)p_params.input_size.width); + desc->setInputHeight((NS::UInteger)p_params.input_size.height); - desc.outputWidth = (NSUInteger)p_params.output_size.width; - desc.outputHeight = (NSUInteger)p_params.output_size.height; + desc->setOutputWidth((NS::UInteger)p_params.output_size.width); + desc->setOutputHeight((NS::UInteger)p_params.output_size.height); - desc.colorTextureFormat = 
pf.getMTLPixelFormat(p_params.input_format); - desc.depthTextureFormat = pf.getMTLPixelFormat(p_params.depth_format); - desc.motionTextureFormat = pf.getMTLPixelFormat(p_params.motion_format); - desc.autoExposureEnabled = NO; + desc->setColorTextureFormat((MTL::PixelFormat)pf.getMTLPixelFormat(p_params.input_format)); + desc->setDepthTextureFormat((MTL::PixelFormat)pf.getMTLPixelFormat(p_params.depth_format)); + desc->setMotionTextureFormat((MTL::PixelFormat)pf.getMTLPixelFormat(p_params.motion_format)); + desc->setAutoExposureEnabled(false); - desc.outputTextureFormat = pf.getMTLPixelFormat(p_params.output_format); + desc->setOutputTextureFormat((MTL::PixelFormat)pf.getMTLPixelFormat(p_params.output_format)); - id scaler = [desc newTemporalScalerWithDevice:dev]; MFXTemporalContext *context = memnew(MFXTemporalContext); - context->scaler = scaler; - - scaler.motionVectorScaleX = p_params.motion_vector_scale.x; - scaler.motionVectorScaleY = p_params.motion_vector_scale.y; - scaler.depthReversed = true; // Godot uses reverse Z per https://github.com/godotengine/godot/pull/88328 - - GODOT_CLANG_WARNING_POP + context->scaler = desc->newTemporalScaler(dev); + context->scaler->setMotionVectorScaleX(p_params.motion_vector_scale.x); + context->scaler->setMotionVectorScaleY(p_params.motion_vector_scale.y); + context->scaler->setDepthReversed(true); // Godot uses reverse Z per https://github.com/godotengine/godot/pull/88328 return context; } @@ -188,38 +179,33 @@ void MFXTemporalEffect::process(RendererRD::MFXTemporalContext *p_ctx, RendererR } void MFXTemporalEffect::callback(RDD *p_driver, RDD::CommandBufferID p_command_buffer, CallbackArgs *p_userdata) { - GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability") - - MDCommandBuffer *obj = (MDCommandBuffer *)(p_command_buffer.id); + MDCommandBufferBase *obj = (MDCommandBufferBase *)(p_command_buffer.id); obj->end(); - id src_texture = rid::get(p_userdata->src); - id depth = rid::get(p_userdata->depth); - id motion = 
rid::get(p_userdata->motion); - id exposure = rid::get(p_userdata->exposure); + MTL::Texture *src_texture = reinterpret_cast(p_userdata->src.id); + MTL::Texture *depth = reinterpret_cast(p_userdata->depth.id); + MTL::Texture *motion = reinterpret_cast(p_userdata->motion.id); + MTL::Texture *exposure = reinterpret_cast(p_userdata->exposure.id); - id dst_texture = rid::get(p_userdata->dst); + MTL::Texture *dst_texture = reinterpret_cast(p_userdata->dst.id); - __block id scaler = p_userdata->ctx.scaler; - scaler.reset = p_userdata->reset; - scaler.colorTexture = src_texture; - scaler.depthTexture = depth; - scaler.motionTexture = motion; - scaler.exposureTexture = exposure; - scaler.jitterOffsetX = p_userdata->jitter_offset.x; - scaler.jitterOffsetY = p_userdata->jitter_offset.y; - scaler.outputTexture = dst_texture; - [scaler encodeToCommandBuffer:obj->get_command_buffer()]; - // TODO(sgc): add API to retain objects until the command buffer completes - [obj->get_command_buffer() addCompletedHandler:^(id _Nonnull) { - // This block retains a reference to the scaler until the command buffer. - // completes. 
- scaler = nil; - }]; + MTLFX::TemporalScalerBase *scaler = p_userdata->scaler; + scaler->setReset(p_userdata->reset); + scaler->setColorTexture(src_texture); + scaler->setDepthTexture(depth); + scaler->setMotionTexture(motion); + scaler->setExposureTexture(exposure); + scaler->setJitterOffsetX(p_userdata->jitter_offset.x); + scaler->setJitterOffsetY(p_userdata->jitter_offset.y); + scaler->setOutputTexture(dst_texture); + MTLFX::TemporalScaler *s = static_cast(scaler); + MTL3::MDCommandBuffer *cmd = (MTL3::MDCommandBuffer *)(p_command_buffer.id); + s->encodeToCommandBuffer(cmd->get_command_buffer()); + obj->retain_resource(scaler); CallbackArgs::free(&p_userdata); - - GODOT_CLANG_WARNING_POP } #endif + +#endif diff --git a/servers/rendering/renderer_rd/effects/metal_fx.h b/servers/rendering/renderer_rd/effects/metal_fx.h index b2a68aa1f95..7b6c1a215b1 100644 --- a/servers/rendering/renderer_rd/effects/metal_fx.h +++ b/servers/rendering/renderer_rd/effects/metal_fx.h @@ -41,32 +41,28 @@ #include "core/templates/paged_allocator.h" #include "servers/rendering/renderer_scene_render.h" -#ifdef __OBJC__ -@protocol MTLFXSpatialScaler; -@protocol MTLFXTemporalScaler; -#endif +namespace MTLFX { +class SpatialScalerBase; +class TemporalScalerBase; +} //namespace MTLFX namespace RendererRD { struct MFXSpatialContext { -#ifdef __OBJC__ - id scaler = nullptr; -#else - void *scaler = nullptr; -#endif + MTLFX::SpatialScalerBase *scaler = nullptr; MFXSpatialContext() = default; ~MFXSpatialContext(); }; class MFXSpatialEffect : public SpatialUpscaler { struct CallbackArgs { - MFXSpatialEffect *owner; + MFXSpatialEffect *owner = nullptr; + MTLFX::SpatialScalerBase *scaler = nullptr; RDD::TextureID src; RDD::TextureID dst; - MFXSpatialContext ctx; - CallbackArgs(MFXSpatialEffect *p_owner, RDD::TextureID p_src, RDD::TextureID p_dst, MFXSpatialContext p_ctx) : - owner(p_owner), src(p_src), dst(p_dst), ctx(p_ctx) {} + CallbackArgs(MFXSpatialEffect *p_owner, RDD::TextureID p_src, 
RDD::TextureID p_dst, const MFXSpatialContext &p_ctx) : + owner(p_owner), scaler(p_ctx.scaler), src(p_src), dst(p_dst) {} static void free(CallbackArgs **p_args) { (*p_args)->owner->args_allocator.free(*p_args); @@ -98,25 +94,21 @@ public: #ifdef METAL_MFXTEMPORAL_ENABLED struct MFXTemporalContext { -#ifdef __OBJC__ - id scaler = nullptr; -#else - void *scaler = nullptr; -#endif + MTLFX::TemporalScalerBase *scaler = nullptr; MFXTemporalContext() = default; ~MFXTemporalContext(); }; class MFXTemporalEffect { struct CallbackArgs { - MFXTemporalEffect *owner; + MFXTemporalEffect *owner = nullptr; + MTLFX::TemporalScalerBase *scaler = nullptr; RDD::TextureID src; RDD::TextureID depth; RDD::TextureID motion; RDD::TextureID exposure; Vector2 jitter_offset; RDD::TextureID dst; - MFXTemporalContext ctx; bool reset = false; CallbackArgs( @@ -127,16 +119,16 @@ class MFXTemporalEffect { RDD::TextureID p_exposure, Vector2 p_jitter_offset, RDD::TextureID p_dst, - MFXTemporalContext p_ctx, + const MFXTemporalContext &p_ctx, bool p_reset) : owner(p_owner), + scaler(p_ctx.scaler), src(p_src), depth(p_depth), motion(p_motion), exposure(p_exposure), jitter_offset(p_jitter_offset), dst(p_dst), - ctx(p_ctx), reset(p_reset) {} static void free(CallbackArgs **p_args) { diff --git a/thirdparty/README.md b/thirdparty/README.md index d59900e500d..5a7d5da8086 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -694,6 +694,18 @@ Patches: - `0001-msvc-2019-psa-redeclaration.patch` ([GH-90535](https://github.com/godotengine/godot/pull/90535)) +## metal-cpp + +- Upstream: https://developer.apple.com/metal/cpp/ +- Version: 26.0 (2025) +- License: Apache 2.0 + +Update instructions: + +- Download latest metal-cpp ZIP from https://developer.apple.com/metal/cpp/: +- Run `update-metal-cpp.sh ` to extract the relevant files and apply patches. 
+ + ## meshoptimizer - Upstream: https://github.com/zeux/meshoptimizer diff --git a/thirdparty/metal-cpp/Foundation/NSData.hpp b/thirdparty/metal-cpp/Foundation/NSData.hpp index 3ad360609fa..fbf3f20343a 100644 --- a/thirdparty/metal-cpp/Foundation/NSData.hpp +++ b/thirdparty/metal-cpp/Foundation/NSData.hpp @@ -33,6 +33,7 @@ class Data : public Copying { public: void* mutableBytes() const; + void* bytes() const; UInteger length() const; }; } @@ -44,6 +45,11 @@ _NS_INLINE void* NS::Data::mutableBytes() const return Object::sendMessage(this, _NS_PRIVATE_SEL(mutableBytes)); } +_NS_INLINE void* NS::Data::bytes() const +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(bytes)); +} + //------------------------------------------------------------------------------------------------------------------------------------------------------------- _NS_INLINE NS::UInteger NS::Data::length() const diff --git a/thirdparty/metal-cpp/Foundation/NSPrivate.hpp b/thirdparty/metal-cpp/Foundation/NSPrivate.hpp index f8d87004f3e..17909fbd2ac 100644 --- a/thirdparty/metal-cpp/Foundation/NSPrivate.hpp +++ b/thirdparty/metal-cpp/Foundation/NSPrivate.hpp @@ -272,6 +272,8 @@ namespace Private "initWithBytes:objCType:"); _NS_PRIVATE_DEF_SEL(initWithBytesNoCopy_length_encoding_freeWhenDone_, "initWithBytesNoCopy:length:encoding:freeWhenDone:"); + _NS_PRIVATE_DEF_SEL(initWithBytes_length_encoding_, + "initWithBytes:length:encoding:"); _NS_PRIVATE_DEF_SEL(initWithChar_, "initWithChar:"); _NS_PRIVATE_DEF_SEL(initWithCoder_, @@ -372,6 +374,8 @@ namespace Private "methodSignatureForSelector:"); _NS_PRIVATE_DEF_SEL(mutableBytes, "mutableBytes"); + _NS_PRIVATE_DEF_SEL(bytes, + "bytes"); _NS_PRIVATE_DEF_SEL(name, "name"); _NS_PRIVATE_DEF_SEL(nextObject, diff --git a/thirdparty/metal-cpp/Foundation/NSString.hpp b/thirdparty/metal-cpp/Foundation/NSString.hpp index 07ba3f8d394..d4d0c52ec26 100644 --- a/thirdparty/metal-cpp/Foundation/NSString.hpp +++ b/thirdparty/metal-cpp/Foundation/NSString.hpp @@ 
-87,6 +87,7 @@ public: String* init(); String* init(const String* pString); String* init(const char* pString, StringEncoding encoding); + String* init(void* pBytes, UInteger len, StringEncoding encoding); String* init(void* pBytes, UInteger len, StringEncoding encoding, bool freeBuffer); unichar character(UInteger index) const; @@ -168,6 +169,12 @@ _NS_INLINE NS::String* NS::String::init(const char* pString, StringEncoding enco //------------------------------------------------------------------------------------------------------------------------------------------------------------- +_NS_INLINE NS::String* NS::String::init(void* pBytes, UInteger len, StringEncoding encoding) +{ + return Object::sendMessage(this, _NS_PRIVATE_SEL(initWithBytes_length_encoding_), pBytes, len, encoding); +} +//------------------------------------------------------------------------------------------------------------------------------------------------------------- + _NS_INLINE NS::String* NS::String::init(void* pBytes, UInteger len, StringEncoding encoding, bool freeBuffer) { return Object::sendMessage(this, _NS_PRIVATE_SEL(initWithBytesNoCopy_length_encoding_freeWhenDone_), pBytes, len, encoding, freeBuffer); diff --git a/thirdparty/metal-cpp/README.md b/thirdparty/metal-cpp/README.md deleted file mode 100644 index 03d628c78b3..00000000000 --- a/thirdparty/metal-cpp/README.md +++ /dev/null @@ -1,313 +0,0 @@ -## About - -**metal-cpp** is a low overhead and header only C++ interface for Metal that helps developers add Metal functionality to graphics applications that are written in C++ (such as game engines). **metal-cpp** removes the need to create a shim and allows developers to call Metal functions directly from anywhere in their existing C++ code. - - -## Highlights - -- Drop in C++ alternative interface to the Metal Objective-C headers. -- Direct mapping of all Metal Objective-C classes, constants and enums to C++ in the MTL C++ namespace. 
-- No measurable overhead compared to calling Metal Objective-C headers, due to inlining of C++ function calls. -- No usage of wrapper containers that require additional allocations. -- Requires C++17 due to the usage of `constexpr` in `NS::Object`. -- Identical header files and function/constant/enum availability for iOS, macOS and tvOS. -- Backwards compatibility: All `bool MTL::Device::supports...()` functions check if their required selectors exist and automatically return `false` if not. -- String (`ErrorDomain`) constants are weak linked and automatically set to `nullptr` if not available. - -## Changelog - -| Version | Changes | -|-|-| -| macOS 26, iOS 26 | Add all the Metal APIs in macOS 26, iOS 26, including support for the **Apple10** GPU family.
Add support for Metal 4 and new denoiser and temporal scalers in MetalFX.| -| macOS 15, iOS 18 | Add all the Metal APIs in macOS 15 and iOS 18. | -| macOS 14, iOS 17 | Add support for the **MetalFX** framework.
Add all the APIs in macOS 14 and iOS 17. | -| macOS 13.3, iOS 16.4 | Add all the APIs in macOS 13.3 and iOS 16.4. | -| macOS 13, iOS 16| Add all the APIs in macOS 13 and iOS 16.
New optional `NS::SharedPtr` type to assist with memory management.
New convenience function to create a `CA::MetalLayer`.
New `MTLSTR(str)` macro allows faster string creation from literals.
Fix a problem with the signature of functions that take an array of pointers as input.
Fix a problem with the signature of the `setGroups()` function in `MTL::LinkedFunctions`.| -| macOS 12, iOS 15 | Initial release. | - -## Memory Allocation Policy - -**metal-cpp** follows the object allocation policies of Cocoa, Cocoa Touch, and CoreFoundation. Understanding these rules is especially important when using metal-cpp, as C++ objects are not eligible for automatic reference counting (ARC). - -**metal-cpp** objects are reference counted. To help convey and manage object lifecycles, the following conventions are observed: - -1. *You own any object returned by methods whose name begins with* `alloc` *,* `new` *,* `copy` *,* `mutableCopy` *, or* `Create`. The method returns these objects with `retainCount` equals to `1`. -2. *You can take ownership of an object by calling its* ```retain()``` *method*. A received object is normally guaranteed to remain valid within the method it was received in. You use `retain` in two situations: (1) In the implementation of an accessor method (a setter) or to take ownership of an object; and (2) To prevent an object from being deallocated as a side-effect of some other operation. -3. *When you no longer need it, you must relinquish ownership of an object you own*. You relinquish ownership by calling its `release()` or `autorelease()` method. -4. *You must not relinquish ownership of an object you do not own*. - -When an object's `retainCount` reaches `0`, the object is immediately deallocated. It is illegal to call methods on a deallocated object and it may lead to an application crash. - -### AutoreleasePools and Objects - -Several methods that create temporary objects in **metal-cpp** add them to an `AutoreleasePool` to help manage their lifetimes. In these situations, after **metal-cpp** creates the object, it adds it to an `AutoreleasePool`, which will release its objects when you release (or drain) it. 
- -By adding temporary objects to an AutoreleasePool, you do not need to explicitly call `release()` to deallocate them. Instead, you can rely on the `AutoreleasePool` to implicitly manage those lifetimes. - -If you create an object with a method that does not begin with `alloc`, `new`, `copy`, `mutableCopy`, or `Create`, the creating method adds the object to an autorelease pool. - -The typical scope of an `AutoreleasePool` is one frame of rendering for the main thread of the program. When the thread returns control to the RunLoop (an object responsible for receiving input and events from the windowing system), the pool is *drained*, releasing its objects. - -You can create and manage additional `AutoreleasePool`s at smaller scopes to reduce your program's working set, and you are required to do so for any additional threads your program creates. - -If an object's lifecycle needs to be extended beyond the scope of an `AutoreleasePool` instance, you can claim ownership of it by calling its `retain()` method before the pool is drained. In these cases, you are responsible for making the appropriate `release()` call on the object after you no longer need it. - -You can find a more-detailed introduction to the memory management rules here: https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/MemoryMgmt/Articles/mmRules.html, and here: https://developer.apple.com/library/archive/documentation/CoreFoundation/Conceptual/CFMemoryMgmt/Concepts/Ownership.html - -For more details about the application's RunLoop, please find its documentation here: https://developer.apple.com/documentation/foundation/nsrunloop - -### Use and debug AutoreleasePools - -When you create an autoreleased object and there is no enclosing `AutoreleasePool`, the object is leaked. - -To prevent this, you normally create an `AutoreleasePool` in your program's `main` function, and in the entry function for every thread you create. 
You may also create additional `AutoreleasePool`s to avoid growing your program's high memory watermark when you create several autoreleased objects, such as when rendering. - -Use the Environment Variable `OBJC_DEBUG_MISSING_POOLS=YES` to print a runtime warning when an autoreleased object is leaked because no enclosing `AutoreleasePool` is available for its thread. - -You can also run `leaks --autoreleasePools` on a memgraph file or a process ID (macOS only) to view a listing of your program's `AutoreleasePool`s and all objects they contain. - -### NS::SharedPtr - -The **metal-cpp** headers include an optional `NS::SharedPtr<>` (shared pointer) template that can help you manually manage memory in your apps. - -Shared pointers in **metal-cpp** are different from `std::shared_ptr<>` in that they implement specific optimizations for its memory model. For example, **metal-cpp**'s shared pointers avoid the overhead of the standard library's version by leveraging the reference counting implementation of the `NS::Object` type. - -#### Note - -The **metal-cpp** shared pointer’s destructor method always calls the `release()` method of the pointer that it wraps. - -You can create an `NS::SharedPtr<>` by calling the metal-cpp's factory method that's appropriate for your application's intent: - -* You can **transfer** ownership of a pointer to a new shared pointer instance by calling the `NS::TransferPtr()` factory function, which is the correct function for Resource Acquisition is Initialization (RAII) implementations because it doesn't increase the pointee's retain count. - -* You can **share** ownership of a pointer with another entity by calling the `NS::RetainPtr()` factory function. This function can also extend an object's lifecycle beyond an `AutoreleasePool` instance's scope because it creates a strong reference to the pointee and increases its retain count. - -Usage of `NS::SharedPtr<>` is optional. 
- -### nullptr - -Similar to Objective-C, it is legal to call any method, including `retain()` and `release()`, on `nullptr` "objects". While calling methods on `nullptr` still does incur in function call overhead, the effective result is equivalent of a NOP. - -Conversely, do not assume that because calling a method on a pointer did not result in a crash, that the pointed-to object is valid. - -## Adding metal-cpp to a Project - -Simply include `Metal/Metal.hpp`. To ensure that the selector and class symbols are linked, add to one of your cpp files: - -```cpp -#define NS_PRIVATE_IMPLEMENTATION -#define MTL_PRIVATE_IMPLEMENTATION - -#include "Metal/Metal.hpp" -``` - -If you want to use the QuartzCore wrapper, add: - -```cpp -#define CA_PRIVATE_IMPLEMENTATION - -#include "QuartzCore/QuartzCore.hpp" -``` - -## Generating a Single Header File - -Purely optional: You can generate a single header file that contains all **metal-cpp** headers via: - -```shell -./SingleHeader/MakeSingleHeader.py Foundation/Foundation.hpp QuartzCore/QuartzCore.hpp Metal/Metal.hpp MetalFX/MetalFX.hpp -``` - -By default the generator script writes its output to `./SingleHeader/Metal.hpp`. Use the `-o` option to customize output filename. - -## Global Symbol Visibility - -metal-cpp marks all its symbols with `default` visibility. Define the macro: `METALCPP_SYMBOL_VISIBILITY_HIDDEN` to override this behavior and hide its symbols. - -## Examples - -#### Creating the device - -###### Objective-C (with automatic reference counting) - -```objc -id< MTLDevice > device = MTLCreateSystemDefaultDevice(); - -// ... -``` - -###### Objective-C - -```objc -id< MTLDevice > device = MTLCreateSystemDefaultDevice(); - -// ... - -[device release]; -``` - -###### C++ - -```cpp -MTL::Device* pDevice = MTL::CreateSystemDefaultDevice(); - -// ... 
- -pDevice->release(); -``` - -###### C++ (using NS::SharedPtr) - -```cpp -NS::SharedPtr< MTL::Device > pDevice = NS::TransferPtr( MTL::CreateSystemDefaultDevice() ); - -// ... -``` - -#### Metal function calls map directly to C++ - -###### Objective-C (with automatic reference counting) - -```objc -MTLSamplerDescriptor* samplerDescriptor = [[MTLSamplerDescriptor alloc] init]; - -[samplerDescriptor setSAddressMode: MTLSamplerAddressModeRepeat]; -[samplerDescriptor setTAddressMode: MTLSamplerAddressModeRepeat]; -[samplerDescriptor setRAddressMode: MTLSamplerAddressModeRepeat]; -[samplerDescriptor setMagFilter: MTLSamplerMinMagFilterLinear]; -[samplerDescriptor setMinFilter: MTLSamplerMinMagFilterLinear]; -[samplerDescriptor setMipFilter: MTLSamplerMipFilterLinear]; -[samplerDescriptor setSupportArgumentBuffers: YES]; - -id< MTLSamplerState > samplerState = [device newSamplerStateWithDescriptor:samplerDescriptor]; -``` - -###### Objective-C - -```objc -MTLSamplerDescriptor* samplerDescriptor = [[MTLSamplerDescriptor alloc] init]; - -[samplerDescriptor setSAddressMode: MTLSamplerAddressModeRepeat]; -[samplerDescriptor setTAddressMode: MTLSamplerAddressModeRepeat]; -[samplerDescriptor setRAddressMode: MTLSamplerAddressModeRepeat]; -[samplerDescriptor setMagFilter: MTLSamplerMinMagFilterLinear]; -[samplerDescriptor setMinFilter: MTLSamplerMinMagFilterLinear]; -[samplerDescriptor setMipFilter: MTLSamplerMipFilterLinear]; -[samplerDescriptor setSupportArgumentBuffers: YES]; - -id< MTLSamplerState > samplerState = [device newSamplerStateWithDescriptor:samplerDescriptor]; - -[samplerDescriptor release]; - -// ... 
- -[samplerState release]; -``` - -###### C++ - -```cpp -MTL::SamplerDescriptor* pSamplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); - -pSamplerDescriptor->setSAddressMode( MTL::SamplerAddressModeRepeat ); -pSamplerDescriptor->setTAddressMode( MTL::SamplerAddressModeRepeat ); -pSamplerDescriptor->setRAddressMode( MTL::SamplerAddressModeRepeat ); -pSamplerDescriptor->setMagFilter( MTL::SamplerMinMagFilterLinear ); -pSamplerDescriptor->setMinFilter( MTL::SamplerMinMagFilterLinear ); -pSamplerDescriptor->setMipFilter( MTL::SamplerMipFilterLinear ); -pSamplerDescriptor->setSupportArgumentBuffers( true ); - -MTL::SamplerState* pSamplerState = pDevice->newSamplerState( pSamplerDescriptor ); - -pSamplerDescriptor->release(); - -// ... - -pSamplerState->release(); -``` - -###### C++ (using NS::SharedPtr) - -```cpp -NS::SharedPtr< MTL::SamplerDescriptor > pSamplerDescriptor = NS::TransferPtr( MTL::SamplerDescriptor::alloc()->init() ); - -pSamplerDescriptor->setSAddressMode( MTL::SamplerAddressModeRepeat ); -pSamplerDescriptor->setTAddressMode( MTL::SamplerAddressModeRepeat ); -pSamplerDescriptor->setRAddressMode( MTL::SamplerAddressModeRepeat ); -pSamplerDescriptor->setMagFilter( MTL::SamplerMinMagFilterLinear ); -pSamplerDescriptor->setMinFilter( MTL::SamplerMinMagFilterLinear ); -pSamplerDescriptor->setMipFilter( MTL::SamplerMipFilterLinear ); -pSamplerDescriptor->setSupportArgumentBuffers( true ); - -NS::SharedPtr< MTL::SamplerState > pSamplerState( pDevice->newSamplerState( pSamplerDescriptor ) ); -``` - -#### A subset of bindings for Foundation classes is provided for seamless integration - -###### Objective-C (with automatic reference counting) - -```objc -NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init]; -NSString* string = [NSString stringWithCString: "Hello World" encoding: NSASCIIStringEncoding]; - -printf( "string = \"%s\"\n", [string cStringUsingEncoding: NSASCIIStringEncoding] ); -``` - -###### Objective-C - -```objc -NSAutoreleasePool* pool 
= [[NSAutoreleasePool alloc] init]; -NSString* string = [NSString stringWithCString: "Hello World" encoding: NSASCIIStringEncoding]; - -printf( "string = \"%s\"\n", [string cStringUsingEncoding: NSASCIIStringEncoding] ); - -[pool release]; -``` - -###### C++ - -```cpp -NS::AutoreleasePool* pPool = NS::AutoreleasePool::alloc()->init(); -NS::String* pString = NS::String::string( "Hello World", NS::ASCIIStringEncoding ); - -printf( "pString = \"%s\"\n", pString->cString( NS::ASCIIStringEncoding ) ); - -pPool->release(); -``` - -###### C++ (using NS::SharedPtr) - -```cpp -NS::SharedPtr< NS::AutoreleasePool > pPool = NS::TransferPtr( NS::AutoreleasePool::alloc()->init() ); -NS::String* pString = NS::String::string( "Hello World", NS::ASCIIStringEncoding ); - -printf( "pString = \"%s\"\n", pString->cString( NS::ASCIIStringEncoding ) ); -``` - -#### Containers - -Use the CoreFoundation framework to create `NS::Array` and `NS::Dictionary` instances. - -```cpp -MTL::AccelerationStructureTriangleGeometryDescriptor* pGeoDescriptor = MTL::AccelerationStructureTriangleGeometryDescriptor::alloc()->init(); -CFTypeRef descriptors[] = { ( CFTypeRef )( pGeoDescriptor ) }; -NS::Array* pGeoDescriptors = ( NS::Array* )( CFArrayCreate( kCFAllocatorDefault, descriptors, SIZEOF_ARRAY( descriptors), &kCFTypeArrayCallBacks ) ); - -// ... - -pGeoDescriptors->release(); -``` - -Containers, such as `NS::Array` and `NS::Dictionary`, retain the objects they hold and release them when the container is deallocated. - -#### Accessing the Metal Drawable - -```cpp -#import - -// ... - -CA::MetalLayer* pMetalLayer = /* layer associated with the view */; -CA::MetalDrawable* pMetalDrawable = pMetalLayer->nextDrawable(); - -// ... 
-``` diff --git a/thirdparty/metal-cpp/SingleHeader/MakeSingleHeader.py b/thirdparty/metal-cpp/SingleHeader/MakeSingleHeader.py deleted file mode 100755 index c8d3715fe2a..00000000000 --- a/thirdparty/metal-cpp/SingleHeader/MakeSingleHeader.py +++ /dev/null @@ -1,271 +0,0 @@ -#!/usr/bin/env python3 - -#-------------------------------------------------------------------------------------------------------------------------------------------------------------- -# -# SingleHeader/MakeSingleHeader.py -# -# Copyright 2020-2024 Apple Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -#-------------------------------------------------------------------------------------------------------------------------------------------------------------- - -import argparse -import datetime -import logging -import os -import re -import subprocess -import sys - -#-------------------------------------------------------------------------------------------------------------------------------------------------------------- - -class HeaderPrefix( object ): - __template = ( '//\n' - '// {file}\n' - '//\n' - '// {meta_data}\n' - '//\n' - '// Copyright 2020-2024 Apple Inc.\n' - '//\n' - '// Licensed under the Apache License, Version 2.0 (the "License");\n' - '// you may not use this file except in compliance with the License.\n' - '// You may obtain a copy of the License at\n' - '//\n' - '// http://www.apache.org/licenses/LICENSE-2.0\n' - '//\n' - '// Unless required by applicable law or agreed to in writing, software\n' - '// distributed under the License is distributed on an "AS IS" BASIS,\n' - '// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n' - '// See the License for the specific language governing permissions and\n' - '// limitations under the License.\n' - '//\n' - '\n' ) - - __template_commit = 'Autogenerated from commit {commit}.' - __template_date = 'Autogenerated on %B %d, %Y.' - - def __init__( self, file ): - self.__file = file - - def __str__( self ): - return self.__template.format( file = self.__file, meta_data = self.__meta_data_string() ) - - def __get_commit_hash( self ): - git_commit_hash = None - - try: - git_dir = os.path.dirname( os.path.realpath( __file__ ) ) - proc = subprocess.Popen( [ 'git', 'rev-parse', 'HEAD' ], cwd = git_dir, stdout = subprocess.PIPE, stderr = subprocess.PIPE ) - git_commit_hash = proc.stdout.read().decode( 'utf-8', 'replace' ).strip() - except: - logging.error( 'Failed to determine git commit hash!' 
) - pass - - return git_commit_hash - - def __get_commit_string( self ): - meta_data = None - git_commit_hash = self.__get_commit_hash() - - if git_commit_hash: - meta_data = self.__template_commit.format( commit = git_commit_hash ) - - return meta_data - - def __get_date_string( self ): - today = datetime.date.today() - - return today.strftime( self.__template_date ) - - def __meta_data_string( self ): - meta_data = self.__get_commit_string() - - if not meta_data: - meta_data = self.__get_date_string() - - return meta_data - -#-------------------------------------------------------------------------------------------------------------------------------------------------------------- - -class SingleHeader( object ): - __pragma_once = '#pragma once\n\n' - - def __init__( self ): - self.__header_paths = list() - - def __str__( self ): - return self.process() - - def append( self, header_path ): - self.__header_paths.append( header_path ) - - def process( self ): - out_header = self.__pragma_once - - self.__included_headers = set() - self.__base_path = list() - - for header_path in self.__header_paths: - out_header += self.__process_header( header_path ) - - return self.__strip_empty_lines( out_header ) - - def __read_header( self, path ): - path = os.path.realpath( path ) - - try: - f = open( path, 'r' ) - except: - raise RuntimeError( 'Failed to open file \"' + path + '\" for read!' ) - - return f.read() - - def __strip_pragma_once( self, header ): - return re.sub( '\\s*#pragma once\s*\\/\\/-*\\n', '', header ) - - def __strip_comments( self, header ): - return re.sub( '^//.*\\n', '', header, flags = re.MULTILINE ) - - def __strip_empty_lines( self, header ): - return re.sub( '\\n\\n+', '\\n\\n', header, flags = re.MULTILINE ) - - def __substitute_include_directive( self, match ): - header_path = match.group( 'HEADER_PATH' ) - - logging.info( '\tSubstituting \"' + header_path + '\"...' 
) - - return self.__process_header( os.path.join( self.__base_path[-1], header_path ) ) - - def __process_include_directives( self, header ): - return re.sub( '^\\s*#include\\s\\"(?P\\S*)\\"', self.__substitute_include_directive, header, flags = re.MULTILINE ) - - def __process_foundation_directives( self, header ): - if header.find("#include ") != -1: - logging.info( '\tSubstituting ...' ) - return header.replace("#include ", self.__process_header( os.path.join( self.__base_path[-1], "../Foundation/Foundation.hpp" ) ) ) - return header - - - def __process_header( self, header_path ): - out_header = '' - - header_path = os.path.realpath( header_path ) - - if not header_path in self.__included_headers: - logging.info( 'Processing \"' + header_path + '\"...' ) - - self.__base_path.append( os.path.dirname( header_path ) ) - self.__included_headers.add( header_path ) - - out_header = self.__read_header( header_path ) - out_header = self.__strip_pragma_once( out_header ) - out_header = self.__strip_comments( out_header ) - out_header = self.__process_include_directives( out_header ) - out_header = self.__process_foundation_directives( out_header ) - - self.__base_path.pop() - else: - logging.info( '\tSkipping \"' + header_path + '\"...' ) - - return out_header - -#-------------------------------------------------------------------------------------------------------------------------------------------------------------- - -def create_argument_parser(): - parser = argparse.ArgumentParser() - base_path = os.path.dirname( os.path.realpath( __file__ ) ) - output_path = os.path.join( base_path, 'Metal.hpp' ) - - parser.add_argument( '-o', '--output', dest = 'output_path', metavar = 'PATH', default = output_path, help = 'Output path for the single header file.' ) - parser.add_argument( '-v', '--verbose', action = 'store_true', help = 'Show verbose output.' ) - parser.add_argument( dest = 'header_paths', metavar = 'HEADER_FILE', nargs='+', help = 'Input header file.' 
) - - return parser - -#-------------------------------------------------------------------------------------------------------------------------------------------------------------- - -def parse_arguments(): - parser = create_argument_parser() - args = parser.parse_args() - - if args.verbose: - logging.getLogger().setLevel( logging.INFO ) - else: - logging.getLogger().setLevel( logging.ERROR ) - - return args - -#-------------------------------------------------------------------------------------------------------------------------------------------------------------- - -def make_header( args ): - prefix = HeaderPrefix( os.path.basename( args.output_path ) ) - header = SingleHeader() - - for header_path in args.header_paths: - header.append( header_path ) - - return str( prefix ) + str( header ) - -#-------------------------------------------------------------------------------------------------------------------------------------------------------------- - -def make_dir( path ): - try: - if not os.path.exists( path ): - os.makedirs( path ) - except os.error: - pass - except: - raise - -#-------------------------------------------------------------------------------------------------------------------------------------------------------------- - -def write_header( args, content ): - path = os.path.realpath( args.output_path ) - - logging.info( 'Writing \"' + path + '\"...' ) - - make_dir( os.path.dirname( path ) ) - - try: - f = open( path, 'w' ) - except: - raise RuntimeError( 'Failed to open file \"' + path + '\" for write!' 
) - - f.write( content ) - -#-------------------------------------------------------------------------------------------------------------------------------------------------------------- - -if __name__ == '__main__': - result = -1 - - try: - if sys.getdefaultencoding().lower() == 'ascii': - reload( sys ) - sys.setdefaultencoding( 'utf-8' ) - - args = parse_arguments() - header = make_header( args ) - - write_header( args, header ) - - result = 0 - - except ( KeyboardInterrupt, SystemExit ): - pass - except: - raise - - sys.exit( result ) - -#-------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/thirdparty/metal-cpp/patches/0001-add-missing-apis.patch b/thirdparty/metal-cpp/patches/0001-add-missing-apis.patch new file mode 100644 index 00000000000..0afcbaed09d --- /dev/null +++ b/thirdparty/metal-cpp/patches/0001-add-missing-apis.patch @@ -0,0 +1,76 @@ + thirdparty/metal-cpp/Foundation/NSData.hpp | 6 ++++++ + thirdparty/metal-cpp/Foundation/NSPrivate.hpp | 4 ++++ + thirdparty/metal-cpp/Foundation/NSString.hpp | 7 +++++++ + 3 files changed, 17 insertions(+) + +diff --git a/thirdparty/metal-cpp/Foundation/NSData.hpp b/thirdparty/metal-cpp/Foundation/NSData.hpp +index 3ad360609f..fbf3f20343 100644 +--- a/thirdparty/metal-cpp/Foundation/NSData.hpp ++++ b/thirdparty/metal-cpp/Foundation/NSData.hpp +@@ -33,6 +33,7 @@ class Data : public Copying + { + public: + void* mutableBytes() const; ++ void* bytes() const; + UInteger length() const; + }; + } +@@ -44,6 +45,11 @@ _NS_INLINE void* NS::Data::mutableBytes() const + return Object::sendMessage(this, _NS_PRIVATE_SEL(mutableBytes)); + } + ++_NS_INLINE void* NS::Data::bytes() const ++{ ++ return Object::sendMessage(this, _NS_PRIVATE_SEL(bytes)); ++} ++ + 
//------------------------------------------------------------------------------------------------------------------------------------------------------------- + + _NS_INLINE NS::UInteger NS::Data::length() const +diff --git a/thirdparty/metal-cpp/Foundation/NSPrivate.hpp b/thirdparty/metal-cpp/Foundation/NSPrivate.hpp +index f8d87004f3..17909fbd2a 100644 +--- a/thirdparty/metal-cpp/Foundation/NSPrivate.hpp ++++ b/thirdparty/metal-cpp/Foundation/NSPrivate.hpp +@@ -272,6 +272,8 @@ namespace Private + "initWithBytes:objCType:"); + _NS_PRIVATE_DEF_SEL(initWithBytesNoCopy_length_encoding_freeWhenDone_, + "initWithBytesNoCopy:length:encoding:freeWhenDone:"); ++ _NS_PRIVATE_DEF_SEL(initWithBytes_length_encoding_, ++ "initWithBytes:length:encoding:"); + _NS_PRIVATE_DEF_SEL(initWithChar_, + "initWithChar:"); + _NS_PRIVATE_DEF_SEL(initWithCoder_, +@@ -372,6 +374,8 @@ namespace Private + "methodSignatureForSelector:"); + _NS_PRIVATE_DEF_SEL(mutableBytes, + "mutableBytes"); ++ _NS_PRIVATE_DEF_SEL(bytes, ++ "bytes"); + _NS_PRIVATE_DEF_SEL(name, + "name"); + _NS_PRIVATE_DEF_SEL(nextObject, +diff --git a/thirdparty/metal-cpp/Foundation/NSString.hpp b/thirdparty/metal-cpp/Foundation/NSString.hpp +index 07ba3f8d39..d4d0c52ec2 100644 +--- a/thirdparty/metal-cpp/Foundation/NSString.hpp ++++ b/thirdparty/metal-cpp/Foundation/NSString.hpp +@@ -87,6 +87,7 @@ public: + String* init(); + String* init(const String* pString); + String* init(const char* pString, StringEncoding encoding); ++ String* init(void* pBytes, UInteger len, StringEncoding encoding); + String* init(void* pBytes, UInteger len, StringEncoding encoding, bool freeBuffer); + + unichar character(UInteger index) const; +@@ -168,6 +169,12 @@ _NS_INLINE NS::String* NS::String::init(const char* pString, StringEncoding enco + + //------------------------------------------------------------------------------------------------------------------------------------------------------------- + ++_NS_INLINE NS::String* 
NS::String::init(void* pBytes, UInteger len, StringEncoding encoding) ++{ ++ return Object::sendMessage(this, _NS_PRIVATE_SEL(initWithBytes_length_encoding_), pBytes, len, encoding); ++} ++//------------------------------------------------------------------------------------------------------------------------------------------------------------- ++ + _NS_INLINE NS::String* NS::String::init(void* pBytes, UInteger len, StringEncoding encoding, bool freeBuffer) + { + return Object::sendMessage(this, _NS_PRIVATE_SEL(initWithBytesNoCopy_length_encoding_freeWhenDone_), pBytes, len, encoding, freeBuffer); diff --git a/thirdparty/metal-cpp/update-metal-cpp.sh b/thirdparty/metal-cpp/update-metal-cpp.sh index cba23ad48c8..ba3453052cf 100755 --- a/thirdparty/metal-cpp/update-metal-cpp.sh +++ b/thirdparty/metal-cpp/update-metal-cpp.sh @@ -7,58 +7,24 @@ VERSION="macOS26-iOS26" -pushd "$(dirname "$0")" > /dev/null -SCRIPT_DIR="$(pwd)" +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" -# If a tarball/zip is provided as argument, extract it +# If a zip is provided as argument, extract it if [ -n "$1" ]; then echo "Updating metal-cpp from: $1" - # Create temp directory for extraction and backup - TMPDIR=$(mktemp -d) - trap "rm -rf '$TMPDIR'" EXIT + rm -rf \ + "$SCRIPT_DIR/Foundation" \ + "$SCRIPT_DIR/Metal" \ + "$SCRIPT_DIR/MetalFX" \ + "$SCRIPT_DIR/QuartzCore" \ + "$SCRIPT_DIR/SingleHeader" - # Preserve this script - cp "$SCRIPT_DIR/update-metal-cpp.sh" "$TMPDIR/update-metal-cpp.sh.bak" - - # Clean existing files (keep this script) - use absolute path for safety - find "$SCRIPT_DIR" -mindepth 1 -maxdepth 1 ! 
-name 'update-metal-cpp.sh' -exec rm -rf {} + - - # Extract archive - pushd "$TMPDIR" > /dev/null - if [[ "$1" == *.zip ]]; then - unzip -q "$1" - else - tar --strip-components=1 -xf "$1" - fi - - # Copy contents (handle both flat and nested archives) - if [ -d "metal-cpp" ]; then - cp -r metal-cpp/* "$SCRIPT_DIR/" - elif [ -d "Metal" ]; then - cp -r . "$SCRIPT_DIR/" - else - # Try to find the metal-cpp directory - METAL_DIR=$(find . -type d -name "Metal" -print -quit | xargs dirname) - if [ -n "$METAL_DIR" ]; then - cp -r "$METAL_DIR"/* "$SCRIPT_DIR/" - else - echo "Error: Could not find metal-cpp files in archive" - exit 1 - fi - fi - popd > /dev/null - - # Restore this script - mv "$TMPDIR/update-metal-cpp.sh.bak" "$SCRIPT_DIR/update-metal-cpp.sh" + unzip -q "$1" -d "$SCRIPT_DIR" echo "Extracted metal-cpp $VERSION" else - echo "Usage: $0 " - echo "" - echo "Download metal-cpp from: https://developer.apple.com/metal/cpp/" - echo "Then run: $0 /path/to/metal-cpp.zip" - echo "" echo "Applying patches only..." fi @@ -68,6 +34,30 @@ fi echo "Applying Godot compatibility patches..." +# Apply patch files (idempotent) +PATCH_DIR="$SCRIPT_DIR/patches" +if [ -d "$PATCH_DIR" ]; then + for PATCH in "$PATCH_DIR"/*.patch; do + if [ ! -e "$PATCH" ]; then + echo " No patches found in $PATCH_DIR" + break + fi + + PATCH_NAME="$(basename "$PATCH")" + if git -C "$REPO_ROOT" apply --check "$PATCH" > /dev/null 2>&1; then + git -C "$REPO_ROOT" apply "$PATCH" + echo " $PATCH_NAME: applied" + elif git -C "$REPO_ROOT" apply --reverse --check "$PATCH" > /dev/null 2>&1; then + echo " $PATCH_NAME: already applied" + else + echo " $PATCH_NAME: failed to apply" + exit 1 + fi + done +else + echo " Warning: $PATCH_DIR not found" +fi + # Patch 1: Add forward declarations to NSDefines.hpp to avoid conflicts with # Godot's global types (String, Object, Error). # @@ -104,4 +94,3 @@ else fi echo "Done." -popd > /dev/null