Metal: Convert index & vertex format on GPU when possible.

When converting a vertex buffer:
- If there is no render pass active, use a compute shader to convert.
- If there is a render pass active and the device supports explicit memory barriers, convert the buffer in a vertex shader with direct buffer writes and insert a memory barrier.
- If there is a render pass active and the device doesn't support explicit memory barriers, convert the buffer on the CPU.

Bug: angleproject:2634
Change-Id: I5346e3a2adb855f40e46a3912d9db404a4482e0f
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2434025
Commit-Queue: Le Hoang Quyen <le.hoang.q@gmail.com>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Jonah Ryan-Davis <jonahr@google.com>

Metal: Convert index & vertex format on GPU when possible.
8a50b42b · Le Hoang Quyen · Commit Bot · ed23dc84 · 8a50b42b · 8a50b42b
Commit 8a50b42b authored Oct 24, 2020 by Le Hoang Quyen Committed by Commit Bot Oct 31, 2020
17 changed files
--- a/include/platform/FeaturesMtl.h
+++ b/include/platform/FeaturesMtl.h
@@ -26,6 +26,14 @@ struct FeaturesMtl : FeatureSetBase
        "has_depth_texture_filtering", FeatureCategory::MetalFeatures,
        "The renderer supports depth texture's filtering other than nearest", &members};

+    // Support explicit memory barrier
+    Feature hasExplicitMemBarrier = {"has_explicit_mem_barrier_mtl", FeatureCategory::MetalFeatures,
+                                     "The renderer supports explicit memory barrier", &members};
+
+    // Some renderers, e.g. desktop-class GPUs, can break a render pass cheaply.
+    Feature hasCheapRenderPass = {"has_cheap_render_pass_mtl", FeatureCategory::MetalFeatures,
+                                  "The renderer can cheaply break a render pass.", &members};
+
    // Non-uniform compute shader dispatch support, i.e. Group size is not necessarily to be fixed:
    Feature hasNonUniformDispatch = {
        "has_non_uniform_dispatch", FeatureCategory::MetalFeatures,

--- a/scripts/code_generation_hashes/Metal_default_shaders.json
+++ b/scripts/code_generation_hashes/Metal_default_shaders.json
@@ -14,7 +14,7 @@
  "src/libANGLE/renderer/metal/shaders/constants.h":
    "dad1a869a1095be669b7da5651901d38",
  "src/libANGLE/renderer/metal/shaders/copy_buffer.metal":
-    "813e16a38d6e3ba858b62a712b1b316d",
+    "83d33cc789cb5df7b173b98c50770c0f",
  "src/libANGLE/renderer/metal/shaders/format_autogen.h":
    "b1d6512b904a7eb151b0095b7898b0e5",
  "src/libANGLE/renderer/metal/shaders/gen_indices.metal":
@@ -24,7 +24,7 @@
  "src/libANGLE/renderer/metal/shaders/gen_mtl_internal_shaders.py":
    "b48af61c8b02dda646b4c8febce50227",
  "src/libANGLE/renderer/metal/shaders/mtl_default_shaders_src_autogen.inc":
-    "72e525145bc8f11993791c0f44e79b33",
+    "a0164451469303a462fd777c289c36ee",
  "src/libANGLE/renderer/metal/shaders/visibility.metal":
    "b82aa740cf4b0aed606aacef1024beea"
 }
\ No newline at end of file
--- a/src/libANGLE/renderer/metal/DisplayMtl.mm
+++ b/src/libANGLE/renderer/metal/DisplayMtl.mm
@@ -731,14 +731,19 @@ void DisplayMtl::initializeFeatures()
    // default values:
    mFeatures.hasBaseVertexInstancedDraw.enabled        = true;
    mFeatures.hasDepthTextureFiltering.enabled          = false;
+    mFeatures.hasExplicitMemBarrier.enabled             = false;
    mFeatures.hasNonUniformDispatch.enabled             = true;
    mFeatures.hasStencilOutput.enabled                  = false;
    mFeatures.hasTextureSwizzle.enabled                 = false;
    mFeatures.allowSeparatedDepthStencilBuffers.enabled = false;
    mFeatures.allowGenMultipleMipsPerPass.enabled       = true;
+    mFeatures.hasCheapRenderPass.enabled                = false;

    ANGLE_FEATURE_CONDITION((&mFeatures), hasDepthTextureFiltering,
                            TARGET_OS_OSX || TARGET_OS_MACCATALYST);
+    ANGLE_FEATURE_CONDITION(
+        (&mFeatures), hasExplicitMemBarrier,
+        isMetal2_1 && (TARGET_OS_OSX || TARGET_OS_MACCATALYST) && !ANGLE_MTL_ARM);
    ANGLE_FEATURE_CONDITION((&mFeatures), hasDepthAutoResolve, supportsEitherGPUFamily(3, 2));
    ANGLE_FEATURE_CONDITION((&mFeatures), hasStencilAutoResolve, supportsEitherGPUFamily(5, 2));
    ANGLE_FEATURE_CONDITION((&mFeatures), allowMultisampleStoreAndResolve,
@@ -756,6 +761,9 @@ void DisplayMtl::initializeFeatures()
    // Fence sync is flaky on Nvidia
    ANGLE_FEATURE_CONDITION((&mFeatures), hasEvents, isMetal2_1 && !isNVIDIA());

+    ANGLE_FEATURE_CONDITION((&mFeatures), hasCheapRenderPass,
+                            (TARGET_OS_OSX || TARGET_OS_MACCATALYST) && !ANGLE_MTL_ARM);
+
 #if !TARGET_OS_MACCATALYST && (TARGET_OS_IOS || TARGET_OS_TV)
    // Base Vertex drawing is only supported since GPU family 3.
    ANGLE_FEATURE_CONDITION((&mFeatures), hasBaseVertexInstancedDraw, supportsIOSGPUFamily(3));

--- a/src/libANGLE/renderer/metal/VertexArrayMtl.h
+++ b/src/libANGLE/renderer/metal/VertexArrayMtl.h
@@ -95,11 +95,22 @@ class VertexArrayMtl : public VertexArrayImpl
                                      size_t attribIndex,
                                      const mtl::VertexFormat &vertexFormat);

-    angle::Result convertVertexBufferCPU(const gl::Context *glContext,
+    angle::Result convertVertexBufferCPU(ContextMtl *contextMtl,
                                         BufferMtl *srcBuffer,
                                         const gl::VertexBinding &binding,
                                         size_t attribIndex,
-                                         const mtl::VertexFormat &vertexFormat,
+                                         const mtl::VertexFormat &convertedFormat,
+                                         GLuint targetStride,
+                                         size_t vertexCount,
+                                         ConversionBufferMtl *conversion);
+    angle::Result convertVertexBufferGPU(const gl::Context *glContext,
+                                         BufferMtl *srcBuffer,
+                                         const gl::VertexBinding &binding,
+                                         size_t attribIndex,
+                                         const mtl::VertexFormat &convertedFormat,
+                                         GLuint targetStride,
+                                         size_t vertexCount,
+                                         bool isExpandingComponents,
                                         ConversionBufferMtl *conversion);

    // These can point to real BufferMtl or converted buffer in mConvertedArrayBufferHolders

--- a/src/libANGLE/renderer/metal/VertexArrayMtl.mm
+++ b/src/libANGLE/renderer/metal/VertexArrayMtl.mm
--- a/src/libANGLE/renderer/metal/mtl_command_buffer.h
+++ b/src/libANGLE/renderer/metal/mtl_command_buffer.h
@@ -441,6 +441,10 @@ class RenderCommandEncoder final : public CommandEncoder
                                      MTLResourceUsage usage,
                                      mtl::RenderStages states);

+    RenderCommandEncoder &memoryBarrierWithResource(const BufferRef &resource,
+                                                    mtl::RenderStages after,    // -> afterStages:
+                                                    mtl::RenderStages before);  // -> beforeStages:
+
    RenderCommandEncoder &setColorStoreAction(MTLStoreAction action, uint32_t colorAttachmentIndex);
    // Set store action for every color attachment.
    RenderCommandEncoder &setColorStoreAction(MTLStoreAction action);

--- a/src/libANGLE/renderer/metal/mtl_command_buffer.mm
+++ b/src/libANGLE/renderer/metal/mtl_command_buffer.mm
@@ -64,6 +64,7 @@ namespace
    PROC(DrawIndexedInstancedBaseVertex) \
    PROC(SetVisibilityResultMode)        \
    PROC(UseResource)                    \
+    PROC(MemoryBarrierWithResource)      \
    PROC(PushDebugGroup)                 \
    PROC(PopDebugGroup)

@@ -347,6 +348,26 @@ void UseResourceCmd(id<MTLRenderCommandEncoder> encoder, IntermediateCommandStre
    [resource ANGLE_MTL_RELEASE];
 }

+void MemoryBarrierWithResourceCmd(id<MTLRenderCommandEncoder> encoder,
+                                  IntermediateCommandStream *stream)
+{  // Decodes one recorded MemoryBarrierWithResource command and encodes it on `encoder`.
+    id<MTLResource> resource = stream->fetch<id<MTLResource>>();  // reference is dropped below
+    mtl::RenderStages after  = stream->fetch<mtl::RenderStages>();
+    mtl::RenderStages before = stream->fetch<mtl::RenderStages>();
+    ANGLE_UNUSED_VARIABLE(after);  // both are unused when the barrier call is compiled out
+    ANGLE_UNUSED_VARIABLE(before);
+#if defined(__MAC_10_14) && (TARGET_OS_OSX || TARGET_OS_MACCATALYST)
+    if (ANGLE_APPLE_AVAILABLE_XC(10.14, 13.0))  // NOTE(review): presumably a runtime OS check
+    {
+        [encoder memoryBarrierWithResources:&resource
+                                      count:1
+                                afterStages:after
+                               beforeStages:before];
+    }
+#endif
+    [resource ANGLE_MTL_RELEASE];  // runs on every path, whether or not a barrier was encoded
+}
+
 void PushDebugGroupCmd(id<MTLRenderCommandEncoder> encoder, IntermediateCommandStream *stream)
 {
    NSString *label = stream->fetch<NSString *>();
@@ -1633,6 +1654,25 @@ RenderCommandEncoder &RenderCommandEncoder::useResource(const BufferRef &resourc
    return *this;
 }

+RenderCommandEncoder &RenderCommandEncoder::memoryBarrierWithResource(const BufferRef &resource,
+                                                                      mtl::RenderStages after,
+                                                                      mtl::RenderStages before)
+{  // Records a single-buffer memory barrier command; returns *this so calls can be chained.
+    if (!resource)
+    {
+        return *this;  // null buffer: nothing is recorded
+    }
+
+    cmdBuffer().setWriteDependency(resource);  // NOTE(review): presumably marks a GPU write
+
+    mCommands.push(CmdType::MemoryBarrierWithResource)
+        .push([resource->get() ANGLE_MTL_RETAIN])  // released by MemoryBarrierWithResourceCmd
+        .push(after)
+        .push(before);  // same order in which MemoryBarrierWithResourceCmd fetches them
+
+    return *this;
+}
+
 void RenderCommandEncoder::pushDebugGroup(NSString *label)
 {
    // Defer the insertion until endEncoding()

--- a/src/libANGLE/renderer/metal/mtl_common.h
+++ b/src/libANGLE/renderer/metal/mtl_common.h
@@ -36,6 +36,12 @@
 #    define TARGET_OS_MACCATALYST 0
 #endif

+#if defined(__ARM_ARCH)  // ANGLE_MTL_ARM is always defined, so it can be used in expressions
+#    define ANGLE_MTL_ARM (__ARM_ARCH != 0)
+#else  // compiler does not predefine __ARM_ARCH: treat the target as non-ARM
+#    define ANGLE_MTL_ARM 0
+#endif
+
 #define ANGLE_MTL_OBJC_SCOPE @autoreleasepool

 #if !__has_feature(objc_arc)

--- a/src/libANGLE/renderer/metal/mtl_render_utils.h
+++ b/src/libANGLE/renderer/metal/mtl_render_utils.h
@@ -161,6 +161,21 @@ struct CopyPixelsToBufferParams : CopyPixelsCommonParams
    bool reverseTextureRowOrder;
 };

+struct VertexFormatConvertParams
+{  // Inputs taken by the VertexFormatConversionUtils convert/expand helpers in this header.
+    BufferRef srcBuffer;
+    uint32_t srcBufferStartOffset = 0;  // NOTE(review): presumably in bytes - confirm
+    uint32_t srcStride            = 0;
+    uint32_t srcDefaultAlphaData  = 0;  // default alpha value, cast to uint
+
+    BufferRef dstBuffer;
+    uint32_t dstBufferStartOffset = 0;  // NOTE(review): presumably in bytes - confirm
+    uint32_t dstStride            = 0;
+    uint32_t dstComponents        = 0;  // number of components per converted vertex
+
+    uint32_t vertexCount = 0;
+};
+
 // Utils class for clear & blitting
 class ClearUtils final : angle::NonCopyable
 {
@@ -356,7 +371,7 @@ class IndexGeneratorUtils final : angle::NonCopyable
 };

 // Util class for handling visibility query result
-class VisibilityResultUtils
+class VisibilityResultUtils final : angle::NonCopyable
 {
  public:
    void onDestroy();
@@ -396,7 +411,7 @@ class MipmapUtils final : angle::NonCopyable
 };

 // Util class for handling pixels copy between buffers and textures
-class CopyPixelsUtils
+class CopyPixelsUtils final : angle::NonCopyable
 {
  public:
    CopyPixelsUtils() = default;
@@ -429,6 +444,73 @@ class CopyPixelsUtils
    const std::string mWriteShaderName;
 };

+// Utility class that converts vertex buffer formats on the GPU (compute or vertex shader).
+class VertexFormatConversionUtils final : angle::NonCopyable
+{
+  public:
+    void onDestroy();  // NOTE(review): presumably drops the cached pipelines - body not visible
+
+    // Converts vertex data to float format using a compute shader.
+    angle::Result convertVertexFormatToFloatCS(ContextMtl *contextMtl,
+                                               const angle::Format &srcAngleFormat,
+                                               const VertexFormatConvertParams &params);
+    // Converts vertex data to float format using a vertex shader. Use this version when a
+    // render pass is active and should not be broken. The device must support explicit
+    // memory barriers.
+    angle::Result convertVertexFormatToFloatVS(const gl::Context *context,
+                                               RenderCommandEncoder *renderEncoder,
+                                               const angle::Format &srcAngleFormat,
+                                               const VertexFormatConvertParams &params);
+    // Expands the number of components of each vertex attribute (or simply copies components
+    // between buffers that have a different stride and offset). Compute shader version.
+    angle::Result expandVertexFormatComponentsCS(ContextMtl *contextMtl,
+                                                 const angle::Format &srcAngleFormat,
+                                                 const VertexFormatConvertParams &params);
+    angle::Result expandVertexFormatComponentsVS(const gl::Context *context,  // VS version
+                                                 RenderCommandEncoder *renderEncoder,
+                                                 const angle::Format &srcAngleFormat,
+                                                 const VertexFormatConvertParams &params);
+
+  private:
+    void ensureComponentsExpandComputePipelineCreated(ContextMtl *contextMtl);
+    AutoObjCPtr<id<MTLRenderPipelineState>> getComponentsExpandRenderPipeline(
+        ContextMtl *contextMtl,
+        RenderCommandEncoder *renderEncoder);
+
+    AutoObjCPtr<id<MTLComputePipelineState>> getFloatConverstionComputePipeline(  // sic
+        ContextMtl *contextMtl,
+        const angle::Format &srcAngleFormat);
+
+    AutoObjCPtr<id<MTLRenderPipelineState>> getFloatConverstionRenderPipeline(  // sic
+        ContextMtl *contextMtl,
+        RenderCommandEncoder *renderEncoder,
+        const angle::Format &srcAngleFormat);
+
+    template <typename EncoderType, typename PipelineType>  // encoder-agnostic setup helper
+    angle::Result setupCommonConvertVertexFormatToFloat(ContextMtl *contextMtl,
+                                                        EncoderType cmdEncoder,
+                                                        const PipelineType &pipeline,
+                                                        const angle::Format &srcAngleFormat,
+                                                        const VertexFormatConvertParams &params);
+    template <typename EncoderType, typename PipelineType>  // encoder-agnostic setup helper
+    angle::Result setupCommonExpandVertexFormatComponents(ContextMtl *contextMtl,
+                                                          EncoderType cmdEncoder,
+                                                          const PipelineType &pipeline,
+                                                          const angle::Format &srcAngleFormat,
+                                                          const VertexFormatConvertParams &params);
+
+    using ConvertToFloatCompPipelineArray =  // one slot per ANGLE format
+        std::array<AutoObjCPtr<id<MTLComputePipelineState>>, angle::kNumANGLEFormats>;
+    using ConvertToFloatRenderPipelineArray =  // one slot per ANGLE format
+        std::array<RenderPipelineCache, angle::kNumANGLEFormats>;
+
+    ConvertToFloatCompPipelineArray mConvertToFloatCompPipelineCaches;
+    ConvertToFloatRenderPipelineArray mConvertToFloatRenderPipelineCaches;
+
+    AutoObjCPtr<id<MTLComputePipelineState>> mComponentsExpandCompPipeline;
+    RenderPipelineCache mComponentsExpandRenderPipelineCache;
+};
+
 // RenderUtils: container class of various util classes above
 class RenderUtils : public Context, angle::NonCopyable
 {
@@ -501,6 +583,25 @@ class RenderUtils : public Context, angle::NonCopyable
                                                const angle::Format &dstAngleFormat,
                                                const CopyPixelsToBufferParams &params);

+    // See VertexFormatConversionUtils::convertVertexFormatToFloatCS()
+    angle::Result convertVertexFormatToFloatCS(ContextMtl *contextMtl,
+                                               const angle::Format &srcAngleFormat,
+                                               const VertexFormatConvertParams &params);
+    // See VertexFormatConversionUtils::convertVertexFormatToFloatVS()
+    angle::Result convertVertexFormatToFloatVS(const gl::Context *context,
+                                               RenderCommandEncoder *renderEncoder,
+                                               const angle::Format &srcAngleFormat,
+                                               const VertexFormatConvertParams &params);
+    // See VertexFormatConversionUtils::expandVertexFormatComponentsCS()
+    angle::Result expandVertexFormatComponentsCS(ContextMtl *contextMtl,
+                                                 const angle::Format &srcAngleFormat,
+                                                 const VertexFormatConvertParams &params);
+    // See VertexFormatConversionUtils::expandVertexFormatComponentsVS()
+    angle::Result expandVertexFormatComponentsVS(const gl::Context *context,
+                                                 RenderCommandEncoder *renderEncoder,
+                                                 const angle::Format &srcAngleFormat,
+                                                 const VertexFormatConvertParams &params);
+
  private:
    // override ErrorHandler
    void handleError(GLenum error,
@@ -522,6 +623,7 @@ class RenderUtils : public Context, angle::NonCopyable
    VisibilityResultUtils mVisibilityResultUtils;
    MipmapUtils mMipmapUtils;
    std::array<CopyPixelsUtils, angle::EnumSize<PixelType>()> mCopyPixelsUtils;
+    VertexFormatConversionUtils mVertexFormatUtils;
 };

 }  // namespace mtl

--- a/src/libANGLE/renderer/metal/mtl_render_utils.mm
+++ b/src/libANGLE/renderer/metal/mtl_render_utils.mm
--- a/src/libANGLE/renderer/metal/shaders/copy_buffer.metal
+++ b/src/libANGLE/renderer/metal/shaders/copy_buffer.metal
@@ -1495,3 +1495,184 @@ kernel void writeFromUIntTextureToBuffer(COMMON_WRITE_KERNEL_PARAMS(uint))

 #undef SUPPORTED_FORMATS
 }
+
+/** -----  vertex format conversion --------*/
+struct CopyVertexParams
+{
+    uint srcBufferStartOffset;  // byte offset of vertex 0 in the source buffer
+    uint srcStride;             // bytes between consecutive source vertices
+    uint srcComponentBytes;  // unused when converting to float
+    uint srcComponents;      // unused when converting to float
+    // Default source alpha, used when expanding the number of components.
+    // Only the first srcComponentBytes bytes of srcDefaultAlphaData are read, so a source
+    // with fewer than 32 bits per component uses only part of it.
+    uchar4 srcDefaultAlphaData;  // unused when converting to float
+
+    uint dstBufferStartOffset;  // byte offset of vertex 0 in the destination buffer
+    uint dstStride;             // bytes between consecutive destination vertices
+    uint dstComponents;         // number of components written per destination vertex
+
+    uint vertexCount;  // passed to ANGLE_KERNEL_GUARD by the compute kernels
+};
+
+#define INT_FORMAT_PROC(FORMAT, PROC) \
+    PROC(FORMAT##_UNORM)              \
+    PROC(FORMAT##_SNORM)              \
+    PROC(FORMAT##_UINT)               \
+    PROC(FORMAT##_SINT)               \
+    PROC(FORMAT##_USCALED)            \
+    PROC(FORMAT##_SSCALED)  // applies PROC to the six integer-based variants of FORMAT
+
+#define PURE_INT_FORMAT_PROC(FORMAT, PROC) \
+    PROC(FORMAT##_UINT)                    \
+    PROC(FORMAT##_SINT)  // _UINT/_SINT variants only; not referenced in the visible hunk
+
+#define FLOAT_FORMAT_PROC(FORMAT, PROC) PROC(FORMAT##_FLOAT)  // e.g. R32 -> PROC(R32_FLOAT)
+#define FIXED_FORMAT_PROC(FORMAT, PROC) PROC(FORMAT##_FIXED)  // e.g. R32 -> PROC(R32_FIXED)
+
+#define FORMAT_BITS_PROC(BITS, PROC1, PROC2) \
+    PROC1(R##BITS, PROC2)                    \
+    PROC1(R##BITS##G##BITS, PROC2)           \
+    PROC1(R##BITS##G##BITS##B##BITS, PROC2)  \
+    PROC1(R##BITS##G##BITS##B##BITS##A##BITS, PROC2)  // R, RG, RGB, RGBA at BITS bits each
+
+template <typename IntType>
+static inline void writeFloatVertex(constant CopyVertexParams &options,
+                                    uint idx,
+                                    vec<IntType, 4> data,
+                                    device uchar *dst)
+{  // Writes the first dstComponents lanes of `data`, each cast to float, for vertex `idx`.
+    uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;  // byte offset
+
+    for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
+    {  // dstOffset advances 4 bytes per component; floatToBytes is defined outside this hunk
+        floatToBytes(static_cast<float>(data[component]), dstOffset, dst);
+    }
+}
+
+template <>
+inline void writeFloatVertex(constant CopyVertexParams &options,
+                             uint idx,
+                             vec<float, 4> data,
+                             device uchar *dst)
+{  // Specialization for data that is already float: same layout, no per-component cast.
+    uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;  // byte offset
+
+    for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
+    {  // dstOffset advances 4 bytes per component
+        floatToBytes(data[component], dstOffset, dst);
+    }
+}
+
+// Converts vertex `index` from the source format (kCopyFormatType) to floats in dstBuffer.
+static inline void convertToFloatVertexFormat(uint index,
+                                              constant CopyVertexParams &options,
+                                              constant uchar *srcBuffer,
+                                              device uchar *dstBuffer)
+{  // SUPPORTED_FORMATS lists every source format that gets a switch case below.
+#define SUPPORTED_FORMATS(PROC)                   \
+    FORMAT_BITS_PROC(8, INT_FORMAT_PROC, PROC)    \
+    FORMAT_BITS_PROC(16, INT_FORMAT_PROC, PROC)   \
+    FORMAT_BITS_PROC(32, INT_FORMAT_PROC, PROC)   \
+    FORMAT_BITS_PROC(16, FLOAT_FORMAT_PROC, PROC) \
+    FORMAT_BITS_PROC(32, FLOAT_FORMAT_PROC, PROC) \
+    FORMAT_BITS_PROC(32, FIXED_FORMAT_PROC, PROC) \
+    PROC(R10G10B10A2_SINT)                        \
+    PROC(R10G10B10A2_UINT)                        \
+    PROC(R10G10B10A2_SSCALED)                     \
+    PROC(R10G10B10A2_USCALED)
+
+    uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index;  // src bytes
+
+#define COMVERT_FLOAT_VERTEX_SWITCH_CASE(FORMAT)           \
+    case FormatID::FORMAT: {                               \
+        auto data = read##FORMAT(bufferOffset, srcBuffer); \
+        writeFloatVertex(options, index, data, dstBuffer); \
+    }                                                      \
+    break;
+
+    switch (kCopyFormatType)  // defined outside this hunk; formats not listed write nothing
+    {
+        SUPPORTED_FORMATS(COMVERT_FLOAT_VERTEX_SWITCH_CASE)
+    }
+
+#undef SUPPORTED_FORMATS  // NOTE(review): COMVERT_FLOAT_VERTEX_SWITCH_CASE (sic) stays defined
+}
+
+// Compute kernel: converts one vertex per thread from any supported format to float format.
+kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]],
+                                         constant CopyVertexParams &options [[buffer(0)]],
+                                         constant uchar *srcBuffer [[buffer(1)]],
+                                         device uchar *dstBuffer [[buffer(2)]])
+{
+    ANGLE_KERNEL_GUARD(index, options.vertexCount);  // presumably exits when index is too big
+    convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
+}
+
+// Vertex shader: converts the vertex selected by vertex_id to float format; returns nothing.
+vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]],
+                                         constant CopyVertexParams &options [[buffer(0)]],
+                                         constant uchar *srcBuffer [[buffer(1)]],
+                                         device uchar *dstBuffer [[buffer(2)]])
+{  // no vertexCount guard here, unlike the CS kernel
+    convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
+}
+
+// Copies vertex `index` byte by byte and pads missing components up to dstComponents.
+static inline void expandVertexFormatComponents(uint index,
+                                                constant CopyVertexParams &options,
+                                                constant uchar *srcBuffer,
+                                                device uchar *dstBuffer)
+{
+    uint srcOffset = options.srcBufferStartOffset + options.srcStride * index;  // byte offset
+    uint dstOffset = options.dstBufferStartOffset + options.dstStride * index;  // byte offset
+
+    uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u);  // 4th component = alpha
+    uint component;
+    for (component = 0; component < options.srcComponents; ++component,
+        srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes)
+    {  // 1) copy source components; NOTE(review): bounded by srcComponents, not dstComponents
+        for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
+        {
+            dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte];
+        }
+    }
+
+    for (; component < dstComponentsBeforeAlpha;
+         ++component, dstOffset += options.srcComponentBytes)
+    {  // 2) zero-fill the non-alpha components missing from the source
+        for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
+        {
+            dstBuffer[dstOffset + byte] = 0;
+        }
+    }
+
+    if (component < options.dstComponents)
+    {
+        // 3) Missing alpha: fill with the default alpha bytes (uchar4, so at most 4 bytes)
+        for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
+        {
+            dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte];
+        }
+    }
+}
+
+// Compute kernel: expands (or simply copies) the components of one vertex per thread.
+kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]],
+                                           constant CopyVertexParams &options [[buffer(0)]],
+                                           constant uchar *srcBuffer [[buffer(1)]],
+                                           device uchar *dstBuffer [[buffer(2)]])
+{
+    ANGLE_KERNEL_GUARD(index, options.vertexCount);  // presumably exits when index is too big
+
+    expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
+}
+
+// Vertex shader: expands (or simply copies) the components of the vertex given by vertex_id.
+vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]],
+                                           constant CopyVertexParams &options [[buffer(0)]],
+                                           constant uchar *srcBuffer [[buffer(1)]],
+                                           device uchar *dstBuffer [[buffer(2)]])
+{  // no vertexCount guard here, unlike the CS kernel
+    expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
+}
--- a/src/libANGLE/renderer/metal/shaders/mtl_default_shaders_src_autogen.inc
+++ b/src/libANGLE/renderer/metal/shaders/mtl_default_shaders_src_autogen.inc
--- a/src/tests/gl_tests/VertexAttributeTest.cpp
+++ b/src/tests/gl_tests/VertexAttributeTest.cpp
@@ -169,7 +169,8 @@ class VertexAttributeTest : public ANGLETest
              bufferOffset(0),
              source(sourceIn),
              inputData(inputDataIn),
-              expectedData(expectedDataIn)
+              expectedData(expectedDataIn),
+              clearBeforeDraw(false)
        {}

        GLenum type;
@@ -179,6 +180,8 @@ class VertexAttributeTest : public ANGLETest

        const void *inputData;
        const GLfloat *expectedData;
+
+        bool clearBeforeDraw;
    };

    void setupTest(const TestData &test, GLint typeSize)
@@ -269,6 +272,11 @@ class VertexAttributeTest : public ANGLETest
            GLint typeSize = i + 1;
            setupTest(test, typeSize);

+            if (test.clearBeforeDraw)
+            {
+                glClear(GL_COLOR_BUFFER_BIT);
+            }
+
            drawQuad(mProgram, "position", 0.5f);

            glDisableVertexAttribArray(mTestAttrib);
@@ -986,6 +994,48 @@ TEST_P(VertexAttributeTestES3, IntNormalized)
    runTest(data);
 }

+// Same as IntUnnormalized, but calls glClear() before the draw to force a render
+// pass to start. This verifies that buffer format conversion inside an active
+// render pass works as expected in the Metal back-end.
+TEST_P(VertexAttributeTestES3, IntUnnormalizedWithClear)
+{
+    GLint lo                                  = std::numeric_limits<GLint>::min();
+    GLint hi                                  = std::numeric_limits<GLint>::max();
+    std::array<GLint, kVertexCount> inputData = {
+        {0, 1, 2, 3, -1, -2, -3, -4, -1, hi, hi - 1, lo, lo + 1}};
+    std::array<GLfloat, kVertexCount> expectedData;
+    for (size_t i = 0; i < kVertexCount; i++)
+    {
+        expectedData[i] = static_cast<GLfloat>(inputData[i]);  // unnormalized: plain cast
+    }
+
+    TestData data(GL_INT, GL_FALSE, Source::BUFFER, inputData.data(), expectedData.data());
+    data.clearBeforeDraw = true;  // makes runTest() call glClear() right before drawQuad()
+
+    runTest(data);
+}
+
+// Same as IntNormalized, but calls glClear() before the draw to force a render
+// pass to start. This verifies that buffer format conversion inside an active
+// render pass works as expected in the Metal back-end.
+TEST_P(VertexAttributeTestES3, IntNormalizedWithClear)
+{
+    GLint lo                                  = std::numeric_limits<GLint>::min();
+    GLint hi                                  = std::numeric_limits<GLint>::max();
+    std::array<GLint, kVertexCount> inputData = {
+        {0, 1, 2, 3, -1, -2, -3, -4, -1, hi, hi - 1, lo, lo + 1}};
+    std::array<GLfloat, kVertexCount> expectedData;
+    for (size_t i = 0; i < kVertexCount; i++)
+    {
+        expectedData[i] = Normalize(inputData[i]);  // Normalize() is defined outside this hunk
+    }
+
+    TestData data(GL_INT, GL_TRUE, Source::BUFFER, inputData.data(), expectedData.data());
+    data.clearBeforeDraw = true;  // makes runTest() call glClear() right before drawQuad()
+
+    runTest(data);
+}
+
 TEST_P(VertexAttributeTestES3, UnsignedIntUnnormalized)
 {
    GLuint mid                                 = std::numeric_limits<GLuint>::max() >> 1;
@@ -1018,6 +1068,26 @@ TEST_P(VertexAttributeTestES3, UnsignedIntNormalized)
    runTest(data);
 }

+// Same as UnsignedIntNormalized, but calls glClear() before the draw to force a render
+// pass to start. This verifies that buffer format conversion inside an active
+// render pass works as expected in the Metal back-end.
+TEST_P(VertexAttributeTestES3, UnsignedIntNormalizedWithClear)
+{
+    GLuint mid                                 = std::numeric_limits<GLuint>::max() >> 1;
+    GLuint hi                                  = std::numeric_limits<GLuint>::max();
+    std::array<GLuint, kVertexCount> inputData = {
+        {0, 1, 2, 3, 254, 255, 256, mid - 1, mid, mid + 1, hi - 2, hi - 1, hi}};
+    std::array<GLfloat, kVertexCount> expectedData;
+    for (size_t i = 0; i < kVertexCount; i++)
+    {
+        expectedData[i] = Normalize(inputData[i]);  // Normalize() is defined outside this hunk
+    }
+
+    TestData data(GL_UNSIGNED_INT, GL_TRUE, Source::BUFFER, inputData.data(), expectedData.data());
+    data.clearBeforeDraw = true;  // makes runTest() call glClear() right before drawQuad()
+    runTest(data);
+}
+
 void SetupColorsForUnitQuad(GLint location, const GLColor32F &color, GLenum usage, GLBuffer *vbo)
 {
    glBindBuffer(GL_ARRAY_BUFFER, *vbo);
@@ -3553,14 +3623,46 @@ void main()
 // tests should be run against.
 // D3D11 Feature Level 9_3 uses different D3D formats for vertex attribs compared to Feature Levels
 // 10_0+, so we should test them separately.
-ANGLE_INSTANTIATE_TEST_ES2_AND_ES3(VertexAttributeTest);
-
-ANGLE_INSTANTIATE_TEST_ES2_AND_ES3(VertexAttributeOORTest);
-
-ANGLE_INSTANTIATE_TEST_ES3_AND(VertexAttributeTestES3, ES3_METAL());
+ANGLE_INSTANTIATE_TEST_ES2_AND_ES3_AND(
+    VertexAttributeTest,
+    ES3_METAL(),
+    WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
+                                             /* hasBarrier */ false,
+                                             /* cheapRenderPass */ true),
+    WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
+                                             /* hasBarrier */ false,
+                                             /* cheapRenderPass */ false));
+
+ANGLE_INSTANTIATE_TEST_ES2_AND_ES3_AND(
+    VertexAttributeOORTest,
+    ES3_METAL(),
+    WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
+                                             /* hasBarrier */ false,
+                                             /* cheapRenderPass */ true),
+    WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
+                                             /* hasBarrier */ false,
+                                             /* cheapRenderPass */ false));
+
+ANGLE_INSTANTIATE_TEST_ES3_AND(
+    VertexAttributeTestES3,
+    ES3_METAL(),
+    WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
+                                             /* hasBarrier */ false,
+                                             /* cheapRenderPass */ true),
+    WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
+                                             /* hasBarrier */ false,
+                                             /* cheapRenderPass */ false));

 ANGLE_INSTANTIATE_TEST_ES31(VertexAttributeTestES31);

-ANGLE_INSTANTIATE_TEST_ES2_AND_ES3(VertexAttributeCachingTest);
+ANGLE_INSTANTIATE_TEST_ES2_AND_ES3_AND(
+    VertexAttributeCachingTest,
+    ES3_METAL(),
+    WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
+                                             /* hasBarrier */ false,
+                                             /* cheapRenderPass */ true),
+    WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
+                                             /* hasBarrier */ false,
+                                             /* cheapRenderPass */ false));

 }  // anonymous namespace
--- a/src/tests/test_utils/angle_test_configs.cpp
+++ b/src/tests/test_utils/angle_test_configs.cpp
@@ -234,6 +234,16 @@ std::ostream &operator<<(std::ostream &stream, const PlatformParameters &pp)
            break;
    }

+    if (pp.eglParameters.hasExplicitMemBarrierFeatureMtl == EGL_FALSE)
+    {
+        stream << "_NoExplicitMemoryBarrier";
+    }
+
+    if (pp.eglParameters.hasCheapRenderPassFeatureMtl == EGL_FALSE)
+    {
+        stream << "_NoCheapRenderPass";
+    }
+
    return stream;
 }


--- a/src/tests/test_utils/angle_test_configs.h
+++ b/src/tests/test_utils/angle_test_configs.h
@@ -241,6 +241,16 @@ inline PlatformParameters WithNoGenMultipleMipsPerPass(const PlatformParameters 
    return re;
 }

+inline PlatformParameters WithMetalMemoryBarrierAndCheapRenderPass(const PlatformParameters &params,
+                                                                   bool hasBarrier,
+                                                                   bool cheapRenderPass)
+{  // Returns a copy of `params` with the two Metal feature fields set from the bool arguments.
+    PlatformParameters re                            = params;
+    re.eglParameters.hasExplicitMemBarrierFeatureMtl = hasBarrier ? EGL_TRUE : EGL_FALSE;
+    re.eglParameters.hasCheapRenderPassFeatureMtl    = cheapRenderPass ? EGL_TRUE : EGL_FALSE;
+    return re;  // NOTE(review): the visible hunks only act on EGL_FALSE (feature disabled)
+}
+
 inline PlatformParameters WithRobustness(const PlatformParameters &params)
 {
    PlatformParameters withRobustness       = params;

--- a/util/EGLPlatformParameters.h
+++ b/util/EGLPlatformParameters.h
@@ -62,7 +62,8 @@ struct EGLPlatformParameters
                        debugLayersEnabled, contextVirtualization, transformFeedbackFeature,
                        allocateNonZeroMemoryFeature, emulateCopyTexImage2DFromRenderbuffers,
                        shaderStencilOutputFeature, genMultipleMipsPerPassFeature, platformMethods,
-                        robustness, emulatedPrerotation);
+                        robustness, emulatedPrerotation, hasExplicitMemBarrierFeatureMtl,
+                        hasCheapRenderPassFeatureMtl);
    }

    EGLint renderer                               = EGL_PLATFORM_ANGLE_TYPE_DEFAULT_ANGLE;
@@ -79,6 +80,8 @@ struct EGLPlatformParameters
    EGLint shaderStencilOutputFeature             = EGL_DONT_CARE;
    EGLint genMultipleMipsPerPassFeature          = EGL_DONT_CARE;
    uint32_t emulatedPrerotation                  = 0;  // Can be 0, 90, 180 or 270
+    EGLint hasExplicitMemBarrierFeatureMtl        = EGL_DONT_CARE;
+    EGLint hasCheapRenderPassFeatureMtl           = EGL_DONT_CARE;
    angle::PlatformMethods *platformMethods       = nullptr;
 };


--- a/util/EGLWindow.cpp
+++ b/util/EGLWindow.cpp
@@ -221,6 +221,16 @@ bool EGLWindow::initializeDisplay(OSWindow *osWindow,
            break;
    }

+    if (params.hasExplicitMemBarrierFeatureMtl == EGL_FALSE)
+    {
+        disabledFeatureOverrides.push_back("has_explicit_mem_barrier_mtl");
+    }
+
+    if (params.hasCheapRenderPassFeatureMtl == EGL_FALSE)
+    {
+        disabledFeatureOverrides.push_back("has_cheap_render_pass_mtl");
+    }
+
    if (!disabledFeatureOverrides.empty())
    {
        if (strstr(extensionString, "EGL_ANGLE_feature_control") == nullptr)