Vulkan: Convert vertex attributes in compute

In this commit, VertexArrayVk::convertVertexBuffer() is renamed to VertexArrayVk::convertVertexBufferCpu() to explicitly show it does a CPU readback. A new VertexArrayVk::convertVertexBufferGpu() function is added that has the same functionality on the GPU (with some assumptions, where the CPU fallback is used should those assumptions fail). Currently, the only requirement is that buffer offset/stride are divisible by the component size. ConvertVertex.comp is the shader responsible for this conversion, and it implements the functionality in renderer/copyvertex.inc, minus a few functions that are not used in the Vulkan backend. Bug: angleproject:2958, angleproject:3009 Change-Id: I8ec9a5f4672509bcf7b9e352cd27663970ad4653 Reviewed-on: https://chromium-review.googlesource.com/c/1364451 Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org> Reviewed-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Yuly Novikov <ynovikov@chromium.org>

Vulkan: Convert vertex attributes in compute
611bbaab · Shahbaz Youssefi · Commit Bot · be607ad6 · 611bbaab · 611bbaab
Commit 611bbaab authored Dec 06, 2018 by Shahbaz Youssefi Committed by Commit Bot Dec 11, 2018
36 changed files
--- a/include/platform/FeaturesVk.h
+++ b/include/platform/FeaturesVk.h
@@ -48,6 +48,11 @@ struct FeaturesVk
    // actual behavior. Clamp the point size to the value from the API to fix this.
    // Tracked in http://anglebug.com/2970.
    bool clampPointSize = false;
+    // On some android devices, the memory barrier between the compute shader that converts vertex
+    // attributes and the vertex shader that reads from it is ineffective.  Only known workaround is
+    // to perform a flush after the conversion.  http://anglebug.com/3016
+    bool flushAfterVertexConversion = false;
 };
 }  // namespace angle

--- a/scripts/run_code_generation_hashes.json
+++ b/scripts/run_code_generation_hashes.json
@@ -6,7 +6,7 @@
  "ANGLE format:src/libANGLE/renderer/angle_format_map.json":
    "be9f9bdbdf785dda05920146e8c55dbb",
  "ANGLE format:src/libANGLE/renderer/gen_angle_format_table.py":
-    "9de29b6ca59a05747623c0dc32344b14",
+    "c215a4284425ea888f95d084a3d52e51",
  "ANGLE load functions table:src/libANGLE/renderer/gen_load_functions_table.py":
    "8afc7eecce2a3ba9f0b4beacb1aa7fe2",
  "ANGLE load functions table:src/libANGLE/renderer/load_functions_data.json":
@@ -34,7 +34,7 @@
  "DXGI format:src/libANGLE/renderer/d3d/d3d11/gen_dxgi_format_table.py":
    "8ea01df6cb7f160772d3c85dd5164890",
  "DXGI format:src/libANGLE/renderer/gen_angle_format_table.py":
-    "9de29b6ca59a05747623c0dc32344b14",
+    "c215a4284425ea888f95d084a3d52e51",
  "ESSL static builtins:src/compiler/translator/builtin_function_declarations.txt":
    "e5e567406476306ea06984d885be028d",
  "ESSL static builtins:src/compiler/translator/builtin_variables.json":
@@ -76,9 +76,11 @@
  "Vulkan format:src/libANGLE/renderer/vulkan/vk_format_map.json":
    "84f988ff75f4d5b8f2a5d572ee8c51cc",
  "Vulkan internal shader programs:src/libANGLE/renderer/vulkan/gen_vk_internal_shaders.py":
-    "3faf63ba4dac90b949c96a3e382ba81b",
+    "1c64f7187357d7561c984ec57d251e74",
  "Vulkan internal shader programs:src/libANGLE/renderer/vulkan/shaders/src/BufferUtils.comp":
    "0c8c050841543da0d7faca2559212aa8",
+  "Vulkan internal shader programs:src/libANGLE/renderer/vulkan/shaders/src/ConvertVertex.comp":
+    "93649f61036c2fa4739988ad71f413df",
  "Vulkan internal shader programs:src/libANGLE/renderer/vulkan/shaders/src/FullScreenQuad.vert":
    "1743adf55153edf91363fa7b4350d859",
  "Vulkan internal shader programs:src/libANGLE/renderer/vulkan/shaders/src/PushConstantColor.frag":

--- a/src/libANGLE/renderer/Format.h
+++ b/src/libANGLE/renderer/Format.h
@@ -35,7 +35,8 @@ struct Format final : private angle::NonCopyable
                     GLuint depthBits,
                     GLuint stencilBits,
                     GLuint pixelBytes,
-                     bool isBlock);
+                     bool isBlock,
+                     bool isFixed);
    static const Format &Get(FormatID id);
    static FormatID InternalFormatToID(GLenum internalFormat);
@@ -75,6 +76,7 @@ struct Format final : private angle::NonCopyable
    GLuint pixelBytes;
    bool isBlock;
+    bool isFixed;
 };
 constexpr Format::Format(FormatID id,
@@ -92,7 +94,8 @@ constexpr Format::Format(FormatID id,
                         GLuint depthBits,
                         GLuint stencilBits,
                         GLuint pixelBytes,
-                         bool isBlock)
+                         bool isBlock,
+                         bool isFixed)
    : id(id),
      glInternalFormat(glFormat),
      fboImplementationInternalFormat(fboFormat),
@@ -108,7 +111,8 @@ constexpr Format::Format(FormatID id,
      depthBits(depthBits),
      stencilBits(stencilBits),
      pixelBytes(pixelBytes),
-      isBlock(isBlock)
+      isBlock(isBlock),
+      isFixed(isFixed)
 {}
 constexpr bool Format::hasDepthOrStencilBits() const

--- a/src/libANGLE/renderer/Format_table_autogen.cpp
+++ b/src/libANGLE/renderer/Format_table_autogen.cpp
--- a/src/libANGLE/renderer/gen_angle_format_table.py
+++ b/src/libANGLE/renderer/gen_angle_format_table.py
@@ -63,7 +63,7 @@ static constexpr rx::FastCopyFunctionMap NoCopyFunctions;
 constexpr Format g_formatInfoTable[] = {{
    // clang-format off
-    {{ FormatID::NONE, GL_NONE, GL_NONE, nullptr, NoCopyFunctions, nullptr, nullptr, GL_NONE, 0, 0, 0, 0, 0, 0, 0, false }},
+    {{ FormatID::NONE, GL_NONE, GL_NONE, nullptr, NoCopyFunctions, nullptr, nullptr, GL_NONE, 0, 0, 0, 0, 0, 0, 0, false, false }},
 {angle_format_info_cases}    // clang-format on
 }};
@@ -170,7 +170,7 @@ def get_color_write_function(angle_format):
    return 'WriteColor<' + channel_struct + ', '+ write_component_type + '>'
-format_entry_template = """    {{ FormatID::{id}, {glInternalFormat}, {fboImplementationInternalFormat}, {mipGenerationFunction}, {fastCopyFunctions}, {colorReadFunction}, {colorWriteFunction}, {namedComponentType}, {R}, {G}, {B}, {A}, {D}, {S}, {pixelBytes}, {isBlock} }},
+format_entry_template = """    {{ FormatID::{id}, {glInternalFormat}, {fboImplementationInternalFormat}, {mipGenerationFunction}, {fastCopyFunctions}, {colorReadFunction}, {colorWriteFunction}, {namedComponentType}, {R}, {G}, {B}, {A}, {D}, {S}, {pixelBytes}, {isBlock}, {isFixed} }},
 """
 def get_named_component_type(component_type):
@@ -237,6 +237,7 @@ def json_to_table_data(format_id, json, angle_to_gl):
        sum_of_bits += int(parsed[channel])
    parsed["pixelBytes"] = sum_of_bits / 8
    parsed["isBlock"] = "true" if format_id.endswith("_BLOCK") else "false"
+    parsed["isFixed"] = "true" if "FIXED" in format_id else "false"
    return format_entry_template.format(**parsed)

--- a/src/libANGLE/renderer/vulkan/BufferVk.cpp
+++ b/src/libANGLE/renderer/vulkan/BufferVk.cpp
@@ -18,6 +18,15 @@
 namespace rx
 {
+namespace
+{
+// Vertex attribute buffers are used as storage buffers for conversion in compute, where access to
+// the buffer is made in 4-byte chunks.  Assume the size of the buffer is 4k+n where n is in [0, 4).
+// On some hardware, reading 4 bytes from address 4k returns 0, making it impossible to read the
+// last n bytes.  By rounding up the buffer sizes to a multiple of 4, the problem is alleviated.
+constexpr size_t kBufferSizeGranularity = 4;
+}  // namespace
 BufferVk::BufferVk(const gl::BufferState &state) : BufferImpl(state) {}
 BufferVk::~BufferVk() {}
@@ -53,12 +62,12 @@ angle::Result BufferVk::setData(const gl::Context *context,
        const VkImageUsageFlags usageFlags =
            VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
            VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
-            VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
+            VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
        VkBufferCreateInfo createInfo    = {};
        createInfo.sType                 = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
        createInfo.flags                 = 0;
-        createInfo.size                  = size;
+        createInfo.size                  = roundUp(size, kBufferSizeGranularity);
        createInfo.usage                 = usageFlags;
        createInfo.sharingMode           = VK_SHARING_MODE_EXCLUSIVE;
        createInfo.queueFamilyIndexCount = 0;

--- a/src/libANGLE/renderer/vulkan/DispatchUtilsVk.cpp
+++ b/src/libANGLE/renderer/vulkan/DispatchUtilsVk.cpp
--- a/src/libANGLE/renderer/vulkan/DispatchUtilsVk.h
+++ b/src/libANGLE/renderer/vulkan/DispatchUtilsVk.h
@@ -10,6 +10,9 @@
 //    - Buffer clear: Implemented, but no current users
 //    - Buffer copy:
 //      * Used by VertexArrayVk::updateIndexTranslation() to convert a ubyte index array to ushort
+//    - Convert vertex attribute:
+//      * Used by VertexArrayVk::convertVertexBuffer() to convert vertex attributes from unsupported
+//        formats to their fallbacks.
 //    - Mipmap generation: Not yet implemented
 //
@@ -22,10 +25,6 @@
 namespace rx
 {
-class BufferVk;
-class RendererVk;
 class DispatchUtilsVk : angle::NonCopyable
 {
  public:
@@ -48,6 +47,16 @@ class DispatchUtilsVk : angle::NonCopyable
        size_t size;
    };
+    struct ConvertVertexParameters
+    {
+        size_t vertexCount;
+        const angle::Format *srcFormat;
+        const angle::Format *destFormat;
+        size_t srcStride;
+        size_t srcOffset;
+        size_t destOffset;
+    };
    angle::Result clearBuffer(vk::Context *context,
                              vk::BufferHelper *dest,
                              const ClearParameters &params);
@@ -55,9 +64,13 @@ class DispatchUtilsVk : angle::NonCopyable
                             vk::BufferHelper *dest,
                             vk::BufferHelper *src,
                             const CopyParameters &params);
+    angle::Result convertVertexBuffer(vk::Context *context,
+                                      vk::BufferHelper *dest,
+                                      vk::BufferHelper *src,
+                                      const ConvertVertexParameters &params);
  private:
-    struct ShaderParams
+    struct BufferUtilsShaderParams
    {
        // Structure matching PushConstants in BufferUtils.comp
        uint32_t destOffset          = 0;
@@ -67,25 +80,58 @@ class DispatchUtilsVk : angle::NonCopyable
        VkClearColorValue clearValue = {};
    };
-    // Common function that creates the pipeline for the specified function, binds it and prepares
+    struct ConvertVertexShaderParams
-    // the dispatch call. The possible values of `function` comes from
+    {
-    // vk::InternalShader::BufferUtils_comp defined in vk_internal_shaders_autogen.h
+        // Structure matching PushConstants in ConvertVertex.comp
-    angle::Result setupProgram(vk::Context *context,
+        uint32_t outputCount    = 0;
-                               uint32_t function,
+        uint32_t componentCount = 0;
-                               const VkDescriptorSet &descriptorSet,
+        uint32_t srcOffset      = 0;
-                               const ShaderParams &params,
+        uint32_t destOffset     = 0;
-                               vk::CommandBuffer *commandBuffer);
+        uint32_t Ns             = 0;
+        uint32_t Bs             = 0;
+        uint32_t Ss             = 0;
+        uint32_t Es             = 0;
+        uint32_t Nd             = 0;
+        uint32_t Bd             = 0;
+        uint32_t Sd             = 0;
+        uint32_t Ed             = 0;
+    };
    // Functions implemented by the class:
    enum class Function
    {
-        BufferClear = 0,
+        BufferClear         = 0,
-        BufferCopy  = 1,
+        BufferCopy          = 1,
+        ConvertVertexBuffer = 2,
-        InvalidEnum = 2,
+        InvalidEnum = 3,
-        EnumCount   = 2,
+        EnumCount   = 3,
    };
+    // Common function that creates the pipeline for the specified function, binds it and prepares
+    // the dispatch call. The possible values of `flags` come from
+    // vk::InternalShader::* defined in vk_internal_shaders_autogen.h
+    angle::Result setupProgramCommon(vk::Context *context,
+                                     Function function,
+                                     vk::RefCounted<vk::ShaderAndSerial> *shader,
+                                     vk::ShaderProgramHelper *program,
+                                     const VkDescriptorSet descriptorSet,
+                                     const void *pushConstants,
+                                     size_t pushConstantsSize,
+                                     vk::CommandBuffer *commandBuffer);
+    using GetShader = angle::Result (vk::ShaderLibrary::*)(vk::Context *,
+                                                           uint32_t,
+                                                           vk::RefCounted<vk::ShaderAndSerial> **);
+    template <GetShader getShader, Function function, typename ShaderParams>
+    angle::Result setupProgram(vk::Context *context,
+                               vk::ShaderProgramHelper *program,
+                               uint32_t flags,
+                               const VkDescriptorSet &descriptorSet,
+                               const ShaderParams &params,
+                               vk::CommandBuffer *commandBuffer);
    // Initializes descriptor set layout, pipeline layout and descriptor pool corresponding to given
    // function, if not already initialized.  Uses setSizes to create the layout.  For example, if
    // this array has two entries {STORAGE_TEXEL_BUFFER, 1} and {UNIFORM_TEXEL_BUFFER, 3}, then the
@@ -94,20 +140,26 @@ class DispatchUtilsVk : angle::NonCopyable
    angle::Result ensureResourcesInitialized(vk::Context *context,
                                             Function function,
                                             VkDescriptorPoolSize *setSizes,
-                                             size_t setSizesCount);
+                                             size_t setSizesCount,
+                                             size_t pushConstantsSize);
    // Initializers corresponding to functions, calling into ensureResourcesInitialized with the
    // appropriate parameters.
    angle::Result ensureBufferClearInitialized(vk::Context *context);
    angle::Result ensureBufferCopyInitialized(vk::Context *context);
+    angle::Result ensureConvertVertexInitialized(vk::Context *context);
    angle::PackedEnumMap<Function, vk::DescriptorSetLayoutPointerArray> mDescriptorSetLayouts;
    angle::PackedEnumMap<Function, vk::BindingPointer<vk::PipelineLayout>> mPipelineLayouts;
    angle::PackedEnumMap<Function, vk::DynamicDescriptorPool> mDescriptorPools;
-    vk::ShaderProgramHelper mPrograms[vk::InternalShader::BufferUtils_comp::kFlagsMask |
+    vk::ShaderProgramHelper
-                                      vk::InternalShader::BufferUtils_comp::kFunctionMask |
+        mBufferUtilsPrograms[vk::InternalShader::BufferUtils_comp::kFlagsMask |
-                                      vk::InternalShader::BufferUtils_comp::kFormatMask];
+                             vk::InternalShader::BufferUtils_comp::kFunctionMask |
+                             vk::InternalShader::BufferUtils_comp::kFormatMask];
+    vk::ShaderProgramHelper
+        mConvertVertexPrograms[vk::InternalShader::ConvertVertex_comp::kFlagsMask |
+                               vk::InternalShader::ConvertVertex_comp::kConversionMask];
 };
 }  // namespace rx

--- a/src/libANGLE/renderer/vulkan/RendererVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp
@@ -791,6 +791,13 @@ void RendererVk::initFeatures()
    {
        mFeatures.clampPointSize = true;
    }
+#if defined(ANGLE_PLATFORM_ANDROID)
+    // Work around ineffective compute-graphics barrier in android.
+    // TODO(syoussefi): Figure out which vendors and driver versions are affected.
+    // http://anglebug.com/3009
+    mFeatures.flushAfterVertexConversion = true;
+#endif
 }
 void RendererVk::initPipelineCacheVkKey()
@@ -1040,7 +1047,8 @@ angle::Result RendererVk::submitFrame(vk::Context *context,
    // CPU should be throttled to avoid mInFlightCommands from growing too fast.  That is done on
    // swap() though, and there could be multiple submissions in between (through glFlush() calls),
-    // so the limit is larger than the expected number of images.
+    // so the limit is larger than the expected number of images.  The
+    // InterleavedAttributeDataBenchmark perf test for example issues a large number of flushes.
    ASSERT(mInFlightCommands.size() <= kInFlightCommandsLimit);
    // Increment the queue serial. If this fails, we should restart ANGLE.

--- a/src/libANGLE/renderer/vulkan/RendererVk.h
+++ b/src/libANGLE/renderer/vulkan/RendererVk.h
@@ -164,7 +164,7 @@ class RendererVk : angle::NonCopyable
    vk::ShaderLibrary &getShaderLibrary() { return mShaderLibrary; }
    angle::Result getFullScreenClearShaderProgram(vk::Context *context,
                                                  vk::ShaderProgramHelper **programOut);
-    DispatchUtilsVk *getDispatchUtils() { return &mDispatchUtils; }
+    DispatchUtilsVk &getDispatchUtils() { return mDispatchUtils; }
    const angle::FeaturesVk &getFeatures() const { return mFeatures; }
    angle::Result getTimestamp(vk::Context *context, uint64_t *timestampOut);

--- a/src/libANGLE/renderer/vulkan/VertexArrayVk.cpp
+++ b/src/libANGLE/renderer/vulkan/VertexArrayVk.cpp
@@ -23,12 +23,11 @@ namespace rx
 {
 namespace
 {
-constexpr size_t kDynamicVertexDataSize              = 1024 * 1024;
+constexpr size_t kDynamicVertexDataSize    = 1024 * 1024;
-constexpr size_t kDynamicIndexDataSize               = 1024 * 8;
+constexpr size_t kDynamicIndexDataSize     = 1024 * 8;
-constexpr size_t kMaxVertexFormatAlignment           = 4;
+constexpr size_t kMaxVertexFormatAlignment = 4;
-constexpr VkBufferUsageFlags kVertexBufferUsageFlags = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
+constexpr VkBufferUsageFlags kVertexBufferUsageFlags =
-                                                       VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
+    VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
-                                                       VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
 constexpr VkBufferUsageFlags kIndexBufferUsageFlags = VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
                                                      VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
                                                      VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
@@ -60,6 +59,23 @@ angle::Result StreamVertexData(ContextVk *contextVk,
    return angle::Result::Continue;
 }
+size_t GetVertexCount(BufferVk *srcBuffer, const gl::VertexBinding &binding, uint32_t srcFormatSize)
+{
+    // Bytes usable for vertex data.
+    GLint64 bytes = srcBuffer->getSize() - binding.getOffset();
+    if (bytes < srcFormatSize)
+        return 0;
+    // Count the last vertex.  It may occupy less than a full stride.
+    size_t numVertices = 1;
+    bytes -= srcFormatSize;
+    // Count how many strides fit remaining space.
+    if (bytes > 0)
+        numVertices += static_cast<size_t>(bytes) / binding.getStride();
+    return numVertices;
+}
 }  // anonymous namespace
 #define INIT                                    \
@@ -174,10 +190,60 @@ angle::Result VertexArrayVk::streamIndexData(ContextVk *contextVk,
 // and/or align it as we copy it to a DynamicBuffer. The assumption could be wrong
 // but the alternative of copying it piecemeal on each draw would have a lot more
 // overhead.
-angle::Result VertexArrayVk::convertVertexBuffer(ContextVk *contextVk,
+angle::Result VertexArrayVk::convertVertexBufferGpu(ContextVk *contextVk,
-                                                 BufferVk *srcBuffer,
+                                                    BufferVk *srcBuffer,
-                                                 const gl::VertexBinding &binding,
+                                                    const gl::VertexBinding &binding,
-                                                 size_t attribIndex)
+                                                    size_t attribIndex)
+{
+    RendererVk *renderer = contextVk->getRenderer();
+    const angle::Format &srcFormat  = mCurrentArrayBufferFormats[attribIndex]->angleFormat();
+    const angle::Format &destFormat = mCurrentArrayBufferFormats[attribIndex]->bufferFormat();
+    ASSERT(binding.getStride() % (srcFormat.pixelBytes / srcFormat.channelCount()) == 0);
+    unsigned srcFormatSize  = srcFormat.pixelBytes;
+    unsigned destFormatSize = destFormat.pixelBytes;
+    size_t numVertices = GetVertexCount(srcBuffer, binding, srcFormatSize);
+    if (numVertices == 0)
+    {
+        return angle::Result::Continue;
+    }
+    ASSERT(GetVertexInputAlignment(*mCurrentArrayBufferFormats[attribIndex]) <=
+           kMaxVertexFormatAlignment);
+    // Allocate buffer for results
+    mCurrentArrayBufferConversion[attribIndex].releaseRetainedBuffers(renderer);
+    ANGLE_TRY(mCurrentArrayBufferConversion[attribIndex].allocate(
+        contextVk, numVertices * destFormatSize, nullptr, nullptr,
+        &mCurrentArrayBufferOffsets[attribIndex], nullptr));
+    mCurrentArrayBuffers[attribIndex] =
+        mCurrentArrayBufferConversion[attribIndex].getCurrentBuffer();
+    DispatchUtilsVk::ConvertVertexParameters params;
+    params.vertexCount = numVertices;
+    params.srcFormat   = &srcFormat;
+    params.destFormat  = &destFormat;
+    params.srcStride   = binding.getStride();
+    params.srcOffset   = binding.getOffset();
+    params.destOffset  = static_cast<size_t>(mCurrentArrayBufferOffsets[attribIndex]);
+    ANGLE_TRY(renderer->getDispatchUtils().convertVertexBuffer(
+        contextVk, mCurrentArrayBuffers[attribIndex], &srcBuffer->getBuffer(), params));
+    mCurrentArrayBufferHandles[attribIndex] =
+        mCurrentArrayBuffers[attribIndex]->getBuffer().getHandle();
+    mCurrentArrayBufferConversionCanRelease[attribIndex] = true;
+    return angle::Result::Continue;
+}
+angle::Result VertexArrayVk::convertVertexBufferCpu(ContextVk *contextVk,
+                                                    BufferVk *srcBuffer,
+                                                    const gl::VertexBinding &binding,
+                                                    size_t attribIndex)
 {
    // Needed before reading buffer or we could get stale data.
    ANGLE_TRY(contextVk->getRenderer()->finish(contextVk));
@@ -187,18 +253,11 @@ angle::Result VertexArrayVk::convertVertexBuffer(ContextVk *contextVk,
    mCurrentArrayBufferConversion[attribIndex].releaseRetainedBuffers(contextVk->getRenderer());
-    // Bytes usable for vertex data.
+    size_t numVertices = GetVertexCount(srcBuffer, binding, srcFormatSize);
-    GLint64 bytes = srcBuffer->getSize() - binding.getOffset();
+    if (numVertices == 0)
-    if (bytes < srcFormatSize)
+    {
        return angle::Result::Continue;
+    }
-    // Count the last vertex.  It may occupy less than a full stride.
-    size_t numVertices = 1;
-    bytes -= srcFormatSize;
-    // Count how many strides fit remaining space.
-    if (bytes > 0)
-        numVertices += static_cast<size_t>(bytes) / binding.getStride();
    void *src = nullptr;
    ANGLE_TRY(srcBuffer->mapImpl(contextVk, &src));
@@ -322,8 +381,9 @@ angle::Result VertexArrayVk::syncDirtyAttrib(ContextVk *contextVk,
    // Invalidate the input description for pipelines.
    mDirtyPackedInputs.set(attribIndex);
-    RendererVk *renderer   = contextVk->getRenderer();
+    RendererVk *renderer               = contextVk->getRenderer();
-    bool releaseConversion = true;
+    bool releaseConversion             = true;
+    bool anyVertexBufferConvertedOnGpu = false;
    if (attrib.enabled)
    {
@@ -335,14 +395,25 @@ angle::Result VertexArrayVk::syncDirtyAttrib(ContextVk *contextVk,
            BufferVk *bufferVk = vk::GetImpl(bufferGL);
            unsigned componentSize =
                mCurrentArrayBufferFormats[attribIndex]->angleFormat().pixelBytes / attrib.size;
+            bool bindingIsAligned = BindingIsAligned(binding, componentSize);
            if (mCurrentArrayBufferFormats[attribIndex]->vertexLoadRequiresConversion ||
-                !BindingIsAligned(binding, componentSize))
+                !bindingIsAligned)
            {
                mCurrentArrayBufferStrides[attribIndex] =
                    mCurrentArrayBufferFormats[attribIndex]->bufferFormat().pixelBytes;
-                ANGLE_TRY(convertVertexBuffer(contextVk, bufferVk, binding, attribIndex));
+                if (bindingIsAligned)
+                {
+                    ANGLE_TRY(convertVertexBufferGpu(contextVk, bufferVk, binding, attribIndex));
+                    anyVertexBufferConvertedOnGpu = true;
+                }
+                else
+                {
+                    // TODO(syoussefi): Implement unaligned vertex buffer conversions in compute.
+                    // http://anglebug.com/3009
+                    ANGLE_TRY(convertVertexBufferCpu(contextVk, bufferVk, binding, attribIndex));
+                }
                releaseConversion = false;
            }
@@ -377,6 +448,11 @@ angle::Result VertexArrayVk::syncDirtyAttrib(ContextVk *contextVk,
            &renderer->getFormat(angle::FormatID::R32G32B32A32_FLOAT);
    }
+    if (anyVertexBufferConvertedOnGpu && renderer->getFeatures().flushAfterVertexConversion)
+    {
+        ANGLE_TRY(renderer->flush(contextVk));
+    }
    if (releaseConversion)
        ensureConversionReleased(renderer, attribIndex);
@@ -603,7 +679,7 @@ angle::Result VertexArrayVk::updateIndexTranslation(ContextVk *contextVk,
        // Note: this is a copy, which implicitly converts between formats.  Once support for
        // primitive restart is added, a specialized shader is likely needed to special case 0xFF ->
        // 0xFFFF.
-        ANGLE_TRY(renderer->getDispatchUtils()->copyBuffer(contextVk, dest, src, params));
+        ANGLE_TRY(renderer->getDispatchUtils().copyBuffer(contextVk, dest, src, params));
    }
    else
    {

--- a/src/libANGLE/renderer/vulkan/VertexArrayVk.h
+++ b/src/libANGLE/renderer/vulkan/VertexArrayVk.h
@@ -103,10 +103,14 @@ class VertexArrayVk : public VertexArrayImpl
                                  size_t indexCount,
                                  const void *sourcePointer,
                                  vk::DynamicBuffer *dynamicBuffer);
-    angle::Result convertVertexBuffer(ContextVk *contextVk,
+    angle::Result convertVertexBufferGpu(ContextVk *contextVk,
-                                      BufferVk *srcBuffer,
+                                         BufferVk *srcBuffer,
-                                      const gl::VertexBinding &binding,
+                                         const gl::VertexBinding &binding,
-                                      size_t attribIndex);
+                                         size_t attribIndex);
+    angle::Result convertVertexBufferCpu(ContextVk *contextVk,
+                                         BufferVk *srcBuffer,
+                                         const gl::VertexBinding &binding,
+                                         size_t attribIndex);
    void ensureConversionReleased(RendererVk *renderer, size_t attribIndex);
    angle::Result syncDirtyAttrib(ContextVk *contextVk,

--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000000.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000000.inc
--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000001.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000001.inc
--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000002.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000002.inc
--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000003.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000003.inc
--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000004.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000004.inc
--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000005.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000005.inc
--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000006.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000006.inc
--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000007.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000007.inc
--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000008.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000008.inc
--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000009.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000009.inc
--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000A.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000A.inc
--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000B.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000B.inc
--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000C.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000C.inc
--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000D.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000D.inc
--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000E.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000E.inc
--- a/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000F.inc
+++ b/src/libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000F.inc
--- a/src/libANGLE/renderer/vulkan/shaders/src/ConvertVertex.comp
+++ b/src/libANGLE/renderer/vulkan/shaders/src/ConvertVertex.comp
+//
+// Copyright 2018 The ANGLE Project Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// ConvertVertex.comp: vertex buffer conversion.  Implements functionality in copyvertex.inc.
+//
+// Each thread of the dispatch call fills in one 4-byte element, no matter how many components
+// fit in it.  The src data is laid out in the most general form as follows.  Note that component
+// size is assumed to divide buffer stride.
+//
+//    Ns components, each Bs bytes
+//         ____^_____
+//        /          |
+//       +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+//       |C1|C2|..|CN|..|..|..|..|C1|C2|..|CN|..|..|..|..|C1|C2|..|CN| ... Repeated V times
+//       +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+//        \__________ __________/
+//                   V
+//           Ss bytes of stride
+//
+// The output is the array of components converted to the destination format (each Bd bytes) with
+// stride Sd = Nd*Bd (i.e. packed).  The output size is therefore V*Nd*Bd bytes.  The dispatch size
+// is accordingly ceil(V*Nd*Bd / 4).
+//
+// The input is received in 4-byte elements, therefore each element has Es=4/Bs components.
+//
+// To output exactly one 4-byte element, each thread is responsible for Ed=4/Bd components.
+// Therefore, thread t is responsible for component indices [Ed*t, Ed*(t + 1)).
+//
+// Component index c is at source offset:
+//
+//     floor(c / Ns) * Ss + mod(c, Ns) * Bs
+//
+//   - Flags:
+//     * IsAligned: if true, assumes the workgroup size divides the output count, so there is no
+//                  need for bound checking.
+//     * IsBigEndian
+//   - Conversion:
+//     * IntToInt: covers byte, short and int types (distinguished by Bs and Bd).
+//     * UintToUint: covers ubyte, ushort and uint types (distinguished by Bs and Bd).
+//     * IntToFloat: Same types as IntToInt for source (including scaled).  Converts to float.
+//     * UintToFloat: Same types as UintToUint for source (including uscaled).  Converts to float.
+//     * SnormToFloat: Similar to IntToFloat, but normalized.
+//     * UnormToFloat: Similar to UintToFloat, but normalized.
+//     * FixedToFloat: 16.16 signed fixed-point to floating point.
+//     * FloatToFloat: float.
+//
+// IntToInt, UintToUint and FloatToFloat correspond to CopyNativeVertexData() and
+// Copy8SintTo16SintVertexData() in renderer/copyvertex.inc, FixedToFloat corresponds to
+// Copy32FixedTo32FVertexData, IntToFloat and UintToFloat correspond to CopyTo32FVertexData with
+// normalized=false and SnormToFloat and UnormToFloat correspond to CopyTo32FVertexData with
+// normalized=true.
+//
+#version 450 core
+// Source type
+#if IntToInt || IntToFloat
+#define SrcType int
+#elif UintToUint || UintToFloat
+#define SrcType uint
+#elif SnormToFloat || UnormToFloat || FixedToFloat || FloatToFloat
+#define SrcType float
+#else
+#error "Not all conversions are accounted for"
+#endif
+// Destination type
+#if IntToInt
+#define DestType int
+#define IsDestFloat 0
+#elif UintToUint
+#define DestType uint
+#define IsDestFloat 0
+#elif IntToFloat || UintToFloat || SnormToFloat || UnormToFloat || FixedToFloat || FloatToFloat
+#define DestType float
+#define IsDestFloat 1
+#else
+#error "Not all conversions are accounted for"
+#endif
+layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+layout (set = 0, binding = 0) buffer dest
+{
+    uint destData[];
+};
+layout (set = 0, binding = 1) buffer src
+{
+    uint srcData[];
+};
+layout (push_constant) uniform PushConstants
+{
+    // outputs to write (= total number of components / Ed): used for range checking
+    uint outputCount;
+    // total number of output components: used for range checking
+    uint componentCount;
+    // source and destination offsets are handled in the shader (instead of binding the buffer with
+    // these offsets), as the binding offset requires alignment with
+    // minStorageBufferOffsetAlignment, which is impossible to enforce on source, and therefore
+    // would limit the usability of the shader.  Note that source is a storage buffer, instead of a
+    // uniform buffer, so it wouldn't be affected by the possibly smaller max size of uniform
+    // buffers.
+    uint srcOffset;
+    uint destOffset;
+    // Parameters from the above explanation
+    uint Ns;       // Number of source components in one vertex attribute
+    uint Bs;       // Source component byte size
+    uint Ss;       // Source vertex attribute byte stride
+    uint Es;       // Precalculated 4/Bs
+    uint Nd;       // Number of destination components in one vertex attribute
+    uint Bd;       // Destination component byte size
+    uint Sd;       // Precalculated Nd*Bd
+    uint Ed;       // Precalculated 4/Bd
+} params;
+// Define shorthands for more readable formulas:
+#define Ns params.Ns
+#define Ss params.Ss
+#define Nd params.Nd
+#define Sd params.Sd
+// With fixed-point and float types, Bs and Bd can only be 4, so they are hardcoded for more
+// efficiency.
+#if FixedToFloat || FloatToFloat
+#define Bs 4
+#define Es 1
+#else
+#define Bs params.Bs
+#define Es params.Es
+#endif
+#if IsDestFloat
+#define Bd 4
+#define Ed 1
+#else
+#define Bd params.Bd
+#define Ed params.Ed
+#endif
+// Byte offset, from the start of the source buffer, of |component| within |vertex|: the source
+// offset, plus |vertex| strides of Ss bytes, plus |component| components of Bs bytes each.
+uint getSourceComponentOffset(uint vertex, uint component)
+{
+    uint vertexStart = params.srcOffset + vertex * Ss;
+    uint componentStart = component * Bs;
+    return vertexStart + componentStart;
+}
+// Byte offset, from the start of the destination buffer, of |component| within |vertex|: the
+// destination offset, plus |vertex| strides of Sd bytes, plus |component| components of Bd bytes
+// each.
+uint getDestinationComponentOffset(uint vertex, uint component)
+{
+    uint vertexStart = params.destOffset + vertex * Sd;
+    uint componentStart = component * Bd;
+    return vertexStart + componentStart;
+}
+// Given the byte offset of a B-byte component, calculates the bit shift required to extract it
+// from (or store it into) the 4-byte word that contains it.
+//
+// On little endian, the shift is (offset % 4) * 8:
+//
+//     B == 1: 0->0, 1->8, 2->16, 3->24
+//     B == 2: 0->0, 2->16   (1 and 3 are impossible values as B is assumed to divide the stride)
+//     B == 4: 0->0          (similarly, 1, 2, and 3 are impossible values)
+//
+// On big endian (IsBigEndian), the most-significant bits contain the first components, so the
+// shift count is reversed to (4 - B - offset % 4) * 8:
+//
+//     B == 1: 0->24, 1->16, 2->8, 3->0
+//     B == 2: 0->16, 2->0
+//     B == 4: 0->0
+uint getShiftBits(uint offset, uint B)
+{
+    uint byteInWord = offset % 4;
+#if IsBigEndian
+    return (4 - B) * 8 - byteInWord * 8;
+#else
+    return byteInWord * 8;
+#endif
+}
+// Loads the source component that feeds destination component |cd| and returns it as SrcType.
+// Signed sources are sign extended, normalized and fixed-point sources are scaled to their
+// floating point value, and float sources are reinterpreted from their bits.  Destination
+// components with no source counterpart (component index >= Ns) read as 0.
+SrcType loadSourceComponent(uint cd)
+{
+    // cd is component index in the destination buffer
+    uint vertex = cd / Nd;
+    uint component = cd % Nd;
+    // If no such component, return 0
+    if (component >= Ns)
+    {
+        return 0;
+    }
+    // Load the 4-byte block containing the source component and extract its Bs bytes.
+    uint offset = getSourceComponentOffset(vertex, component);
+    uint block = srcData[offset / 4];
+    uint shiftBits = getShiftBits(offset, Bs);
+    uint valueBits = Bs * 8;
+    uint valueMask = valueBits == 32 ? -1 : (1 << valueBits) - 1;
+    uint valueAsUint = (block >> shiftBits) & valueMask;
+#if IntToInt || IntToFloat || SnormToFloat
+    // Sign extend.  A 32-bit component already occupies the whole word, and the result of shifting
+    // by 32 bits is undefined in GLSL, so only narrower components are extended.
+    if (valueBits < 32)
+    {
+        bool isNegative = (valueAsUint & (1 << (valueBits - 1))) != 0;
+        uint signExtension = isNegative ? 0xFFFFFFFF << valueBits : 0;
+        valueAsUint |= signExtension;
+    }
+#endif
+    // Convert to SrcType
+#if IntToInt || IntToFloat
+    SrcType value = SrcType(valueAsUint);
+#elif UintToUint || UintToFloat
+    SrcType value = valueAsUint;
+#elif SnormToFloat
+    int valueAsInt = int(valueAsUint);
+    // Scale [-P-1, P] to [-1, 1], where valueMask = 2P+1
+    SrcType value = (2 * float(valueAsInt) + 1) / valueMask;
+#elif UnormToFloat
+    float positiveMax = valueMask;
+    // Scale [0, P] to [0, 1]
+    SrcType value = valueAsUint / positiveMax;
+#elif FixedToFloat
+    // 16.16 fixed point: the integer value is 65536 times the real value.
+    float divisor = 1.0f / 65536.0f;
+    SrcType value = int(valueAsUint) * divisor;
+#elif FloatToFloat
+    SrcType value = uintBitsToFloat(valueAsUint);
+#else
+#error "Not all conversions are accounted for"
+#endif
+    return value;
+}
+// Converts a loaded source value to the destination type.
+DestType convertComponent(SrcType srcValue)
+{
+    // srcValue already holds the final value in every conversion; at most an implicit
+    // SrcType -> DestType cast is needed, which the initialization below performs.
+    DestType destValue = srcValue;
+    return destValue;
+}
+// Returns the bits of |value| positioned where destination component |cd| lives inside its 4-byte
+// output word.  The results of multiple calls are meant to be |ed together and then written to
+// the destination.
+uint makeDestinationComponent(uint cd, DestType value)
+{
+#if IsDestFloat
+    // If the destination is float, it will occupy the whole result.
+    uint wordBits = floatBitsToUint(value);
+#elif IntToInt || UintToUint
+    // Narrow the value to Bd bytes and move it to its place in the word.
+    uint byteOffset = getDestinationComponentOffset(cd / Nd, cd % Nd);
+    uint bitCount = Bd * 8;
+    uint mask = bitCount == 32 ? -1 : (1 << bitCount) - 1;
+    uint wordBits = (uint(value) & mask) << getShiftBits(byteOffset, Bd);
+#else
+#error "Not all conversions are accounted for"
+#endif
+    return wordBits;
+}
+// Writes the assembled 4-byte word to this invocation's slot in the destination buffer.
+void storeDestinationComponents(uint valueAsUint)
+{
+    // params.destOffset is in bytes, while destData is indexed in 4-byte elements.  Note that the
+    // destination allocations are always aligned to kMaxVertexFormatAlignment.
+    uint firstElement = params.destOffset / 4;
+    destData[firstElement + gl_GlobalInvocationID.x] = valueAsUint;
+}
+// Each invocation converts the Ed components that make up one 4-byte output element and writes
+// that element to the destination buffer.
+void main()
+{
+    uint elementIndex = gl_GlobalInvocationID.x;
+#if !IsAligned
+    // Without IsAligned, the dispatch may contain invocations past the end of the output.
+    if (elementIndex >= params.outputCount)
+    {
+        return;
+    }
+#endif
+    uint firstComponent = elementIndex * Ed;
+    uint packedElement = 0;
+    for (uint slot = 0; slot < Ed; ++slot)
+    {
+        uint cd = firstComponent + slot;
+#if !IsAligned
+        // The last element may be only partially covered by components.
+        if (cd >= params.componentCount)
+        {
+            break;
+        }
+#endif
+        packedElement |= makeDestinationComponent(cd, convertComponent(loadSourceComponent(cd)));
+    }
+    storeDestinationComponents(packedElement);
+}
--- a/src/libANGLE/renderer/vulkan/shaders/src/ConvertVertex.comp.json
+++ b/src/libANGLE/renderer/vulkan/shaders/src/ConvertVertex.comp.json
+{
+    "Description": [
+        "Copyright 2018 The ANGLE Project Authors. All rights reserved.",
+        "Use of this source code is governed by a BSD-style license that can be",
+        "found in the LICENSE file.",
+        "",
+        "ConvertVertex.comp.json: Build parameters for ConvertVertex.comp."
+    ],
+    "Flags": [
+        "IsAligned"
+    ],
+    "Conversion": [
+        "IntToInt",
+        "UintToUint",
+        "IntToFloat",
+        "UintToFloat",
+        "SnormToFloat",
+        "UnormToFloat",
+        "FixedToFloat",
+        "FloatToFloat"
+    ]
+}
--- a/src/libANGLE/renderer/vulkan/vk_format_utils.cpp
+++ b/src/libANGLE/renderer/vulkan/vk_format_utils.cpp
@@ -141,9 +141,12 @@ void Format::initTextureFallback(RendererVk *renderer,
 void Format::initBufferFallback(RendererVk *renderer, const BufferFormatInitInfo *info, int numInfo)
 {
-    int i          = FindSupportedFormat(renderer, info, numInfo, HasFullBufferFormatSupport);
+    size_t skip = renderer->getFeatures().forceFallbackFormat ? 1 : 0;
-    bufferFormatID = info[i].format;
+    int i = FindSupportedFormat(renderer, info + skip, numInfo - skip, HasFullBufferFormatSupport);
-    vkBufferFormat = info[i].vkFormat;
+    i += skip;
+    bufferFormatID               = info[i].format;
+    vkBufferFormat               = info[i].vkFormat;
    vkBufferFormatIsPacked       = info[i].vkFormatIsPacked;
    vertexLoadFunction           = info[i].vertexLoadFunction;
    vertexLoadRequiresConversion = info[i].vertexLoadRequiresConversion;

--- a/src/libANGLE/renderer/vulkan/vk_internal_shaders_autogen.cpp
+++ b/src/libANGLE/renderer/vulkan/vk_internal_shaders_autogen.cpp
@@ -28,6 +28,22 @@ namespace
 #include "libANGLE/renderer/vulkan/shaders/gen/BufferUtils.comp.00000009.inc"
 #include "libANGLE/renderer/vulkan/shaders/gen/BufferUtils.comp.0000000A.inc"
 #include "libANGLE/renderer/vulkan/shaders/gen/BufferUtils.comp.0000000B.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000000.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000001.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000002.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000003.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000004.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000005.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000006.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000007.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000008.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.00000009.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000A.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000B.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000C.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000D.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000E.inc"
+#include "libANGLE/renderer/vulkan/shaders/gen/ConvertVertex.comp.0000000F.inc"
 #include "libANGLE/renderer/vulkan/shaders/gen/FullScreenQuad.vert.00000000.inc"
 #include "libANGLE/renderer/vulkan/shaders/gen/PushConstantColor.frag.00000000.inc"
@@ -52,6 +68,24 @@ constexpr ShaderBlob kBufferUtils_comp_shaders[] = {
    {kBufferUtils_comp_0000000A, sizeof(kBufferUtils_comp_0000000A)},
    {kBufferUtils_comp_0000000B, sizeof(kBufferUtils_comp_0000000B)},
 };
+constexpr ShaderBlob kConvertVertex_comp_shaders[] = {
+    {kConvertVertex_comp_00000000, sizeof(kConvertVertex_comp_00000000)},
+    {kConvertVertex_comp_00000001, sizeof(kConvertVertex_comp_00000001)},
+    {kConvertVertex_comp_00000002, sizeof(kConvertVertex_comp_00000002)},
+    {kConvertVertex_comp_00000003, sizeof(kConvertVertex_comp_00000003)},
+    {kConvertVertex_comp_00000004, sizeof(kConvertVertex_comp_00000004)},
+    {kConvertVertex_comp_00000005, sizeof(kConvertVertex_comp_00000005)},
+    {kConvertVertex_comp_00000006, sizeof(kConvertVertex_comp_00000006)},
+    {kConvertVertex_comp_00000007, sizeof(kConvertVertex_comp_00000007)},
+    {kConvertVertex_comp_00000008, sizeof(kConvertVertex_comp_00000008)},
+    {kConvertVertex_comp_00000009, sizeof(kConvertVertex_comp_00000009)},
+    {kConvertVertex_comp_0000000A, sizeof(kConvertVertex_comp_0000000A)},
+    {kConvertVertex_comp_0000000B, sizeof(kConvertVertex_comp_0000000B)},
+    {kConvertVertex_comp_0000000C, sizeof(kConvertVertex_comp_0000000C)},
+    {kConvertVertex_comp_0000000D, sizeof(kConvertVertex_comp_0000000D)},
+    {kConvertVertex_comp_0000000E, sizeof(kConvertVertex_comp_0000000E)},
+    {kConvertVertex_comp_0000000F, sizeof(kConvertVertex_comp_0000000F)},
+};
 constexpr ShaderBlob kFullScreenQuad_vert_shaders[] = {
    {kFullScreenQuad_vert_00000000, sizeof(kFullScreenQuad_vert_00000000)},
 };
@@ -93,6 +127,10 @@ void ShaderLibrary::destroy(VkDevice device)
    {
        shader.get().destroy(device);
    }
+    for (RefCounted<ShaderAndSerial> &shader : mConvertVertex_comp_shaders)
+    {
+        shader.get().destroy(device);
+    }
    for (RefCounted<ShaderAndSerial> &shader : mFullScreenQuad_vert_shaders)
    {
        shader.get().destroy(device);
@@ -111,6 +149,14 @@ angle::Result ShaderLibrary::getBufferUtils_comp(Context *context,
                     ArraySize(kBufferUtils_comp_shaders), shaderFlags, shaderOut);
 }
+angle::Result ShaderLibrary::getConvertVertex_comp(Context *context,
+                                                   uint32_t shaderFlags,
+                                                   RefCounted<ShaderAndSerial> **shaderOut)
+{
+    return GetShader(context, mConvertVertex_comp_shaders, kConvertVertex_comp_shaders,
+                     ArraySize(kConvertVertex_comp_shaders), shaderFlags, shaderOut);
+}
 angle::Result ShaderLibrary::getFullScreenQuad_vert(Context *context,
                                                    uint32_t shaderFlags,
                                                    RefCounted<ShaderAndSerial> **shaderOut)

--- a/src/libANGLE/renderer/vulkan/vk_internal_shaders_autogen.gni
+++ b/src/libANGLE/renderer/vulkan/vk_internal_shaders_autogen.gni
@@ -21,6 +21,22 @@ angle_vulkan_internal_shaders = [
  "shaders/gen/BufferUtils.comp.00000009.inc",
  "shaders/gen/BufferUtils.comp.0000000A.inc",
  "shaders/gen/BufferUtils.comp.0000000B.inc",
+  "shaders/gen/ConvertVertex.comp.00000000.inc",
+  "shaders/gen/ConvertVertex.comp.00000001.inc",
+  "shaders/gen/ConvertVertex.comp.00000002.inc",
+  "shaders/gen/ConvertVertex.comp.00000003.inc",
+  "shaders/gen/ConvertVertex.comp.00000004.inc",
+  "shaders/gen/ConvertVertex.comp.00000005.inc",
+  "shaders/gen/ConvertVertex.comp.00000006.inc",
+  "shaders/gen/ConvertVertex.comp.00000007.inc",
+  "shaders/gen/ConvertVertex.comp.00000008.inc",
+  "shaders/gen/ConvertVertex.comp.00000009.inc",
+  "shaders/gen/ConvertVertex.comp.0000000A.inc",
+  "shaders/gen/ConvertVertex.comp.0000000B.inc",
+  "shaders/gen/ConvertVertex.comp.0000000C.inc",
+  "shaders/gen/ConvertVertex.comp.0000000D.inc",
+  "shaders/gen/ConvertVertex.comp.0000000E.inc",
+  "shaders/gen/ConvertVertex.comp.0000000F.inc",
  "shaders/gen/FullScreenQuad.vert.00000000.inc",
  "shaders/gen/PushConstantColor.frag.00000000.inc",
 ]
--- a/src/libANGLE/renderer/vulkan/vk_internal_shaders_autogen.h
+++ b/src/libANGLE/renderer/vulkan/vk_internal_shaders_autogen.h
@@ -41,6 +41,27 @@ enum Format
 };
 }  // namespace BufferUtils_comp
+namespace ConvertVertex_comp
+{
+enum flags
+{
+    kIsAligned = 0x00000001,
+    kFlagsMask = 0x00000001,
+};
+enum Conversion
+{
+    kIntToInt       = 0x00000000,
+    kUintToUint     = 0x00000002,
+    kIntToFloat     = 0x00000004,
+    kUintToFloat    = 0x00000006,
+    kSnormToFloat   = 0x00000008,
+    kUnormToFloat   = 0x0000000A,
+    kFixedToFloat   = 0x0000000C,
+    kFloatToFloat   = 0x0000000E,
+    kConversionMask = 0x0000000E,
+};
+}  // namespace ConvertVertex_comp
 namespace FullScreenQuad_vert
 {}  // namespace FullScreenQuad_vert
@@ -60,6 +81,9 @@ class ShaderLibrary final : angle::NonCopyable
    angle::Result getBufferUtils_comp(Context *context,
                                      uint32_t shaderFlags,
                                      RefCounted<ShaderAndSerial> **shaderOut);
+    angle::Result getConvertVertex_comp(Context *context,
+                                        uint32_t shaderFlags,
+                                        RefCounted<ShaderAndSerial> **shaderOut);
    angle::Result getFullScreenQuad_vert(Context *context,
                                         uint32_t shaderFlags,
                                         RefCounted<ShaderAndSerial> **shaderOut);
@@ -72,6 +96,9 @@ class ShaderLibrary final : angle::NonCopyable
        mBufferUtils_comp_shaders[InternalShader::BufferUtils_comp::kFlagsMask |
                                  InternalShader::BufferUtils_comp::kFunctionMask |
                                  InternalShader::BufferUtils_comp::kFormatMask];
+    RefCounted<ShaderAndSerial>
+        mConvertVertex_comp_shaders[InternalShader::ConvertVertex_comp::kFlagsMask |
+                                    InternalShader::ConvertVertex_comp::kConversionMask];
    RefCounted<ShaderAndSerial> mFullScreenQuad_vert_shaders[1];
    RefCounted<ShaderAndSerial> mPushConstantColor_frag_shaders[1];
 };

--- a/src/tests/gl_tests/VertexAttributeTest.cpp
+++ b/src/tests/gl_tests/VertexAttributeTest.cpp
@@ -3,8 +3,8 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 //
 #include "anglebase/numerics/safe_conversions.h"
+#include "platform/FeaturesVk.h"
 #include "test_utils/ANGLETest.h"
 #include "test_utils/gl_raii.h"
@@ -208,6 +208,12 @@ class VertexAttributeTest : public ANGLETest
        ANGLETest::TearDown();
    }
+    // Override a feature to force emulation of attribute formats.
+    void overrideFeaturesVk(FeaturesVk *featuresVk) override
+    {
+        featuresVk->forceFallbackFormat = true;
+    }
    GLuint compileMultiAttribProgram(GLint attribCount)
    {
        std::stringstream shaderStream;

--- a/src/tests/perf_tests/InterleavedAttributeData.cpp
+++ b/src/tests/perf_tests/InterleavedAttributeData.cpp
@@ -185,7 +185,6 @@ void InterleavedAttributeDataBenchmark::drawBenchmark()
            // Then draw the colored pointsprites
            glDrawArrays(GL_POINTS, 0, GetParam().numSprites);
-            glFlush();
            glDisableVertexAttribArray(positionLocation);
            glDisableVertexAttribArray(colorLocation);