Commit d6c7fec1 by Shahbaz Youssefi Committed by Commit Bot

Vulkan: Support mixed column/row-major buffer fields

Adds comprehensive tests for mixed column/row-major interface blocks, which flushed out various bugs in different OpenGL drivers too. Bug: angleproject:3443 Change-Id: Ie88cca743373891bbb49d9f564f30407475e07fb Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1749334 Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org> Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
parent 472c74c6
......@@ -136,6 +136,8 @@ angle_translator_sources = [
"src/compiler/translator/tree_ops/InitializeVariables.h",
"src/compiler/translator/tree_ops/NameEmbeddedUniformStructs.cpp",
"src/compiler/translator/tree_ops/NameEmbeddedUniformStructs.h",
"src/compiler/translator/tree_ops/NameNamelessUniformBuffers.cpp",
"src/compiler/translator/tree_ops/NameNamelessUniformBuffers.h",
"src/compiler/translator/tree_ops/PruneEmptyCases.cpp",
"src/compiler/translator/tree_ops/PruneEmptyCases.h",
"src/compiler/translator/tree_ops/PruneNoOps.cpp",
......@@ -168,6 +170,8 @@ angle_translator_sources = [
"src/compiler/translator/tree_ops/RewriteStructSamplers.h",
"src/compiler/translator/tree_ops/RewriteRepeatedAssignToSwizzled.cpp",
"src/compiler/translator/tree_ops/RewriteRepeatedAssignToSwizzled.h",
"src/compiler/translator/tree_ops/RewriteRowMajorMatrices.cpp",
"src/compiler/translator/tree_ops/RewriteRowMajorMatrices.h",
"src/compiler/translator/tree_ops/RewriteTexelFetchOffset.cpp",
"src/compiler/translator/tree_ops/RewriteTexelFetchOffset.h",
"src/compiler/translator/tree_ops/RewriteUnaryMinusOperatorFloat.cpp",
......
......@@ -71,6 +71,10 @@ class TVector : public std::vector<T, pool_allocator<T>>
TVector() : std::vector<T, pool_allocator<T>>() {}
TVector(const pool_allocator<T> &a) : std::vector<T, pool_allocator<T>>(a) {}
TVector(size_type i) : std::vector<T, pool_allocator<T>>(i) {}
TVector(size_type i, const T &value) : std::vector<T, pool_allocator<T>>(i, value) {}
template <typename InputIt>
TVector(InputIt first, InputIt last) : std::vector<T, pool_allocator<T>>(first, last)
{}
TVector(std::initializer_list<T> init) : std::vector<T, pool_allocator<T>>(init) {}
};
......
......@@ -915,7 +915,7 @@ enum class PreprocessorDirective
class TIntermPreprocessorDirective : public TIntermNode
{
public:
// This could also take an ImmutbleString as an argument.
// This could also take an ImmutableString as an argument.
TIntermPreprocessorDirective(PreprocessorDirective directive, ImmutableString command);
~TIntermPreprocessorDirective() final;
......
......@@ -293,6 +293,35 @@ void TOutputGLSLBase::writeLayoutQualifier(TIntermTyped *variable)
out << ") ";
}
void TOutputGLSLBase::writeFieldLayoutQualifier(const TField *field)
{
if (!field->type()->isMatrix() && !field->type()->isStructureContainingMatrices())
{
return;
}
TInfoSinkBase &out = objSink();
out << "layout(";
switch (field->type()->getLayoutQualifier().matrixPacking)
{
case EmpUnspecified:
case EmpColumnMajor:
// Default matrix packing is column major.
out << "column_major";
break;
case EmpRowMajor:
out << "row_major";
break;
default:
UNREACHABLE();
break;
}
out << ") ";
}
void TOutputGLSLBase::writeQualifier(TQualifier qualifier, const TType &type, const TSymbol *symbol)
{
const char *result = mapQualifierToString(qualifier);
......@@ -1312,27 +1341,7 @@ void TOutputGLSLBase::declareInterfaceBlock(const TInterfaceBlock *interfaceBloc
const TFieldList &fields = interfaceBlock->fields();
for (const TField *field : fields)
{
if (field->type()->isMatrix() || field->type()->isStructureContainingMatrices())
{
out << "layout(";
switch (field->type()->getLayoutQualifier().matrixPacking)
{
case EmpUnspecified:
case EmpColumnMajor:
// Default matrix packing is column major.
out << "column_major";
break;
case EmpRowMajor:
out << "row_major";
break;
default:
UNREACHABLE();
break;
}
out << ") ";
}
writeFieldLayoutQualifier(field);
if (writeVariablePrecision(field->type()->getPrecision()))
out << " ";
......
......@@ -43,6 +43,7 @@ class TOutputGLSLBase : public TIntermTraverser
std::string getCommonLayoutQualifiers(TIntermTyped *variable);
std::string getMemoryQualifiers(const TType &type);
virtual void writeLayoutQualifier(TIntermTyped *variable);
virtual void writeFieldLayoutQualifier(const TField *field);
void writeInvariantQualifier(const TType &type);
virtual void writeVariableType(const TType &type, const TSymbol *symbol);
virtual bool writeVariablePrecision(TPrecision precision) = 0;
......
......@@ -54,7 +54,6 @@ void TOutputVulkanGLSL::writeLayoutQualifier(TIntermTyped *variable)
}
TInfoSinkBase &out = objSink();
const TLayoutQualifier &layoutQualifier = type.getLayoutQualifier();
// This isn't super clean, but it gets the job done.
// See corresponding code in GlslangWrapper.cpp.
......@@ -88,12 +87,12 @@ void TOutputVulkanGLSL::writeLayoutQualifier(TIntermTyped *variable)
{
blockStorage = getBlockStorageString(storage);
}
}
// Specify matrix packing if necessary.
if (layoutQualifier.matrixPacking != EmpUnspecified)
{
matrixPacking = getMatrixPackingString(layoutQualifier.matrixPacking);
// We expect all interface blocks to have been transformed to column major, so we don't
// specify the packing. Any remaining interface block qualified with row_major shouldn't
// have any matrices inside.
ASSERT(type.getLayoutQualifier().matrixPacking != EmpRowMajor ||
!interfaceBlock->containsMatrices());
}
if (needsCustomLayout)
......@@ -132,6 +131,14 @@ void TOutputVulkanGLSL::writeLayoutQualifier(TIntermTyped *variable)
}
}
// Deliberately empty override: no per-field layout qualifier is ever emitted for Vulkan GLSL.
void TOutputVulkanGLSL::writeFieldLayoutQualifier(const TField *field)
{
// We expect all interface blocks to have been transformed to column major, as Vulkan GLSL
// doesn't allow layout qualifiers on interface block fields. Any remaining interface block
// qualified with row_major shouldn't have any matrices inside, so the qualifier can be
// dropped.
}
void TOutputVulkanGLSL::writeQualifier(TQualifier qualifier,
const TType &type,
const TSymbol *symbol)
......
......@@ -31,6 +31,7 @@ class TOutputVulkanGLSL : public TOutputGLSL
protected:
void writeLayoutQualifier(TIntermTyped *variable) override;
void writeFieldLayoutQualifier(const TField *field) override;
void writeQualifier(TQualifier qualifier, const TType &type, const TSymbol *symbol) override;
void writeVariableType(const TType &type, const TSymbol *symbol) override;
void visitSymbol(TIntermSymbol *node) override;
......
......@@ -17,9 +17,11 @@
#include "compiler/translator/OutputVulkanGLSL.h"
#include "compiler/translator/StaticType.h"
#include "compiler/translator/tree_ops/NameEmbeddedUniformStructs.h"
#include "compiler/translator/tree_ops/NameNamelessUniformBuffers.h"
#include "compiler/translator/tree_ops/RewriteAtomicCounters.h"
#include "compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h"
#include "compiler/translator/tree_ops/RewriteDfdy.h"
#include "compiler/translator/tree_ops/RewriteRowMajorMatrices.h"
#include "compiler/translator/tree_ops/RewriteStructSamplers.h"
#include "compiler/translator/tree_util/BuiltIn_autogen.h"
#include "compiler/translator/tree_util/FindFunction.h"
......@@ -728,6 +730,25 @@ bool TranslatorVulkan::translate(TIntermBlock *root,
}
}
if (getShaderVersion() >= 300)
{
// Make sure every uniform buffer variable has a name. The following transformation relies
// on this.
if (!NameNamelessUniformBuffers(this, root, &getSymbolTable()))
{
return false;
}
// In GLES3+, matrices can be declared row- or column-major. Transform all to column-major
// as interface block field layout qualifiers are not allowed. This should be done after
// samplers are taken out of structs (as structs could be rewritten), but before uniforms
// are collected in a uniform buffer as they are handled especially.
if (!RewriteRowMajorMatrices(this, root, &getSymbolTable()))
{
return false;
}
}
if (defaultUniformCount > 0)
{
sink << "\n@@ LAYOUT-defaultUniforms(std140) @@ uniform defaultUniforms\n{\n";
......
//
// Copyright 2019 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// NameNamelessUniformBuffers: Gives nameless uniform buffer variables internal names.
//
#include "compiler/translator/tree_ops/NameNamelessUniformBuffers.h"
#include "compiler/translator/SymbolTable.h"
#include "compiler/translator/tree_util/IntermNode_util.h"
#include "compiler/translator/tree_util/IntermTraverse.h"
namespace sh
{
namespace
{
// Traverse uniform buffer declarations and give a name to nameless declarations. Keeps track of
// the interface fields which will be used in the source without the interface block variable name
// and replaces them with name.field.
class NameUniformBufferVariablesTraverser : public TIntermTraverser
{
  public:
    explicit NameUniformBufferVariablesTraverser(TSymbolTable *symbolTable)
        : TIntermTraverser(true, false, false, symbolTable)
    {}

    bool visitDeclaration(Visit visit, TIntermDeclaration *decl) override
    {
        ASSERT(visit == PreVisit);

        const TIntermSequence &sequence = *(decl->getSequence());

        TIntermTyped *variableNode = sequence.front()->getAsTyped();
        const TType &type          = variableNode->getType();

        // Only interface block declarations are interesting; everything else (and named blocks)
        // is left alone.
        if (!type.isInterfaceBlock())
        {
            return true;
        }

        // Multi declaration statements are already separated, so there can only be one variable
        // here.
        ASSERT(sequence.size() == 1);
        const TVariable *variable = &variableNode->getAsSymbolNode()->variable();
        if (variable->symbolType() != SymbolType::Empty)
        {
            return false;
        }

        // Replace the nameless declaration with one whose variable carries an internally
        // generated (AngleInternal) name.
        TIntermDeclaration *newDeclaration = new TIntermDeclaration;
        TVariable *newVariable = new TVariable(mSymbolTable, kEmptyImmutableString, &type,
                                               SymbolType::AngleInternal, variable->extension());
        newDeclaration->appendDeclarator(new TIntermSymbol(newVariable));

        queueReplacement(newDeclaration, OriginalNode::IS_DROPPED);

        // It's safe to key the map with the interface block, as there couldn't have been multiple
        // declarations with this interface block (as the variable is nameless), so for nameless
        // uniform buffers, the interface block is unique.
        mNamelessUniformBuffersMap[type.getInterfaceBlock()] = newVariable;

        return false;
    }

    void visitSymbol(TIntermSymbol *symbol) override
    {
        const TType &type = symbol->getType();

        // The symbols we are looking for have the interface block pointer set, but are not
        // interface blocks. These are references to fields of nameless uniform buffers.
        if (type.isInterfaceBlock() || type.getInterfaceBlock() == nullptr)
        {
            return;
        }

        const TInterfaceBlock *block = type.getInterfaceBlock();

        // If block variable is not nameless, there's nothing to do.
        if (mNamelessUniformBuffersMap.count(block) == 0)
        {
            return;
        }

        const ImmutableString symbolName = symbol->getName();

        // Find which field it is. Note: take the field list by reference; the original code
        // copied the whole TVector on every visited symbol.
        const TVector<TField *> &fields = block->fields();
        for (size_t fieldIndex = 0; fieldIndex < fields.size(); ++fieldIndex)
        {
            const TField *field = fields[fieldIndex];
            if (field->name() != symbolName)
            {
                continue;
            }

            // Replace this node with a binary node that indexes the named uniform buffer.
            TIntermSymbol *namedUniformBuffer =
                new TIntermSymbol(mNamelessUniformBuffersMap[block]);
            TIntermBinary *replacement =
                new TIntermBinary(EOpIndexDirectInterfaceBlock, namedUniformBuffer,
                                  CreateIndexNode(static_cast<uint32_t>(fieldIndex)));
            queueReplacement(replacement, OriginalNode::IS_DROPPED);

            return;
        }

        // Every such symbol must correspond to some field of its block.
        UNREACHABLE();
    }

  private:
    // A map from nameless uniform buffers to their named replacements.
    std::unordered_map<const TInterfaceBlock *, const TVariable *> mNamelessUniformBuffersMap;
};
} // anonymous namespace
// Entry point: names every nameless uniform buffer in |root| and rewrites references to their
// fields as name.field accesses. Returns false if updating the tree fails.
bool NameNamelessUniformBuffers(TCompiler *compiler, TIntermBlock *root, TSymbolTable *symbolTable)
{
    NameUniformBufferVariablesTraverser traverser(symbolTable);
    root->traverse(&traverser);
    return traverser.updateTree(compiler, root);
}
} // namespace sh
//
// Copyright 2019 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// NameNamelessUniformBuffers: Gives nameless uniform buffer variables internal names.
//
// For example:
// uniform UniformBuffer { int a; };
// x = a;
// becomes:
// uniform UniformBuffer { int a; } s123;
// x = s123.a;
//
#ifndef COMPILER_TRANSLATOR_TREEOPS_NAMENAMELESSUNIFORMBUFFERS_H_
#define COMPILER_TRANSLATOR_TREEOPS_NAMENAMELESSUNIFORMBUFFERS_H_
#include "common/angleutils.h"
namespace sh
{
class TCompiler;
class TIntermBlock;
class TSymbolTable;
// Performs the transformation described above on |root|; returns false if the AST update fails.
ANGLE_NO_DISCARD bool NameNamelessUniformBuffers(TCompiler *compiler,
TIntermBlock *root,
TSymbolTable *symbolTable);
} // namespace sh
#endif // COMPILER_TRANSLATOR_TREEOPS_NAMENAMELESSUNIFORMBUFFERS_H_
//
// Copyright 2019 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// RewriteRowMajorMatrices: Rewrite row-major matrices as column-major.
//
#include "compiler/translator/tree_ops/RewriteRowMajorMatrices.h"
#include "compiler/translator/Compiler.h"
#include "compiler/translator/ImmutableStringBuilder.h"
#include "compiler/translator/StaticType.h"
#include "compiler/translator/SymbolTable.h"
#include "compiler/translator/tree_util/IntermNode_util.h"
#include "compiler/translator/tree_util/IntermTraverse.h"
#include "compiler/translator/tree_util/ReplaceVariable.h"
namespace sh
{
namespace
{
// Only structs with matrices are tracked. If layout(row_major) is applied to a struct that doesn't
// have matrices, it's silently dropped. This is also used to avoid creating duplicates for inner
// structs that don't have matrices.
struct StructConversionData
{
// The converted struct with every matrix transposed. Stays nullptr until a row-major instance
// of the struct is actually encountered.
TStructure *convertedStruct = nullptr;
// The copy-from and copy-to functions copying from a struct to its converted version and back.
TFunction *copyFromOriginal = nullptr;
TFunction *copyToOriginal = nullptr;
};
bool DoesFieldContainRowMajorMatrix(const TField *field, bool isBlockRowMajor)
{
TLayoutMatrixPacking matrixPacking = field->type()->getLayoutQualifier().matrixPacking;
// The field is row major if either explicitly specified as such, or if it inherits it from the
// block layout qualifier.
if (matrixPacking == EmpColumnMajor || (matrixPacking == EmpUnspecified && !isBlockRowMajor))
{
return false;
}
// The field is qualified with row_major, but if it's not a matrix or a struct containing
// matrices, that's a useless qualifier.
const TType *type = field->type();
return type->isMatrix() || type->isStructureContainingMatrices();
}
// Creates a fresh TField mirroring |field|, with its own copy of the field's type.
TField *DuplicateField(const TField *field)
{
    TType *typeCopy = new TType(*field->type());
    return new TField(typeCopy, field->name(), field->line(), field->symbolType());
}
// Forces |type|'s matrix packing to column major, leaving the rest of its layout qualifier intact.
void SetColumnMajor(TType *type)
{
    TLayoutQualifier qualifier = type->getLayoutQualifier();
    qualifier.matrixPacking    = EmpColumnMajor;
    type->setLayoutQualifier(qualifier);
}
// Returns a copy of matrix type |type| with its dimensions swapped (CxR -> RxC) and its packing
// forced to column major.
TType *TransposeMatrixType(const TType *type)
{
    const unsigned char newCols = static_cast<unsigned char>(type->getRows());
    const unsigned char newRows = static_cast<unsigned char>(type->getCols());

    TType *transposed = new TType(*type);
    SetColumnMajor(transposed);
    transposed->setPrimarySize(newCols);
    transposed->setSecondarySize(newRows);
    return transposed;
}
// Applies |from|'s array dimensions, if any, onto |to|.
void CopyArraySizes(const TType *from, TType *to)
{
    if (!from->isArray())
    {
        return;
    }
    to->makeArrays(*from->getArraySizes());
}
// Determines whether |node| indexes into |child| (array index or struct field selection). For the
// purposes of this transformation, swizzle nodes are considered index nodes too.
bool IsIndexNode(TIntermNode *node, TIntermNode *child)
{
    if (node->getAsSwizzleNode() != nullptr)
    {
        return true;
    }

    TIntermBinary *asBinary = node->getAsBinaryNode();
    if (asBinary == nullptr)
    {
        return false;
    }

    // Only indexing applied to |child| itself counts; |child| appearing as the right-hand side
    // (the index expression) does not.
    if (asBinary->getLeft() != child)
    {
        return false;
    }

    switch (asBinary->getOp())
    {
        case EOpIndexDirect:
        case EOpIndexDirectInterfaceBlock:
        case EOpIndexDirectStruct:
        case EOpIndexIndirect:
            return true;
        default:
            return false;
    }
}
// Evaluates |node| into a newly created temp variable (its init declaration is appended to
// |prependStatements|) and returns a symbol node referencing that temp.
TIntermSymbol *CopyToTempVariable(TSymbolTable *symbolTable,
                                  TIntermTyped *node,
                                  TIntermSequence *prependStatements)
{
    TVariable *tempVariable = CreateTempVariable(symbolTable, &node->getType());

    TIntermDeclaration *tempInit = CreateTempInitDeclarationNode(tempVariable, node);
    prependStatements->push_back(tempInit);

    return new TIntermSymbol(tempVariable);
}
// Builds a call to one of the generated struct copy functions, passing |expression| as the sole
// argument.
TIntermAggregate *CreateStructCopyCall(const TFunction *copyFunc, TIntermTyped *expression)
{
return TIntermAggregate::CreateFunctionCall(*copyFunc, new TIntermSequence({expression}));
}
// Builds a call to the transpose() built-in on |expression|. The 300 is the shader version used
// for the built-in lookup — presumably because transpose() requires ESSL 3.00; confirm against
// CreateBuiltInFunctionCallNode.
TIntermTyped *CreateTransposeCall(TSymbolTable *symbolTable, TIntermTyped *expression)
{
return CreateBuiltInFunctionCallNode("transpose", new TIntermSequence({expression}),
*symbolTable, 300);
}
// Extracts the index (or indices) that |node| applies and appends them to |indices|, returning
// the indexing operator. Swizzles are decomposed into one index per swizzle channel (as
// EOpIndexDirect). Non-constant indices are first copied into temp variables (declarations added
// to |prependStatements|) so their side effects are not duplicated when the index is replayed.
TOperator GetIndex(TSymbolTable *symbolTable,
                   TIntermNode *node,
                   TIntermSequence *indices,
                   TIntermSequence *prependStatements)
{
    TIntermSwizzle *swizzleNode = node->getAsSwizzleNode();
    if (swizzleNode != nullptr)
    {
        for (int channel : swizzleNode->getSwizzleOffsets())
        {
            indices->push_back(CreateIndexNode(channel));
        }
        return EOpIndexDirect;
    }

    TIntermBinary *indexNode = node->getAsBinaryNode();
    ASSERT(indexNode);

    const TOperator op = indexNode->getOp();
    ASSERT(op == EOpIndexDirect || op == EOpIndexDirectInterfaceBlock ||
           op == EOpIndexDirectStruct || op == EOpIndexIndirect);

    TIntermTyped *index = indexNode->getRight()->deepCopy();
    if (index->getAsConstantUnion() == nullptr)
    {
        // Dynamic index: evaluate it once into a temp.
        index = CopyToTempVariable(symbolTable, index, prependStatements);
    }
    indices->push_back(index);

    return op;
}
// Recreates on |lhs| the same indexing operation that |node| performs: swizzles are rebuilt with
// the original channels; binary index nodes reuse the single index previously collected in
// |indices|.
TIntermTyped *ReplicateIndexNode(TSymbolTable *symbolTable,
                                 TIntermNode *node,
                                 TIntermTyped *lhs,
                                 TIntermSequence *indices)
{
    TIntermSwizzle *swizzleNode = node->getAsSwizzleNode();
    if (swizzleNode != nullptr)
    {
        return new TIntermSwizzle(lhs, swizzleNode->getSwizzleOffsets());
    }

    TIntermBinary *indexNode = node->getAsBinaryNode();
    ASSERT(indexNode);
    ASSERT(indices->size() == 1);

    return new TIntermBinary(indexNode->getOp(), lhs, indices->front()->getAsTyped());
}
// A constant index can be applied with direct indexing; anything else needs indirect indexing.
TOperator GetIndexOp(TIntermNode *node)
{
    if (node->getAsConstantUnion() != nullptr)
    {
        return EOpIndexDirect;
    }
    return EOpIndexIndirect;
}
// Returns whether |indexNode| is a direct interface-block field selection whose field was
// converted (transposed matrix or rewritten struct) by this transformation.
bool IsConvertedField(TIntermTyped *indexNode,
                      const std::unordered_map<const TField *, bool> &convertedFields)
{
    TIntermBinary *fieldSelection = indexNode->getAsBinaryNode();
    if (fieldSelection == nullptr || fieldSelection->getOp() != EOpIndexDirectInterfaceBlock)
    {
        return false;
    }

    const TInterfaceBlock *interfaceBlock =
        fieldSelection->getLeft()->getType().getInterfaceBlock();
    ASSERT(interfaceBlock);

    TIntermConstantUnion *fieldIndexNode = fieldSelection->getRight()->getAsConstantUnion();
    ASSERT(fieldIndexNode);
    ASSERT(fieldIndexNode->getConstantValue() != nullptr);

    const int fieldIndex = fieldIndexNode->getConstantValue()->getIConst();
    const TField *field  = interfaceBlock->fields()[fieldIndex];

    const auto iter = convertedFields.find(field);
    return iter != convertedFields.end() && iter->second;
}
// A helper class to transform expressions of array type. Iterates over every element of the
// array, optionally yielding the matching element of a parallel "value" expression.
class TransformArrayHelper
{
  public:
    TransformArrayHelper(TIntermTyped *baseExpression)
        : mBaseExpression(baseExpression),
          mBaseExpressionType(baseExpression->getType()),
          mArrayIndices(mBaseExpressionType.getArraySizes()->size(), 0)
    {}

    // Returns the next element of the base expression, or nullptr once all elements have been
    // enumerated. If |valueExpression| is non-null, |*valueElementOut| receives the element of it
    // at the same indices.
    TIntermTyped *getNextElement(TIntermTyped *valueExpression, TIntermTyped **valueElementOut)
    {
        const TVector<unsigned int> *arraySizes = mBaseExpressionType.getArraySizes();

        // If the last index overflows, element enumeration is done.
        if (mArrayIndices.back() >= arraySizes->back())
        {
            return nullptr;
        }

        TIntermTyped *element = getCurrentElement(mBaseExpression);
        if (valueExpression)
        {
            *valueElementOut = getCurrentElement(valueExpression);
        }

        incrementIndices(arraySizes);
        return element;
    }

    // Stores |transformedElement| (through a temp variable, so it is evaluated exactly once) as a
    // future argument for constructReadTransformExpression().
    void accumulateForRead(TSymbolTable *symbolTable,
                           TIntermTyped *transformedElement,
                           TIntermSequence *prependStatements)
    {
        TIntermTyped *temp = CopyToTempVariable(symbolTable, transformedElement, prependStatements);
        mReadTransformConstructorArgs.push_back(temp);
    }

    // Builds the (possibly nested) array constructor expression out of the elements accumulated
    // by accumulateForRead().
    TIntermTyped *constructReadTransformExpression()
    {
        const TVector<unsigned int> &arraySizes = *mBaseExpressionType.getArraySizes();
        TIntermTyped *firstElement = mReadTransformConstructorArgs.front()->getAsTyped();
        const TType &baseType      = firstElement->getType();

        // If N dimensions, acc[0] == size[0] and acc[i] == size[i] * acc[i-1].
        // The last value is unused, and is not present. For a one-dimensional array no
        // accumulated sizes are needed at all; the original code unconditionally wrote
        // accumulatedArraySizes[0] here, which was out of bounds in that case.
        TVector<unsigned int> accumulatedArraySizes(arraySizes.size() - 1, 0);
        if (!accumulatedArraySizes.empty())
        {
            accumulatedArraySizes[0] = arraySizes[0];
            for (size_t index = 1; index + 1 < arraySizes.size(); ++index)
            {
                accumulatedArraySizes[index] =
                    accumulatedArraySizes[index - 1] * arraySizes[index];
            }
        }

        return constructReadTransformExpressionHelper(arraySizes, accumulatedArraySizes, baseType,
                                                      0);
    }

  private:
    // Applies the current indices to |expression| as nested direct-index nodes. Indices are
    // applied in reverse order because the last digit is the most significant one.
    TIntermTyped *getCurrentElement(TIntermTyped *expression)
    {
        TIntermTyped *element = expression->deepCopy();
        for (auto it = mArrayIndices.rbegin(); it != mArrayIndices.rend(); ++it)
        {
            unsigned int index = *it;
            element = new TIntermBinary(EOpIndexDirect, element, CreateIndexNode(index));
        }
        return element;
    }

    void incrementIndices(const TVector<unsigned int> *arraySizes)
    {
        // Assume mArrayIndices is an N digit number, where digit i is in the range
        // [0, arraySizes[i]). This function increments this number. Last digit is the most
        // significant digit.
        for (size_t digitIndex = 0; digitIndex < arraySizes->size(); ++digitIndex)
        {
            ++mArrayIndices[digitIndex];
            if (mArrayIndices[digitIndex] < (*arraySizes)[digitIndex])
            {
                break;
            }
            if (digitIndex + 1 != arraySizes->size())
            {
                // This digit has now overflown and is reset to 0, carry will be added to the next
                // digit. The most significant digit will keep the overflow though, to make it
                // clear we have exhausted the range.
                mArrayIndices[digitIndex] = 0;
            }
        }
    }

    // Recursively builds constructors for each array dimension. The size vectors are taken by
    // const reference; the original code copied both at every level of recursion.
    TIntermTyped *constructReadTransformExpressionHelper(
        const TVector<unsigned int> &arraySizes,
        const TVector<unsigned int> &accumulatedArraySizes,
        const TType &baseType,
        size_t elementsOffset)
    {
        ASSERT(!arraySizes.empty());

        TType *transformedType = new TType(baseType);
        transformedType->makeArrays(arraySizes);

        // If one dimensional, create the constructor with the given elements.
        if (arraySizes.size() == 1)
        {
            ASSERT(accumulatedArraySizes.size() == 0);
            auto sliceStart = mReadTransformConstructorArgs.begin() + elementsOffset;
            TIntermSequence slice(sliceStart, sliceStart + arraySizes[0]);
            return TIntermAggregate::CreateConstructor(*transformedType, &slice);
        }

        // If not, create constructors for every column recursively.
        TVector<unsigned int> subArraySizes(arraySizes.begin(), arraySizes.end() - 1);
        TVector<unsigned int> subArrayAccumulatedSizes(accumulatedArraySizes.begin(),
                                                       accumulatedArraySizes.end() - 1);

        TIntermSequence constructorArgs;
        unsigned int colStride = accumulatedArraySizes.back();
        for (size_t col = 0; col < arraySizes.back(); ++col)
        {
            size_t colElementsOffset = elementsOffset + col * colStride;
            constructorArgs.push_back(constructReadTransformExpressionHelper(
                subArraySizes, subArrayAccumulatedSizes, baseType, colElementsOffset));
        }
        return TIntermAggregate::CreateConstructor(*transformedType, &constructorArgs);
    }

    TIntermTyped *mBaseExpression;
    const TType &mBaseExpressionType;
    TVector<unsigned int> mArrayIndices;

    TIntermSequence mReadTransformConstructorArgs;
};
// Traverser that:
//
// 1. Converts |layout(row_major) matCxR M| to |layout(column_major) matRxC Mt|.
// 2. Converts |layout(row_major) S s| to |layout(column_major) St st|, where S is a struct that
// contains matrices, and St is a new struct with the transformation in 1 applied to matrix
// members (recursively).
// 3. When read from, the following transformations are applied:
//
// M -> transpose(Mt)
// M[c] -> gvecN(Mt[0][c], Mt[1][c], ..., Mt[N-1][c])
// M[c][r] -> Mt[r][c]
// M[c].yz -> gvec2(Mt[1][c], Mt[2][c])
// MArr -> MType[D1]..[DN](transpose(MtArr[0]...[0]), ...)
// s -> copy_St_to_S(st)
// sArr -> SType[D1]...[DN](copy_St_to_S(stArr[0]..[0]), ...)
// (matrix reads through struct are transformed similarly to M)
//
// 4. When written to, the following transformations are applied:
//
// M = exp -> Mt = transpose(exp)
// M[c] = exp -> temp = exp
// Mt[0][c] = temp[0]
// Mt[1][c] = temp[1]
// ...
// Mt[N-1][c] = temp[N-1]
// M[c][r] = exp -> Mt[r][c] = exp
// M[c].yz = exp -> temp = exp
// Mt[1][c] = temp[0]
// Mt[2][c] = temp[1]
// MArr = exp -> temp = exp
// Mt = MtType[D1]..[DN](temp([0]...[0]), ...)
// s = exp -> st = copy_S_to_St(exp)
// sArr = exp -> temp = exp
// St = StType[D1]...[DN](copy_S_to_St(temp[0]..[0]), ...)
// (matrix writes through struct are transformed similarly to M)
//
// 5. If any of the above is passed to an `inout` parameter, both transformations are applied:
//
// f(M[c]) -> temp = gvecN(Mt[0][c], Mt[1][c], ..., Mt[N-1][c])
// f(temp)
// Mt[0][c] = temp[0]
// Mt[1][c] = temp[1]
// ...
// Mt[N-1][c] = temp[N-1]
//
// f(s) -> temp = copy_St_to_S(st)
// f(temp)
// st = copy_S_to_St(temp)
//
// If passed to an `out` parameter, the `temp` parameter is simply not initialized.
//
// 6. If the expression leading to the matrix or struct has array subscripts, temp values are
// created for them to avoid duplicating side effects.
//
class RewriteRowMajorMatricesTraverser : public TIntermTraverser
{
public:
// Constructor for the outer pass: all state (struct map, interface block maps, generated copy
// functions) lives in mOuterPass; there is no inner-pass root.
RewriteRowMajorMatricesTraverser(TCompiler *compiler, TSymbolTable *symbolTable)
: TIntermTraverser(true, true, true, symbolTable),
mCompiler(compiler),
mStructMapOut(&mOuterPass.structMap),
mInterfaceBlockMapIn(mOuterPass.interfaceBlockMap),
mInterfaceBlockFieldConvertedIn(mOuterPass.interfaceBlockFieldConverted),
mCopyFunctionDefinitionsOut(&mOuterPass.copyFunctionDefinitions),
mOuterTraverser(nullptr),
mInnerPassRoot(nullptr),
mIsProcessingInnerPassSubtree(false)
{}
// Records struct specifiers that contain matrices and converts interface blocks that contain
// (effectively) row-major matrix fields.
bool visitDeclaration(Visit visit, TIntermDeclaration *node) override
{
// No need to process declarations in inner passes.
if (mInnerPassRoot != nullptr)
{
return true;
}
if (visit != PreVisit)
{
return true;
}
const TIntermSequence &sequence = *(node->getSequence());
TIntermTyped *variable = sequence.front()->getAsTyped();
const TType &type = variable->getType();
// If it's a struct declaration that has matrices, remember it. If a row-major instance
// of it is created, it will have to be converted.
if (type.isStructSpecifier() && type.isStructureContainingMatrices())
{
const TStructure *structure = type.getStruct();
ASSERT(structure);
ASSERT(mOuterPass.structMap.count(structure) == 0);
StructConversionData structData;
mOuterPass.structMap[structure] = structData;
return false;
}
// If it's an interface block, it may have to be converted if it contains any row-major
// fields.
if (type.isInterfaceBlock() && type.getInterfaceBlock()->containsMatrices())
{
const TInterfaceBlock *block = type.getInterfaceBlock();
ASSERT(block);
bool isBlockRowMajor = type.getLayoutQualifier().matrixPacking == EmpRowMajor;
const TFieldList &fields = block->fields();
// A row-major block qualifier alone triggers conversion; otherwise any single
// row-major field does.
bool anyRowMajor = isBlockRowMajor;
for (const TField *field : fields)
{
if (DoesFieldContainRowMajorMatrix(field, isBlockRowMajor))
{
anyRowMajor = true;
break;
}
}
if (anyRowMajor)
{
convertInterfaceBlock(node);
}
return false;
}
return true;
}
// Rewrites references to interface block variables that were recreated by the transformation.
void visitSymbol(TIntermSymbol *symbol) override
{
    // In an inner pass, only symbols under that pass' root are relevant.
    if (mInnerPassRoot != nullptr && !mIsProcessingInnerPassSubtree)
    {
        return;
    }

    const TVariable *variable = &symbol->variable();

    // Nothing to do unless this symbol refers to a converted interface block variable.
    if (mInterfaceBlockMapIn.count(variable) != 0)
    {
        transformExpression(symbol);
    }
}
bool visitBinary(Visit visit, TIntermBinary *node) override
{
if (node == mInnerPassRoot)
{
// We only want to process the right-hand side of an assignment in inner passes. When
// visit is InVisit, the left-hand side is already processed, and the right-hand side is
// next. Set a flag to mark this duration. The flag is lowered again at PostVisit, once
// the right-hand side has been fully traversed.
mIsProcessingInnerPassSubtree = visit == InVisit;
}
return true;
}
// Exposes the copy-function definitions generated during the outer pass, so the caller can
// prepend them to the shader.
TIntermSequence *getStructCopyFunctions() { return &mOuterPass.copyFunctionDefinitions; }
private:
typedef std::unordered_map<const TStructure *, StructConversionData> StructMap;
typedef std::unordered_map<const TVariable *, TVariable *> InterfaceBlockMap;
typedef std::unordered_map<const TField *, bool> InterfaceBlockFieldConverted;
// Constructor for inner passes: shares the outer pass' maps (read-only) and restricts
// processing to the subtree rooted at |innerPassRoot|.
RewriteRowMajorMatricesTraverser(
TSymbolTable *symbolTable,
RewriteRowMajorMatricesTraverser *outerTraverser,
const InterfaceBlockMap &interfaceBlockMap,
const InterfaceBlockFieldConverted &interfaceBlockFieldConverted,
StructMap *structMap,
TIntermSequence *copyFunctionDefinitions,
TIntermBinary *innerPassRoot)
: TIntermTraverser(true, true, true, symbolTable),
mStructMapOut(structMap),
mInterfaceBlockMapIn(interfaceBlockMap),
mInterfaceBlockFieldConvertedIn(interfaceBlockFieldConverted),
mCopyFunctionDefinitionsOut(copyFunctionDefinitions),
mOuterTraverser(outerTraverser),
mInnerPassRoot(innerPassRoot),
mIsProcessingInnerPassSubtree(false)
{}
// Rebuilds an interface block declaration with all effectively-row-major matrix fields converted
// to column major, registering the new variable in mOuterPass.interfaceBlockMap so later symbol
// references can be redirected.
void convertInterfaceBlock(TIntermDeclaration *node)
{
ASSERT(mInnerPassRoot == nullptr);
const TIntermSequence &sequence = *(node->getSequence());
TIntermTyped *variableNode = sequence.front()->getAsTyped();
const TType &type = variableNode->getType();
const TInterfaceBlock *block = type.getInterfaceBlock();
ASSERT(block);
bool isBlockRowMajor = type.getLayoutQualifier().matrixPacking == EmpRowMajor;
// Recreate the struct with its row-major fields converted to column-major equivalents.
TIntermSequence newDeclarations;
TFieldList *newFields = new TFieldList;
for (const TField *field : block->fields())
{
TField *newField = nullptr;
if (DoesFieldContainRowMajorMatrix(field, isBlockRowMajor))
{
newField = convertField(field, &newDeclarations);
// Remember that this field was converted.
mOuterPass.interfaceBlockFieldConverted[field] = true;
}
else
{
newField = DuplicateField(field);
}
newFields->push_back(newField);
}
// Create a new interface block with these fields. The block-level qualifier becomes
// column major since every field has been converted.
TLayoutQualifier blockLayoutQualifier = type.getLayoutQualifier();
blockLayoutQualifier.matrixPacking = EmpColumnMajor;
TInterfaceBlock *newInterfaceBlock =
new TInterfaceBlock(mSymbolTable, block->name(), newFields, blockLayoutQualifier,
block->symbolType(), block->extension());
// Create a new declaration with the new type. Declarations are separated at this point,
// so there should be only one variable here.
ASSERT(sequence.size() == 1);
TType *newInterfaceBlockType =
new TType(newInterfaceBlock, type.getQualifier(), blockLayoutQualifier);
TIntermDeclaration *newDeclaration = new TIntermDeclaration;
const TVariable *variable = &variableNode->getAsSymbolNode()->variable();
const TType *newType = newInterfaceBlockType;
if (type.isArray())
{
TType *newArrayType = new TType(*newType);
CopyArraySizes(&type, newArrayType);
newType = newArrayType;
}
// If the interface block variable itself is temp, use an empty name.
bool variableIsTemp = variable->symbolType() == SymbolType::Empty;
const ImmutableString &variableName =
variableIsTemp ? kEmptyImmutableString : variable->name();
TVariable *newVariable = new TVariable(mSymbolTable, variableName, newType,
variable->symbolType(), variable->extension());
newDeclaration->appendDeclarator(new TIntermSymbol(newVariable));
mOuterPass.interfaceBlockMap[variable] = newVariable;
newDeclarations.push_back(newDeclaration);
// Replace the interface block definition with the new one, prepending any new struct
// definitions.
mMultiReplacements.emplace_back(getParentNode()->getAsBlock(), node, newDeclarations);
}
// Creates (at most once per struct — guarded by convertedStruct) the column-major counterpart of
// |structure|, appending its declaration to |newDeclarations|.
void convertStruct(const TStructure *structure, TIntermSequence *newDeclarations)
{
ASSERT(mInnerPassRoot == nullptr);
ASSERT(mOuterPass.structMap.count(structure) != 0);
StructConversionData *structData = &mOuterPass.structMap[structure];
// Already converted by an earlier field; conversion is done lazily and only once.
if (structData->convertedStruct)
{
return;
}
TFieldList *newFields = new TFieldList;
for (const TField *field : structure->fields())
{
newFields->push_back(convertField(field, newDeclarations));
}
// Create unique names for the converted structs. We can't leave them nameless and have
// a name autogenerated similar to temp variables, as nameless structs exist. A fake
// variable is created for the sole purpose of generating a temp name.
TVariable *newStructTypeName =
new TVariable(mSymbolTable, kEmptyImmutableString, StaticType::GetBasic<EbtUInt>(),
SymbolType::Empty);
TStructure *newStruct = new TStructure(mSymbolTable, newStructTypeName->name(), newFields,
SymbolType::AngleInternal);
TType *newType = new TType(newStruct, true);
TVariable *newStructVar =
new TVariable(mSymbolTable, kEmptyImmutableString, newType, SymbolType::Empty);
TIntermDeclaration *structDecl = new TIntermDeclaration;
structDecl->appendDeclarator(new TIntermSymbol(newStructVar));
newDeclarations->push_back(structDecl);
structData->convertedStruct = newStruct;
}
TField *convertField(const TField *field, TIntermSequence *newDeclarations)
{
ASSERT(mInnerPassRoot == nullptr);
TField *newField = nullptr;
const TType *fieldType = field->type();
TType *newType = nullptr;
if (fieldType->isStructureContainingMatrices())
{
// If the field is a struct instance, convert the struct and replace the field
// with an instance of the new struct.
const TStructure *fieldTypeStruct = fieldType->getStruct();
convertStruct(fieldTypeStruct, newDeclarations);
StructConversionData &structData = mOuterPass.structMap[fieldTypeStruct];
newType = new TType(structData.convertedStruct, false);
SetColumnMajor(newType);
CopyArraySizes(fieldType, newType);
}
else if (fieldType->isMatrix())
{
// If the field is a matrix, transpose the matrix and replace the field with
// that, removing the matrix packing qualifier.
newType = TransposeMatrixType(fieldType);
}
if (newType)
{
newField = new TField(newType, field->name(), field->line(), field->symbolType());
}
else
{
newField = DuplicateField(field);
}
return newField;
}
void determineAccess(TIntermNode *expression,
TIntermNode *accessor,
bool *isReadOut,
bool *isWriteOut)
{
// If passing to a function, look at whether the parameter is in, out or inout.
TIntermAggregate *functionCall = accessor->getAsAggregate();
if (functionCall)
{
TIntermSequence *arguments = functionCall->getSequence();
for (size_t argIndex = 0; argIndex < arguments->size(); ++argIndex)
{
if ((*arguments)[argIndex] == expression)
{
TQualifier qualifier = EvqIn;
// If the aggregate is not a function call, it's a constructor, and so every
// argument is an input.
const TFunction *function = functionCall->getFunction();
if (function)
{
const TVariable *param = function->getParam(argIndex);
qualifier = param->getType().getQualifier();
}
*isReadOut = qualifier != EvqOut;
*isWriteOut = qualifier == EvqOut || qualifier == EvqInOut;
break;
}
}
return;
}
TIntermBinary *assignment = accessor->getAsBinaryNode();
if (assignment && IsAssignment(assignment->getOp()))
{
// If expression is on the right of assignment, it's being read from.
*isReadOut = assignment->getRight() == expression;
// If it's on the left of assignment, it's being written to.
*isWriteOut = assignment->getLeft() == expression;
return;
}
// Any other usage is a read.
*isReadOut = true;
*isWriteOut = false;
}
    // Constructs the replacement for a use of a converted interface block symbol, based on how
    // the symbol is indexed/swizzled and on whether the resulting expression is read, written,
    // or both.
    void transformExpression(TIntermSymbol *symbol)
    {
        // Walk up the parent chain while the nodes are EOpIndex* (whether array indexing or struct
        // field selection) or swizzle and construct the replacement expression. This traversal can
        // lead to one of the following possibilities:
        //
        // - a.b[N].etc.s (struct, or struct array): copy function should be declared and used,
        // - a.b[N].etc.M (matrix or matrix array): transpose() should be used,
        // - a.b[N].etc.M[c] (a column): each element in column needs to be handled separately,
        // - a.b[N].etc.M[c].yz (multiple elements): similar to whole column, but a subset of
        //   elements,
        // - a.b[N].etc.M[c][r] (an element): single element to handle.
        // - a.b[N].etc.x (not struct or matrix): not modified
        //
        // primaryIndex will contain c, if any. secondaryIndices will contain {0, ..., R-1}
        // (if no [r] or swizzle), {r} (if [r]), or {1, 2} (corresponding to .yz) if any.
        //
        // In all cases, the base symbol is replaced. |baseExpression| will contain everything up
        // to (and not including) the last index/swizzle operations, i.e. a.b[N].etc.s/M/x. Any
        // non constant array subscript is assigned to a temp variable to avoid duplicating side
        // effects.
        //
        // ---
        //
        // NOTE that due to the use of insertStatementsInParentBlock, cases like this will be
        // mistranslated, and this bug is likely present in most transformations that use this
        // feature:
        //
        //     if (x == 1 && a.b[x = 2].etc.M = value)
        //
        // which will translate to:
        //
        //     temp = (x = 2)
        //     if (x == 1 && a.b[temp].etc.M = transpose(value))
        //
        // See http://anglebug.com/3829.
        //
        TIntermTyped *baseExpression =
            new TIntermSymbol(mInterfaceBlockMapIn.at(&symbol->variable()));
        const TStructure *structure = nullptr;

        TIntermNode *primaryIndex = nullptr;
        TIntermSequence secondaryIndices;

        // In some cases, it is necessary to prepend or append statements. Those are captured in
        // |prependStatements| and |appendStatements|.
        TIntermSequence prependStatements;
        TIntermSequence appendStatements;

        // If the expression is neither a struct or matrix, no modification is necessary.
        // If it's a struct that doesn't have matrices, again there's no transformation necessary.
        // If it's an interface block matrix field that didn't need to be transposed, no
        // transformation is necessary.
        //
        // In all these cases, |baseExpression| contains all of the original expression.
        bool requiresTransformation = false;

        uint32_t accessorIndex         = 0;
        TIntermTyped *previousAncestor = symbol;
        while (IsIndexNode(getAncestorNode(accessorIndex), previousAncestor))
        {
            TIntermTyped *ancestor = getAncestorNode(accessorIndex)->getAsTyped();
            ASSERT(ancestor);

            const TType &previousAncestorType = previousAncestor->getType();

            // Extract the index (or swizzle offsets) used by this ancestor; non-constant array
            // subscripts are copied to temp variables via |prependStatements|.
            TIntermSequence indices;
            TOperator op = GetIndex(mSymbolTable, ancestor, &indices, &prependStatements);

            bool opIsIndex     = op == EOpIndexDirect || op == EOpIndexIndirect;
            bool isArrayIndex  = opIsIndex && previousAncestorType.isArray();
            bool isMatrixIndex = opIsIndex && previousAncestorType.isMatrix();

            // If it's a direct index in a matrix, it's the primary index.
            bool isMatrixPrimarySubscript = isMatrixIndex && !isArrayIndex;
            ASSERT(!isMatrixPrimarySubscript ||
                   (primaryIndex == nullptr && secondaryIndices.empty()));
            // If primary index is seen and the ancestor is still an index, it must be a direct
            // index as the secondary one. Note that if primaryIndex is set, there can only ever be
            // one more parent of interest, and that's subscripting the second dimension.
            bool isMatrixSecondarySubscript = primaryIndex != nullptr;
            ASSERT(!isMatrixSecondarySubscript || (opIsIndex && !isArrayIndex));

            if (requiresTransformation && isMatrixPrimarySubscript)
            {
                ASSERT(indices.size() == 1);
                primaryIndex = indices.front();

                // Default the secondary indices to include every row. If there's a secondary
                // subscript provided, it will override this.
                int rows = previousAncestorType.getRows();
                for (int r = 0; r < rows; ++r)
                {
                    secondaryIndices.push_back(CreateIndexNode(r));
                }
            }
            else if (isMatrixSecondarySubscript)
            {
                ASSERT(requiresTransformation);
                secondaryIndices = indices;

                // Indices after this point are not interesting. There can't actually be any other
                // index nodes other than desktop GLSL's swizzles on scalars, like M[1][2].yyy.
                ++accessorIndex;
                break;
            }
            else
            {
                // Replicate the expression otherwise.
                baseExpression =
                    ReplicateIndexNode(mSymbolTable, ancestor, baseExpression, &indices);

                const TType &ancestorType = ancestor->getType();
                structure                 = ancestorType.getStruct();

                requiresTransformation =
                    requiresTransformation ||
                    IsConvertedField(ancestor, mInterfaceBlockFieldConvertedIn);

                // If we reach a point where the expression is neither a matrix-containing struct
                // nor a matrix, there's no transformation required. This can happen if we descend
                // through a struct marked with row-major but arrive at a member that doesn't
                // include a matrix.
                if (!ancestorType.isMatrix() && !ancestorType.isStructureContainingMatrices())
                {
                    requiresTransformation = false;
                }
            }

            previousAncestor = ancestor;
            ++accessorIndex;
        }

        TIntermNode *originalExpression =
            accessorIndex == 0 ? symbol : getAncestorNode(accessorIndex - 1);
        TIntermNode *accessor = getAncestorNode(accessorIndex);

        // No transposition needed; just swap in the replicated expression over the converted
        // variable and we're done.
        if (!requiresTransformation)
        {
            ASSERT(primaryIndex == nullptr);
            queueReplacementWithParent(accessor, originalExpression, baseExpression,
                                       OriginalNode::IS_DROPPED);

            RewriteRowMajorMatricesTraverser *traverser = mOuterTraverser ? mOuterTraverser : this;
            traverser->insertStatementsInParentBlock(prependStatements, appendStatements);
            return;
        }

        ASSERT(structure == nullptr || primaryIndex == nullptr);
        ASSERT(structure != nullptr || baseExpression->getType().isMatrix());

        // At the end, we can determine if the expression is being read from or written to (or
        // both, if sent as an inout parameter to a function). For the sake of the transformation,
        // the left-hand side of operations like += can be treated as "written to", without
        // necessarily "read from".
        bool isRead  = false;
        bool isWrite = false;

        determineAccess(originalExpression, accessor, &isRead, &isWrite);
        ASSERT(isRead || isWrite);

        TIntermTyped *readExpression = nullptr;
        if (isRead)
        {
            readExpression = transformReadExpression(
                baseExpression, primaryIndex, &secondaryIndices, structure, &prependStatements);

            // If both read from and written to (i.e. passed to inout parameter), store the
            // expression in a temp variable and pass that to the function.
            if (isWrite)
            {
                readExpression =
                    CopyToTempVariable(mSymbolTable, readExpression, &prependStatements);
            }

            // Replace the original expression with the transformed one. Read transformations
            // always generate a single expression that can be used in place of the original (as
            // opposed to write transformations that can generate multiple statements).
            queueReplacementWithParent(accessor, originalExpression, readExpression,
                                       OriginalNode::IS_DROPPED);
        }

        TIntermSequence postTransformPrependStatements;
        TIntermSequence *writeStatements = &appendStatements;
        TOperator assignmentOperator     = EOpAssign;

        if (isWrite)
        {
            TIntermTyped *valueExpression = readExpression;

            if (!valueExpression)
            {
                // If there's already a read expression, this was an inout parameter and
                // |valueExpression| will contain the temp variable that was passed to the function
                // instead.
                //
                // If not, then the modification is either through being passed as an out parameter
                // to a function, or an assignment. In the former case, create a temp variable to
                // be passed to the function. In the latter case, create a temp variable that holds
                // the right hand side expression.
                //
                // In either case, use that temp value as the value to assign to |baseExpression|.
                TVariable *temp =
                    CreateTempVariable(mSymbolTable, &originalExpression->getAsTyped()->getType());
                TIntermDeclaration *tempDecl = nullptr;

                valueExpression = new TIntermSymbol(temp);

                TIntermBinary *assignment = accessor->getAsBinaryNode();
                if (assignment)
                {
                    assignmentOperator = assignment->getOp();
                    ASSERT(IsAssignment(assignmentOperator));

                    // We are converting the assignment to the left-hand side of an expression in
                    // the form M=exp. A subexpression of exp itself could require a
                    // transformation. This complicates things as there would be two replacements:
                    //
                    // - Replace M=exp with temp (because the return value of the assignment could
                    //   be used)
                    // - Replace exp with exp2, where parent is M=exp
                    //
                    // The second replacement however is ineffective as the whole of M=exp is
                    // already transformed. What's worse, M=exp is transformed without taking exp's
                    // transformations into account. To address this issue, this same traverser is
                    // called on the right-hand side expression, with a special flag such that it
                    // only processes that expression.
                    //
                    RewriteRowMajorMatricesTraverser *outerTraverser =
                        mOuterTraverser ? mOuterTraverser : this;
                    RewriteRowMajorMatricesTraverser rhsTraverser(
                        mSymbolTable, outerTraverser, mInterfaceBlockMapIn,
                        mInterfaceBlockFieldConvertedIn, mStructMapOut, mCopyFunctionDefinitionsOut,
                        assignment);
                    getRootNode()->traverse(&rhsTraverser);
                    bool valid = rhsTraverser.updateTree(mCompiler, getRootNode());
                    ASSERT(valid);

                    tempDecl = CreateTempInitDeclarationNode(temp, assignment->getRight());

                    // Replace the whole assignment expression with the right-hand side as a read
                    // expression, in case the result of the assignment is used. For example, this
                    // transforms:
                    //
                    //     if ((M += exp) == X)
                    //     {
                    //         // use M
                    //     }
                    //
                    // to:
                    //
                    //     temp = exp;
                    //     M += transform(temp);
                    //     if (transform(M) == X)
                    //     {
                    //         // use M
                    //     }
                    //
                    // Note that in this case the assignment to M must be prepended in the parent
                    // block. In contrast, when sent to a function, the assignment to M should be
                    // done after the current function call is done.
                    //
                    // If the read from M itself (to replace assignment) needs to generate extra
                    // statements, they should be appended after the statements that write to M.
                    // These statements are stored in postTransformPrependStatements and appended
                    // to prependStatements in the end.
                    //
                    writeStatements = &prependStatements;

                    TIntermTyped *assignmentResultExpression = transformReadExpression(
                        baseExpression->deepCopy(), primaryIndex, &secondaryIndices, structure,
                        &postTransformPrependStatements);

                    // Replace the whole assignment, instead of just the right hand side.
                    TIntermNode *accessorParent = getAncestorNode(accessorIndex + 1);
                    queueReplacementWithParent(accessorParent, accessor, assignmentResultExpression,
                                               OriginalNode::IS_DROPPED);
                }
                else
                {
                    tempDecl = CreateTempDeclarationNode(temp);

                    // Replace the write expression (a function call argument) with the temp
                    // variable.
                    queueReplacementWithParent(accessor, originalExpression, valueExpression,
                                               OriginalNode::IS_DROPPED);
                }
                prependStatements.push_back(tempDecl);
            }

            // If the same expression was also read, the read transformation consumed
            // |baseExpression|; copy it for the write statements.
            if (isRead)
            {
                baseExpression = baseExpression->deepCopy();
            }
            transformWriteExpression(baseExpression, primaryIndex, &secondaryIndices, structure,
                                     valueExpression, assignmentOperator, writeStatements);
        }

        prependStatements.insert(prependStatements.end(), postTransformPrependStatements.begin(),
                                 postTransformPrependStatements.end());

        RewriteRowMajorMatricesTraverser *traverser = mOuterTraverser ? mOuterTraverser : this;
        traverser->insertStatementsInParentBlock(prependStatements, appendStatements);
    }
    // Produces the expression used in place of a read of the original (row-major) expression,
    // reading instead from the converted (column-major) data:
    //
    // - Struct (or struct array): a call to the copy-to-original function, per element if array.
    // - Whole matrix (or matrix array): transpose(), per element if array.
    // - Indexed matrix: the primary/secondary indices are swapped, yielding either a single
    //   element or a constructed vector.
    //
    // Statements needed before the expression (e.g. per-element copies) go in
    // |prependStatements|.
    TIntermTyped *transformReadExpression(TIntermTyped *baseExpression,
                                          TIntermNode *primaryIndex,
                                          TIntermSequence *secondaryIndices,
                                          const TStructure *structure,
                                          TIntermSequence *prependStatements)
    {
        const TType &baseExpressionType = baseExpression->getType();

        if (structure)
        {
            ASSERT(primaryIndex == nullptr && secondaryIndices->empty());
            ASSERT(mStructMapOut->count(structure) != 0);
            ASSERT((*mStructMapOut)[structure].convertedStruct != nullptr);

            // Declare copy-from-converted-to-original-struct function (if not already).
            declareStructCopyToOriginal(structure);

            const TFunction *copyToOriginal = (*mStructMapOut)[structure].copyToOriginal;

            if (baseExpressionType.isArray())
            {
                // If base expression is an array, transform every element.
                TransformArrayHelper transformHelper(baseExpression);

                TIntermTyped *element = nullptr;
                while ((element = transformHelper.getNextElement(nullptr, nullptr)) != nullptr)
                {
                    TIntermTyped *transformedElement =
                        CreateStructCopyCall(copyToOriginal, element);
                    transformHelper.accumulateForRead(mSymbolTable, transformedElement,
                                                      prependStatements);
                }
                return transformHelper.constructReadTransformExpression();
            }
            else
            {
                // If not reading an array, the result is simply a call to this function with the
                // base expression.
                return CreateStructCopyCall(copyToOriginal, baseExpression);
            }
        }

        // If not indexed, the result is transpose(exp)
        if (primaryIndex == nullptr)
        {
            ASSERT(secondaryIndices->empty());

            if (baseExpressionType.isArray())
            {
                // If array, transpose every element.
                TransformArrayHelper transformHelper(baseExpression);

                TIntermTyped *element = nullptr;
                while ((element = transformHelper.getNextElement(nullptr, nullptr)) != nullptr)
                {
                    TIntermTyped *transformedElement = CreateTransposeCall(mSymbolTable, element);
                    transformHelper.accumulateForRead(mSymbolTable, transformedElement,
                                                      prependStatements);
                }
                return transformHelper.constructReadTransformExpression();
            }
            else
            {
                return CreateTransposeCall(mSymbolTable, baseExpression);
            }
        }

        // If indexed the result is a vector (or just one element) where the primary and secondary
        // indices are swapped.
        ASSERT(!secondaryIndices->empty());

        TOperator primaryIndexOp          = GetIndexOp(primaryIndex);
        TIntermTyped *primaryIndexAsTyped = primaryIndex->getAsTyped();

        TIntermSequence transposedColumn;
        for (TIntermNode *secondaryIndex : *secondaryIndices)
        {
            TOperator secondaryIndexOp          = GetIndexOp(secondaryIndex);
            TIntermTyped *secondaryIndexAsTyped = secondaryIndex->getAsTyped();

            // M[secondary][primary] of the stored (transposed) matrix is the original
            // M[primary][secondary].
            TIntermBinary *colIndexed = new TIntermBinary(
                secondaryIndexOp, baseExpression->deepCopy(), secondaryIndexAsTyped->deepCopy());
            TIntermBinary *colRowIndexed =
                new TIntermBinary(primaryIndexOp, colIndexed, primaryIndexAsTyped->deepCopy());

            transposedColumn.push_back(colRowIndexed);
        }

        if (secondaryIndices->size() == 1)
        {
            // If only one element, return that directly.
            return transposedColumn.front()->getAsTyped();
        }

        // Otherwise create a constructor with the appropriate dimension.
        TType *vecType = new TType(baseExpressionType.getBasicType(), secondaryIndices->size());
        return TIntermAggregate::CreateConstructor(*vecType, &transposedColumn);
    }
    // Generates the statements that write |valueExpression| through |baseExpression| into the
    // converted (column-major) data:
    //
    // - Struct (or struct array): assign the result of the copy-from-original function, per
    //   element if array.
    // - Whole matrix (or matrix array): assign transpose(value), per element if array.
    // - Indexed matrix: one assignment per secondary index, with primary/secondary swapped.
    //
    // The generated assignments are pushed onto |writeStatements| (prepend or append list,
    // chosen by the caller).
    void transformWriteExpression(TIntermTyped *baseExpression,
                                  TIntermNode *primaryIndex,
                                  TIntermSequence *secondaryIndices,
                                  const TStructure *structure,
                                  TIntermTyped *valueExpression,
                                  TOperator assignmentOperator,
                                  TIntermSequence *writeStatements)
    {
        const TType &baseExpressionType = baseExpression->getType();

        if (structure)
        {
            ASSERT(primaryIndex == nullptr && secondaryIndices->empty());
            ASSERT(mStructMapOut->count(structure) != 0);
            ASSERT((*mStructMapOut)[structure].convertedStruct != nullptr);

            // Declare copy-to-converted-from-original-struct function (if not already).
            declareStructCopyFromOriginal(structure);

            // The result is call to this function with the value expression assigned to base
            // expression.
            const TFunction *copyFromOriginal = (*mStructMapOut)[structure].copyFromOriginal;

            if (baseExpressionType.isArray())
            {
                // If array, assign every element.
                TransformArrayHelper transformHelper(baseExpression);

                TIntermTyped *element      = nullptr;
                TIntermTyped *valueElement = nullptr;
                while ((element = transformHelper.getNextElement(valueExpression, &valueElement)) !=
                       nullptr)
                {
                    TIntermTyped *functionCall =
                        CreateStructCopyCall(copyFromOriginal, valueElement);
                    writeStatements->push_back(new TIntermBinary(EOpAssign, element, functionCall));
                }
            }
            else
            {
                TIntermTyped *functionCall =
                    CreateStructCopyCall(copyFromOriginal, valueExpression->deepCopy());
                writeStatements->push_back(
                    new TIntermBinary(EOpAssign, baseExpression, functionCall));
            }
            return;
        }

        // If not indexed, the result is transpose(exp)
        if (primaryIndex == nullptr)
        {
            ASSERT(secondaryIndices->empty());

            if (baseExpressionType.isArray())
            {
                // If array, assign every element.
                TransformArrayHelper transformHelper(baseExpression);

                TIntermTyped *element      = nullptr;
                TIntermTyped *valueElement = nullptr;
                while ((element = transformHelper.getNextElement(valueExpression, &valueElement)) !=
                       nullptr)
                {
                    TIntermTyped *valueTransposed = CreateTransposeCall(mSymbolTable, valueElement);
                    writeStatements->push_back(
                        new TIntermBinary(EOpAssign, element, valueTransposed));
                }
            }
            else
            {
                TIntermTyped *valueTransposed =
                    CreateTransposeCall(mSymbolTable, valueExpression->deepCopy());
                writeStatements->push_back(
                    new TIntermBinary(assignmentOperator, baseExpression, valueTransposed));
            }
            return;
        }

        // If indexed, create one assignment per secondary index. If the right-hand side is a
        // scalar, it's used with every assignment. If it's a vector, the assignment is
        // per-component. The right-hand side cannot be a matrix as that would imply left-hand
        // side being a matrix too, which is covered above where |primaryIndex == nullptr|.
        ASSERT(!secondaryIndices->empty());

        bool isValueExpressionScalar = valueExpression->getType().getNominalSize() == 1;
        ASSERT(isValueExpressionScalar || valueExpression->getType().getNominalSize() ==
                                              static_cast<int>(secondaryIndices->size()));

        TOperator primaryIndexOp          = GetIndexOp(primaryIndex);
        TIntermTyped *primaryIndexAsTyped = primaryIndex->getAsTyped();

        for (TIntermNode *secondaryIndex : *secondaryIndices)
        {
            TOperator secondaryIndexOp          = GetIndexOp(secondaryIndex);
            TIntermTyped *secondaryIndexAsTyped = secondaryIndex->getAsTyped();

            // Write to M[secondary][primary] of the stored (transposed) matrix, i.e. the original
            // M[primary][secondary].
            TIntermBinary *colIndexed = new TIntermBinary(
                secondaryIndexOp, baseExpression->deepCopy(), secondaryIndexAsTyped->deepCopy());
            TIntermBinary *colRowIndexed =
                new TIntermBinary(primaryIndexOp, colIndexed, primaryIndexAsTyped->deepCopy());

            TIntermTyped *valueExpressionIndexed = valueExpression->deepCopy();
            if (!isValueExpressionScalar)
            {
                valueExpressionIndexed = new TIntermBinary(secondaryIndexOp, valueExpressionIndexed,
                                                           secondaryIndexAsTyped->deepCopy());
            }

            writeStatements->push_back(
                new TIntermBinary(assignmentOperator, colRowIndexed, valueExpressionIndexed));
        }
    }
const TFunction *getCopyStructFieldFunction(const TType *fromFieldType,
const TType *toFieldType,
bool isCopyToOriginal)
{
ASSERT(fromFieldType->getStruct());
ASSERT(toFieldType->getStruct());
// If copying from or to the original struct, the "to" field struct could require
// conversion to or from the "from" field struct. |isCopyToOriginal| tells us if we
// should expect to find toField or fromField in mStructMapOut, if true or false
// respectively.
const TFunction *fieldCopyFunction = nullptr;
if (isCopyToOriginal)
{
const TStructure *toFieldStruct = toFieldType->getStruct();
auto iter = mStructMapOut->find(toFieldStruct);
if (iter != mStructMapOut->end())
{
declareStructCopyToOriginal(toFieldStruct);
fieldCopyFunction = iter->second.copyToOriginal;
}
}
else
{
const TStructure *fromFieldStruct = fromFieldType->getStruct();
auto iter = mStructMapOut->find(fromFieldStruct);
if (iter != mStructMapOut->end())
{
declareStructCopyFromOriginal(fromFieldStruct);
fieldCopyFunction = iter->second.copyFromOriginal;
}
}
return fieldCopyFunction;
}
void addFieldCopy(TIntermBlock *body,
TIntermTyped *to,
TIntermTyped *from,
bool isCopyToOriginal)
{
const TType &fromType = from->getType();
const TType &toType = to->getType();
TIntermTyped *rhs = from;
if (fromType.getStruct())
{
const TFunction *fieldCopyFunction =
getCopyStructFieldFunction(&fromType, &toType, isCopyToOriginal);
if (fieldCopyFunction)
{
rhs = CreateStructCopyCall(fieldCopyFunction, from);
}
}
else if (fromType.isMatrix())
{
rhs = CreateTransposeCall(mSymbolTable, from);
}
body->appendStatement(new TIntermBinary(EOpAssign, to, rhs));
}
    // Builds and registers an internal function that copies a |from|-typed value into a
    // |to|-typed one, field by field, transposing matrices along the way.  |isCopyToOriginal|
    // indicates the direction (converted-to-original vs original-to-converted) and is forwarded
    // to the field-copy logic.  Returns the newly declared function.
    TFunction *declareStructCopy(const TStructure *from,
                                 const TStructure *to,
                                 bool isCopyToOriginal)
    {
        TType *fromType = new TType(from, true);
        TType *toType   = new TType(to, true);

        // Create the parameter and return value variables.
        TVariable *fromVar = new TVariable(mSymbolTable, ImmutableString("from"), fromType,
                                           SymbolType::AngleInternal);
        TVariable *toVar =
            new TVariable(mSymbolTable, ImmutableString("to"), toType, SymbolType::AngleInternal);

        TIntermSymbol *fromSymbol = new TIntermSymbol(fromVar);
        TIntermSymbol *toSymbol   = new TIntermSymbol(toVar);

        // Create the function body as statements are generated.
        TIntermBlock *body = new TIntermBlock;

        // Declare the result variable.
        TIntermDeclaration *toDecl = new TIntermDeclaration();
        toDecl->appendDeclarator(toSymbol);
        body->appendStatement(toDecl);

        // Iterate over fields of the struct and copy one by one, transposing the matrices. If a
        // struct is encountered that requires a transformation, this function is recursively
        // called. As a result, it is important that the copy functions are placed in the code in
        // order.
        const TFieldList &fromFields = from->fields();
        const TFieldList &toFields   = to->fields();
        ASSERT(fromFields.size() == toFields.size());

        for (size_t fieldIndex = 0; fieldIndex < fromFields.size(); ++fieldIndex)
        {
            TIntermTyped *fieldIndexNode = CreateIndexNode(static_cast<int>(fieldIndex));

            TIntermTyped *fromField =
                new TIntermBinary(EOpIndexDirectStruct, fromSymbol->deepCopy(), fieldIndexNode);
            TIntermTyped *toField = new TIntermBinary(EOpIndexDirectStruct, toSymbol->deepCopy(),
                                                      fieldIndexNode->deepCopy());

            const TType *fromFieldType = fromFields[fieldIndex]->type();
            bool isStructOrMatrix      = fromFieldType->getStruct() || fromFieldType->isMatrix();

            if (fromFieldType->isArray() && isStructOrMatrix)
            {
                // If struct or matrix array, we need to copy element by element.
                TransformArrayHelper transformHelper(toField);

                TIntermTyped *toElement   = nullptr;
                TIntermTyped *fromElement = nullptr;
                while ((toElement = transformHelper.getNextElement(fromField, &fromElement)) !=
                       nullptr)
                {
                    addFieldCopy(body, toElement, fromElement, isCopyToOriginal);
                }
            }
            else
            {
                addFieldCopy(body, toField, fromField, isCopyToOriginal);
            }
        }

        // Add return statement.
        body->appendStatement(new TIntermBranch(EOpReturn, toSymbol->deepCopy()));

        // Declare the function
        TFunction *copyFunction = new TFunction(mSymbolTable, kEmptyImmutableString,
                                                SymbolType::AngleInternal, toType, true);
        copyFunction->addParameter(fromVar);

        TIntermFunctionDefinition *functionDef =
            CreateInternalFunctionDefinitionNode(*copyFunction, body);
        mCopyFunctionDefinitionsOut->push_back(functionDef);

        return copyFunction;
    }
void declareStructCopyFromOriginal(const TStructure *structure)
{
StructConversionData *structData = &(*mStructMapOut)[structure];
if (structData->copyFromOriginal)
{
return;
}
structData->copyFromOriginal =
declareStructCopy(structure, structData->convertedStruct, false);
}
void declareStructCopyToOriginal(const TStructure *structure)
{
StructConversionData *structData = &(*mStructMapOut)[structure];
if (structData->copyToOriginal)
{
return;
}
structData->copyToOriginal =
declareStructCopy(structData->convertedStruct, structure, true);
}
    TCompiler *mCompiler;

    // This traverser can call itself to transform a subexpression before moving on. However, it
    // needs to accumulate conversion functions in inner passes. The fields below marked with Out
    // or In are inherited from the outer pass (for inner passes), or point to storage fields in
    // mOuterPass (for the outer pass). The latter should not be used by the inner passes as they
    // would be empty, so they are placed inside a struct to make them explicit.
    struct
    {
        // Converted counterparts of structs that contain matrices.
        StructMap structMap;
        // Converted counterparts of interface block instance variables.
        InterfaceBlockMap interfaceBlockMap;
        // Which interface block fields were actually transposed.
        InterfaceBlockFieldConverted interfaceBlockFieldConverted;
        // Generated struct copy function definitions, in declaration order.
        TIntermSequence copyFunctionDefinitions;
    } mOuterPass;

    // A map from structures with matrices to their converted version.
    StructMap *mStructMapOut;
    // A map from interface block instances with row-major matrices to their converted variable.
    const InterfaceBlockMap &mInterfaceBlockMapIn;
    // A map from interface block fields to whether they need to be converted. If a field was
    // already column-major, it shouldn't be transposed.
    const InterfaceBlockFieldConverted &mInterfaceBlockFieldConvertedIn;
    // Where generated copy function definitions are accumulated (points into mOuterPass for the
    // outer pass).
    TIntermSequence *mCopyFunctionDefinitionsOut;
    // If set, it's an inner pass and this will point to the outer pass traverser. All statement
    // insertions are stored in the outer traverser and applied at once in the end. This prevents
    // the inner passes from adding statements which invalidates the outer traverser's statement
    // position tracking.
    RewriteRowMajorMatricesTraverser *mOuterTraverser;
    // If set, it's an inner pass that should only process the right-hand side of this particular
    // node.
    TIntermBinary *mInnerPassRoot;
    // True while the inner pass is traversing inside mInnerPassRoot's subtree.
    bool mIsProcessingInnerPassSubtree;
};
} // anonymous namespace
// Entry point: rewrites row-major matrices in uniform and storage buffers into column-major
// form, inserting the generated struct copy functions before the first function definition.
// Returns false if applying the queued tree updates fails.
bool RewriteRowMajorMatrices(TCompiler *compiler, TIntermBlock *root, TSymbolTable *symbolTable)
{
    RewriteRowMajorMatricesTraverser traverser(compiler, symbolTable);
    root->traverse(&traverser);

    if (!traverser.updateTree(compiler, root))
    {
        return false;
    }

    // The copy functions must precede any function that calls them, so place them right before
    // the first function definition in the translation unit.
    const size_t insertionIndex = FindFirstFunctionDefinitionIndex(root);
    root->insertChildNodes(insertionIndex, *traverser.getStructCopyFunctions());

    return compiler->validateAST(root);
}
} // namespace sh
//
// Copyright 2019 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// RewriteRowMajorMatrices: Change row-major matrices to column-major in uniform and storage
// buffers.

#ifndef COMPILER_TRANSLATOR_TREEOPS_REWRITEROWMAJORMATRICES_H_
#define COMPILER_TRANSLATOR_TREEOPS_REWRITEROWMAJORMATRICES_H_

#include "common/angleutils.h"

namespace sh
{
class TCompiler;
class TIntermBlock;
class TSymbolTable;

// Rewrites row-major matrices in interface blocks to column-major, adjusting every read and
// write accordingly.  Returns false if the AST update fails.
ANGLE_NO_DISCARD bool RewriteRowMajorMatrices(TCompiler *compiler,
                                              TIntermBlock *root,
                                              TSymbolTable *symbolTable);
}  // namespace sh

#endif  // COMPILER_TRANSLATOR_TREEOPS_REWRITEROWMAJORMATRICES_H_
......@@ -133,10 +133,13 @@ class TIntermTraverser : angle::NonCopyable
friend void TIntermConstantUnion::traverse(TIntermTraverser *);
friend void TIntermFunctionPrototype::traverse(TIntermTraverser *);
TIntermNode *getParentNode() { return mPath.size() <= 1 ? nullptr : mPath[mPath.size() - 2u]; }
TIntermNode *getParentNode() const
{
return mPath.size() <= 1 ? nullptr : mPath[mPath.size() - 2u];
}
// Return the nth ancestor of the node being traversed. getAncestorNode(0) == getParentNode()
TIntermNode *getAncestorNode(unsigned int n)
TIntermNode *getAncestorNode(unsigned int n) const
{
if (mPath.size() > n + 1u)
{
......@@ -147,6 +150,12 @@ class TIntermTraverser : angle::NonCopyable
const TIntermBlock *getParentBlock() const;
TIntermNode *getRootNode() const
{
ASSERT(!mPath.empty());
return mPath.front();
}
void pushParentBlock(TIntermBlock *node);
void incrementParentBlockPos();
void popParentBlock();
......
......@@ -715,21 +715,6 @@
3520 VULKAN : dEQP-GLES31.functional.state_query.texture_level.texture_2d.compressed_integer = SKIP
3520 VULKAN : dEQP-GLES31.functional.state_query.texture_level.texture_2d.compressed_float = SKIP
// column/row_major specified on struct member:
3443 VULKAN : dEQP-GLES31.functional.ubo.random.all_per_block_buffers.2 = FAIL
3443 VULKAN : dEQP-GLES31.functional.ubo.random.all_per_block_buffers.9 = FAIL
3443 VULKAN : dEQP-GLES31.functional.ubo.random.all_per_block_buffers.12 = FAIL
3443 VULKAN : dEQP-GLES31.functional.ubo.random.all_per_block_buffers.16 = FAIL
3443 VULKAN : dEQP-GLES31.functional.ubo.random.all_per_block_buffers.23 = FAIL
3443 VULKAN : dEQP-GLES31.functional.ubo.random.all_per_block_buffers.25 = FAIL
3443 VULKAN : dEQP-GLES31.functional.ubo.random.all_per_block_buffers.29 = FAIL
3443 VULKAN : dEQP-GLES31.functional.ubo.random.all_per_block_buffers.33 = FAIL
3443 VULKAN : dEQP-GLES31.functional.ubo.random.all_shared_buffer.3 = FAIL
3443 VULKAN : dEQP-GLES31.functional.ubo.random.all_shared_buffer.11 = FAIL
3443 VULKAN : dEQP-GLES31.functional.ubo.random.all_shared_buffer.25 = FAIL
3443 VULKAN : dEQP-GLES31.functional.ubo.random.all_shared_buffer.39 = FAIL
3443 VULKAN : dEQP-GLES31.functional.ubo.random.all_shared_buffer.45 = FAIL
// Inactive SSBOs with flexible array member (about 20% of these tests are affected):
3714 VULKAN : dEQP-GLES31.functional.ssbo.layout.random.* = FAIL
......
......@@ -560,29 +560,6 @@
3219 VULKAN : dEQP-GLES3.functional.negative_api.shader.link_program = FAIL
3219 VULKAN : dEQP-GLES3.functional.negative_api.shader.use_program = FAIL
// column/row_major specified on struct member:
3443 VULKAN : dEQP-GLES3.functional.ubo.random.basic_arrays.10 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.all_per_block_buffers.8 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.all_per_block_buffers.17 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.all_per_block_buffers.25 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.all_per_block_buffers.49 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.all_shared_buffer.11 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.all_shared_buffer.48 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.basic_arrays.18 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.basic_arrays.20 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.basic_arrays.23 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.basic_arrays.8 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.basic_instance_arrays.13 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.nested_structs.12 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.nested_structs.17 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.nested_structs.5 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.nested_structs_arrays.2 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.nested_structs_arrays.3 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.nested_structs_arrays.4 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.nested_structs_instance_arrays.12 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.nested_structs_instance_arrays.18 = FAIL
3443 VULKAN : dEQP-GLES3.functional.ubo.random.nested_structs_instance_arrays.24 = FAIL
3221 VULKAN : dEQP-GLES3.functional.instanced.draw_elements_instanced.attribute_divisor.2*_instances = FAIL
3221 VULKAN : dEQP-GLES3.functional.instanced.draw_elements_instanced.attribute_divisor.4_instances = FAIL
3221 VULKAN : dEQP-GLES3.functional.instanced.draw_elements_instanced.mixed.2*_instances = FAIL
......
......@@ -115,17 +115,5 @@
3818 VULKAN : KHR-GLES3.copy_tex_image_conversions.forbidden.renderbuffer_cubemap_posy = FAIL
3818 VULKAN : KHR-GLES3.copy_tex_image_conversions.forbidden.renderbuffer_cubemap_posz = FAIL
// column/row_major specified on struct member:
3443 VULKAN : KHR-GLES3.shaders.uniform_block.random.all_per_block_buffers.16 = FAIL
3443 VULKAN : KHR-GLES3.shaders.uniform_block.random.all_per_block_buffers.3 = FAIL
3443 VULKAN : KHR-GLES3.shaders.uniform_block.random.all_per_block_buffers.5 = FAIL
3443 VULKAN : KHR-GLES3.shaders.uniform_block.random.all_per_block_buffers.7 = FAIL
3443 VULKAN : KHR-GLES3.shaders.uniform_block.random.all_shared_buffer.0 = FAIL
3443 VULKAN : KHR-GLES3.shaders.uniform_block.random.basic_instance_arrays.0 = FAIL
3443 VULKAN : KHR-GLES3.shaders.uniform_block.random.basic_instance_arrays.4 = FAIL
3443 VULKAN : KHR-GLES3.shaders.uniform_block.random.basic_instance_arrays.5 = FAIL
3443 VULKAN : KHR-GLES3.shaders.uniform_block.random.nested_structs_arrays.7 = FAIL
3443 VULKAN : KHR-GLES3.shaders.uniform_block.random.nested_structs_arrays_instance_arrays.3 = FAIL
// Require 3D textures.
3188 VULKAN : KHR-GLES3.packed_pixels.varied_rectangle.* = SKIP
......@@ -1552,7 +1552,7 @@ TEST_P(GLSLTest, FixedShaderLength)
{
GLuint shader = glCreateShader(GL_FRAGMENT_SHADER);
const std::string appendGarbage = "abcasdfasdfasdfasdfasdf";
const std::string appendGarbage = "abcdefghijklmnopqrstuvwxyz";
const std::string source = "void main() { gl_FragColor = vec4(0, 0, 0, 0); }" + appendGarbage;
const char *sourceArray[1] = {source.c_str()};
GLint lengths[1] = {static_cast<GLint>(source.length() - appendGarbage.length())};
......@@ -1625,7 +1625,7 @@ TEST_P(GLSLTest, ZeroShaderLength)
GLuint shader = glCreateShader(GL_FRAGMENT_SHADER);
const char *sourceArray[] = {
"adfasdf", "34534", "void main() { gl_FragColor = vec4(0, 0, 0, 0); }", "", "asdfasdfsdsdf",
"abcdefg", "34534", "void main() { gl_FragColor = vec4(0, 0, 0, 0); }", "", "abcdefghijklm",
};
GLint lengths[] = {
0, 0, -1, 0, 0,
......@@ -2823,8 +2823,6 @@ TEST_P(WebGLGLSLTest, MaxVaryingVec3ArrayAndMaxPlusOneFloatArray)
false);
}
} // anonymous namespace
// Test that FindLSB and FindMSB return correct values in their corner cases.
TEST_P(GLSLTest_ES31, FindMSBAndFindLSBCornerCases)
{
......@@ -6390,6 +6388,845 @@ TEST_P(GLSLTest, MemoryExhaustedTest)
EXPECT_NE(0u, program);
}
// Helper functions for MixedRowAndColumnMajorMatrices* tests

// Round |value| up to a multiple of |alignment|, which must be a power of 2.
uint32_t RoundUpPow2(uint32_t value, uint32_t alignment)
{
    return (value + alignment - 1) & ~(alignment - 1);
}

// Fill the provided buffer with matrices based on the given dimensions.  The buffer should be
// large enough to accommodate the data.
//
// - matrixDims[m]:       {columns, rows} of matrix m.
// - matrixIsColMajor[m]: whether matrix m is declared column-major in the shader.
// - isStd430:            std430 packs each column/row at a power-of-2 component stride;
//                        std140 always uses a 4-float (vec4) stride.
// - isTransposed:        generate the flipped-majorness storage of the same logical matrices;
//                        used for the row-major variants of the test buffers.
//
// Element (col, row) of every matrix has the value col * 4 + row, matching the VERIFY_IN
// checks in the test shaders.  Returns the total data size, in floats.
uint32_t FillBuffer(const std::pair<uint32_t, uint32_t> matrixDims[],
                    const bool matrixIsColMajor[],
                    size_t matrixCount,
                    float data[],
                    bool isStd430,
                    bool isTransposed)
{
    // Offset is tracked in floats (not bytes).  uint32_t matches the return type and the
    // RoundUpPow2 parameter; the original size_t silently narrowed on both.
    uint32_t offset = 0;
    for (size_t m = 0; m < matrixCount; ++m)
    {
        uint32_t cols   = matrixDims[m].first;
        uint32_t rows   = matrixDims[m].second;
        bool isColMajor = matrixIsColMajor[m] != isTransposed;

        // A column-major matrix is stored as |cols| arrays of |rows| components each, and
        // vice versa for row-major.
        uint32_t arraySize              = isColMajor ? cols : rows;
        uint32_t arrayElementComponents = isColMajor ? rows : cols;
        // Stride between array elements: 4 floats with std140; with std430, the component
        // count rounded up to a power of 2 (vec3 pads to vec4, vec2 stays at 2).
        uint32_t stride = isStd430 ? RoundUpPow2(arrayElementComponents, 2) : 4;

        // Each matrix starts aligned to its element stride.
        offset = RoundUpPow2(offset, stride);

        for (uint32_t i = 0; i < arraySize; ++i)
        {
            for (uint32_t c = 0; c < arrayElementComponents; ++c)
            {
                uint32_t row = isColMajor ? c : i;
                uint32_t col = isColMajor ? i : c;

                data[offset + i * stride + c] = static_cast<float>(col * 4 + row);
            }
        }
        offset += arraySize * stride;
    }
    return offset;
}
// Upload |data| into |buffer| and attach the buffer to binding point |bindingIndex| of the
// uniform (isUniform == true) or shader-storage (isUniform == false) indexed target.  For
// uniform blocks, additionally associate the named block of |program| with that binding.
void InitBuffer(GLuint program,
                const char *name,
                GLuint buffer,
                uint32_t bindingIndex,
                float data[],
                uint32_t dataSize,
                bool isUniform)
{
    GLenum bindPoint = GL_SHADER_STORAGE_BUFFER;
    if (isUniform)
    {
        bindPoint = GL_UNIFORM_BUFFER;
    }

    // Binding to the indexed target also binds to the generic target used by glBufferData.
    glBindBufferBase(bindPoint, bindingIndex, buffer);
    glBufferData(bindPoint, dataSize * sizeof(*data), data, GL_STATIC_DRAW);

    if (isUniform)
    {
        // The test UBOs don't carry layout(binding = N) in the shader, so wire the block to
        // the chosen binding point here.
        GLint blockIndex = glGetUniformBlockIndex(program, name);
        glUniformBlockBinding(program, blockIndex, bindingIndex);
    }
}
// Verify that buffer data is written by the shader as expected.  |dataSize| is a count of
// floats, consistent with the value returned by FillBuffer.
bool VerifyBuffer(GLuint buffer, const float data[], uint32_t dataSize)
{
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);

    // glMapBufferRange takes a length in *bytes*.  The original passed |dataSize| (a float
    // count) as the length while comparing dataSize * sizeof(float) bytes below, reading past
    // the end of the mapped range.
    const float *ptr = reinterpret_cast<const float *>(glMapBufferRange(
        GL_SHADER_STORAGE_BUFFER, 0, dataSize * sizeof(*data), GL_MAP_READ_BIT));
    if (ptr == nullptr)
    {
        // Mapping failed; nothing was mapped, so there is nothing to unmap.
        return false;
    }

    bool isCorrect = memcmp(ptr, data, dataSize * sizeof(*data)) == 0;
    glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);

    return isCorrect;
}
// Test reading from UBOs and SSBOs and writing to SSBOs with mixed row- and colum-major layouts in
// both std140 and std430 layouts. Tests many combinations of std140 vs std430, struct being used
// as row- or column-major in different UBOs, reading from UBOs and SSBOs and writing to SSBOs,
// nested structs, matrix arrays, inout parameters etc.
//
// Some very specific corner cases that are not covered here are tested in the subsequent tests.
TEST_P(GLSLTest_ES31, MixedRowAndColumnMajorMatrices)
{
// Fails on Nvidia because having |Matrices| qualified as row-major in one UBO makes the other
// UBO also see it as row-major despite explicit column-major qualifier.
// http://anglebug.com/3830
ANGLE_SKIP_TEST_IF(IsNVIDIA() && IsOpenGL());
// Fails on mesa because in the first UBO which is qualified as column-major, |Matrices| is
// read column-major despite explicit row-major qualifier. http://anglebug.com/3837
ANGLE_SKIP_TEST_IF(IsLinux() && IsIntel() && IsOpenGL());
// Fails on windows AMD on GL: http://anglebug.com/3838
ANGLE_SKIP_TEST_IF(IsWindows() && IsOpenGL() && IsAMD());
// Fails to compile the shader on Android. http://anglebug.com/3839
ANGLE_SKIP_TEST_IF(IsAndroid() && IsOpenGL());
// Fails on assertion in translation to D3D. http://anglebug.com/3841
ANGLE_SKIP_TEST_IF(IsD3D11());
// Fails on SSBO validation on Android/Vulkan. http://anglebug.com/3840
ANGLE_SKIP_TEST_IF(IsAndroid() && IsVulkan());
// Fails input verification as well as std140 SSBO validation. http://anglebug.com/3844
ANGLE_SKIP_TEST_IF(IsWindows() && IsAMD() && IsVulkan());
constexpr char kFS[] = R"(#version 310 es
precision highp float;
out vec4 outColor;
struct Inner
{
mat3x4 m3c4r;
mat4x3 m4c3r;
};
struct Matrices
{
mat2 m2c2r;
mat2x3 m2c3r[2];
mat3x2 m3c2r;
Inner inner;
};
// For simplicity, the layouts are either of:
// - col-major mat4, row-major rest
// - row-major mat4, col-major rest
//
// The former is tagged with c, the latter with r.
layout(std140, column_major) uniform Ubo140c
{
mat4 m4c4r;
layout(row_major) Matrices m;
} ubo140cIn;
layout(std140, row_major) uniform Ubo140r
{
mat4 m4c4r;
layout(column_major) Matrices m;
} ubo140rIn;
layout(std140, row_major, binding = 0) buffer Ssbo140c
{
layout(column_major) mat4 m4c4r;
Matrices m;
} ssbo140cIn;
layout(std140, column_major, binding = 1) buffer Ssbo140r
{
layout(row_major) mat4 m4c4r;
Matrices m;
} ssbo140rIn;
layout(std430, column_major, binding = 2) buffer Ssbo430c
{
mat4 m4c4r;
layout(row_major) Matrices m;
} ssbo430cIn;
layout(std430, row_major, binding = 3) buffer Ssbo430r
{
mat4 m4c4r;
layout(column_major) Matrices m;
} ssbo430rIn;
layout(std140, row_major, binding = 4) buffer Ssbo140cOut
{
layout(column_major) mat4 m4c4r;
Matrices m;
} ssbo140cOut;
layout(std140, column_major, binding = 5) buffer Ssbo140rOut
{
layout(row_major) mat4 m4c4r;
Matrices m;
} ssbo140rOut;
layout(std430, column_major, binding = 6) buffer Ssbo430cOut
{
mat4 m4c4r;
layout(row_major) Matrices m;
} ssbo430cOut;
layout(std430, row_major, binding = 7) buffer Ssbo430rOut
{
mat4 m4c4r;
layout(column_major) Matrices m;
} ssbo430rOut;
#define EXPECT(result, expression, value) if ((expression) != value) { result = false; }
#define EXPECTV(result, expression, value) if (any(notEqual(expression, value))) { result = false; }
#define VERIFY_IN(result, mat, cols, rows) \
EXPECT(result, mat[0].x, 0.0); \
EXPECT(result, mat[0][1], 1.0); \
EXPECTV(result, mat[0].xy, vec2(0, 1)); \
EXPECTV(result, mat[1].xy, vec2(4, 5)); \
for (int c = 0; c < cols; ++c) \
{ \
for (int r = 0; r < rows; ++r) \
{ \
EXPECT(result, mat[c][r], float(c * 4 + r)); \
} \
}
#define COPY(matIn, matOut, cols, rows) \
matOut = matOut + matIn; \
/* random operations for testing */ \
matOut[0].x += matIn[0].x + matIn[1].x; \
matOut[0].x -= matIn[1].x; \
matOut[0][1] += matIn[0][1]; \
matOut[1] += matIn[1]; \
matOut[1].xy -= matIn[1].xy; \
/* undo the above to get back matIn */ \
matOut[0].x -= matIn[0].x; \
matOut[0][1] -= matIn[0][1]; \
matOut[1] -= matIn[1]; \
matOut[1].xy += matIn[1].xy;
bool verifyMatrices(in Matrices m)
{
bool result = true;
VERIFY_IN(result, m.m2c2r, 2, 2);
VERIFY_IN(result, m.m2c3r[0], 2, 3);
VERIFY_IN(result, m.m2c3r[1], 2, 3);
VERIFY_IN(result, m.m3c2r, 3, 2);
VERIFY_IN(result, m.inner.m3c4r, 3, 4);
VERIFY_IN(result, m.inner.m4c3r, 4, 3);
return result;
}
mat4 copyMat4(in mat4 m)
{
return m;
}
void copyMatrices(in Matrices mIn, inout Matrices mOut)
{
COPY(mIn.m2c2r, mOut.m2c2r, 2, 2);
COPY(mIn.m2c3r[0], mOut.m2c3r[0], 2, 3);
COPY(mIn.m2c3r[1], mOut.m2c3r[1], 2, 3);
COPY(mIn.m3c2r, mOut.m3c2r, 3, 2);
COPY(mIn.inner.m3c4r, mOut.inner.m3c4r, 3, 4);
COPY(mIn.inner.m4c3r, mOut.inner.m4c3r, 4, 3);
}
void main()
{
bool result = true;
VERIFY_IN(result, ubo140cIn.m4c4r, 4, 4);
VERIFY_IN(result, ubo140cIn.m.m2c3r[0], 2, 3);
EXPECT(result, verifyMatrices(ubo140cIn.m), true);
VERIFY_IN(result, ubo140rIn.m4c4r, 4, 4);
VERIFY_IN(result, ubo140rIn.m.m2c2r, 2, 2);
EXPECT(result, verifyMatrices(ubo140rIn.m), true);
VERIFY_IN(result, ssbo140cIn.m4c4r, 4, 4);
VERIFY_IN(result, ssbo140cIn.m.m3c2r, 3, 2);
EXPECT(result, verifyMatrices(ssbo140cIn.m), true);
VERIFY_IN(result, ssbo140rIn.m4c4r, 4, 4);
VERIFY_IN(result, ssbo140rIn.m.inner.m4c3r, 4, 3);
EXPECT(result, verifyMatrices(ssbo140rIn.m), true);
VERIFY_IN(result, ssbo430cIn.m4c4r, 4, 4);
VERIFY_IN(result, ssbo430cIn.m.m2c3r[1], 2, 3);
EXPECT(result, verifyMatrices(ssbo430cIn.m), true);
VERIFY_IN(result, ssbo430rIn.m4c4r, 4, 4);
VERIFY_IN(result, ssbo430rIn.m.inner.m3c4r, 3, 4);
EXPECT(result, verifyMatrices(ssbo430rIn.m), true);
// Only assign to SSBO from a single pixel.
bool isOriginPixel = all(lessThan(gl_FragCoord.xy, vec2(1.0, 1.0)));
if (isOriginPixel)
{
ssbo140cOut.m4c4r = copyMat4(ssbo140cIn.m4c4r);
copyMatrices(ssbo430cIn.m, ssbo140cOut.m);
ssbo140cOut.m.m2c3r[1] = mat2x3(0);
COPY(ssbo430cIn.m.m2c3r[1], ssbo140cOut.m.m2c3r[1], 2, 3);
ssbo140rOut.m4c4r = copyMat4(ssbo140rIn.m4c4r);
copyMatrices(ssbo430rIn.m, ssbo140rOut.m);
ssbo140rOut.m.inner.m3c4r = mat3x4(0);
COPY(ssbo430rIn.m.inner.m3c4r, ssbo140rOut.m.inner.m3c4r, 3, 4);
ssbo430cOut.m4c4r = copyMat4(ssbo430cIn.m4c4r);
copyMatrices(ssbo140cIn.m, ssbo430cOut.m);
ssbo430cOut.m.m3c2r = mat3x2(0);
COPY(ssbo430cIn.m.m3c2r, ssbo430cOut.m.m3c2r, 3, 2);
ssbo430rOut.m4c4r = copyMat4(ssbo430rIn.m4c4r);
copyMatrices(ssbo140rIn.m, ssbo430rOut.m);
ssbo430rOut.m.inner.m4c3r = mat4x3(0);
COPY(ssbo430rIn.m.inner.m4c3r, ssbo430rOut.m.inner.m4c3r, 4, 3);
}
outColor = result ? vec4(0, 1, 0, 1) : vec4(1, 0, 0, 1);
})";
ANGLE_GL_PROGRAM(program, essl31_shaders::vs::Simple(), kFS);
EXPECT_GL_NO_ERROR();
// Buffer contents, in shader declaration order: m4c4r, then Matrices: m2c2r, m2c3r[0],
// m2c3r[1], m3c2r, inner.m3c4r and inner.m4c3r.  kMatrixIsColMajor reflects the "c" layout
// (column-major mat4, row-major rest).
constexpr size_t kMatrixCount = 7;
constexpr std::pair<uint32_t, uint32_t> kMatrixDims[kMatrixCount] = {
{4, 4}, {2, 2}, {2, 3}, {2, 3}, {3, 2}, {3, 4}, {4, 3},
};
constexpr bool kMatrixIsColMajor[kMatrixCount] = {
true, false, false, false, false, false, false,
};
float dataStd140ColMajor[kMatrixCount * 4 * 4] = {};
float dataStd140RowMajor[kMatrixCount * 4 * 4] = {};
float dataStd430ColMajor[kMatrixCount * 4 * 4] = {};
float dataStd430RowMajor[kMatrixCount * 4 * 4] = {};
float dataZeros[kMatrixCount * 4 * 4] = {};
// The *RowMajor buffers hold the same logical matrices as the *ColMajor ones, but with
// transposed storage (FillBuffer's isTransposed), matching the "r"-tagged blocks.
const uint32_t sizeStd140ColMajor =
FillBuffer(kMatrixDims, kMatrixIsColMajor, kMatrixCount, dataStd140ColMajor, false, false);
const uint32_t sizeStd140RowMajor =
FillBuffer(kMatrixDims, kMatrixIsColMajor, kMatrixCount, dataStd140RowMajor, false, true);
const uint32_t sizeStd430ColMajor =
FillBuffer(kMatrixDims, kMatrixIsColMajor, kMatrixCount, dataStd430ColMajor, true, false);
const uint32_t sizeStd430RowMajor =
FillBuffer(kMatrixDims, kMatrixIsColMajor, kMatrixCount, dataStd430RowMajor, true, true);
GLBuffer uboStd140ColMajor, uboStd140RowMajor;
GLBuffer ssboStd140ColMajor, ssboStd140RowMajor;
GLBuffer ssboStd430ColMajor, ssboStd430RowMajor;
GLBuffer ssboStd140ColMajorOut, ssboStd140RowMajorOut;
GLBuffer ssboStd430ColMajorOut, ssboStd430RowMajorOut;
// Inputs get the expected data; outputs start zeroed.  SSBO binding indices 0-7 must match
// the layout(binding = N) qualifiers in the shader.
InitBuffer(program, "Ubo140c", uboStd140ColMajor, 0, dataStd140ColMajor, sizeStd140ColMajor,
true);
InitBuffer(program, "Ubo140r", uboStd140RowMajor, 1, dataStd140RowMajor, sizeStd140RowMajor,
true);
InitBuffer(program, "Ssbo140c", ssboStd140ColMajor, 0, dataStd140ColMajor, sizeStd140ColMajor,
false);
InitBuffer(program, "Ssbo140r", ssboStd140RowMajor, 1, dataStd140RowMajor, sizeStd140RowMajor,
false);
InitBuffer(program, "Ssbo430c", ssboStd430ColMajor, 2, dataStd430ColMajor, sizeStd430ColMajor,
false);
InitBuffer(program, "Ssbo430r", ssboStd430RowMajor, 3, dataStd430RowMajor, sizeStd430RowMajor,
false);
InitBuffer(program, "Ssbo140cOut", ssboStd140ColMajorOut, 4, dataZeros, sizeStd140ColMajor,
false);
InitBuffer(program, "Ssbo140rOut", ssboStd140RowMajorOut, 5, dataZeros, sizeStd140RowMajor,
false);
InitBuffer(program, "Ssbo430cOut", ssboStd430ColMajorOut, 6, dataZeros, sizeStd430ColMajor,
false);
InitBuffer(program, "Ssbo430rOut", ssboStd430RowMajorOut, 7, dataZeros, sizeStd430RowMajor,
false);
EXPECT_GL_NO_ERROR();
// Green means every read in the shader observed the expected values.
drawQuad(program, essl31_shaders::PositionAttrib(), 0.5f, 1.0f, true);
EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
// The shader copies each input into the output SSBO with the same layout, so the outputs
// must match the input data bit-for-bit.
EXPECT_TRUE(VerifyBuffer(ssboStd140ColMajorOut, dataStd140ColMajor, sizeStd140ColMajor));
EXPECT_TRUE(VerifyBuffer(ssboStd140RowMajorOut, dataStd140RowMajor, sizeStd140RowMajor));
EXPECT_TRUE(VerifyBuffer(ssboStd430ColMajorOut, dataStd430ColMajor, sizeStd430ColMajor));
EXPECT_TRUE(VerifyBuffer(ssboStd430RowMajorOut, dataStd430RowMajor, sizeStd430RowMajor));
}
// Test that array UBOs are transformed correctly.
TEST_P(GLSLTest_ES3, MixedRowAndColumnMajorMatrices_ArrayBufferDeclaration)
{
    // Fails to compile the shader on Android: http://anglebug.com/3839
    ANGLE_SKIP_TEST_IF(IsAndroid() && IsOpenGL());
    // http://anglebug.com/3837
    ANGLE_SKIP_TEST_IF(IsLinux() && IsIntel() && IsOpenGL());
    // Fails on Mac on Intel and AMD: http://anglebug.com/3842
    ANGLE_SKIP_TEST_IF(IsOSX() && IsOpenGL() && (IsIntel() || IsAMD()));
    // Fails on windows AMD on GL: http://anglebug.com/3838
    ANGLE_SKIP_TEST_IF(IsWindows() && IsOpenGL() && IsAMD());
    // Fails on D3D due to mistranslation: http://anglebug.com/3841
    ANGLE_SKIP_TEST_IF(IsD3D11());

    constexpr char kFS[] = R"(#version 300 es
precision highp float;
out vec4 outColor;
layout(std140, column_major) uniform Ubo
{
mat4 m1;
layout(row_major) mat4 m2;
} ubo[3];
#define EXPECT(result, expression, value) if ((expression) != value) { result = false; }
#define VERIFY_IN(result, mat, cols, rows) \
for (int c = 0; c < cols; ++c) \
{ \
for (int r = 0; r < rows; ++r) \
{ \
EXPECT(result, mat[c][r], float(c * 4 + r)); \
} \
}
void main()
{
bool result = true;
VERIFY_IN(result, ubo[0].m1, 4, 4);
VERIFY_IN(result, ubo[0].m2, 4, 4);
VERIFY_IN(result, ubo[1].m1, 4, 4);
VERIFY_IN(result, ubo[1].m2, 4, 4);
VERIFY_IN(result, ubo[2].m1, 4, 4);
VERIFY_IN(result, ubo[2].m2, 4, 4);
outColor = result ? vec4(0, 1, 0, 1) : vec4(1, 0, 0, 1);
})";

    ANGLE_GL_PROGRAM(program, essl3_shaders::vs::Simple(), kFS);
    EXPECT_GL_NO_ERROR();

    // Each block element holds a column-major mat4 followed by a row-major mat4.
    constexpr size_t kMatrixCount                                     = 2;
    constexpr std::pair<uint32_t, uint32_t> kMatrixDims[kMatrixCount] = {
        {4, 4},
        {4, 4},
    };
    constexpr bool kMatrixIsColMajor[kMatrixCount] = {
        true,
        false,
    };

    float data[kMatrixCount * 4 * 4] = {};
    const uint32_t size =
        FillBuffer(kMatrixDims, kMatrixIsColMajor, kMatrixCount, data, false, false);

    // Give each element of the UBO array its own buffer on its own binding point.  The
    // original code bound all three blocks to binding index 0, so every glBindBufferBase
    // overwrote the previous one and only the last buffer was actually attached.
    GLBuffer ubos[3];

    InitBuffer(program, "Ubo[0]", ubos[0], 0, data, size, true);
    InitBuffer(program, "Ubo[1]", ubos[1], 1, data, size, true);
    InitBuffer(program, "Ubo[2]", ubos[2], 2, data, size, true);
    EXPECT_GL_NO_ERROR();

    // This is an ES3 test; use the ES3 shader's position attribute for the draw (was
    // essl31_shaders, which only worked by coincidence of the attribute name).
    drawQuad(program, essl3_shaders::PositionAttrib(), 0.5f, 1.0f, true);
    EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
}
// Test that side effects when transforming read operations are preserved.
TEST_P(GLSLTest_ES3, MixedRowAndColumnMajorMatrices_ReadSideEffect)
{
    // http://anglebug.com/3831
    ANGLE_SKIP_TEST_IF(IsNVIDIA() && IsOpenGL());
    // Fails on Mac on Intel and AMD: http://anglebug.com/3842
    ANGLE_SKIP_TEST_IF(IsOSX() && IsOpenGL() && (IsIntel() || IsAMD()));
    // Fails on D3D due to mistranslation: http://anglebug.com/3841
    ANGLE_SKIP_TEST_IF(IsD3D11());

    // Note on array sizes: main() indexes |s| up to 1 and |m2| up to 2 (e.g.
    // s[0].m2[sideEffect += 1] twice, and s[1].m2[sideEffect - 2] with sideEffect == 4), so
    // the declarations must be m2[3] and s[2].  The original had the sizes swapped
    // (m2[2]/s[3]), making those reads index past the end of m2.  Total matrix count (1 + 6)
    // and the buffer layout are unchanged.
    constexpr char kFS[] = R"(#version 300 es
precision highp float;
out vec4 outColor;
struct S
{
mat2x3 m2[3];
};
layout(std140, column_major) uniform Ubo
{
mat4 m1;
layout(row_major) S s[2];
} ubo;
#define EXPECT(result, expression, value) if ((expression) != value) { result = false; }
#define VERIFY_IN(result, mat, cols, rows) \
for (int c = 0; c < cols; ++c) \
{ \
for (int r = 0; r < rows; ++r) \
{ \
EXPECT(result, mat[c][r], float(c * 4 + r)); \
} \
}
bool verify2x3(mat2x3 mat)
{
bool result = true;
for (int c = 0; c < 2; ++c)
{
for (int r = 0; r < 3; ++r)
{
EXPECT(result, mat[c][r], float(c * 4 + r));
}
}
return result;
}
void main()
{
bool result = true;
int sideEffect = 0;
VERIFY_IN(result, ubo.m1, 4, 4);
EXPECT(result, verify2x3(ubo.s[0].m2[0]), true);
EXPECT(result, verify2x3(ubo.s[0].m2[sideEffect += 1]), true);
EXPECT(result, verify2x3(ubo.s[0].m2[sideEffect += 1]), true);
EXPECT(result, sideEffect, 2);
EXPECT(result, verify2x3(ubo.s[sideEffect = 1].m2[0]), true);
EXPECT(result, verify2x3(ubo.s[1].m2[(sideEffect = 4) - 3]), true);
EXPECT(result, verify2x3(ubo.s[1].m2[sideEffect - 2]), true);
EXPECT(result, sideEffect, 4);
outColor = result ? vec4(0, 1, 0, 1) : vec4(1, 0, 0, 1);
})";

    ANGLE_GL_PROGRAM(program, essl3_shaders::vs::Simple(), kFS);
    EXPECT_GL_NO_ERROR();

    // One column-major mat4 (m1) followed by the six row-major mat2x3s of s[2].m2[3], all
    // filled with the same values.
    constexpr size_t kMatrixCount                                     = 7;
    constexpr std::pair<uint32_t, uint32_t> kMatrixDims[kMatrixCount] = {
        {4, 4}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3},
    };
    constexpr bool kMatrixIsColMajor[kMatrixCount] = {
        true, false, false, false, false, false, false,
    };

    float data[kMatrixCount * 4 * 4] = {};
    const uint32_t size =
        FillBuffer(kMatrixDims, kMatrixIsColMajor, kMatrixCount, data, false, false);

    GLBuffer ubo;
    InitBuffer(program, "Ubo", ubo, 0, data, size, true);
    EXPECT_GL_NO_ERROR();

    // This is an ES3 test; use the matching ES3 position attribute (was essl31_shaders).
    drawQuad(program, essl3_shaders::PositionAttrib(), 0.5f, 1.0f, true);
    EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
}
// Test that side effects respect the order of logical expression operands.
TEST_P(GLSLTest_ES3, MixedRowAndColumnMajorMatrices_ReadSideEffectOrder)
{
    // IntermTraverser::insertStatementsInParentBlock that's used to move side effects does not
    // respect the order of evaluation of logical expressions. http://anglebug.com/3829.
    ANGLE_SKIP_TEST_IF(IsVulkan());
    // http://anglebug.com/3837
    ANGLE_SKIP_TEST_IF(IsLinux() && IsIntel() && IsOpenGL());
    // Fails on Mac on Intel and AMD: http://anglebug.com/3842
    ANGLE_SKIP_TEST_IF(IsOSX() && IsOpenGL() && (IsIntel() || IsAMD()));

    // ubo.m2[1][1][1] holds c * 4 + r == 5.0.  The left operand (x == 0) must be evaluated
    // before the side effect (x = 1) of the right operand for the condition to hold.
    constexpr char kFS[] = R"(#version 300 es
precision highp float;
out vec4 outColor;
layout(std140, column_major) uniform Ubo
{
mat4 m1;
layout(row_major) mat4 m2[2];
} ubo;
void main()
{
bool result = true;
int x = 0;
if (x == 0 && ubo.m2[x = 1][1][1] == 5.0)
{
result = true;
}
else
{
result = false;
}
outColor = result ? vec4(0, 1, 0, 1) : vec4(1, 0, 0, 1);
})";

    ANGLE_GL_PROGRAM(program, essl3_shaders::vs::Simple(), kFS);
    EXPECT_GL_NO_ERROR();

    // One column-major mat4 followed by the two row-major elements of m2.
    constexpr size_t kMatrixCount                                     = 3;
    constexpr std::pair<uint32_t, uint32_t> kMatrixDims[kMatrixCount] = {
        {4, 4},
        {4, 4},
        {4, 4},
    };
    constexpr bool kMatrixIsColMajor[kMatrixCount] = {true, false, false};

    float data[kMatrixCount * 4 * 4] = {};
    const uint32_t size =
        FillBuffer(kMatrixDims, kMatrixIsColMajor, kMatrixCount, data, false, false);

    GLBuffer ubo;
    InitBuffer(program, "Ubo", ubo, 0, data, size, true);
    EXPECT_GL_NO_ERROR();

    // This is an ES3 test; use the matching ES3 position attribute (was essl31_shaders).
    drawQuad(program, essl3_shaders::PositionAttrib(), 0.5f, 1.0f, true);
    EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
}
// Test that side effects respect short-circuit.
TEST_P(GLSLTest_ES3, MixedRowAndColumnMajorMatrices_ReadSideEffectShortCircuit)
{
    // IntermTraverser::insertStatementsInParentBlock that's used to move side effects does not
    // respect short-circuiting in evaluation of logical expressions. http://anglebug.com/3829.
    ANGLE_SKIP_TEST_IF(IsVulkan());
    // Fails on Android: http://anglebug.com/3839
    ANGLE_SKIP_TEST_IF(IsAndroid() && IsOpenGL());
    // Fails on Mac on Intel and AMD: http://anglebug.com/3842
    ANGLE_SKIP_TEST_IF(IsOSX() && IsOpenGL() && (IsIntel() || IsAMD()));
    // Fails on Mac on Nvidia: http://anglebug.com/3843
    ANGLE_SKIP_TEST_IF(IsOSX() && IsOpenGL() && IsNVIDIA());

    constexpr char kFS[] = R"(#version 300 es
precision highp float;
out vec4 outColor;
layout(std140, column_major) uniform Ubo
{
mat4 m1;
layout(row_major) mat4 m2[2];
} ubo;
void main()
{
bool result = true;
int x = 0;
if (x == 1 && ubo.m2[x = 1][1][1] == 5.0)
{
// First x == 1 should prevent the side effect of the second expression (x = 1) from
// being executed. If x = 1 is run before the if, the condition of the if would be true,
// which is a failure.
result = false;
}
if (x == 1)
{
result = false;
}
outColor = result ? vec4(0, 1, 0, 1) : vec4(1, 0, 0, 1);
})";

    ANGLE_GL_PROGRAM(program, essl3_shaders::vs::Simple(), kFS);
    EXPECT_GL_NO_ERROR();

    // One column-major mat4 followed by the two row-major elements of m2.
    constexpr size_t kMatrixCount                                     = 3;
    constexpr std::pair<uint32_t, uint32_t> kMatrixDims[kMatrixCount] = {
        {4, 4},
        {4, 4},
        {4, 4},
    };
    constexpr bool kMatrixIsColMajor[kMatrixCount] = {true, false, false};

    float data[kMatrixCount * 4 * 4] = {};
    const uint32_t size =
        FillBuffer(kMatrixDims, kMatrixIsColMajor, kMatrixCount, data, false, false);

    GLBuffer ubo;
    InitBuffer(program, "Ubo", ubo, 0, data, size, true);
    EXPECT_GL_NO_ERROR();

    // This is an ES3 test; use the matching ES3 position attribute (was essl31_shaders).
    drawQuad(program, essl3_shaders::PositionAttrib(), 0.5f, 1.0f, true);
    EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
}
// Test that multiple nested assignments are handled correctly.
TEST_P(GLSLTest_ES31, MixedRowAndColumnMajorMatrices_WriteSideEffect)
{
// http://anglebug.com/3831
ANGLE_SKIP_TEST_IF(IsNVIDIA() && IsOpenGL());
// Fails on windows AMD on GL: http://anglebug.com/3838
ANGLE_SKIP_TEST_IF(IsWindows() && IsOpenGL() && IsAMD());
// Fails on D3D due to mistranslation: http://anglebug.com/3841
ANGLE_SKIP_TEST_IF(IsD3D11());
// ubo.m1 (column-major) and ubo.m2 (row-major) are filled with the same logical matrix, so
// the value of the chained assignment must compare equal to ubo.m2.
constexpr char kFS[] = R"(#version 310 es
precision highp float;
out vec4 outColor;
layout(std140, column_major) uniform Ubo
{
mat4 m1;
layout(row_major) mat4 m2;
} ubo;
layout(std140, row_major, binding = 0) buffer Ssbo
{
layout(column_major) mat4 m1;
mat4 m2;
} ssbo;
void main()
{
bool result = true;
// Only assign to SSBO from a single pixel.
bool isOriginPixel = all(lessThan(gl_FragCoord.xy, vec2(1.0, 1.0)));
if (isOriginPixel)
{
if ((ssbo.m2 = ssbo.m1 = ubo.m1) != ubo.m2)
{
result = false;
}
}
outColor = result ? vec4(0, 1, 0, 1) : vec4(1, 0, 0, 1);
})";
ANGLE_GL_PROGRAM(program, essl31_shaders::vs::Simple(), kFS);
EXPECT_GL_NO_ERROR();
// A column-major mat4 followed by a row-major mat4 — the layout of both Ubo and Ssbo.
constexpr size_t kMatrixCount = 2;
constexpr std::pair<uint32_t, uint32_t> kMatrixDims[kMatrixCount] = {
{4, 4},
{4, 4},
};
constexpr bool kMatrixIsColMajor[kMatrixCount] = {
true,
false,
};
float data[kMatrixCount * 4 * 4] = {};
float zeros[kMatrixCount * 4 * 4] = {};
const uint32_t size =
FillBuffer(kMatrixDims, kMatrixIsColMajor, kMatrixCount, data, false, false);
// The SSBO starts zeroed; after the draw the nested assignment must have copied ubo's
// matrix into both ssbo members, making the SSBO match |data| bit-for-bit.
GLBuffer ubo, ssbo;
InitBuffer(program, "Ubo", ubo, 0, data, size, true);
InitBuffer(program, "Ssbo", ssbo, 0, zeros, size, false);
EXPECT_GL_NO_ERROR();
drawQuad(program, essl31_shaders::PositionAttrib(), 0.5f, 1.0f, true);
EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
EXPECT_TRUE(VerifyBuffer(ssbo, data, size));
}
// Test that assignments to array of array of matrices are handled correctly.
TEST_P(GLSLTest_ES31, MixedRowAndColumnMajorMatrices_WriteArrayOfArray)
{
// Fails on windows AMD on GL: http://anglebug.com/3838
ANGLE_SKIP_TEST_IF(IsWindows() && IsOpenGL() && IsAMD());
// Fails on D3D due to mistranslation: http://anglebug.com/3841
ANGLE_SKIP_TEST_IF(IsD3D11());
constexpr char kFS[] = R"(#version 310 es
precision highp float;
out vec4 outColor;
layout(std140, column_major) uniform Ubo
{
mat4 m1;
layout(row_major) mat4 m2[2][3];
} ubo;
layout(std140, row_major, binding = 0) buffer Ssbo
{
layout(column_major) mat4 m1;
mat4 m2[2][3];
} ssbo;
void main()
{
bool result = true;
// Only assign to SSBO from a single pixel.
bool isOriginPixel = all(lessThan(gl_FragCoord.xy, vec2(1.0, 1.0)));
if (isOriginPixel)
{
ssbo.m1 = ubo.m1;
ssbo.m2 = ubo.m2;
}
outColor = result ? vec4(0, 1, 0, 1) : vec4(1, 0, 0, 1);
})";
ANGLE_GL_PROGRAM(program, essl31_shaders::vs::Simple(), kFS);
EXPECT_GL_NO_ERROR();
// One column-major mat4 (m1) followed by the six row-major elements of m2[2][3]; the Ubo and
// Ssbo declarations share this exact layout.
constexpr size_t kMatrixCount = 7;
constexpr std::pair<uint32_t, uint32_t> kMatrixDims[kMatrixCount] = {
{4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4},
};
constexpr bool kMatrixIsColMajor[kMatrixCount] = {
true, false, false, false, false, false, false,
};
float data[kMatrixCount * 4 * 4] = {};
float zeros[kMatrixCount * 4 * 4] = {};
const uint32_t size =
FillBuffer(kMatrixDims, kMatrixIsColMajor, kMatrixCount, data, false, false);
// The SSBO starts zeroed; after the draw, the whole-array assignment must have copied the
// UBO's contents, making the SSBO match |data| bit-for-bit.
GLBuffer ubo, ssbo;
InitBuffer(program, "Ubo", ubo, 0, data, size, true);
InitBuffer(program, "Ssbo", ssbo, 0, zeros, size, false);
EXPECT_GL_NO_ERROR();
drawQuad(program, essl31_shaders::PositionAttrib(), 0.5f, 1.0f, true);
EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
EXPECT_TRUE(VerifyBuffer(ssbo, data, size));
}
} // anonymous namespace
// Use this to select which configurations (e.g. which renderer, which GLES major version) these
// tests should be run against.
ANGLE_INSTANTIATE_TEST(GLSLTest,
......
......@@ -1596,6 +1596,10 @@ TEST_P(UniformBufferTest, SizeOver65535)
// Use this to select which configurations (e.g. which renderer, which GLES major version) these
// tests should be run against.
ANGLE_INSTANTIATE_TEST(UniformBufferTest, ES3_D3D11(), ES3_OPENGL(), ES3_OPENGLES(), ES3_VULKAN());
ANGLE_INSTANTIATE_TEST(UniformBufferTest31, ES31_D3D11(), ES31_OPENGL(), ES31_OPENGLES());
ANGLE_INSTANTIATE_TEST(UniformBufferTest31,
ES31_D3D11(),
ES31_OPENGL(),
ES31_OPENGLES(),
ES31_VULKAN());
} // namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment