Implement GL_NV_cooperative_matrix

4605e2ed · Jeff Bolz · ec484527 · 4605e2ed · 4605e2ed · 4605e2ed
Commit 4605e2ed authored Feb 19, 2019 by Jeff Bolz
37 changed files
--- a/SPIRV/GLSL.ext.NV.h
+++ b/SPIRV/GLSL.ext.NV.h
@@ -72,4 +72,7 @@ const char* const E_SPV_NV_ray_tracing = "SPV_NV_ray_tracing";
 //SPV_NV_shading_rate
 const char* const E_SPV_NV_shading_rate = "SPV_NV_shading_rate";

+//SPV_NV_cooperative_matrix
+const char* const E_SPV_NV_cooperative_matrix = "SPV_NV_cooperative_matrix";
+
 #endif  // #ifndef GLSLextNV_H
--- a/SPIRV/GlslangToSpv.cpp
+++ b/SPIRV/GlslangToSpv.cpp
--- a/SPIRV/SpvBuilder.cpp
+++ b/SPIRV/SpvBuilder.cpp
@@ -388,6 +388,33 @@ Id Builder::makeMatrixType(Id component, int cols, int rows)
    return type->getResultId();
 }

+// Return the OpTypeCooperativeMatrixNV type with these operands, reusing a
+// matching type made earlier by this builder instead of creating a duplicate.
+Id Builder::makeCooperativeMatrixType(Id component, Id scope, Id rows, Id cols)
+{
+    auto& known = groupedTypes[OpTypeCooperativeMatrixNV];
+    // reuse an existing type when all four operands match
+    for (Instruction* candidate : known) {
+        if (candidate->getIdOperand(0) != component || candidate->getIdOperand(1) != scope)
+            continue;
+        if (candidate->getIdOperand(2) == rows && candidate->getIdOperand(3) == cols)
+            return candidate->getResultId();
+    }
+
+    // none yet: build it, remember it for later lookups, and register it
+    Instruction* made = new Instruction(getUniqueId(), NoType, OpTypeCooperativeMatrixNV);
+    made->addIdOperand(component);
+    made->addIdOperand(scope);
+    made->addIdOperand(rows);
+    made->addIdOperand(cols);
+    known.push_back(made);
+    constantsTypesGlobals.push_back(std::unique_ptr<Instruction>(made));
+    module.mapInstruction(made);
+
+    return made->getResultId();
+}
+
+
 // TODO: performance: track arrays per stride
 // If a stride is supplied (non-zero) make an array.
 // If no stride (0), reuse previous array types.
@@ -623,6 +650,9 @@ int Builder::getNumTypeConstituents(Id typeId) const
    }
    case OpTypeStruct:
        return instr->getNumOperands();
+    case OpTypeCooperativeMatrixNV:
+        // has only one constituent when used with OpCompositeConstruct.
+        return 1;
    default:
        assert(0);
        return 1;
@@ -669,6 +699,7 @@ Id Builder::getContainedTypeId(Id typeId, int member) const
    case OpTypeMatrix:
    case OpTypeArray:
    case OpTypeRuntimeArray:
+    case OpTypeCooperativeMatrixNV:
        return instr->getIdOperand(0);
    case OpTypePointer:
        return instr->getIdOperand(1);
@@ -981,15 +1012,14 @@ Id Builder::makeFpConstant(Id type, double d, bool specConstant)
        return NoResult;
 }

-Id Builder::findCompositeConstant(Op typeClass, const std::vector<Id>& comps)
+Id Builder::findCompositeConstant(Op typeClass, Id typeId, const std::vector<Id>& comps)
 {
    Instruction* constant = 0;
    bool found = false;
    for (int i = 0; i < (int)groupedConstants[typeClass].size(); ++i) {
        constant = groupedConstants[typeClass][i];

-        // same shape?
-        if (constant->getNumOperands() != (int)comps.size())
+        if (constant->getTypeId() != typeId)
            continue;

        // same contents?
@@ -1044,8 +1074,9 @@ Id Builder::makeCompositeConstant(Id typeId, const std::vector<Id>& members, boo
    case OpTypeVector:
    case OpTypeArray:
    case OpTypeMatrix:
+    case OpTypeCooperativeMatrixNV:
        if (! specConstant) {
-            Id existing = findCompositeConstant(typeClass, members);
+            Id existing = findCompositeConstant(typeClass, typeId, members);
            if (existing)
                return existing;
        }
@@ -1408,6 +1439,23 @@ Id Builder::createArrayLength(Id base, unsigned int member)
    return length->getResultId();
 }

+// Emit OpCooperativeMatrixLengthNV for 'type'; the result type is makeUintType(32).
+Id Builder::createCooperativeMatrixLength(Id type)
+{
+    const Id resultType = makeUintType(32);
+    if (! generatingOpCodeForSpecConst) {
+        // normal mode: add the instruction to the build point
+        Instruction* lengthOp = new Instruction(getUniqueId(), resultType, OpCooperativeMatrixLengthNV);
+        lengthOp->addIdOperand(type);
+        buildPoint->addInstruction(std::unique_ptr<Instruction>(lengthOp));
+        return lengthOp->getResultId();
+    }
+
+    // spec constant operation generation mode: generate code for spec constants
+    const std::vector<Id> operands(1, type);
+    return createSpecConstantOp(OpCooperativeMatrixLengthNV, resultType, operands, std::vector<Id>());
+}
+
 Id Builder::createCompositeExtract(Id composite, Id typeId, unsigned index)
 {
    // Generate code for spec constants if in spec constant operation
@@ -2598,9 +2646,9 @@ Id Builder::accessChainLoad(Decoration precision, Decoration nonUniform, Id resu
                }
            }

-            if (constant)
+            if (constant) {
                id = createCompositeExtract(accessChain.base, swizzleBase, indexes);
-            else {
+            } else {
                // make a new function variable for this r-value
                Id lValue = createVariable(StorageClassFunction, getTypeId(accessChain.base), "indexable");


--- a/SPIRV/SpvBuilder.h
+++ b/SPIRV/SpvBuilder.h
@@ -155,6 +155,7 @@ public:
    Id makeImageType(Id sampledType, Dim, bool depth, bool arrayed, bool ms, unsigned sampled, ImageFormat format);
    Id makeSamplerType();
    Id makeSampledImageType(Id imageType);
+    Id makeCooperativeMatrixType(Id component, Id scope, Id rows, Id cols);

    // accelerationStructureNV type
    Id makeAccelerationStructureNVType();
@@ -178,6 +179,7 @@ public:
    bool isScalar(Id resultId)       const { return isScalarType(getTypeId(resultId)); }
    bool isVector(Id resultId)       const { return isVectorType(getTypeId(resultId)); }
    bool isMatrix(Id resultId)       const { return isMatrixType(getTypeId(resultId)); }
+    bool isCooperativeMatrix(Id resultId)const { return isCooperativeMatrixType(getTypeId(resultId)); } // tests the type of resultId
    bool isAggregate(Id resultId)    const { return isAggregateType(getTypeId(resultId)); }
    bool isSampledImage(Id resultId) const { return isSampledImageType(getTypeId(resultId)); }

@@ -191,7 +193,8 @@ public:
    bool isMatrixType(Id typeId)       const { return getTypeClass(typeId) == OpTypeMatrix; }
    bool isStructType(Id typeId)       const { return getTypeClass(typeId) == OpTypeStruct; }
    bool isArrayType(Id typeId)        const { return getTypeClass(typeId) == OpTypeArray; }
-    bool isAggregateType(Id typeId)    const { return isArrayType(typeId) || isStructType(typeId); }
+    bool isCooperativeMatrixType(Id typeId)const { return getTypeClass(typeId) == OpTypeCooperativeMatrixNV; } // typeId must name a type
+    bool isAggregateType(Id typeId)    const { return isArrayType(typeId) || isStructType(typeId) || isCooperativeMatrixType(typeId); } // array, struct, or cooperative matrix
    bool isImageType(Id typeId)        const { return getTypeClass(typeId) == OpTypeImage; }
    bool isSamplerType(Id typeId)      const { return getTypeClass(typeId) == OpTypeSampler; }
    bool isSampledImageType(Id typeId) const { return getTypeClass(typeId) == OpTypeSampledImage; }
@@ -314,6 +317,9 @@ public:
    // Create an OpArrayLength instruction
    Id createArrayLength(Id base, unsigned int member);

+    // Create an OpCooperativeMatrixLengthNV instruction
+    Id createCooperativeMatrixLength(Id type);
+
    // Create an OpCompositeExtract instruction
    Id createCompositeExtract(Id composite, Id typeId, unsigned index);
    Id createCompositeExtract(Id composite, Id typeId, const std::vector<unsigned>& indexes);
@@ -670,7 +676,7 @@ public:
    Id makeInt64Constant(Id typeId, unsigned long long value, bool specConstant);
    Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned value);
    Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned v1, unsigned v2);
-    Id findCompositeConstant(Op typeClass, const std::vector<Id>& comps);
+    Id findCompositeConstant(Op typeClass, Id typeId, const std::vector<Id>& comps);
    Id findStructConstant(Id typeId, const std::vector<Id>& comps);
    Id collapseAccessChain();
    void remapDynamicSwizzle();

--- a/SPIRV/doc.cpp
+++ b/SPIRV/doc.cpp
@@ -930,6 +930,10 @@ const char* CapabilityString(int info)

    case CapabilityPhysicalStorageBufferAddressesEXT:   return "CapabilityPhysicalStorageBufferAddressesEXT";

+    case CapabilityVariablePointers:                    return "CapabilityVariablePointers";
+
+    case CapabilityCooperativeMatrixNV:     return "CapabilityCooperativeMatrixNV";
+
    default: return "Bad";
    }
 }
@@ -1333,6 +1337,12 @@ const char* OpcodeString(int op)
    case OpWritePackedPrimitiveIndices4x8NV: return "OpWritePackedPrimitiveIndices4x8NV";
 #endif

+    case OpTypeCooperativeMatrixNV:         return "OpTypeCooperativeMatrixNV";
+    case OpCooperativeMatrixLoadNV:         return "OpCooperativeMatrixLoadNV";
+    case OpCooperativeMatrixStoreNV:        return "OpCooperativeMatrixStoreNV";
+    case OpCooperativeMatrixMulAddNV:       return "OpCooperativeMatrixMulAddNV";
+    case OpCooperativeMatrixLengthNV:       return "OpCooperativeMatrixLengthNV";
+
    default:
        return "Bad";
    }
@@ -1444,6 +1454,8 @@ void Parameterize()
    InstructionDesc[OpGroupWaitEvents].setResultAndType(false, false);
    InstructionDesc[OpAtomicFlagClear].setResultAndType(false, false);
    InstructionDesc[OpModuleProcessed].setResultAndType(false, false);
+    InstructionDesc[OpTypeCooperativeMatrixNV].setResultAndType(true, false);
+    InstructionDesc[OpCooperativeMatrixStoreNV].setResultAndType(false, false);

    // Specific additional context-dependent operands

@@ -2714,6 +2726,32 @@ void Parameterize()
    InstructionDesc[OpWritePackedPrimitiveIndices4x8NV].operands.push(OperandId, "'Index Offset'");
    InstructionDesc[OpWritePackedPrimitiveIndices4x8NV].operands.push(OperandId, "'Packed Indices'");
 #endif
+
+    InstructionDesc[OpTypeCooperativeMatrixNV].operands.push(OperandId, "'Component Type'");
+    InstructionDesc[OpTypeCooperativeMatrixNV].operands.push(OperandId, "'Scope'");
+    InstructionDesc[OpTypeCooperativeMatrixNV].operands.push(OperandId, "'Rows'");
+    InstructionDesc[OpTypeCooperativeMatrixNV].operands.push(OperandId, "'Columns'");
+
+    InstructionDesc[OpCooperativeMatrixLoadNV].operands.push(OperandId, "'Pointer'");
+    InstructionDesc[OpCooperativeMatrixLoadNV].operands.push(OperandId, "'Stride'");
+    InstructionDesc[OpCooperativeMatrixLoadNV].operands.push(OperandId, "'Column Major'");
+    InstructionDesc[OpCooperativeMatrixLoadNV].operands.push(OperandMemoryAccess, "'Memory Access'");
+    InstructionDesc[OpCooperativeMatrixLoadNV].operands.push(OperandLiteralNumber, "", true);
+    InstructionDesc[OpCooperativeMatrixLoadNV].operands.push(OperandId, "", true);
+
+    InstructionDesc[OpCooperativeMatrixStoreNV].operands.push(OperandId, "'Pointer'");
+    InstructionDesc[OpCooperativeMatrixStoreNV].operands.push(OperandId, "'Object'");
+    InstructionDesc[OpCooperativeMatrixStoreNV].operands.push(OperandId, "'Stride'");
+    InstructionDesc[OpCooperativeMatrixStoreNV].operands.push(OperandId, "'Column Major'");
+    InstructionDesc[OpCooperativeMatrixStoreNV].operands.push(OperandMemoryAccess, "'Memory Access'");
+    InstructionDesc[OpCooperativeMatrixStoreNV].operands.push(OperandLiteralNumber, "", true);
+    InstructionDesc[OpCooperativeMatrixStoreNV].operands.push(OperandId, "", true);
+
+    InstructionDesc[OpCooperativeMatrixMulAddNV].operands.push(OperandId, "'A'");
+    InstructionDesc[OpCooperativeMatrixMulAddNV].operands.push(OperandId, "'B'");
+    InstructionDesc[OpCooperativeMatrixMulAddNV].operands.push(OperandId, "'C'");
+
+    InstructionDesc[OpCooperativeMatrixLengthNV].operands.push(OperandId, "'Type'");
 }

 }; // end spv namespace
--- a/SPIRV/spirv.hpp
+++ b/SPIRV/spirv.hpp
@@ -811,6 +811,7 @@ enum Capability {
    CapabilityVulkanMemoryModelDeviceScopeKHR = 5346,
    CapabilityPhysicalStorageBufferAddressesEXT = 5347,
    CapabilityComputeDerivativeGroupLinearNV = 5350,
+    CapabilityCooperativeMatrixNV = 5357,
    CapabilitySubgroupShuffleINTEL = 5568,
    CapabilitySubgroupBufferBlockIOINTEL = 5569,
    CapabilitySubgroupImageBlockIOINTEL = 5570,
@@ -1183,6 +1184,11 @@ enum Op {
    OpTraceNV = 5337,
    OpTypeAccelerationStructureNV = 5341,
    OpExecuteCallableNV = 5344,
+    OpTypeCooperativeMatrixNV = 5358,
+    OpCooperativeMatrixLoadNV = 5359,
+    OpCooperativeMatrixStoreNV = 5360,
+    OpCooperativeMatrixMulAddNV = 5361,
+    OpCooperativeMatrixLengthNV = 5362,
    OpSubgroupShuffleINTEL = 5571,
    OpSubgroupShuffleDownINTEL = 5572,
    OpSubgroupShuffleUpINTEL = 5573,

--- a/SPIRV/spvIR.h
+++ b/SPIRV/spvIR.h
@@ -83,6 +83,7 @@ const MemorySemanticsMask MemorySemanticsAllMemory =
 struct IdImmediate {
    bool isId;      // true if word is an Id, false if word is an immediate
    unsigned word;  // the Id or the immediate value, as selected by isId
+    IdImmediate(bool i, unsigned w) : isId(i), word(w) {}  // sets isId from i and word from w
 };

 //

--- a/Test/baseResults/420.vert.out
+++ b/Test/baseResults/420.vert.out
@@ -8,7 +8,7 @@ ERROR: 0:12: '' : can only have one auxiliary qualifier (centroid, patch, and sa
 ERROR: 0:13: 'uniform' : too many storage qualifiers 
 ERROR: 0:18: '=' : global const initializers must be constant ' const int'
 ERROR: 0:20: 'const' : no qualifiers allowed for function return 
-ERROR: 0:27: '' : array size must be a constant integer expression 
+ERROR: 0:27: '' : array size must be a constant integer expression
 ERROR: 0:38: 'j' : undeclared identifier 
 ERROR: 0:38: '=' :  cannot convert from ' temp float' to ' temp int'
 ERROR: 0:39: 'k' : undeclared identifier 
@@ -31,7 +31,7 @@ ERROR: 0:85: 'patch' : not supported in this stage: vertex
 ERROR: 0:85: '' : vertex input cannot be further qualified 
 ERROR: 0:86: 'patch' : not supported in this stage: vertex
 ERROR: 0:100: '=' : global const initializers must be constant ' const int'
-ERROR: 0:101: '' : array size must be a constant integer expression 
+ERROR: 0:101: '' : array size must be a constant integer expression
 ERROR: 0:107: 'image variables not declared 'writeonly' and without a format layout qualifier' : not supported for this version or the enabled extensions 
 ERROR: 0:114: 'imageAtomicMin' : only supported on image with format r32i or r32ui 
 ERROR: 0:115: 'imageAtomicMax' : no matching overloaded function found 

--- a/Test/baseResults/constErrors.frag.out
+++ b/Test/baseResults/constErrors.frag.out
 constErrors.frag
 ERROR: 0:14: 'non-constant initializer' : not supported for this version or the enabled extensions 
-ERROR: 0:17: '' : array size must be a constant integer expression 
-ERROR: 0:18: '' : array size must be a constant integer expression 
-ERROR: 0:19: '' : array size must be a constant integer expression 
+ERROR: 0:17: '' : array size must be a constant integer expression
+ERROR: 0:18: '' : array size must be a constant integer expression
+ERROR: 0:19: '' : array size must be a constant integer expression
 ERROR: 0:27: '=' : global const initializers must be constant ' const structure{ global 3-component vector of float v3,  global 2-component vector of int iv2}'
 ERROR: 0:33: '=' : global const initializers must be constant ' const structure{ global 3-component vector of float v3,  global 2-component vector of int iv2,  global 2X4 matrix of float m}'
 ERROR: 6 compilation errors.  No code generated.

--- a/Test/baseResults/negativeArraySize.comp.out
+++ b/Test/baseResults/negativeArraySize.comp.out
 negativeArraySize.comp
-ERROR: 0:9: '' : array size must be a positive integer 
+ERROR: 0:9: '' : array size must be a positive integer
 ERROR: 1 compilation errors.  No code generated.



--- a/Test/baseResults/spv.1.3.coopmat.comp.out
+++ b/Test/baseResults/spv.1.3.coopmat.comp.out
+spv.1.3.coopmat.comp
+// Module Version 10300
+// Generated by (magic number): 80007
+// Id's are bound by 52
+
+                              Capability Shader
+                              Capability CapabilityVariablePointers
+                              Capability CapabilityVulkanMemoryModelKHR
+                              Capability CapabilityCooperativeMatrixNV
+                              Extension  "SPV_KHR_vulkan_memory_model"
+                              Extension  "SPV_NV_cooperative_matrix"
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical VulkanKHR
+                              EntryPoint GLCompute 4  "main"
+                              ExecutionMode 4 LocalSize 64 1 1
+                              Source GLSL 450
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_float16"
+                              SourceExtension  "GL_KHR_memory_scope_semantics"
+                              SourceExtension  "GL_NV_cooperative_matrix"
+                              Name 4  "main"
+                              Name 13  "m"
+                              Name 29  "tempArg"
+                              Name 33  "Block"
+                              MemberName 33(Block) 0  "y"
+                              MemberName 33(Block) 1  "x"
+                              Name 35  "block"
+                              Decorate 31 ArrayStride 4
+                              Decorate 32 ArrayStride 4
+                              MemberDecorate 33(Block) 0 Offset 0
+                              MemberDecorate 33(Block) 1 Offset 4194304
+                              Decorate 33(Block) Block
+                              Decorate 35(block) DescriptorSet 0
+                              Decorate 35(block) Binding 0
+                              Decorate 51 BuiltIn WorkgroupSize
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeFloat 32
+               7:             TypeInt 32 0
+               8:      7(int) Constant 3
+               9:      7(int) Constant 16
+              10:      7(int) Constant 8
+              11:             TypeCooperativeMatrixNV 6(float) 8 9 10
+              12:             TypePointer Function 11
+              14:    6(float) Constant 0
+              15:          11 ConstantComposite 14
+              24:    6(float) Constant 1073741824
+              30:      7(int) Constant 1048576
+              31:             TypeArray 6(float) 30
+              32:             TypeRuntimeArray 6(float)
+       33(Block):             TypeStruct 31 32
+              34:             TypePointer StorageBuffer 33(Block)
+       35(block):     34(ptr) Variable StorageBuffer
+              36:             TypeInt 32 1
+              37:     36(int) Constant 1
+              38:      7(int) Constant 5
+              39:             TypePointer StorageBuffer 6(float)
+              41:      7(int) Constant 128
+              42:             TypeBool
+              43:    42(bool) ConstantFalse
+              48:             TypeVector 7(int) 3
+              49:      7(int) Constant 64
+              50:      7(int) Constant 1
+              51:   48(ivec3) ConstantComposite 49 50 50
+         4(main):           2 Function None 3
+               5:             Label
+           13(m):     12(ptr) Variable Function
+     29(tempArg):     12(ptr) Variable Function
+                              Store 13(m) 15
+              16:          11 Load 13(m)
+              17:          11 Load 13(m)
+              18:          11 FAdd 16 17
+                              Store 13(m) 18
+              19:          11 Load 13(m)
+              20:          11 Load 13(m)
+              21:          11 FSub 19 20
+                              Store 13(m) 21
+              22:          11 Load 13(m)
+              23:          11 FNegate 22
+                              Store 13(m) 23
+              25:          11 Load 13(m)
+              26:          11 MatrixTimesScalar 25 24
+                              Store 13(m) 26
+              27:          11 Load 13(m)
+              28:          11 MatrixTimesScalar 27 24
+                              Store 13(m) 28
+              40:     39(ptr) AccessChain 35(block) 37 9
+              44:          11 CooperativeMatrixLoadNV 40 41 43 MakePointerVisibleKHR NonPrivatePointerKHR 38
+                              Store 29(tempArg) 44
+              45:          11 Load 29(tempArg)
+                              Store 13(m) 45
+              46:          11 Load 13(m)
+              47:     39(ptr) AccessChain 35(block) 37 9
+                              CooperativeMatrixStoreNV 47 46 41 43 MakePointerAvailableKHR NonPrivatePointerKHR 38
+                              Return
+                              FunctionEnd
--- a/Test/baseResults/spv.coopmat.comp.out
+++ b/Test/baseResults/spv.coopmat.comp.out
--- a/Test/baseResults/spv.coopmat_Error.comp.out
+++ b/Test/baseResults/spv.coopmat_Error.comp.out
+spv.coopmat_Error.comp
+ERROR: 0:8: 'ftemplate16' : unexpected type parameters 
+ERROR: 0:10: 'fnoparams' : expected four type parameters 
+ERROR: 0:12: 'fbadbits' : expected 16, 32, or 64 bits for first type parameter 
+ERROR: 0:14: 'fbadnumparams' : expected four type parameters 
+ERROR: 0:18: '' : type parameter must be a constant integer expression
+ERROR: 0:20: 'constant_id' : can only be applied to 'const'-qualified scalar 
+ERROR: 0:22: 'Cooperative matrix types must not be used in shared memory' : qualifier 
+ERROR: 0:25: 'bufmat' : member of block cannot be or contain a cooperative matrix type 
+ERROR: 0:34: 'assign' :  cannot convert from ' temp<16, 3, 16, 8> float16_t' to ' temp<32, 3, 16, 8> highp float'
+ERROR: 0:35: 'assign' :  cannot convert from ' temp<16, 3, 16, 8> float16_t' to ' temp<32, 3, 16, 8> highp float'
+ERROR: 0:40: 'assign' :  cannot convert from ' temp<16, 3, 8, 8> float16_t' to ' temp<16, 3, 16, 8> float16_t'
+ERROR: 0:46: 'assign' :  cannot convert from ' temp<16, 3, 8, 1> float16_t' to ' temp<16, 3, 8, 1> float16_t'
+ERROR: 0:49: 'constructor' : too many arguments 
+ERROR: 0:49: 'assign' :  cannot convert from ' const float' to ' temp<16, 3, 8, 8> float16_t'
+ERROR: 0:53: 'constructor' : Cooperative matrix constructor argument must be scalar or cooperative matrix 
+ERROR: 0:53: '=' :  cannot convert from ' const float' to ' temp<32, 3, 4, 4> highp float'
+ERROR: 0:56: 'expression' :  left of '[' is not of type array, matrix, or vector  
+ERROR: 0:59: '.' : cannot apply to a cooperative matrix type: x
+ERROR: 0:61: '*' :  wrong operand types: no operation '*' exists that takes a left-hand operand of type ' temp<16, 3, 16, 8> float16_t' and a right operand of type ' temp<16, 3, 16, 8> float16_t' (or there is no acceptable conversion)
+ERROR: 0:63: '+' :  wrong operand types: no operation '+' exists that takes a left-hand operand of type ' temp<16, 3, 16, 8> float16_t' and a right operand of type ' const float' (or there is no acceptable conversion)
+ERROR: 0:64: '-' :  wrong operand types: no operation '-' exists that takes a left-hand operand of type ' temp<16, 3, 16, 8> float16_t' and a right operand of type ' const float' (or there is no acceptable conversion)
+ERROR: 0:65: '/' :  wrong operand types: no operation '/' exists that takes a left-hand operand of type ' temp<16, 3, 16, 8> float16_t' and a right operand of type ' const float' (or there is no acceptable conversion)
+ERROR: 0:66: 'assign' :  cannot convert from ' const float' to ' temp<16, 3, 16, 8> float16_t'
+ERROR: 0:67: 'assign' :  cannot convert from ' const float' to ' temp<16, 3, 16, 8> float16_t'
+ERROR: 0:68: 'assign' :  cannot convert from ' const float' to ' temp<16, 3, 16, 8> float16_t'
+ERROR: 0:70: '*' :  wrong operand types: no operation '*' exists that takes a left-hand operand of type ' temp<16, 3, 16, 8> float16_t' and a right operand of type ' const float' (or there is no acceptable conversion)
+ERROR: 0:71: '*' :  wrong operand types: no operation '*' exists that takes a left-hand operand of type ' const float' and a right operand of type ' temp<16, 3, 16, 8> float16_t' (or there is no acceptable conversion)
+ERROR: 0:72: '*' :  wrong operand types: no operation '*' exists that takes a left-hand operand of type ' temp<32, 3, 16, 8> highp float' and a right operand of type ' const float16_t' (or there is no acceptable conversion)
+ERROR: 0:73: '*' :  wrong operand types: no operation '*' exists that takes a left-hand operand of type ' const float16_t' and a right operand of type ' temp<32, 3, 16, 8> highp float' (or there is no acceptable conversion)
+ERROR: 0:75: 'transpose' : no matching overloaded function found 
+ERROR: 30 compilation errors.  No code generated.
+
+
+SPIR-V is not generated for failed compile or link
--- a/Test/baseResults/vulkan.vert.out
+++ b/Test/baseResults/vulkan.vert.out
@@ -30,14 +30,11 @@ ERROR: 0:32: 'initializer' : can't use with types containing arrays sized with a
 ERROR: 0:34: '=' : can't use with types containing arrays sized with a specialization constant 
 ERROR: 0:35: '==' : can't use with types containing arrays sized with a specialization constant 
 ERROR: 0:39: 'set' : cannot be used with push_constant 
-ERROR: 0:49: '[]' : only outermost dimension of an array of arrays can be a specialization constant 
-ERROR: 0:50: '[]' : only outermost dimension of an array of arrays can be a specialization constant 
-ERROR: 0:51: '[]' : only outermost dimension of an array of arrays can be a specialization constant 
 ERROR: 0:54: '[]' : only outermost dimension of an array of arrays can be a specialization constant 
 ERROR: 0:54: 'location' : SPIR-V requires location for user input/output 
 ERROR: 0:58: 'location' : SPIR-V requires location for user input/output 
 ERROR: 0:65: 'location' : overlapping use of location 10
-ERROR: 38 compilation errors.  No code generated.
+ERROR: 35 compilation errors.  No code generated.


 SPIR-V is not generated for failed compile or link
--- a/Test/spv.1.3.coopmat.comp
+++ b/Test/spv.1.3.coopmat.comp
+#version 450 core
+#extension GL_KHR_memory_scope_semantics : enable
+#extension GL_NV_cooperative_matrix : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
+
+#pragma use_variable_pointers
+
+layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+layout(set = 0, binding = 0) coherent buffer Block {
+    float y[1024*1024];
+    float x[];
+} block;
+
+
+void main()
+{
+    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> m = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(0.0);  // built from one scalar
+
+    m = m + m;      // expected output: FAdd
+    m = m - m;      // expected output: FSub
+    m = -m;         // expected output: FNegate
+    m = 2.0*m;      // expected output: MatrixTimesScalar
+    m = m*2.0;      // expected output: MatrixTimesScalar
+
+    coopMatLoadNV(m, block.x, 16, 128, false);   // pointer is block.x[16], stride 128, column-major false
+    coopMatStoreNV(m, block.x, 16, 128, false);  // same pointer, stride and layout as the load
+}
--- a/Test/spv.coopmat.comp
+++ b/Test/spv.coopmat.comp
+#version 450 core
+#extension GL_KHR_memory_scope_semantics : enable
+#extension GL_NV_cooperative_matrix : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
+#extension GL_EXT_buffer_reference : enable
+
+layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+const int X = 8;
+layout(constant_id = 0) const int Y = 2;
+const int Z = X*Y;
+
+fcoopmatNV<16, gl_ScopeSubgroup, Z, 8> mC;
+fcoopmatNV<16, gl_ScopeSubgroup, Z, 8> mC2[3];
+
+int arr[mC.length()];
+int arr2[mC2[1].length()];
+
+layout(constant_id = 1) const float F = 3.0;
+
+const fcoopmatNV<32, gl_ScopeSubgroup, Z, 8> mD = fcoopmatNV<32, gl_ScopeSubgroup, Z, 8>(0.0);
+const fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> mD2 = fcoopmatNV<16, gl_ScopeSubgroup, 8, 8>(1);
+
+struct S { int a; int b; int c; };
+
+const S s = S(12, 23, 34);
+
+layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
+    float y[1024*1024];
+    float x[];
+} block;
+
+layout(set = 0, binding = 0) coherent buffer Block16 {
+    float16_t y[1024*1024];
+    float16_t x[];
+
+    Block b;
+} block16;
+
+fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> f16(fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> m) { return -m; }  // cooperative matrix as parameter and return type
+fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> f32(fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> m) { return -m; }  // same as f16, with 32 as the first type parameter
+
+layout(constant_id = 2) const int SC = 1;
+fcoopmatNV<16, gl_ScopeSubgroup, SC, SC> scm[SC][SC];
+
+// sized for fcoopmatNV<16, gl_ScopeSubgroup, 16, 16>
+shared uvec4 shmatrix[16*16*2/16];
+
+void main()
+{
+    fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)> m = fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)>(0.0);  // type parameter written as a constant expression
+
+    m = m + m;
+    m = m - m;
+    m = -m;
+    m = 2.0*m;
+    m = m*2.0;
+
+    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> m2 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(m);  // <16,...> matrix constructed from the <32,...> matrix m
+
+    float x = m[1];  // a single subscript yields a float
+    m[0] = x;
+
+    coopMatLoadNV(m, block.x, 16, 128, false);
+    coopMatStoreNV(m, block.x, 16, 128, false);
+    coopMatLoadNV(m2, block16.x, 16, 128, false);
+    coopMatStoreNV(m2, block16.x, 16, 128, false);
+    coopMatLoadNV(m, block16.b.x, 16, 128, false);   // array reached through the buffer_reference member b
+    coopMatStoreNV(m, block16.b.x, 16, 128, false);
+
+    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> A;
+    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> B;
+    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> C;
+    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> D;
+    D = coopMatMulAddNV(A, B, C);  // A and B are <16,...>, C and D are <32,...>
+
+    int l = D.length();
+
+    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> E;
+
+    fcoopmatNV<16, gl_ScopeSubgroup, Z, Z> F = fcoopmatNV<16, gl_ScopeSubgroup, Z, Z>(0.0);  // Z = X*Y, where Y has a constant_id
+
+    fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)> a[5];
+    a[3][0] = 1.0;  // first subscript picks the array element, second subscripts that matrix
+
+    float md1 = mD[1];  // subscript on the const global mD
+
+    md1 += (m += m)[1234];  // subscript on the result of a compound assignment
+
+    mC2[1] = mC2[2];
+
+    coopMatLoadNV(m, block.y, 16, 128, false);   // y is the sized array, x above is the unsized one
+    coopMatStoreNV(m, block.y, 16, 128, false);
+    coopMatLoadNV(m2, block16.y, 16, 128, false);
+    coopMatStoreNV(m2, block16.y, 16, 128, false);
+
+    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> p1;
+    fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> p2;
+
+    p1 = f16(p1);
+    p2 = f32(p2);
+
+    p1 = fcoopmatNV<16, gl_ScopeSubgroup, 8, 8>(0.0);
+    p2 = fcoopmatNV<32, gl_ScopeSubgroup, 8, 8>(0.0);
+
+    p1 /= p1;
+
+    p1 *= float16_t(2.0);  // float16_t scalar with the <16,...> matrix
+    p2 *= 4.0;             // float scalar with the <32,...> matrix
+
+    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> ms;
+    coopMatLoadNV(ms, shmatrix, 1, 2, false);   // load/store through the shared uvec4 array
+    coopMatStoreNV(ms, shmatrix, 1, 2, false);
+
+}
--- a/Test/spv.coopmat_Error.comp
+++ b/Test/spv.coopmat_Error.comp
+#version 450 core
+#extension GL_KHR_memory_scope_semantics : enable
+#extension GL_NV_cooperative_matrix : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
+
+layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+float<16> ftemplate16;  // ERROR, unexpected type parameters
+
+fcoopmatNV fnoparams;  // ERROR, expected four type parameters
+
+fcoopmatNV<8, gl_ScopeSubgroup, 8, 8> fbadbits;  // ERROR, first type parameter must be 16, 32, or 64 bits
+
+fcoopmatNV<16, gl_ScopeSubgroup, 8> fbadnumparams;  // ERROR, expected four type parameters
+
+int X = 8;
+
+fcoopmatNV<16, gl_ScopeSubgroup, 8, X> fbadparam;  // ERROR, type parameter must be a constant integer expression
+
+layout(constant_id = 0) int Y = 1;  // ERROR, constant_id needs a 'const'-qualified scalar
+
+shared fcoopmatNV<16, gl_ScopeSubgroup, 16, 16> sharedmat;  // ERROR, not allowed in shared memory
+
+layout(set = 0, binding = 0) buffer InvBlock {
+    fcoopmatNV<16, gl_ScopeSubgroup, 16, 16> bufmat;  // ERROR, block member cannot be a cooperative matrix
+} invblock;
+
+void main()
+{
+    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> f32_16_8;
+    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> f16_16_8;
+
+    // invalid implicit conversions
+    f32_16_8 = f16_16_8;             // ERROR, cannot convert
+    f32_16_8 = f16_16_8 + f16_16_8;  // ERROR, cannot convert
+
+    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> f16_8_8;
+
+    // mismatching dimensions
+    f16_16_8 = f16_8_8;  // ERROR, cannot convert
+
+    fcoopmatNV<16, gl_ScopeSubgroup, 8, Y> f16_8_Y;
+    fcoopmatNV<16, gl_ScopeSubgroup, 8, (Y+1)> f16_8_Y1;
+
+    // mismatching dimensions with specialization constants
+    f16_8_Y = f16_8_Y1;  // ERROR, cannot convert
+
+    // wrong arguments for constructor
+    f16_8_8 = fcoopmatNV<16, gl_ScopeSubgroup, 8, 8>(1, 1);  // ERROR, too many arguments
+
+    // can't construct from a builtin type
+    mat4 m4;
+    fcoopmatNV<32, gl_ScopeSubgroup, 4, 4> f32_4_4 = fcoopmatNV<32, gl_ScopeSubgroup, 4, 4>(m4);  // ERROR, argument must be scalar or cooperative matrix
+
+    // only support a single array subscript
+    f16_16_8[0][0];  // ERROR, left of '[' is not array, matrix, or vector
+
+    // don't support scalar component selection
+    f16_16_8.x;  // ERROR, cannot apply '.' to a cooperative matrix type
+
+    f16_16_8 * f16_16_8;  // ERROR, wrong operand types
+
+    f16_16_8 + 1.0;   // ERROR, wrong operand types
+    f16_16_8 - 1.0;   // ERROR, wrong operand types
+    f16_16_8 / 1.0;   // ERROR, wrong operand types
+    f16_16_8 += 1.0;  // ERROR, cannot convert from float
+    f16_16_8 -= 1.0;  // ERROR, cannot convert from float
+    f16_16_8 /= 1.0;  // ERROR, cannot convert from float
+
+    f16_16_8*2.0;             // ERROR, float scalar with float16_t matrix
+    2.0*f16_16_8;             // ERROR, float scalar with float16_t matrix
+    f32_16_8*float16_t(2.0);  // ERROR, float16_t scalar with float matrix
+    float16_t(2.0)*f32_16_8;  // ERROR, float16_t scalar with float matrix
+
+    transpose(f16_8_8);  // ERROR, no matching overloaded function
+}
--- a/Test/vulkan.vert
+++ b/Test/vulkan.vert
@@ -46,9 +46,9 @@ layout(set = 1, push_constant) uniform badpc { int a; } badpcI;  // ERROR, no de
 #error VULKAN should be 100
 #endif

-float AofA0[2][arraySize];              // ERROR, only outer dimension
-float AofA1[arraySize][arraySize];      // ERROR, only outer dimension
-float AofA2[arraySize][2 + arraySize];  // ERROR, only outer dimension
+float AofA0[2][arraySize];
+float AofA1[arraySize][arraySize];
+float AofA2[arraySize][2 + arraySize];
 float AofA3[arraySize][2];

 out ban1 {                              // ERROR, only outer dimension

--- a/glslang/Include/Types.h
+++ b/glslang/Include/Types.h
--- a/glslang/Include/arrays.h
+++ b/glslang/Include/arrays.h
@@ -254,7 +254,9 @@ struct TArraySizes {
    void addInnerSize() { addInnerSize((unsigned)UnsizedArraySize); }
    void addInnerSize(int s) { addInnerSize((unsigned)s, nullptr); }
    void addInnerSize(int s, TIntermTyped* n) { sizes.push_back((unsigned)s, n); }
-    void addInnerSize(TArraySize pair) { sizes.push_back(pair.size, pair.node); }
+    void addInnerSize(TArraySize pair) {
+        sizes.push_back(pair.size, pair.node);  // same push_back as the (int, TIntermTyped*) overload, fed from the pair's fields
+    }
    void addInnerSizes(const TArraySizes& s) { sizes.push_back(s.sizes); }
    void changeOuterSize(int s) { sizes.changeFront((unsigned)s); }
    int getImplicitSize() const { return implicitArraySize; }
@@ -318,8 +320,8 @@ struct TArraySizes {
    void setVariablyIndexed() { variablyIndexed = true; }
    bool isVariablyIndexed() const { return variablyIndexed; }

-    bool operator==(const TArraySizes& rhs) { return sizes == rhs.sizes; }
-    bool operator!=(const TArraySizes& rhs) { return sizes != rhs.sizes; }
+    bool operator==(const TArraySizes& rhs) const { return sizes == rhs.sizes; }
+    bool operator!=(const TArraySizes& rhs) const { return sizes != rhs.sizes; }

 protected:
    TSmallArrayVector sizes;

--- a/glslang/Include/intermediate.h
+++ b/glslang/Include/intermediate.h
@@ -615,6 +615,10 @@ enum TOperator {
    EOpAny,
    EOpAll,

+    EOpCooperativeMatrixLoad,
+    EOpCooperativeMatrixStore,
+    EOpCooperativeMatrixMulAdd,
+
    //
    // Branch
    //
@@ -737,6 +741,7 @@ enum TOperator {
    EOpConstructTextureSampler,
    EOpConstructNonuniform,     // expected to be transformed away, not present in final AST
    EOpConstructReference,
+    EOpConstructCooperativeMatrix,
    EOpConstructGuardEnd,

    //

--- a/glslang/MachineIndependent/Constant.cpp
+++ b/glslang/MachineIndependent/Constant.cpp
@@ -1354,7 +1354,9 @@ TIntermTyped* TIntermediate::foldDereference(TIntermTyped* node, int index, cons
    // arrays, vectors, matrices, all use simple multiplicative math
    // while structures need to add up heterogeneous members
    int start;
-    if (node->isArray() || ! node->isStruct())
+    if (node->getType().isCoopMat())
+        start = 0;
+    else if (node->isArray() || ! node->isStruct())
        start = size * index;
    else {
        // it is a structure

--- a/glslang/MachineIndependent/Initialize.cpp
+++ b/glslang/MachineIndependent/Initialize.cpp
@@ -4928,6 +4928,34 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
    commonBuiltins.append("void controlBarrier(int, int, int, int);\n"
                          "void memoryBarrier(int, int, int);\n");

+    if (profile != EEsProfile && version >= 450) {
+        // coopMatStoreNV perhaps ought to have "out" on the buf parameter, but
+        // adding it introduces undesirable tempArgs on the stack. What we want is
+        // closer to treating "buf" as a pointer value passed as an "in" parameter.
+        stageBuiltins[EShLangCompute].append(
+            "void coopMatLoadNV(out fcoopmatNV m, volatile coherent float16_t[] buf, uint element, uint stride, bool colMajor);\n"
+            "void coopMatLoadNV(out fcoopmatNV m, volatile coherent float[] buf, uint element, uint stride, bool colMajor);\n"
+            "void coopMatLoadNV(out fcoopmatNV m, volatile coherent uint8_t[] buf, uint element, uint stride, bool colMajor);\n"
+            "void coopMatLoadNV(out fcoopmatNV m, volatile coherent uint16_t[] buf, uint element, uint stride, bool colMajor);\n"
+            "void coopMatLoadNV(out fcoopmatNV m, volatile coherent uint[] buf, uint element, uint stride, bool colMajor);\n"
+            "void coopMatLoadNV(out fcoopmatNV m, volatile coherent uint64_t[] buf, uint element, uint stride, bool colMajor);\n"
+            "void coopMatLoadNV(out fcoopmatNV m, volatile coherent uvec2[] buf, uint element, uint stride, bool colMajor);\n"
+            "void coopMatLoadNV(out fcoopmatNV m, volatile coherent uvec4[] buf, uint element, uint stride, bool colMajor);\n"
+
+            "void coopMatStoreNV(fcoopmatNV m, volatile coherent float16_t[] buf, uint element, uint stride, bool colMajor);\n"
+            "void coopMatStoreNV(fcoopmatNV m, volatile coherent float[] buf, uint element, uint stride, bool colMajor);\n"
+            "void coopMatStoreNV(fcoopmatNV m, volatile coherent float64_t[] buf, uint element, uint stride, bool colMajor);\n"
+            "void coopMatStoreNV(fcoopmatNV m, volatile coherent uint8_t[] buf, uint element, uint stride, bool colMajor);\n"
+            "void coopMatStoreNV(fcoopmatNV m, volatile coherent uint16_t[] buf, uint element, uint stride, bool colMajor);\n"
+            "void coopMatStoreNV(fcoopmatNV m, volatile coherent uint[] buf, uint element, uint stride, bool colMajor);\n"
+            "void coopMatStoreNV(fcoopmatNV m, volatile coherent uint64_t[] buf, uint element, uint stride, bool colMajor);\n"
+            "void coopMatStoreNV(fcoopmatNV m, volatile coherent uvec2[] buf, uint element, uint stride, bool colMajor);\n"
+            "void coopMatStoreNV(fcoopmatNV m, volatile coherent uvec4[] buf, uint element, uint stride, bool colMajor);\n"
+
+            "fcoopmatNV coopMatMulAddNV(fcoopmatNV A, fcoopmatNV B, fcoopmatNV C);\n"
+            );
+    }
+
    //============================================================================
    //
    // Prototypes for built-in functions seen by fragment shaders only.
@@ -8658,6 +8686,11 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion

            symbolTable.setFunctionExtensions("subgroupMemoryBarrierShared", 1, &E_GL_KHR_shader_subgroup_basic);
        }
+
+        symbolTable.setFunctionExtensions("coopMatLoadNV",              1, &E_GL_NV_cooperative_matrix);
+        symbolTable.setFunctionExtensions("coopMatStoreNV",             1, &E_GL_NV_cooperative_matrix);
+        symbolTable.setFunctionExtensions("coopMatMulAddNV",            1, &E_GL_NV_cooperative_matrix);
+
        break;
 #ifdef NV_EXTENSIONS
    case EShLangRayGenNV:
@@ -9462,6 +9495,9 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
            symbolTable.relateToOperator("fwidthCoarse",EOpFwidthCoarse);
        }
 #endif
+        symbolTable.relateToOperator("coopMatLoadNV",              EOpCooperativeMatrixLoad);
+        symbolTable.relateToOperator("coopMatStoreNV",             EOpCooperativeMatrixStore);
+        symbolTable.relateToOperator("coopMatMulAddNV",            EOpCooperativeMatrixMulAdd);
        break;

 #ifdef NV_EXTENSIONS

--- a/glslang/MachineIndependent/Intermediate.cpp
+++ b/glslang/MachineIndependent/Intermediate.cpp
@@ -725,6 +725,11 @@ TIntermTyped* TIntermediate::createConversion(TBasicType convertTo, TIntermTyped
    return newNode;
 }

+TIntermTyped* TIntermediate::addConversion(TBasicType convertTo, TIntermTyped* node) const
+{
+    return createConversion(convertTo, node);  // pure forwarder: result is whatever createConversion returns, no extra checks
+}
+
 // For converting a pair of operands to a binary operation to compatible
 // types with each other, relative to the operation in 'op'.
 // This does not cover assignment operations, which is asymmetric in that the
@@ -751,6 +756,10 @@ TIntermediate::addConversion(TOperator op, TIntermTyped* node0, TIntermTyped* no
        // If differing arrays, then no conversions.
        if (node0->getType().isArray() || node1->getType().isArray())
            return std::make_tuple(nullptr, nullptr);
+
+        // No implicit conversions for operations involving cooperative matrices
+        if (node0->getType().isCoopMat() || node1->getType().isCoopMat())
+            return std::make_tuple(node0, node1);
    }

    auto promoteTo = std::make_tuple(EbtNumTypes, EbtNumTypes);
@@ -983,6 +992,7 @@ TIntermTyped* TIntermediate::addConversion(TOperator op, const TType& type, TInt

    case EOpSequence:
    case EOpConstructStruct:
+    case EOpConstructCooperativeMatrix:

        if (type.getBasicType() == EbtReference || node->getType().getBasicType() == EbtReference) {
            // types must match to assign a reference
@@ -998,7 +1008,7 @@ TIntermTyped* TIntermediate::addConversion(TOperator op, const TType& type, TInt
        if (canImplicitlyPromote(node->getBasicType(), type.getBasicType(), op))
            promoteTo = type.getBasicType();
        else
-           return nullptr;
+            return nullptr;
        break;

    // For GLSL, there are no conversions needed; the shift amount just needs to be an
@@ -1847,6 +1857,9 @@ TOperator TIntermediate::mapTypeToConstructorOp(const TType& type) const
    if (type.getQualifier().nonUniform)
        return EOpConstructNonuniform;

+    if (type.isCoopMat())
+        return EOpConstructCooperativeMatrix;
+
    switch (type.getBasicType()) {
    case EbtStruct:
        op = EOpConstructStruct;
@@ -3319,6 +3332,40 @@ bool TIntermediate::promoteBinary(TIntermBinary& node)
        break;
    }

+    if (left->getType().isCoopMat() || right->getType().isCoopMat()) {
+        if (left->getType().isCoopMat() && right->getType().isCoopMat() &&
+            *left->getType().getTypeParameters() != *right->getType().getTypeParameters()) {
+            return false;
+        }
+        switch (op) {
+        case EOpMul:
+        case EOpMulAssign:
+            if (left->getType().isCoopMat() && right->getType().isCoopMat()) {
+                return false;
+            }
+            if (op == EOpMulAssign && right->getType().isCoopMat()) {
+                return false;
+            }
+            node.setOp(op == EOpMulAssign ? EOpMatrixTimesScalarAssign : EOpMatrixTimesScalar);
+            if (right->getType().isCoopMat()) {
+                node.setType(right->getType());
+            }
+            return true;
+        case EOpAdd:
+        case EOpSub:
+        case EOpDiv:
+        case EOpAssign:
+            // These require both to be cooperative matrices
+            if (!left->getType().isCoopMat() || !right->getType().isCoopMat()) {
+                return false;
+            }
+            return true;
+        default:
+            break;
+        }
+        return false;
+    }
+
    // Finish handling the case, for all ops, where both operands are scalars.
    if (left->isScalar() && right->isScalar())
        return true;

--- a/glslang/MachineIndependent/ParseHelper.cpp
+++ b/glslang/MachineIndependent/ParseHelper.cpp
--- a/glslang/MachineIndependent/ParseHelper.h
+++ b/glslang/MachineIndependent/ParseHelper.h
@@ -337,7 +337,7 @@ public:
    void globalCheck(const TSourceLoc&, const char* token);
    bool constructorError(const TSourceLoc&, TIntermNode*, TFunction&, TOperator, TType&);
    bool constructorTextureSamplerError(const TSourceLoc&, const TFunction&);
-    void arraySizeCheck(const TSourceLoc&, TIntermTyped* expr, TArraySize&);
+    void arraySizeCheck(const TSourceLoc&, TIntermTyped* expr, TArraySize&, const char *sizeType);
    bool arrayQualifierError(const TSourceLoc&, const TQualifier&);
    bool arrayError(const TSourceLoc&, const TType&);
    void arraySizeRequiredCheck(const TSourceLoc&, const TArraySizes&);

--- a/glslang/MachineIndependent/Scan.cpp
+++ b/glslang/MachineIndependent/Scan.cpp
@@ -714,6 +714,8 @@ void TScanContext::fillInKeywordMap()
    (*KeywordMap)["taskNV"] =                  PERTASKNV;
 #endif

+    (*KeywordMap)["fcoopmatNV"] =              FCOOPMATNV;
+
    ReservedSet = new std::unordered_set<const char*, str_hash, str_eq>;

    ReservedSet->insert("common");
@@ -1612,6 +1614,13 @@ int TScanContext::tokenizeIdentifier()
        return identifierOrType();
 #endif

+    case FCOOPMATNV:
+        afterType = true;
+        if (parseContext.symbolTable.atBuiltInLevel() ||
+            parseContext.extensionTurnedOn(E_GL_NV_cooperative_matrix))
+            return keyword;
+        return identifierOrType();
+
    default:
        parseContext.infoSink.info.message(EPrefixInternalError, "Unknown glslang keyword", loc);
        return 0;

--- a/glslang/MachineIndependent/Versions.cpp
+++ b/glslang/MachineIndependent/Versions.cpp
@@ -248,6 +248,8 @@ void TParseVersions::initializeExtensionBehavior()
    extensionBehavior[E_GL_NV_mesh_shader]                           = EBhDisable;
 #endif

+    extensionBehavior[E_GL_NV_cooperative_matrix]                    = EBhDisable;
+
    // AEP
    extensionBehavior[E_GL_ANDROID_extension_pack_es31a]             = EBhDisable;
    extensionBehavior[E_GL_KHR_blend_equation_advanced]              = EBhDisable;
@@ -427,6 +429,8 @@ void TParseVersions::getPreamble(std::string& preamble)
            "#define GL_NV_shader_texture_footprint 1\n"
            "#define GL_NV_mesh_shader 1\n"
 #endif
+            "#define GL_NV_cooperative_matrix 1\n"
+
            "#define GL_EXT_shader_explicit_arithmetic_types 1\n"
            "#define GL_EXT_shader_explicit_arithmetic_types_int8 1\n"
            "#define GL_EXT_shader_explicit_arithmetic_types_int16 1\n"
@@ -1083,6 +1087,14 @@ void TParseVersions::int64Check(const TSourceLoc& loc, const char* op, bool buil
    }
 }

+void TParseVersions::fcoopmatCheck(const TSourceLoc& loc, const char* op, bool builtIn)
+{
+    if (!builtIn) {  // built-in declarations skip the extension requirement entirely
+        const char* const extensions[] = {E_GL_NV_cooperative_matrix};
+        requireExtensions(loc, sizeof(extensions)/sizeof(extensions[0]), extensions, op);  // count is derived from the array, 'op' is passed through
+    }
+}
+
 // Call for any operation removed because SPIR-V is in use.
 void TParseVersions::spvRemoved(const TSourceLoc& loc, const char* op)
 {

--- a/glslang/MachineIndependent/Versions.h
+++ b/glslang/MachineIndependent/Versions.h
@@ -225,6 +225,8 @@ const char* const viewportEXTs[] = { E_GL_ARB_shader_viewport_layer_array, E_GL_
 const int Num_viewportEXTs = sizeof(viewportEXTs) / sizeof(viewportEXTs[0]);
 #endif

+const char* const E_GL_NV_cooperative_matrix                    = "GL_NV_cooperative_matrix";
+
 // AEP
 const char* const E_GL_ANDROID_extension_pack_es31a             = "GL_ANDROID_extension_pack_es31a";
 const char* const E_GL_KHR_blend_equation_advanced              = "GL_KHR_blend_equation_advanced";

--- a/glslang/MachineIndependent/glslang.y
+++ b/glslang/MachineIndependent/glslang.y
@@ -100,6 +100,7 @@ using namespace glslang;
            glslang::TArraySizes* arraySizes;
            glslang::TIdentifierList* identifierList;
        };
+        glslang::TArraySizes* typeParameters;
    } interm;
 }

@@ -166,6 +167,7 @@ extern int yylex(YYSTYPE*, TParseContext&);
 %token <lex> F64MAT4X2 F64MAT4X3 F64MAT4X4
 %token <lex> ATOMIC_UINT
 %token <lex> ACCSTRUCTNV
+%token <lex> FCOOPMATNV

 // combined image/sampler
 %token <lex> SAMPLER1D SAMPLER2D SAMPLER3D SAMPLERCUBE SAMPLER1DSHADOW SAMPLER2DSHADOW
@@ -273,6 +275,10 @@ extern int yylex(YYSTYPE*, TParseContext&);
 %type <interm.type> layout_qualifier layout_qualifier_id_list layout_qualifier_id
 %type <interm.type> non_uniform_qualifier

+%type <interm.typeParameters> type_parameter_specifier
+%type <interm.typeParameters> type_parameter_specifier_opt
+%type <interm.typeParameters> type_parameter_specifier_list
+
 %type <interm.type> type_qualifier fully_specified_type type_specifier
 %type <interm.type> single_type_qualifier
 %type <interm.type> type_specifier_nonarray
@@ -1487,15 +1493,17 @@ type_name_list
    ;

 type_specifier
-    : type_specifier_nonarray {
+    : type_specifier_nonarray type_parameter_specifier_opt {
        $$ = $1;
        $$.qualifier.precision = parseContext.getDefaultPrecision($$);
+        $$.typeParameters = $2;
    }
-    | type_specifier_nonarray array_specifier {
-        parseContext.arrayOfArrayVersionCheck($2.loc, $2.arraySizes);
+    | type_specifier_nonarray type_parameter_specifier_opt array_specifier {
+        parseContext.arrayOfArrayVersionCheck($3.loc, $3.arraySizes);
        $$ = $1;
        $$.qualifier.precision = parseContext.getDefaultPrecision($$);
-        $$.arraySizes = $2.arraySizes;
+        $$.typeParameters = $2;
+        $$.arraySizes = $3.arraySizes;
    }
    ;

@@ -1510,7 +1518,7 @@ array_specifier
        $$.arraySizes = new TArraySizes;

        TArraySize size;
-        parseContext.arraySizeCheck($2->getLoc(), $2, size);
+        parseContext.arraySizeCheck($2->getLoc(), $2, size, "array size");
        $$.arraySizes->addInnerSize(size);
    }
    | array_specifier LEFT_BRACKET RIGHT_BRACKET {
@@ -1521,11 +1529,43 @@ array_specifier
        $$ = $1;

        TArraySize size;
-        parseContext.arraySizeCheck($3->getLoc(), $3, size);
+        parseContext.arraySizeCheck($3->getLoc(), $3, size, "array size");
        $$.arraySizes->addInnerSize(size);
    }
    ;

+type_parameter_specifier_opt
+    : type_parameter_specifier {
+        $$ = $1;  // hand the parsed list through unchanged
+    }
+    | /* empty alternative: no <...> list was written, so the value is null */ {
+        $$ = 0;
+    }
+    ;
+
+type_parameter_specifier
+    : LEFT_ANGLE type_parameter_specifier_list RIGHT_ANGLE {
+        $$ = $2;  // value is the list between the angle brackets
+    }
+    ;
+
+type_parameter_specifier_list
+    : unary_expression {  // NOTE(review): presumably unary_expression so that '>' ends the list instead of parsing as a comparison - confirm
+        $$ = new TArraySizes;  // first parameter starts a fresh list
+
+        TArraySize size;
+        parseContext.arraySizeCheck($1->getLoc(), $1, size, "type parameter");  // same checker array_specifier calls, with the "type parameter" label
+        $$->addInnerSize(size);
+    }
+    | type_parameter_specifier_list COMMA unary_expression {
+        $$ = $1;  // keep extending the list built by the left-recursive part
+
+        TArraySize size;
+        parseContext.arraySizeCheck($3->getLoc(), $3, size, "type parameter");
+        $$->addInnerSize(size);  // appended after the earlier parameters
+    }
+    ;
+
 type_specifier_nonarray
    : VOID {
        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
@@ -3172,6 +3212,12 @@ type_specifier_nonarray
        $$.basicType = EbtSampler;
        $$.sampler.setSubpass(EbtUint, true);
    }
+    | FCOOPMATNV {
+        parseContext.fcoopmatCheck($1.loc, "fcoopmatNV", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloat;
+        $$.coopmat = true;
+    }
    | struct_specifier {
        $$ = $1;
        $$.qualifier.storage = parseContext.symbolTable.atGlobalLevel() ? EvqGlobal : EvqTemporary;

--- a/glslang/MachineIndependent/glslang_tab.cpp
+++ b/glslang/MachineIndependent/glslang_tab.cpp
--- a/glslang/MachineIndependent/glslang_tab.cpp.h
+++ b/glslang/MachineIndependent/glslang_tab.cpp.h
--- a/glslang/MachineIndependent/intermOut.cpp
+++ b/glslang/MachineIndependent/intermOut.cpp
@@ -817,6 +817,7 @@ bool TOutputTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node
    case EOpConstructStruct:  out.debug << "Construct structure";  break;
    case EOpConstructTextureSampler: out.debug << "Construct combined texture-sampler"; break;
    case EOpConstructReference:  out.debug << "Construct reference";  break;
+    case EOpConstructCooperativeMatrix:  out.debug << "Construct cooperative matrix";  break;

    case EOpLessThan:         out.debug << "Compare Less Than";             break;
    case EOpGreaterThan:      out.debug << "Compare Greater Than";          break;
@@ -1066,6 +1067,10 @@ bool TOutputTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node
    case EOpWritePackedPrimitiveIndices4x8NV: out.debug << "writePackedPrimitiveIndices4x8NV"; break;
 #endif

+    case EOpCooperativeMatrixLoad:  out.debug << "Load cooperative matrix";  break;
+    case EOpCooperativeMatrixStore:  out.debug << "Store cooperative matrix";  break;
+    case EOpCooperativeMatrixMulAdd: out.debug << "MulAdd cooperative matrices"; break;
+
    default: out.debug.message(EPrefixError, "Bad aggregation op");
    }


--- a/glslang/MachineIndependent/localintermediate.h
+++ b/glslang/MachineIndependent/localintermediate.h
@@ -261,6 +261,7 @@ public:
        useStorageBuffer(false),
        useVulkanMemoryModel(false),
        hlslIoMapping(false),
+        useVariablePointers(false),
        textureSamplerTransformMode(EShTexSampTransKeep),
        needToLegalize(false),
        binaryDoubleOutput(false),
@@ -405,6 +406,12 @@ public:
        usePhysicalStorageBuffer = true;
    }
    bool usingPhysicalStorageBuffer() const { return usePhysicalStorageBuffer; }
+    void setUseVariablePointers()
+    {
+        useVariablePointers = true;  // one-way switch: this setter only ever turns the flag on
+        processes.addProcess("use-variable-pointers");  // also records the choice under this name in 'processes'
+    }
+    bool usingVariablePointers() const { return useVariablePointers; }  // read-only view of the flag set by setUseVariablePointers()

    template<class T> T addCounterBufferName(const T& name) const { return name + implicitCounterName; }
    bool hasCounterBufferName(const TString& name) const {
@@ -491,6 +498,7 @@ public:
    TIntermTyped* addConversion(TOperator, const TType&, TIntermTyped*) const;
    std::tuple<TIntermTyped*, TIntermTyped*> addConversion(TOperator op, TIntermTyped* node0, TIntermTyped* node1) const;
    TIntermTyped* addUniShapeConversion(TOperator, const TType&, TIntermTyped*);
+    TIntermTyped* addConversion(TBasicType convertTo, TIntermTyped* node) const;
    void addBiShapeConversion(TOperator, TIntermTyped*& lhsNode, TIntermTyped*& rhsNode);
    TIntermTyped* addShapeConversion(const TType&, TIntermTyped*);
    TIntermTyped* addBinaryMath(TOperator, TIntermTyped* left, TIntermTyped* right, TSourceLoc);
@@ -852,6 +860,7 @@ protected:
    bool useStorageBuffer;
    bool useVulkanMemoryModel;
    bool hlslIoMapping;
+    bool useVariablePointers;

    std::set<TString> ioAccessed;           // set of names of statically read/written I/O that might need extra checking
    std::vector<TIoRange> usedIo[4];        // sets of used locations, one for each of in, out, uniform, and buffers

--- a/glslang/MachineIndependent/parseVersions.h
+++ b/glslang/MachineIndependent/parseVersions.h
@@ -104,6 +104,7 @@ public:
    virtual bool checkExtensionsRequested(const TSourceLoc&, int numExtensions, const char* const extensions[], const char* featureDesc);
    virtual void updateExtensionBehavior(const char* const extension, TExtensionBehavior);
    virtual void checkExtensionStage(const TSourceLoc&, const char* const extension);
+    virtual void fcoopmatCheck(const TSourceLoc&, const char* op, bool builtIn = false);

    virtual void C_DECL error(const TSourceLoc&, const char* szReason, const char* szToken,
        const char* szExtraInfoFormat, ...) = 0;

--- a/gtests/Spv.FromFile.cpp
+++ b/gtests/Spv.FromFile.cpp
@@ -286,6 +286,8 @@ INSTANTIATE_TEST_CASE_P(
        "spv.constStruct.vert",
        "spv.controlFlowAttributes.frag",
        "spv.conversion.frag",
+        "spv.coopmat.comp",
+        "spv.coopmat_Error.comp",
        "spv.dataOut.frag",
        "spv.dataOutIndirect.frag",
        "spv.dataOutIndirect.vert",
@@ -410,6 +412,7 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::ValuesIn(std::vector<std::string>({
        "spv.1.3.8bitstorage-ubo.vert",
        "spv.1.3.8bitstorage-ssbo.vert",
+        "spv.1.3.coopmat.comp",
        "spv.deviceGroup.frag",
        "spv.drawParams.vert",
        "spv.int8.frag",

--- a/known_good.json
+++ b/known_good.json
@@ -5,14 +5,14 @@
      "site" : "github",
      "subrepo" : "KhronosGroup/SPIRV-Tools",
      "subdir" : "External/spirv-tools",
-      "commit" : "5994ae2a045015004cce24802dc47c33736486ea"
+      "commit" : "002ef361cabc486a2f3567d646363334d50cc462"
    },
    {
      "name" : "spirv-tools/external/spirv-headers",
      "site" : "github",
      "subrepo" : "KhronosGroup/SPIRV-Headers",
      "subdir" : "External/spirv-tools/external/spirv-headers",
-      "commit" : "79b6681aadcb53c27d1052e5f8a0e82a981dbf2f"
+      "commit" : "e74c389f81915d0a48d6df1af83c3862c5ad85ab"
    }
  ]
 }