Vulkan: Improve cubemap emulation seam handling

Changes seamful cubemap emulation to always compute the derivative, emulating the bias parameter by scaling the provided derivatives. This results in more accurate mipmap levels for seams within primitives. There are some artifacts as a result of how derivatives are calculated, but this matches the native driver.

Bug: angleproject:3243
Bug: angleproject:3732
Change-Id: Icb976e2a7e14cb4210645571edc037d4e607bd0d
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1754383
Commit-Queue: James Dong <dongja@google.com>
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>

Vulkan: Improve cubemap emulation seam handling
83a369bb · James Dong · Commit Bot · 2f4a7518 · 83a369bb · 83a369bb
Commit 83a369bb authored Aug 14, 2019 by James Dong Committed by Commit Bot Aug 23, 2019
11 changed files
--- a/include/GLSLANG/ShaderLang.h
+++ b/include/GLSLANG/ShaderLang.h
@@ -290,12 +290,7 @@ const ShCompileOptions SH_EMULATE_GL_BASE_VERTEX_BASE_INSTANCE = UINT64_C(1) << 
 // Emulate seamful cube map sampling for OpenGL ES2.0.  Currently only applies to the Vulkan
 // backend, as is done after samplers are moved out of structs.  Can likely be made to work on
 // the other backends as well.
-//
-// There are two variations of this.  One using subgroup operations where available, and another
-// that emulates those operations using dFdxFine and dFdyFine.  The latter is more universally
-// available, but is buggy on Nvidia.
 const ShCompileOptions SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING = UINT64_C(1) << 44;
-const ShCompileOptions SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP = UINT64_C(1) << 45;
 // If requested, validates the AST after every transformation.  Useful for debugging.
 const ShCompileOptions SH_VALIDATE_AST = UINT64_C(1) << 46;

--- a/src/compiler/translator/TranslatorVulkan.cpp
+++ b/src/compiler/translator/TranslatorVulkan.cpp
@@ -665,11 +665,6 @@ bool TranslatorVulkan::translate(TIntermBlock *root,
    sink << "#version 450 core\n";
-    if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP)
-    {
-        sink << "#extension GL_KHR_shader_subgroup_quad : require\n";
-    }
    // Write out default uniforms into a uniform block assigned to a specific set/binding.
    int defaultUniformCount           = 0;
    int aggregateTypesUsedForUniforms = 0;
@@ -732,12 +727,10 @@ bool TranslatorVulkan::translate(TIntermBlock *root,
    // Rewrite samplerCubes as sampler2DArrays.  This must be done after rewriting struct samplers
    // as it doesn't expect that.
-    if (compileOptions & (SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING |
+    if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING)
-                          SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP))
    {
-        if (!RewriteCubeMapSamplersAs2DArray(
+        if (!RewriteCubeMapSamplersAs2DArray(this, root, &getSymbolTable(),
-                this, root, &getSymbolTable(), getShaderType() == GL_FRAGMENT_SHADER,
+                                             getShaderType() == GL_FRAGMENT_SHADER))
-                compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP))
        {
            return false;
        }

--- a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp
+++ b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp
@@ -22,182 +22,30 @@ namespace sh
 namespace
 {
 constexpr ImmutableString kCoordTransformFuncName("ANGLECubeMapCoordTransform");
+constexpr ImmutableString kCoordTransformFuncNameImplicit("ANGLECubeMapCoordTransformImplicit");
-// Retrieve a value from another invocation in the quad.  See comment in
+TIntermTyped *DerivativeQuotient(TIntermTyped *u,
-// declareCoordTranslationFunction.
+                                 TIntermTyped *du,
-TIntermSymbol *GetValueFromNeighbor(TSymbolTable *symbolTable,
+                                 TIntermTyped *v,
-                                    TIntermBlock *body,
+                                 TIntermTyped *dv,
-                                    TFunction *quadSwap,
+                                 TIntermTyped *vRecip)
-                                    TIntermTyped *variable,
-                                    const TType *variableType)
 {
-    TIntermTyped *neighborValue =
+    // (du v - dv u) / v^2
-        TIntermAggregate::CreateRawFunctionCall(*quadSwap, new TIntermSequence({variable}));
+    return new TIntermBinary(
+        EOpMul,
-    TIntermSymbol *neighbor = new TIntermSymbol(CreateTempVariable(symbolTable, variableType));
+        new TIntermBinary(EOpSub, new TIntermBinary(EOpMul, du->deepCopy(), v->deepCopy()),
-    body->appendStatement(CreateTempInitDeclarationNode(&neighbor->variable(), neighborValue));
+                          new TIntermBinary(EOpMul, dv->deepCopy(), u->deepCopy())),
+        new TIntermBinary(EOpMul, vRecip->deepCopy(), vRecip->deepCopy()));
-    return neighbor;
 }
-// Calculate the difference of a value with another invocation in the quad.  Used to emulate
+TIntermTyped *Swizzle1(TIntermTyped *array, int i)
-// GetValueFromNeighbor where subgroup operations are not present.
-//
-// See comment in declareCoordTranslationFunction.
-TIntermSymbol *GetDiffWithNeighbor(TSymbolTable *symbolTable,
-                                   TIntermBlock *body,
-                                   TFunction *dFdxyFine,
-                                   TIntermTyped *variable,
-                                   const TType *variableType)
 {
-    TIntermTyped *neighborValue =
+    return new TIntermSwizzle(array, {i});
-        TIntermAggregate::CreateRawFunctionCall(*dFdxyFine, new TIntermSequence({variable}));
-    TIntermTyped *absNeighborValue = new TIntermUnary(EOpAbs, neighborValue, nullptr);
-    TIntermSymbol *neighbor = new TIntermSymbol(CreateTempVariable(symbolTable, variableType));
-    body->appendStatement(CreateTempInitDeclarationNode(&neighbor->variable(), absNeighborValue));
-    return neighbor;
 }
-// Used to emulate GetValueFromNeighbor with bool values.
+TIntermTyped *IndexDirect(TIntermTyped *array, int i)
-TIntermSymbol *IsNeighborNonHelper(TSymbolTable *symbolTable,
-                                   TIntermBlock *body,
-                                   TFunction *dFdxyFine,
-                                   TIntermTyped *gl_HelperInvocation)
 {
-    const TType *boolType  = StaticType::GetBasic<EbtBool>();
+    return new TIntermBinary(EOpIndexDirect, array, CreateIndexNode(i));
-    const TType *floatType = StaticType::GetBasic<EbtFloat>();
-    TIntermTyped *gl_HelperInvocationAsFloat =
-        TIntermAggregate::CreateConstructor(*floatType, new TIntermSequence({gl_HelperInvocation}));
-    TIntermSymbol *diffWithNeighbor =
-        GetDiffWithNeighbor(symbolTable, body, dFdxyFine, gl_HelperInvocationAsFloat, floatType);
-    TIntermTyped *isNeighborNonHelperValue =
-        new TIntermBinary(EOpGreaterThan, diffWithNeighbor, CreateFloatNode(0.5f));
-    TIntermSymbol *isNeighborNonHelper =
-        new TIntermSymbol(CreateTempVariable(symbolTable, boolType));
-    body->appendStatement(
-        CreateTempInitDeclarationNode(&isNeighborNonHelper->variable(), isNeighborNonHelperValue));
-    return isNeighborNonHelper;
-}
-// If this is a helper invocation, retrieve the layer index (cube map face) from another invocation
-// in the quad that is not a helper.  See comment in declareCoordTranslationFunction.
-void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable,
-                                     TIntermBlock *body,
-                                     TIntermTyped *l,
-                                     bool useSubgroupOps)
-{
-    TVariable *gl_HelperInvocationVar =
-        new TVariable(symbolTable, ImmutableString("gl_HelperInvocation"),
-                      StaticType::GetBasic<EbtBool>(), SymbolType::AngleInternal);
-    TIntermSymbol *gl_HelperInvocation = new TIntermSymbol(gl_HelperInvocationVar);
-    const TType *boolType  = StaticType::GetBasic<EbtBool>();
-    const TType *floatType = StaticType::GetBasic<EbtFloat>();
-    TIntermSymbol *lH;
-    TIntermSymbol *lV;
-    TIntermSymbol *lD;
-    TIntermTyped *horizontalIsNonHelper;
-    TIntermTyped *verticalIsNonHelper;
-    if (useSubgroupOps)
-    {
-        TFunction *quadSwapHorizontalBool =
-            new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
-                          SymbolType::AngleInternal, boolType, true);
-        TFunction *quadSwapHorizontalFloat =
-            new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
-                          SymbolType::AngleInternal, floatType, true);
-        TFunction *quadSwapVerticalBool =
-            new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
-                          SymbolType::AngleInternal, boolType, true);
-        TFunction *quadSwapVerticalFloat =
-            new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
-                          SymbolType::AngleInternal, floatType, true);
-        TFunction *quadSwapDiagonalFloat =
-            new TFunction(symbolTable, ImmutableString("subgroupQuadSwapDiagonal"),
-                          SymbolType::AngleInternal, floatType, true);
-        quadSwapHorizontalBool->addParameter(CreateTempVariable(symbolTable, boolType));
-        quadSwapVerticalBool->addParameter(CreateTempVariable(symbolTable, boolType));
-        quadSwapHorizontalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
-        quadSwapVerticalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
-        quadSwapDiagonalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
-        // Get the layer from the horizontal, vertical and diagonal neighbor.  These should be done
-        // outside `if`s so the non-helper thread is not turned inactive.
-        lH = GetValueFromNeighbor(symbolTable, body, quadSwapHorizontalFloat, l, floatType);
-        lV = GetValueFromNeighbor(symbolTable, body, quadSwapVerticalFloat, l->deepCopy(),
-                                  floatType);
-        lD = GetValueFromNeighbor(symbolTable, body, quadSwapDiagonalFloat, l->deepCopy(),
-                                  floatType);
-        // Get the value of gl_HelperInvocation from the neighbors too.
-        TIntermSymbol *horizontalIsHelper = GetValueFromNeighbor(
-            symbolTable, body, quadSwapHorizontalBool, gl_HelperInvocation->deepCopy(), boolType);
-        TIntermSymbol *verticalIsHelper = GetValueFromNeighbor(
-            symbolTable, body, quadSwapVerticalBool, gl_HelperInvocation->deepCopy(), boolType);
-        // Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's
-        // not enough to test if the neighbor is not a helper, we should also check if it's active.
-        horizontalIsNonHelper = new TIntermUnary(EOpLogicalNot, horizontalIsHelper, nullptr);
-        verticalIsNonHelper   = new TIntermUnary(EOpLogicalNot, verticalIsHelper, nullptr);
-    }
-    else
-    {
-        TFunction *dFdxFineBool  = new TFunction(symbolTable, ImmutableString("dFdxFine"),
-                                                SymbolType::AngleInternal, boolType, true);
-        TFunction *dFdxFineFloat = new TFunction(symbolTable, ImmutableString("dFdxFine"),
-                                                 SymbolType::AngleInternal, floatType, true);
-        TFunction *dFdyFineBool  = new TFunction(symbolTable, ImmutableString("dFdyFine"),
-                                                SymbolType::AngleInternal, boolType, true);
-        TFunction *dFdyFineFloat = new TFunction(symbolTable, ImmutableString("dFdyFine"),
-                                                 SymbolType::AngleInternal, floatType, true);
-        dFdxFineBool->addParameter(CreateTempVariable(symbolTable, boolType));
-        dFdyFineBool->addParameter(CreateTempVariable(symbolTable, boolType));
-        dFdxFineFloat->addParameter(CreateTempVariable(symbolTable, floatType));
-        dFdyFineFloat->addParameter(CreateTempVariable(symbolTable, floatType));
-        // layerQuadSwapHelper = gl_HelperInvocation ? 0.0 : layer;
-        TIntermTyped *layerQuadSwapHelperValue =
-            new TIntermTernary(gl_HelperInvocation->deepCopy(), CreateZeroNode(*floatType), l);
-        TIntermSymbol *layerQuadSwapHelper =
-            new TIntermSymbol(CreateTempVariable(symbolTable, floatType));
-        body->appendStatement(CreateTempInitDeclarationNode(&layerQuadSwapHelper->variable(),
-                                                            layerQuadSwapHelperValue));
-        // Get the layer from the horizontal, vertical and diagonal neighbor.  These should be done
-        // outside `if`s so the non-helper thread is not turned inactive.
-        lH = GetDiffWithNeighbor(symbolTable, body, dFdxFineFloat, layerQuadSwapHelper, floatType);
-        lV = GetDiffWithNeighbor(symbolTable, body, dFdyFineFloat, layerQuadSwapHelper->deepCopy(),
-                                 floatType);
-        lD = GetDiffWithNeighbor(symbolTable, body, dFdxFineFloat, lV->deepCopy(), floatType);
-        // Get the value of gl_HelperInvocation from the neighbors too.
-        //
-        // Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's
-        // not enough to test if the neighbor is not a helper, we should also check if it's active.
-        horizontalIsNonHelper =
-            IsNeighborNonHelper(symbolTable, body, dFdxFineBool, gl_HelperInvocation->deepCopy());
-        verticalIsNonHelper =
-            IsNeighborNonHelper(symbolTable, body, dFdyFineBool, gl_HelperInvocation->deepCopy());
-    }
-    TIntermTyped *lVD  = new TIntermTernary(verticalIsNonHelper, lV, lD);
-    TIntermTyped *lHVD = new TIntermTernary(horizontalIsNonHelper, lH, lVD);
-    TIntermBlock *helperBody = new TIntermBlock;
-    helperBody->appendStatement(new TIntermBinary(EOpAssign, l->deepCopy(), lHVD));
-    TIntermIfElse *ifHelper = new TIntermIfElse(gl_HelperInvocation, helperBody, nullptr);
-    body->appendStatement(ifHelper);
 }
 // Generated the common transformation in each coord transformation case.  See comment in
@@ -221,6 +69,39 @@ void TransformXMajor(TIntermBlock *block,
    block->appendStatement(new TIntermBinary(EOpAssign, vc->deepCopy(), vcValue));
 }
+void TransformDerivativeXMajor(TIntermBlock *block,
+                               TSymbolTable *symbolTable,
+                               TIntermTyped *x,
+                               TIntermTyped *y,
+                               TIntermTyped *z,
+                               TIntermTyped *dx,
+                               TIntermTyped *dy,
+                               TIntermTyped *dz,
+                               TIntermTyped *du,
+                               TIntermTyped *dv,
+                               TIntermTyped *xRecip)
+{
+    // Only the magnitude of the derivative matters, so we ignore the sign(x)
+    // and the negations.
+    TIntermTyped *duValue = DerivativeQuotient(z, dz, x, dx, xRecip);
+    TIntermTyped *dvValue = DerivativeQuotient(y, dy, x, dx, xRecip);
+    duValue               = new TIntermBinary(EOpMul, duValue, CreateFloatNode(0.5f));
+    dvValue               = new TIntermBinary(EOpMul, dvValue, CreateFloatNode(0.5f));
+    block->appendStatement(new TIntermBinary(EOpAssign, du->deepCopy(), duValue));
+    block->appendStatement(new TIntermBinary(EOpAssign, dv->deepCopy(), dvValue));
+}
+void TransformImplicitDerivativeXMajor(TIntermBlock *block,
+                                       TIntermTyped *dOuter,
+                                       TIntermTyped *du,
+                                       TIntermTyped *dv)
+{
+    block->appendStatement(
+        new TIntermBinary(EOpAssign, du->deepCopy(), Swizzle1(dOuter->deepCopy(), 2)));
+    block->appendStatement(
+        new TIntermBinary(EOpAssign, dv->deepCopy(), Swizzle1(dOuter->deepCopy(), 1)));
+}
 void TransformYMajor(TIntermBlock *block,
                     TIntermTyped *x,
                     TIntermTyped *y,
@@ -239,6 +120,39 @@ void TransformYMajor(TIntermBlock *block,
    block->appendStatement(new TIntermBinary(EOpAssign, vc->deepCopy(), vcValue));
 }
+void TransformDerivativeYMajor(TIntermBlock *block,
+                               TSymbolTable *symbolTable,
+                               TIntermTyped *x,
+                               TIntermTyped *y,
+                               TIntermTyped *z,
+                               TIntermTyped *dx,
+                               TIntermTyped *dy,
+                               TIntermTyped *dz,
+                               TIntermTyped *du,
+                               TIntermTyped *dv,
+                               TIntermTyped *yRecip)
+{
+    // Only the magnitude of the derivative matters, so we ignore the sign(y)
+    // and the negations.
+    TIntermTyped *duValue = DerivativeQuotient(x, dx, y, dy, yRecip);
+    TIntermTyped *dvValue = DerivativeQuotient(z, dz, y, dy, yRecip);
+    duValue               = new TIntermBinary(EOpMul, duValue, CreateFloatNode(0.5f));
+    dvValue               = new TIntermBinary(EOpMul, dvValue, CreateFloatNode(0.5f));
+    block->appendStatement(new TIntermBinary(EOpAssign, du->deepCopy(), duValue));
+    block->appendStatement(new TIntermBinary(EOpAssign, dv->deepCopy(), dvValue));
+}
+void TransformImplicitDerivativeYMajor(TIntermBlock *block,
+                                       TIntermTyped *dOuter,
+                                       TIntermTyped *du,
+                                       TIntermTyped *dv)
+{
+    block->appendStatement(
+        new TIntermBinary(EOpAssign, du->deepCopy(), Swizzle1(dOuter->deepCopy(), 0)));
+    block->appendStatement(
+        new TIntermBinary(EOpAssign, dv->deepCopy(), Swizzle1(dOuter->deepCopy(), 2)));
+}
 void TransformZMajor(TIntermBlock *block,
                     TIntermTyped *x,
                     TIntermTyped *y,
@@ -257,17 +171,49 @@ void TransformZMajor(TIntermBlock *block,
    block->appendStatement(new TIntermBinary(EOpAssign, vc->deepCopy(), vcValue));
 }
+void TransformDerivativeZMajor(TIntermBlock *block,
+                               TSymbolTable *symbolTable,
+                               TIntermTyped *x,
+                               TIntermTyped *y,
+                               TIntermTyped *z,
+                               TIntermTyped *dx,
+                               TIntermTyped *dy,
+                               TIntermTyped *dz,
+                               TIntermTyped *du,
+                               TIntermTyped *dv,
+                               TIntermTyped *zRecip)
+{
+    // Only the magnitude of the derivative matters, so we ignore the sign(z)
+    // and the negations.
+    TIntermTyped *duValue = DerivativeQuotient(x, dx, z, dz, zRecip);
+    TIntermTyped *dvValue = DerivativeQuotient(y, dy, z, dz, zRecip);
+    duValue               = new TIntermBinary(EOpMul, duValue, CreateFloatNode(0.5f));
+    dvValue               = new TIntermBinary(EOpMul, dvValue, CreateFloatNode(0.5f));
+    block->appendStatement(new TIntermBinary(EOpAssign, du->deepCopy(), duValue));
+    block->appendStatement(new TIntermBinary(EOpAssign, dv->deepCopy(), dvValue));
+}
+void TransformImplicitDerivativeZMajor(TIntermBlock *block,
+                                       TIntermTyped *dOuter,
+                                       TIntermTyped *du,
+                                       TIntermTyped *dv)
+{
+    block->appendStatement(
+        new TIntermBinary(EOpAssign, du->deepCopy(), Swizzle1(dOuter->deepCopy(), 0)));
+    block->appendStatement(
+        new TIntermBinary(EOpAssign, dv->deepCopy(), Swizzle1(dOuter->deepCopy(), 1)));
+}
 class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
 {
  public:
-    RewriteCubeMapSamplersAs2DArrayTraverser(TSymbolTable *symbolTable,
+    RewriteCubeMapSamplersAs2DArrayTraverser(TSymbolTable *symbolTable, bool isFragmentShader)
-                                             bool isFragmentShader,
-                                             bool useSubgroupOps)
        : TIntermTraverser(true, true, true, symbolTable),
          mCubeXYZToArrayUVL(nullptr),
+          mCubeXYZToArrayUVLImplicit(nullptr),
          mIsFragmentShader(isFragmentShader),
-          mUseSubgroupOps(useSubgroupOps),
+          mCoordTranslationFunctionDecl(nullptr),
-          mCoordTranslationFunctionDecl(nullptr)
+          mCoordTranslationFunctionImplicitDecl(nullptr)
    {}
    bool visitDeclaration(Visit visit, TIntermDeclaration *node) override
@@ -385,6 +331,11 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
        return mCoordTranslationFunctionDecl;
    }
+    TIntermFunctionDefinition *getCoordTranslationFunctionDeclImplicit()
+    {
+        return mCoordTranslationFunctionImplicitDecl;
+    }
  private:
    void declareSampler2DArray(const TVariable *samplerCubeVar, TIntermDeclaration *node)
    {
@@ -392,7 +343,14 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
        {
            // If not done yet, declare the function that transforms cube map texture sampling
            // coordinates to face index and uv coordinates.
-            declareCoordTranslationFunction();
+            declareCoordTranslationFunction(false, kCoordTransformFuncName, &mCubeXYZToArrayUVL,
+                                            &mCoordTranslationFunctionDecl);
+        }
+        if (mCubeXYZToArrayUVLImplicit == nullptr && mIsFragmentShader)
+        {
+            declareCoordTranslationFunction(true, kCoordTransformFuncNameImplicit,
+                                            &mCubeXYZToArrayUVLImplicit,
+                                            &mCoordTranslationFunctionImplicitDecl);
        }
        TType *newType = new TType(samplerCubeVar->getType());
@@ -412,7 +370,10 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
        mRetyper.replaceGlobalVariable(samplerCubeVar, sampler2DArrayVar);
    }
-    void declareCoordTranslationFunction()
+    void declareCoordTranslationFunction(bool implicit,
+                                         const ImmutableString &name,
+                                         TFunction **functionOut,
+                                         TIntermFunctionDefinition **declOut)
    {
        // GLES2.0 (as well as desktop OpenGL 2.0) define the coordination transformation as
        // follows.  Given xyz cube coordinates, where each channel is in [-1, 1], the following
@@ -555,6 +516,69 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
        body->appendStatement(absYDecl);
        body->appendStatement(absZDecl);
+        // Create temporary variable for division outer product matrix and its
+        // derivatives.
+        // recipOuter[i][j] = 0.5 * P[j] / P[i]
+        const TType *mat3Type     = StaticType::GetBasic<EbtFloat, 3, 3>();
+        TIntermSymbol *recipOuter = new TIntermSymbol(CreateTempVariable(mSymbolTable, mat3Type));
+        TIntermTyped *pRecip     = new TIntermBinary(EOpDiv, CreateFloatNode(1.0), p->deepCopy());
+        TIntermSymbol *pRecipVar = new TIntermSymbol(CreateTempVariable(mSymbolTable, vec3Type));
+        body->appendStatement(CreateTempInitDeclarationNode(&pRecipVar->variable(), pRecip));
+        TIntermDeclaration *recipOuterDecl = CreateTempInitDeclarationNode(
+            &recipOuter->variable(),
+            CreateBuiltInFunctionCallNode(
+                "outerProduct",
+                new TIntermSequence(
+                    {p->deepCopy(), new TIntermBinary(EOpVectorTimesScalar, CreateFloatNode(0.5),
+                                                      pRecipVar->deepCopy())}),
+                *mSymbolTable, 300));
+        body->appendStatement(recipOuterDecl);
+        TIntermSymbol *dPDXdx;
+        TIntermSymbol *dPDYdx;
+        TIntermSymbol *dPDZdx;
+        TIntermSymbol *dPDXdy;
+        TIntermSymbol *dPDYdy;
+        TIntermSymbol *dPDZdy;
+        if (implicit)
+        {
+            dPDXdx = new TIntermSymbol(CreateTempVariable(mSymbolTable, vec3Type));
+            dPDYdx = new TIntermSymbol(CreateTempVariable(mSymbolTable, vec3Type));
+            dPDZdx = new TIntermSymbol(CreateTempVariable(mSymbolTable, vec3Type));
+            dPDXdy = new TIntermSymbol(CreateTempVariable(mSymbolTable, vec3Type));
+            dPDYdy = new TIntermSymbol(CreateTempVariable(mSymbolTable, vec3Type));
+            dPDZdy = new TIntermSymbol(CreateTempVariable(mSymbolTable, vec3Type));
+            TIntermDeclaration *dPDXdxDecl = CreateTempInitDeclarationNode(
+                &dPDXdx->variable(),
+                new TIntermUnary(EOpDFdx, IndexDirect(recipOuter, 0)->deepCopy(), nullptr));
+            TIntermDeclaration *dPDYdxDecl = CreateTempInitDeclarationNode(
+                &dPDYdx->variable(),
+                new TIntermUnary(EOpDFdx, IndexDirect(recipOuter, 1)->deepCopy(), nullptr));
+            TIntermDeclaration *dPDZdxDecl = CreateTempInitDeclarationNode(
+                &dPDZdx->variable(),
+                new TIntermUnary(EOpDFdx, IndexDirect(recipOuter, 2)->deepCopy(), nullptr));
+            TIntermDeclaration *dPDXdyDecl = CreateTempInitDeclarationNode(
+                &dPDXdy->variable(),
+                new TIntermUnary(EOpDFdy, IndexDirect(recipOuter, 0)->deepCopy(), nullptr));
+            TIntermDeclaration *dPDYdyDecl = CreateTempInitDeclarationNode(
+                &dPDYdy->variable(),
+                new TIntermUnary(EOpDFdy, IndexDirect(recipOuter, 1)->deepCopy(), nullptr));
+            TIntermDeclaration *dPDZdyDecl = CreateTempInitDeclarationNode(
+                &dPDZdy->variable(),
+                new TIntermUnary(EOpDFdy, IndexDirect(recipOuter, 2)->deepCopy(), nullptr));
+            body->appendStatement(dPDXdxDecl);
+            body->appendStatement(dPDYdxDecl);
+            body->appendStatement(dPDZdxDecl);
+            body->appendStatement(dPDXdyDecl);
+            body->appendStatement(dPDYdyDecl);
+            body->appendStatement(dPDZdyDecl);
+        }
        // Create temporary variables for ma, uc, vc, and l (layer), as well as dUdx, dVdx, dUdy
        // and dVdy.
        TIntermSymbol *ma   = new TIntermSymbol(CreateTempVariable(mSymbolTable, floatType));
@@ -621,39 +645,6 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
        TIntermIfElse *calculateXYZL = new TIntermIfElse(isXMajor, calculateXL, calculateYZLBlock);
        body->appendStatement(calculateXYZL);
-        // If the input coordinates come from a varying, they are interpolated between values
-        // provided by the vertex shader.  Say the vertex shader provides the coordinates
-        // corresponding to corners of a face.  For the sake of the argument, say this is the
-        // positive X face.  The coordinates would thus look as follows:
-        //
-        //  - (A, A, A)
-        //  - (B, B, -B)
-        //  - (C, -C, C)
-        //  - (D, -D, -D)
-        //
-        // The values A, B, C and D could be equal, but not necessarily.  All fragments inside this
-        // quad will have X as the major axis.  The transformation described the spec works for
-        // these samples.
-        //
-        // However, WQM (Whole Quad Mode) can enable a few invocations outside the borders of the
-        // quad for the sole purpose of calculating derivatives.  These invocations will extrapolate
-        // the coordinates that are input from varyings and end up with a different major axis.  In
-        // turn, their transformed UV would correspond to a different face and while the sampling
-        // is done on the correct face (by fragments inside the quad), the derivatives would be
-        // incorrect and the wrong mip would be selected.
-        //
-        // We therefore use gl_HelperInvocation to identify these invocations and subgroupQuadSwap*
-        // (where available) or dFdx/dFdy (emulating subgroupQuadSwap*) to retrieve the layer from a
-        // non-helper invocation.  As a result, the UVs calculated for the helper invocations
-        // correspond to the same face and end up outside the [0, 1] range, but result in correct
-        // derivatives.  Indeed, sampling from any other kind of texture using varyings that range
-        // from [0, 1] would follow the same behavior (where helper invocations generate UVs out of
-        // range).
-        if (mIsFragmentShader)
-        {
-            GetLayerFromNonHelperInvocation(mSymbolTable, body, l->deepCopy(), mUseSubgroupOps);
-        }
        // layer < 1.5 (covering faces 0 and 1, corresponding to major axis being X) and layer < 3.5
        // (covering faces 2 and 3, corresponding to major axis being Y).  Used to determine which
        // of the three transformations to apply.  Previously, ma == |X| and ma == |Y| was used,
@@ -674,22 +665,42 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
        calculateXUcVc->appendStatement(
            new TIntermBinary(EOpAssign, ma->deepCopy(), absX->deepCopy()));
        TransformXMajor(calculateXUcVc, x, y, z, uc, vc);
-        TransformXMajor(calculateXUcVc, dPdxX, dPdxY, dPdxZ, dUdx, dVdx);
-        TransformXMajor(calculateXUcVc, dPdyX, dPdyY, dPdyZ, dUdy, dVdy);
        TIntermBlock *calculateYUcVc = new TIntermBlock;
        calculateYUcVc->appendStatement(
            new TIntermBinary(EOpAssign, ma->deepCopy(), absY->deepCopy()));
        TransformYMajor(calculateYUcVc, x, y, z, uc, vc);
-        TransformYMajor(calculateYUcVc, dPdxX, dPdxY, dPdxZ, dUdx, dVdx);
-        TransformYMajor(calculateYUcVc, dPdyX, dPdyY, dPdyZ, dUdy, dVdy);
        TIntermBlock *calculateZUcVc = new TIntermBlock;
        calculateZUcVc->appendStatement(
            new TIntermBinary(EOpAssign, ma->deepCopy(), absZ->deepCopy()));
        TransformZMajor(calculateZUcVc, x, y, z, uc, vc);
-        TransformZMajor(calculateZUcVc, dPdxX, dPdxY, dPdxZ, dUdx, dVdx);
-        TransformZMajor(calculateZUcVc, dPdyX, dPdyY, dPdyZ, dUdy, dVdy);
+        // Compute derivatives.
+        if (implicit)
+        {
+            TransformImplicitDerivativeXMajor(calculateXUcVc, dPDXdx, dUdx, dVdx);
+            TransformImplicitDerivativeXMajor(calculateXUcVc, dPDXdy, dUdy, dVdy);
+            TransformImplicitDerivativeYMajor(calculateYUcVc, dPDYdx, dUdx, dVdx);
+            TransformImplicitDerivativeYMajor(calculateYUcVc, dPDYdy, dUdy, dVdy);
+            TransformImplicitDerivativeZMajor(calculateZUcVc, dPDZdx, dUdx, dVdx);
+            TransformImplicitDerivativeZMajor(calculateZUcVc, dPDZdy, dUdy, dVdy);
+        }
+        else
+        {
+            TransformDerivativeXMajor(calculateXUcVc, mSymbolTable, x, y, z, dPdxX, dPdxY, dPdxZ,
+                                      dUdx, dVdx, Swizzle1(pRecipVar->deepCopy(), 0));
+            TransformDerivativeXMajor(calculateXUcVc, mSymbolTable, x, y, z, dPdyX, dPdyY, dPdyZ,
+                                      dUdy, dVdy, Swizzle1(pRecipVar->deepCopy(), 0));
+            TransformDerivativeYMajor(calculateYUcVc, mSymbolTable, x, y, z, dPdxX, dPdxY, dPdxZ,
+                                      dUdx, dVdx, Swizzle1(pRecipVar->deepCopy(), 1));
+            TransformDerivativeYMajor(calculateYUcVc, mSymbolTable, x, y, z, dPdyX, dPdyY, dPdyZ,
+                                      dUdy, dVdy, Swizzle1(pRecipVar->deepCopy(), 1));
+            TransformDerivativeZMajor(calculateZUcVc, mSymbolTable, x, y, z, dPdxX, dPdxY, dPdxZ,
+                                      dUdx, dVdx, Swizzle1(pRecipVar->deepCopy(), 2));
+            TransformDerivativeZMajor(calculateZUcVc, mSymbolTable, x, y, z, dPdyX, dPdyY, dPdyZ,
+                                      dUdy, dVdy, Swizzle1(pRecipVar->deepCopy(), 2));
+        }
        // Create the if-else paths:
        TIntermIfElse *calculateYZUcVc =
@@ -702,30 +713,23 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
        // u = (1 + uc/|ma|) / 2
        // v = (1 + vc/|ma|) / 2
-        TIntermTyped *maTimesTwo =
+        TIntermTyped *maTimesTwoRecip =
-            new TIntermBinary(EOpMulAssign, ma->deepCopy(), CreateFloatNode(2.0));
+            new TIntermBinary(EOpAssign, ma->deepCopy(),
-        body->appendStatement(maTimesTwo);
+                              new TIntermBinary(EOpDiv, CreateFloatNode(0.5f), ma->deepCopy()));
+        body->appendStatement(maTimesTwoRecip);
-        TIntermTyped *ucDivMa     = new TIntermBinary(EOpDiv, uc, ma->deepCopy());
+        TIntermTyped *ucDivMa     = new TIntermBinary(EOpMul, uc, ma->deepCopy());
-        TIntermTyped *vcDivMa     = new TIntermBinary(EOpDiv, vc, ma->deepCopy());
+        TIntermTyped *vcDivMa     = new TIntermBinary(EOpMul, vc, ma->deepCopy());
        TIntermTyped *uNormalized = new TIntermBinary(EOpAdd, CreateFloatNode(0.5f), ucDivMa);
        TIntermTyped *vNormalized = new TIntermBinary(EOpAdd, CreateFloatNode(0.5f), vcDivMa);
        body->appendStatement(new TIntermBinary(EOpAssign, uc->deepCopy(), uNormalized));
        body->appendStatement(new TIntermBinary(EOpAssign, vc->deepCopy(), vNormalized));
-        // dUdx / (ma*2).  Similarly for dVdx, dUdy and dVdy
+        TIntermTyped *dUVdxValue =
-        TIntermTyped *dUdxNormalized = new TIntermBinary(EOpDiv, dUdx, ma->deepCopy());
+            TIntermAggregate::CreateConstructor(*vec2Type, new TIntermSequence({dUdx, dVdx}));
-        TIntermTyped *dVdxNormalized = new TIntermBinary(EOpDiv, dVdx, ma->deepCopy());
+        TIntermTyped *dUVdyValue =
-        TIntermTyped *dUdyNormalized = new TIntermBinary(EOpDiv, dUdy, ma->deepCopy());
+            TIntermAggregate::CreateConstructor(*vec2Type, new TIntermSequence({dUdy, dVdy}));
-        TIntermTyped *dVdyNormalized = new TIntermBinary(EOpDiv, dVdy, ma->deepCopy());
-        // dUVdx = vec2(dUdx/2ma, dVdx/2ma)
-        // dUVdy = vec2(dUdy/2ma, dVdy/2ma)
-        TIntermTyped *dUVdxValue = TIntermAggregate::CreateConstructor(
-            *vec2Type, new TIntermSequence({dUdxNormalized, dVdxNormalized}));
-        TIntermTyped *dUVdyValue = TIntermAggregate::CreateConstructor(
-            *vec2Type, new TIntermSequence({dUdyNormalized, dVdyNormalized}));
        body->appendStatement(new TIntermBinary(EOpAssign, dUVdx, dUVdxValue));
        body->appendStatement(new TIntermBinary(EOpAssign, dUVdy, dUVdyValue));
@@ -736,16 +740,17 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
                           *vec3Type, new TIntermSequence({uc->deepCopy(), vc->deepCopy(), l})));
        body->appendStatement(returnStatement);
-        mCubeXYZToArrayUVL = new TFunction(mSymbolTable, kCoordTransformFuncName,
+        TFunction *function;
-                                           SymbolType::AngleInternal, vec3Type, true);
+        function = new TFunction(mSymbolTable, name, SymbolType::AngleInternal, vec3Type, true);
-        mCubeXYZToArrayUVL->addParameter(pVar);
+        function->addParameter(pVar);
-        mCubeXYZToArrayUVL->addParameter(dPdxVar);
+        function->addParameter(dPdxVar);
-        mCubeXYZToArrayUVL->addParameter(dPdyVar);
+        function->addParameter(dPdyVar);
-        mCubeXYZToArrayUVL->addParameter(dUVdxVar);
+        function->addParameter(dUVdxVar);
-        mCubeXYZToArrayUVL->addParameter(dUVdyVar);
+        function->addParameter(dUVdyVar);
-        mCoordTranslationFunctionDecl =
+        *functionOut = function;
-            CreateInternalFunctionDefinitionNode(*mCubeXYZToArrayUVL, body);
+        *declOut = CreateInternalFunctionDefinitionNode(*function, body);
    }
    TIntermTyped *createCoordTransformationCall(TIntermTyped *P,
@@ -758,6 +763,17 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
        return TIntermAggregate::CreateFunctionCall(*mCubeXYZToArrayUVL, args);
    }
+    TIntermTyped *createImplicitCoordTransformationCall(TIntermTyped *P,
+                                                        TIntermTyped *dUVdx,
+                                                        TIntermTyped *dUVdy)
+    {
+        const TType *vec3Type = StaticType::GetBasic<EbtFloat, 3>();
+        TIntermTyped *dPdx    = CreateZeroNode(*vec3Type);
+        TIntermTyped *dPdy    = CreateZeroNode(*vec3Type);
+        TIntermSequence *args = new TIntermSequence({P, dPdx, dPdy, dUVdx, dUVdy});
+        return TIntermAggregate::CreateFunctionCall(*mCubeXYZToArrayUVLImplicit, args);
+    }
    TVariable *convertFunctionParameter(TIntermNode *parent, const TVariable *param)
    {
        if (!param->getType().isSamplerCube())
@@ -795,7 +811,7 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
        //
        // The intrinsics map as follows:
        //
-        //     textureCube -> texture
+        //     textureCube -> textureGrad
        //     textureCubeLod -> textureLod
        //     textureCubeLodEXT -> textureLod
        //     textureCubeGrad -> textureGrad
@@ -842,17 +858,24 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
        // The calculation of dPdx and dPdy is declared as implementation-dependent, so we have
        // freedom to calculate it as fit, even if not precisely the same as hardware might.
-        const char *substituteFunctionName = "texture";
+        const char *substituteFunctionName = "textureGrad";
        bool isGrad                        = false;
+        bool isTranslatedGrad              = true;
+        bool hasBias                       = false;
        if (function->name().beginsWith("textureCubeLod"))
        {
            substituteFunctionName = "textureLod";
+            isTranslatedGrad       = false;
        }
        else if (function->name().beginsWith("textureCubeGrad"))
        {
-            substituteFunctionName = "textureGrad";
            isGrad                 = true;
        }
+        else if (!mIsFragmentShader)
+        {
+            substituteFunctionName = "texture";
+            isTranslatedGrad       = false;
+        }
        TIntermSequence *arguments = node->getSequence();
        ASSERT(arguments->size() >= 2);
@@ -871,21 +894,58 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
            dPdx = (*arguments)[2]->getAsTyped()->deepCopy();
            dPdy = (*arguments)[3]->getAsTyped()->deepCopy();
        }
+        else if (isTranslatedGrad && mIsFragmentShader && arguments->size() == 3)
+        {
+            hasBias = true;
+        }
        else
        {
            dPdx = CreateZeroNode(*vec3Type);
            dPdy = CreateZeroNode(*vec3Type);
        }
+        if (isTranslatedGrad && !mIsFragmentShader)
+        {
+            substituteFunctionName = "texture";
+            isTranslatedGrad       = false;
+        }
        // The function call to transform the coordinates, dPdx and dPdy.  If not textureCubeGrad,
        // the driver compiler will optimize out the unnecessary calculations.
        TIntermSequence *coordTransform = new TIntermSequence;
        coordTransform->push_back(CreateTempDeclarationNode(&dUVdx->variable()));
        coordTransform->push_back(CreateTempDeclarationNode(&dUVdy->variable()));
-        TIntermTyped *coordTransformCall = createCoordTransformationCall(
+        TIntermTyped *coordTransformCall;
+        if (isGrad || !isTranslatedGrad)
+        {
+            coordTransformCall = createCoordTransformationCall(
                (*arguments)[1]->getAsTyped()->deepCopy(), dPdx, dPdy, dUVdx, dUVdy);
+        }
+        else
+        {
+            coordTransformCall = createImplicitCoordTransformationCall(
+                (*arguments)[1]->getAsTyped()->deepCopy(), dUVdx, dUVdy);
+        }
        coordTransform->push_back(
            CreateTempInitDeclarationNode(&uvl->variable(), coordTransformCall));
+        TIntermTyped *dUVdxArg = dUVdx;
+        TIntermTyped *dUVdyArg = dUVdy;
+        if (hasBias)
+        {
+            const TType *floatType = StaticType::GetBasic<EbtFloat>();
+            TIntermTyped *bias     = (*arguments)[2]->getAsTyped()->deepCopy();
+            TIntermTyped *exp2Call = CreateBuiltInFunctionCallNode(
+                "exp2", new TIntermSequence({bias}), *mSymbolTable, 100);
+            TIntermSymbol *biasFac = new TIntermSymbol(CreateTempVariable(mSymbolTable, floatType));
+            coordTransform->push_back(
+                CreateTempInitDeclarationNode(&biasFac->variable(), exp2Call));
+            dUVdxArg =
+                new TIntermBinary(EOpVectorTimesScalar, biasFac->deepCopy(), dUVdx->deepCopy());
+            dUVdyArg =
+                new TIntermBinary(EOpVectorTimesScalar, biasFac->deepCopy(), dUVdy->deepCopy());
+        }
        insertStatementsInParentBlock(*coordTransform);
        TIntermSequence *substituteArguments = new TIntermSequence;
@@ -893,10 +953,10 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
        substituteArguments->push_back(mRetyper.getFunctionCallArgReplacement((*arguments)[0]));
        // Replace the second argument with the coordination transformation.
        substituteArguments->push_back(uvl->deepCopy());
-        if (isGrad)
+        if (isTranslatedGrad)
        {
-            substituteArguments->push_back(dUVdx->deepCopy());
+            substituteArguments->push_back(dUVdxArg->deepCopy());
-            substituteArguments->push_back(dUVdy->deepCopy());
+            substituteArguments->push_back(dUVdyArg->deepCopy());
        }
        else
        {
@@ -918,12 +978,14 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
    // A helper function to convert xyz coordinates passed to a cube map sampling function into the
    // array layer (cube map face) and uv coordinates.
    TFunction *mCubeXYZToArrayUVL;
+    // A specialized version of the same function which uses implicit derivatives.
+    TFunction *mCubeXYZToArrayUVLImplicit;
    bool mIsFragmentShader;
-    bool mUseSubgroupOps;
    // Stored to be put before the first function after the pass.
    TIntermFunctionDefinition *mCoordTranslationFunctionDecl;
+    TIntermFunctionDefinition *mCoordTranslationFunctionImplicitDecl;
 };
 }  // anonymous namespace
@@ -931,11 +993,9 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
 bool RewriteCubeMapSamplersAs2DArray(TCompiler *compiler,
                                     TIntermBlock *root,
                                     TSymbolTable *symbolTable,
-                                     bool isFragmentShader,
+                                     bool isFragmentShader)
-                                     bool useSubgroupOps)
 {
-    RewriteCubeMapSamplersAs2DArrayTraverser traverser(symbolTable, isFragmentShader,
+    RewriteCubeMapSamplersAs2DArrayTraverser traverser(symbolTable, isFragmentShader);
-                                                       useSubgroupOps);
    root->traverse(&traverser);
    if (!traverser.updateTree(compiler, root))
    {
@@ -944,11 +1004,18 @@ bool RewriteCubeMapSamplersAs2DArray(TCompiler *compiler,
    TIntermFunctionDefinition *coordTranslationFunctionDecl =
        traverser.getCoordTranslationFunctionDecl();
+    TIntermFunctionDefinition *coordTranslationFunctionDeclImplicit =
+        traverser.getCoordTranslationFunctionDeclImplicit();
+    size_t firstFunctionIndex = FindFirstFunctionDefinitionIndex(root);
    if (coordTranslationFunctionDecl)
    {
-        size_t firstFunctionIndex = FindFirstFunctionDefinitionIndex(root);
        root->insertChildNodes(firstFunctionIndex, TIntermSequence({coordTranslationFunctionDecl}));
    }
+    if (coordTranslationFunctionDeclImplicit)
+    {
+        root->insertChildNodes(firstFunctionIndex,
+                               TIntermSequence({coordTranslationFunctionDeclImplicit}));
+    }
    return compiler->validateAST(root);
 }

--- a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h
+++ b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h
@@ -23,8 +23,7 @@ class TSymbolTable;
 ANGLE_NO_DISCARD bool RewriteCubeMapSamplersAs2DArray(TCompiler *compiler,
                                                      TIntermBlock *root,
                                                      TSymbolTable *symbolTable,
-                                                      bool isFragmentShader,
+                                                      bool isFragmentShader);
-                                                      bool useSubgroupOps);
 }  // namespace sh
 #endif  // COMPILER_TRANSLATOR_TREEOPS_REWRITECUBEMAPSAMPLERSAS2DARRAY_H_
--- a/src/libANGLE/renderer/vulkan/ContextVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ContextVk.cpp
@@ -239,7 +239,6 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk 
      mFlipYForCurrentSurface(false),
      mIsAnyHostVisibleBufferWritten(false),
      mEmulateSeamfulCubeMapSampling(false),
-      mEmulateSeamfulCubeMapSamplingWithSubgroupOps(false),
      mUseOldRewriteStructSamplers(false),
      mLastCompletedQueueSerial(renderer->nextSerial()),
      mCurrentQueueSerial(renderer->nextSerial()),
@@ -444,8 +443,7 @@ angle::Result ContextVk::initialize()
        ANGLE_TRY(synchronizeCpuGpuTime());
    }
-    mEmulateSeamfulCubeMapSampling =
+    mEmulateSeamfulCubeMapSampling = shouldEmulateSeamfulCubeMapSampling();
-        shouldEmulateSeamfulCubeMapSampling(&mEmulateSeamfulCubeMapSamplingWithSubgroupOps);
    mUseOldRewriteStructSamplers = shouldUseOldRewriteStructSamplers();
@@ -2926,7 +2924,7 @@ vk::DescriptorSetLayoutDesc ContextVk::getDriverUniformsDescriptorSetDesc(
    return desc;
 }
-bool ContextVk::shouldEmulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const
+bool ContextVk::shouldEmulateSeamfulCubeMapSampling() const
 {
    // Only allow seamful cube map sampling in non-webgl ES2.
    if (mState.getClientMajorVersion() != 2 || mState.isWebGL())
@@ -2939,15 +2937,6 @@ bool ContextVk::shouldEmulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) con
        return false;
    }
-    // Use subgroup ops where available.
-    constexpr VkSubgroupFeatureFlags kSeamfulCubeMapSubgroupOperations =
-        VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
-        VK_SUBGROUP_FEATURE_QUAD_BIT;
-    const VkSubgroupFeatureFlags deviceSupportedOperations =
-        mRenderer->getPhysicalDeviceSubgroupProperties().supportedOperations;
-    *useSubgroupOpsOut = (deviceSupportedOperations & kSeamfulCubeMapSubgroupOperations) ==
-                         kSeamfulCubeMapSubgroupOperations;
    return true;
 }

--- a/src/libANGLE/renderer/vulkan/ContextVk.h
+++ b/src/libANGLE/renderer/vulkan/ContextVk.h
@@ -327,11 +327,7 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
    void updateScissor(const gl::State &glState);
-    bool emulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const
+    bool emulateSeamfulCubeMapSampling() const { return mEmulateSeamfulCubeMapSampling; }
-    {
-        *useSubgroupOpsOut = mEmulateSeamfulCubeMapSamplingWithSubgroupOps;
-        return mEmulateSeamfulCubeMapSampling;
-    }
    bool useOldRewriteStructSamplers() const { return mUseOldRewriteStructSamplers; }
@@ -492,7 +488,7 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
    void waitForSwapchainImageIfNecessary();
-    bool shouldEmulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const;
+    bool shouldEmulateSeamfulCubeMapSampling() const;
    bool shouldUseOldRewriteStructSamplers() const;
@@ -557,10 +553,8 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
    // at the end of the command buffer to make that write available to the host.
    bool mIsAnyHostVisibleBufferWritten;
-    // Whether this context should do seamful cube map sampling emulation, and whether subgroup
+    // Whether this context should do seamful cube map sampling emulation.
-    // operations should be used.
    bool mEmulateSeamfulCubeMapSampling;
-    bool mEmulateSeamfulCubeMapSamplingWithSubgroupOps;
    // Whether this context should use the old version of the
    // RewriteStructSamplers pass.

--- a/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp
+++ b/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp
@@ -951,7 +951,6 @@ void GlslangWrapper::GetShaderSource(bool useOldRewriteStructSamplers,
 angle::Result GlslangWrapper::GetShaderCode(vk::Context *context,
                                            const gl::Caps &glCaps,
                                            bool enableLineRasterEmulation,
-                                            bool enableSubgroupOps,
                                            const gl::ShaderMap<std::string> &shaderSources,
                                            gl::ShaderMap<std::vector<uint32_t>> *shaderCodeOut)
 {
@@ -971,18 +970,17 @@ angle::Result GlslangWrapper::GetShaderCode(vk::Context *context,
                                               kVersionDefine, kLineRasterDefine),
                       VK_ERROR_INVALID_SHADER_NV);
-        return GetShaderCodeImpl(context, glCaps, enableSubgroupOps, patchedSources, shaderCodeOut);
+        return GetShaderCodeImpl(context, glCaps, patchedSources, shaderCodeOut);
    }
    else
    {
-        return GetShaderCodeImpl(context, glCaps, enableSubgroupOps, shaderSources, shaderCodeOut);
+        return GetShaderCodeImpl(context, glCaps, shaderSources, shaderCodeOut);
    }
 }
 // static
 angle::Result GlslangWrapper::GetShaderCodeImpl(vk::Context *context,
                                                const gl::Caps &glCaps,
-                                                bool enableSubgroupOps,
                                                const gl::ShaderMap<std::string> &shaderSources,
                                                gl::ShaderMap<std::vector<uint32_t>> *shaderCodeOut)
 {
@@ -1018,11 +1016,6 @@ angle::Result GlslangWrapper::GetShaderCodeImpl(vk::Context *context,
        glslang::TShader *shader = shaders[shaderType];
        shader->setStringsWithLengths(&shaderString, &shaderLength, 1);
        shader->setEntryPoint("main");
-        if (enableSubgroupOps)
-        {
-            // Enable SPIR-V 1.3 if to be able to use subgroup operations.
-            shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_3);
-        }
        bool result = shader->parse(&builtInResources, 450, ECoreProfile, false, false, messages);
        if (!result)

--- a/src/libANGLE/renderer/vulkan/GlslangWrapper.h
+++ b/src/libANGLE/renderer/vulkan/GlslangWrapper.h
@@ -30,14 +30,12 @@ class GlslangWrapper
    static angle::Result GetShaderCode(vk::Context *context,
                                       const gl::Caps &glCaps,
                                       bool enableLineRasterEmulation,
-                                       bool enableSubgroupOps,
                                       const gl::ShaderMap<std::string> &shaderSources,
                                       gl::ShaderMap<std::vector<uint32_t>> *shaderCodesOut);
  private:
    static angle::Result GetShaderCodeImpl(vk::Context *context,
                                           const gl::Caps &glCaps,
-                                           bool enableSubgroupOps,
                                           const gl::ShaderMap<std::string> &shaderSources,
                                           gl::ShaderMap<std::vector<uint32_t>> *shaderCodesOut);
 };

--- a/src/libANGLE/renderer/vulkan/ProgramVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ProgramVk.cpp
@@ -304,16 +304,9 @@ angle::Result ProgramVk::ShaderInfo::initShaders(ContextVk *contextVk,
 {
    ASSERT(!valid());
-    bool useSubgroupOpsWithSeamfulCubeMapEmulation = false;
-    bool emulateSeamfulCubeMapSampling =
-        contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOpsWithSeamfulCubeMapEmulation);
-    bool useSubgroupOps =
-        emulateSeamfulCubeMapSampling && useSubgroupOpsWithSeamfulCubeMapEmulation;
    gl::ShaderMap<std::vector<uint32_t>> shaderCodes;
-    ANGLE_TRY(GlslangWrapper::GetShaderCode(contextVk, contextVk->getCaps(),
+    ANGLE_TRY(GlslangWrapper::GetShaderCode(
-                                            enableLineRasterEmulation, useSubgroupOps,
+        contextVk, contextVk->getCaps(), enableLineRasterEmulation, shaderSources, &shaderCodes));
-                                            shaderSources, &shaderCodes));
    for (const gl::ShaderType shaderType : gl::AllShaderTypes())
    {
@@ -1475,8 +1468,7 @@ angle::Result ProgramVk::updateTexturesDescriptorSet(ContextVk *contextVk)
    const gl::ActiveTextureArray<vk::TextureUnit> &activeTextures = contextVk->getActiveTextures();
-    bool useSubgroupOps                = false;
+    bool emulateSeamfulCubeMapSampling = contextVk->emulateSeamfulCubeMapSampling();
-    bool emulateSeamfulCubeMapSampling = contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOps);
    bool useOldRewriteStructSamplers   = contextVk->useOldRewriteStructSamplers();
    std::unordered_map<std::string, uint32_t> mappedSamplerNameToBindingIndex;

--- a/src/libANGLE/renderer/vulkan/ShaderVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ShaderVk.cpp
@@ -40,18 +40,10 @@ std::shared_ptr<WaitableCompileEvent> ShaderVk::compile(const gl::Context *conte
        compileOptions |= SH_CLAMP_POINT_SIZE;
    }
-    bool useSubgroupOps = false;
+    if (contextVk->emulateSeamfulCubeMapSampling())
-    if (contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOps))
-    {
-        if (useSubgroupOps)
-        {
-            compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP;
-        }
-        else
    {
        compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING;
    }
-    }
    if (contextVk->useOldRewriteStructSamplers())
    {

--- a/src/tests/gl_tests/CubeMapTextureTest.cpp
+++ b/src/tests/gl_tests/CubeMapTextureTest.cpp
@@ -46,6 +46,8 @@ class CubeMapTextureTest : public ANGLETest
    void testTearDown() override { glDeleteProgram(mProgram); }
+    void runSampleCoordinateTransformTest(const char *shader);
    GLuint mProgram;
    GLint mColorLocation;
 };
@@ -113,9 +115,7 @@ TEST_P(CubeMapTextureTest, RenderToFacesConsecutively)
    EXPECT_GL_NO_ERROR();
 }
-// Verify that cube map sampling follows the rules that map cubemap coordinates to coordinates
+void CubeMapTextureTest::runSampleCoordinateTransformTest(const char *shader)
-// within each face.  See section 3.7.5 of GLES2.0 (Cube Map Texture Selection).
-TEST_P(CubeMapTextureTest, SampleCoordinateTransform)
 {
    // Fails to compile the shader.  anglebug.com/3776
    ANGLE_SKIP_TEST_IF(IsOpenGL() && IsIntel() && IsWindows());
@@ -182,6 +182,54 @@ TEST_P(CubeMapTextureTest, SampleCoordinateTransform)
    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fboTex, 0);
    EXPECT_GL_NO_ERROR();
+    ANGLE_GL_PROGRAM(program, essl1_shaders::vs::Simple(), shader);
+    glUseProgram(program);
+    GLint texCubeLocation = glGetUniformLocation(program, "texCube");
+    ASSERT_NE(-1, texCubeLocation);
+    glUniform1i(texCubeLocation, 0);
+    drawQuad(program, essl1_shaders::PositionAttrib(), 0.5f);
+    EXPECT_GL_NO_ERROR();
+    for (GLenum face = 0; face < kCubeFaceCount; face++)
+    {
+        // The following table defines the translation from textureCube coordinates to coordinates
+        // in each face.  The framebuffer has width 6 and height 4.  Every column corresponding to
+        // an x value represents one cube face.  The values in rows are samples from the four
+        // sections of the face.
+        //
+        // Major    Axis Direction Target    sc  tc  ma
+        //  +rx  TEXTURE_CUBE_MAP_POSITIVE_X −rz −ry rx
+        //  −rx  TEXTURE_CUBE_MAP_NEGATIVE_X  rz −ry rx
+        //  +ry  TEXTURE_CUBE_MAP_POSITIVE_Y  rx  rz ry
+        //  −ry  TEXTURE_CUBE_MAP_NEGATIVE_Y  rx −rz ry
+        //  +rz  TEXTURE_CUBE_MAP_POSITIVE_Z  rx −ry rz
+        //  −rz  TEXTURE_CUBE_MAP_NEGATIVE_Z −rx −ry rz
+        //
+        // This table is used only to determine the direction of growth for s and t.  The shader
+        // always generates (row,col) coordinates (0, 0), (0, 1), (1, 0), (1, 1) which is the order
+        // the data is uploaded to the faces, but based on the table above, the sample order would
+        // be different.
+        constexpr size_t faceSampledSections[kCubeFaceCount][kCubeFaceSectionCount] = {
+            {3, 2, 1, 0}, {2, 3, 0, 1}, {0, 1, 2, 3}, {2, 3, 0, 1}, {2, 3, 0, 1}, {3, 2, 1, 0},
+        };
+        for (size_t section = 0; section < kCubeFaceSectionCount; ++section)
+        {
+            const GLColor sectionColor = faceColors[face][faceSampledSections[face][section]];
+            EXPECT_PIXEL_COLOR_EQ(face, section, sectionColor)
+                << "face " << face << ", section " << section;
+        }
+    }
+    EXPECT_GL_NO_ERROR();
+}
+// Verify that cube map sampling follows the rules that map cubemap coordinates to coordinates
+// within each face.  See section 3.7.5 of GLES2.0 (Cube Map Texture Selection).
+TEST_P(CubeMapTextureTest, SampleCoordinateTransform)
+{
    // Create a program that samples from 6x4 directions of the cubemap, draw and verify that the
    // colors match the right color from |faceColors|.
    constexpr char kFS[] = R"(precision mediump float;
@@ -217,48 +265,52 @@ void main()
    gl_FragColor = textureCube(texCube, coord);
 })";
-    ANGLE_GL_PROGRAM(program, essl1_shaders::vs::Simple(), kFS);
+    runSampleCoordinateTransformTest(kFS);
-    glUseProgram(program);
+}
-    GLint texCubeLocation = glGetUniformLocation(program, "texCube");
+// On Android Vulkan, unequal x and y derivatives cause this test to fail.
-    ASSERT_NE(-1, texCubeLocation);
+TEST_P(CubeMapTextureTest, SampleCoordinateTransformGrad)
-    glUniform1i(texCubeLocation, 0);
+{
+    ANGLE_SKIP_TEST_IF(IsAndroid() && IsVulkan());  // anglebug.com/3814
+    ANGLE_SKIP_TEST_IF(IsD3D11());                  // anglebug.com/3856
+    ANGLE_SKIP_TEST_IF(!IsGLExtensionEnabled("GL_EXT_shader_texture_lod"));
-    drawQuad(program, essl1_shaders::PositionAttrib(), 0.5f);
+    constexpr char kFS[] = R"(#extension GL_EXT_shader_texture_lod : require
-    EXPECT_GL_NO_ERROR();
+precision mediump float;
-    for (GLenum face = 0; face < kCubeFaceCount; face++)
+uniform samplerCube texCube;
-    {
-        // The following table defines the translation from textureCube coordinates to coordinates
-        // in each face.  The framebuffer has width 6 and height 4.  Every column corresponding to
-        // an x value represents one cube face.  The values in rows are samples from the four
-        // sections of the face.
-        //
-        // Major    Axis Direction Target    sc  tc  ma
-        //  +rx  TEXTURE_CUBE_MAP_POSITIVE_X −rz −ry rx
-        //  −rx  TEXTURE_CUBE_MAP_NEGATIVE_X  rz −ry rx
-        //  +ry  TEXTURE_CUBE_MAP_POSITIVE_Y  rx  rz ry
-        //  −ry  TEXTURE_CUBE_MAP_NEGATIVE_Y  rx −rz ry
-        //  +rz  TEXTURE_CUBE_MAP_POSITIVE_Z  rx −ry rz
-        //  −rz  TEXTURE_CUBE_MAP_NEGATIVE_Z −rx −ry rz
-        //
-        // This table is used only to determine the direction of growth for s and t.  The shader
-        // always generates (row,col) coordinates (0, 0), (0, 1), (1, 0), (1, 1) which is the order
-        // the data is uploaded to the faces, but based on the table above, the sample order would
-        // be different.
-        constexpr size_t faceSampledSections[kCubeFaceCount][kCubeFaceSectionCount] = {
-            {3, 2, 1, 0}, {2, 3, 0, 1}, {0, 1, 2, 3}, {2, 3, 0, 1}, {2, 3, 0, 1}, {3, 2, 1, 0},
-        };
-        for (size_t section = 0; section < kCubeFaceSectionCount; ++section)
+const mat4 coordInSection = mat4(
-        {
+    vec4(-0.5, -0.5, 0, 0),
-            const GLColor sectionColor = faceColors[face][faceSampledSections[face][section]];
+    vec4( 0.5, -0.5, 0, 0),
+    vec4(-0.5,  0.5, 0, 0),
+    vec4( 0.5,  0.5, 0, 0)
+);
-            EXPECT_PIXEL_COLOR_EQ(face, section, sectionColor)
+void main()
-                << "face " << face << ", section " << section;
+{
+    vec3 coord;
+    if (gl_FragCoord.x < 2.0)
+    {
+        coord.x = gl_FragCoord.x < 1.0 ? 1.0 : -1.0;
+        coord.zy = coordInSection[int(gl_FragCoord.y)].xy;
    }
+    else if (gl_FragCoord.x < 4.0)
+    {
+        coord.y = gl_FragCoord.x < 3.0 ? 1.0 : -1.0;
+        coord.xz = coordInSection[int(gl_FragCoord.y)].xy;
    }
-    EXPECT_GL_NO_ERROR();
+    else
+    {
+        coord.z = gl_FragCoord.x < 5.0 ? 1.0 : -1.0;
+        coord.xy = coordInSection[int(gl_FragCoord.y)].xy;
+    }
+    gl_FragColor = textureCubeGradEXT(texCube, coord,
+                                      vec3(10.0, 10.0, 0.0), vec3(0.0, 10.0, 10.0));
+})";
+    runSampleCoordinateTransformTest(kFS);
 }
 // Use this to select which configurations (e.g. which renderer, which GLES major version) these