Work around NVIDIA GLSL vector-scalar op bug

This adds a new AST transform VectorizeVectorScalarArithmetic. The AST transform works around incorrect handling of certain types of GLSL arithmetic operations by NVIDIA's GL driver. It works around only the most common cases where the bug reproduces, since detecting all the cases would take more sophisticated analysis of the code than what is currently easily implementable in ANGLE. When a float add operator has both vector and scalar operands, the AST transform turns the scalar operand into a vector operand. Example: vec4 f; f += 1.0; gets turned into: vec4 f; f += vec4(1.0); When a vector constructor contains a binary scalar float multiplication or division operation as its only argument, the AST transform turns both operands of the binary operation into vector operands. Example: float f, g; vec4(f * g); gets turned into: float f, g; vec4(vec4(f) * vec4(g)); Another example with compound assignment: float f, g; vec4(f *= g); gets turned into: float f, g; vec4 s0 = vec4(f); (s0 *= g, f = s0.x), s0; This latter transformation only works in case the compound assignment left hand expression doesn't have side effects. BUG=chromium:772651 TEST=angle_end2end_tests Change-Id: I84ec04287793c56a94845a725785439565debdaf Reviewed-on: https://chromium-review.googlesource.com/721321 Commit-Queue: Olli Etuaho <oetuaho@nvidia.com> Reviewed-by: Corentin Wallez <cwallez@chromium.org>

Work around NVIDIA GLSL vector-scalar op bug
661fc487 · Olli Etuaho · Commit Bot · 73dcc60c · 661fc487 · 661fc487
Commit 661fc487 authored Oct 16, 2017 by Olli Etuaho Committed by Commit Bot Nov 09, 2017
9 changed files
--- a/include/GLSLANG/ShaderLang.h
+++ b/include/GLSLANG/ShaderLang.h
@@ -25,7 +25,7 @@
 // Version number for shader translation API.
 // It is incremented every time the API changes.
-#define ANGLE_SH_VERSION 185
+#define ANGLE_SH_VERSION 186
 enum ShShaderSpec
 {
@@ -245,6 +245,14 @@ const ShCompileOptions SH_SELECT_VIEW_IN_NV_GLSL_VERTEX_SHADER = UINT64_C(1) << 
 // ShBuiltInResources in vertex shaders.
 const ShCompileOptions SH_CLAMP_POINT_SIZE = UINT64_C(1) << 35;
+// Turn some arithmetic operations that operate on a float vector-scalar pair into vector-vector
+// operations. This is done recursively. Some scalar binary operations inside vector constructors
+// are also turned into vector operations.
+//
+// This is targeted to work around a bug in NVIDIA OpenGL drivers that was reproducible on NVIDIA
+// driver version 387.92. It works around the most common occurrences of the bug.
+const ShCompileOptions SH_REWRITE_VECTOR_SCALAR_ARITHMETIC = UINT64_C(1) << 36;
 // Defines alternate strategies for implementing array index clamping.
 enum ShArrayIndexClampingStrategy
 {

--- a/src/compiler.gypi
+++ b/src/compiler.gypi
@@ -162,6 +162,8 @@
            'compiler/translator/ValidateVaryingLocations.h',
            'compiler/translator/VariablePacker.cpp',
            'compiler/translator/VariablePacker.h',
+            'compiler/translator/VectorizeVectorScalarArithmetic.cpp',
+            'compiler/translator/VectorizeVectorScalarArithmetic.h',
            'compiler/translator/blocklayout.cpp',
            'compiler/translator/blocklayout.h',
            'compiler/translator/glslang.h',

--- a/src/compiler/translator/Compiler.cpp
+++ b/src/compiler/translator/Compiler.cpp
@@ -44,6 +44,7 @@
 #include "compiler/translator/ValidateOutputs.h"
 #include "compiler/translator/ValidateVaryingLocations.h"
 #include "compiler/translator/VariablePacker.h"
+#include "compiler/translator/VectorizeVectorScalarArithmetic.h"
 #include "third_party/compiler/ArrayBoundsClamper.h"
 namespace sh
@@ -606,6 +607,7 @@ bool TCompiler::checkAndSimplifyAST(TIntermBlock *root,
                                       IntermNodePatternMatcher::kNamelessStructDeclaration,
                                   &getSymbolTable(), getShaderVersion());
        }
        InitializeUninitializedLocals(root, getShaderVersion());
    }
@@ -614,6 +616,11 @@ bool TCompiler::checkAndSimplifyAST(TIntermBlock *root,
        ClampPointSize(root, compileResources.MaxPointSize, &getSymbolTable());
    }
+    if (compileOptions & SH_REWRITE_VECTOR_SCALAR_ARITHMETIC)
+    {
+        VectorizeVectorScalarArithmetic(root, &getSymbolTable());
+    }
    return true;
 }

--- a/src/compiler/translator/VectorizeVectorScalarArithmetic.cpp
+++ b/src/compiler/translator/VectorizeVectorScalarArithmetic.cpp
+// Copyright (c) 2017 The ANGLE Project Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// VectorizeVectorScalarArithmetic.cpp: Turn some arithmetic operations that operate on a float
+// vector-scalar pair into vector-vector operations. This is done recursively. Some scalar binary
+// operations inside vector constructors are also turned into vector operations.
+//
+// This is targeted to work around a bug in NVIDIA OpenGL drivers that was reproducible on NVIDIA
+// driver version 387.92. It works around the most common occurrences of the bug.
+#include "compiler/translator/VectorizeVectorScalarArithmetic.h"
+#include "compiler/translator/IntermNode.h"
+#include "compiler/translator/IntermTraverse.h"
+namespace sh
+{
+namespace
+{
+class VectorizeVectorScalarArithmeticTraverser : public TIntermTraverser
+{
+  public:
+    VectorizeVectorScalarArithmeticTraverser(TSymbolTable *symbolTable)
+        : TIntermTraverser(true, false, false, symbolTable), mReplaced(false)
+    {
+    }
+    bool didReplaceScalarsWithVectors() { return mReplaced; }
+    void nextIteration() { mReplaced = false; }
+  protected:
+    bool visitBinary(Visit visit, TIntermBinary *node) override;
+    bool visitAggregate(Visit visit, TIntermAggregate *node) override;
+  private:
+    // These helpers should only be called from visitAggregate when visiting a constructor.
+    // argBinary is the only argument of the constructor.
+    void replaceMathInsideConstructor(TIntermAggregate *node, TIntermBinary *argBinary);
+    void replaceAssignInsideConstructor(const TIntermAggregate *node,
+                                        const TIntermBinary *argBinary);
+    static TIntermTyped *Vectorize(TIntermTyped *node,
+                                   TType vectorType,
+                                   TIntermTraverser::OriginalNode *originalNodeFate);
+    bool mReplaced;
+};
+TIntermTyped *VectorizeVectorScalarArithmeticTraverser::Vectorize(
+    TIntermTyped *node,
+    TType vectorType,
+    TIntermTraverser::OriginalNode *originalNodeFate)
+{
+    ASSERT(node->isScalar());
+    vectorType.setQualifier(EvqTemporary);
+    TIntermSequence vectorConstructorArgs;
+    vectorConstructorArgs.push_back(node);
+    TIntermAggregate *vectorized =
+        TIntermAggregate::CreateConstructor(vectorType, &vectorConstructorArgs);
+    TIntermTyped *vectorizedFolded = vectorized->fold(nullptr);
+    if (originalNodeFate != nullptr)
+    {
+        if (vectorizedFolded != vectorized)
+        {
+            *originalNodeFate = OriginalNode::IS_DROPPED;
+        }
+        else
+        {
+            *originalNodeFate = OriginalNode::BECOMES_CHILD;
+        }
+    }
+    return vectorizedFolded;
+}
+bool VectorizeVectorScalarArithmeticTraverser::visitBinary(Visit /*visit*/, TIntermBinary *node)
+{
+    TIntermTyped *left  = node->getLeft();
+    TIntermTyped *right = node->getRight();
+    ASSERT(left);
+    ASSERT(right);
+    switch (node->getOp())
+    {
+        case EOpAdd:
+        case EOpAddAssign:
+            // Only these specific ops are necessary to turn into vector ops.
+            break;
+        default:
+            return true;
+    }
+    if (node->getBasicType() != EbtFloat)
+    {
+        // Only float ops have reproduced the bug.
+        return true;
+    }
+    if (left->isScalar() && right->isVector())
+    {
+        ASSERT(!node->isAssignment());
+        ASSERT(!right->isArray());
+        OriginalNode originalNodeFate;
+        TIntermTyped *leftVectorized = Vectorize(left, right->getType(), &originalNodeFate);
+        queueReplacementWithParent(node, left, leftVectorized, originalNodeFate);
+        mReplaced = true;
+        // Don't replace more nodes in the same subtree on this traversal. However, nodes elsewhere
+        // in the tree may still be replaced.
+        return false;
+    }
+    else if (left->isVector() && right->isScalar())
+    {
+        OriginalNode originalNodeFate;
+        TIntermTyped *rightVectorized = Vectorize(right, left->getType(), &originalNodeFate);
+        queueReplacementWithParent(node, right, rightVectorized, originalNodeFate);
+        mReplaced = true;
+        // Don't replace more nodes in the same subtree on this traversal. However, nodes elsewhere
+        // in the tree may still be replaced.
+        return false;
+    }
+    return true;
+}
+void VectorizeVectorScalarArithmeticTraverser::replaceMathInsideConstructor(
+    TIntermAggregate *node,
+    TIntermBinary *argBinary)
+{
+    // Turn:
+    //   a * b
+    // into:
+    //   gvec(a) * gvec(b)
+    TIntermTyped *left  = argBinary->getLeft();
+    TIntermTyped *right = argBinary->getRight();
+    ASSERT(left->isScalar() && right->isScalar());
+    TType leftVectorizedType = left->getType();
+    leftVectorizedType.setPrimarySize(static_cast<unsigned char>(node->getType().getNominalSize()));
+    TIntermTyped *leftVectorized = Vectorize(left, leftVectorizedType, nullptr);
+    TType rightVectorizedType    = right->getType();
+    rightVectorizedType.setPrimarySize(
+        static_cast<unsigned char>(node->getType().getNominalSize()));
+    TIntermTyped *rightVectorized = Vectorize(right, rightVectorizedType, nullptr);
+    TIntermBinary *newArg = new TIntermBinary(argBinary->getOp(), leftVectorized, rightVectorized);
+    queueReplacementWithParent(node, argBinary, newArg, OriginalNode::IS_DROPPED);
+}
+void VectorizeVectorScalarArithmeticTraverser::replaceAssignInsideConstructor(
+    const TIntermAggregate *node,
+    const TIntermBinary *argBinary)
+{
+    // Turn:
+    //   gvec(a *= b);
+    // into:
+    //   // This is inserted into the parent block:
+    //   gvec s0 = gvec(a);
+    //
+    //   // This goes where the gvec constructor used to be:
+    //   ((s0 *= b, a = s0.x), s0);
+    TIntermTyped *left  = argBinary->getLeft();
+    TIntermTyped *right = argBinary->getRight();
+    ASSERT(left->isScalar() && right->isScalar());
+    ASSERT(!left->hasSideEffects());
+    TType vecType = node->getType();
+    vecType.setQualifier(EvqTemporary);
+    nextTemporaryId();
+    // gvec s0 = gvec(a);
+    // s0 is called "tempAssignmentTarget" below.
+    TIntermTyped *tempAssignmentTargetInitializer = Vectorize(left->deepCopy(), vecType, nullptr);
+    TIntermDeclaration *tempAssignmentTargetDeclaration =
+        createTempInitDeclaration(tempAssignmentTargetInitializer);
+    // s0 *= b
+    TOperator compoundAssignmentOp = argBinary->getOp();
+    if (compoundAssignmentOp == EOpMulAssign)
+    {
+        compoundAssignmentOp = EOpVectorTimesScalarAssign;
+    }
+    TIntermBinary *replacementCompoundAssignment =
+        new TIntermBinary(compoundAssignmentOp, createTempSymbol(vecType), right->deepCopy());
+    // s0.x
+    TVector<int> swizzleXOffset;
+    swizzleXOffset.push_back(0);
+    TIntermSwizzle *tempAssignmentTargetX =
+        new TIntermSwizzle(createTempSymbol(vecType), swizzleXOffset);
+    // a = s0.x
+    TIntermBinary *replacementAssignBackToTarget =
+        new TIntermBinary(EOpAssign, left->deepCopy(), tempAssignmentTargetX);
+    // s0 *= b, a = s0.x
+    TIntermBinary *replacementSequenceLeft =
+        new TIntermBinary(EOpComma, replacementCompoundAssignment, replacementAssignBackToTarget);
+    // (s0 *= b, a = s0.x), s0
+    TIntermBinary *replacementSequence =
+        new TIntermBinary(EOpComma, replacementSequenceLeft, createTempSymbol(vecType));
+    insertStatementInParentBlock(tempAssignmentTargetDeclaration);
+    queueReplacement(replacementSequence, OriginalNode::IS_DROPPED);
+}
+bool VectorizeVectorScalarArithmeticTraverser::visitAggregate(Visit /*visit*/,
+                                                              TIntermAggregate *node)
+{
+    // Transform scalar binary expressions inside vector constructors.
+    if (!node->isConstructor() || !node->isVector() || node->getSequence()->size() != 1)
+    {
+        return true;
+    }
+    TIntermTyped *argument = node->getSequence()->back()->getAsTyped();
+    ASSERT(argument);
+    if (!argument->isScalar() || argument->getBasicType() != EbtFloat)
+    {
+        return true;
+    }
+    TIntermBinary *argBinary = argument->getAsBinaryNode();
+    if (!argBinary)
+    {
+        return true;
+    }
+    // Only specific ops are necessary to change.
+    switch (argBinary->getOp())
+    {
+        case EOpMul:
+        case EOpDiv:
+        {
+            replaceMathInsideConstructor(node, argBinary);
+            mReplaced = true;
+            // Don't replace more nodes in the same subtree on this traversal. However, nodes
+            // elsewhere in the tree may still be replaced.
+            return false;
+        }
+        case EOpMulAssign:
+        case EOpDivAssign:
+        {
+            // The case where the left side has side effects is too complicated to deal with, so we
+            // leave that be.
+            if (!argBinary->getLeft()->hasSideEffects())
+            {
+                replaceAssignInsideConstructor(node, argBinary);
+                mReplaced = true;
+                // Don't replace more nodes in the same subtree on this traversal.
+                // However, nodes elsewhere in the tree may still be replaced.
+                return false;
+            }
+            break;
+        }
+        default:
+            return true;
+    }
+    return true;
+}
+}  // anonymous namespace
+void VectorizeVectorScalarArithmetic(TIntermBlock *root, TSymbolTable *symbolTable)
+{
+    VectorizeVectorScalarArithmeticTraverser traverser(symbolTable);
+    do
+    {
+        traverser.nextIteration();
+        root->traverse(&traverser);
+        traverser.updateTree();
+    } while (traverser.didReplaceScalarsWithVectors());
+}
+}  // namespace sh
\ No newline at end of file
--- a/src/compiler/translator/VectorizeVectorScalarArithmetic.h
+++ b/src/compiler/translator/VectorizeVectorScalarArithmetic.h
+// Copyright (c) 2017 The ANGLE Project Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// VectorizeVectorScalarArithmetic.h: Turn some arithmetic operations that operate on a float
+// vector-scalar pair into vector-vector operations. This is done recursively. Some scalar binary
+// operations inside vector constructors are also turned into vector operations.
+//
+// This is targeted to work around a bug in NVIDIA OpenGL drivers that was reproducible on NVIDIA
+// driver version 387.92. It works around the most common occurrences of the bug.
+#ifndef COMPILER_TRANSLATOR_VECTORIZEVECTORSCALARARITHMETIC_H_
+#define COMPILER_TRANSLATOR_VECTORIZEVECTORSCALARARITHMETIC_H_
+namespace sh
+{
+class TIntermBlock;
+class TSymbolTable;
+void VectorizeVectorScalarArithmetic(TIntermBlock *root, TSymbolTable *symbolTable);
+}  // namespace sh
+#endif  // COMPILER_TRANSLATOR_VECTORIZEVECTORSCALARARITHMETIC_H_
\ No newline at end of file
--- a/src/libANGLE/renderer/gl/ShaderGL.cpp
+++ b/src/libANGLE/renderer/gl/ShaderGL.cpp
@@ -117,6 +117,11 @@ ShCompileOptions ShaderGL::prepareSourceAndReturnOptions(std::stringstream *sour
        options |= SH_CLAMP_POINT_SIZE;
    }
+    if (mWorkarounds.rewriteVectorScalarArithmetic)
+    {
+        options |= SH_REWRITE_VECTOR_SCALAR_ARITHMETIC;
+    }
    if (mMultiviewImplementationType == MultiviewImplementationTypeGL::NV_VIEWPORT_ARRAY2)
    {
        options |= SH_INITIALIZE_BUILTINS_FOR_INSTANCED_MULTIVIEW;

--- a/src/libANGLE/renderer/gl/WorkaroundsGL.h
+++ b/src/libANGLE/renderer/gl/WorkaroundsGL.h
@@ -133,6 +133,11 @@ struct WorkaroundsGL
    // On some NVIDIA drivers the point size range reported from the API is inconsistent with the
    // actual behavior. Clamp the point size to the value from the API to fix this.
    bool clampPointSize = false;
+    // On some NVIDIA drivers certain types of GLSL arithmetic ops mixing vectors and scalars may be
+    // executed incorrectly. Change them in the shader translator. Tracking bug:
+    // http://crbug.com/772651
+    bool rewriteVectorScalarArithmetic = false;
 };
 }  // namespace rx

--- a/src/libANGLE/renderer/gl/renderergl_utils.cpp
+++ b/src/libANGLE/renderer/gl/renderergl_utils.cpp
@@ -1107,6 +1107,8 @@ void GenerateWorkarounds(const FunctionsGL *functions, WorkaroundsGL *workaround
    workarounds->clampPointSize = IsNvidia(vendor);
+    workarounds->rewriteVectorScalarArithmetic = IsNvidia(vendor);
 #if defined(ANGLE_PLATFORM_ANDROID)
    // TODO(jmadill): Narrow workaround range for specific devices.
    workarounds->reapplyUBOBindingsAfterUsingBinaryProgram = true;

--- a/src/tests/gl_tests/GLSLTest.cpp
+++ b/src/tests/gl_tests/GLSLTest.cpp
@@ -3796,6 +3796,65 @@ TEST_P(GLSLTest_ES3, VaryingStructWithInlineDefinition)
    EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
 }
+// Test vector/scalar arithmetic (in this case multiplication and addition). Meant to reproduce a
+// bug that appeared in NVIDIA OpenGL drivers and that is worked around by
+// VectorizeVectorScalarArithmetic AST transform.
+TEST_P(GLSLTest, VectorScalarMultiplyAndAddInLoop)
+{
+    const std::string &fragmentShader =
+        R"(
+        precision mediump float;
+        void main() {
+            gl_FragColor = vec4(0.0, 0.0, 0.0, 0.0);
+            for (int i = 0; i < 2; i++)
+            {
+                gl_FragColor += (2.0 * gl_FragCoord.x);
+            }
+            if (gl_FragColor.g == gl_FragColor.r &&
+                gl_FragColor.b == gl_FragColor.r &&
+                gl_FragColor.a == gl_FragColor.r)
+            {
+                gl_FragColor = vec4(0, 1, 0, 1);
+            }
+        })";
+    ANGLE_GL_PROGRAM(program, mSimpleVSSource, fragmentShader);
+    drawQuad(program.get(), "inputAttribute", 0.5f);
+    EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
+}
+// Test vector/scalar arithmetic (in this case compound division and addition). Meant to reproduce a
+// bug that appeared in NVIDIA OpenGL drivers and that is worked around by
+// VectorizeVectorScalarArithmetic AST transform.
+TEST_P(GLSLTest, VectorScalarDivideAndAddInLoop)
+{
+    const std::string &fragmentShader =
+        R"(
+        precision mediump float;
+        void main() {
+            gl_FragColor = vec4(0.0, 0.0, 0.0, 0.0);
+            for (int i = 0; i < 2; i++)
+            {
+                float x = gl_FragCoord.x;
+                gl_FragColor = gl_FragColor + (x /= 2.0);
+            }
+            if (gl_FragColor.g == gl_FragColor.r &&
+                gl_FragColor.b == gl_FragColor.r &&
+                gl_FragColor.a == gl_FragColor.r)
+            {
+                gl_FragColor = vec4(0, 1, 0, 1);
+            }
+        })";
+    ANGLE_GL_PROGRAM(program, mSimpleVSSource, fragmentShader);
+    drawQuad(program.get(), "inputAttribute", 0.5f);
+    EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
+}
 // Use this to select which configurations (e.g. which renderer, which GLES major version) these tests should be run against.
 ANGLE_INSTANTIATE_TEST(GLSLTest,
                       ES2_D3D9(),