Commit a6a7842f by Jiawei Shao Committed by Commit Bot

ES31: Support atomic functions on D3D11 - Part I

This patch is the first one of the implementation of atomic functions in D3D11. There are mainly two differences in the usage of GLSL and HLSL atomic functions: 1. All GLSL atomic functions have return values, which all represent the original value of the shared or ssbo variable; while all HLSL atomic functions don't, and the original value can be stored in the last parameter of the function call. 2. For HLSL atomic functions, the last parameter that stores the original value is optional except for InterlockedExchange and InterlockedCompareExchange. Missing original_value in the call of InterlockedExchange and InterlockedCompareExchange results in a compile error from HLSL compiler. To handle these differences, we plan to implement the translation in two steps: 1. Support direct translations from GLSL atomic functions to HLSL ones. Direct translation can only handle the following two situations: (1) The sentence is a GLSL atomic function call without requesting a return value and it is not atomicExchange or atomicCompSwap: e.g. GLSL: atomicAdd(mem, value); -> HLSL: InterlockedAdd(mem, value); (2) The sentence is a simple assignment expression: its right is a GLSL atomic function call and its left is a declared variable. e.g. GLSL: oldValue = atomicAdd(mem, value); -> HLSL: InterlockedAdd(mem, value, oldValue); 2. Support atomic functions in the situations that don't support direct translations. We will modify the intermediate tree to make direct translation work on all these situations. e.g. atomicExchange(mem, value); -> int oldValue; oldValue = atomicExchange(mem, value); int oldValue = atomicAdd(mem, value); -> int oldValue; oldValue = atomicAdd(mem, value); return atomicAdd(mem, value); -> int temp; temp = atomicAdd(mem, value); return temp; for (i = 0; i < atomicAdd(mem, value); ++i) -> int temp; temp = atomicAdd(mem, value); for (i = 0; i < temp; ++i) { ... temp = atomicAdd(mem, value); } int result = isTrue ? 
atomicAdd(mem, value) : 0; -> int result; if (isTrue) { result = atomicAdd(mem, value); } else { result = 0; } This patch completes Step 1, which mainly focuses on the translation from GLSL atomic functions to HLSL ones. BUG=angleproject:2682 TEST=angle_end2end_tests Change-Id: I3b655b6e286dad4fd97f255f7fe87521c94db30c Reviewed-on: https://chromium-review.googlesource.com/1121835 Commit-Queue: Jiawei Shao <jiawei.shao@intel.com> Reviewed-by: Olli Etuaho <oetuaho@nvidia.com>
parent a2f043d8
......@@ -345,6 +345,23 @@ const char *GetOperatorString(TOperator op)
case EOpGroupMemoryBarrier:
return "groupMemoryBarrier";
case EOpAtomicAdd:
return "atomicAdd";
case EOpAtomicMin:
return "atomicMin";
case EOpAtomicMax:
return "atomicMax";
case EOpAtomicAnd:
return "atomicAnd";
case EOpAtomicOr:
return "atomicOr";
case EOpAtomicXor:
return "atomicXor";
case EOpAtomicExchange:
return "atomicExchange";
case EOpAtomicCompSwap:
return "atomicCompSwap";
case EOpEmitVertex:
return "EmitVertex";
case EOpEndPrimitive:
......@@ -383,3 +400,21 @@ bool IsAssignment(TOperator op)
return false;
}
}
// Returns true when the operator is one of the GLSL built-in atomic memory
// functions (atomicAdd, atomicMin, atomicMax, atomicAnd, atomicOr, atomicXor,
// atomicExchange, atomicCompSwap).
bool IsAtomicFunction(TOperator op)
{
    return op == EOpAtomicAdd || op == EOpAtomicMin || op == EOpAtomicMax ||
           op == EOpAtomicAnd || op == EOpAtomicOr || op == EOpAtomicXor ||
           op == EOpAtomicExchange || op == EOpAtomicCompSwap;
}
......@@ -241,6 +241,16 @@ enum TOperator
EOpMemoryBarrierShared,
EOpGroupMemoryBarrier,
// Atomic functions
EOpAtomicAdd,
EOpAtomicMin,
EOpAtomicMax,
EOpAtomicAnd,
EOpAtomicOr,
EOpAtomicXor,
EOpAtomicExchange,
EOpAtomicCompSwap,
// Geometry only
EOpEmitVertex,
EOpEndPrimitive
......@@ -252,4 +262,7 @@ const char *GetOperatorString(TOperator op);
// Say whether or not a binary or unary operation changes the value of a variable.
bool IsAssignment(TOperator op);
// Say whether or not an operator represents an atomic function.
bool IsAtomicFunction(TOperator op);
#endif // COMPILER_TRANSLATOR_OPERATOR_H_
......@@ -975,6 +975,14 @@ bool TOutputGLSLBase::visitAggregate(Visit visit, TIntermAggregate *node)
case EOpMemoryBarrierImage:
case EOpMemoryBarrierShared:
case EOpGroupMemoryBarrier:
case EOpAtomicAdd:
case EOpAtomicMin:
case EOpAtomicMax:
case EOpAtomicAnd:
case EOpAtomicOr:
case EOpAtomicXor:
case EOpAtomicExchange:
case EOpAtomicCompSwap:
case EOpEmitVertex:
case EOpEndPrimitive:
writeBuiltInFunctionTriplet(visit, node->getOp(), node->getUseEmulatedFunction());
......
......@@ -80,6 +80,38 @@ bool IsInStd140InterfaceBlock(TIntermTyped *node)
return false;
}
// Maps a GLSL atomic operator to the name of the corresponding HLSL
// Interlocked* intrinsic, including the opening parenthesis of the call so the
// caller can stream the arguments directly after it.
const char *GetHLSLAtomicFunctionStringAndLeftParenthesis(TOperator op)
{
    const char *hlslFunction = "";
    switch (op)
    {
        case EOpAtomicAdd:
            hlslFunction = "InterlockedAdd(";
            break;
        case EOpAtomicMin:
            hlslFunction = "InterlockedMin(";
            break;
        case EOpAtomicMax:
            hlslFunction = "InterlockedMax(";
            break;
        case EOpAtomicAnd:
            hlslFunction = "InterlockedAnd(";
            break;
        case EOpAtomicOr:
            hlslFunction = "InterlockedOr(";
            break;
        case EOpAtomicXor:
            hlslFunction = "InterlockedXor(";
            break;
        case EOpAtomicExchange:
            hlslFunction = "InterlockedExchange(";
            break;
        case EOpAtomicCompSwap:
            hlslFunction = "InterlockedCompareExchange(";
            break;
        default:
            // Callers only pass operators for which IsAtomicFunction() is true.
            UNREACHABLE();
    }
    return hlslFunction;
}
bool IsAtomicFunctionDirectAssign(const TIntermBinary &node)
{
return node.getOp() == EOpAssign && node.getRight()->getAsAggregate() &&
IsAtomicFunction(node.getRight()->getAsAggregate()->getOp());
}
} // anonymous namespace
TReferencedBlock::TReferencedBlock(const TInterfaceBlock *aBlock,
......@@ -1142,6 +1174,27 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node)
// function call is assigned.
ASSERT(rightAgg == nullptr);
}
// Assignment expressions with atomic functions should be transformed into atomic
// function calls in HLSL.
// e.g. original_value = atomicAdd(dest, value) should be translated into
// InterlockedAdd(dest, value, original_value);
else if (IsAtomicFunctionDirectAssign(*node))
{
TIntermAggregate *atomicFunctionNode = node->getRight()->getAsAggregate();
TOperator atomicFunctionOp = atomicFunctionNode->getOp();
out << GetHLSLAtomicFunctionStringAndLeftParenthesis(atomicFunctionOp);
TIntermSequence *argumentSeq = atomicFunctionNode->getSequence();
ASSERT(argumentSeq->size() >= 2u);
for (auto &argument : *argumentSeq)
{
argument->traverse(this);
out << ", ";
}
node->getLeft()->traverse(this);
out << ")";
return false;
}
outputAssign(visit, node->getType(), out);
break;
case EOpInitialize:
......@@ -2139,6 +2192,29 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
case EOpMemoryBarrier:
outputTriplet(out, visit, "AllMemoryBarrier(", "", ")");
break;
// Single atomic function calls without return value.
// e.g. atomicAdd(dest, value) should be translated into InterlockedAdd(dest, value).
case EOpAtomicAdd:
case EOpAtomicMin:
case EOpAtomicMax:
case EOpAtomicAnd:
case EOpAtomicOr:
case EOpAtomicXor:
outputTriplet(out, visit, GetHLSLAtomicFunctionStringAndLeftParenthesis(node->getOp()),
",", ")");
break;
// The parameter 'original_value' of InterlockedExchange(dest, value, original_value) and
// InterlockedCompareExchange(dest, compare_value, value, original_value) is not optional.
// https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/interlockedexchange
// https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/interlockedcompareexchange
// So all the call of atomicExchange(dest, value) and atomicCompSwap(dest, compare_value,
// value) should all be modified into the form of "int temp; temp = atomicExchange(dest,
// value);" and "int temp; temp = atomicCompSwap(dest, compare_value, value);" in the
// intermediate tree before traversing outputHLSL.
case EOpAtomicExchange:
case EOpAtomicCompSwap:
default:
UNREACHABLE();
}
......
......@@ -5620,10 +5620,10 @@ void TParseContext::checkTextureOffsetConst(TIntermAggregate *functionCall)
void TParseContext::checkAtomicMemoryBuiltinFunctions(TIntermAggregate *functionCall)
{
ASSERT(functionCall->getOp() == EOpCallBuiltInFunction);
const TFunction *func = functionCall->getFunction();
if (BuiltInGroup::isAtomicMemory(func))
{
ASSERT(IsAtomicFunction(functionCall->getOp()));
TIntermSequence *arguments = functionCall->getSequence();
TIntermTyped *memNode = (*arguments)[0]->getAsTyped();
......@@ -5845,10 +5845,13 @@ TIntermTyped *TParseContext::addNonConstructorFunctionCall(TFunctionLookup *fnCa
ASSERT(callNode != nullptr);
return callNode;
}
TIntermAggregate *callNode =
TIntermAggregate::CreateBuiltInFunctionCall(*fnCandidate, &fnCall->arguments());
callNode->setLine(loc);
checkAtomicMemoryBuiltinFunctions(callNode);
// Some built-in functions have out parameters too.
functionCallRValueLValueErrorCheck(fnCandidate, callNode);
......@@ -5864,7 +5867,6 @@ TIntermTyped *TParseContext::addNonConstructorFunctionCall(TFunctionLookup *fnCa
checkTextureOffsetConst(callNode);
checkTextureGather(callNode);
checkImageMemoryAccessForBuiltinFunctions(callNode);
checkAtomicMemoryBuiltinFunctions(callNode);
functionCallRValueLValueErrorCheck(fnCandidate, callNode);
return callNode;
}
......
......@@ -9793,7 +9793,7 @@ constexpr const TFunction kFunction_atomicAdd_0D0D(
BuiltInParameters::p_io_0D0D0D,
2,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicAdd,
false);
constexpr const TFunction kFunction_atomicAdd_0C0C(
BuiltInId::atomicAdd_Int1_Int1,
......@@ -9802,7 +9802,7 @@ constexpr const TFunction kFunction_atomicAdd_0C0C(
BuiltInParameters::p_io_0C0C0C,
2,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicAdd,
false);
constexpr const TFunction kFunction_atomicMin_0D0D(
BuiltInId::atomicMin_UInt1_UInt1,
......@@ -9811,7 +9811,7 @@ constexpr const TFunction kFunction_atomicMin_0D0D(
BuiltInParameters::p_io_0D0D0D,
2,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicMin,
false);
constexpr const TFunction kFunction_atomicMin_0C0C(
BuiltInId::atomicMin_Int1_Int1,
......@@ -9820,7 +9820,7 @@ constexpr const TFunction kFunction_atomicMin_0C0C(
BuiltInParameters::p_io_0C0C0C,
2,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicMin,
false);
constexpr const TFunction kFunction_atomicMax_0D0D(
BuiltInId::atomicMax_UInt1_UInt1,
......@@ -9829,7 +9829,7 @@ constexpr const TFunction kFunction_atomicMax_0D0D(
BuiltInParameters::p_io_0D0D0D,
2,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicMax,
false);
constexpr const TFunction kFunction_atomicMax_0C0C(
BuiltInId::atomicMax_Int1_Int1,
......@@ -9838,7 +9838,7 @@ constexpr const TFunction kFunction_atomicMax_0C0C(
BuiltInParameters::p_io_0C0C0C,
2,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicMax,
false);
constexpr const TFunction kFunction_atomicAnd_0D0D(
BuiltInId::atomicAnd_UInt1_UInt1,
......@@ -9847,7 +9847,7 @@ constexpr const TFunction kFunction_atomicAnd_0D0D(
BuiltInParameters::p_io_0D0D0D,
2,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicAnd,
false);
constexpr const TFunction kFunction_atomicAnd_0C0C(
BuiltInId::atomicAnd_Int1_Int1,
......@@ -9856,7 +9856,7 @@ constexpr const TFunction kFunction_atomicAnd_0C0C(
BuiltInParameters::p_io_0C0C0C,
2,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicAnd,
false);
constexpr const TFunction kFunction_atomicOr_0D0D(
BuiltInId::atomicOr_UInt1_UInt1,
......@@ -9865,7 +9865,7 @@ constexpr const TFunction kFunction_atomicOr_0D0D(
BuiltInParameters::p_io_0D0D0D,
2,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicOr,
false);
constexpr const TFunction kFunction_atomicOr_0C0C(
BuiltInId::atomicOr_Int1_Int1,
......@@ -9874,7 +9874,7 @@ constexpr const TFunction kFunction_atomicOr_0C0C(
BuiltInParameters::p_io_0C0C0C,
2,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicOr,
false);
constexpr const TFunction kFunction_atomicXor_0D0D(
BuiltInId::atomicXor_UInt1_UInt1,
......@@ -9883,7 +9883,7 @@ constexpr const TFunction kFunction_atomicXor_0D0D(
BuiltInParameters::p_io_0D0D0D,
2,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicXor,
false);
constexpr const TFunction kFunction_atomicXor_0C0C(
BuiltInId::atomicXor_Int1_Int1,
......@@ -9892,7 +9892,7 @@ constexpr const TFunction kFunction_atomicXor_0C0C(
BuiltInParameters::p_io_0C0C0C,
2,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicXor,
false);
constexpr const TFunction kFunction_atomicExchange_0D0D(
BuiltInId::atomicExchange_UInt1_UInt1,
......@@ -9901,7 +9901,7 @@ constexpr const TFunction kFunction_atomicExchange_0D0D(
BuiltInParameters::p_io_0D0D0D,
2,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicExchange,
false);
constexpr const TFunction kFunction_atomicExchange_0C0C(
BuiltInId::atomicExchange_Int1_Int1,
......@@ -9910,7 +9910,7 @@ constexpr const TFunction kFunction_atomicExchange_0C0C(
BuiltInParameters::p_io_0C0C0C,
2,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicExchange,
false);
constexpr const TFunction kFunction_atomicCompSwap_0D0D0D(
BuiltInId::atomicCompSwap_UInt1_UInt1_UInt1,
......@@ -9919,7 +9919,7 @@ constexpr const TFunction kFunction_atomicCompSwap_0D0D0D(
BuiltInParameters::p_io_0D0D0D,
3,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicCompSwap,
false);
constexpr const TFunction kFunction_atomicCompSwap_0C0C0C(
BuiltInId::atomicCompSwap_Int1_Int1_Int1,
......@@ -9928,7 +9928,7 @@ constexpr const TFunction kFunction_atomicCompSwap_0C0C0C(
BuiltInParameters::p_io_0C0C0C,
3,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicCompSwap,
false);
constexpr const TFunction kFunction_imageSize_0c(
BuiltInId::imageSize_Image2D1,
......
......@@ -508,7 +508,7 @@ GROUP BEGIN AtomicCounter
GROUP END AtomicCounter
GROUP BEGIN AtomicMemory {"queryFunction": true}
DEFAULT METADATA {"level": "ESSL3_1_BUILTINS", "op": "CallBuiltInFunction"}
DEFAULT METADATA {"level": "ESSL3_1_BUILTINS", "op": "auto"}
uint atomicAdd(inout uint, uint);
int atomicAdd(inout int, int);
uint atomicMin(inout uint, uint);
......
71be250213ce3f9d36417ff880fb4100
\ No newline at end of file
fe7f387068b72dc71759198547902721
\ No newline at end of file
......@@ -20,34 +20,35 @@ class ComputeShaderTest : public ANGLETest
protected:
ComputeShaderTest() {}
template <GLint kWidth, GLint kHeight>
template <class T, GLint kWidth, GLint kHeight>
void runSharedMemoryTest(const char *csSource,
const std::array<GLuint, kWidth * kHeight> &inputData,
const std::array<GLuint, kWidth * kHeight> &expectedValues)
GLenum internalFormat,
GLenum format,
const std::array<T, kWidth * kHeight> &inputData,
const std::array<T, kWidth * kHeight> &expectedValues)
{
GLTexture texture[2];
GLFramebuffer framebuffer;
glBindTexture(GL_TEXTURE_2D, texture[0]);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, kWidth, kHeight);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT,
glTexStorage2D(GL_TEXTURE_2D, 1, internalFormat, kWidth, kHeight);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, format,
inputData.data());
EXPECT_GL_NO_ERROR();
constexpr GLuint initData[kWidth * kHeight] = {};
constexpr T initData[kWidth * kHeight] = {};
glBindTexture(GL_TEXTURE_2D, texture[1]);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, kWidth, kHeight);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT,
initData);
glTexStorage2D(GL_TEXTURE_2D, 1, internalFormat, kWidth, kHeight);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, format, initData);
EXPECT_GL_NO_ERROR();
ANGLE_GL_COMPUTE_PROGRAM(program, csSource);
glUseProgram(program.get());
glBindImageTexture(0, texture[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
glBindImageTexture(0, texture[0], 0, GL_FALSE, 0, GL_READ_ONLY, internalFormat);
EXPECT_GL_NO_ERROR();
glBindImageTexture(1, texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
glBindImageTexture(1, texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, internalFormat);
EXPECT_GL_NO_ERROR();
glDispatchCompute(1, 1, 1);
......@@ -55,14 +56,14 @@ class ComputeShaderTest : public ANGLETest
glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
GLuint outputValues[kWidth * kHeight];
T outputValues[kWidth * kHeight] = {};
glUseProgram(0);
glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture[1],
0);
EXPECT_GL_NO_ERROR();
glReadPixels(0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT, outputValues);
glReadPixels(0, 0, kWidth, kHeight, GL_RED_INTEGER, format, outputValues);
EXPECT_GL_NO_ERROR();
for (int i = 0; i < kWidth * kHeight; i++)
......@@ -1386,7 +1387,8 @@ TEST_P(ComputeShaderTest, NonArraySharedVariable)
const std::array<GLuint, 4> inputData = {{250, 200, 150, 100}};
const std::array<GLuint, 4> expectedValues = {{250, 200, 150, 250}};
runSharedMemoryTest<2, 2>(kCSShader, inputData, expectedValues);
runSharedMemoryTest<GLuint, 2, 2>(kCSShader, GL_R32UI, GL_UNSIGNED_INT, inputData,
expectedValues);
}
// Verify shared non-struct array variables can work correctly.
......@@ -1410,7 +1412,8 @@ TEST_P(ComputeShaderTest, NonStructArrayAsSharedVariable)
const std::array<GLuint, 4> inputData = {{250, 200, 150, 100}};
const std::array<GLuint, 4> expectedValues = {{250, 150, 200, 100}};
runSharedMemoryTest<2, 2>(kCSShader, inputData, expectedValues);
runSharedMemoryTest<GLuint, 2, 2>(kCSShader, GL_R32UI, GL_UNSIGNED_INT, inputData,
expectedValues);
}
// Verify shared struct array variables work correctly.
......@@ -1438,7 +1441,111 @@ TEST_P(ComputeShaderTest, StructArrayAsSharedVariable)
const std::array<GLuint, 4> inputData = {{250, 200, 150, 100}};
const std::array<GLuint, 4> expectedValues = {{250, 150, 200, 100}};
runSharedMemoryTest<2, 2>(kCSShader, inputData, expectedValues);
runSharedMemoryTest<GLuint, 2, 2>(kCSShader, GL_R32UI, GL_UNSIGNED_INT, inputData,
expectedValues);
}
// Verify using atomic functions without return value can work correctly.
// TODO(jiawei.shao@intel.com): add test on atomicExchange and atomicCompSwap.
TEST_P(ComputeShaderTest, AtomicFunctionsNoReturnValue)
{
// TODO(jiawei.shao@intel.com): find out why this shader causes a link error on Android Nexus 5
// bot.
ANGLE_SKIP_TEST_IF(IsAndroid());
// Six invocations each load one value from srcImage and apply every atomic
// function (without using its return value) to a dedicated slot of the shared
// array "results". Slots for atomicMin/atomicAnd start at 0xFFFF so those ops
// can actually lower the value; all other slots start at 0.
const char kCSShader[] =
R"(#version 310 es
layout (local_size_x = 6, local_size_y = 1, local_size_z = 1) in;
layout (r32ui, binding = 0) readonly uniform highp uimage2D srcImage;
layout (r32ui, binding = 1) writeonly uniform highp uimage2D dstImage;
const uint kSumIndex = 0u;
const uint kMinIndex = 1u;
const uint kMaxIndex = 2u;
const uint kOrIndex = 3u;
const uint kAndIndex = 4u;
const uint kXorIndex = 5u;
shared highp uint results[6];
void main()
{
if (gl_LocalInvocationID.x == kMinIndex || gl_LocalInvocationID.x == kAndIndex)
{
results[gl_LocalInvocationID.x] = 0xFFFFu;
}
else
{
results[gl_LocalInvocationID.x] = 0u;
}
memoryBarrierShared();
barrier();
uint value = imageLoad(srcImage, ivec2(gl_LocalInvocationID.xy)).x;
atomicAdd(results[kSumIndex], value);
atomicMin(results[kMinIndex], value);
atomicMax(results[kMaxIndex], value);
atomicOr(results[kOrIndex], value);
atomicAnd(results[kAndIndex], value);
atomicXor(results[kXorIndex], value);
memoryBarrierShared();
barrier();
imageStore(dstImage, ivec2(gl_LocalInvocationID.xy),
uvec4(results[gl_LocalInvocationID.x]));
})";
// Inputs are distinct powers of two, so each combined result is predictable:
// sum/or/xor of disjoint bits = 63, min = 1, max = 32, and(0xFFFF, each) = 0.
const std::array<GLuint, 6> inputData = {{1, 2, 4, 8, 16, 32}};
const std::array<GLuint, 6> expectedValues = {{63, 1, 32, 63, 0, 63}};
runSharedMemoryTest<GLuint, 6, 1>(kCSShader, GL_R32UI, GL_UNSIGNED_INT, inputData,
expectedValues);
}
// Verify using atomic functions in a non-initializer single assignment can work correctly.
TEST_P(ComputeShaderTest, AtomicFunctionsInNonInitializerSingleAssignment)
{
// Invocation 0 runs a fixed sequence of atomic functions on one shared
// variable and stores each returned value (the value *before* the atomic
// operation, per GLSL ES 3.10) into outputData; the other invocations only
// supply inputs. This exercises the "lhs = atomicXxx(...)" direct-assignment
// translation path.
const char kCSShader[] =
R"(#version 310 es
layout (local_size_x = 9, local_size_y = 1, local_size_z = 1) in;
layout (r32i, binding = 0) readonly uniform highp iimage2D srcImage;
layout (r32i, binding = 1) writeonly uniform highp iimage2D dstImage;
shared highp int sharedVariable;
shared highp int inputData[9];
shared highp int outputData[9];
void main()
{
int inputValue = imageLoad(srcImage, ivec2(gl_LocalInvocationID.xy)).x;
inputData[gl_LocalInvocationID.x] = inputValue;
memoryBarrierShared();
barrier();
if (gl_LocalInvocationID.x == 0u)
{
sharedVariable = 0;
outputData[0] = atomicAdd(sharedVariable, inputData[0]);
outputData[1] = atomicMin(sharedVariable, inputData[1]);
outputData[2] = atomicMax(sharedVariable, inputData[2]);
outputData[3] = atomicAnd(sharedVariable, inputData[3]);
outputData[4] = atomicOr(sharedVariable, inputData[4]);
outputData[5] = atomicXor(sharedVariable, inputData[5]);
outputData[6] = atomicExchange(sharedVariable, inputData[6]);
outputData[7] = atomicCompSwap(sharedVariable, 64, inputData[7]);
outputData[8] = atomicAdd(sharedVariable, inputData[8]);
}
memoryBarrierShared();
barrier();
imageStore(dstImage, ivec2(gl_LocalInvocationID.xy),
ivec4(outputData[gl_LocalInvocationID.x]));
})";
// Expected values are the pre-operation values of sharedVariable after each
// step of the sequence above, computed by hand from inputData.
const std::array<GLint, 9> inputData = {{1, 2, 4, 8, 16, 32, 64, 128, 1}};
const std::array<GLint, 9> expectedValues = {{0, 1, 1, 4, 0, 16, 48, 64, 128}};
runSharedMemoryTest<GLint, 9, 1>(kCSShader, GL_R32I, GL_INT, inputData, expectedValues);
}
// Check that it is not possible to create a compute shader when the context does not support ES
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment