Merge pull request #339 from steve-lunarg/intrinsics

HLSL: Implement atomic ops, bit conversions, fix minor intrinsics bugs

Merge pull request #339 from steve-lunarg/intrinsics
01de90bb · John Kessenich · GitHub · 7b04bdde · 58910709 · 01de90bb
Commit 01de90bb, authored Jun 14, 2016 by John Kessenich; committed by GitHub on Jun 14, 2016
16 changed files
--- a/Test/baseResults/hlsl.intrinsics.comp.out
+++ b/Test/baseResults/hlsl.intrinsics.comp.out
--- a/Test/baseResults/hlsl.intrinsics.frag.out
+++ b/Test/baseResults/hlsl.intrinsics.frag.out
--- a/Test/baseResults/hlsl.intrinsics.negative.comp.out
+++ b/Test/baseResults/hlsl.intrinsics.negative.comp.out
--- a/Test/baseResults/hlsl.intrinsics.negative.vert.out
+++ b/Test/baseResults/hlsl.intrinsics.negative.vert.out
--- a/Test/baseResults/hlsl.intrinsics.vert.out
+++ b/Test/baseResults/hlsl.intrinsics.vert.out
--- a/Test/hlsl.intrinsics.comp
+++ b/Test/hlsl.intrinsics.comp
+#define gs     // TODO: define as groupshared when available in the grammar
+gs uint gs_ua;
+gs uint gs_ub;
+gs uint gs_uc;
+gs uint2 gs_ua2;
+gs uint2 gs_ub2;
+gs uint2 gs_uc2;
+gs uint3 gs_ua3;
+gs uint3 gs_ub3;
+gs uint3 gs_uc3;
+gs uint4 gs_ua4;
+gs uint4 gs_ub4;
+gs uint4 gs_uc4;
+float ComputeShaderFunction(float inF0, float inF1, float inF2, uint inU0, uint inU1)
+{
+    uint out_u1;
+    // Don't repeat all the pixel/vertex fns - just one for sanity.
+    all(inF0);
+    // Test atomics
+    InterlockedAdd(gs_ua, gs_ub);
+    InterlockedAdd(gs_ua, gs_ub, out_u1);
+    InterlockedAnd(gs_ua, gs_ub);
+    InterlockedAnd(gs_ua, gs_ub, out_u1);
+    InterlockedCompareExchange(gs_ua, gs_ub, gs_uc, out_u1);
+    InterlockedExchange(gs_ua, gs_ub, out_u1);
+    InterlockedMax(gs_ua, gs_ub);
+    InterlockedMax(gs_ua, gs_ub, out_u1);
+    InterlockedMin(gs_ua, gs_ub);
+    InterlockedMin(gs_ua, gs_ub, out_u1);
+    InterlockedOr(gs_ua, gs_ub);
+    InterlockedOr(gs_ua, gs_ub, out_u1);
+    InterlockedXor(gs_ua, gs_ub);
+    InterlockedXor(gs_ua, gs_ub, out_u1);
+    // CheckAccessFullyMapped(3);  // TODO: ...
+    return 0.0;
+}
+float1 ComputeShaderFunction(float1 inF0, float1 inF1, float1 inF2)
+{
+    // TODO: ... add when float1 prototypes are generated
+    return 0.0;
+}
+float2 ComputeShaderFunction(float2 inF0, float2 inF1, float2 inF2, uint2 inU0, uint2 inU1)
+{
+    uint2 out_u2;
+    // Don't repeat all the pixel/vertex fns - just one for sanity.
+    all(inF0);
+    // Test atomics
+    InterlockedAdd(gs_ua2, gs_ub2);
+    InterlockedAdd(gs_ua2, gs_ub2, out_u2);
+    InterlockedAnd(gs_ua2, gs_ub2);
+    InterlockedAnd(gs_ua2, gs_ub2, out_u2);
+    InterlockedCompareExchange(gs_ua2, gs_ub2, gs_uc2, out_u2);
+    InterlockedExchange(gs_ua2, gs_ub2, out_u2);
+    InterlockedMax(gs_ua2, gs_ub2);
+    InterlockedMax(gs_ua2, gs_ub2, out_u2);
+    InterlockedMin(gs_ua2, gs_ub2);
+    InterlockedMin(gs_ua2, gs_ub2, out_u2);
+    InterlockedOr(gs_ua2, gs_ub2);
+    InterlockedOr(gs_ua2, gs_ub2, out_u2);
+    InterlockedXor(gs_ua2, gs_ub2);
+    InterlockedXor(gs_ua2, gs_ub2, out_u2);
+    // TODO: ... add when float1 prototypes are generated
+    return float2(1,2);
+}
+float3 ComputeShaderFunction(float3 inF0, float3 inF1, float3 inF2, uint3 inU0, uint3 inU1)
+{
+    uint3 out_u3;
+    // Don't repeat all the pixel/vertex fns - just one for sanity.
+    all(inF0);
+    // Test atomics
+    InterlockedAdd(gs_ua3, gs_ub3);
+    InterlockedAdd(gs_ua3, gs_ub3, out_u3);
+    InterlockedAnd(gs_ua3, gs_ub3);
+    InterlockedAnd(gs_ua3, gs_ub3, out_u3);
+    InterlockedCompareExchange(gs_ua3, gs_ub3, gs_uc3, out_u3);
+    InterlockedExchange(gs_ua3, gs_ub3, out_u3);
+    InterlockedMax(gs_ua3, gs_ub3);
+    InterlockedMax(gs_ua3, gs_ub3, out_u3);
+    InterlockedMin(gs_ua3, gs_ub3);
+    InterlockedMin(gs_ua3, gs_ub3, out_u3);
+    InterlockedOr(gs_ua3, gs_ub3);
+    InterlockedOr(gs_ua3, gs_ub3, out_u3);
+    InterlockedXor(gs_ua3, gs_ub3);
+    InterlockedXor(gs_ua3, gs_ub3, out_u3);
+    // TODO: ... add when float1 prototypes are generated
+    return float3(1,2,3);
+}
+float4 ComputeShaderFunction(float4 inF0, float4 inF1, float4 inF2, uint4 inU0, uint4 inU1)
+{
+    uint4 out_u4;
+    // Don't repeat all the pixel/vertex fns - just one for sanity.
+    all(inF0);
+    // Test atomics
+    InterlockedAdd(gs_ua4, gs_ub4);
+    InterlockedAdd(gs_ua4, gs_ub4, out_u4);
+    InterlockedAnd(gs_ua4, gs_ub4);
+    InterlockedAnd(gs_ua4, gs_ub4, out_u4);
+    InterlockedCompareExchange(gs_ua4, gs_ub4, gs_uc4, out_u4);
+    InterlockedExchange(gs_ua4, gs_ub4, out_u4);
+    InterlockedMax(gs_ua4, gs_ub4);
+    InterlockedMax(gs_ua4, gs_ub4, out_u4);
+    InterlockedMin(gs_ua4, gs_ub4);
+    InterlockedMin(gs_ua4, gs_ub4, out_u4);
+    InterlockedOr(gs_ua4, gs_ub4);
+    InterlockedOr(gs_ua4, gs_ub4, out_u4);
+    InterlockedXor(gs_ua4, gs_ub4);
+    InterlockedXor(gs_ua4, gs_ub4, out_u4);
+    // TODO: ... add when float1 prototypes are generated
+    return float4(1,2,3,4);
+}
--- a/Test/hlsl.intrinsics.frag
+++ b/Test/hlsl.intrinsics.frag
-float PixelShaderFunction(float inF0, float inF1, float inF2)
+#define gs     // TODO: define as groupshared when available in the grammar
+gs uint gs_ua;
+gs uint gs_ub;
+gs uint gs_uc;
+gs uint2 gs_ua2;
+gs uint2 gs_ub2;
+gs uint2 gs_uc2;
+gs uint3 gs_ua3;
+gs uint3 gs_ub3;
+gs uint3 gs_uc3;
+gs uint4 gs_ua4;
+gs uint4 gs_ub4;
+gs uint4 gs_uc4;
+float PixelShaderFunction(float inF0, float inF1, float inF2, uint inU0, uint inU1)
 {
+    uint out_u1;
    all(inF0);
    abs(inF0);
    acos(inF0);
    any(inF0);
    asin(inF0);
+    asint(inF0);
+    asuint(inF0);
+    asfloat(inU0);
+    // asdouble(inU0, inU1);  // TODO: enable when HLSL parser used for intrinsics
    atan(inF0);
    atan2(inF0, inF1);
    ceil(inF0);
@@ -69,13 +90,19 @@ float1 PixelShaderFunction(float1 inF0, float1 inF1, float1 inF2)
    return 0.0;
 }
-float2 PixelShaderFunction(float2 inF0, float2 inF1, float2 inF2)
+float2 PixelShaderFunction(float2 inF0, float2 inF1, float2 inF2, uint2 inU0, uint2 inU1)
 {
+    uint2 out_u2;
    all(inF0);
    abs(inF0);
    acos(inF0);
    any(inF0);
    asin(inF0);
+    asint(inF0);
+    asuint(inF0);
+    asfloat(inU0);
+    // asdouble(inU0, inU1);  // TODO: enable when HLSL parser used for intrinsics
    atan(inF0);
    atan2(inF0, inF1);
    ceil(inF0);
@@ -142,13 +169,19 @@ float2 PixelShaderFunction(float2 inF0, float2 inF1, float2 inF2)
    return float2(1,2);
 }
-float3 PixelShaderFunction(float3 inF0, float3 inF1, float3 inF2)
+float3 PixelShaderFunction(float3 inF0, float3 inF1, float3 inF2, uint3 inU0, uint3 inU1)
 {
+    uint3 out_u3;
    all(inF0);
    abs(inF0);
    acos(inF0);
    any(inF0);
    asin(inF0);
+    asint(inF0);
+    asuint(inF0);
+    asfloat(inU0);
+    // asdouble(inU0, inU1);  // TODO: enable when HLSL parser used for intrinsics
    atan(inF0);
    atan2(inF0, inF1);
    ceil(inF0);
@@ -216,13 +249,19 @@ float3 PixelShaderFunction(float3 inF0, float3 inF1, float3 inF2)
    return float3(1,2,3);
 }
-float4 PixelShaderFunction(float4 inF0, float4 inF1, float4 inF2)
+float4 PixelShaderFunction(float4 inF0, float4 inF1, float4 inF2, uint4 inU0, uint4 inU1)
 {
+    uint4 out_u4;
    all(inF0);
    abs(inF0);
    acos(inF0);
    any(inF0);
    asin(inF0);
+    asint(inF0);
+    asuint(inF0);
+    asfloat(inU0);
+    // asdouble(inU0, inU1);  // TODO: enable when HLSL parser used for intrinsics
    atan(inF0);
    atan2(inF0, inF1);
    ceil(inF0);
@@ -290,6 +329,11 @@ float4 PixelShaderFunction(float4 inF0, float4 inF1, float4 inF2)
    return float4(1,2,3,4);
 }
+// TODO: for mats:
+//    asfloat(inU0); \
+//    asint(inF0); \
+//    asuint(inF0); \
 // TODO: FXC doesn't accept this with (), but glslang doesn't accept it without.
 #define MATFNS() \
    all(inF0); \

--- a/Test/hlsl.intrinsics.negative.comp
+++ b/Test/hlsl.intrinsics.negative.comp
--- a/Test/hlsl.intrinsics.negative.frag
+++ b/Test/hlsl.intrinsics.negative.frag
@@ -78,16 +78,16 @@ float3 PixelShaderFunction(float3 inF0, float3 inF1, float3 inF2, int3 inI0)
 float4 PixelShaderFunction(float4 inF0, float4 inF1, float4 inF2, int4 inI0)
 {
-    CheckAccessFullyMapped(inF0);  // expected error: only valid on scalars
+    CheckAccessFullyMapped(inF0); // expected error: only valid on scalars
-    countbits(inF0);            // expected error: only integer inputs
+    countbits(inF0);              // expected error: only integer inputs
-    cross(inF0, inF1);          // expected error: only on float3 inputs
+    cross(inF0, inF1);            // expected error: only on float3 inputs
-    determinant(inF0);          // expected error: only valid on mats
+    determinant(inF0);            // expected error: only valid on mats
-    f16tof32(inF0);             // expected error: only integer inputs
+    f16tof32(inF0);               // expected error: only integer inputs
-    firstbithigh(inF0);         // expected error: only integer inputs
+    firstbithigh(inF0);           // expected error: only integer inputs
-    firstbitlow(inF0);          // expected error: only integer inputs
+    firstbitlow(inF0);            // expected error: only integer inputs
-    fma(inF0, inF1, inF2);      // expected error: only double inputs
+    fma(inF0, inF1, inF2);        // expected error: only double inputs
-    reversebits(inF0);          // expected error: only integer inputs
+    reversebits(inF0);            // expected error: only integer inputs
-    transpose(inF0);            // expected error: only valid on mats
+    transpose(inF0);              // expected error: only valid on mats
    return float4(1,2,3,4);
 }

--- a/Test/hlsl.intrinsics.negative.vert
+++ b/Test/hlsl.intrinsics.negative.vert
--- a/Test/hlsl.intrinsics.vert
+++ b/Test/hlsl.intrinsics.vert
-float VertexShaderFunction(float inF0, float inF1, float inF2)
+float VertexShaderFunction(float inF0, float inF1, float inF2, uint inU0, uint inU1)
 {
    all(inF0);
    abs(inF0);
    acos(inF0);
    any(inF0);
    asin(inF0);
+    asint(inF0);
+    asuint(inF0);
+    asfloat(inU0);
+    // asdouble(inU0, inU1);  // TODO: enable when HLSL parser used for intrinsics
    atan(inF0);
    atan2(inF0, inF1);
    ceil(inF0);
@@ -25,7 +29,6 @@ float VertexShaderFunction(float inF0, float inF1, float inF2)
    fmod(inF0, inF1);
    frac(inF0);
    frexp(inF0, inF1);
-    fwidth(inF0);
    isinf(inF0);
    isnan(inF0);
    ldexp(inF0, inF1);
@@ -62,13 +65,17 @@ float1 VertexShaderFunction(float1 inF0, float1 inF1, float1 inF2)
    return 0.0;
 }
-float2 VertexShaderFunction(float2 inF0, float2 inF1, float2 inF2)
+float2 VertexShaderFunction(float2 inF0, float2 inF1, float2 inF2, uint2 inU0, uint2 inU1)
 {
    all(inF0);
    abs(inF0);
    acos(inF0);
    any(inF0);
    asin(inF0);
+    asint(inF0);
+    asuint(inF0);
+    asfloat(inU0);
+    // asdouble(inU0, inU1);  // TODO: enable when HLSL parser used for intrinsics
    atan(inF0);
    atan2(inF0, inF1);
    ceil(inF0);
@@ -92,7 +99,6 @@ float2 VertexShaderFunction(float2 inF0, float2 inF1, float2 inF2)
    fmod(inF0, inF1);
    frac(inF0);
    frexp(inF0, inF1);
-    fwidth(inF0);
    isinf(inF0);
    isnan(inF0);
    ldexp(inF0, inF1);
@@ -128,13 +134,17 @@ float2 VertexShaderFunction(float2 inF0, float2 inF1, float2 inF2)
    return float2(1,2);
 }
-float3 VertexShaderFunction(float3 inF0, float3 inF1, float3 inF2)
+float3 VertexShaderFunction(float3 inF0, float3 inF1, float3 inF2, uint3 inU0, uint3 inU1)
 {
    all(inF0);
    abs(inF0);
    acos(inF0);
    any(inF0);
    asin(inF0);
+    asint(inF0);
+    asuint(inF0);
+    asfloat(inU0);
+    // asdouble(inU0, inU1);  // TODO: enable when HLSL parser used for intrinsics
    atan(inF0);
    atan2(inF0, inF1);
    ceil(inF0);
@@ -159,7 +169,6 @@ float3 VertexShaderFunction(float3 inF0, float3 inF1, float3 inF2)
    fmod(inF0, inF1);
    frac(inF0);
    frexp(inF0, inF1);
-    fwidth(inF0);
    isinf(inF0);
    isnan(inF0);
    ldexp(inF0, inF1);
@@ -195,13 +204,17 @@ float3 VertexShaderFunction(float3 inF0, float3 inF1, float3 inF2)
    return float3(1,2,3);
 }
-float4 VertexShaderFunction(float4 inF0, float4 inF1, float4 inF2)
+float4 VertexShaderFunction(float4 inF0, float4 inF1, float4 inF2, uint4 inU0, uint4 inU1)
 {
    all(inF0);
    abs(inF0);
    acos(inF0);
    any(inF0);
    asin(inF0);
+    asint(inF0);
+    asuint(inF0);
+    asfloat(inU0);
+    // asdouble(inU0, inU1);  // TODO: enable when HLSL parser used for intrinsics
    atan(inF0);
    atan2(inF0, inF1);
    ceil(inF0);
@@ -226,7 +239,6 @@ float4 VertexShaderFunction(float4 inF0, float4 inF1, float4 inF2)
    fmod(inF0, inF1);
    frac(inF0);
    frexp(inF0, inF1);
-    fwidth(inF0);
    isinf(inF0);
    isnan(inF0);
    ldexp(inF0, inF1);
@@ -262,6 +274,11 @@ float4 VertexShaderFunction(float4 inF0, float4 inF1, float4 inF2)
    return float4(1,2,3,4);
 }
+// TODO: for mats:
+//    asfloat(inU0); \
+//    asint(inF0); \
+//    asuint(inF0); \
 // TODO: FXC doesn't accept this with (), but glslang doesn't accept it without.
 #define MATFNS() \
    all(inF0); \
@@ -285,7 +302,6 @@ float4 VertexShaderFunction(float4 inF0, float4 inF1, float4 inF2)
    fmod(inF0, inF1); \
    frac(inF0); \
    frexp(inF0, inF1); \
-    fwidth(inF0); \
    ldexp(inF0, inF1); \
    log(inF0); \
    log10(inF0); \

--- a/glslang/Include/intermediate.h
+++ b/glslang/Include/intermediate.h
@@ -498,14 +498,24 @@ enum TOperator {
    // HLSL operations
    //
-    EOpClip,
+    EOpClip,                // discard if input value < 0
    EOpIsFinite,
-    EOpLog10,
+    EOpLog10,               // base 10 log
-    EOpRcp,
+    EOpRcp,                 // 1/x
-    EOpSaturate,
+    EOpSaturate,            // clamp from 0 to 1
-    EOpSinCos,
+    EOpSinCos,              // sin and cos in out parameters
-    EOpGenMul,  // mul(x,y) on any of mat/vec/scalars
+    EOpGenMul,              // mul(x,y) on any of mat/vec/scalars
-    EOpDst,
+    EOpDst,                 // x = 1, y=src0.y * src1.y, z=src0.z, w=src1.w
+    EOpInterlockedAdd,      // atomic ops, but uses [optional] out arg instead of return
+    EOpInterlockedAnd,      // ...
+    EOpInterlockedCompareExchange, // ...
+    EOpInterlockedCompareStore,    // ...
+    EOpInterlockedExchange, // ...
+    EOpInterlockedMax,      // ...
+    EOpInterlockedMin,      // ...
+    EOpInterlockedOr,       // ...
+    EOpInterlockedXor,      // ...
 };
 class TIntermTraverser;

--- a/gtests/Hlsl.FromFile.cpp
+++ b/gtests/Hlsl.FromFile.cpp
@@ -80,7 +80,9 @@ INSTANTIATE_TEST_CASE_P(
        {"hlsl.float4.frag", "PixelShaderFunction"},
        {"hlsl.forLoop.frag", "PixelShaderFunction"},
        {"hlsl.if.frag", "PixelShaderFunction"},
+        {"hlsl.intrinsics.comp", "ComputeShaderFunction"},
        {"hlsl.intrinsics.frag", "PixelShaderFunction"},
+        {"hlsl.intrinsics.negative.comp", "ComputeShaderFunction"},
        {"hlsl.intrinsics.negative.frag", "PixelShaderFunction"},
        {"hlsl.intrinsics.negative.vert", "VertexShaderFunction"},
        {"hlsl.intrinsics.vert", "VertexShaderFunction"},

--- a/hlsl/hlslParseHelper.cpp
+++ b/hlsl/hlslParseHelper.cpp
@@ -771,6 +771,29 @@ void HlslParseContext::handleFunctionArgument(TFunction* function, TIntermTyped*
        arguments = newArg;
 }
+// Map an HLSL Interlocked* operator to its post-decomposition AST opcode.
+// HLSL atomic operations have slightly different arguments than
+// GLSL/AST/SPIRV; the semantics are converted below in decomposeIntrinsic.
+// Returns the EOpImageAtomic* form when isImage is true, else EOpAtomic*.
+// Reports an error at loc and returns EOpNull for any op with no mapping.
+TOperator HlslParseContext::mapAtomicOp(const TSourceLoc& loc, TOperator op, bool isImage)
+{
+    switch (op) {
+    case EOpInterlockedAdd:             return isImage ? EOpImageAtomicAdd : EOpAtomicAdd;
+    case EOpInterlockedAnd:             return isImage ? EOpImageAtomicAnd : EOpAtomicAnd;
+    case EOpInterlockedCompareExchange: return isImage ? EOpImageAtomicCompSwap : EOpAtomicCompSwap;
+    case EOpInterlockedMax:             return isImage ? EOpImageAtomicMax : EOpAtomicMax;
+    case EOpInterlockedMin:             return isImage ? EOpImageAtomicMin : EOpAtomicMin;
+    case EOpInterlockedOr:              return isImage ? EOpImageAtomicOr : EOpAtomicOr;
+    case EOpInterlockedXor:             return isImage ? EOpImageAtomicXor : EOpAtomicXor;
+    case EOpInterlockedExchange:        return isImage ? EOpImageAtomicExchange : EOpAtomicExchange;
+    case EOpInterlockedCompareStore:  // TODO: not mapped yet; falls through to the error path below
+    default:
+        error(loc, "unknown atomic operation", "unknown op", "");
+        return EOpNull;
+    }
+}
 // Optionally decompose intrinsics to AST opcodes.
 //
 void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*& node, TIntermNode* arguments)
@@ -825,6 +848,7 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*& 
            clamp->getSequence().push_back(intermediate.addConstantUnion(1, type0, loc, true));
            clamp->setLoc(loc);
            clamp->setType(node->getType());
+            clamp->getWritableType().getQualifier().makeTemporary();
            node = clamp;
            break;
@@ -944,6 +968,61 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*& 
            break;
        }
+    case EOpInterlockedAdd: // optional last argument (if present) is assigned from return value
+    case EOpInterlockedMin: // ...
+    case EOpInterlockedMax: // ...
+    case EOpInterlockedAnd: // ...
+    case EOpInterlockedOr:  // ...
+    case EOpInterlockedXor: // ...
+    case EOpInterlockedExchange: // always has output arg
+        {
+            TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();
+            TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();
+            const bool isImage = arg0->getType().isImage();
+            const TOperator atomicOp = mapAtomicOp(loc, op, isImage);
+            if (argAggregate->getSequence().size() > 2) {
+                // optional output param is present.  return value goes to arg2.
+                TIntermTyped* arg2 = argAggregate->getSequence()[2]->getAsTyped();
+                TIntermAggregate* atomic = new TIntermAggregate(atomicOp);
+                atomic->getSequence().push_back(arg0);
+                atomic->getSequence().push_back(arg1);
+                atomic->setLoc(loc);
+                atomic->setType(arg0->getType());
+                atomic->getWritableType().getQualifier().makeTemporary();
+                node = intermediate.addAssign(EOpAssign, arg2, atomic, loc);
+            } else {
+                // Set the matching operator.  Since output is absent, this is all we need to do.
+                node->getAsAggregate()->setOperator(atomicOp);
+            }
+            break;
+        }
+    case EOpInterlockedCompareExchange:
+        {
+            TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();  // dest
+            TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();  // cmp
+            TIntermTyped* arg2 = argAggregate->getSequence()[2]->getAsTyped();  // value
+            TIntermTyped* arg3 = argAggregate->getSequence()[3]->getAsTyped();  // orig
+            const bool isImage = arg0->getType().isImage();
+            TIntermAggregate* atomic = new TIntermAggregate(mapAtomicOp(loc, op, isImage));
+            atomic->getSequence().push_back(arg0);
+            atomic->getSequence().push_back(arg1);
+            atomic->getSequence().push_back(arg2);
+            atomic->setLoc(loc);
+            atomic->setType(arg2->getType());
+            atomic->getWritableType().getQualifier().makeTemporary();
+            node = intermediate.addAssign(EOpAssign, arg3, atomic, loc);
+            break;
+        }
    default:
        break; // most pass through unchanged
    }

--- a/hlsl/hlslParseHelper.h
+++ b/hlsl/hlslParseHelper.h
@@ -155,6 +155,7 @@ protected:
    TIntermNode* executeInitializer(const TSourceLoc&, TIntermTyped* initializer, TVariable* variable);
    TIntermTyped* convertInitializerList(const TSourceLoc&, const TType&, TIntermTyped* initializer);
    TOperator mapTypeToConstructorOp(const TType&) const;
+    TOperator mapAtomicOp(const TSourceLoc& loc, TOperator op, bool isImage);
    void outputMessage(const TSourceLoc&, const char* szReason, const char* szToken,
                       const char* szExtraInfoFormat, TPrefixType prefix,
                       va_list args);

--- a/hlsl/hlslParseables.cpp
+++ b/hlsl/hlslParseables.cpp
@@ -296,20 +296,26 @@ void TBuiltInParseablesHlsl::initialize(int version, EProfile profile, int spv, 
        { "fmod",                             nullptr, nullptr,   "SVM,",       "F,",     EShLangAll },
        { "frac",                             nullptr, nullptr,   "SVM",        "F",      EShLangAll },
        { "frexp",                            nullptr, nullptr,   "SVM,",       "F,",     EShLangAll },
-        { "fwidth",                           nullptr, nullptr,   "SVM",        "F",      EShLangAll },
+        { "fwidth",                           nullptr, nullptr,   "SVM",        "F",      EShLangFragmentMask },
        { "GetRenderTargetSampleCount",       "S",     "U",       "-",          "-",      EShLangAll },
        { "GetRenderTargetSamplePosition",    "V2",    "F",       "V1",         "I",      EShLangAll },
        { "GroupMemoryBarrier",               nullptr, nullptr,   "-",          "-",      EShLangComputeMask },
        { "GroupMemoryBarrierWithGroupSync",  nullptr, nullptr,   "-",          "-",      EShLangComputeMask },
        { "InterlockedAdd",                   "-",     "-",       "SVM,,>",     "UI,,",   EShLangFragmentMask | EShLangComputeMask },
+        { "InterlockedAdd",                   "-",     "-",       "SVM,",       "UI,",    EShLangFragmentMask | EShLangComputeMask },
        { "InterlockedAnd",                   "-",     "-",       "SVM,,>",     "UI,,",   EShLangFragmentMask | EShLangComputeMask },
+        { "InterlockedAnd",                   "-",     "-",       "SVM,",       "UI,",    EShLangFragmentMask | EShLangComputeMask },
        { "InterlockedCompareExchange",       "-",     "-",       "SVM,,,>",    "UI,,,",  EShLangFragmentMask | EShLangComputeMask },
        { "InterlockedCompareStore",          "-",     "-",       "SVM,,",      "UI,,",   EShLangFragmentMask | EShLangComputeMask },
        { "InterlockedExchange",              "-",     "-",       "SVM,,>",     "UI,,",   EShLangFragmentMask | EShLangComputeMask },
        { "InterlockedMax",                   "-",     "-",       "SVM,,>",     "UI,,",   EShLangFragmentMask | EShLangComputeMask },
+        { "InterlockedMax",                   "-",     "-",       "SVM,",       "UI,",    EShLangFragmentMask | EShLangComputeMask },
        { "InterlockedMin",                   "-",     "-",       "SVM,,>",     "UI,,",   EShLangFragmentMask | EShLangComputeMask },
+        { "InterlockedMin",                   "-",     "-",       "SVM,",       "UI,",    EShLangFragmentMask | EShLangComputeMask },
        { "InterlockedOr",                    "-",     "-",       "SVM,,>",     "UI,,",   EShLangFragmentMask | EShLangComputeMask },
+        { "InterlockedOr",                    "-",     "-",       "SVM,",       "UI,",    EShLangFragmentMask | EShLangComputeMask },
        { "InterlockedXor",                   "-",     "-",       "SVM,,>",     "UI,,",   EShLangFragmentMask | EShLangComputeMask },
+        { "InterlockedXor",                   "-",     "-",       "SVM,",       "UI,",    EShLangFragmentMask | EShLangComputeMask },
        { "isfinite",                         nullptr, "B" ,      "SVM",        "F",      EShLangAll },
        { "isinf",                            nullptr, "B" ,      "SVM",        "F",      EShLangAll },
        { "isnan",                            nullptr, "B" ,      "SVM",        "F",      EShLangAll },
@@ -516,11 +522,11 @@ void TBuiltInParseablesHlsl::identifyBuiltIns(int version, EProfile profile, int
    // symbolTable.relateToOperator("AllMemoryBarrier");
    // symbolTable.relateToOperator("AllMemoryBarrierWithGroupSync");
    symbolTable.relateToOperator("any",                         EOpAny);
-    // symbolTable.relateToOperator("asdouble");
+    symbolTable.relateToOperator("asdouble",                    EOpUint64BitsToDouble);
-    // symbolTable.relateToOperator("asfloat");
+    symbolTable.relateToOperator("asfloat",                     EOpIntBitsToFloat);
    symbolTable.relateToOperator("asin",                        EOpAsin);
-    // symbolTable.relateToOperator("asint");
+    symbolTable.relateToOperator("asint",                       EOpFloatBitsToInt);
-    // symbolTable.relateToOperator("asuint");
+    symbolTable.relateToOperator("asuint",                      EOpFloatBitsToUint);
    symbolTable.relateToOperator("atan",                        EOpAtan);
    symbolTable.relateToOperator("atan2",                       EOpAtan);
    symbolTable.relateToOperator("ceil",                        EOpCeil);
@@ -566,15 +572,15 @@ void TBuiltInParseablesHlsl::identifyBuiltIns(int version, EProfile profile, int
    // symbolTable.relateToOperator("GetRenderTargetSamplePosition");
    // symbolTable.relateToOperator("GroupMemoryBarrier");
    // symbolTable.relateToOperator("GroupMemoryBarrierWithGroupSync");
-    // symbolTable.relateToOperator("InterlockedAdd");
+    symbolTable.relateToOperator("InterlockedAdd",              EOpInterlockedAdd);
-    // symbolTable.relateToOperator("InterlockedAnd");
+    symbolTable.relateToOperator("InterlockedAnd",              EOpInterlockedAnd);
-    // symbolTable.relateToOperator("InterlockedCompareExchange");
+    symbolTable.relateToOperator("InterlockedCompareExchange",  EOpInterlockedCompareExchange);
-    // symbolTable.relateToOperator("InterlockedCompareStore");
+    symbolTable.relateToOperator("InterlockedCompareStore",     EOpInterlockedCompareStore);
-    // symbolTable.relateToOperator("InterlockedExchange");
+    symbolTable.relateToOperator("InterlockedExchange",         EOpInterlockedExchange);
-    // symbolTable.relateToOperator("InterlockedMax");
+    symbolTable.relateToOperator("InterlockedMax",              EOpInterlockedMax);
-    // symbolTable.relateToOperator("InterlockedMin");
+    symbolTable.relateToOperator("InterlockedMin",              EOpInterlockedMin);
-    // symbolTable.relateToOperator("InterlockedOr");
+    symbolTable.relateToOperator("InterlockedOr",               EOpInterlockedOr);
-    // symbolTable.relateToOperator("InterlockedXor");
+    symbolTable.relateToOperator("InterlockedXor",              EOpInterlockedXor);
    symbolTable.relateToOperator("isfinite",                    EOpIsFinite);
    symbolTable.relateToOperator("isinf",                       EOpIsInf);
    symbolTable.relateToOperator("isnan",                       EOpIsNan);