Add decompositions for some HLSL intrinsics.

592860ca · LoopDawg · 41ebc429 · 592860ca · 592860ca · 592860ca
Commit 592860ca authored Jun 09, 2016 by LoopDawg
11 changed files
--- a/SPIRV/GlslangToSpv.cpp
+++ b/SPIRV/GlslangToSpv.cpp
 //
-//Copyright (C) 2014-2015 LunarG, Inc.
+//Copyright (C) 2014-2016 LunarG, Inc.
 //Copyright (C) 2015-2016 Google, Inc.
 //
 //All rights reserved.
@@ -3032,7 +3032,7 @@ spv::Id TGlslangToSpvTraverser::createBinaryMatrixOperation(spv::Op op, spv::Dec
        return builder.setPrecision(result, precision);
    }
-    // Handle component-wise +, -, *, and / for all combinations of type.
+    // Handle component-wise +, -, *, %, and / for all combinations of type.
    // The result type of all of them is the same type as the (a) matrix operand.
    // The algorithm is to:
    //   - break the matrix(es) into vectors
@@ -3043,6 +3043,7 @@ spv::Id TGlslangToSpvTraverser::createBinaryMatrixOperation(spv::Op op, spv::Dec
    case spv::OpFAdd:
    case spv::OpFSub:
    case spv::OpFDiv:
+    case spv::OpFMod:
    case spv::OpFMul:
    {
        // one time set up...
@@ -3209,6 +3210,9 @@ spv::Id TGlslangToSpvTraverser::createUnaryOperation(glslang::TOperator op, spv:
    case glslang::EOpIsInf:
        unaryOp = spv::OpIsInf;
        break;
+    case glslang::EOpIsFinite:
+        unaryOp = spv::OpIsFinite;
+        break;
    case glslang::EOpFloatBitsToInt:
    case glslang::EOpFloatBitsToUint:

--- a/Test/baseResults/hlsl.intrinsics.frag.out
+++ b/Test/baseResults/hlsl.intrinsics.frag.out
--- a/Test/baseResults/hlsl.intrinsics.vert.out
+++ b/Test/baseResults/hlsl.intrinsics.vert.out
--- a/Test/hlsl.intrinsics.frag
+++ b/Test/hlsl.intrinsics.frag
@@ -9,6 +9,7 @@ float PixelShaderFunction(float inF0, float inF1, float inF2)
    atan2(inF0, inF1);
    ceil(inF0);
    clamp(inF0, inF1, inF2);
+    clip(inF0);
    cos(inF0);
    cosh(inF0);
    countbits(7);
@@ -36,17 +37,20 @@ float PixelShaderFunction(float inF0, float inF1, float inF2)
    isnan(inF0);
    ldexp(inF0, inF1);
    log(inF0);
+    log10(inF0);
    log2(inF0);
    max(inF0, inF1);
    min(inF0, inF1);
-    // TODO: mul(inF0, inF1);
    pow(inF0, inF1);
    radians(inF0);
+    rcp(inF0);
    reversebits(2);
    round(inF0);
    rsqrt(inF0);
+    saturate(inF0);
    sign(inF0);
    sin(inF0);
+    sincos(inF0, inF1, inF2);
    sinh(inF0);
    smoothstep(inF0, inF1, inF2);
    sqrt(inF0);
@@ -76,6 +80,7 @@ float2 PixelShaderFunction(float2 inF0, float2 inF1, float2 inF2)
    atan2(inF0, inF1);
    ceil(inF0);
    clamp(inF0, inF1, inF2);
+    clip(inF0);
    cos(inF0);
    cosh(inF0);
    countbits(int2(7,3));
@@ -107,20 +112,23 @@ float2 PixelShaderFunction(float2 inF0, float2 inF1, float2 inF2)
    ldexp(inF0, inF1);
    length(inF0);
    log(inF0);
+    log10(inF0);
    log2(inF0);
    max(inF0, inF1);
    min(inF0, inF1);
-    // TODO: mul(inF0, inF1);
    normalize(inF0);
    pow(inF0, inF1);
    radians(inF0);
+    rcp(inF0);
    reflect(inF0, inF1);
    refract(inF0, inF1, 2.0);
    reversebits(int2(1,2));
    round(inF0);
    rsqrt(inF0);
+    saturate(inF0);
    sign(inF0);
    sin(inF0);
+    sincos(inF0, inF1, inF2);
    sinh(inF0);
    smoothstep(inF0, inF1, inF2);
    sqrt(inF0);
@@ -145,6 +153,7 @@ float3 PixelShaderFunction(float3 inF0, float3 inF1, float3 inF2)
    atan2(inF0, inF1);
    ceil(inF0);
    clamp(inF0, inF1, inF2);
+    clip(inF0);
    cos(inF0);
    cosh(inF0);
    countbits(int3(7,3,5));
@@ -177,20 +186,23 @@ float3 PixelShaderFunction(float3 inF0, float3 inF1, float3 inF2)
    ldexp(inF0, inF1);
    length(inF0);
    log(inF0);
+    log10(inF0);
    log2(inF0);
    max(inF0, inF1);
    min(inF0, inF1);
-    // TODO: mul(inF0, inF1);
    normalize(inF0);
    pow(inF0, inF1);
    radians(inF0);
+    rcp(inF0);
    reflect(inF0, inF1);
    refract(inF0, inF1, 2.0);
    reversebits(int3(1,2,3));
    round(inF0);
    rsqrt(inF0);
+    saturate(inF0);
    sign(inF0);
    sin(inF0);
+    sincos(inF0, inF1, inF2);
    sinh(inF0);
    smoothstep(inF0, inF1, inF2);
    sqrt(inF0);
@@ -215,6 +227,7 @@ float4 PixelShaderFunction(float4 inF0, float4 inF1, float4 inF2)
    atan2(inF0, inF1);
    ceil(inF0);
    clamp(inF0, inF1, inF2);
+    clip(inF0);
    cos(inF0);
    cosh(inF0);
    countbits(int4(7,3,5,2));
@@ -227,6 +240,7 @@ float4 PixelShaderFunction(float4 inF0, float4 inF1, float4 inF2)
    degrees(inF0);
    distance(inF0, inF1);
    dot(inF0, inF1);
+    dst(inF0, inF1);
    // EvaluateAttributeAtCentroid(inF0);
    // EvaluateAttributeAtSample(inF0, 0);
    // TODO: EvaluateAttributeSnapped(inF0, int2(1,2));
@@ -246,20 +260,23 @@ float4 PixelShaderFunction(float4 inF0, float4 inF1, float4 inF2)
    ldexp(inF0, inF1);
    length(inF0);
    log(inF0);
+    log10(inF0);
    log2(inF0);
    max(inF0, inF1);
    min(inF0, inF1);
-    // TODO: mul(inF0, inF1);
    normalize(inF0);
    pow(inF0, inF1);
    radians(inF0);
+    rcp(inF0);
    reflect(inF0, inF1);
    refract(inF0, inF1, 2.0);
    reversebits(int4(1,2,3,4));
    round(inF0);
    rsqrt(inF0);
+    saturate(inF0);
    sign(inF0);
    sin(inF0);
+    sincos(inF0, inF1, inF2);
    sinh(inF0);
    smoothstep(inF0, inF1, inF2);
    sqrt(inF0);
@@ -283,6 +300,7 @@ float4 PixelShaderFunction(float4 inF0, float4 inF1, float4 inF2)
    atan(inF0); \
    atan2(inF0, inF1); \
    ceil(inF0); \
+    clip(inF0); \
    clamp(inF0, inF1, inF2); \
    cos(inF0); \
    cosh(inF0); \
@@ -305,15 +323,18 @@ float4 PixelShaderFunction(float4 inF0, float4 inF1, float4 inF2)
    fwidth(inF0); \
    ldexp(inF0, inF1); \
    log(inF0); \
-    log2(inF0); \
+    log10(inF0); \
+    log2(inF0);      \
    max(inF0, inF1); \
    min(inF0, inF1); \
    pow(inF0, inF1); \
    radians(inF0); \
    round(inF0); \
    rsqrt(inF0); \
+    saturate(inF0); \
    sign(inF0); \
    sin(inF0); \
+    sincos(inF0, inF1, inF2); \
    sinh(inF0); \
    smoothstep(inF0, inF1, inF2); \
    sqrt(inF0); \
@@ -351,3 +372,36 @@ float4x4 PixelShaderFunction(float4x4 inF0, float4x4 inF1, float4x4 inF2)
    // TODO: ... add when float1 prototypes are generated
    return float4x4(4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4);
 }
+#define TESTGENMUL(ST, VT, MT) \
+    ST r0 = mul(inF0,  inF1);  \
+    VT r1 = mul(inFV0, inF0);  \
+    VT r2 = mul(inF0,  inFV0); \
+    ST r3 = mul(inFV0, inFV1); \
+    VT r4 = mul(inFM0, inFV0); \
+    VT r5 = mul(inFV0, inFM0); \
+    MT r6 = mul(inFM0, inF0);  \
+    MT r7 = mul(inF0, inFM0);  \
+    MT r8 = mul(inFM0, inFM1);
+void TestGenMul(float inF0, float inF1,          // scalar operands
+                float2 inFV0, float2 inFV1,      // vector operands
+                float2x2 inFM0, float2x2 inFM1)  // matrix operands
+{
+    TESTGENMUL(float, float2, float2x2);  // ST=float, VT=float2, MT=float2x2
+}
+void TestGenMul(float inF0, float inF1,          // scalar operands
+                float3 inFV0, float3 inFV1,      // vector operands
+                float3x3 inFM0, float3x3 inFM1)  // matrix operands
+{
+    TESTGENMUL(float, float3, float3x3);  // ST=float, VT=float3, MT=float3x3
+}
+void TestGenMul(float inF0, float inF1,          // scalar operands
+                float4 inFV0, float4 inFV1,      // vector operands
+                float4x4 inFM0, float4x4 inFM1)  // matrix operands
+{
+    TESTGENMUL(float, float4, float4x4);  // ST=float, VT=float4, MT=float4x4
+}
--- a/Test/hlsl.intrinsics.vert
+++ b/Test/hlsl.intrinsics.vert
@@ -30,6 +30,7 @@ float VertexShaderFunction(float inF0, float inF1, float inF2)
    isnan(inF0);
    ldexp(inF0, inF1);
    log(inF0);
+    log10(inF0);
    log2(inF0);
    max(inF0, inF1);
    min(inF0, inF1);
@@ -39,8 +40,10 @@ float VertexShaderFunction(float inF0, float inF1, float inF2)
    reversebits(2);
    round(inF0);
    rsqrt(inF0);
+    saturate(inF0);
    sign(inF0);
    sin(inF0);
+    sincos(inF0, inF1, inF2);
    sinh(inF0);
    smoothstep(inF0, inF1, inF2);
    sqrt(inF0);
@@ -95,6 +98,7 @@ float2 VertexShaderFunction(float2 inF0, float2 inF1, float2 inF2)
    ldexp(inF0, inF1);
    length(inF0);
    log(inF0);
+    log10(inF0);
    log2(inF0);
    max(inF0, inF1);
    min(inF0, inF1);
@@ -107,8 +111,10 @@ float2 VertexShaderFunction(float2 inF0, float2 inF1, float2 inF2)
    reversebits(int2(1,2));
    round(inF0);
    rsqrt(inF0);
+    saturate(inF0);
    sign(inF0);
    sin(inF0);
+    sincos(inF0, inF1, inF2);
    sinh(inF0);
    smoothstep(inF0, inF1, inF2);
    sqrt(inF0);
@@ -159,6 +165,7 @@ float3 VertexShaderFunction(float3 inF0, float3 inF1, float3 inF2)
    ldexp(inF0, inF1);
    length(inF0);
    log(inF0);
+    log10(inF0);
    log2(inF0);
    max(inF0, inF1);
    min(inF0, inF1);
@@ -171,8 +178,10 @@ float3 VertexShaderFunction(float3 inF0, float3 inF1, float3 inF2)
    reversebits(int3(1,2,3));
    round(inF0);
    rsqrt(inF0);
+    saturate(inF0);
    sign(inF0);
    sin(inF0);
+    sincos(inF0, inF1, inF2);
    sinh(inF0);
    smoothstep(inF0, inF1, inF2);
    sqrt(inF0);
@@ -203,6 +212,7 @@ float4 VertexShaderFunction(float4 inF0, float4 inF1, float4 inF2)
    degrees(inF0);
    distance(inF0, inF1);
    dot(inF0, inF1);
+    dst(inF0, inF1);
    // EvaluateAttributeAtCentroid(inF0);
    // EvaluateAttributeAtSample(inF0, 0);
    // TODO: EvaluateAttributeSnapped(inF0, int2(1,2));
@@ -222,6 +232,7 @@ float4 VertexShaderFunction(float4 inF0, float4 inF1, float4 inF2)
    ldexp(inF0, inF1);
    length(inF0);
    log(inF0);
+    log10(inF0);
    log2(inF0);
    max(inF0, inF1);
    min(inF0, inF1);
@@ -234,8 +245,10 @@ float4 VertexShaderFunction(float4 inF0, float4 inF1, float4 inF2)
    reversebits(int4(1,2,3,4));
    round(inF0);
    rsqrt(inF0);
+    saturate(inF0);
    sign(inF0);
    sin(inF0);
+    sincos(inF0, inF1, inF2);
    sinh(inF0);
    smoothstep(inF0, inF1, inF2);
    sqrt(inF0);
@@ -275,6 +288,7 @@ float4 VertexShaderFunction(float4 inF0, float4 inF1, float4 inF2)
    fwidth(inF0); \
    ldexp(inF0, inF1); \
    log(inF0); \
+    log10(inF0); \
    log2(inF0); \
    max(inF0, inF1); \
    min(inF0, inF1); \
@@ -282,8 +296,10 @@ float4 VertexShaderFunction(float4 inF0, float4 inF1, float4 inF2)
    radians(inF0); \
    round(inF0); \
    rsqrt(inF0); \
+    saturate(inF0); \
    sign(inF0); \
    sin(inF0); \
+    sincos(inF0, inF1, inF2); \
    sinh(inF0); \
    smoothstep(inF0, inF1, inF2); \
    sqrt(inF0); \
@@ -321,3 +337,36 @@ float4x4 VertexShaderFunction(float4x4 inF0, float4x4 inF1, float4x4 inF2)
    // TODO: ... add when float1 prototypes are generated
    return float4x4(4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4);
 }
+#define TESTGENMUL(ST, VT, MT) \
+    ST r0 = mul(inF0,  inF1);  \
+    VT r1 = mul(inFV0, inF0);  \
+    VT r2 = mul(inF0,  inFV0); \
+    ST r3 = mul(inFV0, inFV1); \
+    VT r4 = mul(inFM0, inFV0); \
+    VT r5 = mul(inFV0, inFM0); \
+    MT r6 = mul(inFM0, inF0);  \
+    MT r7 = mul(inF0, inFM0);  \
+    MT r8 = mul(inFM0, inFM1);
+void TestGenMul(float inF0, float inF1,          // scalar operands
+                float2 inFV0, float2 inFV1,      // vector operands
+                float2x2 inFM0, float2x2 inFM1)  // matrix operands
+{
+    TESTGENMUL(float, float2, float2x2);  // ST=float, VT=float2, MT=float2x2
+}
+void TestGenMul(float inF0, float inF1,          // scalar operands
+                float3 inFV0, float3 inFV1,      // vector operands
+                float3x3 inFM0, float3x3 inFM1)  // matrix operands
+{
+    TESTGENMUL(float, float3, float3x3);  // ST=float, VT=float3, MT=float3x3
+}
+void TestGenMul(float inF0, float inF1,          // scalar operands
+                float4 inFV0, float4 inFV1,      // vector operands
+                float4x4 inFM0, float4x4 inFM1)  // matrix operands
+{
+    TESTGENMUL(float, float4, float4x4);  // ST=float, VT=float4, MT=float4x4
+}
--- a/glslang/Include/Types.h
+++ b/glslang/Include/Types.h
 //
 //Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
-//Copyright (C) 2012-2015 LunarG, Inc.
+//Copyright (C) 2012-2016 LunarG, Inc.
 //Copyright (C) 2015-2016 Google, Inc.
 //
 //All rights reserved.
@@ -1043,8 +1043,9 @@ public:
    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
    // for "empty" type (no args) or simple scalar/vector/matrix
-    explicit TType(TBasicType t = EbtVoid, TStorageQualifier q = EvqTemporary, int vs = 1, int mc = 0, int mr = 0) :
+    explicit TType(TBasicType t = EbtVoid, TStorageQualifier q = EvqTemporary, int vs = 1, int mc = 0, int mr = 0,
-                            basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(false),
+                   bool isVector = false) :
+                            basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(isVector && vs == 1),
                            arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr)
                            {
                                sampler.clear();
@@ -1052,8 +1053,9 @@ public:
                                qualifier.storage = q;
                            }
    // for explicit precision qualifier
-    TType(TBasicType t, TStorageQualifier q, TPrecisionQualifier p, int vs = 1, int mc = 0, int mr = 0) :
+    TType(TBasicType t, TStorageQualifier q, TPrecisionQualifier p, int vs = 1, int mc = 0, int mr = 0, 
-                            basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(false),
+          bool isVector = false) :
+                            basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(isVector && vs == 1),
                            arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr)
                            {
                                sampler.clear();

--- a/glslang/Include/intermediate.h
+++ b/glslang/Include/intermediate.h
 //
 //Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
-//Copyright (C) 2012-2013 LunarG, Inc.
+//Copyright (C) 2012-2016 LunarG, Inc.
 //
 //All rights reserved.
 //
@@ -493,6 +493,19 @@ enum TOperator {
    EOpBitCount,
    EOpFindLSB,
    EOpFindMSB,
+    //
+    // HLSL operations
+    //
+    EOpClip,
+    EOpIsFinite,
+    EOpLog10,
+    EOpRcp,
+    EOpSaturate,
+    EOpSinCos,
+    EOpGenMul,  // mul(x,y) on any of mat/vec/scalars
+    EOpDst,
 };
 class TIntermTraverser;

--- a/glslang/MachineIndependent/intermOut.cpp
+++ b/glslang/MachineIndependent/intermOut.cpp
 //
 //Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
-//Copyright (C) 2012-2013 LunarG, Inc.
+//Copyright (C) 2012-2016 LunarG, Inc.
 //
 //All rights reserved.
 //
@@ -359,6 +359,12 @@ bool TOutputTraverser::visitUnary(TVisit /* visit */, TIntermUnary* node)
    case EOpAllInvocations:         out.debug << "allInvocations";        break;
    case EOpAllInvocationsEqual:    out.debug << "allInvocationsEqual";   break;
+    case EOpClip:                   out.debug << "clip";                  break;
+    case EOpIsFinite:               out.debug << "isfinite";              break;
+    case EOpLog10:                  out.debug << "log10";                 break;
+    case EOpRcp:                    out.debug << "rcp";                   break;
+    case EOpSaturate:               out.debug << "saturate";              break;
    default: out.debug.message(EPrefixError, "Bad unary op");
    }
@@ -534,6 +540,9 @@ bool TOutputTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node
    case EOpInterpolateAtSample:   out.debug << "interpolateAtSample";    break;
    case EOpInterpolateAtOffset:   out.debug << "interpolateAtOffset";    break;
+    case EOpSinCos:                     out.debug << "sincos";                break;
+    case EOpGenMul:                     out.debug << "mul";                   break;
    default: out.debug.message(EPrefixError, "Bad aggregation op");
    }

--- a/hlsl/hlslParseHelper.cpp
+++ b/hlsl/hlslParseHelper.cpp
 //
 //Copyright (C) 2016 Google, Inc.
+//Copyright (C) 2016 LunarG, Inc.
 //
 //All rights reserved.
 //
@@ -765,6 +766,184 @@ void HlslParseContext::handleFunctionArgument(TFunction* function, TIntermTyped*
        arguments = newArg;
 }
+// Optionally decompose intrinsics to AST opcodes.
+//
+//   loc       - source location attached to the nodes created here.
+//   node      - in/out: the node built for the intrinsic call. For the operators handled
+//               below it is either modified in place or replaced by an equivalent tree of
+//               existing AST opcodes; for every other operator it is left untouched.
+//   arguments - the call's argument node. The multi-argument cases (mul, sincos, dst) read
+//               their operands from its aggregate sequence; the single-argument cases read
+//               the operand from the unary node instead.
+//
+void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*& node, TIntermNode* arguments)
+{
+    // HLSL intrinsics can be passed through to native AST opcodes, or decomposed here to existing AST
+    // opcodes for compatibility with existing software stacks.
+    static const bool decomposeHlslIntrinsics = true;
+    // Nothing to do when decomposition is disabled or the node is not an operator.
+    if (!decomposeHlslIntrinsics || !node || !node->getAsOperator())
+        return;
+    // argAggregate is only dereferenced by the multi-argument cases, fnUnary only by the
+    // single-argument cases.
+    const TIntermAggregate* argAggregate = arguments ? arguments->getAsAggregate() : nullptr;
+    TIntermUnary* fnUnary = node->getAsUnaryNode();
+    const TOperator op  = node->getAsOperator()->getOp();
+    switch (op) {
+    case EOpGenMul:
+        {
+            // mul(a,b) -> MatrixTimesMatrix, MatrixTimesVector, MatrixTimesScalar, VectorTimesScalar, Dot, Mul
+            TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();
+            TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();
+            if (arg0->isVector() && arg1->isVector()) {  // vec * vec
+                // Keep the existing call node and just retarget it to a dot product.
+                node->getAsAggregate()->setOperator(EOpDot);
+            } else {
+                // Every other operand combination goes through the generic binary multiply.
+                node = handleBinaryMath(loc, "mul", EOpMul, arg0, arg1);
+            }
+            break;
+        }
+    case EOpRcp:
+        {
+            // rcp(a) -> 1 / a
+            TIntermTyped* arg0 = fnUnary->getOperand();
+            TBasicType   type0 = arg0->getBasicType();
+            TIntermTyped* one  = intermediate.addConstantUnion(1, type0, loc, true);
+            node  = handleBinaryMath(loc, "rcp", EOpDiv, one, arg0);
+            break;
+        }
+    case EOpSaturate:
+        {
+            // saturate(a) -> clamp(a,0,1)
+            TIntermTyped* arg0 = fnUnary->getOperand();
+            TBasicType   type0 = arg0->getBasicType();
+            TIntermAggregate* clamp = new TIntermAggregate(EOpClamp);
+            clamp->getSequence().push_back(arg0);
+            clamp->getSequence().push_back(intermediate.addConstantUnion(0, type0, loc, true));
+            clamp->getSequence().push_back(intermediate.addConstantUnion(1, type0, loc, true));
+            clamp->setLoc(loc);
+            // The clamp takes over the type of the saturate call it replaces.
+            clamp->setType(node->getType());
+            node = clamp;
+            break;
+        }
+    case EOpSinCos:
+        {
+            // sincos(a,b,c) -> b = sin(a), c = cos(a)
+            TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();
+            TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();
+            TIntermTyped* arg2 = argAggregate->getSequence()[2]->getAsTyped();
+            TIntermTyped* sinStatement = handleUnaryMath(loc, "sin", EOpSin, arg0);
+            TIntermTyped* cosStatement = handleUnaryMath(loc, "cos", EOpCos, arg0);
+            TIntermTyped* sinAssign    = intermediate.addAssign(EOpAssign, arg1, sinStatement, loc);
+            TIntermTyped* cosAssign    = intermediate.addAssign(EOpAssign, arg2, cosStatement, loc);
+            // Bundle the two assignments into a single sequence node that replaces the call.
+            TIntermAggregate* compoundStatement = intermediate.makeAggregate(sinAssign, loc);
+            compoundStatement = intermediate.growAggregate(compoundStatement, cosAssign);
+            compoundStatement->setOperator(EOpSequence);
+            compoundStatement->setLoc(loc);
+            node = compoundStatement;
+            break;
+        }
+    case EOpClip:
+        {
+            // clip(a) -> if (any(a<0)) discard;
+            TIntermTyped*  arg0 = fnUnary->getOperand();
+            TBasicType     type0 = arg0->getBasicType();
+            TIntermTyped*  compareNode = nullptr;
+            // For non-scalars: per experiment with FXC compiler, discard if any component < 0.
+            if (!arg0->isScalar()) {
+                // component-wise compare: a < 0
+                TIntermAggregate* less = new TIntermAggregate(EOpLessThan);
+                less->getSequence().push_back(arg0);
+                less->setLoc(loc);
+                // make vec or mat of bool matching dimensions of input
+                less->setType(TType(EbtBool, EvqTemporary,
+                                    arg0->getType().getVectorSize(),
+                                    arg0->getType().getMatrixCols(),
+                                    arg0->getType().getMatrixRows(),
+                                    arg0->getType().isVector()));
+                // calculate # of components for comparison const
+                // (each dimension is clamped to at least 1 so an unused dimension does not zero the product)
+                const int constComponentCount = 
+                    std::max(arg0->getType().getVectorSize(), 1) *
+                    std::max(arg0->getType().getMatrixCols(), 1) *
+                    std::max(arg0->getType().getMatrixRows(), 1);
+                TConstUnion zero;
+                zero.setDConst(0.0);
+                TConstUnionArray zeros(constComponentCount, zero);
+                less->getSequence().push_back(intermediate.addConstantUnion(zeros, arg0->getType(), loc, true));
+                // Reduce the component-wise result to a single bool.
+                compareNode = intermediate.addBuiltInFunctionCall(loc, EOpAny, true, less, TType(EbtBool));
+            } else {
+                // Scalar: a plain a < 0 comparison is enough.
+                TIntermTyped* zero = intermediate.addConstantUnion(0, type0, loc, true);
+                compareNode = handleBinaryMath(loc, "clip", EOpLessThan, arg0, zero);
+            }
+            // if (compareNode) kill; -- there is no else branch.
+            TIntermBranch* killNode = intermediate.addBranch(EOpKill, loc);
+            node = new TIntermSelection(compareNode, killNode, nullptr);
+            node->setLoc(loc);
+            break;
+        }
+    case EOpLog10:
+        {
+            // log10(a) -> log2(a) * 0.301029995663981  (== 1/log2(10))
+            TIntermTyped* arg0 = fnUnary->getOperand();
+            TIntermTyped* log2 = handleUnaryMath(loc, "log2", EOpLog2, arg0);
+            TIntermTyped* base = intermediate.addConstantUnion(0.301029995663981f, EbtFloat, loc, true);
+            node  = handleBinaryMath(loc, "mul", EOpMul, log2, base);
+            break;
+        }
+    case EOpDst:
+        {
+            // dest.x = 1;
+            // dest.y = src0.y * src1.y;
+            // dest.z = src0.z;
+            // dest.w = src1.w;
+            TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();
+            TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();
+            // Component indices; .x is never read from either source, so no index 0 is needed.
+            TIntermTyped* y = intermediate.addConstantUnion(1, loc, true);
+            TIntermTyped* z = intermediate.addConstantUnion(2, loc, true);
+            TIntermTyped* w = intermediate.addConstantUnion(3, loc, true);
+            TIntermTyped* src0y = intermediate.addIndex(EOpIndexDirect, arg0, y, loc);
+            TIntermTyped* src1y = intermediate.addIndex(EOpIndexDirect, arg1, y, loc);
+            TIntermTyped* src0z = intermediate.addIndex(EOpIndexDirect, arg0, z, loc);
+            TIntermTyped* src1w = intermediate.addIndex(EOpIndexDirect, arg1, w, loc);
+            TIntermAggregate* dst = new TIntermAggregate(EOpConstructVec4);
+            dst->getSequence().push_back(intermediate.addConstantUnion(1.0, EbtFloat, loc, true));
+            dst->getSequence().push_back(handleBinaryMath(loc, "mul", EOpMul, src0y, src1y));
+            dst->getSequence().push_back(src0z);
+            dst->getSequence().push_back(src1w);
+            // Set the float4 result type explicitly, as is done for the other hand-built
+            // aggregates in this function (clamp, less).
+            dst->setType(TType(EbtFloat, EvqTemporary, 4));
+            dst->setLoc(loc);
+            node = dst;
+            break;
+        }
+    default:
+        break; // most pass through unchanged
+    }
+}
 //
 // Handle seeing function call syntax in the grammar, which could be any of
 //  - .length() method
@@ -867,6 +1046,8 @@ TIntermTyped* HlslParseContext::handleFunctionCall(const TSourceLoc& loc, TFunct
                }
                result = addOutputArgumentConversions(*fnCandidate, *result->getAsAggregate());
            }
+            decomposeIntrinsic(loc, result, arguments);
        }
    }

--- a/hlsl/hlslParseHelper.h
+++ b/hlsl/hlslParseHelper.h
 //
 //Copyright (C) 2016 Google, Inc.
+//Copyright (C) 2016 LunarG, Inc.
 //
 //All rights reserved.
 //
@@ -85,6 +86,7 @@ public:
    TIntermAggregate* handleFunctionDefinition(const TSourceLoc&, TFunction&);
    void handleFunctionArgument(TFunction*, TIntermTyped*& arguments, TIntermTyped* newArg);
    TIntermTyped* handleFunctionCall(const TSourceLoc&, TFunction*, TIntermNode*);
+    void decomposeIntrinsic(const TSourceLoc&, TIntermTyped*& node, TIntermNode* arguments);
    TIntermTyped* handleLengthMethod(const TSourceLoc&, TFunction*, TIntermNode*);
    void addInputArgumentConversions(const TFunction&, TIntermNode*&) const;
    TIntermTyped* addOutputArgumentConversions(const TFunction&, TIntermAggregate&) const;

--- a/hlsl/hlslParseables.cpp
+++ b/hlsl/hlslParseables.cpp
@@ -279,7 +279,7 @@ void TBuiltInParseablesHlsl::initialize(int version, EProfile profile, int spv, 
        { "DeviceMemoryBarrierWithGroupSync", nullptr, nullptr,   "-",          "-",      EShLangComputeMask },
        { "distance",                         "S",     "F",       "V,",         "F,",     EShLangAll },
        { "dot",                              "S",     nullptr,   "V,",         "FI,",    EShLangAll },
-        { "dst",                              nullptr, nullptr,   "V,",         "F,",     EShLangAll },
+        { "dst",                              nullptr, nullptr,   "V4,V4",      "F,",     EShLangAll },
        // { "errorf",                           "-",     "-",       "",         "",     EShLangAll }, TODO: varargs
        { "EvaluateAttributeAtCentroid",      nullptr, nullptr,   "SVM",        "F",      EShLangFragmentMask },
        { "EvaluateAttributeAtSample",        nullptr, nullptr,   "SVM,S",      "F,U",    EShLangFragmentMask },
@@ -324,6 +324,7 @@ void TBuiltInParseablesHlsl::initialize(int version, EProfile profile, int spv, 
        { "min",                              nullptr, nullptr,   "SVM,",       "FI,",    EShLangAll },
        { "modf",                             nullptr, nullptr,   "SVM,>",      "FI,",    EShLangAll },
        { "msad4",                            "V4",    "U",       "S,V2,V4",    "U,,",    EShLangAll },
+        // TODO: fix matrix return size for non-square mats used with mul opcode
        { "mul",                              "S",     nullptr,   "S,S",        "FI,",    EShLangAll },
        { "mul",                              "V",     nullptr,   "S,V",        "FI,",    EShLangAll },
        { "mul",                              "M",     nullptr,   "S,M",        "FI,",    EShLangAll },
@@ -508,7 +509,7 @@ void TBuiltInParseablesHlsl::initialize(const TBuiltInResource &resources, int v
 void TBuiltInParseablesHlsl::identifyBuiltIns(int version, EProfile profile, int spv, int vulkan, EShLanguage language,
                                              TSymbolTable& symbolTable)
 {
-    // symbolTable.relateToOperator("abort");
+    // symbolTable.relateToOperator("abort",                       EOpAbort);
    symbolTable.relateToOperator("abs",                         EOpAbs);
    symbolTable.relateToOperator("acos",                        EOpAcos);
    symbolTable.relateToOperator("all",                         EOpAll);
@@ -525,12 +526,12 @@ void TBuiltInParseablesHlsl::identifyBuiltIns(int version, EProfile profile, int
    symbolTable.relateToOperator("ceil",                        EOpCeil);
    // symbolTable.relateToOperator("CheckAccessFullyMapped");
    symbolTable.relateToOperator("clamp",                       EOpClamp);
-    // symbolTable.relateToOperator("clip");
+    symbolTable.relateToOperator("clip",                        EOpClip);
    symbolTable.relateToOperator("cos",                         EOpCos);
    symbolTable.relateToOperator("cosh",                        EOpCosh);
    symbolTable.relateToOperator("countbits",                   EOpBitCount);
    symbolTable.relateToOperator("cross",                       EOpCross);
-    // symbolTable.relateToOperator("D3DCOLORtoUBYTE4");
+    // symbolTable.relateToOperator("D3DCOLORtoUBYTE4",            EOpD3DCOLORtoUBYTE4);
    symbolTable.relateToOperator("ddx",                         EOpDPdx);
    symbolTable.relateToOperator("ddx_coarse",                  EOpDPdxCoarse);
    symbolTable.relateToOperator("ddx_fine",                    EOpDPdxFine);
@@ -543,7 +544,7 @@ void TBuiltInParseablesHlsl::identifyBuiltIns(int version, EProfile profile, int
    // symbolTable.relateToOperator("DeviceMemoryBarrierWithGroupSync");
    symbolTable.relateToOperator("distance",                    EOpDistance);
    symbolTable.relateToOperator("dot",                         EOpDot);
-    // symbolTable.relateToOperator("dst");
+    symbolTable.relateToOperator("dst",                         EOpDst);
    // symbolTable.relateToOperator("errorf");
    symbolTable.relateToOperator("EvaluateAttributeAtCentroid", EOpInterpolateAtCentroid);
    symbolTable.relateToOperator("EvaluateAttributeAtSample",   EOpInterpolateAtSample);
@@ -557,7 +558,7 @@ void TBuiltInParseablesHlsl::identifyBuiltIns(int version, EProfile profile, int
    symbolTable.relateToOperator("firstbitlow",                 EOpFindLSB);
    symbolTable.relateToOperator("floor",                       EOpFloor);
    symbolTable.relateToOperator("fma",                         EOpFma);
-    // symbolTable.relateToOperator("fmod");
+    symbolTable.relateToOperator("fmod",                        EOpMod);
    symbolTable.relateToOperator("frac",                        EOpFract);
    symbolTable.relateToOperator("frexp",                       EOpFrexp);
    symbolTable.relateToOperator("fwidth",                      EOpFwidth);
@@ -574,21 +575,21 @@ void TBuiltInParseablesHlsl::identifyBuiltIns(int version, EProfile profile, int
    // symbolTable.relateToOperator("InterlockedMin");
    // symbolTable.relateToOperator("InterlockedOr");
    // symbolTable.relateToOperator("InterlockedXor");
-    // symbolTable.relateToOperator("isfinite");
+    symbolTable.relateToOperator("isfinite",                    EOpIsFinite);
    symbolTable.relateToOperator("isinf",                       EOpIsInf);
    symbolTable.relateToOperator("isnan",                       EOpIsNan);
    symbolTable.relateToOperator("ldexp",                       EOpLdexp);
    symbolTable.relateToOperator("length",                      EOpLength);
    // symbolTable.relateToOperator("lit");
    symbolTable.relateToOperator("log",                         EOpLog);
-    // symbolTable.relateToOperator("log10");
+    symbolTable.relateToOperator("log10",                       EOpLog10);
    symbolTable.relateToOperator("log2",                        EOpLog2);
    // symbolTable.relateToOperator("mad");
    symbolTable.relateToOperator("max",                         EOpMax);
    symbolTable.relateToOperator("min",                         EOpMin);
    symbolTable.relateToOperator("modf",                        EOpModf);
-    // symbolTable.relateToOperator("msad4");
+    // symbolTable.relateToOperator("msad4",                       EOpMsad4);
-    // symbolTable.relateToOperator("mul");
+    symbolTable.relateToOperator("mul",                         EOpGenMul);
    // symbolTable.relateToOperator("noise",                    EOpNoise); // TODO: check return type
    symbolTable.relateToOperator("normalize",                   EOpNormalize);
    symbolTable.relateToOperator("pow",                         EOpPow);
@@ -604,16 +605,16 @@ void TBuiltInParseablesHlsl::identifyBuiltIns(int version, EProfile profile, int
    // symbolTable.relateToOperator("ProcessTriTessFactorsMax");
    // symbolTable.relateToOperator("ProcessTriTessFactorsMin");
    symbolTable.relateToOperator("radians",                     EOpRadians);
-    // symbolTable.relateToOperator("rcp");
+    symbolTable.relateToOperator("rcp",                         EOpRcp);
    symbolTable.relateToOperator("reflect",                     EOpReflect);
    symbolTable.relateToOperator("refract",                     EOpRefract);
    symbolTable.relateToOperator("reversebits",                 EOpBitFieldReverse);
    symbolTable.relateToOperator("round",                       EOpRoundEven);
    symbolTable.relateToOperator("rsqrt",                       EOpInverseSqrt);
-    // symbolTable.relateToOperator("saturate"); 
+    symbolTable.relateToOperator("saturate",                    EOpSaturate);
    symbolTable.relateToOperator("sign",                        EOpSign);
    symbolTable.relateToOperator("sin",                         EOpSin);
-    // symbolTable.relateToOperator("sincos");
+    symbolTable.relateToOperator("sincos",                      EOpSinCos);
    symbolTable.relateToOperator("sinh",                        EOpSinh);
    symbolTable.relateToOperator("smoothstep",                  EOpSmoothStep);
    symbolTable.relateToOperator("sqrt",                        EOpSqrt);