Merge pull request #740 from steve-lunarg/f16tof32

HLSL: add f16tof32 and f32tof16 decompositions.

Merge pull request #740 from steve-lunarg/f16tof32
f67f9d7e · John Kessenich · GitHub · 42e33c9b · 86b510ef · f67f9d7e
Commit f67f9d7e, authored Feb 28, 2017 by John Kessenich; committed by GitHub on Feb 28, 2017.
7 changed files
--- a/Test/baseResults/hlsl.intrinsics.f1632.frag.out
+++ b/Test/baseResults/hlsl.intrinsics.f1632.frag.out
--- a/Test/baseResults/hlsl.intrinsics.f3216.frag.out
+++ b/Test/baseResults/hlsl.intrinsics.f3216.frag.out
--- a/Test/baseResults/hlsl.intrinsics.negative.frag.out
+++ b/Test/baseResults/hlsl.intrinsics.negative.frag.out
--- a/Test/hlsl.intrinsics.f1632.frag
+++ b/Test/hlsl.intrinsics.f1632.frag
-float PixelShaderFunctionS(float inF0)
+float PixelShaderFunctionS(uint inF0)
 {
-    f32tof16(inF0);
+    return f16tof32(inF0);
-    return 0.0;
 }
-float1 PixelShaderFunction1(float1 inF0)
+float1 PixelShaderFunction1(uint1 inF0)
 {
-    // TODO: ... add when float1 prototypes are generated
+    return f16tof32(inF0);
-    return 0.0;
 }
-float2 PixelShaderFunction2(float2 inF0)
+float2 PixelShaderFunction2(uint2 inF0)
 {
-    f32tof16(inF0);
+    return f16tof32(inF0);
-    return float2(1,2);
 }
-float3 PixelShaderFunction3(float3 inF0)
+float3 PixelShaderFunction3(uint3 inF0)
 {
-    f32tof16(inF0);
+    return f16tof32(inF0);
-    return float3(1,2,3);
 }
-float4 PixelShaderFunction(float4 inF0)
+float4 PixelShaderFunction(uint4 inF0)
 {
-    f32tof16(inF0);
+    return f16tof32(inF0);
-    return float4(1,2,3,4);
 }
+float4 main() : SV_Target0
+{
+    return 0;
+}
--- a/Test/hlsl.intrinsics.f3216.frag
+++ b/Test/hlsl.intrinsics.f3216.frag
+uint PixelShaderFunctionS(float inF0)
+{
+    return f32tof16(inF0);
+}
+uint1 PixelShaderFunction1(float1 inF0)
+{
+    return f32tof16(inF0);
+}
+uint2 PixelShaderFunction2(float2 inF0)
+{
+    return f32tof16(inF0);
+}
+uint3 PixelShaderFunction3(float3 inF0)
+{
+    return f32tof16(inF0);
+}
+uint4 PixelShaderFunction(float4 inF0)
+{
+    return f32tof16(inF0);
+}
+float4 main() : SV_Target0
+{
+    return 0;
+}
--- a/gtests/Hlsl.FromFile.cpp
+++ b/gtests/Hlsl.FromFile.cpp
@@ -132,7 +132,8 @@ INSTANTIATE_TEST_CASE_P(
        {"hlsl.intrinsics.evalfns.frag", "main"},
        {"hlsl.intrinsics.d3dcolortoubyte4.frag", "main"},
        {"hlsl.intrinsics.double.frag", "PixelShaderFunction"},
-        {"hlsl.intrinsics.f1632.frag", "PixelShaderFunction"},
+        {"hlsl.intrinsics.f1632.frag", "main"},
+        {"hlsl.intrinsics.f3216.frag", "main"},
        {"hlsl.intrinsics.frag", "main"},
        {"hlsl.intrinsics.lit.frag", "PixelShaderFunction"},
        {"hlsl.intrinsics.negative.comp", "ComputeShaderFunction"},

--- a/hlsl/hlslParseHelper.cpp
+++ b/hlsl/hlslParseHelper.cpp
@@ -3564,10 +3564,107 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*& 
        }
    case EOpF16tof32:
+        {
+            // Decompose f16tof32.  The operand is a uint scalar or vector whose components each
+            // hold a float16 in their low 16 bits.  Every component is passed through
+            // EOpUnpackHalf2x16 and element 0 of the resulting vec2 is kept.  For vector operands
+            // the per-component results are gathered by a float vecN constructor; for a scalar
+            // operand the single extracted element replaces the node directly.
+            TIntermTyped* argValue = node->getAsUnaryNode()->getOperand();
+
+            // Index selecting element 0 of the unpacked vec2.  It is consumed as an EOpIndexDirect
+            // index, so build it as an integer constant, the same way idxConst is built below,
+            // rather than as a float constant.
+            TIntermTyped* zero = intermediate.addConstantUnion(0, loc, true);
+            const int vecSize = argValue->getType().getVectorSize();
+
+            TOperator constructOp = EOpNull;
+            switch (vecSize) {
+            case 1: constructOp = EOpNull;          break; // direct use, no construct needed
+            case 2: constructOp = EOpConstructVec2; break;
+            case 3: constructOp = EOpConstructVec3; break;
+            case 4: constructOp = EOpConstructVec4; break;
+            default: assert(0); break;
+            }
+
+            // Only vector operands need a constructor aggregate; the scalar case leaves this null.
+            TIntermAggregate* result = (vecSize > 1) ? new TIntermAggregate(constructOp) : nullptr;
+            if (result) {
+                result->setType(TType(EbtFloat, EvqTemporary, vecSize));
+                result->setLoc(loc);
+            }
+
+            for (int idx = 0; idx < vecSize; ++idx) {
+                // Select the idx'th component of a vector operand; a scalar operand is used as is.
+                TIntermTyped* idxConst = intermediate.addConstantUnion(idx, loc, true);
+                TIntermTyped* component = argValue->getType().isVector() ?
+                    intermediate.addIndex(EOpIndexDirect, argValue, idxConst, loc) : argValue;
+
+                // A freshly built index node is given the operand's scalar type explicitly.
+                if (component != argValue)
+                    component->setType(TType(argValue->getBasicType(), EvqTemporary));
+
+                // unpackHalf2x16(component) yields a 2-component float vector.
+                TIntermTyped* unpackOp  = new TIntermUnary(EOpUnpackHalf2x16);
+                unpackOp->setType(TType(EbtFloat, EvqTemporary, 2));
+                unpackOp->getAsUnaryNode()->setOperand(component);
+                unpackOp->setLoc(loc);
+
+                // Keep element 0 of the unpacked pair and type it explicitly as a scalar float,
+                // mirroring the explicit typing applied to 'component' above.
+                TIntermTyped* lowOrder  = intermediate.addIndex(EOpIndexDirect, unpackOp, zero, loc);
+                lowOrder->setType(TType(EbtFloat, EvqTemporary));
+
+                if (result != nullptr) {
+                    result->getSequence().push_back(lowOrder);
+                    node = result;
+                } else {
+                    node = lowOrder;
+                }
+            }
+            break;
+        }
    case EOpF32tof16:
        {
-            // Temporary until decomposition is available.
+            // input floatN converted to 16 bit float in low order bits of each component of uintN
-            error(loc, "unimplemented intrinsic: handle natively", "f32tof16", "");
+            TIntermTyped* argValue = node->getAsUnaryNode()->getOperand();
+            TIntermTyped* zero = intermediate.addConstantUnion(0.0, EbtFloat, loc, true);
+            const int vecSize = argValue->getType().getVectorSize();
+            TOperator constructOp = EOpNull;
+            switch (vecSize) {
+            case 1: constructOp = EOpNull;           break; // direct use, no construct needed
+            case 2: constructOp = EOpConstructUVec2; break;
+            case 3: constructOp = EOpConstructUVec3; break;
+            case 4: constructOp = EOpConstructUVec4; break;
+            default: assert(0); break;
+            }
+            // For scalar case, we don't need to construct another type.
+            TIntermAggregate* result = (vecSize > 1) ? new TIntermAggregate(constructOp) : nullptr;
+            if (result) {
+                result->setType(TType(EbtUint, EvqTemporary, vecSize));
+                result->setLoc(loc);
+            }
+            for (int idx = 0; idx < vecSize; ++idx) {
+                TIntermTyped* idxConst = intermediate.addConstantUnion(idx, loc, true);
+                TIntermTyped* component = argValue->getType().isVector() ? 
+                    intermediate.addIndex(EOpIndexDirect, argValue, idxConst, loc) : argValue;
+                if (component != argValue)
+                    component->setType(TType(argValue->getBasicType(), EvqTemporary));
+                TIntermAggregate* vec2ComponentAndZero = new TIntermAggregate(EOpConstructVec2);
+                vec2ComponentAndZero->getSequence().push_back(component);
+                vec2ComponentAndZero->getSequence().push_back(zero);
+                vec2ComponentAndZero->setType(TType(EbtFloat, EvqTemporary, 2));
+                vec2ComponentAndZero->setLoc(loc);
+                TIntermTyped* packOp = new TIntermUnary(EOpPackHalf2x16);
+                packOp->getAsUnaryNode()->setOperand(vec2ComponentAndZero);
+                packOp->setLoc(loc);
+                packOp->setType(TType(EbtUint, EvqTemporary));
+                if (result != nullptr) {
+                    result->getSequence().push_back(packOp);
+                    node = result;
+                } else {
+                    node = packOp;
+                }
+            }
            break;
        }