HLSL: Include shape-changing conversions in overloaded signature selection.

This also enables vecN -> vec1 shape conversions in all places that do shape conversions. For signature selection, this makes a shape change worse than any other difference when deciding which conversions are better than others.

HLSL: Include shape-changing conversions in overloaded signature selection.
e3f2c8f9 · John Kessenich · 90dd70f7 · e3f2c8f9 · e3f2c8f9 · e3f2c8f9
Commit e3f2c8f9 authored Aug 25, 2016 by John Kessenich
6 changed files
--- a/Test/baseResults/hlsl.intrinsics.negative.frag.out
+++ b/Test/baseResults/hlsl.intrinsics.negative.frag.out
 hlsl.intrinsics.negative.frag
-ERROR: 0:8: 'cross' : no matching overloaded function found 
-ERROR: 0:9: 'D3DCOLORtoUBYTE4' : no matching overloaded function found 
 ERROR: 0:10: 'determinant' : no matching overloaded function found 
 ERROR: 0:12: 'f32tof16' : unimplemented intrinsic: handle natively 
-ERROR: 0:23: 'length' : no matching overloaded function found 
-ERROR: 0:25: 'normalize' : no matching overloaded function found 
-ERROR: 0:26: 'reflect' : no matching overloaded function found 
-ERROR: 0:27: 'refract' : no matching overloaded function found 
 ERROR: 0:28: 'refract' : no matching overloaded function found 
 ERROR: 0:30: 'transpose' : no matching overloaded function found 
 ERROR: 0:39: 'GetRenderTargetSamplePosition' : no matching overloaded function found 
@@ -23,7 +17,6 @@ ERROR: 0:67: 'determinant' : no matching overloaded function found
 ERROR: 0:68: 'f32tof16' : unimplemented intrinsic: handle natively 
 ERROR: 0:73: 'transpose' : no matching overloaded function found 
 ERROR: 0:81: 'CheckAccessFullyMapped' : no matching overloaded function found 
-ERROR: 0:83: 'cross' : no matching overloaded function found 
 ERROR: 0:84: 'determinant' : no matching overloaded function found 
 ERROR: 0:85: 'f32tof16' : unimplemented intrinsic: handle natively 
 ERROR: 0:90: 'transpose' : no matching overloaded function found 
@@ -66,7 +59,7 @@ ERROR: 0:133: 'normalize' : no matching overloaded function found
 ERROR: 0:133: 'reflect' : no matching overloaded function found 
 ERROR: 0:133: 'refract' : no matching overloaded function found 
 ERROR: 0:133: 'reversebits' : no matching overloaded function found 
-ERROR: 67 compilation errors.  No code generated.
+ERROR: 60 compilation errors.  No code generated.
 Shader version: 450
@@ -91,10 +84,14 @@ ERROR: node is still EOpNull!
 0:7      bitCount (global uint)
 0:7        Convert float to uint (temp uint)
 0:7          'inF0' (in float)
-0:8      Constant:
+0:8      cross-product (global 3-component vector of float)
-0:8        0.000000
+0:8        Construct vec3 (in 3-component vector of float)
-0:9      Constant:
+0:8          'inF0' (in float)
-0:9        0.000000
+0:8        Construct vec3 (in 3-component vector of float)
+0:8          'inF1' (in float)
+0:9      Function Call: D3DCOLORtoUBYTE4(vf4; (global 4-component vector of int)
+0:9        Construct vec4 (in 4-component vector of float)
+0:9          'inF0' (in float)
 0:10      Constant:
 0:10        0.000000
 0:12      ERROR: Bad unary op
@@ -107,8 +104,9 @@ ERROR: node is still EOpNull!
 0:14      findLSB (global uint)
 0:14        Convert float to uint (temp uint)
 0:14          'inF0' (in float)
-0:23      Constant:
+0:23      length (global float)
-0:23        0.000000
+0:23        Construct vec2 (in 2-component vector of float)
+0:23          'inF0' (in float)
 0:24      Function Call: msad4(u1;vu2;vu4; (global 4-component vector of uint)
 0:24        Convert float to uint (temp uint)
 0:24          'inF0' (in float)
@@ -120,12 +118,20 @@ ERROR: node is still EOpNull!
 0:24          0 (const uint)
 0:24          0 (const uint)
 0:24          0 (const uint)
-0:25      Constant:
+0:25      normalize (global 2-component vector of float)
-0:25        0.000000
+0:25        Construct vec2 (in 2-component vector of float)
-0:26      Constant:
+0:25          'inF0' (in float)
-0:26        0.000000
+0:26      reflect (global 2-component vector of float)
-0:27      Constant:
+0:26        Construct vec2 (in 2-component vector of float)
-0:27        0.000000
+0:26          'inF0' (in float)
+0:26        Construct vec2 (in 2-component vector of float)
+0:26          'inF1' (in float)
+0:27      refract (global 2-component vector of float)
+0:27        Construct vec2 (in 2-component vector of float)
+0:27          'inF0' (in float)
+0:27        Construct vec2 (in 2-component vector of float)
+0:27          'inF1' (in float)
+0:27        'inF2' (in float)
 0:28      Constant:
 0:28        0.000000
 0:29      bitFieldReverse (global uint)
@@ -239,8 +245,11 @@ ERROR: node is still EOpNull!
 0:82      bitCount (global 4-component vector of uint)
 0:82        Convert float to uint (temp 4-component vector of uint)
 0:82          'inF0' (in 4-component vector of float)
-0:83      Constant:
+0:83      cross-product (global 3-component vector of float)
-0:83        0.000000
+0:83        Construct vec3 (in 3-component vector of float)
+0:83          'inF0' (in 4-component vector of float)
+0:83        Construct vec3 (in 3-component vector of float)
+0:83          'inF1' (in 4-component vector of float)
 0:84      Constant:
 0:84        0.000000
 0:85      ERROR: Bad unary op
@@ -423,10 +432,14 @@ ERROR: node is still EOpNull!
 0:7      bitCount (global uint)
 0:7        Convert float to uint (temp uint)
 0:7          'inF0' (in float)
-0:8      Constant:
+0:8      cross-product (global 3-component vector of float)
-0:8        0.000000
+0:8        Construct vec3 (in 3-component vector of float)
-0:9      Constant:
+0:8          'inF0' (in float)
-0:9        0.000000
+0:8        Construct vec3 (in 3-component vector of float)
+0:8          'inF1' (in float)
+0:9      Function Call: D3DCOLORtoUBYTE4(vf4; (global 4-component vector of int)
+0:9        Construct vec4 (in 4-component vector of float)
+0:9          'inF0' (in float)
 0:10      Constant:
 0:10        0.000000
 0:12      ERROR: Bad unary op
@@ -439,8 +452,9 @@ ERROR: node is still EOpNull!
 0:14      findLSB (global uint)
 0:14        Convert float to uint (temp uint)
 0:14          'inF0' (in float)
-0:23      Constant:
+0:23      length (global float)
-0:23        0.000000
+0:23        Construct vec2 (in 2-component vector of float)
+0:23          'inF0' (in float)
 0:24      Function Call: msad4(u1;vu2;vu4; (global 4-component vector of uint)
 0:24        Convert float to uint (temp uint)
 0:24          'inF0' (in float)
@@ -452,12 +466,20 @@ ERROR: node is still EOpNull!
 0:24          0 (const uint)
 0:24          0 (const uint)
 0:24          0 (const uint)
-0:25      Constant:
+0:25      normalize (global 2-component vector of float)
-0:25        0.000000
+0:25        Construct vec2 (in 2-component vector of float)
-0:26      Constant:
+0:25          'inF0' (in float)
-0:26        0.000000
+0:26      reflect (global 2-component vector of float)
-0:27      Constant:
+0:26        Construct vec2 (in 2-component vector of float)
-0:27        0.000000
+0:26          'inF0' (in float)
+0:26        Construct vec2 (in 2-component vector of float)
+0:26          'inF1' (in float)
+0:27      refract (global 2-component vector of float)
+0:27        Construct vec2 (in 2-component vector of float)
+0:27          'inF0' (in float)
+0:27        Construct vec2 (in 2-component vector of float)
+0:27          'inF1' (in float)
+0:27        'inF2' (in float)
 0:28      Constant:
 0:28        0.000000
 0:29      bitFieldReverse (global uint)
@@ -571,8 +593,11 @@ ERROR: node is still EOpNull!
 0:82      bitCount (global 4-component vector of uint)
 0:82        Convert float to uint (temp 4-component vector of uint)
 0:82          'inF0' (in 4-component vector of float)
-0:83      Constant:
+0:83      cross-product (global 3-component vector of float)
-0:83        0.000000
+0:83        Construct vec3 (in 3-component vector of float)
+0:83          'inF0' (in 4-component vector of float)
+0:83        Construct vec3 (in 3-component vector of float)
+0:83          'inF1' (in 4-component vector of float)
 0:84      Constant:
 0:84        0.000000
 0:85      ERROR: Bad unary op

--- a/Test/baseResults/hlsl.overload.frag.out
+++ b/Test/baseResults/hlsl.overload.frag.out
--- a/Test/hlsl.overload.frag
+++ b/Test/hlsl.overload.frag
@@ -20,12 +20,27 @@ void foo6(float b) {}
 void foo7(double b){}
 // shorter forward chain better than longer or backward chain
-void foo8(float);
+void foo8(float)  {}
-void foo8(double);
+void foo8(double) {}
-void foo9(int);
+void foo9(int)    {}
-void foo9(uint);
+void foo9(uint)   {}
-void foo10(bool);
+void foo10(bool)  {}
-void foo10(int);
+void foo10(int)   {}
+// shape change is worse
+void foo11(float3)  {}
+void foo11(double)  {}
+void foo11(int3)    {}
+void foo11(uint)    {}
+void foo12(float1)  {}
+void foo12(double3) {}
+void foo16(uint)    {}
+void foo16(uint2)   {}
+// shape change
+void foo13(float3)  {}
+void foo14(int1)     {}
+void foo15(bool1)   {}
 float4 PixelShaderFunction(float4 input) : COLOR0
 {
@@ -113,5 +128,15 @@ float4 PixelShaderFunction(float4 input) : COLOR0
    foo10(f);
    foo10(d);
+    foo11(b);
+    foo11(f);
+    foo12(float3(f));
+    foo16(int2(i,i));
+    foo13(f);
+    foo14(int4(i));
+    foo15(b);
+    foo15(bool3(b));
    return input;
 }
--- a/glslang/Include/revision.h
+++ b/glslang/Include/revision.h
@@ -2,5 +2,5 @@
 // For the version, it uses the latest git tag followed by the number of commits.
 // For the date, it uses the current date (when then script is run).
-#define GLSLANG_REVISION "Overload400-PrecQual.1434"
+#define GLSLANG_REVISION "Overload400-PrecQual.1438"
 #define GLSLANG_DATE "25-Aug-2016"
--- a/glslang/MachineIndependent/Intermediate.cpp
+++ b/glslang/MachineIndependent/Intermediate.cpp
@@ -702,6 +702,7 @@ TIntermTyped* TIntermediate::addShapeConversion(TOperator op, const TType& type,
    case EOpGreaterThan:
    case EOpLessThanEqual:
    case EOpGreaterThanEqual:
+    case EOpFunctionCall:
        break;
    default:
        return node;
@@ -715,9 +716,11 @@ TIntermTyped* TIntermediate::addShapeConversion(TOperator op, const TType& type,
    // The new node that handles the conversion
    TOperator constructorOp = mapTypeToConstructorOp(type);
-    // scalar -> smeared -> vector
+    // scalar -> smeared -> vector, or
-    if (type.isVector() && node->getType().isScalar())
+    // bigger vector -> smaller vector or scalar
-        return setAggregateOperator(node, constructorOp, type, node->getLoc());
+    if ((type.isVector() && node->getType().isScalar()) ||
+        (node->getVectorSize() > type.getVectorSize() && type.isVector()))
+        return setAggregateOperator(makeAggregate(node), constructorOp, type, node->getLoc());
    return node;
 }
@@ -731,6 +734,7 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, TOperat
    if (profile == EEsProfile || version == 110)
        return false;
+    // TODO: Move more policies into language-specific handlers.
    // Some languages allow more general (or potentially, more specific) conversions under some conditions.
    if (source == EShSourceHlsl) {
        const bool fromConvertable = (from == EbtFloat || from == EbtDouble || from == EbtInt || from == EbtUint || from == EbtBool);

--- a/hlsl/hlslParseHelper.cpp
+++ b/hlsl/hlslParseHelper.cpp
@@ -2001,6 +2001,7 @@ void HlslParseContext::addInputArgumentConversions(const TFunction& function, TI
                // In-qualified arguments just need an extra node added above the argument to
                // convert to the correct type.
                arg = intermediate.addConversion(EOpFunctionCall, *function[i].type, arg);
+                arg = intermediate.addShapeConversion(EOpFunctionCall, *function[i].type, arg);
                if (arg) {
                    if (function.getParamCount() == 1)
                        arguments = arg;
@@ -3565,9 +3566,25 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, const TFu
    auto convertible = [this](const TType& from, const TType& to) {
        if (from == to)
            return true;
-        if (from.isArray() || to.isArray() || ! from.sameElementShape(to))
+        // no aggregate conversions
+        if (from.isArray()  || to.isArray() || 
+            from.isStruct() || to.isStruct())
+            return false;
+        // basic types have to be convertible
+        if (! intermediate.canImplicitlyPromote(from.getBasicType(), to.getBasicType(), EOpFunctionCall))
+            return false;
+        // shapes have to be convertible
+        if ((from.isScalar() && to.isScalar()) ||
+            (from.isScalar() && to.isVector()) ||
+            (from.isVector() && to.isVector() && from.getVectorSize() >= to.getVectorSize()))
+            return true;
+        // TODO: what are the matrix rules? they go here
        return false;
-        return intermediate.canImplicitlyPromote(from.getBasicType(), to.getBasicType(), EOpFunctionCall);
    };
    // Is 'to2' a better conversion than 'to1'?
@@ -3580,33 +3597,41 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, const TFu
        if (from == to1)
            return false;
-        // float -> double is better than any other float conversion
+        // shape changes are always worse
-        if (from.getBasicType() == EbtFloat) {
+        if (from.isScalar() || from.isVector()) {
-            if (to2.getBasicType() == EbtDouble && to1.getBasicType() != EbtDouble)
+            if (from.getVectorSize() == to2.getVectorSize() &&
+                from.getVectorSize() != to1.getVectorSize())
                return true;
+            if (from.getVectorSize() == to1.getVectorSize() &&
+                from.getVectorSize() != to2.getVectorSize())
+                return false;
        }
-        // int -> uint is better than any other int conversion
+        // Might or might not be changing shape, which means basic type might
-        if (from.getBasicType() == EbtInt) {
+        // or might not match, so within that, the question is how big a
-            if (to2.getBasicType() == EbtUint && to1.getBasicType() != EbtUint)
+        // basic-type conversion is being done.
-                return true;
+        //
+        // Use a hierarchy of domains, translated to order of magnitude
+        // in a linearized view:
+        //   - floating-point vs. integer
+        //     - 32 vs. 64 bit (or width in general)
+        //       - bool vs. non bool
+        //         - signed vs. not signed
+        auto linearize = [](const TBasicType& basicType) {
+            switch (basicType) {
+            case EbtBool:     return 1;
+            case EbtInt:      return 10;
+            case EbtUint:     return 11;
+            case EbtInt64:    return 20;
+            case EbtUint64:   return 21;
+            case EbtFloat:    return 100;
+            case EbtDouble:   return 110;
+            default:          return 0;
            }
+        };
-        // TODO: these should be replaced by a more generic "shorter chain is better than longer chain" rule
+        return std::abs(linearize(to2.getBasicType()) - linearize(from.getBasicType())) <
+               std::abs(linearize(to1.getBasicType()) - linearize(from.getBasicType()));
-        // -> float is better than -> double
-        if (to2.getBasicType() == EbtFloat && to1.getBasicType() == EbtDouble)
-            return true;
-        // -> int is better than -> bool
-        if ((to2.getBasicType() == EbtInt || to2.getBasicType() == EbtUint) &&  to1.getBasicType() == EbtBool)
-            return true;
-        // -> uint is better than -> int
-        if (to2.getBasicType() == EbtUint &&  to1.getBasicType() == EbtInt)
-            return true;
-        return false;
    };
    // for ambiguity reporting