Commit ef33ec09 by steve-lunarg

HLSL: add intrinsic function implicit promotions

This PR handles implicit promotions for intrinsics when there is no exact match, for example clamp(int, bool, float). In this case the int and bool will be promoted to float, and the clamp(float, float, float) form is used. These promotions can be mixed with shape conversions, e.g., clamp(int, bool2, float2). Output conversions are handled either via the existing addOutputArgumentConversions function, which this PR generalizes to handle either aggregates or unaries, or by intrinsic decomposition. If there are methods or intrinsics to be decomposed, then decomposition is responsible for any output conversions, which turns out to happen automatically in all current cases. This can be revisited once inout conversions are in place. Some cases of actual ambiguity were fixed in several tests, e.g., spv.register.autoassign.*. Some intrinsics with only uint versions were expanded to signed ints natively, where the underlying AST and SPIR-V support that, e.g., countbits. This avoids extraneous conversion nodes. A new function promoteAggregate is added, and used by findFunction. This is essentially a generalization of the "promote 1st or 2nd arg" algorithm in promoteBinary. The actual selection proceeds in three steps, as described in the comments in hlslParseContext::findFunction: 1. Attempt an exact match. If found, use it. 2. If not, obtain the operator from step 1, and promote arguments. 3. Re-select the intrinsic overload from the results of step 2.
parent 1c573fbc
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -5,7 +5,7 @@ Linked fragment stage:
// Module Version 10000
// Generated by (magic number): 80001
// Id's are bound by 30
// Id's are bound by 31
Capability Shader
1: ExtInstImport "GLSL.std.450"
......@@ -16,14 +16,14 @@ Linked fragment stage:
Name 9 "Color"
Name 12 "g_tScene[0]"
Name 16 "g_tSamp"
Name 24 "g_tScene[1]"
Name 25 "g_tScene[1]"
Decorate 9(Color) Location 0
Decorate 12(g_tScene[0]) DescriptorSet 0
Decorate 12(g_tScene[0]) Binding 10
Decorate 16(g_tSamp) DescriptorSet 0
Decorate 16(g_tSamp) Binding 5
Decorate 24(g_tScene[1]) DescriptorSet 0
Decorate 24(g_tScene[1]) Binding 11
Decorate 25(g_tScene[1]) DescriptorSet 0
Decorate 25(g_tScene[1]) Binding 11
2: TypeVoid
3: TypeFunction 2
6: TypeFloat 32
......@@ -39,19 +39,20 @@ Linked fragment stage:
18: TypeSampledImage 10
20: TypeVector 6(float) 2
21: 6(float) Constant 1050253722
22: 20(fvec2) ConstantComposite 21 21
24(g_tScene[1]): 11(ptr) Variable UniformConstant
22: 6(float) Constant 1053609165
23: 20(fvec2) ConstantComposite 21 22
25(g_tScene[1]): 11(ptr) Variable UniformConstant
4(main): 2 Function None 3
5: Label
13: 10 Load 12(g_tScene[0])
17: 14 Load 16(g_tSamp)
19: 18 SampledImage 13 17
23: 7(fvec4) ImageSampleImplicitLod 19 22
25: 10 Load 24(g_tScene[1])
26: 14 Load 16(g_tSamp)
27: 18 SampledImage 25 26
28: 7(fvec4) ImageSampleImplicitLod 27 22
29: 7(fvec4) FAdd 23 28
Store 9(Color) 29
24: 7(fvec4) ImageSampleImplicitLod 19 23
26: 10 Load 25(g_tScene[1])
27: 14 Load 16(g_tSamp)
28: 18 SampledImage 26 27
29: 7(fvec4) ImageSampleImplicitLod 28 23
30: 7(fvec4) FAdd 24 29
Store 9(Color) 30
Return
FunctionEnd
......@@ -33,7 +33,7 @@ float PixelShaderFunctionS(float inF0, float inF1, float inF2, uint inU0, uint i
clip(inF0);
float r014 = cos(inF0);
float r015 = cosh(inF0);
uint r016 = countbits(7);
int r016 = countbits(7);
float r017 = ddx(inF0);
float r018 = ddx_coarse(inF0);
float r019 = ddx_fine(inF0);
......@@ -111,7 +111,7 @@ float2 PixelShaderFunction2(float2 inF0, float2 inF1, float2 inF2, uint2 inU0, u
clip(inF0);
float2 r013 = cos(inF0);
float2 r015 = cosh(inF0);
uint2 r016 = countbits(int2(7,3));
int2 r016 = countbits(int2(7,3));
float2 r017 = ddx(inF0);
float2 r018 = ddx_coarse(inF0);
float2 r019 = ddx_fine(inF0);
......
// Test shader: calls countbits / reversebits with float arguments.
// NOTE(review): presumably this is hlsl.intrinsics.promote.down.frag from the
// test list; the intrinsic table lists only "UI" basic types for these two
// intrinsics, so the float arguments cannot match exactly — confirm the
// expected outcome against the test baseline.
struct PS_OUTPUT { float4 color : SV_Target0; };
// One global per scalar / 2-component basic type; only f and f2 are read below.
int i;
uint u;
float f;
bool b;
int2 i2;
uint2 u2;
float2 f2;
bool2 b2;
// Entry point: evaluates the two intrinsic calls and returns a constant color.
PS_OUTPUT main()
{
// Scalar float argument.
uint r00 = countbits(f);
// 2-component float argument.
uint2 r01 = reversebits(f2);
// r00 / r01 are not consumed; the output is a fixed zero color.
PS_OUTPUT ps_output;
ps_output.color = float4(0,0,0,0);
return ps_output;
};
// Test shader: calls intrinsics and texture/buffer methods with arguments whose
// basic types (and, in the second half, shapes) differ from one another.
// NOTE(review): presumably hlsl.intrinsics.promote.frag from the test list — confirm.
struct PS_OUTPUT { float4 color : SV_Target0; };
// One global per scalar / 2-component basic type, used as intrinsic arguments.
int i;
uint u;
float f;
bool b;
int2 i2;
uint2 u2;
float2 f2;
bool2 b2;
// Resources used for the Load() and GetDimensions() method calls below.
Buffer <float> g_tTexbfs;
Texture1D <float4> g_tTex1df4;
// Load() coordinates: one uint, one float.
uint upos;
float fpos;
// Entry point: each statement below is one mixed-type call form under test.
PS_OUTPUT main()
{
// Same shapes:
// Two-argument max() with scalar operands of different basic types.
float r00 = max(b, f);
uint r01 = max(b, u);
int r02 = max(b, i);
float r03 = max(i, f);
float r04 = max(u, f);
// The same combinations with 2-component vectors.
float2 r10 = max(b2, f2);
uint2 r11 = max(b2, u2);
int2 r12 = max(b2, i2);
float2 r13 = max(i2, f2);
float2 r14 = max(u2, f2);
float2 r20 = clamp(i2, u2, f2); // 3 args, converts all to best type.
uint2 r21 = clamp(b2, u2, b2);
float2 r22 = clamp(b2, f2, b2);
// Mixed shapes:
// Scalar first argument combined with a 2-component second argument.
float2 r30 = max(b, f2);
uint2 r31 = max(b, u2);
int2 r32 = max(b, i2);
float2 r33 = max(i, f2);
float2 r34 = max(u, f2);
float2 r40 = clamp(i, u2, f2); // 3 args, converts all to best type.
uint2 r41 = clamp(b2, u, b2);
float2 r42 = clamp(b2, f, b);
int2 r43 = clamp(i, i2, u2);
// Buffer Load() with a uint coordinate and with a float coordinate.
float r50 = g_tTexbfs.Load(upos);
float r51 = g_tTexbfs.Load(fpos);
// Locals passed to GetDimensions(); the U variants are uint, the I variants are int.
// Several of them are declared but not referenced in this shader.
int MipLevel;
uint WidthU;
uint HeightU;
uint ElementsU;
uint DepthU;
uint NumberOfLevelsU;
uint NumberOfSamplesU;
int WidthI;
int HeightI;
int ElementsI;
int DepthI;
int NumberOfLevelsI;
int NumberOfSamplesI;
// GetDimensions() called with int, int/uint, uint/int and int/int locals.
g_tTex1df4 . GetDimensions(WidthI);
g_tTex1df4 . GetDimensions(6, WidthI, NumberOfLevelsU);
g_tTex1df4 . GetDimensions(6, WidthU, NumberOfLevelsI);
g_tTex1df4 . GetDimensions(6, WidthI, NumberOfLevelsI);
// max(i2, f2);
// The scalar float r00 is assigned to the float4 color member.
PS_OUTPUT ps_output;
ps_output.color = r00;
return ps_output;
};
// Test shader: calls Texture1D::GetDimensions() with int and uint locals in
// different combinations.
// NOTE(review): presumably hlsl.intrinsics.promote.outputs.frag from the test list — confirm.
struct PS_OUTPUT { float4 color : SV_Target0; };
// Globals of each scalar / 2-component basic type; none of these eight is read in main().
int i;
uint u;
float f;
bool b;
int2 i2;
uint2 u2;
float2 f2;
bool2 b2;
// g_tTex1df4 is the texture queried below; g_tTexbfs is declared but not used in main().
Buffer <float> g_tTexbfs;
Texture1D <float4> g_tTex1df4;
// fpos is the saturate() argument; upos is declared but not used in main().
uint upos;
float fpos;
// Entry point: issues the GetDimensions() calls and returns a zero color.
PS_OUTPUT main()
{
// Locals passed to GetDimensions(); the U variants are uint, the I variants are int.
// Several of them are declared but not referenced in this shader.
int MipLevel;
uint WidthU;
uint HeightU;
uint ElementsU;
uint DepthU;
uint NumberOfLevelsU;
uint NumberOfSamplesU;
int WidthI;
int HeightI;
int ElementsI;
int DepthI;
int NumberOfLevelsI;
int NumberOfSamplesI;
// Intrinsic call whose result is discarded.
saturate(fpos);
// Test output promotions
// The call forms use int, int/uint, uint/int and int/int locals respectively.
g_tTex1df4 . GetDimensions(WidthI);
g_tTex1df4 . GetDimensions(6, WidthI, NumberOfLevelsU);
g_tTex1df4 . GetDimensions(6, WidthU, NumberOfLevelsI);
g_tTex1df4 . GetDimensions(6, WidthI, NumberOfLevelsI);
// max(i2, f2);
// The integer literal 0 is assigned to the float4 color member.
PS_OUTPUT ps_output;
ps_output.color = 0;
return ps_output;
};
......@@ -10,6 +10,6 @@ struct PS_OUTPUT
void main(out PS_OUTPUT psout)
{
psout.Color = g_tScene[0].Sample(g_tSamp, 0.3) +
g_tScene[1].Sample(g_tSamp, 0.3);
psout.Color = g_tScene[0].Sample(g_tSamp, float2(0.3,0.4)) +
g_tScene[1].Sample(g_tSamp, float2(0.3,0.4));
}
......@@ -10,6 +10,6 @@ struct PS_OUTPUT
void main(out PS_OUTPUT psout)
{
psout.Color = g_tScene[0].Sample(g_tSamp, 0.3) +
g_tScene[1].Sample(g_tSamp, 0.3);
psout.Color = g_tScene[0].Sample(g_tSamp, float2(0.3, 0.3)) +
g_tScene[1].Sample(g_tSamp, float2(0.3, 0.3));
}
......@@ -45,6 +45,7 @@
#include "propagateNoContraction.h"
#include <cfloat>
#include <utility>
namespace glslang {
......@@ -575,6 +576,27 @@ TIntermTyped* TIntermediate::addConversion(TOperator op, const TType& type, TInt
case EOpDivAssign:
case EOpModAssign:
case EOpAtan:
case EOpClamp:
case EOpCross:
case EOpDistance:
case EOpDot:
case EOpDst:
case EOpFaceForward:
case EOpFma:
case EOpFrexp:
case EOpLdexp:
case EOpMix:
case EOpLit:
case EOpMax:
case EOpMin:
case EOpModf:
case EOpPow:
case EOpReflect:
case EOpRefract:
case EOpSmoothStep:
case EOpStep:
case EOpSequence:
case EOpConstructStruct:
......@@ -833,6 +855,9 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, TOperat
if (profile == EEsProfile || version == 110)
return false;
if (from == to)
return true;
// TODO: Move more policies into language-specific handlers.
// Some languages allow more general (or potentially, more specific) conversions under some conditions.
if (source == EShSourceHlsl) {
......@@ -901,6 +926,8 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, TOperat
return version >= 400;
case EbtUint:
return true;
case EbtBool:
return (source == EShSourceHlsl);
default:
return false;
}
......@@ -908,6 +935,8 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, TOperat
switch (from) {
case EbtInt:
return true;
case EbtBool:
return (source == EShSourceHlsl);
default:
return false;
}
......@@ -1747,6 +1776,9 @@ bool TIntermediate::promote(TIntermOperator* node)
if (node->getAsBinaryNode())
return promoteBinary(*node->getAsBinaryNode());
if (node->getAsAggregate())
return promoteAggregate(*node->getAsAggregate());
return false;
}
......@@ -2190,6 +2222,77 @@ bool TIntermediate::promoteBinary(TIntermBinary& node)
return true;
}
//
// See TIntermediate::promote
//
// Promote the arguments of an aggregate (intrinsic call) node to a common type.
//
// Each argument's type is tried in turn as the target: every argument is run
// through addConversion() against that type, and the first target for which
// all conversions succeed wins. On success the node's argument sequence is
// replaced by the converted arguments.
//
// Returns true if the node needs no promotion (non-HLSL source, or an operator
// outside the promotable set) or if promotion succeeded. Returns false if no
// argument's type could be used as a common type for all arguments.
//
bool TIntermediate::promoteAggregate(TIntermAggregate& node)
{
    TOperator op = node.getOp();
    TIntermSequence& args = node.getSequence();
    // Explicit cast: size() is unsigned; the loops below index with int.
    const int numArgs = static_cast<int>(args.size());

    // Presently, only hlsl does intrinsic promotions.
    if (getSource() != EShSourceHlsl)
        return true;

    // set of opcodes that can be promoted in this manner.
    switch (op) {
    case EOpAtan:
    case EOpClamp:
    case EOpCross:
    case EOpDistance:
    case EOpDot:
    case EOpDst:
    case EOpFaceForward:
    // case EOpFindMSB: TODO: ??
    // case EOpFindLSB: TODO: ??
    case EOpFma:
    case EOpMod:
    case EOpFrexp:
    case EOpLdexp:
    case EOpMix:
    case EOpLit:
    case EOpMax:
    case EOpMin:
    case EOpModf:
    // case EOpGenMul: TODO: ??
    case EOpPow:
    case EOpReflect:
    case EOpRefract:
    // case EOpSinCos: TODO: ??
    case EOpSmoothStep:
    case EOpStep:
        break;
    default:
        return true;
    }

    // TODO: array and struct behavior

    // Scratch storage for the converted arguments; reused for every candidate type.
    TIntermSequence convertedArgs(numArgs, nullptr);

    // Try each argument's type in turn as the common type.
    for (int targetArg = 0; targetArg < numArgs; ++targetArg) {
        // The candidate type does not change while converting, so fetch it once.
        const auto& targetType = args[targetArg]->getAsTyped()->getType();

        // Try converting all args to this arg's type, giving up on this
        // candidate as soon as one argument cannot be converted.
        bool allConverted = true;
        for (int convArg = 0; convArg < numArgs; ++convArg) {
            convertedArgs[convArg] = addConversion(op, targetType, args[convArg]->getAsTyped());
            if (convertedArgs[convArg] == nullptr) {
                allConverted = false;
                break;
            }
        }

        // If we successfully converted all the args, use the result.
        if (allConverted) {
            std::swap(args, convertedArgs);
            return true;
        }
    }

    return false;
}
void TIntermBinary::updatePrecision()
{
#ifdef AMD_EXTENSIONS
......
......@@ -370,6 +370,9 @@ void TIntermediate::mergeErrorCheck(TInfoSink& infoSink, const TIntermSymbol& sy
//
void TIntermediate::finalCheck(TInfoSink& infoSink)
{
if (getTreeRoot() == nullptr)
return;
if (source == EShSourceGlsl && numEntryPoints < 1)
error(infoSink, "Missing entry point: Each stage requires one entry point");
......
......@@ -381,6 +381,7 @@ public:
int addXfbBufferOffset(const TType&);
unsigned int computeTypeXfbSize(const TType&, bool& containsDouble) const;
static int getBaseAlignment(const TType&, int& size, int& stride, bool std140, bool rowMajor);
bool promote(TIntermOperator*);
protected:
TIntermSymbol* addSymbol(int Id, const TString&, const TType&, const TConstUnionArray&, TIntermTyped* subtree, const TSourceLoc&);
......@@ -395,10 +396,10 @@ protected:
bool userOutputUsed() const;
static int getBaseAlignmentScalar(const TType&, int& size);
bool isSpecializationOperation(const TIntermOperator&) const;
bool promote(TIntermOperator*);
bool promoteUnary(TIntermUnary&);
bool promoteBinary(TIntermBinary&);
void addSymbolLinkageNode(TIntermAggregate*& linkage, TSymbolTable&, const TString&);
bool promoteAggregate(TIntermAggregate&);
const EShLanguage language; // stage, known at construction time
EShSource source; // source language, known a bit later
......
......@@ -134,6 +134,9 @@ INSTANTIATE_TEST_CASE_P(
{"hlsl.intrinsics.negative.comp", "ComputeShaderFunction"},
{"hlsl.intrinsics.negative.frag", "PixelShaderFunction"},
{"hlsl.intrinsics.negative.vert", "VertexShaderFunction"},
{"hlsl.intrinsics.promote.frag", "main"},
{"hlsl.intrinsics.promote.down.frag", "main"},
{"hlsl.intrinsics.promote.outputs.frag", "main"},
{"hlsl.layout.frag", "main"},
{"hlsl.load.2dms.dx10.frag", "main"},
{"hlsl.load.array.dx10.frag", "main"},
......
......@@ -84,7 +84,7 @@ public:
void decomposeGeometryMethods(const TSourceLoc&, TIntermTyped*& node, TIntermNode* arguments);
TIntermTyped* handleLengthMethod(const TSourceLoc&, TFunction*, TIntermNode*);
void addInputArgumentConversions(const TFunction&, TIntermNode*&) const;
TIntermTyped* addOutputArgumentConversions(const TFunction&, TIntermAggregate&);
TIntermTyped* addOutputArgumentConversions(const TFunction&, TIntermOperator&);
void builtInOpCheck(const TSourceLoc&, const TFunction&, TIntermOperator&);
TFunction* handleConstructorCall(const TSourceLoc&, const TType&);
void handleSemantic(TSourceLoc, TQualifier&, const TString& semantic);
......@@ -125,7 +125,7 @@ public:
void mergeObjectLayoutQualifiers(TQualifier& dest, const TQualifier& src, bool inheritOnly);
void checkNoShaderLayouts(const TSourceLoc&, const TShaderQualifiers&);
const TFunction* findFunction(const TSourceLoc& loc, const TFunction& call, bool& builtIn);
const TFunction* findFunction(const TSourceLoc& loc, const TFunction& call, bool& builtIn, TIntermNode* args);
void declareTypedef(const TSourceLoc&, TString& identifier, const TType&, TArraySizes* typeArray = 0);
TIntermNode* declareVariable(const TSourceLoc&, TString& identifier, TType&, TIntermTyped* initializer = 0);
TIntermTyped* addConstructor(const TSourceLoc&, TIntermNode*, const TType&);
......
......@@ -558,8 +558,8 @@ void TBuiltInParseablesHlsl::initialize(int /*version*/, EProfile /*profile*/, c
{ "AllMemoryBarrier", nullptr, nullptr, "-", "-", EShLangCS },
{ "AllMemoryBarrierWithGroupSync", nullptr, nullptr, "-", "-", EShLangCS },
{ "any", "S", "B", "SVM", "BFIU", EShLangAll },
{ "asdouble", "S", "D", "S,", "U,", EShLangAll },
{ "asdouble", "V2", "D", "V2,", "U,", EShLangAll },
{ "asdouble", "S", "D", "S,", "UI,", EShLangAll },
{ "asdouble", "V2", "D", "V2,", "UI,", EShLangAll },
{ "asfloat", nullptr, "F", "SVM", "BFIU", EShLangAll },
{ "asin", nullptr, nullptr, "SVM", "F", EShLangAll },
{ "asint", nullptr, "I", "SVM", "FU", EShLangAll },
......@@ -572,7 +572,7 @@ void TBuiltInParseablesHlsl::initialize(int /*version*/, EProfile /*profile*/, c
{ "clip", "-", "-", "SVM", "F", EShLangPS },
{ "cos", nullptr, nullptr, "SVM", "F", EShLangAll },
{ "cosh", nullptr, nullptr, "SVM", "F", EShLangAll },
{ "countbits", nullptr, nullptr, "SV", "U", EShLangAll },
{ "countbits", nullptr, nullptr, "SV", "UI", EShLangAll },
{ "cross", nullptr, nullptr, "V3,", "F,", EShLangAll },
{ "D3DCOLORtoUBYTE4", "V4", "I", "V4", "F", EShLangAll },
{ "ddx", nullptr, nullptr, "SVM", "F", EShLangPS },
......@@ -636,9 +636,9 @@ void TBuiltInParseablesHlsl::initialize(int /*version*/, EProfile /*profile*/, c
{ "log10", nullptr, nullptr, "SVM", "F", EShLangAll },
{ "log2", nullptr, nullptr, "SVM", "F", EShLangAll },
{ "mad", nullptr, nullptr, "SVM,,", "DFUI,,", EShLangAll },
{ "max", nullptr, nullptr, "SVM,", "FI,", EShLangAll },
{ "min", nullptr, nullptr, "SVM,", "FI,", EShLangAll },
{ "modf", nullptr, nullptr, "SVM,>", "FI,", EShLangAll },
{ "max", nullptr, nullptr, "SVM,", "FIU,", EShLangAll },
{ "min", nullptr, nullptr, "SVM,", "FIU,", EShLangAll },
{ "modf", nullptr, nullptr, "SVM,>", "FIU,", EShLangAll },
{ "msad4", "V4", "U", "S,V2,V4", "U,,", EShLangAll },
{ "mul", "S", nullptr, "S,S", "FI,", EShLangAll },
{ "mul", "V", nullptr, "S,V", "FI,", EShLangAll },
......@@ -665,7 +665,7 @@ void TBuiltInParseablesHlsl::initialize(int /*version*/, EProfile /*profile*/, c
{ "rcp", nullptr, nullptr, "SVM", "FD", EShLangAll },
{ "reflect", nullptr, nullptr, "V,", "F,", EShLangAll },
{ "refract", nullptr, nullptr, "V,V,S", "F,,", EShLangAll },
{ "reversebits", nullptr, nullptr, "SV", "U", EShLangAll },
{ "reversebits", nullptr, nullptr, "SV", "UI", EShLangAll },
{ "round", nullptr, nullptr, "SVM", "F", EShLangAll },
{ "rsqrt", nullptr, nullptr, "SVM", "F", EShLangAll },
{ "saturate", nullptr, nullptr , "SVM", "F", EShLangAll },
......@@ -735,7 +735,7 @@ void TBuiltInParseablesHlsl::initialize(int /*version*/, EProfile /*profile*/, c
// RWTexture loads
{ "Load", "V4", nullptr, "!#,V", "FIU,I", EShLangAll },
// (RW)Buffer loads
{ "Load", "V4", nullptr, "~*1,V", "FIU,I", EShLangAll },
{ "Load", "V4", nullptr, "~*1,V", "FIU,I", EShLangAll },
{ "Gather", /*!O*/ "V4", nullptr, "%@,S,V", "FIU,S,F", EShLangAll },
{ "Gather", /* O*/ "V4", nullptr, "%@,S,V,V", "FIU,S,F,I", EShLangAll },
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment