Commit f67f9d7e by John Kessenich Committed by GitHub

Merge pull request #740 from steve-lunarg/f16tof32

HLSL: add f16tof32 and f32tof16 decompositions.
parents 42e33c9b 86b510ef
float PixelShaderFunctionS(float inF0) float PixelShaderFunctionS(uint inF0)
{ {
f32tof16(inF0); return f16tof32(inF0);
return 0.0;
} }
float1 PixelShaderFunction1(float1 inF0) float1 PixelShaderFunction1(uint1 inF0)
{ {
// TODO: ... add when float1 prototypes are generated return f16tof32(inF0);
return 0.0;
} }
float2 PixelShaderFunction2(float2 inF0) float2 PixelShaderFunction2(uint2 inF0)
{ {
f32tof16(inF0); return f16tof32(inF0);
return float2(1,2);
} }
float3 PixelShaderFunction3(float3 inF0) float3 PixelShaderFunction3(uint3 inF0)
{ {
f32tof16(inF0); return f16tof32(inF0);
return float3(1,2,3);
} }
float4 PixelShaderFunction(float4 inF0) float4 PixelShaderFunction(uint4 inF0)
{ {
f32tof16(inF0); return f16tof32(inF0);
return float4(1,2,3,4);
} }
// Takes no inputs and returns a constant 0 for the SV_Target0 output.
// It does not call any other function.
float4 main() : SV_Target0
{
return 0;
}
// Scalar case: applies the f32tof16 intrinsic to a float and returns the uint result.
uint PixelShaderFunctionS(float inF0)
{
return f32tof16(inF0);
}
// One-component vector case: applies f32tof16 to a float1 and returns a uint1.
uint1 PixelShaderFunction1(float1 inF0)
{
return f32tof16(inF0);
}
// Two-component vector case: applies f32tof16 to a float2 and returns a uint2.
uint2 PixelShaderFunction2(float2 inF0)
{
return f32tof16(inF0);
}
// Three-component vector case: applies f32tof16 to a float3 and returns a uint3.
uint3 PixelShaderFunction3(float3 inF0)
{
return f32tof16(inF0);
}
// Four-component vector case: applies f32tof16 to a float4 and returns a uint4.
uint4 PixelShaderFunction(float4 inF0)
{
return f32tof16(inF0);
}
// Takes no inputs and returns a constant 0 for the SV_Target0 output.
// It does not call any of the f32tof16 functions above.
// NOTE(review): presumably the "main" entry point named for this shader in the
// test list — confirm against the test registration.
float4 main() : SV_Target0
{
return 0;
}
...@@ -132,7 +132,8 @@ INSTANTIATE_TEST_CASE_P( ...@@ -132,7 +132,8 @@ INSTANTIATE_TEST_CASE_P(
{"hlsl.intrinsics.evalfns.frag", "main"}, {"hlsl.intrinsics.evalfns.frag", "main"},
{"hlsl.intrinsics.d3dcolortoubyte4.frag", "main"}, {"hlsl.intrinsics.d3dcolortoubyte4.frag", "main"},
{"hlsl.intrinsics.double.frag", "PixelShaderFunction"}, {"hlsl.intrinsics.double.frag", "PixelShaderFunction"},
{"hlsl.intrinsics.f1632.frag", "PixelShaderFunction"}, {"hlsl.intrinsics.f1632.frag", "main"},
{"hlsl.intrinsics.f3216.frag", "main"},
{"hlsl.intrinsics.frag", "main"}, {"hlsl.intrinsics.frag", "main"},
{"hlsl.intrinsics.lit.frag", "PixelShaderFunction"}, {"hlsl.intrinsics.lit.frag", "PixelShaderFunction"},
{"hlsl.intrinsics.negative.comp", "ComputeShaderFunction"}, {"hlsl.intrinsics.negative.comp", "ComputeShaderFunction"},
......
...@@ -3564,10 +3564,107 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*& ...@@ -3564,10 +3564,107 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*&
} }
case EOpF16tof32: case EOpF16tof32:
{
    // Decompose f16tof32: the input is a uvecN with the low 16 bits of each
    // component holding a float16; the result is the corresponding floatN.
    // Each component is passed through EOpUnpackHalf2x16 (which yields a
    // 2-component float vector) and element 0, the low-order half, is kept.
    TIntermTyped* argValue = node->getAsUnaryNode()->getOperand();

    // Index selecting the low-order half of the unpacked pair. It is the
    // index operand of EOpIndexDirect, so it is built as an integer constant
    // (same form as idxConst below) rather than as a float 0.0.
    TIntermTyped* zero = intermediate.addConstantUnion(0, loc, true);

    const int vecSize = argValue->getType().getVectorSize();

    TOperator constructOp = EOpNull;
    switch (vecSize) {
    case 1: constructOp = EOpNull;          break; // direct use, no construct needed
    case 2: constructOp = EOpConstructVec2; break;
    case 3: constructOp = EOpConstructVec3; break;
    case 4: constructOp = EOpConstructVec4; break;
    default: assert(0); break;
    }

    // For the scalar case no constructor is needed: the single converted
    // value becomes the replacement node directly.
    TIntermAggregate* result = (vecSize > 1) ? new TIntermAggregate(constructOp) : nullptr;

    if (result) {
        result->setType(TType(EbtFloat, EvqTemporary, vecSize));
        result->setLoc(loc);
    }

    for (int idx = 0; idx < vecSize; ++idx) {
        TIntermTyped* idxConst = intermediate.addConstantUnion(idx, loc, true);

        // Vector input: pick out component idx. Scalar input: use it as-is.
        TIntermTyped* component = argValue->getType().isVector() ?
            intermediate.addIndex(EOpIndexDirect, argValue, idxConst, loc) : argValue;

        // A freshly created index node needs its scalar type set explicitly.
        if (component != argValue)
            component->setType(TType(argValue->getBasicType(), EvqTemporary));

        TIntermTyped* unpackOp = new TIntermUnary(EOpUnpackHalf2x16);
        unpackOp->setType(TType(EbtFloat, EvqTemporary, 2));
        unpackOp->getAsUnaryNode()->setOperand(component);
        unpackOp->setLoc(loc);

        TIntermTyped* lowOrder = intermediate.addIndex(EOpIndexDirect, unpackOp, zero, loc);

        if (result != nullptr)
            result->getSequence().push_back(lowOrder);
        else
            node = lowOrder; // scalar: the converted value is the whole result
    }

    // Vector: the constructor aggregate collecting every converted component
    // replaces the intrinsic call.
    if (result != nullptr)
        node = result;

    break;
}
case EOpF32tof16: case EOpF32tof16:
{ {
// Temporary until decomposition is available. // input floatN converted to 16 bit float in low order bits of each component of uintN
error(loc, "unimplemented intrinsic: handle natively", "f32tof16", ""); TIntermTyped* argValue = node->getAsUnaryNode()->getOperand();
TIntermTyped* zero = intermediate.addConstantUnion(0.0, EbtFloat, loc, true);
const int vecSize = argValue->getType().getVectorSize();
TOperator constructOp = EOpNull;
switch (vecSize) {
case 1: constructOp = EOpNull; break; // direct use, no construct needed
case 2: constructOp = EOpConstructUVec2; break;
case 3: constructOp = EOpConstructUVec3; break;
case 4: constructOp = EOpConstructUVec4; break;
default: assert(0); break;
}
// For scalar case, we don't need to construct another type.
TIntermAggregate* result = (vecSize > 1) ? new TIntermAggregate(constructOp) : nullptr;
if (result) {
result->setType(TType(EbtUint, EvqTemporary, vecSize));
result->setLoc(loc);
}
for (int idx = 0; idx < vecSize; ++idx) {
TIntermTyped* idxConst = intermediate.addConstantUnion(idx, loc, true);
TIntermTyped* component = argValue->getType().isVector() ?
intermediate.addIndex(EOpIndexDirect, argValue, idxConst, loc) : argValue;
if (component != argValue)
component->setType(TType(argValue->getBasicType(), EvqTemporary));
TIntermAggregate* vec2ComponentAndZero = new TIntermAggregate(EOpConstructVec2);
vec2ComponentAndZero->getSequence().push_back(component);
vec2ComponentAndZero->getSequence().push_back(zero);
vec2ComponentAndZero->setType(TType(EbtFloat, EvqTemporary, 2));
vec2ComponentAndZero->setLoc(loc);
TIntermTyped* packOp = new TIntermUnary(EOpPackHalf2x16);
packOp->getAsUnaryNode()->setOperand(vec2ComponentAndZero);
packOp->setLoc(loc);
packOp->setType(TType(EbtUint, EvqTemporary));
if (result != nullptr) {
result->getSequence().push_back(packOp);
node = result;
} else {
node = packOp;
}
}
break; break;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment