Commit f420c424 by zmo@google.com

Add/remove more functions to use shims.

Remove normalize and add cos instead to avoid a crash on Mac with ATI cards (angle bug 193, 202). Also add atan and mod, as they are also buggy on Mac/Win with NVIDIA cards. Also try to minimize the number of emulated functions by adding masks for fragment/vertex shaders. ANGLEBUG=196 Review URL: http://codereview.appspot.com/4992047 git-svn-id: https://angleproject.googlecode.com/svn/trunk@748 736b8ea6-26fd-11df-bfd4-992fa37f6226
parent 2598ffff
#define MAJOR_VERSION 0
#define MINOR_VERSION 0
#define BUILD_VERSION 0
#define BUILD_REVISION 747
#define BUILD_REVISION 748
#define STRINGIFY(x) #x
#define MACRO_STRINGIFY(x) STRINGIFY(x)
......
......@@ -11,20 +11,110 @@
namespace {
// GLSL source for every emulated built-in, one overload per entry. Each shim
// wraps the real built-in and then touches the result (a clamp or a no-op
// multiply) before returning it.
// NOTE(review): entries are presumably looked up by TBuiltInFunction value, so
// the order here has to stay in sync with that enum -- confirm against
// OutputEmulatedFunctionDefinition().
const char* kFunctionEmulationSource[] = {
    "float webgl_normalize_emu(float a) { return normalize(a) * 1; }",
    "vec2 webgl_normalize_emu(vec2 a) { return normalize(a) * 1; }",
    "vec3 webgl_normalize_emu(vec3 a) { return normalize(a) * 1; }",
    "vec4 webgl_normalize_emu(vec4 a) { return normalize(a) * 1; }",

    "float webgl_abs_emu(float a) { float rt = abs(a); if (rt < 0.0) rt = 0.0; return rt; }",
    "vec2 webgl_abs_emu(vec2 a) { vec2 rt = abs(a); if (rt[0] < 0.0) rt[0] = 0.0; return rt; }",
    "vec3 webgl_abs_emu(vec3 a) { vec3 rt = abs(a); if (rt[0] < 0.0) rt[0] = 0.0; return rt; }",
    "vec4 webgl_abs_emu(vec4 a) { vec4 rt = abs(a); if (rt[0] < 0.0) rt[0] = 0.0; return rt; }",

    "float webgl_atan_emu(float y, float x) { float rt = atan(y, x); if (rt > 2.0) rt = 0.0; return rt; }",
    "vec2 webgl_atan_emu(vec2 y, vec2 x) { vec2 rt = atan(y, x); if (rt[0] > 2.0) rt[0] = 0.0; return rt; }",
    "vec3 webgl_atan_emu(vec3 y, vec3 x) { vec3 rt = atan(y, x); if (rt[0] > 2.0) rt[0] = 0.0; return rt; }",
    "vec4 webgl_atan_emu(vec4 y, vec4 x) { vec4 rt = atan(y, x); if (rt[0] > 2.0) rt[0] = 0.0; return rt; }",

    "float webgl_atan_emu(float y_over_x) { float rt = atan(y_over_x); if (rt > 2.0) rt = 0.0; return rt; }",
    "vec2 webgl_atan_emu(vec2 y_over_x) { vec2 rt = atan(y_over_x); if (rt[0] > 2.0) rt[0] = 0.0; return rt; }",
    "vec3 webgl_atan_emu(vec3 y_over_x) { vec3 rt = atan(y_over_x); if (rt[0] > 2.0) rt[0] = 0.0; return rt; }",
    "vec4 webgl_atan_emu(vec4 y_over_x) { vec4 rt = atan(y_over_x); if (rt[0] > 2.0) rt[0] = 0.0; return rt; }",

    "float webgl_cos_emu(float a) { return cos(a); }",
    "vec2 webgl_cos_emu(vec2 a) { return cos(a); }",
    "vec3 webgl_cos_emu(vec3 a) { return cos(a); }",
    "vec4 webgl_cos_emu(vec4 a) { return cos(a); }",

    "float webgl_mod_emu(float x, float y) { float rt = mod(x, y); if (rt > x) rt = 0.0; return rt; }",
    "vec2 webgl_mod_emu(vec2 x, vec2 y) { vec2 rt = mod(x, y); if (rt[0] > x[0]) rt[0] = 0.0; return rt; }",
    "vec3 webgl_mod_emu(vec3 x, vec3 y) { vec3 rt = mod(x, y); if (rt[0] > x[0]) rt[0] = 0.0; return rt; }",
    "vec4 webgl_mod_emu(vec4 x, vec4 y) { vec4 rt = mod(x, y); if (rt[0] > x[0]) rt[0] = 0.0; return rt; }",

    // The vector overloads must hold the result in a vector of the same size:
    // sign() of a vecN yields a vecN, and the shim indexes and returns it.
    "float webgl_sign_emu(float a) { float rt = sign(a); if (rt > 1.0) rt = 1.0; return rt; }",
    "vec2 webgl_sign_emu(vec2 a) { vec2 rt = sign(a); if (rt[0] > 1.0) rt[0] = 1.0; return rt; }",
    "vec3 webgl_sign_emu(vec3 a) { vec3 rt = sign(a); if (rt[0] > 1.0) rt[0] = 1.0; return rt; }",
    "vec4 webgl_sign_emu(vec4 a) { vec4 rt = sign(a); if (rt[0] > 1.0) rt[0] = 1.0; return rt; }",
};
// Per-function emulation switches applied when the emulator is built for a
// shader type other than SH_FRAGMENT_SHADER. The table is indexed by
// TBuiltInFunction; a false entry makes SetFunctionCalled() report that no
// emulation is needed for that overload. One row per function family, in
// enum order: scalar, 2-, 3- and 4-component overloads.
const bool kFunctionEmulationVertexMask[] = {
    true,  false, false, false,  // TFunctionAbs1 .. TFunctionAbs4
    true,  false, false, false,  // TFunctionAtan1 .. TFunctionAtan4
    false, true,  true,  true,   // TFunctionAtan1_1 .. TFunctionAtan4_4
    false, false, false, false,  // TFunctionCos1 .. TFunctionCos4
    false, true,  true,  true,   // TFunctionMod1_1 .. TFunctionMod4_4
    true,  false, false, false,  // TFunctionSign1 .. TFunctionSign4
    false                        // TFunctionUnknown
};
// Per-function emulation switches applied when the emulator is built for
// SH_FRAGMENT_SHADER. The table is indexed by TBuiltInFunction; a false entry
// makes SetFunctionCalled() report that no emulation is needed for that
// overload. One row per function family, in enum order.
const bool kFunctionEmulationFragmentMask[] = {
    false, false, false, false,  // TFunctionAbs1 .. TFunctionAbs4
    false, false, false, false,  // TFunctionAtan1 .. TFunctionAtan4
    false, false, false, false,  // TFunctionAtan1_1 .. TFunctionAtan4_4
#if defined(__APPLE__)
    // Work around an ATI driver bug on Mac that causes crashes.
    true,  true,  true,  true,   // TFunctionCos1 .. TFunctionCos4
#else
    false, false, false, false,  // TFunctionCos1 .. TFunctionCos4
#endif
    false, false, false, false,  // TFunctionMod1_1 .. TFunctionMod4_4
    false, false, false, false,  // TFunctionSign1 .. TFunctionSign4
    false                        // TFunctionUnknown
};
class BuiltInFunctionEmulationMarker : public TIntermTraverser {
public:
BuiltInFunctionEmulationMarker(BuiltInFunctionEmulator& emulator)
......@@ -43,15 +133,67 @@ public:
return true;
}
// Reports two-argument built-in calls to the emulator and flags the node when
// the emulator asks for the call to be replaced. Always returns true, so the
// traversal keeps descending into the node's children.
virtual bool visitAggregate(Visit visit, TIntermAggregate* node)
{
    // Marking is done once, on the way down.
    if (visit != PreVisit)
        return true;

    // Every aggregate built-in is let through here, not only the ones
    // currently known to be problematic; the emulator decides which of them
    // actually need a replacement.
    switch (node->getOp()) {
        case EOpLessThan:
        case EOpGreaterThan:
        case EOpLessThanEqual:
        case EOpGreaterThanEqual:
        case EOpVectorEqual:
        case EOpVectorNotEqual:
        case EOpMod:
        case EOpPow:
        case EOpAtan:
        case EOpMin:
        case EOpMax:
        case EOpClamp:
        case EOpMix:
        case EOpStep:
        case EOpSmoothStep:
        case EOpDistance:
        case EOpDot:
        case EOpCross:
        case EOpFaceForward:
        case EOpReflect:
        case EOpRefract:
        case EOpMul:
            break;
        default:
            return true;
    }

    // Only calls with exactly two arguments are handled for now.
    const TIntermSequence& args = node->getSequence();
    if (args.size() != 2)
        return true;

    TIntermTyped* first = args[0]->getAsTyped();
    TIntermTyped* second = args[1]->getAsTyped();
    if (!first || !second)
        return true;

    if (mEmulator.SetFunctionCalled(
            node->getOp(), first->getType(), second->getType())) {
        node->setUseEmulatedFunction();
    }
    return true;
}
private:
BuiltInFunctionEmulator& mEmulator;
};
} // anonymous namespace
BuiltInFunctionEmulator::BuiltInFunctionEmulator()
// Enables every function group and selects the per-function mask table for
// the given shader type: the fragment table for SH_FRAGMENT_SHADER, the
// vertex table for anything else.
BuiltInFunctionEmulator::BuiltInFunctionEmulator(ShShaderType shaderType)
    : mFunctionGroupMask(TFunctionGroupAll),
      mFunctionMask(shaderType == SH_FRAGMENT_SHADER
                        ? kFunctionEmulationFragmentMask
                        : kFunctionEmulationVertexMask)
{
}
void BuiltInFunctionEmulator::SetFunctionGroupMask(
......@@ -61,25 +203,28 @@ void BuiltInFunctionEmulator::SetFunctionGroupMask(
}
bool BuiltInFunctionEmulator::SetFunctionCalled(
TOperator op, const TType& returnType)
TOperator op, const TType& param)
{
TBuiltInFunction function = IdentifyFunction(op, returnType);
if (function == TFunctionUnknown)
TBuiltInFunction function = IdentifyFunction(op, param);
return SetFunctionCalled(function);
}
// Two-parameter entry point: resolves the call to a TBuiltInFunction from the
// operator and both argument types, then hands it to the enum-based overload.
// Returns that overload's result.
bool BuiltInFunctionEmulator::SetFunctionCalled(
    TOperator op, const TType& param1, const TType& param2)
{
    return SetFunctionCalled(IdentifyFunction(op, param1, param2));
}
bool BuiltInFunctionEmulator::SetFunctionCalled(
BuiltInFunctionEmulator::TBuiltInFunction function) {
if (function == TFunctionUnknown || mFunctionMask[function] == false)
return false;
for (size_t i = 0; i < mFunctions.size(); ++i) {
if (mFunctions[i] == function)
return true;
}
switch (function) {
case TFunctionNormalize1:
case TFunctionNormalize2:
case TFunctionNormalize3:
case TFunctionNormalize4:
if (mFunctionGroupMask & TFunctionGroupNormalize) {
mFunctions.push_back(function);
return true;
}
break;
case TFunctionAbs1:
case TFunctionAbs2:
case TFunctionAbs3:
......@@ -89,6 +234,37 @@ bool BuiltInFunctionEmulator::SetFunctionCalled(
return true;
}
break;
case TFunctionAtan1:
case TFunctionAtan2:
case TFunctionAtan3:
case TFunctionAtan4:
case TFunctionAtan1_1:
case TFunctionAtan2_2:
case TFunctionAtan3_3:
case TFunctionAtan4_4:
if (mFunctionGroupMask & TFunctionGroupAtan) {
mFunctions.push_back(function);
return true;
}
break;
case TFunctionCos1:
case TFunctionCos2:
case TFunctionCos3:
case TFunctionCos4:
if (mFunctionGroupMask & TFunctionGroupCos) {
mFunctions.push_back(function);
return true;
}
break;
case TFunctionMod1_1:
case TFunctionMod2_2:
case TFunctionMod3_3:
case TFunctionMod4_4:
if (mFunctionGroupMask & TFunctionGroupMod) {
mFunctions.push_back(function);
return true;
}
break;
case TFunctionSign1:
case TFunctionSign2:
case TFunctionSign3:
......@@ -125,20 +301,62 @@ void BuiltInFunctionEmulator::OutputEmulatedFunctionDefinition(
}
BuiltInFunctionEmulator::TBuiltInFunction
BuiltInFunctionEmulator::IdentifyFunction(TOperator op, const TType& returnType)
BuiltInFunctionEmulator::IdentifyFunction(
TOperator op, const TType& param)
{
unsigned int function = TFunctionUnknown;
if (op == EOpNormalize)
function = TFunctionNormalize1;
else if (op == EOpAbs)
function = TFunctionAbs1;
else if (op == EOpSign)
function = TFunctionSign1;
else
return static_cast<TBuiltInFunction>(function);
switch (op) {
case EOpAbs:
function = TFunctionAbs1;
break;
case EOpAtan:
function = TFunctionAtan1;
break;
case EOpCos:
function = TFunctionCos1;
break;
case EOpSign:
function = TFunctionSign1;
break;
default:
break;
}
if (function == TFunctionUnknown)
return TFunctionUnknown;
if (param.isVector())
function += param.getNominalSize() - 1;
return static_cast<TBuiltInFunction>(function);
}
if (returnType.isVector())
function += returnType.getNominalSize() - 1;
// Maps a two-parameter built-in call onto its TBuiltInFunction entry.
//
//   op      operator of the call being inspected.
//   param1  type of the first argument.
//   param2  type of the second argument.
//
// Returns TFunctionUnknown when the operator has no two-parameter emulation,
// or when the two argument types differ in vector-ness or nominal size.
// Otherwise returns the scalar entry for the operator, advanced by
// (nominal size - 1) for vector arguments so that it lands on the matching
// vec2/vec3/vec4 overload.
BuiltInFunctionEmulator::TBuiltInFunction
BuiltInFunctionEmulator::IdentifyFunction(
    TOperator op, const TType& param1, const TType& param2)
{
    // Right now for all the emulated functions with two parameters, the two
    // parameters have the same type.
    if (param1.isVector() != param2.isVector() ||
        param1.getNominalSize() != param2.getNominalSize() ||
        param1.getNominalSize() > 4)
        return TFunctionUnknown;

    // sign() is deliberately absent: it takes a single argument and is
    // resolved by the one-parameter overload, so a two-argument call must
    // never be mapped onto TFunctionSign*.
    unsigned int function = TFunctionUnknown;
    switch (op) {
        case EOpAtan:
            function = TFunctionAtan1_1;
            break;
        case EOpMod:
            function = TFunctionMod1_1;
            break;
        default:
            break;
    }
    if (function == TFunctionUnknown)
        return TFunctionUnknown;

    // The overloads of one function are adjacent in the enum, scalar first.
    if (param1.isVector())
        function += param1.getNominalSize() - 1;
    return static_cast<TBuiltInFunction>(function);
}
......
......@@ -7,6 +7,8 @@
#ifndef COMPILIER_BUILT_IN_FUNCTION_EMULATOR_H_
#define COMPILIER_BUILT_IN_FUNCTION_EMULATOR_H_
#include "GLSLANG/ShaderLang.h"
#include "compiler/InfoSink.h"
#include "compiler/intermediate.h"
......@@ -15,11 +17,16 @@
// emulated in certain os/drivers, assuming they are no more than 32.
//
enum TBuiltInFunctionGroup {
TFunctionGroupNormalize = 1 << 0,
TFunctionGroupAbs = 1 << 1,
TFunctionGroupSign = 1 << 2,
TFunctionGroupAll =
TFunctionGroupNormalize | TFunctionGroupAbs | TFunctionGroupSign
TFunctionGroupAbs = 1 << 0, // NVIDIA Win/Mac
TFunctionGroupAtan = 1 << 1, // NVIDIA Win/Mac
TFunctionGroupCos = 1 << 2, // ATI Mac
TFunctionGroupMod = 1 << 3, // NVIDIA Win/Mac
TFunctionGroupSign = 1 << 4, // NVIDIA Win/Mac
TFunctionGroupAll = TFunctionGroupAbs |
TFunctionGroupAtan |
TFunctionGroupCos |
TFunctionGroupMod |
TFunctionGroupSign
};
//
......@@ -29,7 +36,7 @@ enum TBuiltInFunctionGroup {
//
class BuiltInFunctionEmulator {
public:
BuiltInFunctionEmulator();
BuiltInFunctionEmulator(ShShaderType shaderType);
// functionGroupMask is a bitmap of TBuiltInFunctionGroup.
// We only emulate functions that are marked by this mask and are actually
......@@ -42,10 +49,9 @@ public:
// becomes a no-op.
// Returns true if the function call needs to be replaced with an emulated
// one.
// TODO(zmo): for now, an operator and a return type is enough to identify
// the function we want to emulate. Should make this more flexible to
// handle any functions.
bool SetFunctionCalled(TOperator op, const TType& returnType);
bool SetFunctionCalled(TOperator op, const TType& param);
bool SetFunctionCalled(
TOperator op, const TType& param1, const TType& param2);
// Output function emulation definition. This should be before any other
// shader source.
......@@ -61,14 +67,30 @@ private:
// Built-in functions.
//
enum TBuiltInFunction {
TFunctionNormalize1 = 0, // float normalize(float);
TFunctionNormalize2, // vec2 normalize(vec2);
TFunctionNormalize3, // vec3 normalize(vec3);
TFunctionNormalize4, // fec4 normalize(vec4);
TFunctionAbs1, // float abs(float);
TFunctionAbs1 = 0, // float abs(float);
TFunctionAbs2, // vec2 abs(vec2);
TFunctionAbs3, // vec3 abs(vec3);
TFunctionAbs4, // vec4 abs(vec4);
TFunctionAtan1, // float atan(float);
TFunctionAtan2, // vec2 atan(vec2);
TFunctionAtan3, // vec3 atan(vec3);
TFunctionAtan4, // vec4 atan(vec4);
TFunctionAtan1_1, // float atan(float, float);
TFunctionAtan2_2, // vec2 atan(vec2, vec2);
TFunctionAtan3_3, // vec3 atan(vec3, vec3);
TFunctionAtan4_4, // vec4 atan(vec4, vec4);
TFunctionCos1, // float cos(float);
TFunctionCos2, // vec2 cos(vec2);
TFunctionCos3, // vec3 cos(vec3);
TFunctionCos4, // vec4 cos(vec4);
TFunctionMod1_1, // float mod(float, float);
TFunctionMod2_2, // vec2 mod(vec2, vec2);
TFunctionMod3_3, // vec3 mod(vec3, vec3);
TFunctionMod4_4, // vec4 mod(vec4, vec4);
TFunctionSign1, // float sign(float);
TFunctionSign2, // vec2 sign(vec2);
TFunctionSign3, // vec3 sign(vec3);
......@@ -76,11 +98,16 @@ private:
TFunctionUnknown
};
// Same TODO as SetFunctionCalled.
TBuiltInFunction IdentifyFunction(TOperator op, const TType& returnType);
TBuiltInFunction IdentifyFunction(TOperator op, const TType& param);
TBuiltInFunction IdentifyFunction(
TOperator op, const TType& param1, const TType& param2);
bool SetFunctionCalled(TBuiltInFunction function);
TVector<TBuiltInFunction> mFunctions;
unsigned int mFunctionGroupMask; // a bitmap of TBuiltInFunctionGroup.
const bool* mFunctionMask; // a boolean flag for each function.
};
#endif // COMPILIER_BUILT_IN_FUNCTION_EMULATOR_H_
......@@ -88,7 +88,8 @@ TShHandleBase::~TShHandleBase() {
TCompiler::TCompiler(ShShaderType type, ShShaderSpec spec)
: shaderType(type),
shaderSpec(spec)
shaderSpec(spec),
builtInFunctionEmulator(type)
{
}
......
......@@ -436,6 +436,8 @@ bool TOutputGLSLBase::visitAggregate(Visit visit, TIntermAggregate* node)
{
bool visitChildren = true;
TInfoSinkBase& out = objSink();
TString preString;
bool delayedWrite = false;
switch (node->getOp())
{
case EOpSequence: {
......@@ -582,34 +584,38 @@ bool TOutputGLSLBase::visitAggregate(Visit visit, TIntermAggregate* node)
}
break;
case EOpLessThan: writeTriplet(visit, "lessThan(", ", ", ")"); break;
case EOpGreaterThan: writeTriplet(visit, "greaterThan(", ", ", ")"); break;
case EOpLessThanEqual: writeTriplet(visit, "lessThanEqual(", ", ", ")"); break;
case EOpGreaterThanEqual: writeTriplet(visit, "greaterThanEqual(", ", ", ")"); break;
case EOpVectorEqual: writeTriplet(visit, "equal(", ", ", ")"); break;
case EOpVectorNotEqual: writeTriplet(visit, "notEqual(", ", ", ")"); break;
case EOpLessThan: preString = "lessThan("; delayedWrite = true; break;
case EOpGreaterThan: preString = "greaterThan("; delayedWrite = true; break;
case EOpLessThanEqual: preString = "lessThanEqual("; delayedWrite = true; break;
case EOpGreaterThanEqual: preString = "greaterThanEqual("; delayedWrite = true; break;
case EOpVectorEqual: preString = "equal("; delayedWrite = true; break;
case EOpVectorNotEqual: preString = "notEqual("; delayedWrite = true; break;
case EOpComma: writeTriplet(visit, NULL, ", ", NULL); break;
case EOpMod: writeTriplet(visit, "mod(", ", ", ")"); break;
case EOpPow: writeTriplet(visit, "pow(", ", ", ")"); break;
case EOpAtan: writeTriplet(visit, "atan(", ", ", ")"); break;
case EOpMin: writeTriplet(visit, "min(", ", ", ")"); break;
case EOpMax: writeTriplet(visit, "max(", ", ", ")"); break;
case EOpClamp: writeTriplet(visit, "clamp(", ", ", ")"); break;
case EOpMix: writeTriplet(visit, "mix(", ", ", ")"); break;
case EOpStep: writeTriplet(visit, "step(", ", ", ")"); break;
case EOpSmoothStep: writeTriplet(visit, "smoothstep(", ", ", ")"); break;
case EOpDistance: writeTriplet(visit, "distance(", ", ", ")"); break;
case EOpDot: writeTriplet(visit, "dot(", ", ", ")"); break;
case EOpCross: writeTriplet(visit, "cross(", ", ", ")"); break;
case EOpFaceForward: writeTriplet(visit, "faceforward(", ", ", ")"); break;
case EOpReflect: writeTriplet(visit, "reflect(", ", ", ")"); break;
case EOpRefract: writeTriplet(visit, "refract(", ", ", ")"); break;
case EOpMul: writeTriplet(visit, "matrixCompMult(", ", ", ")"); break;
case EOpMod: preString = "mod("; delayedWrite = true; break;
case EOpPow: preString = "pow("; delayedWrite = true; break;
case EOpAtan: preString = "atan("; delayedWrite = true; break;
case EOpMin: preString = "min("; delayedWrite = true; break;
case EOpMax: preString = "max("; delayedWrite = true; break;
case EOpClamp: preString = "clamp("; delayedWrite = true; break;
case EOpMix: preString = "mix("; delayedWrite = true; break;
case EOpStep: preString = "step("; delayedWrite = true; break;
case EOpSmoothStep: preString = "smoothstep("; delayedWrite = true; break;
case EOpDistance: preString = "distance("; delayedWrite = true; break;
case EOpDot: preString = "dot("; delayedWrite = true; break;
case EOpCross: preString = "cross("; delayedWrite = true; break;
case EOpFaceForward: preString = "faceforward("; delayedWrite = true; break;
case EOpReflect: preString = "reflect("; delayedWrite = true; break;
case EOpRefract: preString = "refract("; delayedWrite = true; break;
case EOpMul: preString = "matrixCompMult("; delayedWrite = true; break;
default: UNREACHABLE(); break;
}
if (delayedWrite && visit == PreVisit && node->getUseEmulatedFunction())
preString = BuiltInFunctionEmulator::GetEmulatedFunctionName(preString);
if (delayedWrite)
writeTriplet(visit, preString.c_str(), ", ", ")");
return visitChildren;
}
......
......@@ -425,7 +425,10 @@ public:
protected:
TIntermTyped* operand;
bool useEmulatedFunction; // if set to true, replace the function call by an emulated one.
// If set to true, replace the built-in function call with an emulated one
// to work around driver bugs.
bool useEmulatedFunction;
};
typedef TVector<TIntermNode*> TIntermSequence;
......@@ -436,8 +439,8 @@ typedef TMap<TString, TString> TPragmaTable;
//
class TIntermAggregate : public TIntermOperator {
public:
TIntermAggregate() : TIntermOperator(EOpNull), userDefined(false), pragmaTable(0), endLine(0) { }
TIntermAggregate(TOperator o) : TIntermOperator(o), pragmaTable(0) { }
TIntermAggregate() : TIntermOperator(EOpNull), userDefined(false), pragmaTable(0), endLine(0), useEmulatedFunction(false) { }
TIntermAggregate(TOperator o) : TIntermOperator(o), pragmaTable(0), useEmulatedFunction(false) { }
~TIntermAggregate() { delete pragmaTable; }
virtual TIntermAggregate* getAsAggregate() { return this; }
......@@ -460,6 +463,9 @@ public:
void setEndLine(TSourceLoc line) { endLine = line; }
TSourceLoc getEndLine() const { return endLine; }
// Flags this call so that it is replaced by its emulated counterpart.
void setUseEmulatedFunction()
{
    useEmulatedFunction = true;
}
// Returns the current value of the emulation flag.
bool getUseEmulatedFunction()
{
    return useEmulatedFunction;
}
protected:
TIntermAggregate(const TIntermAggregate&); // disallow copy constructor
TIntermAggregate& operator=(const TIntermAggregate&); // disallow assignment operator
......@@ -471,6 +477,10 @@ protected:
bool debug;
TPragmaTable *pragmaTable;
TSourceLoc endLine;
// If set to true, replace the built-in function call with an emulated one
// to work around driver bugs.
bool useEmulatedFunction;
};
//
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment