Commit c3d95f36 by Alexis Hetu Committed by Alexis Hétu

Matrix determinant and inverse implementation

The determinant is implemented directly in ShaderCore in order to avoid having to allocate temporaries manually in OutputASM. For now, the implementation of the inverse matrix is very simple, i.e., it doesn't attempt to re-use results from the cofactor matrix computation to compute the determinant or do any other kind of optimization, but it works. Change-Id: I0fc70133809ae2752dc567bf58b60d7af7a88009 Reviewed-on: https://swiftshader-review.googlesource.com/4000 Tested-by: Alexis Hétu <sugoi@google.com> Reviewed-by: Alexis Hétu <sugoi@google.com>
parent 9b3388ed
...@@ -636,6 +636,74 @@ namespace glsl ...@@ -636,6 +636,74 @@ namespace glsl
return true; return true;
} }
// Emits one instruction that writes a determinant into a single component of
// 'result'.
//
// When 'col' or 'row' is negative, the plain determinant of the size x size
// matrix held in 'arg' is produced. When both are non-negative, the instruction
// instead produces the signed cofactor obtained by leaving out column 'col' and
// row 'row' of the (size + 1) x (size + 1) matrix held in 'arg'.
//
// Source register index offsets pick the matrix columns and the source swizzle
// picks the rows. 'outCol' is added to the destination register index and
// 'outRow' selects the one destination component enabled in the write mask.
void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)
{
	// A cofactor is requested only when both a column and a row are given.
	const bool cofactor = (col >= 0) && (row >= 0);

	// Cofactors at odd (col + row) positions are negative. The sign is applied
	// by exchanging two source columns of the determinant.
	const bool flipSign = cofactor && (((col ^ row) & 0x01) != 0);

	switch(size)
	{
	case 1:   // Used for cofactor computation only
		{
			// The cofactor of a 2x2 matrix is a single element of that matrix:
			// the diagonally opposite one for diagonal positions, or the
			// negated element at the transposed position otherwise.
			sw::Shader::Opcode opcode = sw::Shader::OPCODE_NEG;
			int sourceColumn = row;
			int sourceComponent = col;

			if(row == col)
			{
				opcode = sw::Shader::OPCODE_MOV;
				sourceColumn = 1 - row;
				sourceComponent = 1 - col;
			}

			Instruction *element = emit(opcode, result, arg);
			element->src[0].index += sourceColumn;
			element->src[0].swizzle = 0x55 * sourceComponent;   // Replicate the chosen component
			element->dst.index += outCol;
			element->dst.mask = 1 << outRow;
		}
		break;
	case 2:
		{
			// Row pairs kept when row 0, 1 or 2 is left out; the last entry is
			// used for a plain 2x2 determinant.
			static const unsigned int rowPairs[3] = { 0x99, 0x88, 0x44 };   // xy?? : yzyz, xzxz, xyxy

			// Columns fed to the instruction: every column at or after the
			// left-out one shifts up by one.
			int column[2] = { 0, 1 };

			if(cofactor)
			{
				for(int k = 0; k < 2; ++k)
				{
					if(col <= k)
					{
						column[k] = k + 1;
					}
				}
			}

			if(flipSign)
			{
				const int swapped = column[0];
				column[0] = column[1];
				column[1] = swapped;
			}

			const unsigned int rows = rowPairs[cofactor ? row : 2];

			Instruction *det = emit(sw::Shader::OPCODE_DET2, result, arg, arg);

			for(int k = 0; k < 2; ++k)
			{
				det->src[k].index += column[k];
				det->src[k].swizzle = rows;
			}

			det->dst.index += outCol;
			det->dst.mask = 1 << outRow;
		}
		break;
	case 3:
		{
			// Row triples kept when row 0, 1, 2 or 3 is left out; the last entry
			// is used for a plain 3x3 determinant.
			static const unsigned int rowTriples[4] = { 0xF9, 0xF8, 0xF4, 0xE4 };   // xyz? : yzww, xzww, xyww, xyzw

			// Columns fed to the instruction: every column at or after the
			// left-out one shifts up by one.
			int column[3] = { 0, 1, 2 };

			if(cofactor)
			{
				for(int k = 0; k < 3; ++k)
				{
					if(col <= k)
					{
						column[k] = k + 1;
					}
				}
			}

			if(flipSign)
			{
				// The first column stays in place; the last two are exchanged.
				const int swapped = column[1];
				column[1] = column[2];
				column[2] = swapped;
			}

			const unsigned int rows = rowTriples[cofactor ? row : 3];

			Instruction *det = emit(sw::Shader::OPCODE_DET3, result, arg, arg, arg);

			for(int k = 0; k < 3; ++k)
			{
				det->src[k].index += column[k];
				det->src[k].swizzle = rows;
			}

			det->dst.index += outCol;
			det->dst.mask = 1 << outRow;
		}
		break;
	case 4:
		{
			// Plain 4x4 determinant: source k reads column k of 'arg'.
			Instruction *det = emit(sw::Shader::OPCODE_DET4, result, arg, arg, arg, arg);

			for(int k = 1; k < 4; ++k)
			{
				det->src[k].index += k;
			}

			det->dst.index += outCol;
			det->dst.mask = 1 << outRow;
		}
		break;
	default:
		UNREACHABLE(size);
		break;
	}
}
bool OutputASM::visitUnary(Visit visit, TIntermUnary *node) bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)
{ {
if(currentScope != emitScope) if(currentScope != emitScope)
...@@ -807,6 +875,48 @@ namespace glsl ...@@ -807,6 +875,48 @@ namespace glsl
} }
} }
break; break;
case EOpDeterminant:
if(visit == PostVisit)
{
int size = arg->getNominalSize();
ASSERT(size == arg->getSecondarySize());
emitDeterminant(result, arg, size);
}
break;
case EOpInverse:
if(visit == PostVisit)
{
int size = arg->getNominalSize();
ASSERT(size == arg->getSecondarySize());
// Compute transposed matrix of cofactors
for(int i = 0; i < size; ++i)
{
for(int j = 0; j < size; ++j)
{
// For a 2x2 matrix, the cofactor is simply a transposed move or negate
// For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant
emitDeterminant(result, arg, size - 1, j, i, i, j);
}
}
// Compute 1 / determinant
Temporary invDet(this);
emitDeterminant(&invDet, arg, size);
Constant one(1.0f, 1.0f, 1.0f, 1.0f);
Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);
div->src[1].swizzle = 0x00; // xxxx
// Divide transposed matrix of cofactors by determinant
for(int i = 0; i < size; ++i)
{
Instruction *div = emit(sw::Shader::OPCODE_MUL, result, result, &invDet);
div->src[0].index += i;
div->dst.index += i;
}
}
break;
default: UNREACHABLE(node->getOp()); default: UNREACHABLE(node->getOp());
} }
...@@ -1493,7 +1603,7 @@ namespace glsl ...@@ -1493,7 +1603,7 @@ namespace glsl
return IsSampler(type.getBasicType()) && (type.getQualifier() == EvqUniform || type.getQualifier() == EvqTemporary); return IsSampler(type.getBasicType()) && (type.getQualifier() == EvqUniform || type.getQualifier() == EvqTemporary);
} }
Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, int index) Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, int index)
{ {
if(isSamplerRegister(dst)) if(isSamplerRegister(dst))
{ {
...@@ -1513,6 +1623,7 @@ namespace glsl ...@@ -1513,6 +1623,7 @@ namespace glsl
argument(instruction->src[0], src0, index); argument(instruction->src[0], src0, index);
argument(instruction->src[1], src1, index); argument(instruction->src[1], src1, index);
argument(instruction->src[2], src2, index); argument(instruction->src[2], src2, index);
argument(instruction->src[3], src3, index);
shader->append(instruction); shader->append(instruction);
...@@ -1568,7 +1679,7 @@ namespace glsl ...@@ -1568,7 +1679,7 @@ namespace glsl
{ {
for(int index = 0; index < dst->elementRegisterCount(); index++) for(int index = 0; index < dst->elementRegisterCount(); index++)
{ {
emit(op, dst, src0, src1, src2, index); emit(op, dst, src0, src1, src2, 0, index);
} }
} }
......
...@@ -176,11 +176,12 @@ namespace glsl ...@@ -176,11 +176,12 @@ namespace glsl
virtual bool visitBranch(Visit visit, TIntermBranch*); virtual bool visitBranch(Visit visit, TIntermBranch*);
sw::Shader::Opcode getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const; sw::Shader::Opcode getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const;
Instruction *emit(sw::Shader::Opcode op, TIntermTyped *dst = 0, TIntermNode *src0 = 0, TIntermNode *src1 = 0, TIntermNode *src2 = 0, int index = 0); Instruction *emit(sw::Shader::Opcode op, TIntermTyped *dst = 0, TIntermNode *src0 = 0, TIntermNode *src1 = 0, TIntermNode *src2 = 0, TIntermNode *src3 = 0, int index = 0);
Instruction *emitCast(TIntermTyped *dst, TIntermTyped *src); Instruction *emitCast(TIntermTyped *dst, TIntermTyped *src);
void emitBinary(sw::Shader::Opcode op, TIntermTyped *dst = 0, TIntermNode *src0 = 0, TIntermNode *src1 = 0, TIntermNode *src2 = 0); void emitBinary(sw::Shader::Opcode op, TIntermTyped *dst = 0, TIntermNode *src0 = 0, TIntermNode *src1 = 0, TIntermNode *src2 = 0);
void emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1 = 0); void emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1 = 0);
void emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index = 0); void emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index = 0);
void emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col = -1, int row = -1, int outCol = 0, int outRow = 0);
void argument(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index = 0); void argument(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index = 0);
void copy(TIntermTyped *dst, TIntermNode *src, int offset = 0); void copy(TIntermTyped *dst, TIntermNode *src, int offset = 0);
void assignLvalue(TIntermTyped *dst, TIntermTyped *src); void assignLvalue(TIntermTyped *dst, TIntermTyped *src);
......
...@@ -171,6 +171,9 @@ namespace sw ...@@ -171,6 +171,9 @@ namespace sw
case Shader::OPCODE_DP2ADD: dp2add(d, s0, s1, s2); break; case Shader::OPCODE_DP2ADD: dp2add(d, s0, s1, s2); break;
case Shader::OPCODE_DP3: dp3(d, s0, s1); break; case Shader::OPCODE_DP3: dp3(d, s0, s1); break;
case Shader::OPCODE_DP4: dp4(d, s0, s1); break; case Shader::OPCODE_DP4: dp4(d, s0, s1); break;
case Shader::OPCODE_DET2: det2(d, s0, s1); break;
case Shader::OPCODE_DET3: det3(d, s0, s1, s2); break;
case Shader::OPCODE_DET4: det4(d, s0, s1, s2, s3); break;
case Shader::OPCODE_CMP0: cmp0(d, s0, s1, s2); break; case Shader::OPCODE_CMP0: cmp0(d, s0, s1, s2); break;
case Shader::OPCODE_ICMP: icmp(d, s0, s1, control); break; case Shader::OPCODE_ICMP: icmp(d, s0, s1, control); break;
case Shader::OPCODE_UCMP: ucmp(d, s0, s1, control); break; case Shader::OPCODE_UCMP: ucmp(d, s0, s1, control); break;
......
...@@ -764,6 +764,9 @@ namespace sw ...@@ -764,6 +764,9 @@ namespace sw
case OPCODE_DIST4: return "dist4"; case OPCODE_DIST4: return "dist4";
case OPCODE_DP3: return "dp3"; case OPCODE_DP3: return "dp3";
case OPCODE_DP4: return "dp4"; case OPCODE_DP4: return "dp4";
case OPCODE_DET2: return "det2";
case OPCODE_DET3: return "det3";
case OPCODE_DET4: return "det4";
case OPCODE_MIN: return "min"; case OPCODE_MIN: return "min";
case OPCODE_IMIN: return "imin"; case OPCODE_IMIN: return "imin";
case OPCODE_UMIN: return "umin"; case OPCODE_UMIN: return "umin";
......
...@@ -188,6 +188,9 @@ namespace sw ...@@ -188,6 +188,9 @@ namespace sw
OPCODE_B2I, // Bool to int OPCODE_B2I, // Bool to int
OPCODE_U2B, // Uint to bool OPCODE_U2B, // Uint to bool
OPCODE_B2U, // Bool to uint OPCODE_B2U, // Bool to uint
OPCODE_DET2,
OPCODE_DET3,
OPCODE_DET4,
OPCODE_ALL, OPCODE_ALL,
OPCODE_ANY, OPCODE_ANY,
OPCODE_NEG, OPCODE_NEG,
......
...@@ -1139,6 +1139,34 @@ namespace sw ...@@ -1139,6 +1139,34 @@ namespace sw
Float4 tw = Min(Max((x.w - edge0.w) / (edge1.w - edge0.w), Float4(0.0f)), Float4(1.0f)); dst.w = tw * tw * (Float4(3.0f) - Float4(2.0f) * tw); Float4 tw = Min(Max((x.w - edge0.w) / (edge1.w - edge0.w), Float4(0.0f)), Float4(1.0f)); dst.w = tw * tw * (Float4(3.0f) - Float4(2.0f) * tw);
} }
// Computes the 2x2 determinant formed by the x and y components of 'src0' and
// 'src1', and replicates it into all four components of 'dst'.
void ShaderCore::det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	// Product of the main diagonal minus product of the other diagonal.
	dst.x = (src0.x * src1.y) - (src0.y * src1.x);

	// Broadcast the scalar result to the remaining components.
	dst.y = dst.x;
	dst.z = dst.x;
	dst.w = dst.x;
}
// Computes the 3x3 determinant of 'src0', 'src1' and 'src2' into 'dst' by
// chaining two helpers: crs() on src1 and src2, then dp3() of that result
// with src0.
// NOTE(review): presumably crs() is the cross product and dp3() the
// three-component dot product, which makes this the scalar triple product
// src0 . (src1 x src2) - confirm against their definitions.
void ShaderCore::det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
{
// 'dst' temporarily holds the intermediate result of crs().
crs(dst, src1, src2);
// 'dst' is both an input and the output here, so the call order matters.
dp3(dst, dst, src0);
}
// Computes the 4x4 determinant of 'src0'..'src3' by expanding along 'src0',
// and replicates it into all four components of 'dst'.
//
// 'dst' is used as scratch storage while the sources are still being read, so
// it must not refer to the same storage as any of the sources.
void ShaderCore::det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3)
{
	// 2x2 sub-determinants built from the z and w components of src1..src3.
	// They are shared by the src0.x and src0.y terms below.
	dst.x = src2.z * src3.w - src2.w * src3.z;
	dst.y = src1.w * src3.z - src1.z * src3.w;
	dst.z = src1.z * src2.w - src1.w * src2.z;

	// Accumulate the four terms of the expansion in dst.w, in the order
	// ((x-term - y-term) + z-term) + w-term.
	dst.w = src0.x * (src1.y * dst.x + src2.y * dst.y + src3.y * dst.z);

	dst.w = dst.w - src0.y * (src1.x * dst.x + src2.x * dst.y + src3.x * dst.z);

	dst.w = dst.w + src0.z * (src1.x * (src2.y * src3.w - src2.w * src3.y) +
	                          src2.x * (src1.w * src3.y - src1.y * src3.w) +
	                          src3.x * (src1.y * src2.w - src1.w * src2.y));

	dst.w = dst.w + src0.w * (src1.x * (src2.z * src3.y - src2.y * src3.z) +
	                          src2.x * (src1.y * src3.z - src1.z * src3.y) +
	                          src3.x * (src1.z * src2.y - src1.y * src2.z));

	// Broadcast the scalar result, overwriting the scratch values.
	dst.x = dst.y = dst.z = dst.w;
}
void ShaderCore::frc(Vector4f &dst, const Vector4f &src) void ShaderCore::frc(Vector4f &dst, const Vector4f &src)
{ {
dst.x = Frac(src.x); dst.x = Frac(src.x);
......
...@@ -284,6 +284,9 @@ namespace sw ...@@ -284,6 +284,9 @@ namespace sw
void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3);
void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
......
...@@ -102,6 +102,7 @@ namespace sw ...@@ -102,6 +102,7 @@ namespace sw
Src src0 = instruction->src[0]; Src src0 = instruction->src[0];
Src src1 = instruction->src[1]; Src src1 = instruction->src[1];
Src src2 = instruction->src[2]; Src src2 = instruction->src[2];
Src src3 = instruction->src[3];
bool predicate = instruction->predicate; bool predicate = instruction->predicate;
Control control = instruction->control; Control control = instruction->control;
...@@ -112,10 +113,12 @@ namespace sw ...@@ -112,10 +113,12 @@ namespace sw
Vector4f s0; Vector4f s0;
Vector4f s1; Vector4f s1;
Vector4f s2; Vector4f s2;
Vector4f s3;
if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterF(r, src0); if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterF(r, src0);
if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterF(r, src1); if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterF(r, src1);
if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterF(r, src2); if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterF(r, src2);
if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegisterF(r, src3);
switch(opcode) switch(opcode)
{ {
...@@ -151,6 +154,9 @@ namespace sw ...@@ -151,6 +154,9 @@ namespace sw
case Shader::OPCODE_DP2: dp2(d, s0, s1); break; case Shader::OPCODE_DP2: dp2(d, s0, s1); break;
case Shader::OPCODE_DP3: dp3(d, s0, s1); break; case Shader::OPCODE_DP3: dp3(d, s0, s1); break;
case Shader::OPCODE_DP4: dp4(d, s0, s1); break; case Shader::OPCODE_DP4: dp4(d, s0, s1); break;
case Shader::OPCODE_DET2: det2(d, s0, s1); break;
case Shader::OPCODE_DET3: det3(d, s0, s1, s2); break;
case Shader::OPCODE_DET4: det4(d, s0, s1, s2, s3); break;
case Shader::OPCODE_ATT: att(d, s0, s1); break; case Shader::OPCODE_ATT: att(d, s0, s1); break;
case Shader::OPCODE_EXP2X: exp2x(d, s0, pp); break; case Shader::OPCODE_EXP2X: exp2x(d, s0, pp); break;
case Shader::OPCODE_EXP2: exp2(d, s0, pp); break; case Shader::OPCODE_EXP2: exp2(d, s0, pp); break;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment