Commit c3d95f36 by Alexis Hetu Committed by Alexis Hétu

Matrix determinant and inverse implementation

Implementation for determinant has been done directly in ShaderCore in order to avoid having to allocate temporaries manually in OutputASM. For now, the implementation for the inverse matrix is very simple, i.e., it doesn't attempt to re-use results from the cofactor matrix computation to compute the determinant or do any other kind of optimization, but it works. Change-Id: I0fc70133809ae2752dc567bf58b60d7af7a88009 Reviewed-on: https://swiftshader-review.googlesource.com/4000 Tested-by: Alexis Hétu <sugoi@google.com> Reviewed-by: Alexis Hétu <sugoi@google.com>
parent 9b3388ed
......@@ -636,6 +636,74 @@ namespace glsl
return true;
}
void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)
{
	// Emits a single instruction that evaluates a determinant taken from the
	// registers of the matrix 'arg' and writes it to component 'outRow' of
	// register 'outCol' of 'result'.
	//
	// size     - order of the determinant to evaluate (1 to 4).
	// col, row - when both are non-negative, the register (column) and component
	//            (row) of 'arg' that are left out, so that the emitted value is
	//            the signed cofactor of that element. When either is negative,
	//            the first 'size' registers of 'arg' are used as they are.
	// outCol   - offset added to the destination register index.
	// outRow   - destination component; turned into a single-bit write mask.

	// True when an element of 'arg' has to be excluded from the computation
	const bool excludesElement = (row >= 0) && (col >= 0);
	// A cofactor changes sign when exactly one of col/row is odd
	const bool flipSign = excludesElement && (((col ^ row) & 0x01) != 0);

	Instruction *instruction = 0;

	if(size == 1)   // Used for cofactor computation only
	{
		// For a 2x2 matrix, the cofactor is simply a transposed move or negate:
		// elements on the diagonal are taken from the opposite diagonal position
		// and moved, the others are read in place and negated
		const bool onDiagonal = (col == row);
		const int srcRegister = onDiagonal ? 1 - row : row;
		const int srcComponent = onDiagonal ? 1 - col : col;

		instruction = emit(onDiagonal ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG, result, arg);
		instruction->src[0].index += srcRegister;
		instruction->src[0].swizzle = 0x55 * srcComponent;   // 0x00 = xxxx, 0x55 = yyyy
	}
	else if(size == 2)
	{
		// Component pairs that remain once one of the three rows is left out,
		// plus the plain xy pair used when nothing is excluded
		static const unsigned int pairSwizzle[3] = { 0x99, 0x88, 0x44 };   // xy?? : yzyz, xzxz, xyxy

		// Registers that remain once register 'col' is left out
		const int firstCol = (excludesElement && (col <= 0)) ? 1 : 0;
		const int secondCol = (excludesElement && (col <= 1)) ? 2 : 1;
		const unsigned int components = pairSwizzle[excludesElement ? row : 2];

		instruction = emit(sw::Shader::OPCODE_DET2, result, arg, arg);

		// Exchanging the two operands negates the determinant
		if(flipSign)
		{
			instruction->src[0].index += secondCol;
			instruction->src[1].index += firstCol;
		}
		else
		{
			instruction->src[0].index += firstCol;
			instruction->src[1].index += secondCol;
		}

		instruction->src[1].swizzle = components;
		instruction->src[0].swizzle = instruction->src[1].swizzle;
	}
	else if(size == 3)
	{
		// Component triples that remain once one of the four rows is left out,
		// plus the identity swizzle used when nothing is excluded
		static const unsigned int tripleSwizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 };   // xyz? : yzww, xzww, xyww, xyzw

		// Registers that remain once register 'col' is left out
		const int firstCol = (excludesElement && (col <= 0)) ? 1 : 0;
		const int secondCol = (excludesElement && (col <= 1)) ? 2 : 1;
		const int thirdCol = (excludesElement && (col <= 2)) ? 3 : 2;
		const unsigned int components = tripleSwizzle[excludesElement ? row : 3];

		instruction = emit(sw::Shader::OPCODE_DET3, result, arg, arg, arg);
		instruction->src[0].index += firstCol;

		// Exchanging the last two operands negates the determinant
		if(flipSign)
		{
			instruction->src[1].index += thirdCol;
			instruction->src[2].index += secondCol;
		}
		else
		{
			instruction->src[1].index += secondCol;
			instruction->src[2].index += thirdCol;
		}

		instruction->src[2].swizzle = components;
		instruction->src[1].swizzle = instruction->src[2].swizzle;
		instruction->src[0].swizzle = instruction->src[1].swizzle;
	}
	else if(size == 4)
	{
		// Full 4x4 determinant: operand i reads register i of 'arg'
		instruction = emit(sw::Shader::OPCODE_DET4, result, arg, arg, arg, arg);

		for(int i = 1; i < 4; ++i)
		{
			instruction->src[i].index += i;
		}
	}
	else
	{
		UNREACHABLE(size);
		return;
	}

	// Route the scalar result to the requested register and component
	instruction->dst.index += outCol;
	instruction->dst.mask = 1 << outRow;
}
bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)
{
if(currentScope != emitScope)
......@@ -807,6 +875,48 @@ namespace glsl
}
}
break;
case EOpDeterminant:
if(visit == PostVisit)
{
int size = arg->getNominalSize();
ASSERT(size == arg->getSecondarySize());
emitDeterminant(result, arg, size);
}
break;
case EOpInverse:
if(visit == PostVisit)
{
int size = arg->getNominalSize();
ASSERT(size == arg->getSecondarySize());
// Compute transposed matrix of cofactors
for(int i = 0; i < size; ++i)
{
for(int j = 0; j < size; ++j)
{
// For a 2x2 matrix, the cofactor is simply a transposed move or negate
// For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant
emitDeterminant(result, arg, size - 1, j, i, i, j);
}
}
// Compute 1 / determinant
Temporary invDet(this);
emitDeterminant(&invDet, arg, size);
Constant one(1.0f, 1.0f, 1.0f, 1.0f);
Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);
div->src[1].swizzle = 0x00; // xxxx
// Divide transposed matrix of cofactors by determinant
for(int i = 0; i < size; ++i)
{
Instruction *div = emit(sw::Shader::OPCODE_MUL, result, result, &invDet);
div->src[0].index += i;
div->dst.index += i;
}
}
break;
default: UNREACHABLE(node->getOp());
}
......@@ -1493,7 +1603,7 @@ namespace glsl
return IsSampler(type.getBasicType()) && (type.getQualifier() == EvqUniform || type.getQualifier() == EvqTemporary);
}
Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, int index)
Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, int index)
{
if(isSamplerRegister(dst))
{
......@@ -1513,6 +1623,7 @@ namespace glsl
argument(instruction->src[0], src0, index);
argument(instruction->src[1], src1, index);
argument(instruction->src[2], src2, index);
argument(instruction->src[3], src3, index);
shader->append(instruction);
......@@ -1568,7 +1679,7 @@ namespace glsl
{
for(int index = 0; index < dst->elementRegisterCount(); index++)
{
emit(op, dst, src0, src1, src2, index);
emit(op, dst, src0, src1, src2, 0, index);
}
}
......
......@@ -176,11 +176,12 @@ namespace glsl
virtual bool visitBranch(Visit visit, TIntermBranch*);
sw::Shader::Opcode getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const;
Instruction *emit(sw::Shader::Opcode op, TIntermTyped *dst = 0, TIntermNode *src0 = 0, TIntermNode *src1 = 0, TIntermNode *src2 = 0, int index = 0);
Instruction *emit(sw::Shader::Opcode op, TIntermTyped *dst = 0, TIntermNode *src0 = 0, TIntermNode *src1 = 0, TIntermNode *src2 = 0, TIntermNode *src3 = 0, int index = 0);
Instruction *emitCast(TIntermTyped *dst, TIntermTyped *src);
void emitBinary(sw::Shader::Opcode op, TIntermTyped *dst = 0, TIntermNode *src0 = 0, TIntermNode *src1 = 0, TIntermNode *src2 = 0);
void emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1 = 0);
void emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index = 0);
void emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col = -1, int row = -1, int outCol = 0, int outRow = 0);
void argument(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index = 0);
void copy(TIntermTyped *dst, TIntermNode *src, int offset = 0);
void assignLvalue(TIntermTyped *dst, TIntermTyped *src);
......
......@@ -171,6 +171,9 @@ namespace sw
case Shader::OPCODE_DP2ADD: dp2add(d, s0, s1, s2); break;
case Shader::OPCODE_DP3: dp3(d, s0, s1); break;
case Shader::OPCODE_DP4: dp4(d, s0, s1); break;
case Shader::OPCODE_DET2: det2(d, s0, s1); break;
case Shader::OPCODE_DET3: det3(d, s0, s1, s2); break;
case Shader::OPCODE_DET4: det4(d, s0, s1, s2, s3); break;
case Shader::OPCODE_CMP0: cmp0(d, s0, s1, s2); break;
case Shader::OPCODE_ICMP: icmp(d, s0, s1, control); break;
case Shader::OPCODE_UCMP: ucmp(d, s0, s1, control); break;
......
......@@ -764,6 +764,9 @@ namespace sw
case OPCODE_DIST4: return "dist4";
case OPCODE_DP3: return "dp3";
case OPCODE_DP4: return "dp4";
case OPCODE_DET2: return "det2";
case OPCODE_DET3: return "det3";
case OPCODE_DET4: return "det4";
case OPCODE_MIN: return "min";
case OPCODE_IMIN: return "imin";
case OPCODE_UMIN: return "umin";
......
......@@ -188,6 +188,9 @@ namespace sw
OPCODE_B2I, // Bool to int
OPCODE_U2B, // Uint to bool
OPCODE_B2U, // Bool to uint
OPCODE_DET2,
OPCODE_DET3,
OPCODE_DET4,
OPCODE_ALL,
OPCODE_ANY,
OPCODE_NEG,
......
......@@ -1139,6 +1139,34 @@ namespace sw
Float4 tw = Min(Max((x.w - edge0.w) / (edge1.w - edge0.w), Float4(0.0f)), Float4(1.0f)); dst.w = tw * tw * (Float4(3.0f) - Float4(2.0f) * tw);
}
void ShaderCore::det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	// 2x2 determinant formed from the x and y components of the two sources
	dst.x = src0.x * src1.y - src0.y * src1.x;

	// Replicate the scalar result to the remaining components
	dst.w = dst.x;
	dst.z = dst.x;
	dst.y = dst.x;
}
// Computes a 3x3 determinant from src0, src1 and src2 and stores it in dst.
// The value is built in two steps through dst: crs() is applied to src1 and src2,
// then dp3() combines that intermediate with src0.
// NOTE(review): crs/dp3 are presumably the cross product and the 3-component dot
// product, which would make this the scalar triple product src0 . (src1 x src2);
// confirm against their definitions.
void ShaderCore::det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
{
// dst temporarily holds the result of crs() on the last two sources
crs(dst, src1, src2);
// dst is both an input and the output here, so dp3 must tolerate that aliasing
dp3(dst, dst, src0);
}
void ShaderCore::det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3)
{
	// Computes the determinant of the 4x4 matrix formed by src0..src3, expanding
	// along the four components of src0, and replicates the scalar result to
	// every component of dst.
	//
	// Intermediate values are kept in locals and dst is only written once all
	// sources have been read, so the result remains correct when dst is the same
	// object as one of the sources.

	// 2x2 sub-determinants built from the z/w components of src1..src3; they are
	// shared by the src0.x and src0.y terms below
	Float4 zw23 = src2.z * src3.w - src2.w * src3.z;
	Float4 zw31 = src1.w * src3.z - src1.z * src3.w;
	Float4 zw12 = src1.z * src2.w - src1.w * src2.z;

	// The signs of the src0.z and src0.w terms are folded into the ordering of
	// the products inside their 2x2 sub-determinants
	Float4 det = src0.x * (src1.y * zw23 + src2.y * zw31 + src3.y * zw12) -
	             src0.y * (src1.x * zw23 + src2.x * zw31 + src3.x * zw12) +
	             src0.z * (src1.x * (src2.y * src3.w - src2.w * src3.y) +
	                       src2.x * (src1.w * src3.y - src1.y * src3.w) +
	                       src3.x * (src1.y * src2.w - src1.w * src2.y)) +
	             src0.w * (src1.x * (src2.z * src3.y - src2.y * src3.z) +
	                       src2.x * (src1.y * src3.z - src1.z * src3.y) +
	                       src3.x * (src1.z * src2.y - src1.y * src2.z));

	dst.x = det;
	dst.y = det;
	dst.z = det;
	dst.w = det;
}
void ShaderCore::frc(Vector4f &dst, const Vector4f &src)
{
dst.x = Frac(src.x);
......
......@@ -284,6 +284,9 @@ namespace sw
void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3);
void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
......
......@@ -102,6 +102,7 @@ namespace sw
Src src0 = instruction->src[0];
Src src1 = instruction->src[1];
Src src2 = instruction->src[2];
Src src3 = instruction->src[3];
bool predicate = instruction->predicate;
Control control = instruction->control;
......@@ -112,10 +113,12 @@ namespace sw
Vector4f s0;
Vector4f s1;
Vector4f s2;
Vector4f s3;
if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterF(r, src0);
if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterF(r, src1);
if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterF(r, src2);
if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegisterF(r, src3);
switch(opcode)
{
......@@ -151,6 +154,9 @@ namespace sw
case Shader::OPCODE_DP2: dp2(d, s0, s1); break;
case Shader::OPCODE_DP3: dp3(d, s0, s1); break;
case Shader::OPCODE_DP4: dp4(d, s0, s1); break;
case Shader::OPCODE_DET2: det2(d, s0, s1); break;
case Shader::OPCODE_DET3: det3(d, s0, s1, s2); break;
case Shader::OPCODE_DET4: det4(d, s0, s1, s2, s3); break;
case Shader::OPCODE_ATT: att(d, s0, s1); break;
case Shader::OPCODE_EXP2X: exp2x(d, s0, pp); break;
case Shader::OPCODE_EXP2: exp2(d, s0, pp); break;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment