Commit a80cdbc2 by Reed Kotler Committed by Jim Stichnoth

Subzero: implement 64 bit multiply in mips32

Implement 64-bit multiply in mips32 and, in addition, add the lo/hi registers, which are also used for other 64-bit math such as div and rem. BUG= R=jpp@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/1716483003 . Patch from Reed Kotler <rkotlerimgtec@gmail.com>.
parent 94e97f6a
......@@ -56,7 +56,13 @@ template <> const char *InstMIPS32La::Opcode = "la";
template <> const char *InstMIPS32Add::Opcode = "add";
template <> const char *InstMIPS32Addu::Opcode = "addu";
template <> const char *InstMIPS32And::Opcode = "and";
template <> const char *InstMIPS32Mfhi::Opcode = "mfhi";
template <> const char *InstMIPS32Mflo::Opcode = "mflo";
template <> const char *InstMIPS32Mthi::Opcode = "mthi";
template <> const char *InstMIPS32Mtlo::Opcode = "mtlo";
template <> const char *InstMIPS32Mul::Opcode = "mul";
template <> const char *InstMIPS32Mult::Opcode = "mult";
template <> const char *InstMIPS32Multu::Opcode = "multu";
template <> const char *InstMIPS32Or::Opcode = "or";
template <> const char *InstMIPS32Ori::Opcode = "ori";
template <> const char *InstMIPS32Sltu::Opcode = "sltu";
......@@ -64,6 +70,42 @@ template <> const char *InstMIPS32Sub::Opcode = "sub";
template <> const char *InstMIPS32Subu::Opcode = "subu";
template <> const char *InstMIPS32Xor::Opcode = "xor";
// Emit "mflo <dest>": only the destination GPR is printed; the LO source
// register is implicit in the opcode.
template <> void InstMIPS32Mflo::emit(const Cfg *Func) const {
  if (BuildDefs::dump())
    emitUnaryopGPRFLoHi(Opcode, this, Func);
}
// Emit "mfhi <dest>": only the destination GPR is printed; the HI source
// register is implicit in the opcode.
template <> void InstMIPS32Mfhi::emit(const Cfg *Func) const {
  if (BuildDefs::dump())
    emitUnaryopGPRFLoHi(Opcode, this, Func);
}
// Emit "mtlo <src>": only the source GPR is printed; the LO destination
// register is implicit in the opcode.
template <> void InstMIPS32Mtlo::emit(const Cfg *Func) const {
  if (BuildDefs::dump())
    emitUnaryopGPRTLoHi(Opcode, this, Func);
}
// Emit "mthi <src>": only the source GPR is printed; the HI destination
// register is implicit in the opcode.
template <> void InstMIPS32Mthi::emit(const Cfg *Func) const {
  if (BuildDefs::dump())
    emitUnaryopGPRTLoHi(Opcode, this, Func);
}
// Emit "mult <src0>, <src1>": the LO/HI destination pair is implicit in the
// opcode, so only the two source operands are printed.
template <> void InstMIPS32Mult::emit(const Cfg *Func) const {
  if (BuildDefs::dump())
    emitThreeAddrLoHi(Opcode, this, Func);
}
// Emit "multu <src0>, <src1>": the LO/HI destination pair is implicit in the
// opcode, so only the two source operands are printed.
template <> void InstMIPS32Multu::emit(const Cfg *Func) const {
  if (BuildDefs::dump())
    emitThreeAddrLoHi(Opcode, this, Func);
}
InstMIPS32Call::InstMIPS32Call(Cfg *Func, Variable *Dest, Operand *CallTarget)
: InstMIPS32(Func, InstMIPS32::Call, 1, Dest) {
HasSideEffects = true;
......@@ -125,6 +167,23 @@ void InstMIPS32::emitUnaryopGPR(const char *Opcode, const InstMIPS32 *Inst,
Str << ", ";
Inst->getSrc(0)->emit(Func);
}
void InstMIPS32::emitUnaryopGPRFLoHi(const char *Opcode, const InstMIPS32 *Inst,
const Cfg *Func) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
Str << "\t" << Opcode << "\t";
Inst->getDest()->emit(Func);
}
void InstMIPS32::emitUnaryopGPRTLoHi(const char *Opcode, const InstMIPS32 *Inst,
const Cfg *Func) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
Str << "\t" << Opcode << "\t";
Inst->getSrc(0)->emit(Func);
}
void InstMIPS32::emitThreeAddr(const char *Opcode, const InstMIPS32 *Inst,
const Cfg *Func) {
......@@ -140,6 +199,18 @@ void InstMIPS32::emitThreeAddr(const char *Opcode, const InstMIPS32 *Inst,
Inst->getSrc(1)->emit(Func);
}
void InstMIPS32::emitThreeAddrLoHi(const char *Opcode, const InstMIPS32 *Inst,
const Cfg *Func) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 2);
Str << "\t" << Opcode << "\t";
Inst->getSrc(0)->emit(Func);
Str << ", ";
Inst->getSrc(1)->emit(Func);
}
void InstMIPS32Ret::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
......
......@@ -39,70 +39,74 @@
#define REGMIPS32_GPR_TABLE \
/* val, encode, name, scratch, preserved, stackptr, frameptr, \
isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init */ \
X(Reg_ZERO, = 0, "zero", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
X(Reg_ZERO, 0, "zero", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
ALIASES1(Reg_ZERO)) \
X(Reg_AT, = Reg_ZERO + 1, "at", 0, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_AT, 1, "at", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
ALIASES1(Reg_AT)) \
X(Reg_V0, = Reg_ZERO + 2, "v0", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_V0, 2, "v0", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_V0, Reg_V0V1)) \
X(Reg_V1, = Reg_ZERO + 3, "v1", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_V1, 3, "v1", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_V1, Reg_V0V1)) \
X(Reg_A0, = Reg_ZERO + 4, "a0", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_A0, 4, "a0", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_A0, Reg_A0A1)) \
X(Reg_A1, = Reg_ZERO + 5, "a1", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_A1, 5, "a1", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_A1, Reg_A0A1)) \
X(Reg_A2, = Reg_ZERO + 6, "a2", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_A2, 6, "a2", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_A2, Reg_A2A3)) \
X(Reg_A3, = Reg_ZERO + 7, "a3", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_A3, 7, "a3", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_A3, Reg_A2A3)) \
X(Reg_T0, = Reg_ZERO + 8, "t0", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_T0, 8, "t0", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T0, Reg_T0T1)) \
X(Reg_T1, = Reg_ZERO + 9, "t1", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_T1, 9, "t1", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T1, Reg_T0T1)) \
X(Reg_T2, = Reg_ZERO + 10, "t2", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_T2, 10, "t2", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T2, Reg_T2T3)) \
X(Reg_T3, = Reg_ZERO + 11, "t3", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_T3, 11, "t3", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T3, Reg_T2T3)) \
X(Reg_T4, = Reg_ZERO + 12, "t4", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_T4, 12, "t4", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T4, Reg_T4T5)) \
X(Reg_T5, = Reg_ZERO + 14, "t5", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_T5, 13, "t5", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T5, Reg_T4T5)) \
X(Reg_T6, = Reg_ZERO + 14, "t6", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_T6, 14, "t6", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T6, Reg_T6T7)) \
X(Reg_T7, = Reg_ZERO + 15, "t7", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_T7, 15, "t7", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T7, Reg_T6T7)) \
X(Reg_S0, = Reg_ZERO + 16, "s0", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_S0, 16, "s0", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S0, Reg_S0S1)) \
X(Reg_S1, = Reg_ZERO + 17, "s1", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_S1, 17, "s1", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S1, Reg_S0S1)) \
X(Reg_S2, = Reg_ZERO + 18, "s2", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_S2, 18, "s2", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S2, Reg_S2S3)) \
X(Reg_S3, = Reg_ZERO + 19, "s3", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_S3, 19, "s3", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S3, Reg_S2S3)) \
X(Reg_S4, = Reg_ZERO + 20, "s4", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_S4, 20, "s4", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S4, Reg_S4S5)) \
X(Reg_S5, = Reg_ZERO + 21, "s5", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_S5, 21, "s5", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S5, Reg_S4S5)) \
X(Reg_S6, = Reg_ZERO + 22, "s6", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_S6, 22, "s6", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S6, Reg_S6S7)) \
X(Reg_S7, = Reg_ZERO + 23, "s7", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_S7, 23, "s7", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S7, Reg_S6S7)) \
X(Reg_T8, = Reg_ZERO + 23, "t8", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_T8, 24, "t8", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T8, Reg_T8T9)) \
X(Reg_T9, = Reg_ZERO + 25, "t9", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
X(Reg_T9, 25, "t9", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T9, Reg_T8T9)) \
X(Reg_K0, = Reg_ZERO + 26, "k0", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
X(Reg_K0, 26, "k0", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
ALIASES1(Reg_K0)) \
X(Reg_K1, = Reg_ZERO + 27, "k1", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
X(Reg_K1, 27, "k1", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
ALIASES1(Reg_K1)) \
X(Reg_GP, = Reg_ZERO + 28, "gp", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
X(Reg_GP, 28, "gp", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
ALIASES1(Reg_GP)) \
X(Reg_SP, = Reg_ZERO + 29, "sp", 0, 0, 1, 0, 0, 0, 0, 0, 0, \
X(Reg_SP, 29, "sp", 0, 0, 1, 0, 0, 0, 0, 0, 0, \
ALIASES1(Reg_SP)) \
X(Reg_FP, = Reg_ZERO + 30, "fp", 0, 0, 0, 1, 0, 0, 0, 0, 0, \
X(Reg_FP, 30, "fp", 0, 0, 0, 1, 0, 0, 0, 0, 0, \
ALIASES1(Reg_FP)) \
X(Reg_RA, = Reg_ZERO + 31, "ra", 0, 1, 0, 0, 0, 0, 0, 0, 0, \
X(Reg_RA, 31, "ra", 0, 1, 0, 0, 0, 0, 0, 0, 0, \
ALIASES1(Reg_RA)) \
X(Reg_LO, 0, "lo", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
ALIASES2(Reg_LO, Reg_LOHI)) \
X(Reg_HI, 0, "hi", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
ALIASES2(Reg_HI, Reg_LOHI))
//#define X(val, encode, name, scratch, preserved, stackptr, frameptr,
// isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)
// TODO(reed kotler): List FP registers etc.
......@@ -142,6 +146,8 @@
ALIASES3(Reg_S6, Reg_S7, Reg_S6S7)) \
X(Reg_T8T9, 26, "t8, t9", 1, 0, 0, 0, 0, 1, 0, 0, 0, \
ALIASES3(Reg_T8, Reg_T9, Reg_T8T9)) \
X(Reg_LOHI, 0, "lo, hi", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
ALIASES3(Reg_LO, Reg_HI, Reg_LOHI)) \
//#define X(val, encode, name, scratch, preserved, stackptr, frameptr,
// isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)
......@@ -160,9 +166,9 @@
#define REGMIPS32_TABLE_BOUNDS \
/* val, init */ \
X(Reg_GPR_First, = Reg_ZERO) \
X(Reg_GPR_Last, = Reg_RA) \
X(Reg_GPR_Last, = Reg_HI) \
X(Reg_I64PAIR_First, = Reg_V0V1) \
X(Reg_I64PAIR_Last, = Reg_T8T9) \
X(Reg_I64PAIR_Last, = Reg_LOHI) \
//define X(val, init)
// TODO(reed kotler): add condition code tables, etc.
......
......@@ -124,8 +124,14 @@ public:
Call,
La,
Lui,
Mfhi,
Mflo,
Mov, // actually a pseudo op for addi rd, rs, 0
Mthi,
Mtlo,
Mul,
Mult,
Multu,
Or,
Ori,
Ret,
......@@ -149,8 +155,14 @@ public:
/// Shared emit routines for common forms of instructions.
static void emitUnaryopGPR(const char *Opcode, const InstMIPS32 *Inst,
const Cfg *Func);
static void emitUnaryopGPRFLoHi(const char *Opcode, const InstMIPS32 *Inst,
const Cfg *Func);
static void emitUnaryopGPRTLoHi(const char *Opcode, const InstMIPS32 *Inst,
const Cfg *Func);
static void emitThreeAddr(const char *Opcode, const InstMIPS32 *Inst,
const Cfg *Func);
static void emitThreeAddrLoHi(const char *Opcode, const InstMIPS32 *Inst,
const Cfg *Func);
protected:
InstMIPS32(Cfg *Func, InstKindMIPS32 Kind, SizeT Maxsrcs, Variable *Dest)
......@@ -379,7 +391,13 @@ using InstMIPS32Addiu = InstMIPS32Imm16<InstMIPS32::Addiu, true>;
using InstMIPS32And = InstMIPS32ThreeAddrGPR<InstMIPS32::And>;
using InstMIPS32Lui = InstMIPS32Imm16<InstMIPS32::Lui>;
using InstMIPS32La = InstMIPS32UnaryopGPR<InstMIPS32::La>;
using InstMIPS32Mfhi = InstMIPS32UnaryopGPR<InstMIPS32::Mfhi>;
using InstMIPS32Mflo = InstMIPS32UnaryopGPR<InstMIPS32::Mflo>;
using InstMIPS32Mthi = InstMIPS32UnaryopGPR<InstMIPS32::Mthi>;
using InstMIPS32Mtlo = InstMIPS32UnaryopGPR<InstMIPS32::Mtlo>;
using InstMIPS32Mul = InstMIPS32ThreeAddrGPR<InstMIPS32::Mul>;
using InstMIPS32Mult = InstMIPS32ThreeAddrGPR<InstMIPS32::Mult>;
using InstMIPS32Multu = InstMIPS32ThreeAddrGPR<InstMIPS32::Multu>;
using InstMIPS32Or = InstMIPS32ThreeAddrGPR<InstMIPS32::Or>;
using InstMIPS32Ori = InstMIPS32Imm16<InstMIPS32::Ori>;
using InstMIPS32Sltu = InstMIPS32ThreeAddrGPR<InstMIPS32::Sltu>;
......
......@@ -44,7 +44,7 @@ enum GPRRegister {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isI64Pair, isFP32, isFP64, isVec128, alias_init) \
\
Encoded_##val encode,
Encoded_##val = encode,
REGMIPS32_GPR_TABLE
#undef X
Encoded_Not_GPR = -1
......
......@@ -582,6 +582,7 @@ void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
case InstArithmetic::Or:
case InstArithmetic::Sub:
case InstArithmetic::Xor:
case InstArithmetic::Mul:
break;
default:
UnimplementedLoweringError(this, Instr);
......@@ -644,6 +645,24 @@ void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
_mov(DestHi, T_Hi);
return;
}
case InstArithmetic::Mul: {
// TODO(rkotler): Make sure that mul has the side effect of clobbering
// LO, HI. Check for any other LO, HI quirkiness in this section.
auto *T_Lo = I32Reg(RegMIPS32::Reg_LO), *T_Hi = I32Reg(RegMIPS32::Reg_HI);
auto *T1 = I32Reg(), *T2 = I32Reg();
auto *TM1 = I32Reg(), *TM2 = I32Reg(), *TM3 = I32Reg(), *TM4 = I32Reg();
_multu(T_Lo, Src0LoR, Src1LoR);
Context.insert<InstFakeDef>(T_Hi, T_Lo);
_mflo(T1, T_Lo);
_mfhi(T2, T_Hi);
_mov(DestLo, T1);
_mul(TM1, Src0HiR, Src1LoR);
_mul(TM2, Src0LoR, Src1HiR);
_addu(TM3, TM1, T2);
_addu(TM4, TM3, TM2);
_mov(DestHi, TM4);
return;
}
default:
UnimplementedLoweringError(this, Instr);
return;
......
......@@ -175,10 +175,34 @@ public:
}
}
// Insert an "mfhi" (move-from-HI) instruction; Src names the HI register
// variable so liveness tracking sees the read.
void _mfhi(Variable *Dest, Operand *Src) {
Context.insert<InstMIPS32Mfhi>(Dest, Src);
}
// Insert an "mflo" (move-from-LO) instruction; Src names the LO register
// variable so liveness tracking sees the read.
void _mflo(Variable *Dest, Operand *Src) {
Context.insert<InstMIPS32Mflo>(Dest, Src);
}
// Insert an "mthi" (move-to-HI) instruction; Dest names the HI register
// variable so liveness tracking sees the write.
void _mthi(Variable *Dest, Operand *Src) {
Context.insert<InstMIPS32Mthi>(Dest, Src);
}
// Insert an "mtlo" (move-to-LO) instruction; Dest names the LO register
// variable so liveness tracking sees the write.
void _mtlo(Variable *Dest, Operand *Src) {
Context.insert<InstMIPS32Mtlo>(Dest, Src);
}
// Insert a "mul" instruction (32-bit multiply with an explicit GPR
// destination, unlike mult/multu which target LO/HI).
void _mul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstMIPS32Mul>(Dest, Src0, Src1);
}
// Insert a "mult" instruction; Dest is the LO/HI register variable that
// models the implicit result pair.
void _mult(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstMIPS32Mult>(Dest, Src0, Src1);
}
// Insert a "multu" (unsigned multiply) instruction; Dest is the LO/HI
// register variable that models the implicit result pair.
void _multu(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstMIPS32Multu>(Dest, Src0, Src1);
}
// Insert an "or" (bitwise OR) instruction.
void _or(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstMIPS32Or>(Dest, Src0, Src1);
}
......
......@@ -357,6 +357,15 @@ entry:
; ARM32: umull
; ARM32: add
; MIPS32-LABEL: mul64BitSigned
; MIPS32: multu
; MIPS32: mflo
; MIPS32: mfhi
; MIPS32: mul
; MIPS32: mul
; MIPS32: addu
; MIPS32: addu
define internal i64 @mul64BitUnsigned(i64 %a, i64 %b) {
entry:
%mul = mul i64 %b, %a
......@@ -382,6 +391,15 @@ entry:
; ARM32: umull
; ARM32: add
; MIPS32-LABEL: mul64BitUnsigned
; MIPS32: multu
; MIPS32: mflo
; MIPS32: mfhi
; MIPS32: mul
; MIPS32: mul
; MIPS32: addu
; MIPS32: addu
define internal i64 @div64BitSigned(i64 %a, i64 %b) {
entry:
%div = sdiv i64 %a, %b
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment