Commit 7b3d9cbb by John Porto

Subzero. ARM32. New bool folding.

Improves the bool folding logic so that branches are short circuited. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1417393003 .
parent f2674646
......@@ -882,7 +882,7 @@ void InstARM32Br::dump(const Cfg *Func) const {
}
if (Label) {
Str << "label %" << Label->getName(Func);
Str << getPredicate() << ", label %" << Label->getName(Func);
} else {
Str << getPredicate() << ", label %" << getTargetTrue()->getName();
if (getTargetFalse()) {
......
......@@ -1136,6 +1136,7 @@ public:
}
/// A move counts as a redundant assignment only when it has a single
/// destination and a single source, is unpredicated (CondARM32::AL), and
/// checkForRedundantAssign() accepts its destination/source pair.
bool isRedundantAssign() const override {
  if (isMultiDest() || isMultiSource())
    return false;
  if (getPredicate() != CondARM32::AL)
    return false;
  return checkForRedundantAssign(getDest(), getSrc(0));
}
/// Returns true when the first source operand of this move is a Variable.
bool isVarAssign() const override { return llvm::isa<Variable>(getSrc(0)); }
......
......@@ -17,7 +17,6 @@
#ifndef SUBZERO_SRC_ICETLS_H
#define SUBZERO_SRC_ICETLS_H
///
/// @defgroup /IceTLS Defines 5 macros for unifying thread_local and pthread:
/// @{
......@@ -96,7 +95,6 @@
#define ICE_ATTRIBUTE_TLS thread_local
#endif // !_MSC_VER
#define ICE_TLS_DECLARE_FIELD(Type, FieldName) \
static ICE_ATTRIBUTE_TLS Type FieldName
#define ICE_TLS_DEFINE_FIELD(Type, ClassName, FieldName) \
......
......@@ -1044,7 +1044,11 @@ void TargetARM32::legalizeMovStackAddrImm(InstARM32Mov *MovInstr,
}
if (Legalized) {
_mov(Dest, Src);
if (MovInstr->isDestRedefined()) {
_mov_redefined(Dest, Src, MovInstr->getPredicate());
} else {
_mov(Dest, Src, MovInstr->getPredicate());
}
MovInstr->setDeleted();
}
}
......@@ -1346,8 +1350,57 @@ void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
return;
}
/// Materializes an i1 And/Or/Xor into Inst's destination.
///
/// Both operands are lowered through lowerInt1() (a constant second operand is
/// used directly), combined with the matching ARM instruction into a fresh
/// register, and the result is moved into the destination.
///
/// \returns SBC_Yes only when both operands were reported as SBC_Yes;
/// otherwise SBC_No.
TargetARM32::SafeBoolChain
TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Inst) {
  Variable *Dest = Inst->getDest();
  assert(Dest->getType() == IceType_i1);

  // Reaching this point means Inst was not folded into its consumer, so its
  // operands are materialized and the operation is performed explicitly.
  // Ordinary Variables are used for the operands (rather than infinite-weight
  // ones) because this routine can recurse deeply, which would otherwise pile
  // up a large number of infinite-weight temporaries.
  assert(Inst->getSrcSize() == 2);

  Variable *Lhs = Func->makeVariable(IceType_i1);
  const SafeBoolChain LhsSafe = lowerInt1(Lhs, Inst->getSrc(0));

  Operand *Rhs = Inst->getSrc(1);
  SafeBoolChain RhsSafe = SBC_Yes;
  if (!llvm::isa<Constant>(Rhs)) {
    Variable *RhsV = Func->makeVariable(IceType_i1);
    RhsSafe = lowerInt1(RhsV, Rhs);
    Rhs = RhsV;
  }

  Variable *Result = makeReg(IceType_i1);
  Variable *LhsR = legalizeToReg(Lhs);
  Operand *RhsRF = legalize(Rhs, Legal_Reg | Legal_Flex);

  switch (Inst->getOp()) {
  case InstArithmetic::And:
    _and(Result, LhsR, RhsRF);
    break;
  case InstArithmetic::Or:
    _orr(Result, LhsR, RhsRF);
    break;
  case InstArithmetic::Xor:
    _eor(Result, LhsR, RhsRF);
    break;
  default:
    // Any operation that ends up here has to be added to the list of valid
    // consumers.
    llvm::report_fatal_error("Unhandled i1 Op");
  }

  _mov(Dest, Result);

  if (LhsSafe != SBC_Yes || RhsSafe != SBC_Yes)
    return SBC_No;
  return SBC_Yes;
}
void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
if (Dest->getType() == IceType_i1) {
lowerInt1Arithmetic(Inst);
return;
}
// TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to
// legalize Src0 to flex or Src1 to flex and there is a reversible
// instruction. E.g., reverse subtract with immediate, register vs register,
......@@ -1814,46 +1867,129 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) {
}
}
/// Emits the code that evaluates Boolean into the condition flags for use by a
/// conditional branch, short-circuiting And/Or producers.
///
/// \param Boolean the i1 operand being branched on.
/// \param TargetTrue branch target used when the expression is true.
/// \param TargetFalse branch target used when the expression is false.
/// \param ShortCircuitable bit mask (SC_And / SC_Or / SC_All) consulted by the
/// And/Or cases to decide whether a new short-circuit label must be created.
/// \returns the condition under which Boolean is true, paired with the
/// short-circuit label created here (nullptr when none was created). The
/// recursive callers below insert that label themselves.
TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
InstARM32Label *NewShortCircuitLabel = nullptr;
Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
const Inst *Producer = BoolComputations.getProducerOf(Boolean);
if (Producer == nullptr) {
// No producer, no problem: just emit code to perform (Boolean & 1) and set
// the flags register. The branch should be taken if the resulting flags
// indicate a non-zero result.
_tst(legalizeToReg(Boolean), _1);
return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
}
// A producer is known for Boolean: fold it according to its kind.
switch (Producer->getKind()) {
default:
llvm_unreachable("Unexpected producer.");
case Inst::Icmp: {
// The integer compare sets the flags; its condition is returned as-is.
return ShortCircuitCondAndLabel(
lowerIcmpCond(llvm::cast<InstIcmp>(Producer)));
} break;
case Inst::Fcmp: {
// Same as Icmp, but the result may carry two conditions.
return ShortCircuitCondAndLabel(
lowerFcmpCond(llvm::cast<InstFcmp>(Producer)));
} break;
case Inst::Cast: {
// Only Trunc casts are expected here. Test bit 0 of the source; for an i64
// source only the low half is inspected.
const auto *CastProducer = llvm::cast<InstCast>(Producer);
assert(CastProducer->getCastKind() == InstCast::Trunc);
Operand *Src = CastProducer->getSrc(0);
if (Src->getType() == IceType_i64)
Src = loOperand(Src);
_tst(legalizeToReg(Src), _1);
return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
} break;
case Inst::Arithmetic: {
const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
switch (ArithProducer->getOp()) {
default:
llvm_unreachable("Unhandled Arithmetic Producer.");
case InstArithmetic::And: {
// A new label is created only when the caller did not allow And
// short-circuiting through ShortCircuitable.
if (!(ShortCircuitable & SC_And)) {
NewShortCircuitLabel = InstARM32Label::create(Func, this);
}
// NOTE(review): createForLabelOrDuplicate is defined outside this view;
// presumably it targets the new label when one exists and otherwise
// duplicates TargetFalse -- confirm.
LowerInt1BranchTarget NewTarget =
TargetFalse.createForLabelOrDuplicate(NewShortCircuitLabel);
// Evaluate the first operand; if it is false, branch straight to NewTarget
// without evaluating the second operand.
ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
Producer->getSrc(0), TargetTrue, NewTarget, SC_And);
const CondWhenTrue &Cond = CondAndLabel.Cond;
_br_short_circuit(NewTarget, Cond.invert());
// Place the label produced while lowering the first operand, if any, right
// after the short-circuit branch.
InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
if (ShortCircuitLabel != nullptr)
Context.insert(ShortCircuitLabel);
// The condition of the second operand is the condition of the whole And.
// Lowering it with SC_All is asserted not to produce a label.
return ShortCircuitCondAndLabel(
lowerInt1ForBranch(Producer->getSrc(1), TargetTrue, NewTarget, SC_All)
.assertNoLabelAndReturnCond(),
NewShortCircuitLabel);
} break;
case InstArithmetic::Or: {
// Mirror image of the And case: the short-circuit target derives from
// TargetTrue and the branch is taken when the first operand is true.
if (!(ShortCircuitable & SC_Or)) {
NewShortCircuitLabel = InstARM32Label::create(Func, this);
}
LowerInt1BranchTarget NewTarget =
TargetTrue.createForLabelOrDuplicate(NewShortCircuitLabel);
ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
Producer->getSrc(0), NewTarget, TargetFalse, SC_Or);
const CondWhenTrue &Cond = CondAndLabel.Cond;
_br_short_circuit(NewTarget, Cond);
InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
if (ShortCircuitLabel != nullptr)
Context.insert(ShortCircuitLabel);
return ShortCircuitCondAndLabel(lowerInt1ForBranch(Producer->getSrc(1),
NewTarget, TargetFalse,
SC_All)
.assertNoLabelAndReturnCond(),
NewShortCircuitLabel);
} break;
}
}
}
}
void TargetARM32::lowerBr(const InstBr *Instr) {
if (Instr->isUnconditional()) {
_br(Instr->getTargetUnconditional());
return;
}
Operand *Cond = Instr->getCondition();
CondARM32::Cond BrCondTrue0 = CondARM32::NE;
CondARM32::Cond BrCondTrue1 = CondARM32::kNone;
CondARM32::Cond BrCondFalse = CondARM32::kNone;
if (!_mov_i1_to_flags(Cond, &BrCondTrue0, &BrCondTrue1, &BrCondFalse)) {
// "Cond" was not folded.
Type Ty = Cond->getType();
Variable *Src0R = legalizeToReg(Cond);
assert(Ty == IceType_i1);
if (Ty != IceType_i32)
_uxt(Src0R, Src0R);
Constant *_0 = Ctx->getConstantZero(IceType_i32);
_cmp(Src0R, _0);
BrCondTrue0 = CondARM32::NE;
}
if (BrCondTrue1 != CondARM32::kNone) {
_br(Instr->getTargetTrue(), BrCondTrue1);
}
CfgNode *TargetTrue = Instr->getTargetTrue();
CfgNode *TargetFalse = Instr->getTargetFalse();
ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
Instr->getCondition(), LowerInt1BranchTarget(TargetTrue),
LowerInt1BranchTarget(TargetFalse), SC_All);
assert(CondAndLabel.ShortCircuitTarget == nullptr);
if (BrCondTrue0 == CondARM32::kNone) {
assert(BrCondTrue1 == CondARM32::kNone);
_br(Instr->getTargetFalse());
return;
const CondWhenTrue &Cond = CondAndLabel.Cond;
if (Cond.WhenTrue1 != CondARM32::kNone) {
assert(Cond.WhenTrue0 != CondARM32::AL);
_br(TargetTrue, Cond.WhenTrue1);
}
if (BrCondTrue0 == CondARM32::AL) {
assert(BrCondTrue1 == CondARM32::kNone);
assert(BrCondFalse == CondARM32::kNone);
_br(Instr->getTargetTrue());
return;
switch (Cond.WhenTrue0) {
default:
_br(TargetTrue, TargetFalse, Cond.WhenTrue0);
break;
case CondARM32::kNone:
_br(TargetFalse);
break;
case CondARM32::AL:
_br(TargetTrue);
break;
}
_br(Instr->getTargetTrue(), Instr->getTargetFalse(), BrCondTrue0);
}
void TargetARM32::lowerCall(const InstCall *Instr) {
......@@ -1959,6 +2095,8 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
case IceType_void:
break;
case IceType_i1:
assert(BoolComputations.getProducerOf(Dest) == nullptr);
// Fall-through intended.
case IceType_i8:
case IceType_i16:
case IceType_i32:
......@@ -2089,18 +2227,9 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
Variable *Src0R = legalizeToReg(Src0);
_sxt(T_Lo, Src0R);
} else {
CondARM32::Cond CondTrue0, CondTrue1, CondFalse;
if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) {
// Handle bool folding.
Constant *_0 = Ctx->getConstantZero(IceType_i32);
Operand *_m1 =
legalize(Ctx->getConstantInt32(-1), Legal_Reg | Legal_Flex);
_cmov(T_Lo, _m1, CondTrue0, CondTrue1, _0, CondFalse);
} else {
Variable *Src0R = legalizeToReg(Src0);
_lsl(T_Lo, Src0R, ShiftAmt);
_asr(T_Lo, T_Lo, ShiftAmt);
}
Operand *_0 = Ctx->getConstantZero(IceType_i32);
Operand *_m1 = Ctx->getConstantInt32(-1);
lowerInt1ForSelect(T_Lo, Src0, _m1, _0);
}
_mov(DestLo, T_Lo);
Variable *T_Hi = makeReg(DestHi->getType());
......@@ -2119,24 +2248,10 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
_sxt(T, Src0R);
_mov(Dest, T);
} else {
Constant *_0 = Ctx->getConstantZero(IceType_i32);
Operand *_m1 = Ctx->getConstantInt(Dest->getType(), -1);
Variable *T = makeReg(Dest->getType());
CondARM32::Cond CondTrue0, CondTrue1, CondFalse;
if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) {
// Handle bool folding.
Constant *_0 = Ctx->getConstantZero(IceType_i32);
Operand *_m1 =
legalize(Ctx->getConstantInt32(-1), Legal_Reg | Legal_Flex);
_cmov(T, _m1, CondTrue0, CondTrue1, _0, CondFalse);
} else {
// GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
// lsl t1, src_reg, 31
// asr t1, t1, 31
// dst = t1
Variable *Src0R = legalizeToReg(Src0);
Constant *ShiftAmt = Ctx->getConstantInt32(31);
_lsl(T, Src0R, ShiftAmt);
_asr(T, T, ShiftAmt);
}
lowerInt1ForSelect(T, Src0, _m1, _0);
_mov(Dest, T);
}
break;
......@@ -2149,60 +2264,44 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
UnimplementedError(Func->getContext()->getFlags());
} else if (Dest->getType() == IceType_i64) {
// t1=uxtb src; dst.lo=t1; dst.hi=0
Constant *_0 = Ctx->getConstantZero(IceType_i32);
Constant *_1 = Ctx->getConstantInt32(1);
Operand *_0 =
legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Variable *T_Lo = makeReg(DestLo->getType());
CondARM32::Cond CondTrue0, CondTrue1, CondFalse;
if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) {
// Handle folding opportunities.
Variable *T_Hi = makeReg(DestLo->getType());
_mov(T_Hi, _0);
_mov(DestHi, T_Hi);
_cmov(T_Lo, _1, CondTrue0, CondTrue1, _0, CondFalse);
_mov(DestLo, T_Lo);
return;
switch (Src0->getType()) {
default: {
assert(Src0->getType() != IceType_i64);
_uxt(T_Lo, legalizeToReg(Src0));
} break;
case IceType_i32: {
_mov(T_Lo, legalize(Src0, Legal_Reg | Legal_Flex));
} break;
case IceType_i1: {
SafeBoolChain Safe = lowerInt1(T_Lo, Src0);
if (Safe == SBC_No) {
Operand *_1 =
legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
_and(T_Lo, T_Lo, _1);
}
} break;
}
// i32 and i1 can just take up the whole register. i32 doesn't need uxt,
// while i1 will have an and mask later anyway.
if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
_mov(T_Lo, Src0RF);
} else {
Variable *Src0R = legalizeToReg(Src0);
_uxt(T_Lo, Src0R);
}
if (Src0->getType() == IceType_i1) {
Constant *One = Ctx->getConstantInt32(1);
_and(T_Lo, T_Lo, One);
}
_mov(DestLo, T_Lo);
Variable *T_Hi = makeReg(DestLo->getType());
_mov(T_Hi, _0);
_mov(DestHi, T_Hi);
} else if (Src0->getType() == IceType_i1) {
Constant *_1 = Ctx->getConstantInt32(1);
Variable *T = makeReg(Dest->getType());
CondARM32::Cond CondTrue0, CondTrue1, CondFalse;
if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) {
// Handle folding opportunities.
Constant *_0 = Ctx->getConstantZero(IceType_i32);
_cmov(T, _1, CondTrue0, CondTrue1, _0, CondFalse);
_mov(Dest, T);
return;
SafeBoolChain Safe = lowerInt1(T, Src0);
if (Safe == SBC_No) {
Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
_and(T, T, _1);
}
// t = Src0; t &= 1; Dest = t
Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
// Just use _mov instead of _uxt since all registers are 32-bit. _uxt
// requires the source to be a register so could have required a _mov
// from legalize anyway.
_mov(T, Src0RF);
_and(T, T, _1);
_mov(Dest, T);
} else {
// t1 = uxt src; dst = t1
......@@ -2473,19 +2572,13 @@ struct {
};
} // end of anonymous namespace
void TargetARM32::lowerFcmpCond(const InstFcmp *Instr,
CondARM32::Cond *CondIfTrue0,
CondARM32::Cond *CondIfTrue1,
CondARM32::Cond *CondIfFalse) {
TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
InstFcmp::FCond Condition = Instr->getCondition();
switch (Condition) {
case InstFcmp::False:
*CondIfFalse = CondARM32::AL;
*CondIfTrue0 = *CondIfTrue1 = CondARM32::kNone;
break;
return CondWhenTrue(CondARM32::kNone);
case InstFcmp::True:
*CondIfFalse = *CondIfTrue1 = CondARM32::kNone;
*CondIfTrue0 = CondARM32::AL;
return CondWhenTrue(CondARM32::AL);
break;
default: {
Variable *Src0R = legalizeToReg(Instr->getSrc(0));
......@@ -2493,11 +2586,7 @@ void TargetARM32::lowerFcmpCond(const InstFcmp *Instr,
_vcmp(Src0R, Src1R);
_vmrs();
assert(Condition < llvm::array_lengthof(TableFcmp));
*CondIfTrue0 = TableFcmp[Condition].CC0;
*CondIfTrue1 = TableFcmp[Condition].CC1;
*CondIfFalse = (*CondIfTrue1 != CondARM32::kNone)
? CondARM32::AL
: InstARM32::getOppositeCondition(*CondIfTrue0);
return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
}
}
}
......@@ -2513,39 +2602,40 @@ void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
}
Variable *T = makeReg(IceType_i1);
Operand *_1 = Ctx->getConstantInt32(1);
Operand *_0 = Ctx->getConstantZero(IceType_i32);
Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
Operand *_0 =
legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
CondARM32::Cond CondIfTrue0, CondIfTrue1, CondIfFalse;
lowerFcmpCond(Instr, &CondIfTrue0, &CondIfTrue1, &CondIfFalse);
CondWhenTrue Cond = lowerFcmpCond(Instr);
bool RedefineT = false;
if (CondIfFalse != CondARM32::kNone) {
assert(!RedefineT);
_mov(T, _0, CondIfFalse);
if (Cond.WhenTrue0 != CondARM32::AL) {
_mov(T, _0);
RedefineT = true;
}
if (CondIfTrue0 != CondARM32::kNone) {
if (RedefineT) {
_mov_redefined(T, _1, CondIfTrue0);
} else {
_mov(T, _1, CondIfTrue0);
}
RedefineT = true;
if (Cond.WhenTrue0 == CondARM32::kNone) {
_mov(Dest, T);
return;
}
if (RedefineT) {
_mov_redefined(T, _1, Cond.WhenTrue0);
} else {
_mov(T, _1, Cond.WhenTrue0);
}
if (CondIfTrue1 != CondARM32::kNone) {
assert(RedefineT);
_mov_redefined(T, _1, CondIfTrue1);
if (Cond.WhenTrue1 != CondARM32::kNone) {
_mov_redefined(T, _1, Cond.WhenTrue1);
}
_mov(Dest, T);
}
void TargetARM32::lowerIcmpCond(const InstIcmp *Inst,
CondARM32::Cond *CondIfTrue,
CondARM32::Cond *CondIfFalse) {
TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
assert(Inst->getSrc(0)->getType() != IceType_i1);
assert(Inst->getSrc(1)->getType() != IceType_i1);
Operand *Src0 = legalizeUndef(Inst->getSrc(0));
Operand *Src1 = legalizeUndef(Inst->getSrc(1));
......@@ -2607,9 +2697,7 @@ void TargetARM32::lowerIcmpCond(const InstIcmp *Inst,
_cmp(Src0Hi, Src1HiRF);
_cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
}
*CondIfTrue = TableIcmp64[Index].C1;
*CondIfFalse = TableIcmp64[Index].C2;
return;
return CondWhenTrue(TableIcmp64[Index].C1);
}
// a=icmp cond b, c ==>
......@@ -2661,8 +2749,7 @@ void TargetARM32::lowerIcmpCond(const InstIcmp *Inst,
Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
_cmp(Src0R, Src1RF);
}
*CondIfTrue = getIcmp32Mapping(Inst->getCondition());
*CondIfFalse = InstARM32::getOppositeCondition(*CondIfTrue);
return CondWhenTrue(getIcmp32Mapping(Inst->getCondition()));
}
void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
......@@ -2676,17 +2763,18 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
return;
}
Constant *_0 = Ctx->getConstantZero(IceType_i32);
Constant *_1 = Ctx->getConstantInt32(1);
Operand *_0 =
legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
Variable *T = makeReg(IceType_i1);
CondARM32::Cond CondIfTrue, CondIfFalse;
lowerIcmpCond(Inst, &CondIfTrue, &CondIfFalse);
_mov(T, _0, CondIfFalse);
_mov_redefined(T, _1, CondIfTrue);
_mov(T, _0);
CondWhenTrue Cond = lowerIcmpCond(Inst);
_mov_redefined(T, _1, Cond.WhenTrue0);
_mov(Dest, T);
assert(Cond.WhenTrue1 == CondARM32::kNone);
return;
}
......@@ -3903,119 +3991,7 @@ void TargetARM32::lowerSelect(const InstSelect *Inst) {
return;
}
CondARM32::Cond CondIfTrue0, CondIfTrue1, CondIfFalse;
if (!_mov_i1_to_flags(Condition, &CondIfTrue0, &CondIfTrue1, &CondIfFalse)) {
// "Condition" was not fold.
// cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t
Variable *CmpOpnd0 = legalizeToReg(Condition);
Type CmpOpnd0Ty = CmpOpnd0->getType();
Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
assert(CmpOpnd0Ty == IceType_i1);
if (CmpOpnd0Ty != IceType_i32)
_uxt(CmpOpnd0, CmpOpnd0);
_cmp(CmpOpnd0, CmpOpnd1);
CondIfTrue0 = CondARM32::NE;
CondIfTrue1 = CondARM32::kNone;
CondIfFalse = CondARM32::EQ;
}
if (DestTy == IceType_i64) {
SrcT = legalizeUndef(SrcT);
SrcF = legalizeUndef(SrcF);
// Set the low portion.
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex);
Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex);
Variable *TLo = makeReg(SrcFLo->getType());
bool RedefineTLo = false;
if (CondIfFalse != CondARM32::kNone) {
_mov(TLo, SrcFLo, CondIfFalse);
RedefineTLo = true;
}
if (CondIfTrue0 != CondARM32::kNone) {
if (!RedefineTLo)
_mov(TLo, SrcTLo, CondIfTrue0);
else
_mov_redefined(TLo, SrcTLo, CondIfTrue0);
RedefineTLo = true;
}
if (CondIfTrue1 != CondARM32::kNone) {
assert(RedefineTLo);
_mov_redefined(TLo, SrcTLo, CondIfTrue1);
}
_mov(DestLo, TLo);
// Set the high portion.
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex);
Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex);
Variable *THi = makeReg(SrcFHi->getType());
bool RedefineTHi = false;
if (CondIfFalse != CondARM32::kNone) {
_mov(THi, SrcFHi, CondIfFalse);
RedefineTHi = true;
}
if (CondIfTrue0 != CondARM32::kNone) {
if (!RedefineTHi)
_mov(THi, SrcTHi, CondIfTrue0);
else
_mov_redefined(THi, SrcTHi, CondIfTrue0);
RedefineTHi = true;
}
if (CondIfTrue1 != CondARM32::kNone) {
assert(RedefineTHi);
_mov_redefined(THi, SrcTHi, CondIfTrue1);
}
_mov(DestHi, THi);
return;
}
if (isFloatingType(DestTy)) {
SrcT = legalizeToReg(SrcT);
SrcF = legalizeToReg(SrcF);
Variable *T = makeReg(DestTy);
assert(DestTy == SrcF->getType());
bool RedefineT = false;
if (CondIfFalse != CondARM32::kNone) {
_mov(T, SrcF, CondIfFalse);
RedefineT = true;
}
if (CondIfTrue0 != CondARM32::kNone) {
if (!RedefineT)
_mov(T, SrcT, CondIfTrue0);
else
_mov_redefined(T, SrcT, CondIfTrue0);
RedefineT = true;
}
if (CondIfTrue1 != CondARM32::kNone) {
assert(RedefineT);
_mov_redefined(T, SrcT, CondIfTrue1);
}
assert(DestTy == SrcT->getType());
_mov(Dest, T);
return;
}
Variable *T = makeReg(SrcF->getType());
SrcT = legalize(SrcT, Legal_Reg | Legal_Flex);
SrcF = legalize(SrcF, Legal_Reg | Legal_Flex);
bool RedefineT = false;
if (CondIfFalse != CondARM32::kNone) {
_mov(T, SrcF, CondIfFalse);
RedefineT = true;
}
if (CondIfTrue0 != CondARM32::kNone) {
if (!RedefineT)
_mov(T, SrcT, CondIfTrue0);
else
_mov_redefined(T, SrcT, CondIfTrue0);
RedefineT = true;
}
if (CondIfTrue1 != CondARM32::kNone) {
assert(RedefineT);
_mov_redefined(T, SrcT, CondIfTrue1);
}
_mov(Dest, T);
lowerInt1ForSelect(Dest, Condition, legalizeUndef(SrcT), legalizeUndef(SrcF));
}
void TargetARM32::lowerStore(const InstStore *Inst) {
......@@ -4430,74 +4406,250 @@ void TargetARM32::emit(const ConstantUndef *) const {
llvm::report_fatal_error("undef value encountered by emitter.");
}
void TargetARM32::lowerTruncToFlags(Operand *Src, CondARM32::Cond *CondIfTrue,
CondARM32::Cond *CondIfFalse) {
Operand *_1 = Ctx->getConstantInt32(1);
Variable *SrcR =
legalizeToReg(Src->getType() == IceType_i64 ? loOperand(Src) : Src);
_tst(SrcR, _1);
*CondIfTrue = CondARM32::NE; // NE <-> APSR.Z == 0
*CondIfFalse = CondARM32::EQ; // EQ <-> APSR.Z == 1
/// Lowers "Dest = Boolean ? TrueValue : FalseValue" for scalar destinations.
///
/// The emitted sequence is:
///
///   mov   T, FalseValue
///   <set the flags from Boolean>
///   movCC T, TrueValue        (once per condition code that means "true")
///   and   T, T, 1             (only for i1 values that are not known 0/1)
///   mov   Dest, T
///
/// 64-bit destinations are handled as separate low and high halves.
///
/// \param Dest destination variable (i64 destinations are split in lo/hi).
/// \param Boolean the i1 selector; if it has a tracked producer, the producer
/// is folded into the flag-setting code.
/// \param TrueValue value selected when Boolean is true.
/// \param FalseValue value selected when Boolean is false.
void TargetARM32::lowerInt1ForSelect(Variable *Dest, Operand *Boolean,
                                     Operand *TrueValue, Operand *FalseValue) {
  Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);

  assert(Boolean->getType() == IceType_i1);

  // When selecting between i1 values, each of them may itself be a folded
  // boolean chain. Materialize both, and remember whether the result needs to
  // be masked down to a single bit.
  bool NeedsAnd1 = false;
  if (TrueValue->getType() == IceType_i1) {
    assert(FalseValue->getType() == IceType_i1);

    Variable *TrueValueV = Func->makeVariable(IceType_i1);
    SafeBoolChain Src0Safe = lowerInt1(TrueValueV, TrueValue);
    TrueValue = TrueValueV;

    Variable *FalseValueV = Func->makeVariable(IceType_i1);
    SafeBoolChain Src1Safe = lowerInt1(FalseValueV, FalseValue);
    FalseValue = FalseValueV;

    NeedsAnd1 = Src0Safe == SBC_No || Src1Safe == SBC_No;
  }

  // Split 64-bit operands into halves; the Hi pointers stay null otherwise.
  Variable *DestLo = (Dest->getType() == IceType_i64)
                         ? llvm::cast<Variable>(loOperand(Dest))
                         : Dest;
  Variable *DestHi = (Dest->getType() == IceType_i64)
                         ? llvm::cast<Variable>(hiOperand(Dest))
                         : nullptr;
  Operand *FalseValueLo = (FalseValue->getType() == IceType_i64)
                              ? loOperand(FalseValue)
                              : FalseValue;
  Operand *FalseValueHi =
      (FalseValue->getType() == IceType_i64) ? hiOperand(FalseValue) : nullptr;

  Operand *TrueValueLo =
      (TrueValue->getType() == IceType_i64) ? loOperand(TrueValue) : TrueValue;
  Operand *TrueValueHi =
      (TrueValue->getType() == IceType_i64) ? hiOperand(TrueValue) : nullptr;

  Variable *T_Lo = makeReg(DestLo->getType());
  Variable *T_Hi = (DestHi == nullptr) ? nullptr : makeReg(DestHi->getType());

  // Start from the false value; the true value is conditionally moved over it
  // once the flags have been set.
  _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex));
  if (DestHi) {
    _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex));
  }

  CondWhenTrue Cond(CondARM32::kNone);
  // FlagsWereSet is used to determine whether Boolean was folded or not. If
  // not, add an explicit _tst instruction below.
  bool FlagsWereSet = false;
  if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) {
    switch (Producer->getKind()) {
    default:
      llvm_unreachable("Unexpected producer.");
    case Inst::Icmp: {
      Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
      FlagsWereSet = true;
    } break;
    case Inst::Fcmp: {
      Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
      FlagsWereSet = true;
    } break;
    case Inst::Cast: {
      const auto *CastProducer = llvm::cast<InstCast>(Producer);
      assert(CastProducer->getCastKind() == InstCast::Trunc);
      Boolean = CastProducer->getSrc(0);
      // Only bit 0 of the truncated value matters. For an i64 source, test the
      // low half, exactly as lowerInt1ForBranch() and lowerInt1() do; the
      // whole i64 operand must not be handed to legalizeToReg() below.
      if (Boolean->getType() == IceType_i64)
        Boolean = loOperand(Boolean);
      // No flags were set, so a _tst(Src, 1) will be emitted below. Don't
      // bother legalizing Src to a Reg because it will be legalized before
      // emitting the tst instruction.
      FlagsWereSet = false;
    } break;
    case Inst::Arithmetic: {
      // This is a special case: we eagerly assumed Producer could be folded,
      // but in reality, it can't. No reason to panic: we just lower it using
      // the regular lowerArithmetic helper.
      const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
      lowerArithmetic(ArithProducer);
      Boolean = ArithProducer->getDest();
      // No flags were set, so a _tst(Dest, 1) will be emitted below. Don't
      // bother legalizing Dest to a Reg because it will be legalized before
      // emitting the tst instruction.
      FlagsWereSet = false;
    } break;
    }
  }

  if (!FlagsWereSet) {
    // No flags have been set, so emit a tst Boolean, 1.
    Variable *Src = legalizeToReg(Boolean);
    _tst(Src, _1);
    Cond = CondWhenTrue(CondARM32::NE); // i.e., CondARM32::NotZero.
  }

  // kNone means the boolean is never true: the false value already in T is the
  // final result and no conditional move is needed.
  if (Cond.WhenTrue0 == CondARM32::kNone) {
    assert(Cond.WhenTrue1 == CondARM32::kNone);
  } else {
    _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
                   Cond.WhenTrue0);
    if (DestHi) {
      _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
                     Cond.WhenTrue0);
    }
  }

  // A second condition code, when present, also selects the true value.
  if (Cond.WhenTrue1 != CondARM32::kNone) {
    _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
                   Cond.WhenTrue1);
    if (DestHi) {
      _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
                     Cond.WhenTrue1);
    }
  }

  if (NeedsAnd1) {
    // We lowered something that is unsafe (i.e., can't provably be zero or
    // one). Truncate the result.
    _and(T_Lo, T_Lo, _1);
  }

  _mov(DestLo, T_Lo);
  if (DestHi) {
    _mov(DestHi, T_Hi);
  }
}
bool TargetARM32::_mov_i1_to_flags(Operand *Boolean,
CondARM32::Cond *CondIfTrue0,
CondARM32::Cond *CondIfTrue1,
CondARM32::Cond *CondIfFalse) {
*CondIfTrue0 = CondARM32::kNone;
*CondIfTrue1 = CondARM32::kNone;
*CondIfFalse = CondARM32::AL;
bool FoldOK = false;
TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest,
Operand *Boolean) {
assert(Boolean->getType() == IceType_i1);
Variable *T = makeReg(IceType_i1);
Operand *_0 =
legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex);
Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
SafeBoolChain Safe = SBC_Yes;
if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) {
if (const auto *IcmpProducer = llvm::dyn_cast<InstIcmp>(Producer)) {
lowerIcmpCond(IcmpProducer, CondIfTrue0, CondIfFalse);
FoldOK = true;
} else if (const auto *FcmpProducer = llvm::dyn_cast<InstFcmp>(Producer)) {
lowerFcmpCond(FcmpProducer, CondIfTrue0, CondIfTrue1, CondIfFalse);
FoldOK = true;
} else if (const auto *CastProducer = llvm::dyn_cast<InstCast>(Producer)) {
switch (Producer->getKind()) {
default:
llvm_unreachable("Unexpected producer.");
case Inst::Icmp: {
_mov(T, _0);
CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
assert(Cond.WhenTrue0 != CondARM32::AL);
assert(Cond.WhenTrue0 != CondARM32::kNone);
assert(Cond.WhenTrue1 == CondARM32::kNone);
_mov_redefined(T, _1, Cond.WhenTrue0);
} break;
case Inst::Fcmp: {
_mov(T, _0);
Inst *MovZero = Context.getLastInserted();
CondWhenTrue Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
if (Cond.WhenTrue0 == CondARM32::AL) {
assert(Cond.WhenTrue1 == CondARM32::kNone);
MovZero->setDeleted();
_mov(T, _1);
} else if (Cond.WhenTrue0 != CondARM32::kNone) {
_mov_redefined(T, _1, Cond.WhenTrue0);
}
if (Cond.WhenTrue1 != CondARM32::kNone) {
assert(Cond.WhenTrue0 != CondARM32::kNone);
assert(Cond.WhenTrue0 != CondARM32::AL);
_mov_redefined(T, _1, Cond.WhenTrue1);
}
} break;
case Inst::Cast: {
const auto *CastProducer = llvm::cast<InstCast>(Producer);
assert(CastProducer->getCastKind() == InstCast::Trunc);
lowerTruncToFlags(CastProducer->getSrc(0), CondIfTrue0, CondIfFalse);
FoldOK = true;
Operand *Src = CastProducer->getSrc(0);
if (Src->getType() == IceType_i64)
Src = loOperand(Src);
_mov(T, legalize(Src, Legal_Reg | Legal_Flex));
Safe = SBC_No;
} break;
case Inst::Arithmetic: {
const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
Safe = lowerInt1Arithmetic(ArithProducer);
_mov(T, ArithProducer->getDest());
} break;
}
} else {
_mov(T, legalize(Boolean, Legal_Reg | Legal_Flex));
}
return FoldOK;
_mov(Dest, T);
return Safe;
}
namespace {
namespace BoolFolding {
bool shouldTrackProducer(const Inst &Instr) {
switch (static_cast<uint32_t>(Instr.getKind())) {
switch (Instr.getKind()) {
default:
return false;
case Inst::Icmp:
return true;
case Inst::Fcmp:
return true;
}
if (const auto *Cast = llvm::dyn_cast<InstCast>(&Instr)) {
switch (static_cast<uint32_t>(Cast->getCastKind())) {
case Inst::Cast: {
switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
default:
return false;
case InstCast::Trunc:
return true;
}
}
return false;
case Inst::Arithmetic: {
switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
default:
return false;
case InstArithmetic::And:
case InstArithmetic::Or:
return true;
}
}
}
}
bool isValidConsumer(const Inst &Instr) {
switch (static_cast<uint32_t>(Instr.getKind())) {
switch (Instr.getKind()) {
default:
return false;
case Inst::Br:
return true;
case Inst::Select:
return !isVectorType(Instr.getDest()->getType());
}
if (const auto *Cast = llvm::dyn_cast<InstCast>(&Instr)) {
switch (static_cast<uint32_t>(Cast->getCastKind())) {
case Inst::Cast: {
switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
default:
return false;
case InstCast::Sext:
return !isVectorType(Instr.getDest()->getType());
case InstCast::Zext:
return !isVectorType(Instr.getDest()->getType());
}
}
return false;
case Inst::Arithmetic: {
switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
default:
return false;
case InstArithmetic::And:
return !isVectorType(Instr.getDest()->getType());
case InstArithmetic::Or:
return !isVectorType(Instr.getDest()->getType());
}
}
}
}
} // end of namespace BoolFolding
} // end of anonymous namespace
......@@ -4520,9 +4672,8 @@ void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) {
continue;
}
if (IndexOfVarOperandInInst(Var) != 0 ||
!BoolFolding::isValidConsumer(Instr)) {
// All valid consumers use Var as the first source operand
++ComputationIter->second.NumUses;
if (!BoolFolding::isValidConsumer(Instr)) {
KnownComputations.erase(VarNum);
continue;
}
......@@ -4536,7 +4687,7 @@ void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) {
for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
Iter != End;) {
// Disable the folding if its dest may be live beyond this block.
if (Iter->second.IsLiveOut) {
if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
Iter = KnownComputations.erase(Iter);
continue;
}
......
......@@ -135,19 +135,52 @@ protected:
void postLower() override;
/// SafeBoolChain is returned by the i1 lowering helpers to tell the caller
/// whether the materialized value still has to be masked with 1.
enum SafeBoolChain {
// The value can't provably be zero or one (e.g., it was copied from the
// source of a Trunc); consumers "and" it with 1 before using it.
SBC_No,
// No masking is required by consumers.
SBC_Yes,
};
void lowerAlloca(const InstAlloca *Inst) override;
SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Inst);
void lowerArithmetic(const InstArithmetic *Inst) override;
void lowerAssign(const InstAssign *Inst) override;
void lowerBr(const InstBr *Inst) override;
void lowerCall(const InstCall *Inst) override;
void lowerCast(const InstCast *Inst) override;
void lowerExtractElement(const InstExtractElement *Inst) override;
void lowerFcmpCond(const InstFcmp *Instr, CondARM32::Cond *CondIfTrue0,
CondARM32::Cond *CondIfTrue1,
CondARM32::Cond *CondIfFalse);
/// CondWhenTrue is a helper type returned by every method in the lowering
/// that emits code to set the condition codes.
class CondWhenTrue {
public:
explicit CondWhenTrue(CondARM32::Cond T0,
CondARM32::Cond T1 = CondARM32::kNone)
: WhenTrue0(T0), WhenTrue1(T1) {
assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone);
assert(T1 != T0 || T0 == CondARM32::kNone);
}
CondARM32::Cond WhenTrue0;
CondARM32::Cond WhenTrue1;
/// invert returns a new object with WhenTrue0 and WhenTrue1 inverted.
CondWhenTrue invert() const {
switch (WhenTrue0) {
default:
if (WhenTrue1 == CondARM32::kNone)
return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0));
return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0),
InstARM32::getOppositeCondition(WhenTrue1));
case CondARM32::AL:
return CondWhenTrue(CondARM32::kNone);
case CondARM32::kNone:
return CondWhenTrue(CondARM32::AL);
}
}
};
CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
void lowerFcmp(const InstFcmp *Instr) override;
void lowerIcmpCond(const InstIcmp *Instr, CondARM32::Cond *CondIfTrue,
CondARM32::Cond *CondIfFalse);
CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
void lowerIcmp(const InstIcmp *Instr) override;
void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
Operand *Val);
......@@ -334,58 +367,232 @@ protected:
}
}
// _mov_i1_to_flags is used for bool folding. If "Boolean" is folded, this
// method returns true, and sets "CondIfTrue0" and "CondIfTrue1" to the
// appropriate ARM condition codes. If "Boolean" is not to be folded, then
// this method returns false.
bool _mov_i1_to_flags(Operand *Boolean, CondARM32::Cond *CondIfTrue0,
CondARM32::Cond *CondIfTrue1,
CondARM32::Cond *CondIfFalse);
// _cmov is a pseudo instruction that is used for boolean folding. It emits
// code that moves "SrcIfTrue" to dest if either "CondIfTrue0" or
// "CondIfTrue1" holds, and "SrcIfFalse", if "CondIfFalse" holds. It requires
// "Dest" to be an infinite-weight temporary.
void _cmov(Variable *Dest, Operand *SrcIfTrue, CondARM32::Cond CondIfTrue0,
CondARM32::Cond CondIfTrue1, Operand *SrcIfFalse,
CondARM32::Cond CondIfFalse) {
assert(Dest->mustHaveReg());
if (CondIfFalse == CondARM32::kNone) {
assert(CondIfTrue0 == CondARM32::AL);
assert(CondIfTrue1 == CondARM32::kNone);
// --------------------------------------------------------------------------
// Begin bool folding machinery.
//
// There are three types of boolean lowerings handled by this target:
//
// 1) Boolean expressions leading to a boolean Variable definition
// ---------------------------------------------------------------
//
// Whenever a i1 Variable is live out (i.e., its live range extends beyond
// the defining basic block) we do not fold the operation. We instead
// materialize (i.e., compute) the variable normally, so that it can be used
// when needed. We also materialize i1 values that are not single use to
// avoid code duplication. These expressions are not short circuited.
//
// 2) Boolean expressions leading to a select
// ------------------------------------------
//
// These include boolean chains leading to a select instruction, as well as
// i1 Sexts. These boolean expressions are lowered to:
//
// mov T, <false value>
// CC <- eval(Boolean Expression)
// movCC T, <true value>
//
// For Sexts, <false value> is 0, and <true value> is -1.
//
// 3) Boolean expressions leading to a br i1
// -----------------------------------------
//
// These are the boolean chains leading to a branch. These chains are
// short-circuited, i.e.:
//
// A = or i1 B, C
// br i1 A, label %T, label %F
//
// becomes
//
// tst B
// jne %T
// tst C
// jne %T
// j %F
//
// and
//
// A = and i1 B, C
// br i1 A, label %T, label %F
//
// becomes
//
// tst B
// jeq %F
// tst C
// jeq %F
// j %T
//
// Arbitrarily long chains are short circuited, e.g.:
//
// A = or i1 B, C
// D = and i1 A, E
// F = and i1 G, H
// I = or i1 D, F
// br i1 I, label %True, label %False
//
// becomes
//
// Label[A]:
// tst B, 1
// bne Label[D]
// tst C, 1
// beq Label[I]
// Label[D]:
// tst E, 1
// bne %True
// Label[I]:
// tst G, 1
// beq %False
// tst H, 1
// beq %False (bne %True)
/// lowerInt1 materializes Boolean to a Variable.
SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean);
/// lowerInt1ForSelect generates the following instruction sequence:
///
/// mov T, FalseValue
/// CC <- eval(Boolean)
/// movCC T, TrueValue
/// mov Dest, T
///
/// It is used for lowering select i1, as well as i1 Sext.
void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue,
Operand *FalseValue);
/// LowerInt1BranchTarget is used by lowerInt1ForBranch. It wraps a CfgNode, or
/// an InstARM32Label (but never both) so that, during br i1 lowering, we can
/// create auxiliary labels for short circuiting the condition evaluation.
class LowerInt1BranchTarget {
public:
/// Wraps a CfgNode; LabelTarget keeps its default value of nullptr.
explicit LowerInt1BranchTarget(CfgNode *const Target)
: NodeTarget(Target) {}
/// Wraps an InstARM32Label; NodeTarget keeps its default value of nullptr.
explicit LowerInt1BranchTarget(InstARM32Label *const Target)
: LabelTarget(Target) {}
/// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
/// is the exact copy of this if Label is nullptr; otherwise, the returned
/// object will wrap Label instead.
LowerInt1BranchTarget
createForLabelOrDuplicate(InstARM32Label *Label) const {
// A non-null Label always takes precedence over the wrapped target.
if (Label != nullptr)
return LowerInt1BranchTarget(Label);
// No replacement label: duplicate whichever target this object wraps.
if (NodeTarget)
return LowerInt1BranchTarget(NodeTarget);
return LowerInt1BranchTarget(LabelTarget);
}
// NOTE(review): the next four lines reference CondIfTrue0, CondIfFalse and
// CondIfTrue1, none of which are declared in this class. They look like
// leftover lines from the removed _cmov helper in this view -- confirm.
if (CondIfTrue0 == CondARM32::kNone) {
assert(CondIfFalse == CondARM32::AL);
assert(CondIfTrue1 == CondARM32::kNone);
}
// The wrapped target. Each constructor sets one of these and leaves the other
// at nullptr.
CfgNode *const NodeTarget = nullptr;
InstARM32Label *const LabelTarget = nullptr;
};
if (CondIfTrue1 != CondARM32::kNone) {
assert(CondIfFalse == CondARM32::AL);
assert(CondIfTrue1 != CondARM32::kNone);
}
/// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch for
/// determining which type of arithmetic is allowed to be short circuited. This
/// is useful for lowering
///
/// t1 = and i1 A, B
/// t2 = and i1 t1, C
/// br i1 t2, label %True, label %False
///
/// to
///
/// tst A, 1
/// beq %False
/// tst B, 1
/// beq %False
/// tst C, 1
/// bne %True
/// b %False
///
/// Without this information, short circuiting would only allow to short
/// circuit a single high level instruction. For example:
///
/// t1 = or i1 A, B
/// t2 = and i1 t1, C
/// br i1 t2, label %True, label %False
///
/// cannot be lowered to
///
/// tst A, 1
/// bne %True
/// tst B, 1
/// bne %True
/// tst C, 1
/// beq %True
/// b %False
///
/// It needs to be lowered to
///
/// tst A, 1
/// bne Aux
/// tst B, 1
/// beq %False
/// Aux:
/// tst C, 1
/// bne %True
/// b %False
///
/// TODO(jpp): evaluate if this kind of short circuiting hurts performance (it
/// might.)
enum LowerInt1AllowShortCircuit {
  SC_And = 1 << 0,         // Bit 0 (value 1).
  SC_Or = 1 << 1,          // Bit 1 (value 2).
  SC_All = SC_And | SC_Or, // Both bits set (value 3).
};
bool RedefineT = false;
if (CondIfFalse != CondARM32::kNone) {
_mov(Dest, SrcIfFalse, CondIfFalse);
RedefineT = true;
/// ShortCircuitCondAndLabel wraps the condition codes that should be used,
/// once a lowerInt1ForBranch call returns, to branch to the
/// TrueTarget/FalseTarget. When ShortCircuitTarget is not nullptr, the called
/// lowerInt1ForBranch created an internal (i.e., short-circuit) label used for
/// short circuiting.
class ShortCircuitCondAndLabel {
public:
  const CondWhenTrue Cond;
  InstARM32Label *const ShortCircuitTarget;

  explicit ShortCircuitCondAndLabel(CondWhenTrue &&BranchCond,
                                    InstARM32Label *Label = nullptr)
      : Cond(std::move(BranchCond)), ShortCircuitTarget(Label) {}

  /// Returns a copy of Cond, asserting that no short-circuit label was
  /// recorded in this object.
  CondWhenTrue assertNoLabelAndReturnCond() const {
    assert(ShortCircuitTarget == nullptr);
    return Cond;
  }
};
if (CondIfTrue0 != CondARM32::kNone) {
if (RedefineT) {
_mov_redefined(Dest, SrcIfTrue, CondIfTrue0);
} else {
_mov(Dest, SrcIfTrue, CondIfTrue0);
}
RedefineT = true;
/// lowerInt1ForBranch expands Boolean, and returns the condition codes that
/// are to be used for branching to the branch's TrueTarget. It may return a
/// label that the expansion of Boolean used to short circuit the chain's
/// evaluation.
ShortCircuitCondAndLabel
lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
const LowerInt1BranchTarget &TargetFalse,
uint32_t ShortCircuitable);
// _br is a convenience wrapper that emits a br instruction to BrTarget,
// forwarding to the _br overload matching whichever target (CfgNode or
// InstARM32Label) BrTarget wraps. Cond defaults to CondARM32::AL.
void _br(const LowerInt1BranchTarget &BrTarget,
         CondARM32::Cond Cond = CondARM32::AL) {
  // Exactly one of the two wrapped targets must be set.
  assert((BrTarget.NodeTarget == nullptr) !=
         (BrTarget.LabelTarget == nullptr));
  if (BrTarget.NodeTarget == nullptr) {
    _br(BrTarget.LabelTarget, Cond);
    return;
  }
  _br(BrTarget.NodeTarget, Cond);
}
// _br_short_circuit is used when lowering InstArithmetic::And and
// InstArithmetic::Or and a short circuit branch is needed.
// It emits up to two conditional branches to Target: first one predicated on
// Cond.WhenTrue1, then one predicated on Cond.WhenTrue0. A condition equal to
// CondARM32::kNone produces no branch.
void _br_short_circuit(const LowerInt1BranchTarget &Target,
const CondWhenTrue &Cond) {
if (Cond.WhenTrue1 != CondARM32::kNone) {
_br(Target, Cond.WhenTrue1);
}
// NOTE(review): the next three lines reference CondIfTrue1, RedefineT, Dest
// and SrcIfTrue, none of which are parameters or locals of this function.
// They look like leftover lines from the removed _cmov helper in this view,
// and they leave the braces unbalanced -- confirm.
if (CondIfTrue1 != CondARM32::kNone) {
assert(RedefineT);
_mov_redefined(Dest, SrcIfTrue, CondIfTrue1);
if (Cond.WhenTrue0 != CondARM32::kNone) {
_br(Target, Cond.WhenTrue0);
}
}
// End of bool folding machinery
// --------------------------------------------------------------------------
/// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with
/// an upper16 relocation).
......@@ -628,9 +835,6 @@ private:
OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
Operand *Base);
void lowerTruncToFlags(Operand *Src, CondARM32::Cond *CondIfTrue,
CondARM32::Cond *CondIfFalse);
class BoolComputationTracker {
public:
BoolComputationTracker() = default;
......@@ -658,7 +862,7 @@ private:
return;
OstreamLocker L(Func->getContext());
Ostream &Str = Func->getContext()->getStrDump();
Str << "foldable producer:\n ";
Str << "foldable producer:\n";
for (const auto &Computation : KnownComputations) {
Str << " ";
Computation.second.Instr->dump(Func);
......@@ -679,6 +883,7 @@ private:
// Om1 mode) IsLiveOut will never be set to false, and folding will be
// disabled.
bool IsLiveOut = true;
int32_t NumUses = 0;
};
using BoolComputationMap = std::unordered_map<SizeT, BoolComputationEntry>;
......
......@@ -82,7 +82,7 @@ public:
PK_Icmp64,
PK_Fcmp,
PK_Trunc,
PK_Arith // A flag-setting arithmetic instruction.
PK_Arith // A flag-setting arithmetic instruction.
};
/// Currently the actual enum values are not used (other than CK_None), but we
......
......@@ -52,40 +52,40 @@ define internal void @mult_fwd_branches(i32 %a, i32 %b) {
%cmp = icmp slt i32 %a, %b
; ASM-NEXT: ldr r0, [sp, #8]
; ASM-NEXT: ldr r1, [sp, #4]
; ASM-NEXT: cmp r0, r1
; ASM-NEXT: movge r0, #0
; ASM-NEXT: mov r0, #0
; ASM-NEXT: ldr r1, [sp, #8]
; ASM-NEXT: ldr r2, [sp, #4]
; ASM-NEXT: cmp r1, r2
; ASM-NEXT: movlt r0, #1
; ASM-NEXT: strb r0, [sp]
; DIS-NEXT: c: e59d0008
; DIS-NEXT: 10: e59d1004
; DIS-NEXT: 14: e1500001
; DIS-NEXT: 18: a3a00000
; DIS-NEXT: c: e3a00000
; DIS-NEXT: 10: e59d1008
; DIS-NEXT: 14: e59d2004
; DIS-NEXT: 18: e1510002
; DIS-NEXT: 1c: b3a00001
; DIS-NEXT: 20: e5cd0000
; IASM-NEXT: .byte 0x8
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe3
; IASM-NEXT: .byte 0x8
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x20
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x2
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x50
; IASM-NEXT: .byte 0x51
; IASM-NEXT: .byte 0xe1
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xa3
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xa0
......@@ -96,23 +96,20 @@ define internal void @mult_fwd_branches(i32 %a, i32 %b) {
br i1 %cmp, label %then, label %else
; ASM-NEXT: ldrb r0, [sp]
; ASM-NEXT: uxtb r0, r0
; ASM-NEXT: cmp r0, #0
; ASM-NEXT: tst r0, #1
; ASM-NEXT: bne .Lmult_fwd_branches$then
; ASM-NEXT: b .Lmult_fwd_branches$else
; DIS-NEXT: 24: e5dd0000
; DIS-NEXT: 28: e6ef0070
; DIS-NEXT: 2c: e3500000
; DIS-NEXT: 30: 1a000000
; DIS-NEXT: 34: ea000000
; DIS-NEXT: 28: e3100001
; DIS-NEXT: 2c: 1a000000
; DIS-NEXT: 30: ea000000
; IASM-NEXT: ldrb r0, [sp]
; IASM-NEXT: uxtb r0, r0
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x50
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0xe3
; IASM-NEXT: .byte 0x0
......@@ -132,7 +129,7 @@ then:
br label %end
; ASM-NEXT: b .Lmult_fwd_branches$end
; DIS-NEXT: 38: ea000000
; DIS-NEXT: 34: ea000000
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x0
......@@ -146,7 +143,7 @@ else:
br label %end
; ASM-NEXT: b .Lmult_fwd_branches$end
; DIS-NEXT: 3c: eaffffff
; DIS-NEXT: 38: eaffffff
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0xff
......@@ -163,8 +160,8 @@ end:
; ASM-NEXT: add sp, sp, #12
; ASM-NEXT: bx lr
; DIS-NEXT: 40: e28dd00c
; DIS-NEXT: 44: e12fff1e
; DIS-NEXT: 3c: e28dd00c
; DIS-NEXT: 40: e12fff1e
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
......
......@@ -850,10 +850,7 @@ entry:
; ARM32-LABEL: trunc64To1
; ARM32-OM1: and r0, r0, #1
; ARM32-OM1: and r0, r0, #1
; ARM32-O2: tst r0, #1
; ARM32-O2: moveq [[RES:r[0-9]+]], #0
; ARM32-O2: movne [[RES]], #1
; ARM32-O2: and r0, r0, #1
define internal i64 @sext32To64(i32 %a) {
entry:
......@@ -924,12 +921,10 @@ entry:
; OPTM1: sar {{.*}},0x1f
; ARM32-LABEL: sext1To64
; ARM32-OM1: lsl {{.*}}, #31
; ARM32-OM1: asr {{.*}}, #31
; ARM32-O2: tst r0, #1
; ARM32-O2: mvn [[M1:r[0-9]+]], #0
; ARM32-O2: moveq [[RES:r[0-9]+]], #0
; ARM32-O2: movne [[RES]], [[M1]]
; ARM32: mov {{.*}}, #0
; ARM32: tst {{.*}}, #1
; ARM32: mvn {{.*}}, #0
; ARM32: movne
define internal i64 @zext32To64(i32 %a) {
entry:
......@@ -998,11 +993,9 @@ entry:
; OPTM1: mov {{.*}},0x0
; ARM32-LABEL: zext1To64
; ARM32-OM1: and {{.*}}, #1
; ARM32-OM1: mov {{.*}}, #0
; ARM32-O2: tst r0, #1
; ARM32-O2: moveq {{[^,]*}}, #0
; ARM32-O2: movne {{[^,]*}}, #1
; ARM32: and {{.*}}, #1
; ARM32: mov {{.*}}, #0
; ARM32: bx
define internal void @icmpEq64(i64 %a, i64 %b, i64 %c, i64 %d) {
entry:
......@@ -1061,18 +1054,15 @@ if.end3: ; preds = %if.then2, %if.end
; ARM32-LABEL: icmpEq64
; ARM32: cmp
; ARM32: cmpeq
; ARM32-OM1: movne
; ARM32-OM1: moveq
; ARM32-OM1: cmp
; ARM32-O2: bne
; ARM32: bl
; ARM32-OM1: tst
; ARM32: bne
; ARM32: bl {{.*}} <func>
; ARM32: cmp
; ARM32: cmpeq
; ARM32-OM1: movne
; ARM32-OM1: moveq
; ARM32-OM1: cmp
; ARM32-O2: bne
; ARM32: bl
; ARM32-OM1: tst
; ARM32: bne
; ARM32: bl {{.*}} <func>
; ARM32: bx
declare void @func()
......@@ -1133,16 +1123,14 @@ if.end3: ; preds = %if.end, %if.then2
; ARM32-LABEL: icmpNe64
; ARM32: cmp
; ARM32: cmpeq
; ARM32-OM1: moveq
; ARM32-OM1: movne
; ARM32-OM1: cmp
; ARM32-OM1: tst
; ARM32-OM1: bne
; ARM32-O2: beq
; ARM32: bl
; ARM32: bl {{.*}} <func>
; ARM32: cmp
; ARM32: cmpeq
; ARM32-OM1: moveq
; ARM32-OM1: movne
; ARM32-OM1: cmp
; ARM32-OM1: tst
; ARM32-OM1: bne
; ARM32-O2: beq
; ARM32: bl
......@@ -1189,16 +1177,14 @@ if.end3: ; preds = %if.then2, %if.end
; ARM32-LABEL: icmpGt64
; ARM32: cmp
; ARM32: cmpeq
; ARM32-OM1: movls
; ARM32-OM1: movhi
; ARM32-OM1: cmp
; ARM32-OM1: tst
; ARM32-OM1: bne
; ARM32-O2: bls
; ARM32: bl
; ARM32: cmp
; ARM32: sbcs
; ARM32-OM1: movge
; ARM32-OM1: movlt
; ARM32-OM1: cmp
; ARM32-OM1: tst
; ARM32-OM1: bne
; ARM32-O2: bge
; ARM32: bl
......@@ -1245,16 +1231,14 @@ if.end3: ; preds = %if.end, %if.then2
; ARM32-LABEL: icmpGe64
; ARM32: cmp
; ARM32: cmpeq
; ARM32-OM1: movcc
; ARM32-OM1: movcs
; ARM32-OM1: cmp
; ARM32-OM1: tst
; ARM32-OM1: bne
; ARM32-O2: bcc
; ARM32: bl
; ARM32: cmp
; ARM32: sbcs
; ARM32-OM1: movlt
; ARM32-OM1: movge
; ARM32-OM1: cmp
; ARM32-OM1: tst
; ARM32-OM1: bne
; ARM32-O2: blt
; ARM32: bl
......@@ -1301,16 +1285,14 @@ if.end3: ; preds = %if.then2, %if.end
; ARM32-LABEL: icmpLt64
; ARM32: cmp
; ARM32: cmpeq
; ARM32-OM1: movcs
; ARM32-OM1: movcc
; ARM32-OM1: cmp
; ARM32-OM1: tst
; ARM32-OM1: bne
; ARM32-O2: bcs
; ARM32: bl
; ARM32: cmp
; ARM32: sbcs
; ARM32-OM1: movge
; ARM32-OM1: movlt
; ARM32-OM1: cmp
; ARM32-OM1: tst
; ARM32-OM1: bne
; ARM32-O2: bge
; ARM32: bl
......@@ -1357,15 +1339,14 @@ if.end3: ; preds = %if.end, %if.then2
; ARM32-LABEL: icmpLe64
; ARM32: cmp
; ARM32: cmpeq
; ARM32-OM1: movhi
; ARM32-OM1: movls
; ARM32-OM1: cmp
; ARM32-OM1: tst
; ARM32-OM1: bne
; ARM32-O2: bhi
; ARM32: bl
; ARM32: cmp
; ARM32: sbcs
; ARM32-OM1: movlt
; ARM32-OM1: movge
; ARM32-OM1: tst
; ARM32-OM1: bne
; ARM32-O2: blt
; ARM32: bl
......@@ -1384,7 +1365,7 @@ entry:
; OPTM1: je
; ARM32-LABEL: icmpEq64Bool
; ARM32: movne
; ARM32: mov
; ARM32: moveq
define internal i32 @icmpNe64Bool(i64 %a, i64 %b) {
......@@ -1402,7 +1383,7 @@ entry:
; OPTM1: jne
; ARM32-LABEL: icmpNe64Bool
; ARM32: moveq
; ARM32: mov
; ARM32: movne
define internal i32 @icmpSgt64Bool(i64 %a, i64 %b) {
......@@ -1426,9 +1407,9 @@ entry:
; OPTM1: ja
; ARM32-LABEL: icmpSgt64Bool
; ARM32: mov
; ARM32: cmp
; ARM32: sbcs
; ARM32: movge
; ARM32: movlt
define internal i32 @icmpUgt64Bool(i64 %a, i64 %b) {
......@@ -1452,9 +1433,9 @@ entry:
; OPTM1: ja
; ARM32-LABEL: icmpUgt64Bool
; ARM32: mov
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movls
; ARM32: movhi
define internal i32 @icmpSge64Bool(i64 %a, i64 %b) {
......@@ -1478,9 +1459,9 @@ entry:
; OPTM1: jae
; ARM32-LABEL: icmpSge64Bool
; ARM32: mov
; ARM32: cmp
; ARM32: sbcs
; ARM32: movlt
; ARM32: movge
define internal i32 @icmpUge64Bool(i64 %a, i64 %b) {
......@@ -1504,9 +1485,9 @@ entry:
; OPTM1: jae
; ARM32-LABEL: icmpUge64Bool
; ARM32: mov
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movcc
; ARM32: movcs
define internal i32 @icmpSlt64Bool(i64 %a, i64 %b) {
......@@ -1530,9 +1511,9 @@ entry:
; OPTM1: jb
; ARM32-LABEL: icmpSlt64Bool
; ARM32: mov
; ARM32: cmp
; ARM32: sbcs
; ARM32: movge
; ARM32: movlt
define internal i32 @icmpUlt64Bool(i64 %a, i64 %b) {
......@@ -1556,9 +1537,9 @@ entry:
; OPTM1: jb
; ARM32-LABEL: icmpUlt64Bool
; ARM32: mov
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movcs
; ARM32: movcc
define internal i32 @icmpSle64Bool(i64 %a, i64 %b) {
......@@ -1582,9 +1563,9 @@ entry:
; OPTM1: jbe
; ARM32-LABEL: icmpSle64Bool
; ARM32: mov
; ARM32: cmp
; ARM32: sbcs
; ARM32: movlt
; ARM32: movge
define internal i32 @icmpUle64Bool(i64 %a, i64 %b) {
......@@ -1608,9 +1589,9 @@ entry:
; OPTM1: jbe
; ARM32-LABEL: icmpUle64Bool
; ARM32: mov
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movhi
; ARM32: movls
define internal i64 @load64(i32 %a) {
......@@ -1701,9 +1682,7 @@ entry:
; ARM32-LABEL: select64VarVar
; ARM32: cmp
; ARM32: cmpeq
; ARM32-OM1: movcs
; ARM32-OM1: movcc
; ARM32-OM1: cmp
; ARM32-OM1: tst
; ARM32-OM1: movne
; ARM32-O2: movcc
; ARM32-OM1: movne
......@@ -1734,19 +1713,17 @@ entry:
; OPTM1: cmovne
; ARM32-LABEL: select64VarConst
; ARM32: mov
; ARM32: mov
; ARM32: cmp
; ARM32: cmpeq
; ARM32-OM1: movcs
; ARM32-OM1: movcc
; ARM32-OM1: cmp
; ARM32: movw
; ARM32: movt
; ARM32-OM1: tst
; ARM32-OM1: movne
; ARM32-O2: movcc
; ARM32: movw
; ARM32: movt
; ARM32-OM1: movne
; ARM32-O2: movcc
; ARM32-O2: mov
; ARM32-O2: mov
define internal i64 @select64ConstVar(i64 %a, i64 %b) {
entry:
......@@ -1775,9 +1752,7 @@ entry:
; ARM32-LABEL: select64ConstVar
; ARM32: cmp
; ARM32: cmpeq
; ARM32-OM1: movcs
; ARM32-OM1: movcc
; ARM32-OM1: cmp
; ARM32-OM1: tst
; ARM32: movw
; ARM32: movt
; ARM32-OM1: movne
......
......@@ -85,7 +85,7 @@ branch2:
; ARM32-LABEL: no_fold_cmp_br_liveout
; ARM32: cmp
; ARM32: movlt [[REG:r[0-9]+]]
; ARM32: cmp [[REG]], #0
; ARM32: tst [[REG]], #1
; ARM32: beq
......@@ -108,11 +108,11 @@ branch2:
; CHECK: cmp
; CHECK: je
; ARM32-LABEL: no_fold_cmp_br_non_whitelist
; ARM32: mov [[R:r[0-9]+]], #0
; ARM32: cmp r0, r1
; ARM32: movge [[R:r[0-9]+]], #0
; ARM32: movlt [[R]], #1
; ARM32: cmp r0, r1
; ARM32: bge
; ARM32: tst [[R]], #1
; ARM32: beq
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr
......@@ -168,10 +168,10 @@ entry:
; CHECK: cmovl
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_64_undef
; ARM32: mov
; ARM32: mov
; ARM32: cmp {{r[0-9]+}}, r0
; ARM32: movge
; ARM32: movlt
; ARM32: movge
; ARM32: movlt
; ARM32: bx lr
......@@ -218,14 +218,17 @@ entry:
; CHECK: add
; CHECK: add
; ARM32-LABEL: fold_cmp_select_multi
; ARM32: cmp r0, r1
; ARM32: movlt {{r[0-9]+}}, r0
; ARM32: cmp r0, r1
; ARM32: movlt {{r[0-9]+}}, r1
; ARM32: cmp r0, r1
; ARM32: movlt {{r[0-9]+}}, #123
; ARM32: add
; ARM32: add
; ARM32: mov
; ARM32: cmp
; ARM32: movlt {{.*}}, #1
; ARM32: mov
; ARM32: tst {{.*}}, #1
; ARM32: movne
; ARM32: mov
; ARM32: tst {{.*}}, #1
; ARM32: movne
; ARM32: tst {{.*}}, #1
; ARM32: movne {{.*}}, #123
; ARM32: bx lr
......@@ -254,22 +257,17 @@ next:
; CHECK: add
; CHECK: add
; ARM32-LABEL: no_fold_cmp_select_multi_liveout
; ARM32-LABEL: fold_cmp_select_multi
; ARM32: mov
; ARM32: cmp r0, r1
; ARM32: movge [[T0:r[0-9]+]], #0
; ARM32: movlt [[T0]], #1
; ARM32: uxtb [[T1:r[0-9]+]], [[T1]]
; ARM32-NEXT: cmp [[T1]], #0
; ARM32: movne [[T2:r[0-9]+]], r0
; ARM32: uxtb [[T3:r[0-9]+]], [[T3]]
; ARM32-NEXT: cmp [[T3]], #0
; ARM32: movne [[T4:r[0-9]+]], r1
; ARM32-LABEL: .Lno_fold_cmp_select_multi_liveout$next:
; ARM32: uxtb [[T5:r[0-9]+]], [[T5]]
; ARM32: cmp [[T5]], #0
; ARM32: movne [[T6:r[0-9]+]], #123
; ARM32: add
; ARM32: add
; ARM32: movlt
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: tst
; ARM32: movne
; ARM32: bx lr
; Cmp/multi-select non-folding because of extra non-whitelisted uses.
......@@ -300,19 +298,133 @@ entry:
; CHECK: add
; CHECK: add
; ARM32-LABEL: no_fold_cmp_select_multi_non_whitelist
; ARM32: mov
; ARM32: cmp r0, r1
; ARM32: movge [[R0:r[0-9]+]]
; ARM32: movlt [[R0]]
; ARM32: cmp r0, r1
; ARM32: movge [[R1:r[0-9]+]]
; ARM32: movlt [[R1]]
; ARM32: cmp r0, r1
; ARM32: movge [[R2:r[0-9]+]]
; ARM32: movlt [[R2]]
; ARM32: cmp r0, r1
; ARM32: movge [[R3:r[0-9]+]]
; ARM32: movlt [[R3]]
; ARM32: add
; ARM32: add
; ARM32: add
; ARM32: movlt
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: tst
; ARM32: movne
; ARM32: bx lr
; Branch on the `and` of two i1 values, each obtained by truncating an i32
; argument. Returns 1 on the true edge and 0 on the false edge.
define internal i32 @br_i1_folding2_and(i32 %arg1, i32 %arg2) {
%t0 = trunc i32 %arg1 to i1
%t1 = trunc i32 %arg2 to i1
%t2 = and i1 %t0, %t1
br i1 %t2, label %target_true, label %target_false
target_true:
ret i32 1
target_false:
ret i32 0
}
; ARM32-LABEL: br_i1_folding2_and
; ARM32: tst r0, #1
; ARM32: beq {{.*}}target_false
; ARM32: tst r1, #1
; ARM32: beq {{.*}}target_false
; Branch on the `or` of two i1 values, each obtained by truncating an i32
; argument. Returns 1 on the true edge and 0 on the false edge.
define internal i32 @br_i1_folding2_or(i32 %arg1, i32 %arg2) {
%t0 = trunc i32 %arg1 to i1
%t1 = trunc i32 %arg2 to i1
%t2 = or i1 %t0, %t1
br i1 %t2, label %target_true, label %target_false
target_true:
ret i32 1
target_false:
ret i32 0
}
; ARM32-LABEL: br_i1_folding2_or
; ARM32: tst r0, #1
; ARM32: bne {{.*}}target_true
; ARM32: tst r1, #1
; ARM32: beq {{.*}}target_false
; Branch on (%t0 and %t1) or %t2, where each %tN is an i32 argument truncated
; to i1. Returns 1 on the true edge and 0 on the false edge.
define internal i32 @br_i1_folding3_and_or(i32 %arg1, i32 %arg2, i32 %arg3) {
%t0 = trunc i32 %arg1 to i1
%t1 = trunc i32 %arg2 to i1
%t2 = trunc i32 %arg3 to i1
%t3 = and i1 %t0, %t1
%t4 = or i1 %t3, %t2
br i1 %t4, label %target_true, label %target_false
target_true:
ret i32 1
target_false:
ret i32 0
}
; ARM32-LABEL: br_i1_folding3_and_or
; ARM32: tst r0, #1
; ARM32: beq
; ARM32: tst r1, #1
; ARM32: bne {{.*}}target_true
; ARM32: tst r2, #1
; ARM32: beq {{.*}}target_false
; Branch on (%t0 or %t1) and %t2, where each %tN is an i32 argument truncated
; to i1. Returns 1 on the true edge and 0 on the false edge.
define internal i32 @br_i1_folding3_or_and(i32 %arg1, i32 %arg2, i32 %arg3) {
%t0 = trunc i32 %arg1 to i1
%t1 = trunc i32 %arg2 to i1
%t2 = trunc i32 %arg3 to i1
%t3 = or i1 %t0, %t1
%t4 = and i1 %t3, %t2
br i1 %t4, label %target_true, label %target_false
target_true:
ret i32 1
target_false:
ret i32 0
}
; ARM32-LABEL: br_i1_folding3_or_and
; ARM32: tst r0, #1
; ARM32: bne
; ARM32: tst r1, #1
; ARM32: beq {{.*}}target_false
; ARM32: tst r2, #1
; ARM32: beq {{.*}}target_false
; Branch on a four-instruction i1 chain over five truncated i32 arguments:
;   ((%t0 or %t1) and %t2) or (%t3 and %t4)
; Returns 1 on the true edge and 0 on the false edge.
define internal i32 @br_i1_folding4(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
i32 %arg5) {
%t0 = trunc i32 %arg1 to i1
%t1 = trunc i32 %arg2 to i1
%t2 = trunc i32 %arg3 to i1
%t3 = trunc i32 %arg4 to i1
%t4 = trunc i32 %arg5 to i1
%t5 = or i1 %t0, %t1
%t6 = and i1 %t5, %t2
%t7 = and i1 %t3, %t4
%t8 = or i1 %t6, %t7
br i1 %t8, label %target_true, label %target_false
target_true:
ret i32 1
target_false:
ret i32 0
}
; ARM32-LABEL: br_i1_folding4
; ARM32: tst r0, #1
; ARM32: bne
; ARM32: tst r1, #1
; ARM32: beq
; ARM32: tst r2, #1
; ARM32: bne {{.*}}target_true
; ARM32: tst r3, #1
; ARM32: beq {{.*}}target_false
; ARM32: tst r4, #1
; ARM32: beq {{.*}}target_false
......@@ -92,7 +92,6 @@ target:
; OM1: call
; OM1: ret
; Note that compare and branch folding isn't implemented yet (unlike x86-32).
; ARM32O2-LABEL: testCondFallthroughToNextBlock
; ARM32O2: cmp {{.*}}, #123
; ARM32O2-NEXT: bge
......@@ -102,10 +101,10 @@ target:
; ARM32O2: bx lr
; ARM32OM1-LABEL: testCondFallthroughToNextBlock
; ARM32OM1: mov {{.*}}, #0
; ARM32OM1: cmp {{.*}}, #123
; ARM32OM1: movlt {{.*}}, #0
; ARM32OM1: movge {{.*}}, #1
; ARM32OM1: cmp {{.*}}, #0
; ARM32OM1: tst {{.*}}, #1
; ARM32OM1: bne
; ARM32OM1: b
; ARM32OM1: bl
......@@ -161,7 +160,7 @@ target:
; ARM32OM1-LABEL: testCondTargetNextBlock
; ARM32OM1: cmp {{.*}}, #123
; ARM32OM1: movge {{.*}}, #1
; ARM32OM1: cmp {{.*}}, #0
; ARM32OM1: tst {{.*}}, #1
; ARM32OM1: bne
; ARM32OM1: b
; ARM32OM1: bl
......
......@@ -58,13 +58,13 @@ if.end3: ; preds = %if.then2, %if.end
; ARM32-LABEL: fcmpEq
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32-OM1: movne [[R0:r[0-9]+]], #0
; ARM32-OM1: mov [[R0:r[0-9]+]], #0
; ARM32-OM1: moveq [[R0]], #1
; ARM32-O2: bne
; ARM32: bl func
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32-OM1: movne [[R1:r[0-9]+]], #0
; ARM32-OM1: mov [[R1:r[0-9]+]], #0
; ARM32-OM1: moveq [[R1]], #1
; ARM32-O2: bne
......@@ -115,12 +115,12 @@ if.end3: ; preds = %if.then2, %if.end
; ARM32-LABEL: fcmpNe
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32-OM1: moveq [[R0:r[0-9]+]], #0
; ARM32-OM1: mov [[R0:r[0-9]+]], #0
; ARM32-OM1: movne [[R0]], #1
; ARM32-O2: beq
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32-OM1: moveq [[R1:r[0-9]+]], #0
; ARM32-OM1: mov [[R1:r[0-9]+]], #0
; ARM32-OM1: movne [[R1]], #1
; ARM32-O2: beq
......@@ -161,12 +161,12 @@ if.end3: ; preds = %if.then2, %if.end
; ARM32-LABEL: fcmpGt
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32-OM1: movle [[R0:r[0-9]+]], #0
; ARM32-OM1: mov [[R0:r[0-9]+]], #0
; ARM32-OM1: movgt [[R0]], #1
; ARM32-O2: ble
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32-OM1: movle [[R1:r[0-9]+]], #0
; ARM32-OM1: mov [[R1:r[0-9]+]], #0
; ARM32-OM1: movgt [[R1]], #1
; ARM32-O2: ble
......@@ -207,12 +207,12 @@ if.end3: ; preds = %if.end, %if.then2
; ARM32-LABEL: fcmpGe
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32-OM1: movge [[R0:r[0-9]+]], #0
; ARM32-OM1: mov [[R0:r[0-9]+]], #0
; ARM32-OM1: movlt [[R0]], #1
; ARM32-O2: blt
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32-OM1: movge [[R1:r[0-9]+]], #0
; ARM32-OM1: mov [[R1:r[0-9]+]], #0
; ARM32-OM1: movlt [[R1]], #1
; ARM32-O2: blt
......@@ -253,12 +253,12 @@ if.end3: ; preds = %if.then2, %if.end
; ARM32-LABEL: fcmpLt
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32-OM1: movpl [[R0:r[0-9]+]], #0
; ARM32-OM1: mov [[R0:r[0-9]+]], #0
; ARM32-OM1: movmi [[R0]], #1
; ARM32-O2: bpl
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32-OM1: movpl [[R1:r[0-9]+]], #0
; ARM32-OM1: mov [[R1:r[0-9]+]], #0
; ARM32-OM1: movmi [[R1]], #1
; ARM32-O2: bpl
......@@ -299,12 +299,12 @@ if.end3: ; preds = %if.end, %if.then2
; ARM32-LABEL: fcmpLe
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32-OM1: movls [[R0:r[0-9]+]], #0
; ARM32-OM1: mov [[R0:r[0-9]+]], #0
; ARM32-OM1: movhi [[R0]], #1
; ARM32-O2: bhi
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32-OM1: movls [[R1:r[0-9]+]], #0
; ARM32-OM1: mov [[R1:r[0-9]+]], #0
; ARM32-OM1: movhi [[R1]], #1
; ARM32-O2: bhi
......@@ -341,9 +341,10 @@ entry:
; CHECK: jne
; CHECK: jp
; ARM32-LABEL: fcmpOeqFloat
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32: movne [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: moveq [[R]], #1
define internal i32 @fcmpOeqDouble(double %a, double %b) {
......@@ -357,9 +358,10 @@ entry:
; CHECK: jne
; CHECK: jp
; ARM32-LABEL: fcmpOeqDouble
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32: movne [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: moveq [[R]], #1
define internal i32 @fcmpOgtFloat(float %a, float %b) {
......@@ -372,9 +374,10 @@ entry:
; CHECK: ucomiss
; CHECK: seta
; ARM32-LABEL: fcmpOgtFloat
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32: movle [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movgt [[R]], #1
define internal i32 @fcmpOgtDouble(double %a, double %b) {
......@@ -387,9 +390,10 @@ entry:
; CHECK: ucomisd
; CHECK: seta
; ARM32-LABEL: fcmpOgtDouble
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32: movle [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movgt [[R]], #1
define internal i32 @fcmpOgeFloat(float %a, float %b) {
......@@ -402,9 +406,10 @@ entry:
; CHECK: ucomiss
; CHECK: setae
; ARM32-LABEL: fcmpOgeFloat
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32: movlt [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movge [[R]], #1
define internal i32 @fcmpOgeDouble(double %a, double %b) {
......@@ -417,9 +422,10 @@ entry:
; CHECK: ucomisd
; CHECK: setae
; ARM32-LABEL: fcmpOgeDouble
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32: movlt [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movge [[R]], #1
define internal i32 @fcmpOltFloat(float %a, float %b) {
......@@ -432,9 +438,10 @@ entry:
; CHECK: ucomiss
; CHECK: seta
; ARM32-LABEL: fcmpOltFloat
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32: movpl [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movmi [[R]], #1
define internal i32 @fcmpOltDouble(double %a, double %b) {
......@@ -447,9 +454,10 @@ entry:
; CHECK: ucomisd
; CHECK: seta
; ARM32-LABEL: fcmpOltDouble
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32: movpl [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movmi [[R]], #1
define internal i32 @fcmpOleFloat(float %a, float %b) {
......@@ -462,9 +470,10 @@ entry:
; CHECK: ucomiss
; CHECK: setae
; ARM32-LABEL: fcmpOleFloat
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32: movhi [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movls [[R]], #1
define internal i32 @fcmpOleDouble(double %a, double %b) {
......@@ -477,9 +486,10 @@ entry:
; CHECK: ucomisd
; CHECK: setae
; ARM32-LABEL: fcmpOleDouble
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32: movhi [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movls [[R]], #1
define internal i32 @fcmpOneFloat(float %a, float %b) {
......@@ -492,9 +502,10 @@ entry:
; CHECK: ucomiss
; CHECK: setne
; ARM32-LABEL: fcmpOneFloat
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32: mov [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movmi [[R]], #1
; ARM32: movgt [[R]], #1
......@@ -508,9 +519,10 @@ entry:
; CHECK: ucomisd
; CHECK: setne
; ARM32-LABEL: fcmpOneDouble
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32: mov [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movmi [[R]], #1
; ARM32: movgt [[R]], #1
......@@ -524,9 +536,10 @@ entry:
; CHECK: ucomiss
; CHECK: setnp
; ARM32-LABEL: fcmpOrdFloat
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32: movvs [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movvc [[R]], #1
define internal i32 @fcmpOrdDouble(double %a, double %b) {
......@@ -539,9 +552,10 @@ entry:
; CHECK: ucomisd
; CHECK: setnp
; ARM32-LABEL: fcmpOrdDouble
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32: movvs [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movvc [[R]], #1
define internal i32 @fcmpUeqFloat(float %a, float %b) {
......@@ -554,9 +568,10 @@ entry:
; CHECK: ucomiss
; CHECK: sete
; ARM32-LABEL: fcmpUeqFloat
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32: mov [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: moveq [[R]], #1
; ARM32: movvs [[R]], #1
......@@ -570,9 +585,10 @@ entry:
; CHECK: ucomisd
; CHECK: sete
; ARM32-LABEL: fcmpUeqDouble
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32: mov [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: moveq [[R]], #1
; ARM32: movvs [[R]], #1
......@@ -586,9 +602,10 @@ entry:
; CHECK: ucomiss
; CHECK: setb
; ARM32-LABEL: fcmpUgtFloat
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32: movls [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movhi [[R]], #1
define internal i32 @fcmpUgtDouble(double %a, double %b) {
......@@ -601,9 +618,10 @@ entry:
; CHECK: ucomisd
; CHECK: setb
; ARM32-LABEL: fcmpUgtDouble
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32: movls [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movhi [[R]], #1
define internal i32 @fcmpUgeFloat(float %a, float %b) {
......@@ -616,9 +634,10 @@ entry:
; CHECK: ucomiss
; CHECK: setbe
; ARM32-LABEL: fcmpUgeFloat
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32: movmi [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movpl [[R]], #1
define internal i32 @fcmpUgeDouble(double %a, double %b) {
......@@ -631,9 +650,10 @@ entry:
; CHECK: ucomisd
; CHECK: setbe
; ARM32-LABEL: fcmpUgeDouble
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32: movmi [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movpl [[R]], #1
define internal i32 @fcmpUltFloat(float %a, float %b) {
......@@ -646,9 +666,10 @@ entry:
; CHECK: ucomiss
; CHECK: setb
; ARM32-LABEL: fcmpUltFloat
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32: movge [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movlt [[R]], #1
define internal i32 @fcmpUltDouble(double %a, double %b) {
......@@ -661,9 +682,10 @@ entry:
; CHECK: ucomisd
; CHECK: setb
; ARM32-LABEL: fcmpUltDouble
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32: movge [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movlt [[R]], #1
define internal i32 @fcmpUleFloat(float %a, float %b) {
......@@ -676,9 +698,10 @@ entry:
; CHECK: ucomiss
; CHECK: setbe
; ARM32-LABEL: fcmpUleFloat
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32: movgt [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movle [[R]], #1
define internal i32 @fcmpUleDouble(double %a, double %b) {
......@@ -691,9 +714,10 @@ entry:
; CHECK: ucomisd
; CHECK: setbe
; ARM32-LABEL: fcmpUleDouble
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32: movgt [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movle [[R]], #1
define internal i32 @fcmpUneFloat(float %a, float %b) {
......@@ -707,9 +731,10 @@ entry:
; CHECK: jne
; CHECK: jp
; ARM32-LABEL: fcmpUneFloat
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32: moveq [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movne [[R]], #1
define internal i32 @fcmpUneDouble(double %a, double %b) {
......@@ -723,9 +748,10 @@ entry:
; CHECK: jne
; CHECK: jp
; ARM32-LABEL: fcmpUneDouble
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32: moveq [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movne [[R]], #1
define internal i32 @fcmpUnoFloat(float %a, float %b) {
......@@ -738,9 +764,10 @@ entry:
; CHECK: ucomiss
; CHECK: setp
; ARM32-LABEL: fcmpUnoFloat
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f32
; ARM32: vmrs
; ARM32: movvc [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movvs [[R]], #1
define internal i32 @fcmpUnoDouble(double %a, double %b) {
......@@ -753,9 +780,10 @@ entry:
; CHECK: ucomisd
; CHECK: setp
; ARM32-LABEL: fcmpUnoDouble
; ARM32-O2: mov [[R:r[0-9]+]], #0
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32: movvc [[R:r[0-9]+]], #0
; ARM32-OM1: mov [[R:r[0-9]+]], #0
; ARM32: movvs [[R]], #1
define internal i32 @fcmpTrueFloat(float %a, float %b) {
......
......@@ -51,14 +51,12 @@ declare void @useInt(i32 %x)
; CHECK: ret
; ARM32-LABEL: testSelect
; ARM32: cmp
; ARM32-OM1: cmp
; ARM32: bl {{.*}} useInt
; ARM32: cmp
; ARM32-Om1: cmp
; ARM32-Om1: mov {{.*}}, #20
; ARM32-O2: mov [[REG:r[0-9]+]], #20
; ARM32: tst
; ARM32-Om1: movne {{.*}}, #10
; ARM32-O2: movle [[REG:r[0-9]+]], #20
; ARM32-O2: movgt [[REG]], #10
; ARM32-O2: movne [[REG]], #10
; ARM32: bl {{.*}} useInt
; ARM32: bl {{.*}} useInt
; ARM32: bl {{.*}} useInt
......
......@@ -15,8 +15,6 @@
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; TODO(jvoung): test this.
; Test that and with true uses immediate 1, not -1.
define internal i32 @testAndTrue(i32 %arg) {
entry:
......@@ -66,9 +64,7 @@ entry:
; CHECK-LABEL: testTrunc
; CHECK: and {{.*}},0x1
; ARM32-LABEL: testTrunc
; ARM32: tst r0, #1
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], #1
; ARM32: and {{.*}}, #1
; Test zext to i8.
define internal i32 @testZextI8(i32 %arg) {
......@@ -84,10 +80,8 @@ entry:
; match the zext i1 instruction (NOTE: no mov needed between i1 and i8).
; CHECK-NOT: and {{.*}},0x1
; ARM32-LABEL: testZextI8
; ARM32: tst r0, #1
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], #1
; ARM32: uxtb [[REG]]
; ARM32: {{.*}}, #1
; ARM32: uxtb
; Test zext to i16.
define internal i32 @testZextI16(i32 %arg) {
......@@ -105,10 +99,8 @@ entry:
; CHECK-NOT: and [[REG]],0x1
; ARM32-LABEL: testZextI16
; ARM32: tst r0, #1
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], #1
; ARM32: uxth [[REG]]
; ARM32: and {{.*}}, #1
; ARM32: uxth
; Test zext to i32.
define internal i32 @testZextI32(i32 %arg) {
......@@ -124,9 +116,7 @@ entry:
; CHECK: movzx
; CHECK-NOT: and {{.*}},0x1
; ARM32-LABEL: testZextI32
; ARM32: tst r0, #1
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], #1
; ARM32: and {{.*}}, #1
; Test zext to i64.
define internal i64 @testZextI64(i32 %arg) {
......@@ -142,10 +132,8 @@ entry:
; CHECK: movzx
; CHECK: mov {{.*}},0x0
; ARM32-LABEL: testZextI64
; ARM32: tst r0, #1
; ARM32: mov r{{[0-9]*}}, #0
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], #1
; ARM32: and {{.*}}, #1
; ARM32: mov {{.*}}, #0
; Test sext to i8.
define internal i32 @testSextI8(i32 %arg) {
......@@ -163,11 +151,11 @@ entry:
; CHECK-NEXT: sar [[REG]],0x7
;
; ARM32-LABEL: testSextI8
; ARM32: tst r0, #1
; ARM32: mvn [[REG_M1:r[0-9]*]], #0
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], [[REG_M1]]
; ARM32: sxtb [[REG]]
; ARM32: mov {{.*}}, #0
; ARM32: tst {{.*}}, #1
; ARM32: mvn {{.*}}, #0
; ARM32: movne
; ARM32: sxtb
; Test sext to i16.
define internal i32 @testSextI16(i32 %arg) {
......@@ -186,11 +174,11 @@ entry:
; CHECK-NEXT: sar [[REG]],0xf
; ARM32-LABEL: testSextI16
; ARM32: tst r0, #1
; ARM32: mvn [[REG_M1:r[0-9]*]], #0
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], [[REG_M1]]
; ARM32: sxth [[REG]]
; ARM32: mov {{.*}}, #0
; ARM32: tst {{.*}}, #1
; ARM32: mvn {{.*}}, #0
; ARM32: movne
; ARM32: sxth
; Test sext to i32.
define internal i32 @testSextI32(i32 %arg) {
......@@ -208,10 +196,10 @@ entry:
; CHECK-NEXT: sar [[REG]],0x1f
; ARM32-LABEL: testSextI32
; ARM32: tst r0, #1
; ARM32: mvn [[REG_M1:r[0-9]*]], #0
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], [[REG_M1]]
; ARM32: mov {{.*}}, #0
; ARM32: tst {{.*}}, #1
; ARM32: mvn {{.*}}, #0
; ARM32: movne
; Test sext to i64.
define internal i64 @testSextI64(i32 %arg) {
......@@ -229,11 +217,11 @@ entry:
; CHECK-NEXT: sar [[REG]],0x1f
; ARM32-LABEL: testSextI64
; ARM32: tst r0, #1
; ARM32: mvn [[REG_M1:r[0-9]*]], #0
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], [[REG_M1]]
; ARM32: mov r{{[0-9]+}}, [[REG]]
; ARM32: mov {{.*}}, #0
; ARM32: tst {{.*}}, #1
; ARM32: mvn {{.*}}, #0
; ARM32: movne [[REG:r[0-9]+]]
; ARM32: mov {{.*}}, [[REG]]
; Kind of like sext i1 to i32, but with an immediate source. On ARM,
; sxtb cannot take an immediate operand, so make sure it's using a reg.
......@@ -248,9 +236,10 @@ define internal i32 @testSextTrue() {
; CHECK-NEXT: shl
; CHECK-NEXT: sar
; ARM32-LABEL: testSextTrue
; ARM32: mov{{.*}}, #1
; ARM32: lsl
; ARM32: asr
; ARM32: mov {{.*}}, #0
; ARM32: tst {{.*}}, #1
; ARM32: mvn {{.*}}, #0
; ARM32: movne
define internal i32 @testZextTrue() {
%result = zext i1 true to i32
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment