Commit 4a5e6d05 by John Porto

Subzero. ARM32. Implements bool folding.

parent 57586f73
......@@ -58,6 +58,12 @@ public:
// Factory for the ARM32 lowering target: allocates a TargetARM32 for "Func"
// with a plain `new` and hands back the raw pointer.
// NOTE(review): the destructor is private in this class, so callers
// presumably do not `delete` the result directly -- confirm who releases it.
// TODO(jvoung): return a unique_ptr.
static TargetARM32 *create(Cfg *Func) { return new TargetARM32(Func); }
// Per-node hook that resets the bool-folding state before "Node" is lowered.
// The three steps are order dependent: stale entries must be dropped before
// the new ones are recorded, and the dump reports the freshly recorded set.
void initNodeForLowering(CfgNode *Node) override {
// Discard every producer remembered from a previously processed node.
BoolComputations.forgetProducers();
// Record the producers for this node (recordProducers is defined out of
// line; presumably it scans Node's instructions -- confirm in the .cpp).
BoolComputations.recordProducers(Node);
// Debug aid: prints only when dumping is built in and IceV_Folding
// verbosity is enabled; otherwise it returns immediately.
BoolComputations.dump(Func);
}
void translateOm1() override;
void translateO2() override;
bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;
......@@ -130,8 +136,13 @@ protected:
void lowerCall(const InstCall *Inst) override;
void lowerCast(const InstCast *Inst) override;
void lowerExtractElement(const InstExtractElement *Inst) override;
void lowerFcmp(const InstFcmp *Inst) override;
void lowerIcmp(const InstIcmp *Inst) override;
void lowerFcmpCond(const InstFcmp *Instr, CondARM32::Cond *CondIfTrue0,
CondARM32::Cond *CondIfTrue1,
CondARM32::Cond *CondIfFalse);
void lowerFcmp(const InstFcmp *Instr) override;
void lowerIcmpCond(const InstIcmp *Instr, CondARM32::Cond *CondIfTrue,
CondARM32::Cond *CondIfFalse);
void lowerIcmp(const InstIcmp *Instr) override;
void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
Operand *Val);
void lowerIntrinsicCall(const InstIntrinsicCall *Inst) override;
......@@ -316,6 +327,60 @@ protected:
Context.insert(InstFakeDef::create(Func, Instr->getDestHi()));
}
}
// _mov_i1_to_flags is used for bool folding. If "Boolean" is folded, this
// method returns true, and sets "CondIfTrue0", "CondIfTrue1", and
// "CondIfFalse" to the appropriate ARM condition codes. If "Boolean" is not
// to be folded, then this method returns false.
bool _mov_i1_to_flags(Operand *Boolean, CondARM32::Cond *CondIfTrue0,
CondARM32::Cond *CondIfTrue1,
CondARM32::Cond *CondIfFalse);
// _cmov is a pseudo instruction that is used for boolean folding. It emits up
// to three moves into "Dest", in this order:
//   1. "SrcIfFalse" predicated on "CondIfFalse" (skipped when kNone);
//   2. "SrcIfTrue" predicated on "CondIfTrue0" (skipped when kNone);
//   3. "SrcIfTrue" predicated on "CondIfTrue1" (skipped when kNone).
// The first move emitted is a plain _mov; each later one is a _mov_redefined
// because it writes the same "Dest" again. It requires "Dest" to be an
// infinite-weight temporary (checked through Dest->mustHaveReg()).
//
// Accepted condition combinations, enforced by the asserts below:
//   - CondIfFalse == kNone: CondIfTrue0 must be AL and CondIfTrue1 kNone.
//   - CondIfTrue0 == kNone: CondIfFalse must be AL and CondIfTrue1 kNone.
//   - CondIfTrue1 != kNone: CondIfFalse must be AL and CondIfTrue0 must be
//     set as well.
void _cmov(Variable *Dest, Operand *SrcIfTrue, CondARM32::Cond CondIfTrue0,
           CondARM32::Cond CondIfTrue1, Operand *SrcIfFalse,
           CondARM32::Cond CondIfFalse) {
  assert(Dest->mustHaveReg());

  // No "false" move: the "true" value must be moved unconditionally.
  if (CondIfFalse == CondARM32::kNone) {
    assert(CondIfTrue0 == CondARM32::AL);
    assert(CondIfTrue1 == CondARM32::kNone);
  }

  // No "true" move: the "false" value must be moved unconditionally.
  if (CondIfTrue0 == CondARM32::kNone) {
    assert(CondIfFalse == CondARM32::AL);
    assert(CondIfTrue1 == CondARM32::kNone);
  }

  // Two "true" conditions: the "false" value is moved unconditionally first
  // and then overwritten under either condition. A second "true" condition is
  // only meaningful when the first one is present. (This used to re-assert
  // the enclosing if-condition, which could never fail.)
  if (CondIfTrue1 != CondARM32::kNone) {
    assert(CondIfFalse == CondARM32::AL);
    assert(CondIfTrue0 != CondARM32::kNone);
  }

  // Tracks whether "Dest" has already been written by an earlier move, in
  // which case the following moves must be emitted as redefinitions.
  bool RedefineT = false;
  if (CondIfFalse != CondARM32::kNone) {
    _mov(Dest, SrcIfFalse, CondIfFalse);
    RedefineT = true;
  }

  if (CondIfTrue0 != CondARM32::kNone) {
    if (RedefineT) {
      _mov_redefined(Dest, SrcIfTrue, CondIfTrue0);
    } else {
      _mov(Dest, SrcIfTrue, CondIfTrue0);
    }
    RedefineT = true;
  }

  if (CondIfTrue1 != CondARM32::kNone) {
    // Guaranteed by the asserts above: at least the "false" move was emitted.
    assert(RedefineT);
    _mov_redefined(Dest, SrcIfTrue, CondIfTrue1);
  }
}
/// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with
/// an upper16 relocation).
void _movt(Variable *Dest, Operand *Src0,
......@@ -542,6 +607,64 @@ protected:
private:
~TargetARM32() override = default;
void lowerTruncToFlags(Operand *Src, CondARM32::Cond *CondIfTrue,
CondARM32::Cond *CondIfFalse);
class BoolComputationTracker {
public:
BoolComputationTracker() = default;
~BoolComputationTracker() = default;
void forgetProducers() { KnownComputations.clear(); }
void recordProducers(CfgNode *Node);
const Inst *getProducerOf(const Operand *Opnd) const {
auto *Var = llvm::dyn_cast<Variable>(Opnd);
if (Var == nullptr) {
return nullptr;
}
auto Iter = KnownComputations.find(Var->getIndex());
if (Iter == KnownComputations.end()) {
return nullptr;
}
return Iter->second.Instr;
}
void dump(const Cfg *Func) const {
if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
return;
OstreamLocker L(Func->getContext());
Ostream &Str = Func->getContext()->getStrDump();
Str << "foldable producer:\n ";
for (const auto &Computation : KnownComputations) {
Str << " ";
Computation.second.Instr->dump(Func);
Str << "\n";
}
Str << "\n";
}
private:
class BoolComputationEntry {
public:
explicit BoolComputationEntry(Inst *I) : Instr(I) {}
Inst *const Instr;
// Boolean folding is disabled for variables whose live range is multi
// block. We conservatively initialize IsLiveOut to true, and set it to
// false once we find the end of the live range for the variable defined
// by this instruction. If liveness analysis is not performed (e.g., in
// Om1 mode) IsLiveOut will never be set to false, and folding will be
// disabled.
bool IsLiveOut = true;
};
using BoolComputationMap = std::unordered_map<SizeT, BoolComputationEntry>;
BoolComputationMap KnownComputations;
};
BoolComputationTracker BoolComputations;
};
class TargetDataARM32 final : public TargetDataLowering {
......
......@@ -53,40 +53,40 @@ define internal void @mult_fwd_branches(i32 %a, i32 %b) {
%cmp = icmp slt i32 %a, %b
; ASM-NEXT: ldr r0, [sp, #8]
; ASM-NEXT: mov r1, #0
; ASM-NEXT: ldr r2, [sp, #4]
; ASM-NEXT: cmp r0, r2
; ASM-NEXT: movlt r1, #1
; ASM-NEXT: str r1, [sp]
; ASM-NEXT: ldr r1, [sp, #4]
; ASM-NEXT: cmp r0, r1
; ASM-NEXT: movge r0, #0
; ASM-NEXT: movlt r0, #1
; ASM-NEXT: str r0, [sp]
; DIS-NEXT: c: e59d0008
; DIS-NEXT: 10: e3a01000
; DIS-NEXT: 14: e59d2004
; DIS-NEXT: 18: e1500002
; DIS-NEXT: 1c: b3a01001
; DIS-NEXT: 20: e58d1000
; DIS-NEXT: 10: e59d1004
; DIS-NEXT: 14: e1500001
; DIS-NEXT: 18: a3a00000
; DIS-NEXT: 1c: b3a00001
; DIS-NEXT: 20: e58d0000
; IASM-NEXT: .byte 0x8
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; IASM-NEXT: mov r1, #0
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0x20
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; IASM-NEXT: .byte 0x2
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x50
; IASM-NEXT: .byte 0xe1
; IASM-NEXT: movlt r1, #1
; IASM-NEXT: movge r0, #0
; IASM-NEXT: movlt r0, #1
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
......
......@@ -18,13 +18,13 @@
; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; RUN: --command FileCheck --check-prefix ARM32 --check-prefix ARM32-O2 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
; RUN: -i %s --args -Om1 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; RUN: --command FileCheck --check-prefix ARM32 --check-prefix ARM32-OM1 %s
@__init_array_start = internal constant [0 x i8] zeroinitializer, align 4
@__fini_array_start = internal constant [0 x i8] zeroinitializer, align 4
......@@ -849,8 +849,11 @@ entry:
; OPTM1-NOT: and eax,0x1
; ARM32-LABEL: trunc64To1
; ARM32: and r0, r0, #1
; ARM32: and r0, r0, #1
; ARM32-OM1: and r0, r0, #1
; ARM32-OM1: and r0, r0, #1
; ARM32-O2: tst r0, #1
; ARM32-O2: moveq [[RES:r[0-9]+]], #0
; ARM32-O2: movne [[RES]], #1
define internal i64 @sext32To64(i32 %a) {
entry:
......@@ -921,8 +924,12 @@ entry:
; OPTM1: sar {{.*}},0x1f
; ARM32-LABEL: sext1To64
; ARM32: lsl {{.*}}, #31
; ARM32: asr {{.*}}, #31
; ARM32-OM1: lsl {{.*}}, #31
; ARM32-OM1: asr {{.*}}, #31
; ARM32-O2: tst r0, #1
; ARM32-O2: mvn [[M1:r[0-9]+]], #0
; ARM32-O2: moveq [[RES:r[0-9]+]], #0
; ARM32-O2: movne [[RES]], [[M1]]
define internal i64 @zext32To64(i32 %a) {
entry:
......@@ -991,8 +998,11 @@ entry:
; OPTM1: mov {{.*}},0x0
; ARM32-LABEL: zext1To64
; ARM32: and {{.*}}, #1
; ARM32: mov {{.*}}, #0
; ARM32-OM1: and {{.*}}, #1
; ARM32-OM1: mov {{.*}}, #0
; ARM32-O2: tst r0, #1
; ARM32-O2: moveq {{[^,]*}}, #0
; ARM32-O2: movne {{[^,]*}}, #1
define internal void @icmpEq64(i64 %a, i64 %b, i64 %c, i64 %d) {
entry:
......@@ -1051,13 +1061,17 @@ if.end3: ; preds = %if.then2, %if.end
; ARM32-LABEL: icmpEq64
; ARM32: cmp
; ARM32: cmpeq
; ARM32: moveq
; ARM32: movne
; ARM32-OM1: movne
; ARM32-OM1: moveq
; ARM32-OM1: cmp
; ARM32-O2: bne
; ARM32: bl
; ARM32: cmp
; ARM32: cmpeq
; ARM32: moveq
; ARM32: movne
; ARM32-OM1: movne
; ARM32-OM1: moveq
; ARM32-OM1: cmp
; ARM32-O2: bne
; ARM32: bl
declare void @func()
......@@ -1119,13 +1133,17 @@ if.end3: ; preds = %if.end, %if.then2
; ARM32-LABEL: icmpNe64
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movne
; ARM32: moveq
; ARM32-OM1: moveq
; ARM32-OM1: movne
; ARM32-OM1: cmp
; ARM32-O2: beq
; ARM32: bl
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movne
; ARM32: moveq
; ARM32-OM1: moveq
; ARM32-OM1: movne
; ARM32-OM1: cmp
; ARM32-O2: beq
; ARM32: bl
define internal void @icmpGt64(i64 %a, i64 %b, i64 %c, i64 %d) {
......@@ -1171,13 +1189,17 @@ if.end3: ; preds = %if.then2, %if.end
; ARM32-LABEL: icmpGt64
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movhi
; ARM32: movls
; ARM32-OM1: movls
; ARM32-OM1: movhi
; ARM32-OM1: cmp
; ARM32-O2: bls
; ARM32: bl
; ARM32: cmp
; ARM32: sbcs
; ARM32: movlt
; ARM32: movge
; ARM32-OM1: movge
; ARM32-OM1: movlt
; ARM32-OM1: cmp
; ARM32-O2: bge
; ARM32: bl
define internal void @icmpGe64(i64 %a, i64 %b, i64 %c, i64 %d) {
......@@ -1223,13 +1245,17 @@ if.end3: ; preds = %if.end, %if.then2
; ARM32-LABEL: icmpGe64
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movcs
; ARM32: movcc
; ARM32-OM1: movcc
; ARM32-OM1: movcs
; ARM32-OM1: cmp
; ARM32-O2: bcc
; ARM32: bl
; ARM32: cmp
; ARM32: sbcs
; ARM32: movge
; ARM32: movlt
; ARM32-OM1: movlt
; ARM32-OM1: movge
; ARM32-OM1: cmp
; ARM32-O2: blt
; ARM32: bl
define internal void @icmpLt64(i64 %a, i64 %b, i64 %c, i64 %d) {
......@@ -1275,13 +1301,17 @@ if.end3: ; preds = %if.then2, %if.end
; ARM32-LABEL: icmpLt64
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movcc
; ARM32: movcs
; ARM32-OM1: movcs
; ARM32-OM1: movcc
; ARM32-OM1: cmp
; ARM32-O2: bcs
; ARM32: bl
; ARM32: cmp
; ARM32: sbcs
; ARM32: movlt
; ARM32: movge
; ARM32-OM1: movge
; ARM32-OM1: movlt
; ARM32-OM1: cmp
; ARM32-O2: bge
; ARM32: bl
define internal void @icmpLe64(i64 %a, i64 %b, i64 %c, i64 %d) {
......@@ -1327,13 +1357,16 @@ if.end3: ; preds = %if.end, %if.then2
; ARM32-LABEL: icmpLe64
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movls
; ARM32: movhi
; ARM32-OM1: movhi
; ARM32-OM1: movls
; ARM32-OM1: cmp
; ARM32-O2: bhi
; ARM32: bl
; ARM32: cmp
; ARM32: sbcs
; ARM32: movge
; ARM32: movlt
; ARM32-OM1: movlt
; ARM32-OM1: movge
; ARM32-O2: blt
; ARM32: bl
define internal i32 @icmpEq64Bool(i64 %a, i64 %b) {
......@@ -1351,8 +1384,8 @@ entry:
; OPTM1: je
; ARM32-LABEL: icmpEq64Bool
; ARM32: moveq
; ARM32: movne
; ARM32: moveq
define internal i32 @icmpNe64Bool(i64 %a, i64 %b) {
entry:
......@@ -1369,8 +1402,8 @@ entry:
; OPTM1: jne
; ARM32-LABEL: icmpNe64Bool
; ARM32: movne
; ARM32: moveq
; ARM32: movne
define internal i32 @icmpSgt64Bool(i64 %a, i64 %b) {
entry:
......@@ -1395,8 +1428,8 @@ entry:
; ARM32-LABEL: icmpSgt64Bool
; ARM32: cmp
; ARM32: sbcs
; ARM32: movlt
; ARM32: movge
; ARM32: movlt
define internal i32 @icmpUgt64Bool(i64 %a, i64 %b) {
entry:
......@@ -1421,8 +1454,8 @@ entry:
; ARM32-LABEL: icmpUgt64Bool
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movhi
; ARM32: movls
; ARM32: movhi
define internal i32 @icmpSge64Bool(i64 %a, i64 %b) {
entry:
......@@ -1447,8 +1480,8 @@ entry:
; ARM32-LABEL: icmpSge64Bool
; ARM32: cmp
; ARM32: sbcs
; ARM32: movge
; ARM32: movlt
; ARM32: movge
define internal i32 @icmpUge64Bool(i64 %a, i64 %b) {
entry:
......@@ -1473,8 +1506,8 @@ entry:
; ARM32-LABEL: icmpUge64Bool
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movcs
; ARM32: movcc
; ARM32: movcs
define internal i32 @icmpSlt64Bool(i64 %a, i64 %b) {
entry:
......@@ -1499,8 +1532,8 @@ entry:
; ARM32-LABEL: icmpSlt64Bool
; ARM32: cmp
; ARM32: sbcs
; ARM32: movlt
; ARM32: movge
; ARM32: movlt
define internal i32 @icmpUlt64Bool(i64 %a, i64 %b) {
entry:
......@@ -1525,8 +1558,8 @@ entry:
; ARM32-LABEL: icmpUlt64Bool
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movcc
; ARM32: movcs
; ARM32: movcc
define internal i32 @icmpSle64Bool(i64 %a, i64 %b) {
entry:
......@@ -1551,8 +1584,8 @@ entry:
; ARM32-LABEL: icmpSle64Bool
; ARM32: cmp
; ARM32: sbcs
; ARM32: movge
; ARM32: movlt
; ARM32: movge
define internal i32 @icmpUle64Bool(i64 %a, i64 %b) {
entry:
......@@ -1577,8 +1610,8 @@ entry:
; ARM32-LABEL: icmpUle64Bool
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movls
; ARM32: movhi
; ARM32: movls
define internal i64 @load64(i32 %a) {
entry:
......@@ -1666,15 +1699,15 @@ entry:
; OPTM1: cmovne
; ARM32-LABEL: select64VarVar
; The initial compare.
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movcc
; ARM32: movcs
; The non-folded compare for the select.
; ARM32: cmp
; ARM32: movne
; ARM32: movne
; ARM32-OM1: movcs
; ARM32-OM1: movcc
; ARM32-OM1: cmp
; ARM32-OM1: movne
; ARM32-O2: movcc
; ARM32-OM1: movne
; ARM32-O2: movcc
define internal i64 @select64VarConst(i64 %a, i64 %b) {
entry:
......@@ -1703,15 +1736,17 @@ entry:
; ARM32-LABEL: select64VarConst
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movcc
; ARM32: movcs
; ARM32: cmp
; ARM32-OM1: movcs
; ARM32-OM1: movcc
; ARM32-OM1: cmp
; ARM32: movw
; ARM32: movt
; ARM32: movne
; ARM32-OM1: movne
; ARM32-O2: movcc
; ARM32: movw
; ARM32: movt
; ARM32: movne
; ARM32-OM1: movne
; ARM32-O2: movcc
define internal i64 @select64ConstVar(i64 %a, i64 %b) {
entry:
......@@ -1740,15 +1775,17 @@ entry:
; ARM32-LABEL: select64ConstVar
; ARM32: cmp
; ARM32: cmpeq
; ARM32: movcc
; ARM32: movcs
; ARM32: cmp
; ARM32-OM1: movcs
; ARM32-OM1: movcc
; ARM32-OM1: cmp
; ARM32: movw
; ARM32: movt
; ARM32: movne
; ARM32-OM1: movne
; ARM32-O2: movcc
; ARM32: movw
; ARM32: movt
; ARM32: movne
; ARM32-OM1: movne
; ARM32-O2: movcc
define internal void @icmpEq64Imm() {
entry:
......
......@@ -28,8 +28,12 @@ branch2:
; CHECK: cmp
; CHECK: jge
; ARM32-LABEL: fold_cmp_br
; ARM32: cmp
; ARM32: beq
; ARM32: cmp r0, r1
; ARM32: bge
; ARM32: mov r0, #1
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr
; Cmp/branch folding with intervening instructions.
......@@ -51,11 +55,13 @@ branch2:
; CHECK: jge
; ARM32-LABEL: fold_cmp_br_intervening_insts
; ARM32: push {{[{].*[}]}}
; ARM32: movlt [[TMP:r[0-9]+]], #1
; ARM32: mov [[P:r[4-7]]], [[TMP]]
; ARM32: bl
; ARM32: cmp [[P]], #0
; ARM32: beq
; ARM32: bl use_value
; ARM32: cmp {{r[0-9]+}}, {{r[0-9]+}}
; ARM32: bge
; ARM32: mov r0, #1
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr
; Cmp/branch non-folding because of live-out.
......@@ -102,13 +108,14 @@ branch2:
; CHECK: cmp
; CHECK: je
; ARM32-LABEL: no_fold_cmp_br_non_whitelist
; ARM32: mov [[R:r[0-9]+]], #0
; ARM32: cmp r0, r1
; ARM32: movge [[R:r[0-9]+]], #0
; ARM32: movlt [[R]], #1
; ARM32: mov [[R2:r[0-9]+]], [[R]]
; ARM32: and [[R3:r[0-9]+]], [[R2]], #1
; ARM32: cmp [[R]]
; ARM32: beq
; ARM32: cmp r0, r1
; ARM32: bge
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr
; Basic cmp/select folding.
......@@ -123,11 +130,8 @@ entry:
; CHECK: cmp
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select
; ARM32: mov [[R:r[0-9]+]], #0
; ARM32: cmp r0, r1
; ARM32: movlt [[R]], #1
; ARM32: cmp [[R]], #0
; ARM32: movlt {{r[0-9]+}}, r0
; 64-bit cmp/select folding.
define internal i64 @fold_cmp_select_64(i64 %arg1, i64 %arg2) {
......@@ -144,14 +148,11 @@ entry:
; CHECK: cmovl
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_64
; ARM32: mov [[R:r[0-9]+]], #0
; ARM32: cmp r0, r2
; ARM32: movlt [[R]], #1
; ARM32: cmp [[R]], #0
; ARM32: movne
; ARM32: movne
; ARM32-DAG: mov r0
; ARM32-DAG: mov r1
; ARM32: movlt [[LOW:r[0-9]+]], r0
; ARM32: movlt [[HIGH:r[0-9]+]], r1
; ARM32: mov r0, [[LOW]]
; ARM32: mov r1, [[HIGH]]
; ARM32: bx lr
......@@ -168,12 +169,10 @@ entry:
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_64_undef
; ARM32: cmp {{r[0-9]+}}, r0
; ARM32: movlt [[R:r[0-9]+]], #1
; ARM32: cmp [[R]]
; ARM32: movne
; ARM32: movne
; ARM32-DAG: mov r0
; ARM32-DAG: mov r1
; ARM32: movge
; ARM32: movlt
; ARM32: movge
; ARM32: movlt
; ARM32: bx lr
......@@ -192,17 +191,10 @@ entry:
; CHECK: cmp
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_intervening_insts
; ARM32: mov [[RES0:r[4-7]+]], r0
; ARM32: mov [[RES1:r[4-7]+]], r1
; ARM32: mov [[R:r[0-9]+]], #0
; ARM32: cmp r{{[0-9]+}}, r{{[0-9]+}}
; ARM32: movlt [[R]], #1
; ARM32: mov [[R2:r[4-7]]], [[R]]
; ARM32: bl use_value
; ARM32: cmp [[R2]], #0
; ARM32: movne [[RES1]], [[RES0]]
; ARM32: mov r0, [[RES1]]
; ARM32: cmp r{{[0-9]+}}, r{{[0-9]+}}
; ARM32: movlt
; ARM32: bx lr
; Cmp/multi-select folding.
define internal i32 @fold_cmp_select_multi(i32 %arg1, i32 %arg2) {
......@@ -226,20 +218,14 @@ entry:
; CHECK: add
; CHECK: add
; ARM32-LABEL: fold_cmp_select_multi
; ARM32: mov [[T0:r[0-9]+]], #0
; ARM32: cmp r0, r1
; ARM32: movlt [[T0]], #1
; ARM32: uxtb [[T1:r[0-9]+]], [[T1]]
; ARM32-NEXT: cmp [[T1]], #0
; ARM32: movne [[T2:r[0-9]+]], r0
; ARM32: uxtb [[T3:r[0-9]+]], [[T3]]
; ARM32-NEXT: cmp [[T3]], #0
; ARM32: movne [[T4:r[0-9]+]], r1
; ARM32: uxtb [[T5:r[0-9]+]], [[T5]]
; ARM32-NEXT: cmp [[T5]], #0
; ARM32: movne [[T6:r[0-9]+]], #123
; ARM32: add [[T7:r[0-9]+]], [[T2]], [[T4]]
; ARM32: add {{r[0-9]+}}, [[T7]], [[T6]]
; ARM32: movlt {{r[0-9]+}}, r0
; ARM32: cmp r0, r1
; ARM32: movlt {{r[0-9]+}}, r1
; ARM32: cmp r0, r1
; ARM32: movlt {{r[0-9]+}}, #123
; ARM32: add
; ARM32: add
; ARM32: bx lr
......@@ -269,21 +255,21 @@ next:
; CHECK: add
; ARM32-LABEL: no_fold_cmp_select_multi_liveout
; ARM32-LABEL: fold_cmp_select_multi
; ARM32: mov [[T0:r[0-9]+]], #0
; ARM32: cmp r0, r1
; ARM32: movge [[T0:r[0-9]+]], #0
; ARM32: movlt [[T0]], #1
; ARM32: uxtb [[T2:r[0-9]+]], [[T2]]
; ARM32-NEXT: cmp [[T2]], #0
; ARM32: movne [[T1]], r0
; ARM32: uxtb [[T4:r[0-9]+]], [[T4]]
; ARM32-NEXT: cmp [[T4]], #0
; ARM32: movne [[T3]], r1
; ARM32: uxtb [[T1:r[0-9]+]], [[T1]]
; ARM32-NEXT: cmp [[T1]], #0
; ARM32: movne [[T2:r[0-9]+]], r0
; ARM32: uxtb [[T3:r[0-9]+]], [[T3]]
; ARM32-NEXT: cmp [[T3]], #0
; ARM32: movne [[T4:r[0-9]+]], r1
; ARM32-LABEL: .Lno_fold_cmp_select_multi_liveout$next:
; ARM32: uxtb [[T5:r[0-9]+]], [[T5]]
; ARM32: cmp [[T5]], #0
; ARM32: movne [[T6:r[0-9]+]], #123
; ARM32: add [[T7:r[0-9]+]], [[T2]], [[T4]]
; ARM32: add {{r[0-9]+}}, [[T7]], [[T6]]
; ARM32: add
; ARM32: add
; ARM32: bx lr
; Cmp/multi-select non-folding because of extra non-whitelisted uses.
......@@ -314,19 +300,19 @@ entry:
; CHECK: add
; CHECK: add
; ARM32-LABEL: no_fold_cmp_select_multi_non_whitelist
; ARM32: mov [[T0:r[0-9]+]], #0
; ARM32: cmp r0, r1
; ARM32: movlt [[T0]], #1
; ARM32: uxtb [[T1:r[0-9]+]], [[T1]]
; ARM32-NEXT: cmp [[T1]], #0
; ARM32: movne [[T2:r[0-9]+]], r0
; ARM32: uxtb [[T3:r[0-9]+]], [[T3]]
; ARM32-NEXT: cmp [[T3]], #0
; ARM32: movne [[T4:r[0-9]+]], r1
; ARM32: uxtb [[T5:r[0-9]+]], [[T5]]
; ARM32-NEXT: cmp [[T5]], #0
; ARM32: movne [[T6:r[0-9]+]], #123
; ARM32: and [[T7:r[0-9]+]], [[T0]], #1
; ARM32: add [[T8:r[0-9]+]], [[T2]], [[T4]]
; ARM32: add {{r[0-9]+}}, [[T8]], [[T7]]
; ARM32: movge [[R0:r[0-9]+]]
; ARM32: movlt [[R0]]
; ARM32: cmp r0, r1
; ARM32: movge [[R1:r[0-9]+]]
; ARM32: movlt [[R1]]
; ARM32: cmp r0, r1
; ARM32: movge [[R2:r[0-9]+]]
; ARM32: movlt [[R2]]
; ARM32: cmp r0, r1
; ARM32: movge [[R3:r[0-9]+]]
; ARM32: movlt [[R3]]
; ARM32: add
; ARM32: add
; ARM32: add
; ARM32: bx lr
......@@ -14,14 +14,14 @@
; when possible.
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
; RUN: --disassemble --target arm32 -i %s --args -O2 \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32O2 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
; RUN: --disassemble --target arm32 -i %s --args -Om1 \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck \
......@@ -95,10 +95,7 @@ target:
; Compare and branch folding is now implemented for ARM32 at -O2 (as on x86-32).
; ARM32O2-LABEL: testCondFallthroughToNextBlock
; ARM32O2: cmp {{.*}}, #123
; ARM32O2-NEXT: movge {{.*}}, #1
; ARM32O2-NEXT: uxtb
; ARM32O2-NEXT: cmp {{.*}}, #0
; ARM32O2-NEXT: bne
; ARM32O2-NEXT: bge
; ARM32O2-NEXT: bl
; ARM32O2: bx lr
; ARM32O2: bl
......@@ -106,7 +103,8 @@ target:
; ARM32OM1-LABEL: testCondFallthroughToNextBlock
; ARM32OM1: cmp {{.*}}, #123
; ARM32OM1-NEXT: movge {{.*}}, #1
; ARM32OM1: movlt {{.*}}, #0
; ARM32OM1: movge {{.*}}, #1
; ARM32OM1: cmp {{.*}}, #0
; ARM32OM1: bne
; ARM32OM1: b
......@@ -154,10 +152,7 @@ target:
; (compared to x86-32).
; ARM32O2-LABEL: testCondTargetNextBlock
; ARM32O2: cmp {{.*}}, #123
; ARM32O2-NEXT: movge {{.*}}, #1
; ARM32O2-NEXT: uxtb
; ARM32O2-NEXT: cmp {{.*}}, #0
; ARM32O2-NEXT: beq
; ARM32O2-NEXT: blt
; ARM32O2-NEXT: bl
; ARM32O2: bx lr
; ARM32O2: bl
......
......@@ -12,16 +12,16 @@
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
; RUN: --disassemble --target arm32 -i %s --args -O2 \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; RUN: --command FileCheck --check-prefix ARM32 --check-prefix ARM32-O2 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
; RUN: --disassemble --target arm32 -i %s --args -Om1 \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; RUN: --command FileCheck --check-prefix ARM32 --check-prefix ARM32-OM1 %s
define internal void @testSelect(i32 %a, i32 %b) {
entry:
......@@ -51,12 +51,16 @@ declare void @useInt(i32 %x)
; CHECK: ret
; ARM32-LABEL: testSelect
; ARM32: cmp
; ARM32: cmp
; ARM32-OM1: cmp
; ARM32: bl {{.*}} useInt
; ARM32: cmp
; ARM32: cmp
; ARM32: mov {{.*}}, #20
; ARM32: movne {{.*}}, #10
; ARM32-OM1: cmp
; ARM32-OM1: mov {{.*}}, #20
; ARM32-OM1: movne {{.*}}, #10
; ARM32-O2: movle [[REG:r[0-9]+]], #20
; ARM32-O2: movgt [[REG]], #10
; ARM32: bl {{.*}} useInt
; ARM32: bl {{.*}} useInt
; ARM32: bl {{.*}} useInt
; ARM32: bx lr
......
......@@ -8,17 +8,10 @@
; RUN: --target x8632 -i %s --args -Om1 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
; once enough infrastructure is in. Also, switch to --filetype=obj
; when possible.
; TODO(jpp): Switch to --filetype=obj when possible.
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
; RUN: --disassemble --target arm32 -i %s --args -O2 \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
......@@ -73,7 +66,9 @@ entry:
; CHECK-LABEL: testTrunc
; CHECK: and {{.*}},0x1
; ARM32-LABEL: testTrunc
; ARM32: and {{.*}}, #1
; ARM32: tst r0, #1
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], #1
; Test zext to i8.
define internal i32 @testZextI8(i32 %arg) {
......@@ -89,8 +84,10 @@ entry:
; match the zext i1 instruction (NOTE: no mov need between i1 and i8).
; CHECK-NOT: and {{.*}},0x1
; ARM32-LABEL: testZextI8
; ARM32: and {{.*}}, #1
; ARM32: and {{.*}}, #1
; ARM32: tst r0, #1
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], #1
; ARM32: uxtb [[REG]]
; Test zext to i16.
define internal i32 @testZextI16(i32 %arg) {
......@@ -108,10 +105,10 @@ entry:
; CHECK-NOT: and [[REG]],0x1
; ARM32-LABEL: testZextI16
; match the trunc instruction
; ARM32: and {{.*}}, #1
; match the zext (no need to uxt into a reg if src is already in a reg)
; ARM32: and {{.*}}, #1
; ARM32: tst r0, #1
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], #1
; ARM32: uxth [[REG]]
; Test zext to i32.
define internal i32 @testZextI32(i32 %arg) {
......@@ -127,8 +124,9 @@ entry:
; CHECK: movzx
; CHECK-NOT: and {{.*}},0x1
; ARM32-LABEL: testZextI32
; ARM32: and {{.*}}, #1
; ARM32: and {{.*}}, #1
; ARM32: tst r0, #1
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], #1
; Test zext to i64.
define internal i64 @testZextI64(i32 %arg) {
......@@ -144,9 +142,10 @@ entry:
; CHECK: movzx
; CHECK: mov {{.*}},0x0
; ARM32-LABEL: testZextI64
; ARM32: and {{.*}}, #1
; ARM32: and {{.*}}, #1
; ARM32: mov {{.*}}, #0
; ARM32: tst r0, #1
; ARM32: mov r{{[0-9]*}}, #0
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], #1
; Test sext to i8.
define internal i32 @testSextI8(i32 %arg) {
......@@ -163,13 +162,12 @@ entry:
; CHECK: shl [[REG:.*]],0x7
; CHECK-NEXT: sar [[REG]],0x7
;
; ARM shifts by 32, since there aren't any byte regs.
; ARM32-LABEL: testSextI8
; match the trunc instruction
; ARM32: and {{.*}}, #1
; match the sext i1 instruction
; ARM32: lsl {{.*}}, #31
; ARM32-NEXT: asr {{.*}}, #31
; ARM32: tst r0, #1
; ARM32: mvn [[REG_M1:r[0-9]*]], #0
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], [[REG_M1]]
; ARM32: sxtb [[REG]]
; Test sext to i16.
define internal i32 @testSextI16(i32 %arg) {
......@@ -188,9 +186,11 @@ entry:
; CHECK-NEXT: sar [[REG]],0xf
; ARM32-LABEL: testSextI16
; ARM32: and {{.*}}, #1
; ARM32: lsl {{.*}}, #31
; ARM32-NEXT: asr {{.*}}, #31
; ARM32: tst r0, #1
; ARM32: mvn [[REG_M1:r[0-9]*]], #0
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], [[REG_M1]]
; ARM32: sxth [[REG]]
; Test sext to i32.
define internal i32 @testSextI32(i32 %arg) {
......@@ -208,9 +208,10 @@ entry:
; CHECK-NEXT: sar [[REG]],0x1f
; ARM32-LABEL: testSextI32
; ARM32: and {{.*}}, #1
; ARM32: lsl {{.*}}, #31
; ARM32-NEXT: asr {{.*}}, #31
; ARM32: tst r0, #1
; ARM32: mvn [[REG_M1:r[0-9]*]], #0
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], [[REG_M1]]
; Test sext to i64.
define internal i64 @testSextI64(i32 %arg) {
......@@ -228,10 +229,11 @@ entry:
; CHECK-NEXT: sar [[REG]],0x1f
; ARM32-LABEL: testSextI64
; ARM32: and {{.*}}, #1
; ARM32: lsl {{.*}}, #31
; ARM32-NEXT: asr [[REG:r.*]], {{.*}}, #31
; ARM32-NEXT: {{(mov|str).*}} [[REG]]
; ARM32: tst r0, #1
; ARM32: mvn [[REG_M1:r[0-9]*]], #0
; ARM32: moveq [[REG:r[0-9]*]], #0
; ARM32: movne [[REG]], [[REG_M1]]
; ARM32: mov r{{[0-9]+}}, [[REG]]
; Kind of like sext i1 to i32, but with an immediate source. On ARM,
; sxtb cannot take an immediate operand, so make sure it's using a reg.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment