Commit 6ec369eb by Jan Voung

ARM lowering integer divide and remainder, with div by 0 checks.

On ARM, dividing by 0 normally just returns 0, with both the software and hardware divide implementations, which differs from what x86 does. So, for NaCl, we've modified LLVM to trap by inserting explicit zero checks. This change uses the -mattr=hwdiv-arm attribute to decide whether 32-bit sdiv/udiv are supported. It also lowers the unreachable-inst to a trap-inst, since we need a trap instruction for divide by 0 anyway. Misc: fix the switch test under MINIMAL=1, since ARM requires allow_dump for filetype=asm; plus random clang-format changes. TODO: check via cross tests. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1214693004.
parent 6c17dd8c
......@@ -42,6 +42,10 @@ double __Sz_sitofp_i64_f64(int64_t Value) { return (double)Value; }
// Other helper calls emitted by Subzero but not implemented here:
// Compiler-rt:
// __udivsi3 - udiv i32
// __divsi3 - sdiv i32
// __umodsi3 - urem i32
// __modsi3 - srem i32
// __udivdi3 - udiv i64
// __divdi3 - sdiv i64
// __umoddi3 - urem i64
......
......@@ -125,6 +125,34 @@ void InstARM32Pred::emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst,
Inst->getSrc(1)->emit(Func);
}
void InstARM32Pred::emitFourAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 3);
Str << "\t" << Opcode << Inst->getPredicate() << "\t";
Inst->getDest()->emit(Func);
Str << ", ";
Inst->getSrc(0)->emit(Func);
Str << ", ";
Inst->getSrc(1)->emit(Func);
Str << ", ";
Inst->getSrc(2)->emit(Func);
}
void InstARM32Pred::emitCmpLike(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 2);
Str << "\t" << Opcode << Inst->getPredicate() << "\t";
Inst->getSrc(0)->emit(Func);
Str << ", ";
Inst->getSrc(1)->emit(Func);
}
OperandARM32Mem::OperandARM32Mem(Cfg * /* Func */, Type Ty, Variable *Base,
ConstantInteger32 *ImmOffset, AddrMode Mode)
: OperandARM32(kMem, Ty), Base(Base), ImmOffset(ImmOffset), Index(nullptr),
......@@ -207,15 +235,19 @@ InstARM32AdjustStack::InstARM32AdjustStack(Cfg *Func, Variable *SP,
}
InstARM32Br::InstARM32Br(Cfg *Func, const CfgNode *TargetTrue,
const CfgNode *TargetFalse, CondARM32::Cond Pred)
const CfgNode *TargetFalse,
const InstARM32Label *Label, CondARM32::Cond Pred)
: InstARM32Pred(Func, InstARM32::Br, 0, nullptr, Pred),
TargetTrue(TargetTrue), TargetFalse(TargetFalse) {}
TargetTrue(TargetTrue), TargetFalse(TargetFalse), Label(Label) {}
bool InstARM32Br::optimizeBranch(const CfgNode *NextNode) {
// If there is no next block, then there can be no fallthrough to
// optimize.
if (NextNode == nullptr)
return false;
// Intra-block conditional branches can't be optimized.
if (Label)
return false;
// If there is no fallthrough node, such as a non-default case label
// for a switch instruction, then there is no opportunity to
// optimize.
......@@ -264,11 +296,12 @@ InstARM32Call::InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget)
addSource(CallTarget);
}
InstARM32Cmp::InstARM32Cmp(Cfg *Func, Variable *Src1, Operand *Src2,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Cmp, 2, nullptr, Predicate) {
addSource(Src1);
addSource(Src2);
// Creates a label pseudo-instruction with no sources and no destination.
// The label's Number is taken from Target->makeNextLabelNumber() at
// construction time; getName() appends that number to form the label text.
InstARM32Label::InstARM32Label(Cfg *Func, TargetARM32 *Target)
: InstARM32(Func, InstARM32::Label, 0, nullptr),
Number(Target->makeNextLabelNumber()) {}
// Returns the assembler-level name of this label:
//   ".L" <function name> "$local$__" <label number>
IceString InstARM32Label::getName(const Cfg *Func) const {
  IceString Name = ".L" + Func->getFunctionName();
  Name += "$local$__";
  Name += std::to_string(Number);
  return Name;
}
InstARM32Ldr::InstARM32Ldr(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem,
......@@ -277,15 +310,6 @@ InstARM32Ldr::InstARM32Ldr(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem,
addSource(Mem);
}
// Constructs a predicated "mla" instruction writing Dest. Room is reserved
// for three sources, which are registered in the order Src0, Src1, Acc;
// the emit/dump routines rely on that operand order.
InstARM32Mla::InstARM32Mla(Cfg *Func, Variable *Dest, Variable *Src0,
Variable *Src1, Variable *Acc,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Mla, 3, Dest, Predicate) {
addSource(Src0);
addSource(Src1);
addSource(Acc);
}
InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests)
: InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) {
// Track modifications to Dests separately via FakeDefs.
......@@ -314,6 +338,9 @@ InstARM32Str::InstARM32Str(Cfg *Func, Variable *Value, OperandARM32Mem *Mem,
addSource(Mem);
}
// Constructs a trap instruction; it has no source operands and no
// destination.
InstARM32Trap::InstARM32Trap(Cfg *Func)
: InstARM32(Func, InstARM32::Trap, 0, nullptr) {}
InstARM32Umull::InstARM32Umull(Cfg *Func, Variable *DestLo, Variable *DestHi,
Variable *Src0, Variable *Src1,
CondARM32::Cond Predicate)
......@@ -348,7 +375,15 @@ template <> const char *InstARM32Mul::Opcode = "mul";
template <> const char *InstARM32Orr::Opcode = "orr";
template <> const char *InstARM32Rsb::Opcode = "rsb";
template <> const char *InstARM32Sbc::Opcode = "sbc";
template <> const char *InstARM32Sdiv::Opcode = "sdiv";
template <> const char *InstARM32Sub::Opcode = "sub";
template <> const char *InstARM32Udiv::Opcode = "udiv";
// Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla";
template <> const char *InstARM32Mls::Opcode = "mls";
// Cmp-like ops
template <> const char *InstARM32Cmp::Opcode = "cmp";
template <> const char *InstARM32Tst::Opcode = "tst";
void InstARM32::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
......@@ -402,14 +437,18 @@ void InstARM32Br::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
Str << "\t"
<< "b" << getPredicate() << "\t";
if (isUnconditionalBranch()) {
Str << getTargetFalse()->getAsmName();
if (Label) {
Str << Label->getName(Func);
} else {
Str << getTargetTrue()->getAsmName();
if (getTargetFalse()) {
Str << "\n\t"
<< "b"
<< "\t" << getTargetFalse()->getAsmName();
if (isUnconditionalBranch()) {
Str << getTargetFalse()->getAsmName();
} else {
Str << getTargetTrue()->getAsmName();
if (getTargetFalse()) {
Str << "\n\t"
<< "b"
<< "\t" << getTargetFalse()->getAsmName();
}
}
}
}
......@@ -426,13 +465,18 @@ void InstARM32Br::dump(const Cfg *Func) const {
Str << "br ";
if (getPredicate() == CondARM32::AL) {
Str << "label %" << getTargetFalse()->getName();
Str << "label %"
<< (Label ? Label->getName(Func) : getTargetFalse()->getName());
return;
}
Str << getPredicate() << ", label %" << getTargetTrue()->getName();
if (getTargetFalse()) {
Str << ", label %" << getTargetFalse()->getName();
if (Label) {
Str << "label %" << Label->getName(Func);
} else {
Str << getPredicate() << ", label %" << getTargetTrue()->getName();
if (getTargetFalse()) {
Str << ", label %" << getTargetFalse()->getName();
}
}
}
......@@ -479,30 +523,23 @@ void InstARM32Call::dump(const Cfg *Func) const {
getCallTarget()->dump(Func);
}
void InstARM32Cmp::emit(const Cfg *Func) const {
void InstARM32Label::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2);
Str << "\t"
<< "cmp" << getPredicate() << "\t";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
Str << getName(Func) << ":";
}
void InstARM32Cmp::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 2);
void InstARM32Label::emitIAS(const Cfg *Func) const {
(void)Func;
llvm_unreachable("Not yet implemented");
}
void InstARM32Cmp::dump(const Cfg *Func) const {
void InstARM32Label::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
dumpOpcodePred(Str, "cmp", getSrc(0)->getType());
dumpSources(Func);
Str << getName(Func) << ":";
}
void InstARM32Ldr::emit(const Cfg *Func) const {
......@@ -536,40 +573,6 @@ void InstARM32Ldr::dump(const Cfg *Func) const {
dumpSources(Func);
}
void InstARM32Mla::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 3);
assert(getDest()->hasReg());
Str << "\t"
<< "mla" << getPredicate() << "\t";
getDest()->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
Str << ", ";
getSrc(2)->emit(Func);
}
// Integrated-assembler emission for mla is not implemented; reaching this
// function is treated as a fatal error.
void InstARM32Mla::emitIAS(const Cfg *Func) const {
  (void)Func; // Unused until an implementation exists.
  assert(getSrcSize() == 3);
  llvm_unreachable("Not yet implemented");
}
void InstARM32Mla::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = ";
dumpOpcodePred(Str, "mla", getDest()->getType());
Str << " ";
dumpSources(Func);
}
template <> void InstARM32Movw::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
......@@ -757,6 +760,33 @@ void InstARM32Str::dump(const Cfg *Func) const {
getSrc(0)->dump(Func);
}
// Writes the textual assembly for the trap instruction. Nothing is written
// when dump support is compiled out.
//
// The trap is emitted as a raw ".long 0x<hex>" directive whose digits come
// from the assembler's non-executable bundle padding bytes, printed in the
// order the assembler returns them.
// NOTE(review): this assumes the padding bytes are stored most-significant
// byte first so that the concatenated digits form the intended 32-bit
// value -- confirm against AssemblerARM32::getNonExecBundlePadding().
void InstARM32Trap::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Func->getContext()->getStrEmit();
  assert(getSrcSize() == 0);
  // There isn't a mnemonic for the special NaCl Trap encoding, so dump
  // the raw bytes.
  Str << "\t.long 0x";
  ARM32::AssemblerARM32 *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  for (uint8_t I : Asm->getNonExecBundlePadding()) {
    // write_hex() prints without padding, so a byte below 0x10 would
    // contribute a single digit and corrupt the literal. Always emit
    // exactly two hex digits per byte.
    if (I < 0x10)
      Str << "0";
    Str.write_hex(I);
  }
}
// Integrated-assembler emission for the trap is not implemented; reaching
// this function is treated as a fatal error.
void InstARM32Trap::emitIAS(const Cfg *Func) const {
  (void)Func; // Unused until an implementation exists.
  assert(getSrcSize() == 0);
  llvm_unreachable("Not yet implemented");
}
void InstARM32Trap::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
Str << "trap";
}
void InstARM32Umull::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
......
......@@ -1939,7 +1939,7 @@ void InstX8632UD2::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
Str << "ud2\n";
Str << "ud2";
}
void InstX8632Test::emit(const Cfg *Func) const {
......@@ -1998,7 +1998,7 @@ void InstX8632Mfence::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
Str << "mfence\n";
Str << "mfence";
}
void InstX8632Store::emit(const Cfg *Func) const {
......@@ -2512,7 +2512,6 @@ void InstX8632Fstp::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = fstp." << getDest()->getType() << ", st(0)";
Str << "\n";
}
template <> void InstX8632Pcmpeq::emit(const Cfg *Func) const {
......
......@@ -353,16 +353,20 @@ protected:
const static constexpr char *H_fptoui_f64_i64 = "__Sz_fptoui_f64_i64";
const static constexpr char *H_frem_f32 = "fmodf";
const static constexpr char *H_frem_f64 = "fmod";
const static constexpr char *H_sdiv_i32 = "__divsi3";
const static constexpr char *H_sdiv_i64 = "__divdi3";
const static constexpr char *H_sitofp_i64_f32 = "__Sz_sitofp_i64_f32";
const static constexpr char *H_sitofp_i64_f64 = "__Sz_sitofp_i64_f64";
const static constexpr char *H_srem_i32 = "__modsi3";
const static constexpr char *H_srem_i64 = "__moddi3";
const static constexpr char *H_udiv_i32 = "__udivsi3";
const static constexpr char *H_udiv_i64 = "__udivdi3";
const static constexpr char *H_uitofp_4xi32_4xf32 = "__Sz_uitofp_4xi32_4xf32";
const static constexpr char *H_uitofp_i32_f32 = "__Sz_uitofp_i32_f32";
const static constexpr char *H_uitofp_i32_f64 = "__Sz_uitofp_i32_f64";
const static constexpr char *H_uitofp_i64_f32 = "__Sz_uitofp_i64_f32";
const static constexpr char *H_uitofp_i64_f64 = "__Sz_uitofp_i64_f64";
const static constexpr char *H_urem_i32 = "__umodsi3";
const static constexpr char *H_urem_i64 = "__umoddi3";
private:
......
......@@ -22,6 +22,30 @@
namespace Ice {
// Class encapsulating ARM cpu features / instruction set.
//
// The supported feature level is stored as a single ARM32InstructionSet
// value. Enumerators are ordered, and hasFeature() treats every enumerator
// at or below the stored level as available.
class TargetARM32Features {
// Not default-constructible and not copyable.
TargetARM32Features() = delete;
TargetARM32Features(const TargetARM32Features &) = delete;
TargetARM32Features &operator=(const TargetARM32Features &) = delete;
public:
// Builds the feature set from command-line flags. The definition is not
// in this header.
// NOTE(review): presumably selects HWDivArm when -mattr=hwdiv-arm is
// given -- confirm against the constructor's definition.
explicit TargetARM32Features(const ClFlags &Flags);
enum ARM32InstructionSet {
Begin,
// Neon is the PNaCl baseline instruction set.
Neon = Begin,
HWDivArm, // HW divide in ARM mode (not just Thumb mode).
End
};
// Returns true when feature I is at or below the configured level. Since
// Neon == Begin and the level defaults to Begin, hasFeature(Neon) holds
// for the default level.
bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }
private:
// Highest supported feature level; defaults to the baseline (Begin).
ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
};
// The target lowering logic for ARM32.
class TargetARM32 : public TargetLowering {
TargetARM32() = delete;
TargetARM32(const TargetARM32 &) = delete;
......@@ -75,15 +99,9 @@ public:
void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset, size_t &InArgsSizeBytes);
enum ARM32InstructionSet {
Begin,
// Neon is the PNaCl baseline instruction set.
Neon = Begin,
HWDivArm, // HW divide in ARM mode (not just Thumb mode).
End
};
ARM32InstructionSet getInstructionSet() const { return InstructionSet; }
bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
return CPUFeatures.hasFeature(I);
}
protected:
explicit TargetARM32(Cfg *Func);
......@@ -141,6 +159,18 @@ protected:
llvm::SmallVectorImpl<int32_t> &Permutation,
const llvm::SmallBitVector &ExcludeRegisters) const override;
// If a divide-by-zero check is needed, inserts a:
// test; branch .LSKIP; trap; .LSKIP: <continuation>.
// If no check is needed nothing is inserted.
// NOTE(review): SrcLo/SrcHi presumably are the low and high halves of the
// divisor, with SrcHi only meaningful for 64-bit types -- confirm against
// the definition.
void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
// Pointer to a TargetARM32 member function taking two Variables and a
// condition code.
// NOTE(review): presumably bound to an extension helper -- confirm.
typedef void (TargetARM32::*ExtInstr)(Variable *, Variable *,
CondARM32::Cond);
// Pointer to a TargetARM32 member function taking three Variables and a
// condition code; this matches the signatures of _sdiv and _udiv.
typedef void (TargetARM32::*DivInstr)(Variable *, Variable *, Variable *,
CondARM32::Cond);
// Shared lowering routine for integer divide and remainder.
// NOTE(review): judging by the parameter names, ExtFunc extends narrow
// operands, DivFunc is the hardware divide to use when available,
// DivHelperName is the runtime helper called otherwise, and IsRemainder
// selects remainder over quotient -- confirm against the definition.
void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
ExtInstr ExtFunc, DivInstr DivFunc,
const char *DivHelperName, bool IsRemainder);
// The following are helpers that insert lowered ARM32 instructions
// with minimal syntactic overhead, so that the lowering code can
// look as close to assembly as practical.
......@@ -175,8 +205,8 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Bic::create(Func, Dest, Src0, Src1, Pred));
}
void _br(CondARM32::Cond Condition, CfgNode *TargetTrue,
CfgNode *TargetFalse) {
void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
CondARM32::Cond Condition) {
Context.insert(
InstARM32Br::create(Func, TargetTrue, TargetFalse, Condition));
}
......@@ -186,6 +216,9 @@ protected:
// Inserts a branch to the node Target, predicated on Condition.
void _br(CfgNode *Target, CondARM32::Cond Condition) {
  auto *Branch = InstARM32Br::create(Func, Target, Condition);
  Context.insert(Branch);
}
// Inserts a branch to the intra-block Label, predicated on Condition.
void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
  auto *Branch = InstARM32Br::create(Func, Label, Condition);
  Context.insert(Branch);
}
void _cmp(Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Cmp::create(Func, Src0, Src1, Pred));
......@@ -210,6 +243,10 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Mla::create(Func, Dest, Src0, Src1, Acc, Pred));
}
// Inserts an "mls" instruction with operands Dest, Src0, Src1, Acc,
// predicated on Pred (always-execute by default).
void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
          CondARM32::Cond Pred = CondARM32::AL) {
  auto *Mls = InstARM32Mls::create(Func, Dest, Src0, Src1, Acc, Pred);
  Context.insert(Mls);
}
// If Dest=nullptr is passed in, then a new variable is created,
// marked as infinite register allocation weight, and returned
// through the in/out Dest argument.
......@@ -248,6 +285,12 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Orr::create(Func, Dest, Src0, Src1, Pred));
}
// Inserts an "orr" instruction created with its set-flags argument true,
// predicated on Pred (always-execute by default).
void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
           CondARM32::Cond Pred = CondARM32::AL) {
  constexpr bool SetFlags = true;
  auto *Orr = InstARM32Orr::create(Func, Dest, Src0, Src1, Pred, SetFlags);
  Context.insert(Orr);
}
// Inserts a push instruction for the given list of source variables.
void _push(const VarList &Sources) {
  auto *Push = InstARM32Push::create(Func, Sources);
  Context.insert(Push);
}
......@@ -257,6 +300,9 @@ protected:
for (Variable *Dest : Dests)
Context.insert(InstFakeDef::create(Func, Dest));
}
// Inserts a return instruction built from LR and, optionally, Src0
// (nullptr by default).
void _ret(Variable *LR, Variable *Src0 = nullptr) {
  auto *Ret = InstARM32Ret::create(Func, LR, Src0);
  Context.insert(Ret);
}
void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred));
......@@ -271,6 +317,10 @@ protected:
Context.insert(
InstARM32Sbc::create(Func, Dest, Src0, Src1, Pred, SetFlags));
}
// Inserts an "sdiv" instruction with operands Dest, Src0, Src1,
// predicated on Pred (always-execute by default).
void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
           CondARM32::Cond Pred = CondARM32::AL) {
  auto *Sdiv = InstARM32Sdiv::create(Func, Dest, Src0, Src1, Pred);
  Context.insert(Sdiv);
}
void _str(Variable *Value, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Str::create(Func, Value, Addr, Pred));
......@@ -289,8 +339,14 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Sxt::create(Func, Dest, Src0, Pred));
}
void _ret(Variable *LR, Variable *Src0 = nullptr) {
Context.insert(InstARM32Ret::create(Func, LR, Src0));
// Inserts a "tst" instruction on Src0 and Src1, predicated on Pred
// (always-execute by default). It has no destination operand.
void _tst(Variable *Src0, Operand *Src1,
          CondARM32::Cond Pred = CondARM32::AL) {
  auto *Tst = InstARM32Tst::create(Func, Src0, Src1, Pred);
  Context.insert(Tst);
}
// Inserts a trap instruction at the current insertion point.
void _trap() {
  auto *Trap = InstARM32Trap::create(Func);
  Context.insert(Trap);
}
// Inserts a "udiv" instruction with operands Dest, Src0, Src1,
// predicated on Pred (always-execute by default).
void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
           CondARM32::Cond Pred = CondARM32::AL) {
  auto *Udiv = InstARM32Udiv::create(Func, Dest, Src0, Src1, Pred);
  Context.insert(Udiv);
}
void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
......@@ -305,7 +361,7 @@ protected:
Context.insert(InstARM32Uxt::create(Func, Dest, Src0, Pred));
}
ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
TargetARM32Features CPUFeatures;
bool UsesFramePointer = false;
bool NeedsStackAlignment = false;
bool MaybeLeafFunc = true;
......@@ -386,6 +442,8 @@ protected:
private:
~TargetHeaderARM32() = default;
TargetARM32Features CPUFeatures;
};
} // end of namespace Ice
......
......@@ -315,6 +315,11 @@ entry:
; OPTM1-LABEL: div64BitSigned
; OPTM1: call {{.*}} R_{{.*}} __divdi3
;
; ARM32-LABEL: div64BitSigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
; ARM32: bl {{.*}} __divdi3
define internal i64 @div64BitSignedConst(i64 %a) {
entry:
......@@ -330,6 +335,14 @@ entry:
; OPTM1: mov DWORD PTR [esp+0xc],0xb3a
; OPTM1: mov DWORD PTR [esp+0x8],0x73ce2ff2
; OPTM1: call {{.*}} R_{{.*}} __divdi3
;
; ARM32-LABEL: div64BitSignedConst
; For a constant, we should be able to optimize-out the divide by zero check.
; ARM32-NOT: orrs
; ARM32: movw {{.*}} ; 0x2ff2
; ARM32: movt {{.*}} ; 0x73ce
; ARM32: movw {{.*}} ; 0xb3a
; ARM32: bl {{.*}} __divdi3
define internal i64 @div64BitUnsigned(i64 %a, i64 %b) {
entry:
......@@ -341,6 +354,11 @@ entry:
;
; OPTM1-LABEL: div64BitUnsigned
; OPTM1: call {{.*}} R_{{.*}} __udivdi3
;
; ARM32-LABEL: div64BitUnsigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
; ARM32: bl {{.*}} __udivdi3
define internal i64 @rem64BitSigned(i64 %a, i64 %b) {
entry:
......@@ -352,6 +370,11 @@ entry:
;
; OPTM1-LABEL: rem64BitSigned
; OPTM1: call {{.*}} R_{{.*}} __moddi3
;
; ARM32-LABEL: rem64BitSigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
; ARM32: bl {{.*}} __moddi3
define internal i64 @rem64BitUnsigned(i64 %a, i64 %b) {
entry:
......@@ -363,6 +386,11 @@ entry:
;
; OPTM1-LABEL: rem64BitUnsigned
; OPTM1: call {{.*}} R_{{.*}} __umoddi3
;
; ARM32-LABEL: rem64BitUnsigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
; ARM32: bl {{.*}} __umoddi3
define internal i64 @shl64BitSigned(i64 %a, i64 %b) {
entry:
......
......@@ -8,10 +8,15 @@
; once enough infrastructure is in. Also, switch to --filetype=obj
; when possible.
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
; RUN: -i %s --args -O2 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
; RUN: -i %s --args -O2 --mattr=hwdiv-arm --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32HWDIV %s
define i32 @Add(i32 %a, i32 %b) {
entry:
......@@ -117,10 +122,32 @@ entry:
; CHECK-LABEL: Sdiv
; CHECK: cdq
; CHECK: idiv e
;
; ARM32-LABEL: Sdiv
; TODO(jvoung) -- implement divide and check here.
; The lowering needs to check if the denominator is 0 and trap, since
; ARM normally doesn't trap on divide by 0.
; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; ARM32: bne [[LABEL:[0-9a-f]+]]
; ARM32: .word 0xe7fedef0
; ARM32: [[LABEL]]: {{.*}} bl {{.*}} __divsi3
; ARM32HWDIV-LABEL: Sdiv
; ARM32HWDIV: tst
; ARM32HWDIV: bne
; ARM32HWDIV: sdiv
; Signed 32-bit division by the non-zero constant 219. The ARM32 checks
; that follow expect no zero test on the divisor for this function.
define i32 @SdivConst(i32 %a) {
entry:
%div = sdiv i32 %a, 219
ret i32 %div
}
; CHECK-LABEL: SdivConst
; CHECK: cdq
; CHECK: idiv e
;
; ARM32-LABEL: SdivConst
; ARM32-NOT: tst
; ARM32: bl {{.*}} __divsi3
; ARM32HWDIV-LABEL: SdivConst
; ARM32HWDIV-NOT: tst
; ARM32HWDIV: sdiv
define i32 @Srem(i32 %a, i32 %b) {
entry:
......@@ -130,7 +157,16 @@ entry:
; CHECK-LABEL: Srem
; CHECK: cdq
; CHECK: idiv e
;
; ARM32-LABEL: Srem
; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; ARM32: bne
; ARM32: bl {{.*}} __modsi3
; ARM32HWDIV-LABEL: Srem
; ARM32HWDIV: tst
; ARM32HWDIV: bne
; ARM32HWDIV: sdiv
; ARM32HWDIV: mls
define i32 @Udiv(i32 %a, i32 %b) {
entry:
......@@ -139,7 +175,15 @@ entry:
}
; CHECK-LABEL: Udiv
; CHECK: div e
;
; ARM32-LABEL: Udiv
; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; ARM32: bne
; ARM32: bl {{.*}} __udivsi3
; ARM32HWDIV-LABEL: Udiv
; ARM32HWDIV: tst
; ARM32HWDIV: bne
; ARM32HWDIV: udiv
define i32 @Urem(i32 %a, i32 %b) {
entry:
......@@ -148,4 +192,13 @@ entry:
}
; CHECK-LABEL: Urem
; CHECK: div e
;
; ARM32-LABEL: Urem
; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; ARM32: bne
; ARM32: bl {{.*}} __umodsi3
; ARM32HWDIV-LABEL: Urem
; ARM32HWDIV: tst
; ARM32HWDIV: bne
; ARM32HWDIV: udiv
; ARM32HWDIV: mls
......@@ -5,9 +5,11 @@
; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s
; TODO(jvoung): Update to -O2 once the phi assignments are done for ARM
; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble --disassemble \
; RUN: --target arm32 -i %s --args -Om1 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
define i32 @testSwitch(i32 %a) {
entry:
......
; This tests the basic structure of the Unreachable instruction.
; RUN: %p2i -i %s --filetype=obj --disassemble -a -O2 | FileCheck %s
; RUN: %p2i -i %s --filetype=obj --disassemble -a -Om1 | FileCheck %s
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -O2 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -Om1 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
define internal i32 @divide(i32 %num, i32 %den) {
entry:
......@@ -22,3 +32,9 @@ return: ; preds = %entry
; CHECK: cdq
; CHECK: idiv
; CHECK: ret
; ARM32-LABEL: divide
; ARM32: cmp
; ARM32: .word 0xe7fedef0
; ARM32: bl {{.*}} __divsi3
; ARM32: bx lr
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment