Commit 6ec369eb by Jan Voung

ARM lowering integer divide and remainder, with div by 0 checks.

ARM normally just returns 0 when dividing by 0, with both the software and hardware implementations, which is different from what X86 does. So, for NaCl, we've modified LLVM to trap by inserting explicit 0 checks. Uses the -mattr=hwdiv-arm attribute to decide if 32-bit sdiv/udiv are supported. Also lower the unreachable-inst to a trap-inst, since we need a trap instruction for divide by 0 anyway. Misc: fix switch test under MINIMAL=1, since ARM requires allow_dump for filetype=asm. Random clang-format changes... TODO: check via cross tests.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1214693004.
parent 6c17dd8c
...@@ -42,6 +42,10 @@ double __Sz_sitofp_i64_f64(int64_t Value) { return (double)Value; } ...@@ -42,6 +42,10 @@ double __Sz_sitofp_i64_f64(int64_t Value) { return (double)Value; }
// Other helper calls emitted by Subzero but not implemented here: // Other helper calls emitted by Subzero but not implemented here:
// Compiler-rt: // Compiler-rt:
// __udivsi3 - udiv i32
// __divsi3 - sdiv i32
// __umodsi3 - urem i32
// __modsi3 - srem i32
// __udivdi3 - udiv i64 // __udivdi3 - udiv i64
// __divdi3 - sdiv i64 // __divdi3 - sdiv i64
// __umoddi3 - urem i64 // __umoddi3 - urem i64
......
...@@ -125,6 +125,34 @@ void InstARM32Pred::emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst, ...@@ -125,6 +125,34 @@ void InstARM32Pred::emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst,
Inst->getSrc(1)->emit(Func); Inst->getSrc(1)->emit(Func);
} }
void InstARM32Pred::emitFourAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 3);
Str << "\t" << Opcode << Inst->getPredicate() << "\t";
Inst->getDest()->emit(Func);
Str << ", ";
Inst->getSrc(0)->emit(Func);
Str << ", ";
Inst->getSrc(1)->emit(Func);
Str << ", ";
Inst->getSrc(2)->emit(Func);
}
void InstARM32Pred::emitCmpLike(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 2);
Str << "\t" << Opcode << Inst->getPredicate() << "\t";
Inst->getSrc(0)->emit(Func);
Str << ", ";
Inst->getSrc(1)->emit(Func);
}
OperandARM32Mem::OperandARM32Mem(Cfg * /* Func */, Type Ty, Variable *Base, OperandARM32Mem::OperandARM32Mem(Cfg * /* Func */, Type Ty, Variable *Base,
ConstantInteger32 *ImmOffset, AddrMode Mode) ConstantInteger32 *ImmOffset, AddrMode Mode)
: OperandARM32(kMem, Ty), Base(Base), ImmOffset(ImmOffset), Index(nullptr), : OperandARM32(kMem, Ty), Base(Base), ImmOffset(ImmOffset), Index(nullptr),
...@@ -207,15 +235,19 @@ InstARM32AdjustStack::InstARM32AdjustStack(Cfg *Func, Variable *SP, ...@@ -207,15 +235,19 @@ InstARM32AdjustStack::InstARM32AdjustStack(Cfg *Func, Variable *SP,
} }
InstARM32Br::InstARM32Br(Cfg *Func, const CfgNode *TargetTrue, InstARM32Br::InstARM32Br(Cfg *Func, const CfgNode *TargetTrue,
const CfgNode *TargetFalse, CondARM32::Cond Pred) const CfgNode *TargetFalse,
const InstARM32Label *Label, CondARM32::Cond Pred)
: InstARM32Pred(Func, InstARM32::Br, 0, nullptr, Pred), : InstARM32Pred(Func, InstARM32::Br, 0, nullptr, Pred),
TargetTrue(TargetTrue), TargetFalse(TargetFalse) {} TargetTrue(TargetTrue), TargetFalse(TargetFalse), Label(Label) {}
bool InstARM32Br::optimizeBranch(const CfgNode *NextNode) { bool InstARM32Br::optimizeBranch(const CfgNode *NextNode) {
// If there is no next block, then there can be no fallthrough to // If there is no next block, then there can be no fallthrough to
// optimize. // optimize.
if (NextNode == nullptr) if (NextNode == nullptr)
return false; return false;
// Intra-block conditional branches can't be optimized.
if (Label)
return false;
// If there is no fallthrough node, such as a non-default case label // If there is no fallthrough node, such as a non-default case label
// for a switch instruction, then there is no opportunity to // for a switch instruction, then there is no opportunity to
// optimize. // optimize.
...@@ -264,11 +296,12 @@ InstARM32Call::InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget) ...@@ -264,11 +296,12 @@ InstARM32Call::InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget)
addSource(CallTarget); addSource(CallTarget);
} }
InstARM32Cmp::InstARM32Cmp(Cfg *Func, Variable *Src1, Operand *Src2, InstARM32Label::InstARM32Label(Cfg *Func, TargetARM32 *Target)
CondARM32::Cond Predicate) : InstARM32(Func, InstARM32::Label, 0, nullptr),
: InstARM32Pred(Func, InstARM32::Cmp, 2, nullptr, Predicate) { Number(Target->makeNextLabelNumber()) {}
addSource(Src1);
addSource(Src2); IceString InstARM32Label::getName(const Cfg *Func) const {
return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number);
} }
InstARM32Ldr::InstARM32Ldr(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem, InstARM32Ldr::InstARM32Ldr(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem,
...@@ -277,15 +310,6 @@ InstARM32Ldr::InstARM32Ldr(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem, ...@@ -277,15 +310,6 @@ InstARM32Ldr::InstARM32Ldr(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem,
addSource(Mem); addSource(Mem);
} }
InstARM32Mla::InstARM32Mla(Cfg *Func, Variable *Dest, Variable *Src0,
Variable *Src1, Variable *Acc,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Mla, 3, Dest, Predicate) {
addSource(Src0);
addSource(Src1);
addSource(Acc);
}
InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests) InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests)
: InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) { : InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) {
// Track modifications to Dests separately via FakeDefs. // Track modifications to Dests separately via FakeDefs.
...@@ -314,6 +338,9 @@ InstARM32Str::InstARM32Str(Cfg *Func, Variable *Value, OperandARM32Mem *Mem, ...@@ -314,6 +338,9 @@ InstARM32Str::InstARM32Str(Cfg *Func, Variable *Value, OperandARM32Mem *Mem,
addSource(Mem); addSource(Mem);
} }
// Constructs a trap instruction. It is created with a source count of 0
// and a null destination.
InstARM32Trap::InstARM32Trap(Cfg *Func)
    : InstARM32(Func, InstARM32::Trap, /* source count */ 0,
                /* no destination */ nullptr) {}
InstARM32Umull::InstARM32Umull(Cfg *Func, Variable *DestLo, Variable *DestHi, InstARM32Umull::InstARM32Umull(Cfg *Func, Variable *DestLo, Variable *DestHi,
Variable *Src0, Variable *Src1, Variable *Src0, Variable *Src1,
CondARM32::Cond Predicate) CondARM32::Cond Predicate)
...@@ -348,7 +375,15 @@ template <> const char *InstARM32Mul::Opcode = "mul"; ...@@ -348,7 +375,15 @@ template <> const char *InstARM32Mul::Opcode = "mul";
template <> const char *InstARM32Orr::Opcode = "orr"; template <> const char *InstARM32Orr::Opcode = "orr";
template <> const char *InstARM32Rsb::Opcode = "rsb"; template <> const char *InstARM32Rsb::Opcode = "rsb";
template <> const char *InstARM32Sbc::Opcode = "sbc"; template <> const char *InstARM32Sbc::Opcode = "sbc";
template <> const char *InstARM32Sdiv::Opcode = "sdiv";
template <> const char *InstARM32Sub::Opcode = "sub"; template <> const char *InstARM32Sub::Opcode = "sub";
template <> const char *InstARM32Udiv::Opcode = "udiv";
// Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla";
template <> const char *InstARM32Mls::Opcode = "mls";
// Cmp-like ops
template <> const char *InstARM32Cmp::Opcode = "cmp";
template <> const char *InstARM32Tst::Opcode = "tst";
void InstARM32::dump(const Cfg *Func) const { void InstARM32::dump(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
...@@ -402,6 +437,9 @@ void InstARM32Br::emit(const Cfg *Func) const { ...@@ -402,6 +437,9 @@ void InstARM32Br::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
Str << "\t" Str << "\t"
<< "b" << getPredicate() << "\t"; << "b" << getPredicate() << "\t";
if (Label) {
Str << Label->getName(Func);
} else {
if (isUnconditionalBranch()) { if (isUnconditionalBranch()) {
Str << getTargetFalse()->getAsmName(); Str << getTargetFalse()->getAsmName();
} else { } else {
...@@ -412,6 +450,7 @@ void InstARM32Br::emit(const Cfg *Func) const { ...@@ -412,6 +450,7 @@ void InstARM32Br::emit(const Cfg *Func) const {
<< "\t" << getTargetFalse()->getAsmName(); << "\t" << getTargetFalse()->getAsmName();
} }
} }
}
} }
void InstARM32Br::emitIAS(const Cfg *Func) const { void InstARM32Br::emitIAS(const Cfg *Func) const {
...@@ -426,14 +465,19 @@ void InstARM32Br::dump(const Cfg *Func) const { ...@@ -426,14 +465,19 @@ void InstARM32Br::dump(const Cfg *Func) const {
Str << "br "; Str << "br ";
if (getPredicate() == CondARM32::AL) { if (getPredicate() == CondARM32::AL) {
Str << "label %" << getTargetFalse()->getName(); Str << "label %"
<< (Label ? Label->getName(Func) : getTargetFalse()->getName());
return; return;
} }
if (Label) {
Str << "label %" << Label->getName(Func);
} else {
Str << getPredicate() << ", label %" << getTargetTrue()->getName(); Str << getPredicate() << ", label %" << getTargetTrue()->getName();
if (getTargetFalse()) { if (getTargetFalse()) {
Str << ", label %" << getTargetFalse()->getName(); Str << ", label %" << getTargetFalse()->getName();
} }
}
} }
void InstARM32Call::emit(const Cfg *Func) const { void InstARM32Call::emit(const Cfg *Func) const {
...@@ -479,30 +523,23 @@ void InstARM32Call::dump(const Cfg *Func) const { ...@@ -479,30 +523,23 @@ void InstARM32Call::dump(const Cfg *Func) const {
getCallTarget()->dump(Func); getCallTarget()->dump(Func);
} }
void InstARM32Cmp::emit(const Cfg *Func) const { void InstARM32Label::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2); Str << getName(Func) << ":";
Str << "\t"
<< "cmp" << getPredicate() << "\t";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
} }
void InstARM32Cmp::emitIAS(const Cfg *Func) const { void InstARM32Label::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 2);
(void)Func; (void)Func;
llvm_unreachable("Not yet implemented"); llvm_unreachable("Not yet implemented");
} }
void InstARM32Cmp::dump(const Cfg *Func) const { void InstARM32Label::dump(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrDump(); Ostream &Str = Func->getContext()->getStrDump();
dumpOpcodePred(Str, "cmp", getSrc(0)->getType()); Str << getName(Func) << ":";
dumpSources(Func);
} }
void InstARM32Ldr::emit(const Cfg *Func) const { void InstARM32Ldr::emit(const Cfg *Func) const {
...@@ -536,40 +573,6 @@ void InstARM32Ldr::dump(const Cfg *Func) const { ...@@ -536,40 +573,6 @@ void InstARM32Ldr::dump(const Cfg *Func) const {
dumpSources(Func); dumpSources(Func);
} }
void InstARM32Mla::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 3);
assert(getDest()->hasReg());
Str << "\t"
<< "mla" << getPredicate() << "\t";
getDest()->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
Str << ", ";
getSrc(2)->emit(Func);
}
void InstARM32Mla::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 3);
(void)Func;
llvm_unreachable("Not yet implemented");
}
void InstARM32Mla::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = ";
dumpOpcodePred(Str, "mla", getDest()->getType());
Str << " ";
dumpSources(Func);
}
template <> void InstARM32Movw::emit(const Cfg *Func) const { template <> void InstARM32Movw::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
...@@ -757,6 +760,33 @@ void InstARM32Str::dump(const Cfg *Func) const { ...@@ -757,6 +760,33 @@ void InstARM32Str::dump(const Cfg *Func) const {
getSrc(0)->dump(Func); getSrc(0)->dump(Func);
} }
void InstARM32Trap::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 0);
// There isn't a mnemonic for the special NaCl Trap encoding, so dump
// the raw bytes.
Str << "\t.long 0x";
ARM32::AssemblerARM32 *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
for (uint8_t I : Asm->getNonExecBundlePadding()) {
Str.write_hex(I);
}
}
// Integrated-assembler emission for the trap instruction. Not implemented
// yet: reaching this function is treated as unreachable.
void InstARM32Trap::emitIAS(const Cfg *Func) const {
  (void)Func; // Unused until this is implemented.
  assert(getSrcSize() == 0);
  llvm_unreachable("Not yet implemented");
}
void InstARM32Trap::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
Str << "trap";
}
void InstARM32Umull::emit(const Cfg *Func) const { void InstARM32Umull::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
......
...@@ -1939,7 +1939,7 @@ void InstX8632UD2::dump(const Cfg *Func) const { ...@@ -1939,7 +1939,7 @@ void InstX8632UD2::dump(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrDump(); Ostream &Str = Func->getContext()->getStrDump();
Str << "ud2\n"; Str << "ud2";
} }
void InstX8632Test::emit(const Cfg *Func) const { void InstX8632Test::emit(const Cfg *Func) const {
...@@ -1998,7 +1998,7 @@ void InstX8632Mfence::dump(const Cfg *Func) const { ...@@ -1998,7 +1998,7 @@ void InstX8632Mfence::dump(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrDump(); Ostream &Str = Func->getContext()->getStrDump();
Str << "mfence\n"; Str << "mfence";
} }
void InstX8632Store::emit(const Cfg *Func) const { void InstX8632Store::emit(const Cfg *Func) const {
...@@ -2512,7 +2512,6 @@ void InstX8632Fstp::dump(const Cfg *Func) const { ...@@ -2512,7 +2512,6 @@ void InstX8632Fstp::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump(); Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func); dumpDest(Func);
Str << " = fstp." << getDest()->getType() << ", st(0)"; Str << " = fstp." << getDest()->getType() << ", st(0)";
Str << "\n";
} }
template <> void InstX8632Pcmpeq::emit(const Cfg *Func) const { template <> void InstX8632Pcmpeq::emit(const Cfg *Func) const {
......
...@@ -353,16 +353,20 @@ protected: ...@@ -353,16 +353,20 @@ protected:
const static constexpr char *H_fptoui_f64_i64 = "__Sz_fptoui_f64_i64"; const static constexpr char *H_fptoui_f64_i64 = "__Sz_fptoui_f64_i64";
const static constexpr char *H_frem_f32 = "fmodf"; const static constexpr char *H_frem_f32 = "fmodf";
const static constexpr char *H_frem_f64 = "fmod"; const static constexpr char *H_frem_f64 = "fmod";
const static constexpr char *H_sdiv_i32 = "__divsi3";
const static constexpr char *H_sdiv_i64 = "__divdi3"; const static constexpr char *H_sdiv_i64 = "__divdi3";
const static constexpr char *H_sitofp_i64_f32 = "__Sz_sitofp_i64_f32"; const static constexpr char *H_sitofp_i64_f32 = "__Sz_sitofp_i64_f32";
const static constexpr char *H_sitofp_i64_f64 = "__Sz_sitofp_i64_f64"; const static constexpr char *H_sitofp_i64_f64 = "__Sz_sitofp_i64_f64";
const static constexpr char *H_srem_i32 = "__modsi3";
const static constexpr char *H_srem_i64 = "__moddi3"; const static constexpr char *H_srem_i64 = "__moddi3";
const static constexpr char *H_udiv_i32 = "__udivsi3";
const static constexpr char *H_udiv_i64 = "__udivdi3"; const static constexpr char *H_udiv_i64 = "__udivdi3";
const static constexpr char *H_uitofp_4xi32_4xf32 = "__Sz_uitofp_4xi32_4xf32"; const static constexpr char *H_uitofp_4xi32_4xf32 = "__Sz_uitofp_4xi32_4xf32";
const static constexpr char *H_uitofp_i32_f32 = "__Sz_uitofp_i32_f32"; const static constexpr char *H_uitofp_i32_f32 = "__Sz_uitofp_i32_f32";
const static constexpr char *H_uitofp_i32_f64 = "__Sz_uitofp_i32_f64"; const static constexpr char *H_uitofp_i32_f64 = "__Sz_uitofp_i32_f64";
const static constexpr char *H_uitofp_i64_f32 = "__Sz_uitofp_i64_f32"; const static constexpr char *H_uitofp_i64_f32 = "__Sz_uitofp_i64_f32";
const static constexpr char *H_uitofp_i64_f64 = "__Sz_uitofp_i64_f64"; const static constexpr char *H_uitofp_i64_f64 = "__Sz_uitofp_i64_f64";
const static constexpr char *H_urem_i32 = "__umodsi3";
const static constexpr char *H_urem_i64 = "__umoddi3"; const static constexpr char *H_urem_i64 = "__umoddi3";
private: private:
......
...@@ -22,6 +22,30 @@ ...@@ -22,6 +22,30 @@
namespace Ice { namespace Ice {
// Encapsulates the ARM CPU features / instruction set level that code is
// being generated for. Instances are built from the command-line flags and
// cannot be default-constructed, copied, or assigned.
class TargetARM32Features {
  TargetARM32Features() = delete;
  TargetARM32Features(const TargetARM32Features &) = delete;
  TargetARM32Features &operator=(const TargetARM32Features &) = delete;

public:
  // Instruction set levels, in increasing order. [Begin, End) is the valid
  // range of levels.
  enum ARM32InstructionSet {
    Begin,
    // Neon is the PNaCl baseline instruction set.
    Neon = Begin,
    HWDivArm, // HW divide in ARM mode (not just Thumb mode).
    End
  };

  // The constructor is only declared here; how Flags selects the level is
  // defined elsewhere.
  explicit TargetARM32Features(const ClFlags &Flags);

  // Returns true if the configured level is at or above I in the enum's
  // ordering, i.e. a higher configured level implies all lower ones.
  bool hasFeature(ARM32InstructionSet I) const { return InstructionSet >= I; }

private:
  // Configured level; starts at the baseline (Begin).
  ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
};
// The target lowering logic for ARM32.
class TargetARM32 : public TargetLowering { class TargetARM32 : public TargetLowering {
TargetARM32() = delete; TargetARM32() = delete;
TargetARM32(const TargetARM32 &) = delete; TargetARM32(const TargetARM32 &) = delete;
...@@ -75,15 +99,9 @@ public: ...@@ -75,15 +99,9 @@ public:
void finishArgumentLowering(Variable *Arg, Variable *FramePtr, void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset, size_t &InArgsSizeBytes); size_t BasicFrameOffset, size_t &InArgsSizeBytes);
enum ARM32InstructionSet { bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
Begin, return CPUFeatures.hasFeature(I);
// Neon is the PNaCl baseline instruction set. }
Neon = Begin,
HWDivArm, // HW divide in ARM mode (not just Thumb mode).
End
};
ARM32InstructionSet getInstructionSet() const { return InstructionSet; }
protected: protected:
explicit TargetARM32(Cfg *Func); explicit TargetARM32(Cfg *Func);
...@@ -141,6 +159,18 @@ protected: ...@@ -141,6 +159,18 @@ protected:
llvm::SmallVectorImpl<int32_t> &Permutation, llvm::SmallVectorImpl<int32_t> &Permutation,
const llvm::SmallBitVector &ExcludeRegisters) const override; const llvm::SmallBitVector &ExcludeRegisters) const override;
// If a divide-by-zero check is needed, inserts a:
// test; branch .LSKIP; trap; .LSKIP: <continuation>.
// If no check is needed nothing is inserted.
void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
typedef void (TargetARM32::*ExtInstr)(Variable *, Variable *,
CondARM32::Cond);
typedef void (TargetARM32::*DivInstr)(Variable *, Variable *, Variable *,
CondARM32::Cond);
void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
ExtInstr ExtFunc, DivInstr DivFunc,
const char *DivHelperName, bool IsRemainder);
// The following are helpers that insert lowered ARM32 instructions // The following are helpers that insert lowered ARM32 instructions
// with minimal syntactic overhead, so that the lowering code can // with minimal syntactic overhead, so that the lowering code can
// look as close to assembly as practical. // look as close to assembly as practical.
...@@ -175,8 +205,8 @@ protected: ...@@ -175,8 +205,8 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Bic::create(Func, Dest, Src0, Src1, Pred)); Context.insert(InstARM32Bic::create(Func, Dest, Src0, Src1, Pred));
} }
void _br(CondARM32::Cond Condition, CfgNode *TargetTrue, void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
CfgNode *TargetFalse) { CondARM32::Cond Condition) {
Context.insert( Context.insert(
InstARM32Br::create(Func, TargetTrue, TargetFalse, Condition)); InstARM32Br::create(Func, TargetTrue, TargetFalse, Condition));
} }
...@@ -186,6 +216,9 @@ protected: ...@@ -186,6 +216,9 @@ protected:
void _br(CfgNode *Target, CondARM32::Cond Condition) { void _br(CfgNode *Target, CondARM32::Cond Condition) {
Context.insert(InstARM32Br::create(Func, Target, Condition)); Context.insert(InstARM32Br::create(Func, Target, Condition));
} }
// Inserts a branch, predicated on Condition, to an intra-block Label.
void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
  auto *Branch = InstARM32Br::create(Func, Label, Condition);
  Context.insert(Branch);
}
void _cmp(Variable *Src0, Operand *Src1, void _cmp(Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Cmp::create(Func, Src0, Src1, Pred)); Context.insert(InstARM32Cmp::create(Func, Src0, Src1, Pred));
...@@ -210,6 +243,10 @@ protected: ...@@ -210,6 +243,10 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Mla::create(Func, Dest, Src0, Src1, Acc, Pred)); Context.insert(InstARM32Mla::create(Func, Dest, Src0, Src1, Acc, Pred));
} }
// Inserts an "mls" instruction with destination Dest and sources Src0,
// Src1 and Acc, predicated on Pred (always-execute by default).
void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
          CondARM32::Cond Pred = CondARM32::AL) {
  auto *Mls = InstARM32Mls::create(Func, Dest, Src0, Src1, Acc, Pred);
  Context.insert(Mls);
}
// If Dest=nullptr is passed in, then a new variable is created, // If Dest=nullptr is passed in, then a new variable is created,
// marked as infinite register allocation weight, and returned // marked as infinite register allocation weight, and returned
// through the in/out Dest argument. // through the in/out Dest argument.
...@@ -248,6 +285,12 @@ protected: ...@@ -248,6 +285,12 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Orr::create(Func, Dest, Src0, Src1, Pred)); Context.insert(InstARM32Orr::create(Func, Dest, Src0, Src1, Pred));
} }
// Inserts an "orr" instruction created with its SetFlags argument set to
// true, i.e. the flag-setting form. Predicated on Pred (always-execute by
// default).
void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
           CondARM32::Cond Pred = CondARM32::AL) {
  constexpr bool SetFlags = true;
  auto *Orrs = InstARM32Orr::create(Func, Dest, Src0, Src1, Pred, SetFlags);
  Context.insert(Orrs);
}
void _push(const VarList &Sources) { void _push(const VarList &Sources) {
Context.insert(InstARM32Push::create(Func, Sources)); Context.insert(InstARM32Push::create(Func, Sources));
} }
...@@ -257,6 +300,9 @@ protected: ...@@ -257,6 +300,9 @@ protected:
for (Variable *Dest : Dests) for (Variable *Dest : Dests)
Context.insert(InstFakeDef::create(Func, Dest)); Context.insert(InstFakeDef::create(Func, Dest));
} }
// Inserts a return instruction built from LR and an optional Src0
// (nullptr by default).
void _ret(Variable *LR, Variable *Src0 = nullptr) {
  auto *Ret = InstARM32Ret::create(Func, LR, Src0);
  Context.insert(Ret);
}
void _rsb(Variable *Dest, Variable *Src0, Operand *Src1, void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred)); Context.insert(InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred));
...@@ -271,6 +317,10 @@ protected: ...@@ -271,6 +317,10 @@ protected:
Context.insert( Context.insert(
InstARM32Sbc::create(Func, Dest, Src0, Src1, Pred, SetFlags)); InstARM32Sbc::create(Func, Dest, Src0, Src1, Pred, SetFlags));
} }
// Inserts an "sdiv" instruction with destination Dest and sources Src0 and
// Src1, predicated on Pred (always-execute by default).
void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
           CondARM32::Cond Pred = CondARM32::AL) {
  auto *Sdiv = InstARM32Sdiv::create(Func, Dest, Src0, Src1, Pred);
  Context.insert(Sdiv);
}
void _str(Variable *Value, OperandARM32Mem *Addr, void _str(Variable *Value, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Str::create(Func, Value, Addr, Pred)); Context.insert(InstARM32Str::create(Func, Value, Addr, Pred));
...@@ -289,8 +339,14 @@ protected: ...@@ -289,8 +339,14 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Sxt::create(Func, Dest, Src0, Pred)); Context.insert(InstARM32Sxt::create(Func, Dest, Src0, Pred));
} }
void _ret(Variable *LR, Variable *Src0 = nullptr) { void _tst(Variable *Src0, Operand *Src1,
Context.insert(InstARM32Ret::create(Func, LR, Src0)); CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Tst::create(Func, Src0, Src1, Pred));
}
// Inserts a trap instruction at the current lowering position.
void _trap() {
  auto *Trap = InstARM32Trap::create(Func);
  Context.insert(Trap);
}
void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Udiv::create(Func, Dest, Src0, Src1, Pred));
} }
void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0, void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) { Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
...@@ -305,7 +361,7 @@ protected: ...@@ -305,7 +361,7 @@ protected:
Context.insert(InstARM32Uxt::create(Func, Dest, Src0, Pred)); Context.insert(InstARM32Uxt::create(Func, Dest, Src0, Pred));
} }
ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin; TargetARM32Features CPUFeatures;
bool UsesFramePointer = false; bool UsesFramePointer = false;
bool NeedsStackAlignment = false; bool NeedsStackAlignment = false;
bool MaybeLeafFunc = true; bool MaybeLeafFunc = true;
...@@ -386,6 +442,8 @@ protected: ...@@ -386,6 +442,8 @@ protected:
private: private:
~TargetHeaderARM32() = default; ~TargetHeaderARM32() = default;
TargetARM32Features CPUFeatures;
}; };
} // end of namespace Ice } // end of namespace Ice
......
...@@ -315,6 +315,11 @@ entry: ...@@ -315,6 +315,11 @@ entry:
; OPTM1-LABEL: div64BitSigned ; OPTM1-LABEL: div64BitSigned
; OPTM1: call {{.*}} R_{{.*}} __divdi3 ; OPTM1: call {{.*}} R_{{.*}} __divdi3
;
; ARM32-LABEL: div64BitSigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
; ARM32: bl {{.*}} __divdi3
define internal i64 @div64BitSignedConst(i64 %a) { define internal i64 @div64BitSignedConst(i64 %a) {
entry: entry:
...@@ -330,6 +335,14 @@ entry: ...@@ -330,6 +335,14 @@ entry:
; OPTM1: mov DWORD PTR [esp+0xc],0xb3a ; OPTM1: mov DWORD PTR [esp+0xc],0xb3a
; OPTM1: mov DWORD PTR [esp+0x8],0x73ce2ff2 ; OPTM1: mov DWORD PTR [esp+0x8],0x73ce2ff2
; OPTM1: call {{.*}} R_{{.*}} __divdi3 ; OPTM1: call {{.*}} R_{{.*}} __divdi3
;
; ARM32-LABEL: div64BitSignedConst
; For a constant, we should be able to optimize-out the divide by zero check.
; ARM32-NOT: orrs
; ARM32: movw {{.*}} ; 0x2ff2
; ARM32: movt {{.*}} ; 0x73ce
; ARM32: movw {{.*}} ; 0xb3a
; ARM32: bl {{.*}} __divdi3
define internal i64 @div64BitUnsigned(i64 %a, i64 %b) { define internal i64 @div64BitUnsigned(i64 %a, i64 %b) {
entry: entry:
...@@ -341,6 +354,11 @@ entry: ...@@ -341,6 +354,11 @@ entry:
; ;
; OPTM1-LABEL: div64BitUnsigned ; OPTM1-LABEL: div64BitUnsigned
; OPTM1: call {{.*}} R_{{.*}} __udivdi3 ; OPTM1: call {{.*}} R_{{.*}} __udivdi3
;
; ARM32-LABEL: div64BitUnsigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
; ARM32: bl {{.*}} __udivdi3
define internal i64 @rem64BitSigned(i64 %a, i64 %b) { define internal i64 @rem64BitSigned(i64 %a, i64 %b) {
entry: entry:
...@@ -352,6 +370,11 @@ entry: ...@@ -352,6 +370,11 @@ entry:
; ;
; OPTM1-LABEL: rem64BitSigned ; OPTM1-LABEL: rem64BitSigned
; OPTM1: call {{.*}} R_{{.*}} __moddi3 ; OPTM1: call {{.*}} R_{{.*}} __moddi3
;
; ARM32-LABEL: rem64BitSigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
; ARM32: bl {{.*}} __moddi3
define internal i64 @rem64BitUnsigned(i64 %a, i64 %b) { define internal i64 @rem64BitUnsigned(i64 %a, i64 %b) {
entry: entry:
...@@ -363,6 +386,11 @@ entry: ...@@ -363,6 +386,11 @@ entry:
; ;
; OPTM1-LABEL: rem64BitUnsigned ; OPTM1-LABEL: rem64BitUnsigned
; OPTM1: call {{.*}} R_{{.*}} __umoddi3 ; OPTM1: call {{.*}} R_{{.*}} __umoddi3
;
; ARM32-LABEL: rem64BitUnsigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
; ARM32: bl {{.*}} __umoddi3
define internal i64 @shl64BitSigned(i64 %a, i64 %b) { define internal i64 @shl64BitSigned(i64 %a, i64 %b) {
entry: entry:
......
...@@ -8,10 +8,15 @@ ...@@ -8,10 +8,15 @@
; once enough infrastructure is in. Also, switch to --filetype=obj ; once enough infrastructure is in. Also, switch to --filetype=obj
; when possible. ; when possible.
; RUN: %if --need=target_ARM32 --need=allow_dump \ ; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \ ; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \ ; RUN: -i %s --args -O2 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \ ; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s ; RUN: --command FileCheck --check-prefix ARM32 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
; RUN: -i %s --args -O2 --mattr=hwdiv-arm --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32HWDIV %s
define i32 @Add(i32 %a, i32 %b) { define i32 @Add(i32 %a, i32 %b) {
entry: entry:
...@@ -117,10 +122,32 @@ entry: ...@@ -117,10 +122,32 @@ entry:
; CHECK-LABEL: Sdiv ; CHECK-LABEL: Sdiv
; CHECK: cdq ; CHECK: cdq
; CHECK: idiv e ; CHECK: idiv e
;
; ARM32-LABEL: Sdiv ; ARM32-LABEL: Sdiv
; TODO(jvoung) -- implement divide and check here. ; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; The lowering needs to check if the denominator is 0 and trap, since ; ARM32: bne [[LABEL:[0-9a-f]+]]
; ARM normally doesn't trap on divide by 0. ; ARM32: .word 0xe7fedef0
; ARM32: [[LABEL]]: {{.*}} bl {{.*}} __divsi3
; ARM32HWDIV-LABEL: Sdiv
; ARM32HWDIV: tst
; ARM32HWDIV: bne
; ARM32HWDIV: sdiv
define i32 @SdivConst(i32 %a) {
entry:
%div = sdiv i32 %a, 219
ret i32 %div
}
; CHECK-LABEL: SdivConst
; CHECK: cdq
; CHECK: idiv e
;
; ARM32-LABEL: SdivConst
; ARM32-NOT: tst
; ARM32: bl {{.*}} __divsi3
; ARM32HWDIV-LABEL: SdivConst
; ARM32HWDIV-NOT: tst
; ARM32HWDIV: sdiv
define i32 @Srem(i32 %a, i32 %b) { define i32 @Srem(i32 %a, i32 %b) {
entry: entry:
...@@ -130,7 +157,16 @@ entry: ...@@ -130,7 +157,16 @@ entry:
; CHECK-LABEL: Srem ; CHECK-LABEL: Srem
; CHECK: cdq ; CHECK: cdq
; CHECK: idiv e ; CHECK: idiv e
;
; ARM32-LABEL: Srem ; ARM32-LABEL: Srem
; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; ARM32: bne
; ARM32: bl {{.*}} __modsi3
; ARM32HWDIV-LABEL: Srem
; ARM32HWDIV: tst
; ARM32HWDIV: bne
; ARM32HWDIV: sdiv
; ARM32HWDIV: mls
define i32 @Udiv(i32 %a, i32 %b) { define i32 @Udiv(i32 %a, i32 %b) {
entry: entry:
...@@ -139,7 +175,15 @@ entry: ...@@ -139,7 +175,15 @@ entry:
} }
; CHECK-LABEL: Udiv ; CHECK-LABEL: Udiv
; CHECK: div e ; CHECK: div e
;
; ARM32-LABEL: Udiv ; ARM32-LABEL: Udiv
; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; ARM32: bne
; ARM32: bl {{.*}} __udivsi3
; ARM32HWDIV-LABEL: Udiv
; ARM32HWDIV: tst
; ARM32HWDIV: bne
; ARM32HWDIV: udiv
define i32 @Urem(i32 %a, i32 %b) { define i32 @Urem(i32 %a, i32 %b) {
entry: entry:
...@@ -148,4 +192,13 @@ entry: ...@@ -148,4 +192,13 @@ entry:
} }
; CHECK-LABEL: Urem ; CHECK-LABEL: Urem
; CHECK: div e ; CHECK: div e
;
; ARM32-LABEL: Urem ; ARM32-LABEL: Urem
; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; ARM32: bne
; ARM32: bl {{.*}} __umodsi3
; ARM32HWDIV-LABEL: Urem
; ARM32HWDIV: tst
; ARM32HWDIV: bne
; ARM32HWDIV: udiv
; ARM32HWDIV: mls
...@@ -5,9 +5,11 @@ ...@@ -5,9 +5,11 @@
; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s
; TODO(jvoung): Update to -O2 once the phi assignments are done for ARM ; TODO(jvoung): Update to -O2 once the phi assignments are done for ARM
; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \ ; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \ ; RUN: --command %p2i --filetype=asm --assemble --disassemble \
; RUN: | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32 %s ; RUN: --target arm32 -i %s --args -Om1 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
define i32 @testSwitch(i32 %a) { define i32 @testSwitch(i32 %a) {
entry: entry:
......
; This tests the basic structure of the Unreachable instruction. ; This tests the basic structure of the Unreachable instruction.
; RUN: %p2i -i %s --filetype=obj --disassemble -a -O2 | FileCheck %s ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: %p2i -i %s --filetype=obj --disassemble -a -Om1 | FileCheck %s ; RUN: --target x8632 -i %s --args -O2 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -Om1 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
define internal i32 @divide(i32 %num, i32 %den) { define internal i32 @divide(i32 %num, i32 %den) {
entry: entry:
...@@ -22,3 +32,9 @@ return: ; preds = %entry ...@@ -22,3 +32,9 @@ return: ; preds = %entry
; CHECK: cdq ; CHECK: cdq
; CHECK: idiv ; CHECK: idiv
; CHECK: ret ; CHECK: ret
; ARM32-LABEL: divide
; ARM32: cmp
; ARM32: .word 0xe7fedef0
; ARM32: bl {{.*}} __divsi3
; ARM32: bx lr
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment