Commit 6ec369eb by Jan Voung

ARM lowering integer divide and remainder, with div by 0 checks.

ARM normally just returns 0 when dividing by 0 with the software and hw implementations, which is different from what X86 does. So, for NaCl, we've modified LLVM to trap by inserting explicit 0 checks. Uses -mattr=hwdiv-arm attribute to decide if 32-bit sdiv/udiv are supported. Also lower the unreachable-inst to a trap-inst, since we need a trap instruction for divide by 0 anyway. Misc: fix switch test under MINIMAL=1, since ARM requires allow_dump for filetype=asm. Random clang-format changes... TODO: check via cross tests BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1214693004.
parent 6c17dd8c
......@@ -42,6 +42,10 @@ double __Sz_sitofp_i64_f64(int64_t Value) { return (double)Value; }
// Other helper calls emitted by Subzero but not implemented here:
// Compiler-rt:
// __udivsi3 - udiv i32
// __divsi3 - sdiv i32
// __umodsi3 - urem i32
// __modsi3 - srem i32
// __udivdi3 - udiv i64
// __divdi3 - sdiv i64
// __umoddi3 - urem i64
......
......@@ -125,6 +125,34 @@ void InstARM32Pred::emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst,
Inst->getSrc(1)->emit(Func);
}
void InstARM32Pred::emitFourAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 3);
Str << "\t" << Opcode << Inst->getPredicate() << "\t";
Inst->getDest()->emit(Func);
Str << ", ";
Inst->getSrc(0)->emit(Func);
Str << ", ";
Inst->getSrc(1)->emit(Func);
Str << ", ";
Inst->getSrc(2)->emit(Func);
}
void InstARM32Pred::emitCmpLike(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 2);
Str << "\t" << Opcode << Inst->getPredicate() << "\t";
Inst->getSrc(0)->emit(Func);
Str << ", ";
Inst->getSrc(1)->emit(Func);
}
OperandARM32Mem::OperandARM32Mem(Cfg * /* Func */, Type Ty, Variable *Base,
ConstantInteger32 *ImmOffset, AddrMode Mode)
: OperandARM32(kMem, Ty), Base(Base), ImmOffset(ImmOffset), Index(nullptr),
......@@ -207,15 +235,19 @@ InstARM32AdjustStack::InstARM32AdjustStack(Cfg *Func, Variable *SP,
}
InstARM32Br::InstARM32Br(Cfg *Func, const CfgNode *TargetTrue,
const CfgNode *TargetFalse, CondARM32::Cond Pred)
const CfgNode *TargetFalse,
const InstARM32Label *Label, CondARM32::Cond Pred)
: InstARM32Pred(Func, InstARM32::Br, 0, nullptr, Pred),
TargetTrue(TargetTrue), TargetFalse(TargetFalse) {}
TargetTrue(TargetTrue), TargetFalse(TargetFalse), Label(Label) {}
bool InstARM32Br::optimizeBranch(const CfgNode *NextNode) {
// If there is no next block, then there can be no fallthrough to
// optimize.
if (NextNode == nullptr)
return false;
// Intra-block conditional branches can't be optimized.
if (Label)
return false;
// If there is no fallthrough node, such as a non-default case label
// for a switch instruction, then there is no opportunity to
// optimize.
......@@ -264,11 +296,12 @@ InstARM32Call::InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget)
addSource(CallTarget);
}
InstARM32Cmp::InstARM32Cmp(Cfg *Func, Variable *Src1, Operand *Src2,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Cmp, 2, nullptr, Predicate) {
addSource(Src1);
addSource(Src2);
// Builds a label pseudo-instruction with no sources and no destination. The
// label's number is obtained from the target lowering via
// makeNextLabelNumber().
InstARM32Label::InstARM32Label(Cfg *Func, TargetARM32 *Target)
    : InstARM32(Func, InstARM32::Label, /*Maxsrcs=*/0, /*Dest=*/nullptr),
      Number(Target->makeNextLabelNumber()) {}
// Returns the assembly-level name of this label:
//   ".L" + <function name> + "$local$__" + <label number>
IceString InstARM32Label::getName(const Cfg *Func) const {
  IceString Name = ".L";
  Name += Func->getFunctionName();
  Name += "$local$__";
  Name += std::to_string(Number);
  return Name;
}
InstARM32Ldr::InstARM32Ldr(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem,
......@@ -277,15 +310,6 @@ InstARM32Ldr::InstARM32Ldr(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem,
addSource(Mem);
}
InstARM32Mla::InstARM32Mla(Cfg *Func, Variable *Dest, Variable *Src0,
Variable *Src1, Variable *Acc,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Mla, 3, Dest, Predicate) {
addSource(Src0);
addSource(Src1);
addSource(Acc);
}
InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests)
: InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) {
// Track modifications to Dests separately via FakeDefs.
......@@ -314,6 +338,9 @@ InstARM32Str::InstARM32Str(Cfg *Func, Variable *Value, OperandARM32Mem *Mem,
addSource(Mem);
}
// Builds a trap instruction; it has no source operands and no destination.
InstARM32Trap::InstARM32Trap(Cfg *Func)
    : InstARM32(Func, InstARM32::Trap, /*Maxsrcs=*/0, /*Dest=*/nullptr) {}
InstARM32Umull::InstARM32Umull(Cfg *Func, Variable *DestLo, Variable *DestHi,
Variable *Src0, Variable *Src1,
CondARM32::Cond Predicate)
......@@ -348,7 +375,15 @@ template <> const char *InstARM32Mul::Opcode = "mul";
template <> const char *InstARM32Orr::Opcode = "orr";
template <> const char *InstARM32Rsb::Opcode = "rsb";
template <> const char *InstARM32Sbc::Opcode = "sbc";
template <> const char *InstARM32Sdiv::Opcode = "sdiv";
template <> const char *InstARM32Sub::Opcode = "sub";
template <> const char *InstARM32Udiv::Opcode = "udiv";
// Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla";
template <> const char *InstARM32Mls::Opcode = "mls";
// Cmp-like ops
template <> const char *InstARM32Cmp::Opcode = "cmp";
template <> const char *InstARM32Tst::Opcode = "tst";
void InstARM32::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
......@@ -402,14 +437,18 @@ void InstARM32Br::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
Str << "\t"
<< "b" << getPredicate() << "\t";
if (isUnconditionalBranch()) {
Str << getTargetFalse()->getAsmName();
if (Label) {
Str << Label->getName(Func);
} else {
Str << getTargetTrue()->getAsmName();
if (getTargetFalse()) {
Str << "\n\t"
<< "b"
<< "\t" << getTargetFalse()->getAsmName();
if (isUnconditionalBranch()) {
Str << getTargetFalse()->getAsmName();
} else {
Str << getTargetTrue()->getAsmName();
if (getTargetFalse()) {
Str << "\n\t"
<< "b"
<< "\t" << getTargetFalse()->getAsmName();
}
}
}
}
......@@ -426,13 +465,18 @@ void InstARM32Br::dump(const Cfg *Func) const {
Str << "br ";
if (getPredicate() == CondARM32::AL) {
Str << "label %" << getTargetFalse()->getName();
Str << "label %"
<< (Label ? Label->getName(Func) : getTargetFalse()->getName());
return;
}
Str << getPredicate() << ", label %" << getTargetTrue()->getName();
if (getTargetFalse()) {
Str << ", label %" << getTargetFalse()->getName();
if (Label) {
Str << "label %" << Label->getName(Func);
} else {
Str << getPredicate() << ", label %" << getTargetTrue()->getName();
if (getTargetFalse()) {
Str << ", label %" << getTargetFalse()->getName();
}
}
}
......@@ -479,30 +523,23 @@ void InstARM32Call::dump(const Cfg *Func) const {
getCallTarget()->dump(Func);
}
void InstARM32Cmp::emit(const Cfg *Func) const {
void InstARM32Label::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2);
Str << "\t"
<< "cmp" << getPredicate() << "\t";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
Str << getName(Func) << ":";
}
void InstARM32Cmp::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 2);
void InstARM32Label::emitIAS(const Cfg *Func) const {
(void)Func;
llvm_unreachable("Not yet implemented");
}
void InstARM32Cmp::dump(const Cfg *Func) const {
void InstARM32Label::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
dumpOpcodePred(Str, "cmp", getSrc(0)->getType());
dumpSources(Func);
Str << getName(Func) << ":";
}
void InstARM32Ldr::emit(const Cfg *Func) const {
......@@ -536,40 +573,6 @@ void InstARM32Ldr::dump(const Cfg *Func) const {
dumpSources(Func);
}
void InstARM32Mla::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 3);
assert(getDest()->hasReg());
Str << "\t"
<< "mla" << getPredicate() << "\t";
getDest()->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
Str << ", ";
getSrc(2)->emit(Func);
}
void InstARM32Mla::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 3);
(void)Func;
llvm_unreachable("Not yet implemented");
}
void InstARM32Mla::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = ";
dumpOpcodePred(Str, "mla", getDest()->getType());
Str << " ";
dumpSources(Func);
}
template <> void InstARM32Movw::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
......@@ -757,6 +760,33 @@ void InstARM32Str::dump(const Cfg *Func) const {
getSrc(0)->dump(Func);
}
// Emits the trap as a raw ".long 0x<hex>" directive built from the bytes the
// assembler reports as its non-executable bundle padding. Does nothing when
// BuildDefs::dump() is false.
void InstARM32Trap::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Func->getContext()->getStrEmit();
  assert(getSrcSize() == 0);
  // There isn't a mnemonic for the special NaCl Trap encoding, so dump
  // the raw bytes.
  Str << "\t.long 0x";
  ARM32::AssemblerARM32 *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  for (uint8_t I : Asm->getNonExecBundlePadding()) {
    // write_hex() prints no leading zeros (assuming Ostream is an
    // llvm::raw_ostream -- confirm), so a byte below 0x10 would contribute a
    // single digit and silently shift the rest of the literal. Pad every byte
    // to exactly two hex digits.
    if (I < 0x10)
      Str << "0";
    Str.write_hex(I);
  }
}
void InstARM32Trap::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 0);
(void)Func;
llvm_unreachable("Not yet implemented");
}
// Writes the textual form "trap" to the context's dump stream. Does nothing
// when BuildDefs::dump() is false.
void InstARM32Trap::dump(const Cfg *Func) const {
  if (BuildDefs::dump())
    Func->getContext()->getStrDump() << "trap";
}
void InstARM32Umull::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
......
......@@ -262,10 +262,12 @@ public:
Call,
Cmp,
Eor,
Label,
Ldr,
Lsl,
Lsr,
Mla,
Mls,
Mov,
Movt,
Movw,
......@@ -277,9 +279,13 @@ public:
Ret,
Rsb,
Sbc,
Sdiv,
Str,
Sub,
Sxt,
Trap,
Tst,
Udiv,
Umull,
Uxt
};
......@@ -322,6 +328,10 @@ public:
const Cfg *Func);
static void emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func, bool SetFlags);
static void emitFourAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func);
static void emitCmpLike(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func);
protected:
CondARM32::Cond Predicate;
......@@ -477,11 +487,11 @@ public:
// Create an ordinary binary-op instruction like add, and sub.
// Dest and Src0 must be registers.
static InstARM32ThreeAddrGPR *create(Cfg *Func, Variable *Dest,
Variable *Src1, Operand *Src2,
Variable *Src0, Operand *Src1,
CondARM32::Cond Predicate,
bool SetFlags = false) {
return new (Func->allocate<InstARM32ThreeAddrGPR>())
InstARM32ThreeAddrGPR(Func, Dest, Src1, Src2, Predicate, SetFlags);
InstARM32ThreeAddrGPR(Func, Dest, Src0, Src1, Predicate, SetFlags);
}
void emit(const Cfg *Func) const override {
if (!BuildDefs::dump())
......@@ -505,17 +515,109 @@ public:
static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
private:
InstARM32ThreeAddrGPR(Cfg *Func, Variable *Dest, Variable *Src1,
Operand *Src2, CondARM32::Cond Predicate, bool SetFlags)
InstARM32ThreeAddrGPR(Cfg *Func, Variable *Dest, Variable *Src0,
Operand *Src1, CondARM32::Cond Predicate, bool SetFlags)
: InstARM32Pred(Func, K, 2, Dest, Predicate), SetFlags(SetFlags) {
addSource(Src0);
addSource(Src1);
addSource(Src2);
}
static const char *Opcode;
bool SetFlags;
};
// Four-operand GPR instructions: one destination plus three register
// sources, of the form x := a op1 (y op2 z) -- e.g. multiply accumulate.
// The template parameter K selects the instruction kind; the matching
// mnemonic is supplied by a per-kind specialization of Opcode.
template <InstARM32::InstKindARM32 K>
class InstARM32FourAddrGPR : public InstARM32Pred {
  InstARM32FourAddrGPR() = delete;
  InstARM32FourAddrGPR(const InstARM32FourAddrGPR &) = delete;
  InstARM32FourAddrGPR &operator=(const InstARM32FourAddrGPR &) = delete;

public:
  // Allocates the instruction from Func. All four operands are Variables
  // and are expected to be registers.
  static InstARM32FourAddrGPR *create(Cfg *Func, Variable *Dest, Variable *Src0,
                                      Variable *Src1, Variable *Src2,
                                      CondARM32::Cond Predicate) {
    void *Storage = Func->allocate<InstARM32FourAddrGPR>();
    return new (Storage)
        InstARM32FourAddrGPR(Func, Dest, Src0, Src1, Src2, Predicate);
  }

  // Textual assembly emission; delegates to InstARM32Pred::emitFourAddr.
  void emit(const Cfg *Func) const override {
    if (BuildDefs::dump())
      emitFourAddr(Opcode, this, Func);
  }

  // Integrated-assembler emission is not implemented yet.
  void emitIAS(const Cfg *Func) const override {
    (void)Func;
    llvm::report_fatal_error("Not yet implemented");
  }

  // Dumps "<dest> = <opcode+predicate> <sources>" to the dump stream.
  void dump(const Cfg *Func) const override {
    if (!BuildDefs::dump())
      return;
    Ostream &Out = Func->getContext()->getStrDump();
    dumpDest(Func);
    Out << " = ";
    dumpOpcodePred(Out, Opcode, getDest()->getType());
    Out << " ";
    dumpSources(Func);
  }

  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }

private:
  // Sources are registered in the order Src0, Src1, Src2, which is also the
  // order in which they are emitted.
  InstARM32FourAddrGPR(Cfg *Func, Variable *Dest, Variable *Src0,
                       Variable *Src1, Variable *Src2,
                       CondARM32::Cond Predicate)
      : InstARM32Pred(Func, K, 3, Dest, Predicate) {
    addSource(Src0);
    addSource(Src1);
    addSource(Src2);
  }

  static const char *Opcode;
};
// Instructions of the form x cmpop y (setting flags). They have two source
// operands and no destination. The template parameter K selects the
// instruction kind; the matching mnemonic is supplied by a per-kind
// specialization of Opcode.
template <InstARM32::InstKindARM32 K>
class InstARM32CmpLike : public InstARM32Pred {
  InstARM32CmpLike() = delete;
  InstARM32CmpLike(const InstARM32CmpLike &) = delete;
  InstARM32CmpLike &operator=(const InstARM32CmpLike &) = delete;

public:
  // Allocates the instruction from Func. Src0 is a Variable; Src1 may be
  // any Operand.
  static InstARM32CmpLike *create(Cfg *Func, Variable *Src0, Operand *Src1,
                                  CondARM32::Cond Predicate) {
    return new (Func->allocate<InstARM32CmpLike>())
        InstARM32CmpLike(Func, Src0, Src1, Predicate);
  }

  // Textual assembly emission; delegates to InstARM32Pred::emitCmpLike.
  void emit(const Cfg *Func) const override {
    if (!BuildDefs::dump())
      return;
    emitCmpLike(Opcode, this, Func);
  }

  // Integrated-assembler emission is not implemented yet. This path is
  // reachable, so report a fatal error in every build mode rather than
  // using llvm_unreachable (undefined behaviour in NDEBUG builds). This
  // matches InstARM32FourAddrGPR::emitIAS.
  void emitIAS(const Cfg *Func) const override {
    (void)Func;
    llvm::report_fatal_error("Not yet implemented");
  }

  // Dumps "<opcode+predicate> <sources>" to the dump stream; the type
  // passed to dumpOpcodePred is taken from the first source because there
  // is no destination.
  void dump(const Cfg *Func) const override {
    if (!BuildDefs::dump())
      return;
    Ostream &Str = Func->getContext()->getStrDump();
    dumpOpcodePred(Str, Opcode, getSrc(0)->getType());
    Str << " ";
    dumpSources(Func);
  }

  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }

private:
  InstARM32CmpLike(Cfg *Func, Variable *Src0, Operand *Src1,
                   CondARM32::Cond Predicate)
      : InstARM32Pred(Func, K, 2, nullptr, Predicate) {
    addSource(Src0);
    addSource(Src1);
  }

  static const char *Opcode;
};
typedef InstARM32ThreeAddrGPR<InstARM32::Adc> InstARM32Adc;
typedef InstARM32ThreeAddrGPR<InstARM32::Add> InstARM32Add;
typedef InstARM32ThreeAddrGPR<InstARM32::And> InstARM32And;
......@@ -528,7 +630,9 @@ typedef InstARM32ThreeAddrGPR<InstARM32::Mul> InstARM32Mul;
typedef InstARM32ThreeAddrGPR<InstARM32::Orr> InstARM32Orr;
typedef InstARM32ThreeAddrGPR<InstARM32::Rsb> InstARM32Rsb;
typedef InstARM32ThreeAddrGPR<InstARM32::Sbc> InstARM32Sbc;
typedef InstARM32ThreeAddrGPR<InstARM32::Sdiv> InstARM32Sdiv;
typedef InstARM32ThreeAddrGPR<InstARM32::Sub> InstARM32Sub;
typedef InstARM32ThreeAddrGPR<InstARM32::Udiv> InstARM32Udiv;
// Move instruction (variable <- flex). This is more of a pseudo-inst.
// If var is a register, then we use "mov". If var is stack, then we use
// "str" to store to the stack.
......@@ -543,6 +647,35 @@ typedef InstARM32UnaryopGPR<InstARM32::Mvn> InstARM32Mvn;
// but we aren't using that for now, so just model as a Unaryop.
typedef InstARM32UnaryopGPR<InstARM32::Sxt> InstARM32Sxt;
typedef InstARM32UnaryopGPR<InstARM32::Uxt> InstARM32Uxt;
typedef InstARM32FourAddrGPR<InstARM32::Mla> InstARM32Mla;
typedef InstARM32FourAddrGPR<InstARM32::Mls> InstARM32Mls;
typedef InstARM32CmpLike<InstARM32::Cmp> InstARM32Cmp;
typedef InstARM32CmpLike<InstARM32::Tst> InstARM32Tst;
// InstARM32Label represents an intra-block label that is the target
// of an intra-block branch. The offset between the label and the
// branch must be fit in the instruction immediate (considered "near").
class InstARM32Label : public InstARM32 {
InstARM32Label() = delete;
InstARM32Label(const InstARM32Label &) = delete;
InstARM32Label &operator=(const InstARM32Label &) = delete;
public:
static InstARM32Label *create(Cfg *Func, TargetARM32 *Target) {
return new (Func->allocate<InstARM32Label>()) InstARM32Label(Func, Target);
}
uint32_t getEmitInstCount() const override { return 0; }
IceString getName(const Cfg *Func) const;
SizeT getNumber() const { return Number; }
void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override;
private:
InstARM32Label(Cfg *Func, TargetARM32 *Target);
SizeT Number; // used for unique label generation.
};
// Direct branch instruction.
class InstARM32Br : public InstARM32Pred {
......@@ -555,14 +688,16 @@ public:
static InstARM32Br *create(Cfg *Func, CfgNode *TargetTrue,
CfgNode *TargetFalse, CondARM32::Cond Predicate) {
assert(Predicate != CondARM32::AL);
constexpr InstARM32Label *NoLabel = nullptr;
return new (Func->allocate<InstARM32Br>())
InstARM32Br(Func, TargetTrue, TargetFalse, Predicate);
InstARM32Br(Func, TargetTrue, TargetFalse, NoLabel, Predicate);
}
// Create an unconditional branch to a node.
static InstARM32Br *create(Cfg *Func, CfgNode *Target) {
const CfgNode *NoCondTarget = nullptr;
constexpr CfgNode *NoCondTarget = nullptr;
constexpr InstARM32Label *NoLabel = nullptr;
return new (Func->allocate<InstARM32Br>())
InstARM32Br(Func, NoCondTarget, Target, CondARM32::AL);
InstARM32Br(Func, NoCondTarget, Target, NoLabel, CondARM32::AL);
}
// Create a non-terminator conditional branch to a node, with a
// fallthrough to the next instruction in the current node. This is
......@@ -570,15 +705,27 @@ public:
static InstARM32Br *create(Cfg *Func, CfgNode *Target,
CondARM32::Cond Predicate) {
assert(Predicate != CondARM32::AL);
const CfgNode *NoUncondTarget = nullptr;
constexpr CfgNode *NoUncondTarget = nullptr;
constexpr InstARM32Label *NoLabel = nullptr;
return new (Func->allocate<InstARM32Br>())
InstARM32Br(Func, Target, NoUncondTarget, NoLabel, Predicate);
}
// Create a conditional intra-block branch (or unconditional, if
// Condition==AL) to a label in the current block.
static InstARM32Br *create(Cfg *Func, InstARM32Label *Label,
CondARM32::Cond Predicate) {
constexpr CfgNode *NoCondTarget = nullptr;
constexpr CfgNode *NoUncondTarget = nullptr;
return new (Func->allocate<InstARM32Br>())
InstARM32Br(Func, Target, NoUncondTarget, Predicate);
InstARM32Br(Func, NoCondTarget, NoUncondTarget, Label, Predicate);
}
const CfgNode *getTargetTrue() const { return TargetTrue; }
const CfgNode *getTargetFalse() const { return TargetFalse; }
bool optimizeBranch(const CfgNode *NextNode);
uint32_t getEmitInstCount() const override {
uint32_t Sum = 0;
if (Label)
++Sum;
if (getTargetTrue())
++Sum;
if (getTargetFalse())
......@@ -596,10 +743,11 @@ public:
private:
InstARM32Br(Cfg *Func, const CfgNode *TargetTrue, const CfgNode *TargetFalse,
CondARM32::Cond Predicate);
const InstARM32Label *Label, CondARM32::Cond Predicate);
const CfgNode *TargetTrue;
const CfgNode *TargetFalse;
const InstARM32Label *Label; // Intra-block branch target
};
// AdjustStack instruction - subtracts SP by the given amount and
......@@ -653,28 +801,6 @@ private:
InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget);
};
// Integer compare instruction.
class InstARM32Cmp : public InstARM32Pred {
InstARM32Cmp() = delete;
InstARM32Cmp(const InstARM32Cmp &) = delete;
InstARM32Cmp &operator=(const InstARM32Cmp &) = delete;
public:
static InstARM32Cmp *create(Cfg *Func, Variable *Src1, Operand *Src2,
CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Cmp>())
InstARM32Cmp(Func, Src1, Src2, Predicate);
}
void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Cmp); }
private:
InstARM32Cmp(Cfg *Func, Variable *Src1, Operand *Src2,
CondARM32::Cond Predicate);
};
// Load instruction.
class InstARM32Ldr : public InstARM32Pred {
InstARM32Ldr() = delete;
......@@ -698,30 +824,6 @@ private:
CondARM32::Cond Predicate);
};
// Multiply Accumulate: d := x * y + a
class InstARM32Mla : public InstARM32Pred {
InstARM32Mla() = delete;
InstARM32Mla(const InstARM32Mla &) = delete;
InstARM32Mla &operator=(const InstARM32Mla &) = delete;
public:
// Everything must be a register.
static InstARM32Mla *create(Cfg *Func, Variable *Dest, Variable *Src0,
Variable *Src1, Variable *Acc,
CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Mla>())
InstARM32Mla(Func, Dest, Src0, Src1, Acc, Predicate);
}
void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Mla); }
private:
InstARM32Mla(Cfg *Func, Variable *Dest, Variable *Src0, Variable *Src1,
Variable *Acc, CondARM32::Cond Predicate);
};
// Pop into a list of GPRs. Technically this can be predicated, but we don't
// need that functionality.
class InstARM32Pop : public InstARM32 {
......@@ -816,6 +918,24 @@ private:
CondARM32::Cond Predicate);
};
class InstARM32Trap : public InstARM32 {
InstARM32Trap() = delete;
InstARM32Trap(const InstARM32Trap &) = delete;
InstARM32Trap &operator=(const InstARM32Trap &) = delete;
public:
static InstARM32Trap *create(Cfg *Func) {
return new (Func->allocate<InstARM32Trap>()) InstARM32Trap(Func);
}
void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Trap); }
private:
explicit InstARM32Trap(Cfg *Func);
};
// Unsigned Multiply Long: d.lo, d.hi := x * y
class InstARM32Umull : public InstARM32Pred {
InstARM32Umull() = delete;
......
......@@ -1939,7 +1939,7 @@ void InstX8632UD2::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
Str << "ud2\n";
Str << "ud2";
}
void InstX8632Test::emit(const Cfg *Func) const {
......@@ -1998,7 +1998,7 @@ void InstX8632Mfence::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
Str << "mfence\n";
Str << "mfence";
}
void InstX8632Store::emit(const Cfg *Func) const {
......@@ -2512,7 +2512,6 @@ void InstX8632Fstp::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = fstp." << getDest()->getType() << ", st(0)";
Str << "\n";
}
template <> void InstX8632Pcmpeq::emit(const Cfg *Func) const {
......
......@@ -353,16 +353,20 @@ protected:
const static constexpr char *H_fptoui_f64_i64 = "__Sz_fptoui_f64_i64";
const static constexpr char *H_frem_f32 = "fmodf";
const static constexpr char *H_frem_f64 = "fmod";
const static constexpr char *H_sdiv_i32 = "__divsi3";
const static constexpr char *H_sdiv_i64 = "__divdi3";
const static constexpr char *H_sitofp_i64_f32 = "__Sz_sitofp_i64_f32";
const static constexpr char *H_sitofp_i64_f64 = "__Sz_sitofp_i64_f64";
const static constexpr char *H_srem_i32 = "__modsi3";
const static constexpr char *H_srem_i64 = "__moddi3";
const static constexpr char *H_udiv_i32 = "__udivsi3";
const static constexpr char *H_udiv_i64 = "__udivdi3";
const static constexpr char *H_uitofp_4xi32_4xf32 = "__Sz_uitofp_4xi32_4xf32";
const static constexpr char *H_uitofp_i32_f32 = "__Sz_uitofp_i32_f32";
const static constexpr char *H_uitofp_i32_f64 = "__Sz_uitofp_i32_f64";
const static constexpr char *H_uitofp_i64_f32 = "__Sz_uitofp_i64_f32";
const static constexpr char *H_uitofp_i64_f64 = "__Sz_uitofp_i64_f64";
const static constexpr char *H_urem_i32 = "__umodsi3";
const static constexpr char *H_urem_i64 = "__umoddi3";
private:
......
......@@ -141,21 +141,34 @@ uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
return Utils::applyAlignment(Value, typeAlignInBytes);
}
// Conservative compile-time test: returns true only when Op is a
// ConstantInteger32 whose value is not zero. A null Op, or any other kind
// of operand, yields false ("not known to be non-zero").
bool isGuaranteedNonzeroInt(const Operand *Op) {
  const auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op);
  return Const != nullptr && Const->getValue() != 0;
}
} // end of anonymous namespace
TargetARM32::TargetARM32(Cfg *Func) : TargetLowering(Func) {
TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
static_assert(
(ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
(TargetInstructionSet::ARM32InstructionSet_End -
TargetInstructionSet::ARM32InstructionSet_Begin),
"ARM32InstructionSet range different from TargetInstructionSet");
if (Func->getContext()->getFlags().getTargetInstructionSet() !=
if (Flags.getTargetInstructionSet() !=
TargetInstructionSet::BaseInstructionSet) {
InstructionSet = static_cast<ARM32InstructionSet>(
(Func->getContext()->getFlags().getTargetInstructionSet() -
(Flags.getTargetInstructionSet() -
TargetInstructionSet::ARM32InstructionSet_Begin) +
ARM32InstructionSet::Begin);
}
}
TargetARM32::TargetARM32(Cfg *Func)
: TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {
// TODO: Don't initialize IntegerRegisters and friends every time.
// Instead, initialize in some sort of static initializer for the
// class.
......@@ -1009,6 +1022,75 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
_mov(Dest, SP);
}
// Inserts a divide-by-zero guard for a divisor of type Ty. SrcLo holds the
// divisor (or its low half for i64); SrcHi is the high half for i64 and may
// be null otherwise. Nothing is emitted when either operand is a constant
// known to be non-zero. Otherwise the sequence is:
//   <flag-setting test of the divisor>
//   b<ne> Label      ; skip the trap
//   trap
// Label:
void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
  const bool KnownNonzero =
      isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi);
  if (KnownNonzero)
    return;

  Variable *SrcLoReg = legalizeToVar(SrcLo);
  switch (Ty) {
  case IceType_i8:
  case IceType_i16: {
    // Only the bits that belong to the narrow type are tested.
    const auto MaskBits = (Ty == IceType_i8) ? 0xFF : 0xFFFF;
    Operand *Mask =
        legalize(Ctx->getConstantInt32(MaskBits), Legal_Reg | Legal_Flex);
    _tst(SrcLoReg, Mask);
    break;
  }
  case IceType_i32:
    _tst(SrcLoReg, SrcLoReg);
    break;
  case IceType_i64: {
    // OR the two halves together purely for the flags; the result register
    // is otherwise dead, so a fake use is inserted for it.
    Variable *ScratchReg = makeReg(IceType_i32);
    _orrs(ScratchReg, SrcLoReg, SrcHi);
    Context.insert(InstFakeUse::create(Func, ScratchReg));
    break;
  }
  default:
    llvm_unreachable("Unexpected type");
  }

  InstARM32Label *SkipTrap = InstARM32Label::create(Func, this);
  _br(SkipTrap, CondARM32::NE);
  _trap();
  Context.insert(SkipTrap);
}
// Lowers a non-i64 integer divide or remainder.
//   Dest          - result variable; its type selects whether the operands
//                   are first extended to i32.
//   T             - temporary that receives the hardware divide result.
//   Src0R, Src1   - dividend (already a Variable) and divisor.
//   ExtFunc       - extension emitter applied to both operands when Dest is
//                   not i32.
//   DivFunc       - hardware divide emitter, used when HWDivArm is available.
//   DivHelperName - runtime helper called when HWDivArm is not available.
//   IsRemainder   - when true (hardware path only), an mls is emitted after
//                   the divide to derive the remainder from the quotient.
// A divide-by-zero check on Src1 is always emitted first.
void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
                               Operand *Src1, ExtInstr ExtFunc,
                               DivInstr DivFunc, const char *DivHelperName,
                               bool IsRemainder) {
  const Type DestTy = Dest->getType();
  div0Check(DestTy, Src1, nullptr);
  Variable *Src1R = legalizeToVar(Src1);

  Variable *Dividend = Src0R;
  Variable *Divisor = Src1R;
  if (DestTy != IceType_i32) {
    // Narrow operands are extended into fresh i32 registers.
    Dividend = makeReg(IceType_i32);
    (this->*ExtFunc)(Dividend, Src0R, CondARM32::AL);
    Divisor = makeReg(IceType_i32);
    (this->*ExtFunc)(Divisor, Src1R, CondARM32::AL);
  }

  if (!hasCPUFeature(TargetARM32Features::HWDivArm)) {
    // No hardware divide: call the runtime helper on the extended operands.
    constexpr SizeT MaxSrcs = 2;
    InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs);
    Call->addArg(Dividend);
    Call->addArg(Divisor);
    lowerCall(Call);
    return;
  }

  (this->*DivFunc)(T, Dividend, Divisor, CondARM32::AL);
  Variable *Result = T;
  if (IsRemainder) {
    // mls combines quotient (T), divisor and dividend into the remainder.
    Result = makeReg(IceType_i32);
    _mls(Result, T, Divisor, Dividend);
  }
  _mov(Dest, Result);
}
void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
// TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier
......@@ -1182,9 +1264,47 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
case InstArithmetic::Udiv:
case InstArithmetic::Sdiv:
case InstArithmetic::Urem:
case InstArithmetic::Srem:
UnimplementedError(Func->getContext()->getFlags());
break;
case InstArithmetic::Srem: {
// Check for divide by 0 (ARM normally doesn't trap, but we want it
// to trap for NaCl). Src1Lo and Src1Hi may have already been legalized
// to a register, which will hide a constant source operand.
// Instead, check the not-yet-legalized Src1 to optimize-out a divide
// by 0 check.
if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
if (C64->getValue() == 0) {
div0Check(IceType_i64, Src1Lo, Src1Hi);
}
} else {
div0Check(IceType_i64, Src1Lo, Src1Hi);
}
// Technically, ARM has their own aeabi routines, but we can use the
// non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div,
// but uses the more standard __moddi3 for rem.
const char *HelperName = "";
switch (Inst->getOp()) {
case InstArithmetic::Udiv:
HelperName = H_udiv_i64;
break;
case InstArithmetic::Sdiv:
HelperName = H_sdiv_i64;
break;
case InstArithmetic::Urem:
HelperName = H_urem_i64;
break;
case InstArithmetic::Srem:
HelperName = H_srem_i64;
break;
default:
llvm_unreachable("Should have only matched div ops.");
break;
}
constexpr SizeT MaxSrcs = 2;
InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
Call->addArg(Inst->getSrc(0));
Call->addArg(Inst->getSrc(1));
lowerCall(Call);
return;
}
case InstArithmetic::Fadd:
case InstArithmetic::Fsub:
case InstArithmetic::Fmul:
......@@ -1197,61 +1317,73 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
UnimplementedError(Func->getContext()->getFlags());
} else { // Dest->getType() is non-i64 scalar
Variable *Src0R = legalizeToVar(Inst->getSrc(0));
Src1 = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex);
Operand *Src1RF = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex);
Variable *T = makeReg(Dest->getType());
switch (Inst->getOp()) {
case InstArithmetic::_num:
llvm_unreachable("Unknown arithmetic operator");
break;
case InstArithmetic::Add: {
_add(T, Src0R, Src1);
_add(T, Src0R, Src1RF);
_mov(Dest, T);
} break;
case InstArithmetic::And: {
_and(T, Src0R, Src1);
_and(T, Src0R, Src1RF);
_mov(Dest, T);
} break;
case InstArithmetic::Or: {
_orr(T, Src0R, Src1);
_orr(T, Src0R, Src1RF);
_mov(Dest, T);
} break;
case InstArithmetic::Xor: {
_eor(T, Src0R, Src1);
_eor(T, Src0R, Src1RF);
_mov(Dest, T);
} break;
case InstArithmetic::Sub: {
_sub(T, Src0R, Src1);
_sub(T, Src0R, Src1RF);
_mov(Dest, T);
} break;
case InstArithmetic::Mul: {
Variable *Src1R = legalizeToVar(Src1);
Variable *Src1R = legalizeToVar(Src1RF);
_mul(T, Src0R, Src1R);
_mov(Dest, T);
} break;
case InstArithmetic::Shl:
_lsl(T, Src0R, Src1);
_lsl(T, Src0R, Src1RF);
_mov(Dest, T);
break;
case InstArithmetic::Lshr:
_lsr(T, Src0R, Src1);
_lsr(T, Src0R, Src1RF);
_mov(Dest, T);
break;
case InstArithmetic::Ashr:
_asr(T, Src0R, Src1);
_asr(T, Src0R, Src1RF);
_mov(Dest, T);
break;
case InstArithmetic::Udiv:
UnimplementedError(Func->getContext()->getFlags());
break;
case InstArithmetic::Sdiv:
UnimplementedError(Func->getContext()->getFlags());
break;
case InstArithmetic::Urem:
UnimplementedError(Func->getContext()->getFlags());
break;
case InstArithmetic::Srem:
UnimplementedError(Func->getContext()->getFlags());
break;
case InstArithmetic::Udiv: {
constexpr bool IsRemainder = false;
lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt,
&TargetARM32::_udiv, H_udiv_i32, IsRemainder);
return;
}
case InstArithmetic::Sdiv: {
constexpr bool IsRemainder = false;
lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt,
&TargetARM32::_sdiv, H_sdiv_i32, IsRemainder);
return;
}
case InstArithmetic::Urem: {
constexpr bool IsRemainder = true;
lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt,
&TargetARM32::_udiv, H_urem_i32, IsRemainder);
return;
}
case InstArithmetic::Srem: {
constexpr bool IsRemainder = true;
lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt,
&TargetARM32::_sdiv, H_srem_i32, IsRemainder);
return;
}
case InstArithmetic::Fadd:
UnimplementedError(Func->getContext()->getFlags());
break;
......@@ -1322,7 +1454,7 @@ void TargetARM32::lowerBr(const InstBr *Inst) {
Variable *Src0R = legalizeToVar(Cond);
Constant *Zero = Ctx->getConstantZero(IceType_i32);
_cmp(Src0R, Zero);
_br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse());
_br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE);
}
void TargetARM32::lowerCall(const InstCall *Instr) {
......@@ -2113,7 +2245,7 @@ void TargetARM32::lowerSwitch(const InstSwitch *Inst) {
}
void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) {
UnimplementedError(Func->getContext()->getFlags());
_trap();
}
// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
......@@ -2417,7 +2549,7 @@ void TargetDataARM32::lowerConstants() {
}
TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
: TargetHeaderLowering(Ctx) {}
: TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {}
void TargetHeaderARM32::lower() {
OstreamLocker L(Ctx);
......@@ -2431,12 +2563,18 @@ void TargetHeaderARM32::lower() {
// sub-subsection of the first public subsection of the attributes.
Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
// Chromebooks are at least A15, but do A9 for higher compat.
Str << ".cpu cortex-a9\n"
<< ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
// For some reason, the LLVM ARM asm parser has the .cpu directive override
// the mattr specified on the commandline. So to test hwdiv, we need to set
// the .cpu directive higher (can't just rely on --mattr=...).
if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
Str << ".cpu cortex-a15\n";
} else {
Str << ".cpu cortex-a9\n";
}
Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
<< ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
<< ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
// TODO(jvoung): check other CPU features like HW div.
Str << ".fpu neon\n"
<< ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
<< ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
......@@ -2450,6 +2588,9 @@ void TargetHeaderARM32::lower() {
<< ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
<< ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
<< ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
}
// Technically R9 is used for TLS with Sandboxing, and we reserve it.
// However, for compatibility with current NaCl LLVM, don't claim that.
Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
......
......@@ -22,6 +22,30 @@
namespace Ice {
// Class encapsulating ARM cpu features / instruction set.
// An instance records a single ARM32InstructionSet level and answers
// hasFeature() queries against it. Default construction, copy construction
// and copy assignment are all deleted; the only way to build one is from a
// ClFlags object.
class TargetARM32Features {
TargetARM32Features() = delete;
TargetARM32Features(const TargetARM32Features &) = delete;
TargetARM32Features &operator=(const TargetARM32Features &) = delete;
public:
// Defined out of line. NOTE(review): presumably selects InstructionSet from
// the target-attribute settings in Flags -- confirm against the definition.
explicit TargetARM32Features(const ClFlags &Flags);
// Feature levels, declared in increasing order. hasFeature() relies on this
// ordering, so a new level must be inserted in the right position.
enum ARM32InstructionSet {
Begin,
// Neon is the PNaCl baseline instruction set.
Neon = Begin,
HWDivArm, // HW divide in ARM mode (not just Thumb mode).
End
};
// Returns true when I is at or below the recorded level, i.e. every level up
// to and including InstructionSet is reported as present.
bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }
private:
// Recorded feature level; defaults to Begin (== Neon).
ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
};
// The target lowering logic for ARM32.
class TargetARM32 : public TargetLowering {
TargetARM32() = delete;
TargetARM32(const TargetARM32 &) = delete;
......@@ -75,15 +99,9 @@ public:
void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset, size_t &InArgsSizeBytes);
enum ARM32InstructionSet {
Begin,
// Neon is the PNaCl baseline instruction set.
Neon = Begin,
HWDivArm, // HW divide in ARM mode (not just Thumb mode).
End
};
ARM32InstructionSet getInstructionSet() const { return InstructionSet; }
// Reports whether feature I is present, as answered by this target's
// CPUFeatures member.
bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
  const bool Supported = CPUFeatures.hasFeature(I);
  return Supported;
}
protected:
explicit TargetARM32(Cfg *Func);
......@@ -141,6 +159,18 @@ protected:
llvm::SmallVectorImpl<int32_t> &Permutation,
const llvm::SmallBitVector &ExcludeRegisters) const override;
// If a divide-by-zero check is needed, inserts a:
// test; branch .LSKIP; trap; .LSKIP: <continuation>.
// If no check is needed nothing is inserted.
void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
// Pointer to a TargetARM32 member taking (Variable *, Variable *, Cond).
// The visible lowerIDivRem callers pass &TargetARM32::_sxt or
// &TargetARM32::_uxt for this.
typedef void (TargetARM32::*ExtInstr)(Variable *, Variable *,
CondARM32::Cond);
// Pointer to a TargetARM32 member taking (Variable *, Variable *, Variable *,
// Cond). The visible lowerIDivRem callers pass &TargetARM32::_sdiv or
// &TargetARM32::_udiv for this.
typedef void (TargetARM32::*DivInstr)(Variable *, Variable *, Variable *,
CondARM32::Cond);
// Shared lowering for the integer Udiv/Sdiv/Urem/Srem arithmetic cases; each
// case supplies its extension helper, divide helper, runtime helper name and
// whether the remainder (rather than the quotient) is wanted.
// NOTE(review): the definition is not visible from this header; presumably
// DivFunc is used when hardware divide is available and DivHelperName names
// the runtime routine called otherwise -- confirm against the definition.
void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
ExtInstr ExtFunc, DivInstr DivFunc,
const char *DivHelperName, bool IsRemainder);
// The following are helpers that insert lowered ARM32 instructions
// with minimal syntactic overhead, so that the lowering code can
// look as close to assembly as practical.
......@@ -175,8 +205,8 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Bic::create(Func, Dest, Src0, Src1, Pred));
}
// Inserts an InstARM32Br with both a true and a false target into the
// lowering context. The condition is the last parameter, in line with the
// other _br overloads and with the argument order of InstARM32Br::create.
void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
         CondARM32::Cond Condition) {
  Context.insert(
      InstARM32Br::create(Func, TargetTrue, TargetFalse, Condition));
}
......@@ -186,6 +216,9 @@ protected:
// Inserts an InstARM32Br built from a single CFG-node target and a condition
// into the lowering context.
void _br(CfgNode *Target, CondARM32::Cond Condition) {
  auto *Branch = InstARM32Br::create(Func, Target, Condition);
  Context.insert(Branch);
}
// Inserts an InstARM32Br whose target is an intra-block label (rather than a
// CFG node) into the lowering context.
void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
  auto *Branch = InstARM32Br::create(Func, Label, Condition);
  Context.insert(Branch);
}
void _cmp(Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Cmp::create(Func, Src0, Src1, Pred));
......@@ -210,6 +243,10 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Mla::create(Func, Dest, Src0, Src1, Acc, Pred));
}
// Inserts an InstARM32Mls built from Dest, Src0, Src1 and Acc into the
// lowering context. Pred defaults to AL (always).
void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
          CondARM32::Cond Pred = CondARM32::AL) {
  auto *Mls = InstARM32Mls::create(Func, Dest, Src0, Src1, Acc, Pred);
  Context.insert(Mls);
}
// If Dest=nullptr is passed in, then a new variable is created,
// marked as infinite register allocation weight, and returned
// through the in/out Dest argument.
......@@ -248,6 +285,12 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Orr::create(Func, Dest, Src0, Src1, Pred));
}
// Flag-setting variant of _orr: inserts an InstARM32Orr created with its
// SetFlags argument set to true. Pred defaults to AL (always).
void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
           CondARM32::Cond Pred = CondARM32::AL) {
  constexpr bool SetFlags = true;
  auto *Orrs = InstARM32Orr::create(Func, Dest, Src0, Src1, Pred, SetFlags);
  Context.insert(Orrs);
}
// Inserts an InstARM32Push covering the variables in Sources into the
// lowering context.
void _push(const VarList &Sources) {
  auto *Push = InstARM32Push::create(Func, Sources);
  Context.insert(Push);
}
......@@ -257,6 +300,9 @@ protected:
for (Variable *Dest : Dests)
Context.insert(InstFakeDef::create(Func, Dest));
}
// Inserts an InstARM32Ret built from LR and the optional Src0 (nullptr when
// omitted) into the lowering context.
void _ret(Variable *LR, Variable *Src0 = nullptr) {
  auto *Ret = InstARM32Ret::create(Func, LR, Src0);
  Context.insert(Ret);
}
void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred));
......@@ -271,6 +317,10 @@ protected:
Context.insert(
InstARM32Sbc::create(Func, Dest, Src0, Src1, Pred, SetFlags));
}
// Inserts an InstARM32Sdiv built from Dest, Src0 and Src1 into the lowering
// context. Pred defaults to AL (always).
void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
           CondARM32::Cond Pred = CondARM32::AL) {
  auto *Sdiv = InstARM32Sdiv::create(Func, Dest, Src0, Src1, Pred);
  Context.insert(Sdiv);
}
void _str(Variable *Value, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Str::create(Func, Value, Addr, Pred));
......@@ -289,8 +339,14 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Sxt::create(Func, Dest, Src0, Pred));
}
void _ret(Variable *LR, Variable *Src0 = nullptr) {
Context.insert(InstARM32Ret::create(Func, LR, Src0));
void _tst(Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Tst::create(Func, Src0, Src1, Pred));
}
// Inserts an InstARM32Trap into the lowering context.
void _trap() {
  auto *Trap = InstARM32Trap::create(Func);
  Context.insert(Trap);
}
// Inserts an InstARM32Udiv built from Dest, Src0 and Src1 into the lowering
// context. Pred defaults to AL (always).
void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
           CondARM32::Cond Pred = CondARM32::AL) {
  auto *Udiv = InstARM32Udiv::create(Func, Dest, Src0, Src1, Pred);
  Context.insert(Udiv);
}
void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
......@@ -305,7 +361,7 @@ protected:
Context.insert(InstARM32Uxt::create(Func, Dest, Src0, Pred));
}
ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
TargetARM32Features CPUFeatures;
bool UsesFramePointer = false;
bool NeedsStackAlignment = false;
bool MaybeLeafFunc = true;
......@@ -386,6 +442,8 @@ protected:
private:
~TargetHeaderARM32() = default;
TargetARM32Features CPUFeatures;
};
} // end of namespace Ice
......
......@@ -315,6 +315,11 @@ entry:
; OPTM1-LABEL: div64BitSigned
; OPTM1: call {{.*}} R_{{.*}} __divdi3
;
; ARM32-LABEL: div64BitSigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
; ARM32: bl {{.*}} __divdi3
define internal i64 @div64BitSignedConst(i64 %a) {
entry:
......@@ -330,6 +335,14 @@ entry:
; OPTM1: mov DWORD PTR [esp+0xc],0xb3a
; OPTM1: mov DWORD PTR [esp+0x8],0x73ce2ff2
; OPTM1: call {{.*}} R_{{.*}} __divdi3
;
; ARM32-LABEL: div64BitSignedConst
; For a constant, we should be able to optimize-out the divide by zero check.
; ARM32-NOT: orrs
; ARM32: movw {{.*}} ; 0x2ff2
; ARM32: movt {{.*}} ; 0x73ce
; ARM32: movw {{.*}} ; 0xb3a
; ARM32: bl {{.*}} __divdi3
define internal i64 @div64BitUnsigned(i64 %a, i64 %b) {
entry:
......@@ -341,6 +354,11 @@ entry:
;
; OPTM1-LABEL: div64BitUnsigned
; OPTM1: call {{.*}} R_{{.*}} __udivdi3
;
; ARM32-LABEL: div64BitUnsigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
; ARM32: bl {{.*}} __udivdi3
define internal i64 @rem64BitSigned(i64 %a, i64 %b) {
entry:
......@@ -352,6 +370,11 @@ entry:
;
; OPTM1-LABEL: rem64BitSigned
; OPTM1: call {{.*}} R_{{.*}} __moddi3
;
; ARM32-LABEL: rem64BitSigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
; ARM32: bl {{.*}} __moddi3
define internal i64 @rem64BitUnsigned(i64 %a, i64 %b) {
entry:
......@@ -363,6 +386,11 @@ entry:
;
; OPTM1-LABEL: rem64BitUnsigned
; OPTM1: call {{.*}} R_{{.*}} __umoddi3
;
; ARM32-LABEL: rem64BitUnsigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
; ARM32: bl {{.*}} __umoddi3
define internal i64 @shl64BitSigned(i64 %a, i64 %b) {
entry:
......
......@@ -8,10 +8,15 @@
; once enough infrastructure is in. Also, switch to --filetype=obj
; when possible.
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
; RUN: -i %s --args -O2 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
; RUN: -i %s --args -O2 --mattr=hwdiv-arm --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32HWDIV %s
define i32 @Add(i32 %a, i32 %b) {
entry:
......@@ -117,10 +122,32 @@ entry:
; CHECK-LABEL: Sdiv
; CHECK: cdq
; CHECK: idiv e
;
; ARM32-LABEL: Sdiv
; The lowering checks whether the denominator is 0 and traps, since
; ARM normally doesn't trap on divide by 0.
; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; ARM32: bne [[LABEL:[0-9a-f]+]]
; ARM32: .word 0xe7fedef0
; ARM32: [[LABEL]]: {{.*}} bl {{.*}} __divsi3
; ARM32HWDIV-LABEL: Sdiv
; ARM32HWDIV: tst
; ARM32HWDIV: bne
; ARM32HWDIV: sdiv
define i32 @SdivConst(i32 %a) {
entry:
%div = sdiv i32 %a, 219
ret i32 %div
}
; CHECK-LABEL: SdivConst
; CHECK: cdq
; CHECK: idiv e
;
; ARM32-LABEL: SdivConst
; ARM32-NOT: tst
; ARM32: bl {{.*}} __divsi3
; ARM32HWDIV-LABEL: SdivConst
; ARM32HWDIV-NOT: tst
; ARM32HWDIV: sdiv
define i32 @Srem(i32 %a, i32 %b) {
entry:
......@@ -130,7 +157,16 @@ entry:
; CHECK-LABEL: Srem
; CHECK: cdq
; CHECK: idiv e
;
; ARM32-LABEL: Srem
; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; ARM32: bne
; ARM32: bl {{.*}} __modsi3
; ARM32HWDIV-LABEL: Srem
; ARM32HWDIV: tst
; ARM32HWDIV: bne
; ARM32HWDIV: sdiv
; ARM32HWDIV: mls
define i32 @Udiv(i32 %a, i32 %b) {
entry:
......@@ -139,7 +175,15 @@ entry:
}
; CHECK-LABEL: Udiv
; CHECK: div e
;
; ARM32-LABEL: Udiv
; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; ARM32: bne
; ARM32: bl {{.*}} __udivsi3
; ARM32HWDIV-LABEL: Udiv
; ARM32HWDIV: tst
; ARM32HWDIV: bne
; ARM32HWDIV: udiv
define i32 @Urem(i32 %a, i32 %b) {
entry:
......@@ -148,4 +192,13 @@ entry:
}
; CHECK-LABEL: Urem
; CHECK: div e
;
; ARM32-LABEL: Urem
; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; ARM32: bne
; ARM32: bl {{.*}} __umodsi3
; ARM32HWDIV-LABEL: Urem
; ARM32HWDIV: tst
; ARM32HWDIV: bne
; ARM32HWDIV: udiv
; ARM32HWDIV: mls
......@@ -5,9 +5,11 @@
; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s
; TODO(jvoung): Update to -O2 once the phi assignments are done for ARM
; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble --disassemble \
; RUN: --target arm32 -i %s --args -Om1 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
define i32 @testSwitch(i32 %a) {
entry:
......
; This tests the basic structure of the Unreachable instruction.
; RUN: %p2i -i %s --filetype=obj --disassemble -a -O2 | FileCheck %s
; RUN: %p2i -i %s --filetype=obj --disassemble -a -Om1 | FileCheck %s
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -O2 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -Om1 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
define internal i32 @divide(i32 %num, i32 %den) {
entry:
......@@ -22,3 +32,9 @@ return: ; preds = %entry
; CHECK: cdq
; CHECK: idiv
; CHECK: ret
; ARM32-LABEL: divide
; ARM32: cmp
; ARM32: .word 0xe7fedef0
; ARM32: bl {{.*}} __divsi3
; ARM32: bx lr
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment