Commit befd03ab by Jan Voung

Subzero ARM: lowerLoad and lowerStore.

I considered leaving "mov" simple and not handling memory operands, but then we'd have to duplicate some of the lowerAssign code for lowerLoad =/ BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=kschimpf@google.com, stichnot@chromium.org Review URL: https://codereview.chromium.org/1152703006
parent e5b58fbe
......@@ -281,6 +281,13 @@ InstARM32Ret::InstARM32Ret(Cfg *Func, Variable *LR, Variable *Source)
addSource(Source);
}
// Constructs a predicated ARM32 store instruction.
// The base instruction is created with room for two sources and with a
// null Dest; the value to store and the memory operand are then both
// registered as sources, in that order.
InstARM32Str::InstARM32Str(Cfg *Func, Variable *Value, OperandARM32Mem *Mem,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Str, 2, nullptr, Predicate) {
// Source 0: the value being stored.
addSource(Value);
// Source 1: the memory operand that receives the value.
addSource(Mem);
}
InstARM32Umull::InstARM32Umull(Cfg *Func, Variable *DestLo, Variable *DestHi,
Variable *Src0, Variable *Src1,
CondARM32::Cond Predicate)
......@@ -327,16 +334,15 @@ template <> void InstARM32Mov::emit(const Cfg *Func) const {
assert(getSrcSize() == 1);
Variable *Dest = getDest();
if (Dest->hasReg()) {
const char *Opcode = "mov";
IceString Opcode = "mov";
Operand *Src0 = getSrc(0);
if (const auto *Src0V = llvm::dyn_cast<Variable>(Src0)) {
if (!Src0V->hasReg()) {
Opcode = "ldr"; // Always load the full stack slot (vs ldrb, ldrh).
Opcode = IceString("ldr"); // Always use the whole stack slot.
}
} else {
// If Src isn't a variable, it shouldn't be a memory operand either
// (otherwise Opcode will have to be ldr).
assert(!llvm::isa<OperandARM32Mem>(Src0));
if (llvm::isa<OperandARM32Mem>(Src0))
Opcode = IceString("ldr") + getWidthString(Dest->getType());
}
Str << "\t" << Opcode << getPredicate() << "\t";
getDest()->emit(Func);
......@@ -656,6 +662,36 @@ void InstARM32Ret::dump(const Cfg *Func) const {
dumpSources(Func);
}
void InstARM32Str::emit(const Cfg *Func) const {
if (!ALLOW_DUMP)
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2);
Type Ty = getSrc(0)->getType();
Str << "\t"
<< "str" << getWidthString(Ty) << getPredicate() << "\t";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
}
// Integrated-assembler emission for the store. Not implemented yet: after
// checking the source count this always hits llvm_unreachable.
void InstARM32Str::emitIAS(const Cfg *Func) const {
  // Func is intentionally unused until the integrated assembler path exists.
  (void)Func;
  assert(getSrcSize() == 2);
  llvm_unreachable("Not yet implemented");
}
void InstARM32Str::dump(const Cfg *Func) const {
if (!ALLOW_DUMP)
return;
Ostream &Str = Func->getContext()->getStrDump();
dumpOpcodePred(Str, "str", getDest()->getType());
Str << " ";
getSrc(1)->dump(Func);
Str << ", ";
getSrc(0)->dump(Func);
}
void InstARM32Umull::emit(const Cfg *Func) const {
if (!ALLOW_DUMP)
return;
......
......@@ -91,7 +91,7 @@ public:
// general Constant operands like ConstantRelocatable, since a relocatable
// can potentially take up too many bits.
static OperandARM32Mem *create(Cfg *Func, Type Ty, Variable *Base,
ConstantInteger32 *ImmOffset = nullptr,
ConstantInteger32 *ImmOffset,
AddrMode Mode = Offset) {
return new (Func->allocate<OperandARM32Mem>())
OperandARM32Mem(Func, Ty, Base, ImmOffset, Mode);
......@@ -277,6 +277,7 @@ public:
Push,
Ret,
Sbc,
Str,
Sub,
Umull
};
......@@ -763,6 +764,31 @@ private:
~InstARM32Ret() override {}
};
// Store instruction. It's important for liveness that there is no Dest
// operand (OperandARM32Mem instead of Dest Variable).
class InstARM32Str : public InstARM32Pred {
InstARM32Str() = delete;
InstARM32Str(const InstARM32Str &) = delete;
InstARM32Str &operator=(const InstARM32Str &) = delete;
public:
// Value must be a register.
static InstARM32Str *create(Cfg *Func, Variable *Value, OperandARM32Mem *Mem,
CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Str>())
InstARM32Str(Func, Value, Mem, Predicate);
}
void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Str); }
private:
InstARM32Str(Cfg *Func, Variable *Value, OperandARM32Mem *Mem,
CondARM32::Cond Predicate);
~InstARM32Str() override {}
};
// Unsigned Multiply Long: d.lo, d.hi := x * y
class InstARM32Umull : public InstARM32Pred {
InstARM32Umull() = delete;
......
......@@ -251,9 +251,9 @@ void TargetLowering::sortVarsByAlignment(VarList &Dest,
// as the buckets, if performance is an issue.
std::sort(Dest.begin(), Dest.end(),
[this](const Variable *V1, const Variable *V2) {
return typeWidthInBytesOnStack(V1->getType()) >
typeWidthInBytesOnStack(V2->getType());
});
return typeWidthInBytesOnStack(V1->getType()) >
typeWidthInBytesOnStack(V2->getType());
});
}
void TargetLowering::getVarStackSlotParams(
......
......@@ -1628,9 +1628,18 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return;
}
void TargetARM32::lowerLoad(const InstLoad *Inst) {
(void)Inst;
UnimplementedError(Func->getContext()->getFlags());
void TargetARM32::lowerLoad(const InstLoad *Load) {
// A Load instruction can be treated the same as an Assign
// instruction, after the source operand is transformed into an
// OperandARM32Mem operand.
Type Ty = Load->getDest()->getType();
Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
Variable *DestLoad = Load->getDest();
// TODO(jvoung): handled folding opportunities. Sign and zero extension
// can be folded into a load.
InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
lowerAssign(Assign);
}
void TargetARM32::doAddressOptLoad() {
......@@ -1687,8 +1696,22 @@ void TargetARM32::lowerSelect(const InstSelect *Inst) {
}
// Lowers a store into ARM32 "str" instructions.
// The store's address is converted into an OperandARM32Mem whose type is the
// type of the stored data; the data itself is legalized into a register
// before each str is emitted.
void TargetARM32::lowerStore(const InstStore *Inst) {
// NOTE(review): the next two statements look like the previous
// "unimplemented" stub shown by the diff view next to the new body --
// confirm against the real file before relying on them.
(void)Inst;
UnimplementedError(Func->getContext()->getFlags());
Operand *Value = Inst->getData();
Operand *Addr = Inst->getAddr();
OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
Type Ty = NewAddr->getType();
if (Ty == IceType_i64) {
// 64-bit values are stored as two halves: the hi and lo parts of the value
// are each legalized into a register and written to the hi and lo parts of
// the memory operand, hi first.
Variable *ValueHi = legalizeToVar(hiOperand(Value));
Variable *ValueLo = legalizeToVar(loOperand(Value));
_str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
_str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
} else if (isVectorType(Ty)) {
// Vector stores are not handled yet.
UnimplementedError(Func->getContext()->getFlags());
} else {
// Everything else: put the value in a register and emit a single str.
Variable *ValueR = legalizeToVar(Value);
_str(ValueR, NewAddr);
}
}
void TargetARM32::doAddressOptStore() {
......@@ -1891,6 +1914,23 @@ Variable *TargetARM32::legalizeToVar(Operand *From, int32_t RegNum) {
return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}
// Turns an address operand into a legalized OperandARM32Mem of type Ty.
// If the operand already is an OperandARM32Mem (address mode optimization
// may have produced one), it is only legalized. Otherwise the operand is
// legalized into a register that serves as the base, paired with a zero
// immediate offset.
OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
  // Already a memory operand: no further transformation is needed beyond
  // legalization.
  if (auto *ExistingMem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    return llvm::cast<OperandARM32Mem>(legalize(ExistingMem));
  }
  // Without address mode optimization there is only a base/offset to work
  // with. ARM always requires a base register, so hold the operand in one.
  Variable *BaseReg = legalizeToVar(Operand);
  auto *ZeroOffset =
      llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32));
  return OperandARM32Mem::create(Func, Ty, BaseReg, ZeroOffset);
}
Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
// There aren't any 64-bit integer registers for ARM32.
assert(Type != IceType_i64);
......
......@@ -117,6 +117,7 @@ protected:
Operand *legalize(Operand *From, LegalMask Allowed = Legal_All,
int32_t RegNum = Variable::NoRegister);
Variable *legalizeToVar(Operand *From, int32_t RegNum = Variable::NoRegister);
OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);
Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister);
static Type stackSlotType();
......@@ -241,6 +242,10 @@ protected:
Context.insert(
InstARM32Sbc::create(Func, Dest, Src0, Src1, Pred, SetFlags));
}
// Creates a store of Value to Addr under predicate Pred (defaulting to
// CondARM32::AL) and inserts it at the current lowering context.
void _str(Variable *Value, OperandARM32Mem *Addr,
          CondARM32::Cond Pred = CondARM32::AL) {
  auto *Store = InstARM32Str::create(Func, Value, Addr, Pred);
  Context.insert(Store);
}
void _sub(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Sub::create(Func, Dest, Src0, Src1, Pred));
......
......@@ -2981,7 +2981,7 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
// can't happen anyway, since this is x86-32 and integer arithmetic only
// happens on 32-bit quantities.
Variable *T = makeReg(IceType_f64);
OperandX8632Mem *Addr = FormMemoryOperand(Instr->getArg(0), IceType_f64);
OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64);
_movq(T, Addr);
// Then cast the bits back out of the XMM register to the i64 Dest.
InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
......@@ -3030,7 +3030,7 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
lowerCast(Cast);
// Then store XMM w/ a movq.
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, IceType_f64);
OperandX8632Mem *Addr = formMemoryOperand(Ptr, IceType_f64);
_storeq(T, Addr);
_mfence();
return;
......@@ -3239,7 +3239,7 @@ void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
Operand *Expected, Operand *Desired) {
if (Expected->getType() == IceType_i64) {
// Reserve the pre-colored registers first, before adding any more
// infinite-weight variables from FormMemoryOperand's legalization.
// infinite-weight variables from formMemoryOperand's legalization.
Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
......@@ -3248,7 +3248,7 @@ void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
_mov(T_edx, hiOperand(Expected));
_mov(T_ebx, loOperand(Desired));
_mov(T_ecx, hiOperand(Desired));
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
const bool Locked = true;
_cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
......@@ -3259,7 +3259,7 @@ void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
}
Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax);
_mov(T_eax, Expected);
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
Variable *DesiredReg = legalizeToVar(Desired);
const bool Locked = true;
_cmpxchg(Addr, T_eax, DesiredReg, Locked);
......@@ -3357,7 +3357,7 @@ void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
Op_Hi = &TargetX8632::_adc;
break;
}
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
const bool Locked = true;
Variable *T = nullptr;
_mov(T, Val);
......@@ -3372,7 +3372,7 @@ void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
Op_Hi = &TargetX8632::_sbb;
break;
}
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
const bool Locked = true;
Variable *T = nullptr;
_mov(T, Val);
......@@ -3410,7 +3410,7 @@ void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
Op_Hi = nullptr;
break;
}
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
Variable *T = nullptr;
_mov(T, Val);
_xchg(Addr, T);
......@@ -3455,7 +3455,7 @@ void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
if (Ty == IceType_i64) {
Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
_mov(T_eax, loOperand(Addr));
_mov(T_edx, hiOperand(Addr));
Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
......@@ -3502,7 +3502,7 @@ void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
_mov(DestHi, T_edx);
return;
}
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax);
_mov(T_eax, Addr);
InstX8632Label *Label = InstX8632Label::create(Func, this);
......@@ -3853,7 +3853,7 @@ void TargetX8632::lowerLoad(const InstLoad *Load) {
// optimization already creates an OperandX8632Mem operand, so it
// doesn't need another level of transformation.
Type Ty = Load->getDest()->getType();
Operand *Src0 = FormMemoryOperand(Load->getSourceAddress(), Ty);
Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
// Fuse this load with a subsequent Arithmetic instruction in the
// following situations:
......@@ -4124,7 +4124,7 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
void TargetX8632::lowerStore(const InstStore *Inst) {
Operand *Value = Inst->getData();
Operand *Addr = Inst->getAddr();
OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType());
OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
Type Ty = NewAddr->getType();
if (Ty == IceType_i64) {
......@@ -4639,7 +4639,7 @@ Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) {
return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
}
OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {
OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty) {
OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
// It may be the case that address mode optimization already creates
// an OperandX8632Mem, so in that case it wouldn't need another level
......
......@@ -229,7 +229,7 @@ protected:
// Turn a pointer operand into a memory operand that can be
// used by a real load/store operation. Legalizes the operand as well.
// This is a nop if the operand is already a legal memory operand.
OperandX8632Mem *FormMemoryOperand(Operand *Ptr, Type Ty);
OperandX8632Mem *formMemoryOperand(Operand *Ptr, Type Ty);
Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister);
static Type stackSlotType();
......
......@@ -1303,6 +1303,10 @@ entry:
; OPTM1: mov e{{..}},DWORD PTR [e{{..}}]
; OPTM1: mov e{{..}},DWORD PTR [e{{..}}+0x4]
; ARM32-LABEL: load64
; ARM32: ldr r{{.*}}, [r[[REG:.*]]]
; ARM32: ldr r{{.*}}, [r[[REG]], #4]
define internal void @store64(i32 %a, i64 %value) {
entry:
%__2 = inttoptr i32 %a to i64*
......@@ -1318,6 +1322,10 @@ entry:
; OPTM1: mov DWORD PTR [e[[REGISTER:[a-z]+]]+0x4],
; OPTM1: mov DWORD PTR [e[[REGISTER]]],
; ARM32-LABEL: store64
; ARM32: str r{{.*}}, [r[[REG:.*]], #4]
; ARM32: str r{{.*}}, [r[[REG]]]
define internal void @store64Const(i32 %a) {
entry:
%__1 = inttoptr i32 %a to i64*
......@@ -1333,6 +1341,14 @@ entry:
; OPTM1: mov DWORD PTR [e[[REGISTER:[a-z]+]]+0x4],0xdeadbeef
; OPTM1: mov DWORD PTR [e[[REGISTER]]],0x12345678
; ARM32-LABEL: store64Const
; ARM32: movw [[REG1:.*]], #48879 ; 0xbeef
; ARM32: movt [[REG1:.*]], #57005 ; 0xdead
; ARM32: movw [[REG2:.*]], #22136 ; 0x5678
; ARM32: movt [[REG2:.*]], #4660 ; 0x1234
; ARM32: str [[REG1]], [r[[REG:.*]], #4]
; ARM32: str [[REG2]], [r[[REG]]]
define internal i64 @select64VarVar(i64 %a, i64 %b) {
entry:
%cmp = icmp ult i64 %a, %b
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment