Commit e4f65d86 by Jim Stichnoth

Subzero: Transform suitable Load/Arith/Store sequences into RMW ops.

Search for sequences of Load/Arith/Store instructions that can be transformed into single non-atomic Read-Modify-Write instructions. Corresponding operands must match up, and it is limited to the operator/type combinations that have simple lowerings. For suitable sequences, an RMW pseudo-instruction is added. Extra variables are attached to the RMW instruction and the original Store instruction, to make it easy to figure out whether to retain the original Store instruction or the new RMW instruction (but never both). The RMW instructions are similar to their non-RMW counterparts, except that the RMW instruction has no Dest variable - the Src[0] operand doubles as the memory-operand dest. The x86-32 integrated assembler has some new forms of existing instructions added. Note: this CL puts the machinery in place to identify, lower, and emit RMW operations only for the "add" instruction operating on i32/i16/i8 operands. The next CL will fill in the rest of the options. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4095 R=jvoung@chromium.org Review URL: https://codereview.chromium.org/1182603004
parent 8b1a7051
...@@ -1759,6 +1759,29 @@ void AssemblerX8632::add(Type Ty, GPRRegister reg, const Immediate &imm) { ...@@ -1759,6 +1759,29 @@ void AssemblerX8632::add(Type Ty, GPRRegister reg, const Immediate &imm) {
emitComplex(Ty, 0, Operand(reg), imm); emitComplex(Ty, 0, Operand(reg), imm);
} }
void AssemblerX8632::add(Type Ty, const Address &address, GPRRegister reg) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
if (Ty == IceType_i16)
emitOperandSizeOverride();
if (isByteSizedArithType(Ty))
emitUint8(0x00);
else
emitUint8(0x01);
emitOperand(reg, address);
}
void AssemblerX8632::add(Type Ty, const Address &address,
const Immediate &imm) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
if (isByteSizedArithType(Ty)) {
emitComplexI8(0, address, imm);
return;
}
if (Ty == IceType_i16)
emitOperandSizeOverride();
emitComplex(Ty, 0, address, imm);
}
void AssemblerX8632::adc(Type Ty, GPRRegister dst, GPRRegister src) { void AssemblerX8632::adc(Type Ty, GPRRegister dst, GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer); AssemblerBuffer::EnsureCapacity ensured(&Buffer);
if (Ty == IceType_i16) if (Ty == IceType_i16)
......
...@@ -720,6 +720,8 @@ public: ...@@ -720,6 +720,8 @@ public:
void add(Type Ty, GPRRegister dst, GPRRegister src); void add(Type Ty, GPRRegister dst, GPRRegister src);
void add(Type Ty, GPRRegister reg, const Address &address); void add(Type Ty, GPRRegister reg, const Address &address);
void add(Type Ty, GPRRegister reg, const Immediate &imm); void add(Type Ty, GPRRegister reg, const Immediate &imm);
void add(Type Ty, const Address &address, GPRRegister reg);
void add(Type Ty, const Address &address, const Immediate &imm);
void adc(Type Ty, GPRRegister dst, GPRRegister src); void adc(Type Ty, GPRRegister dst, GPRRegister src);
void adc(Type Ty, GPRRegister dst, const Address &address); void adc(Type Ty, GPRRegister dst, const Address &address);
......
...@@ -433,8 +433,12 @@ void Cfg::liveness(LivenessMode Mode) { ...@@ -433,8 +433,12 @@ void Cfg::liveness(LivenessMode Mode) {
// register. This is accomplished by extending the entry // register. This is accomplished by extending the entry
// block's instruction range from [2,n) to [1,n) which will // block's instruction range from [2,n) to [1,n) which will
// transform the problematic [2,2) live ranges into [1,2). // transform the problematic [2,2) live ranges into [1,2).
if (FirstInstNum == Inst::NumberInitial) if (Node == getEntryNode()) {
// TODO(stichnot): Make it a strict requirement that the entry
// node gets the lowest instruction numbers, so that extending
// the live range for in-args is guaranteed to work.
FirstInstNum = Inst::NumberExtended; FirstInstNum = Inst::NumberExtended;
}
Node->livenessAddIntervals(getLiveness(), FirstInstNum, LastInstNum); Node->livenessAddIntervals(getLiveness(), FirstInstNum, LastInstNum);
} }
} }
......
...@@ -211,6 +211,7 @@ cl::list<Ice::VerboseItem> VerboseList( ...@@ -211,6 +211,7 @@ cl::list<Ice::VerboseItem> VerboseList(
clEnumValN(Ice::IceV_AddrOpt, "addropt", "Address mode optimization"), clEnumValN(Ice::IceV_AddrOpt, "addropt", "Address mode optimization"),
clEnumValN(Ice::IceV_Random, "random", "Randomization details"), clEnumValN(Ice::IceV_Random, "random", "Randomization details"),
clEnumValN(Ice::IceV_Folding, "fold", "Instruction folding details"), clEnumValN(Ice::IceV_Folding, "fold", "Instruction folding details"),
clEnumValN(Ice::IceV_RMW, "rmw", "ReadModifyWrite optimization"),
clEnumValN(Ice::IceV_All, "all", "Use all verbose options"), clEnumValN(Ice::IceV_All, "all", "Use all verbose options"),
clEnumValN(Ice::IceV_Most, "most", clEnumValN(Ice::IceV_Most, "most",
"Use all verbose options except 'regalloc'"), "Use all verbose options except 'regalloc'"),
......
...@@ -173,6 +173,7 @@ enum VerboseItem { ...@@ -173,6 +173,7 @@ enum VerboseItem {
IceV_AddrOpt = 1 << 9, IceV_AddrOpt = 1 << 9,
IceV_Random = 1 << 10, IceV_Random = 1 << 10,
IceV_Folding = 1 << 11, IceV_Folding = 1 << 11,
IceV_RMW = 1 << 12,
IceV_All = ~IceV_None, IceV_All = ~IceV_None,
IceV_Most = IceV_All & ~IceV_LinearScan IceV_Most = IceV_All & ~IceV_LinearScan
}; };
......
...@@ -422,9 +422,16 @@ InstSelect::InstSelect(Cfg *Func, Variable *Dest, Operand *Condition, ...@@ -422,9 +422,16 @@ InstSelect::InstSelect(Cfg *Func, Variable *Dest, Operand *Condition,
} }
InstStore::InstStore(Cfg *Func, Operand *Data, Operand *Addr) InstStore::InstStore(Cfg *Func, Operand *Data, Operand *Addr)
: InstHighLevel(Func, Inst::Store, 2, nullptr) { : InstHighLevel(Func, Inst::Store, 3, nullptr) {
addSource(Data); addSource(Data);
addSource(Addr); addSource(Addr);
// The 3rd operand is a dummy placeholder for the RMW beacon.
addSource(Data);
}
void InstStore::setRmwBeacon(Variable *Beacon) {
Dest = llvm::dyn_cast<Variable>(getData());
Srcs[2] = Beacon;
} }
InstSwitch::InstSwitch(Cfg *Func, SizeT NumCases, Operand *Source, InstSwitch::InstSwitch(Cfg *Func, SizeT NumCases, Operand *Source,
...@@ -747,11 +754,18 @@ void InstStore::dump(const Cfg *Func) const { ...@@ -747,11 +754,18 @@ void InstStore::dump(const Cfg *Func) const {
return; return;
Ostream &Str = Func->getContext()->getStrDump(); Ostream &Str = Func->getContext()->getStrDump();
Type Ty = getData()->getType(); Type Ty = getData()->getType();
dumpDest(Func);
if (Dest)
Str << " = ";
Str << "store " << Ty << " "; Str << "store " << Ty << " ";
getData()->dump(Func); getData()->dump(Func);
Str << ", " << Ty << "* "; Str << ", " << Ty << "* ";
getAddr()->dump(Func); getAddr()->dump(Func);
Str << ", align " << typeAlignInBytes(Ty); Str << ", align " << typeAlignInBytes(Ty);
if (getRmwBeacon()) {
Str << ", beacon ";
getRmwBeacon()->dump(Func);
}
} }
void InstSwitch::dump(const Cfg *Func) const { void InstSwitch::dump(const Cfg *Func) const {
......
...@@ -691,13 +691,15 @@ class InstStore : public InstHighLevel { ...@@ -691,13 +691,15 @@ class InstStore : public InstHighLevel {
public: public:
static InstStore *create(Cfg *Func, Operand *Data, Operand *Addr, static InstStore *create(Cfg *Func, Operand *Data, Operand *Addr,
uint32_t align = 1) { uint32_t Align = 1) {
// TODO(kschimpf) Stop ignoring alignment specification. // TODO(kschimpf) Stop ignoring alignment specification.
(void)align; (void)Align;
return new (Func->allocate<InstStore>()) InstStore(Func, Data, Addr); return new (Func->allocate<InstStore>()) InstStore(Func, Data, Addr);
} }
Operand *getAddr() const { return getSrc(1); } Operand *getAddr() const { return getSrc(1); }
Operand *getData() const { return getSrc(0); } Operand *getData() const { return getSrc(0); }
Variable *getRmwBeacon() const { return llvm::dyn_cast<Variable>(getSrc(2)); }
void setRmwBeacon(Variable *Beacon);
void dump(const Cfg *Func) const override; void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return Inst->getKind() == Store; } static bool classof(const Inst *Inst) { return Inst->getKind() == Store; }
......
...@@ -111,6 +111,14 @@ OperandX8632Mem::OperandX8632Mem(Cfg *Func, Type Ty, Variable *Base, ...@@ -111,6 +111,14 @@ OperandX8632Mem::OperandX8632Mem(Cfg *Func, Type Ty, Variable *Base,
} }
} }
InstX8632FakeRMW::InstX8632FakeRMW(Cfg *Func, Operand *Data, Operand *Addr,
InstArithmetic::OpKind Op, Variable *Beacon)
: InstX8632(Func, InstX8632::FakeRMW, 3, nullptr), Op(Op) {
addSource(Data);
addSource(Addr);
addSource(Beacon);
}
InstX8632AdjustStack::InstX8632AdjustStack(Cfg *Func, SizeT Amount, InstX8632AdjustStack::InstX8632AdjustStack(Cfg *Func, SizeT Amount,
Variable *Esp) Variable *Esp)
: InstX8632(Func, InstX8632::Adjuststack, 1, Esp), Amount(Amount) { : InstX8632(Func, InstX8632::Adjuststack, 1, Esp), Amount(Amount) {
...@@ -370,6 +378,19 @@ void InstX8632::dump(const Cfg *Func) const { ...@@ -370,6 +378,19 @@ void InstX8632::dump(const Cfg *Func) const {
Inst::dump(Func); Inst::dump(Func);
} }
void InstX8632FakeRMW::dump(const Cfg *Func) const {
if (!ALLOW_DUMP)
return;
Ostream &Str = Func->getContext()->getStrDump();
Type Ty = getData()->getType();
Str << "rmw " << InstArithmetic::getOpName(getOp()) << " " << Ty << " *";
getAddr()->dump(Func);
Str << ", ";
getData()->dump(Func);
Str << ", beacon=";
getBeacon()->dump(Func);
}
void InstX8632Label::emit(const Cfg *Func) const { void InstX8632Label::emit(const Cfg *Func) const {
if (!ALLOW_DUMP) if (!ALLOW_DUMP)
return; return;
...@@ -589,7 +610,9 @@ void InstX8632::emitTwoAddress(const char *Opcode, const Inst *Inst, ...@@ -589,7 +610,9 @@ void InstX8632::emitTwoAddress(const char *Opcode, const Inst *Inst,
return; return;
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 2); assert(Inst->getSrcSize() == 2);
Variable *Dest = Inst->getDest(); Operand *Dest = Inst->getDest();
if (Dest == nullptr)
Dest = Inst->getSrc(0);
assert(Dest == Inst->getSrc(0)); assert(Dest == Inst->getSrc(0));
Operand *Src1 = Inst->getSrc(1); Operand *Src1 = Inst->getSrc(1);
Str << "\t" << Opcode << InstX8632::getWidthString(Dest->getType()) << "\t"; Str << "\t" << Opcode << InstX8632::getWidthString(Dest->getType()) << "\t";
...@@ -911,6 +934,7 @@ template <> const char *InstX8632Movp::Opcode = "movups"; ...@@ -911,6 +934,7 @@ template <> const char *InstX8632Movp::Opcode = "movups";
template <> const char *InstX8632Movq::Opcode = "movq"; template <> const char *InstX8632Movq::Opcode = "movq";
// Binary ops // Binary ops
template <> const char *InstX8632Add::Opcode = "add"; template <> const char *InstX8632Add::Opcode = "add";
template <> const char *InstX8632AddRMW::Opcode = "add";
template <> const char *InstX8632Addps::Opcode = "addps"; template <> const char *InstX8632Addps::Opcode = "addps";
template <> const char *InstX8632Adc::Opcode = "adc"; template <> const char *InstX8632Adc::Opcode = "adc";
template <> const char *InstX8632Addss::Opcode = "addss"; template <> const char *InstX8632Addss::Opcode = "addss";
...@@ -994,6 +1018,9 @@ const X8632::AssemblerX8632::GPREmitterRegOp InstX8632Add::Emitter = { ...@@ -994,6 +1018,9 @@ const X8632::AssemblerX8632::GPREmitterRegOp InstX8632Add::Emitter = {
&X8632::AssemblerX8632::add, &X8632::AssemblerX8632::add, &X8632::AssemblerX8632::add, &X8632::AssemblerX8632::add,
&X8632::AssemblerX8632::add}; &X8632::AssemblerX8632::add};
template <> template <>
const X8632::AssemblerX8632::GPREmitterAddrOp InstX8632AddRMW::Emitter = {
&X8632::AssemblerX8632::add, &X8632::AssemblerX8632::add};
template <>
const X8632::AssemblerX8632::GPREmitterRegOp InstX8632Adc::Emitter = { const X8632::AssemblerX8632::GPREmitterRegOp InstX8632Adc::Emitter = {
&X8632::AssemblerX8632::adc, &X8632::AssemblerX8632::adc, &X8632::AssemblerX8632::adc, &X8632::AssemblerX8632::adc,
&X8632::AssemblerX8632::adc}; &X8632::AssemblerX8632::adc};
......
...@@ -176,6 +176,7 @@ public: ...@@ -176,6 +176,7 @@ public:
k__Start = Inst::Target, k__Start = Inst::Target,
Adc, Adc,
Add, Add,
AddRMW,
Addps, Addps,
Addss, Addss,
Adjuststack, Adjuststack,
...@@ -195,6 +196,7 @@ public: ...@@ -195,6 +196,7 @@ public:
Div, Div,
Divps, Divps,
Divss, Divss,
FakeRMW,
Fld, Fld,
Fstp, Fstp,
Icmp, Icmp,
...@@ -312,6 +314,40 @@ private: ...@@ -312,6 +314,40 @@ private:
} }
}; };
// InstX8632FakeRMW represents a non-atomic read-modify-write operation on a
// memory location. An InstX8632FakeRMW is a "fake" instruction in that it
// still needs to be lowered to some actual RMW instruction.
//
// If A is some memory address, D is some data value to apply, and OP is an
// arithmetic operator, the instruction operates as: (*A) = (*A) OP D
class InstX8632FakeRMW : public InstX8632 {
InstX8632FakeRMW() = delete;
InstX8632FakeRMW(const InstX8632FakeRMW &) = delete;
InstX8632FakeRMW &operator=(const InstX8632FakeRMW &) = delete;
public:
static InstX8632FakeRMW *create(Cfg *Func, Operand *Data, Operand *Addr,
Variable *Beacon, InstArithmetic::OpKind Op,
uint32_t Align = 1) {
// TODO(stichnot): Stop ignoring alignment specification.
(void)Align;
return new (Func->allocate<InstX8632FakeRMW>())
InstX8632FakeRMW(Func, Data, Addr, Op, Beacon);
}
Operand *getAddr() const { return getSrc(1); }
Operand *getData() const { return getSrc(0); }
InstArithmetic::OpKind getOp() const { return Op; }
Variable *getBeacon() const { return llvm::cast<Variable>(getSrc(2)); }
void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, FakeRMW); }
private:
InstArithmetic::OpKind Op;
InstX8632FakeRMW(Cfg *Func, Operand *Data, Operand *Addr,
InstArithmetic::OpKind Op, Variable *Beacon);
~InstX8632FakeRMW() override {}
};
// InstX8632Label represents an intra-block label that is the target // InstX8632Label represents an intra-block label that is the target
// of an intra-block branch. The offset between the label and the // of an intra-block branch. The offset between the label and the
// branch must be fit into one byte (considered "near"). These are // branch must be fit into one byte (considered "near"). These are
...@@ -515,6 +551,9 @@ private: ...@@ -515,6 +551,9 @@ private:
// Emit a one-operand (GPR) instruction. // Emit a one-operand (GPR) instruction.
void emitIASOpTyGPR(const Cfg *Func, Type Ty, const Operand *Var, void emitIASOpTyGPR(const Cfg *Func, Type Ty, const Operand *Var,
const X8632::AssemblerX8632::GPREmitterOneOp &Emitter); const X8632::AssemblerX8632::GPREmitterOneOp &Emitter);
void emitIASAsAddrOpTyGPR(
const Cfg *Func, Type Ty, const Operand *Op0, const Operand *Op1,
const X8632::AssemblerX8632::GPREmitterAddrOp &Emitter);
// Instructions of the form x := op(x). // Instructions of the form x := op(x).
template <InstX8632::InstKindX8632 K> template <InstX8632::InstKindX8632 K>
...@@ -765,6 +804,50 @@ private: ...@@ -765,6 +804,50 @@ private:
static const X8632::AssemblerX8632::GPREmitterRegOp Emitter; static const X8632::AssemblerX8632::GPREmitterRegOp Emitter;
}; };
template <InstX8632::InstKindX8632 K>
class InstX8632BinopRMW : public InstX8632 {
InstX8632BinopRMW() = delete;
InstX8632BinopRMW(const InstX8632BinopRMW &) = delete;
InstX8632BinopRMW &operator=(const InstX8632BinopRMW &) = delete;
public:
// Create an ordinary binary-op instruction like add or sub.
static InstX8632BinopRMW *create(Cfg *Func, OperandX8632Mem *DestSrc0,
Operand *Src1) {
return new (Func->allocate<InstX8632BinopRMW>())
InstX8632BinopRMW(Func, DestSrc0, Src1);
}
void emit(const Cfg *Func) const override {
if (!ALLOW_DUMP)
return;
const bool ShiftHack = false;
emitTwoAddress(Opcode, this, Func, ShiftHack);
}
void emitIAS(const Cfg *Func) const override {
Type Ty = getSrc(0)->getType();
assert(getSrcSize() == 2);
emitIASAsAddrOpTyGPR(Func, Ty, getSrc(0), getSrc(1), Emitter);
}
void dump(const Cfg *Func) const override {
if (!ALLOW_DUMP)
return;
Ostream &Str = Func->getContext()->getStrDump();
Str << Opcode << "." << getSrc(0)->getType() << " ";
dumpSources(Func);
}
static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
private:
InstX8632BinopRMW(Cfg *Func, OperandX8632Mem *DestSrc0, Operand *Src1)
: InstX8632(Func, K, 2, nullptr) {
addSource(DestSrc0);
addSource(Src1);
}
~InstX8632BinopRMW() override {}
static const char *Opcode;
static const X8632::AssemblerX8632::GPREmitterAddrOp Emitter;
};
template <InstX8632::InstKindX8632 K, bool NeedsElementType> template <InstX8632::InstKindX8632 K, bool NeedsElementType>
class InstX8632BinopXmm : public InstX8632 { class InstX8632BinopXmm : public InstX8632 {
InstX8632BinopXmm() = delete; InstX8632BinopXmm() = delete;
...@@ -1017,6 +1100,7 @@ typedef InstX8632Movlike<InstX8632::Movp> InstX8632Movp; ...@@ -1017,6 +1100,7 @@ typedef InstX8632Movlike<InstX8632::Movp> InstX8632Movp;
// Movq - copy between XMM registers, or mem64 and XMM registers. // Movq - copy between XMM registers, or mem64 and XMM registers.
typedef InstX8632Movlike<InstX8632::Movq> InstX8632Movq; typedef InstX8632Movlike<InstX8632::Movq> InstX8632Movq;
typedef InstX8632BinopGPR<InstX8632::Add> InstX8632Add; typedef InstX8632BinopGPR<InstX8632::Add> InstX8632Add;
typedef InstX8632BinopRMW<InstX8632::AddRMW> InstX8632AddRMW;
typedef InstX8632BinopXmm<InstX8632::Addps, true> InstX8632Addps; typedef InstX8632BinopXmm<InstX8632::Addps, true> InstX8632Addps;
typedef InstX8632BinopGPR<InstX8632::Adc> InstX8632Adc; typedef InstX8632BinopGPR<InstX8632::Adc> InstX8632Adc;
typedef InstX8632BinopXmm<InstX8632::Addss, false> InstX8632Addss; typedef InstX8632BinopXmm<InstX8632::Addss, false> InstX8632Addss;
......
...@@ -130,10 +130,13 @@ void TargetLowering::lower() { ...@@ -130,10 +130,13 @@ void TargetLowering::lower() {
assert(!Context.atEnd()); assert(!Context.atEnd());
Inst *Inst = Context.getCur(); Inst *Inst = Context.getCur();
Inst->deleteIfDead(); Inst->deleteIfDead();
if (!Inst->isDeleted()) { if (!Inst->isDeleted() && !llvm::isa<InstFakeDef>(Inst) &&
!llvm::isa<InstFakeUse>(Inst)) {
// Mark the current instruction as deleted before lowering, // Mark the current instruction as deleted before lowering,
// otherwise the Dest variable will likely get marked as non-SSA. // otherwise the Dest variable will likely get marked as non-SSA.
// See Variable::setDefinition(). // See Variable::setDefinition(). However, just pass-through
// FakeDef and FakeUse instructions that might have been inserted
// prior to lowering.
Inst->setDeleted(); Inst->setDeleted();
switch (Inst->getKind()) { switch (Inst->getKind()) {
case Inst::Alloca: case Inst::Alloca:
...@@ -194,15 +197,8 @@ void TargetLowering::lower() { ...@@ -194,15 +197,8 @@ void TargetLowering::lower() {
case Inst::Unreachable: case Inst::Unreachable:
lowerUnreachable(llvm::cast<InstUnreachable>(Inst)); lowerUnreachable(llvm::cast<InstUnreachable>(Inst));
break; break;
case Inst::BundleLock: default:
case Inst::BundleUnlock: lowerOther(Inst);
case Inst::FakeDef:
case Inst::FakeUse:
case Inst::FakeKill:
case Inst::Target:
// These are all Target instruction types and shouldn't be
// encountered at this stage.
Func->setError("Can't lower unsupported instruction type");
break; break;
} }
...@@ -213,6 +209,11 @@ void TargetLowering::lower() { ...@@ -213,6 +209,11 @@ void TargetLowering::lower() {
Context.advanceNext(); Context.advanceNext();
} }
void TargetLowering::lowerOther(const Inst *Instr) {
(void)Instr;
Func->setError("Can't lower unsupported instruction type");
}
// Drives register allocation, allowing all physical registers (except // Drives register allocation, allowing all physical registers (except
// perhaps for the frame pointer) to be allocated. This set of // perhaps for the frame pointer) to be allocated. This set of
// registers could potentially be parameterized if we want to restrict // registers could potentially be parameterized if we want to restrict
......
...@@ -248,6 +248,7 @@ protected: ...@@ -248,6 +248,7 @@ protected:
virtual void lowerStore(const InstStore *Inst) = 0; virtual void lowerStore(const InstStore *Inst) = 0;
virtual void lowerSwitch(const InstSwitch *Inst) = 0; virtual void lowerSwitch(const InstSwitch *Inst) = 0;
virtual void lowerUnreachable(const InstUnreachable *Inst) = 0; virtual void lowerUnreachable(const InstUnreachable *Inst) = 0;
virtual void lowerOther(const Inst *Instr);
virtual void doAddressOptLoad() {} virtual void doAddressOptLoad() {}
virtual void doAddressOptStore() {} virtual void doAddressOptStore() {}
......
...@@ -465,6 +465,12 @@ void TargetX8632::translateO2() { ...@@ -465,6 +465,12 @@ void TargetX8632::translateO2() {
Func->getVMetadata()->init(VMK_SingleDefs); Func->getVMetadata()->init(VMK_SingleDefs);
Func->doAddressOpt(); Func->doAddressOpt();
// Find read-modify-write opportunities. Do this after address mode
// optimization so that doAddressOpt() doesn't need to be applied to RMW
// instructions as well.
findRMW();
Func->dump("After RMW transform");
// Argument lowering // Argument lowering
Func->doArgLowering(); Func->doArgLowering();
...@@ -579,6 +585,146 @@ void TargetX8632::translateOm1() { ...@@ -579,6 +585,146 @@ void TargetX8632::translateOm1() {
namespace { namespace {
bool canRMW(const InstArithmetic *Arith) {
Type Ty = Arith->getDest()->getType();
bool isI64 = Ty == IceType_i64;
bool isVector = isVectorType(Ty);
switch (Arith->getOp()) {
// Not handled for lack of simple lowering:
// shift on i64 and vectors
// mul, udiv, urem, sdiv, srem, frem
default:
return false;
case InstArithmetic::Add:
return !isI64 && !isVector; // TODO(stichnot): implement i64 and vector
case InstArithmetic::Sub:
case InstArithmetic::And:
case InstArithmetic::Or:
case InstArithmetic::Xor:
case InstArithmetic::Fadd:
case InstArithmetic::Fsub:
case InstArithmetic::Fmul:
case InstArithmetic::Fdiv:
return false; // TODO(stichnot): implement
return true;
case InstArithmetic::Shl:
case InstArithmetic::Lshr:
case InstArithmetic::Ashr:
return false; // TODO(stichnot): implement
return !isI64 && !isVector;
}
}
bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
if (A == B)
return true;
if (auto *MemA = llvm::dyn_cast<OperandX8632Mem>(A)) {
if (auto *MemB = llvm::dyn_cast<OperandX8632Mem>(B)) {
return MemA->getBase() == MemB->getBase() &&
MemA->getOffset() == MemB->getOffset() &&
MemA->getIndex() == MemB->getIndex() &&
MemA->getShift() == MemB->getShift() &&
MemA->getSegmentRegister() == MemB->getSegmentRegister();
}
}
return false;
}
} // end of anonymous namespace
void TargetX8632::findRMW() {
Func->dump("Before RMW");
OstreamLocker L(Func->getContext());
Ostream &Str = Func->getContext()->getStrDump();
for (CfgNode *Node : Func->getNodes()) {
// Walk through the instructions, considering each sequence of 3
// instructions, and look for the particular RMW pattern. Note that this
// search can be "broken" (false negatives) if there are intervening deleted
// instructions, or intervening instructions that could be safely moved out
// of the way to reveal an RMW pattern.
auto E = Node->getInsts().end();
auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
// Make I3 skip over deleted instructions.
while (I3 != E && I3->isDeleted())
++I3;
if (I1 == E || I2 == E || I3 == E)
continue;
assert(!I1->isDeleted());
assert(!I2->isDeleted());
assert(!I3->isDeleted());
if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) {
if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) {
if (auto *Store = llvm::dyn_cast<InstStore>(I3)) {
// Look for:
// a = Load addr
// b = <op> a, other
// Store b, addr
// Change to:
// a = Load addr
// b = <op> a, other
// x = FakeDef
// RMW <op>, addr, other, x
// b = Store b, addr, x
// Note that inferTwoAddress() makes sure setDestNonKillable() gets
// called on the updated Store instruction, to avoid liveness
// problems later.
//
// With this transformation, the Store instruction acquires a Dest
// variable and is now subject to dead code elimination if there are
// no more uses of "b". Variable "x" is a beacon for determining
// whether the Store instruction gets dead-code eliminated. If the
// Store instruction is eliminated, then it must be the case that
// the RMW instruction ends x's live range, and therefore the RMW
// instruction will be retained and later lowered. On the other
// hand, if the RMW instruction does not end x's live range, then
// the Store instruction must still be present, and therefore the
// RMW instruction is ignored during lowering because it is
// redundant with the Store instruction.
//
// Note that if "a" has further uses, the RMW transformation may
// still trigger, resulting in two loads and one store, which is
// worse than the original one load and one store. However, this is
// probably rare, and caching probably keeps it just as fast.
if (!isSameMemAddressOperand(Load->getSourceAddress(),
Store->getAddr()))
continue;
if (false && Load->getSourceAddress() != Store->getAddr())
continue;
if (Arith->getSrc(0) != Load->getDest())
continue;
if (Arith->getDest() != Store->getData())
continue;
if (!canRMW(Arith))
continue;
if (Func->isVerbose(IceV_RMW)) {
Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
Load->dump(Func);
Str << "\n ";
Arith->dump(Func);
Str << "\n ";
Store->dump(Func);
Str << "\n";
}
Variable *Beacon = Func->makeVariable(IceType_i32);
Beacon->setWeight(0);
Store->setRmwBeacon(Beacon);
InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
Node->getInsts().insert(I3, BeaconDef);
InstX8632FakeRMW *RMW = InstX8632FakeRMW::create(
Func, Arith->getSrc(1), Store->getAddr(), Beacon,
Arith->getOp());
Node->getInsts().insert(I3, RMW);
}
}
}
}
}
}
namespace {
// Converts a ConstantInteger32 operand into its constant value, or // Converts a ConstantInteger32 operand into its constant value, or
// MemoryOrderInvalid if the operand is not a ConstantInteger32. // MemoryOrderInvalid if the operand is not a ConstantInteger32.
uint64_t getConstantMemoryOrder(Operand *Opnd) { uint64_t getConstantMemoryOrder(Operand *Opnd) {
...@@ -4394,7 +4540,10 @@ void TargetX8632::doAddressOptStore() { ...@@ -4394,7 +4540,10 @@ void TargetX8632::doAddressOptStore() {
Constant *OffsetOp = Ctx->getConstantInt32(Offset); Constant *OffsetOp = Ctx->getConstantInt32(Offset);
Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
Shift, SegmentReg); Shift, SegmentReg);
Context.insert(InstStore::create(Func, Data, Addr)); InstStore *NewStore = InstStore::create(Func, Data, Addr);
if (Inst->getDest())
NewStore->setRmwBeacon(Inst->getRmwBeacon());
Context.insert(NewStore);
} }
} }
...@@ -4497,6 +4646,46 @@ void TargetX8632::eliminateNextVectorSextInstruction( ...@@ -4497,6 +4646,46 @@ void TargetX8632::eliminateNextVectorSextInstruction(
void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); }
void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) {
// If the beacon variable's live range does not end in this
// instruction, then it must end in the modified Store instruction
// that follows. This means that the original Store instruction is
// still there, either because the value being stored is used beyond
// the Store instruction, or because dead code elimination did not
// happen. In either case, we cancel RMW lowering (and the caller
// deletes the RMW instruction).
if (!RMW->isLastUse(RMW->getBeacon()))
return;
Operand *Src = RMW->getData();
Type Ty = Src->getType();
OperandX8632Mem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
if (Ty == IceType_i64) {
// TODO(stichnot): Implement.
} else if (isVectorType(Ty)) {
// TODO(stichnot): Implement.
} else {
// i8, i16, i32, f32, f64
switch (RMW->getOp()) {
default:
// TODO(stichnot): Implement other arithmetic operators.
break;
case InstArithmetic::Add:
Src = legalize(Src, Legal_Reg | Legal_Imm);
_add_rmw(Addr, Src);
return;
}
}
llvm::report_fatal_error("Couldn't lower RMW instruction");
}
void TargetX8632::lowerOther(const Inst *Instr) {
if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) {
lowerRMW(RMW);
} else {
TargetLowering::lowerOther(Instr);
}
}
// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
// preserve integrity of liveness analysis. Undef values are also // preserve integrity of liveness analysis. Undef values are also
// turned into zeroes, since loOperand() and hiOperand() don't expect // turned into zeroes, since loOperand() and hiOperand() don't expect
......
...@@ -180,6 +180,8 @@ protected: ...@@ -180,6 +180,8 @@ protected:
void lowerStore(const InstStore *Inst) override; void lowerStore(const InstStore *Inst) override;
void lowerSwitch(const InstSwitch *Inst) override; void lowerSwitch(const InstSwitch *Inst) override;
void lowerUnreachable(const InstUnreachable *Inst) override; void lowerUnreachable(const InstUnreachable *Inst) override;
void lowerOther(const Inst *Instr) override;
void lowerRMW(const InstX8632FakeRMW *RMW);
void prelowerPhis() override; void prelowerPhis() override;
void lowerPhiAssignments(CfgNode *Node, void lowerPhiAssignments(CfgNode *Node,
const AssignList &Assignments) override; const AssignList &Assignments) override;
...@@ -265,6 +267,9 @@ protected: ...@@ -265,6 +267,9 @@ protected:
void _add(Variable *Dest, Operand *Src0) { void _add(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Add::create(Func, Dest, Src0)); Context.insert(InstX8632Add::create(Func, Dest, Src0));
} }
void _add_rmw(OperandX8632Mem *DestSrc0, Operand *Src1) {
Context.insert(InstX8632AddRMW::create(Func, DestSrc0, Src1));
}
void _adjust_stack(int32_t Amount) { void _adjust_stack(int32_t Amount) {
Context.insert(InstX8632AdjustStack::create( Context.insert(InstX8632AdjustStack::create(
Func, Amount, getPhysicalRegister(RegX8632::Reg_esp))); Func, Amount, getPhysicalRegister(RegX8632::Reg_esp)));
...@@ -561,6 +566,7 @@ protected: ...@@ -561,6 +566,7 @@ protected:
} }
bool optimizeScalarMul(Variable *Dest, Operand *Src0, int32_t Src1); bool optimizeScalarMul(Variable *Dest, Operand *Src0, int32_t Src1);
void findRMW();
X86InstructionSet InstructionSet; X86InstructionSet InstructionSet;
bool IsEbpBasedFrame; bool IsEbpBasedFrame;
......
...@@ -26,6 +26,7 @@ entry: ...@@ -26,6 +26,7 @@ entry:
; CHECK: .long p_global_char ; CHECK: .long p_global_char
; CHECK: .long global_char ; CHECK: .long global_char
; Also exercises the RMW add operation.
define internal void @add_in_place() { define internal void @add_in_place() {
entry: entry:
%p_global_char.bc = bitcast [4 x i8]* @p_global_char to i32* %p_global_char.bc = bitcast [4 x i8]* @p_global_char to i32*
...@@ -37,8 +38,8 @@ entry: ...@@ -37,8 +38,8 @@ entry:
ret void ret void
} }
; CHECK-LABEL: add_in_place ; CHECK-LABEL: add_in_place
; CHECK: .long global_char
; CHECK: .long p_global_char ; CHECK: .long p_global_char
; CHECK-NEXT: .long global_char
define internal void @cmp_global_immediate() { define internal void @cmp_global_immediate() {
entry: entry:
......
; This tests Read-Modify-Write (RMW) detection and lowering at the O2
; optimization level.
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -O2 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
define internal void @rmw_add_i32_var(i32 %addr_arg, i32 %var) {
entry:
%addr = inttoptr i32 %addr_arg to i32*
%val = load i32, i32* %addr, align 1
%rmw = add i32 %val, %var
store i32 %rmw, i32* %addr, align 1
ret void
}
; Look for something like: add DWORD PTR [eax],ecx
; CHECK-LABEL: rmw_add_i32_var
; CHECK: add DWORD PTR [e{{ax|bx|cx|dx|bp|di|si}}],e{{ax|bx|cx|dx|bp|di|si}}
define internal void @rmw_add_i32_imm(i32 %addr_arg) {
entry:
%addr = inttoptr i32 %addr_arg to i32*
%val = load i32, i32* %addr, align 1
%rmw = add i32 %val, 19
store i32 %rmw, i32* %addr, align 1
ret void
}
; Look for something like: add DWORD PTR [eax],0x13
; CHECK-LABEL: rmw_add_i32_imm
; CHECK: add DWORD PTR [e{{ax|bx|cx|dx|bp|di|si}}],0x13
define internal i32 @no_rmw_add_i32_var(i32 %addr_arg, i32 %var) {
entry:
%addr = inttoptr i32 %addr_arg to i32*
%val = load i32, i32* %addr, align 1
%rmw = add i32 %val, %var
store i32 %rmw, i32* %addr, align 1
ret i32 %rmw
}
; CHECK-LABEL: no_rmw_add_i32_var
; CHECK: add e{{ax|bx|cx|dx|bp|di|si}},DWORD PTR [e{{ax|bx|cx|dx|bp|di|si}}]
define internal void @rmw_add_i16_var(i32 %addr_arg, i16 %var) {
entry:
%addr = inttoptr i32 %addr_arg to i16*
%val = load i16, i16* %addr, align 1
%rmw = add i16 %val, %var
store i16 %rmw, i16* %addr, align 1
ret void
}
; Look for something like: add WORD PTR [eax],cx
; CHECK-LABEL: rmw_add_i16_var
; CHECK: add WORD PTR [e{{ax|bx|cx|dx|bp|di|si}}],{{ax|bx|cx|dx|bp|di|si}}
define internal void @rmw_add_i16_imm(i32 %addr_arg) {
entry:
%addr = inttoptr i32 %addr_arg to i16*
%val = load i16, i16* %addr, align 1
%rmw = add i16 %val, 19
store i16 %rmw, i16* %addr, align 1
ret void
}
; Look for something like: add WORD PTR [eax],0x13
; CHECK-LABEL: rmw_add_i16_imm
; CHECK: add WORD PTR [e{{ax|bx|cx|dx|bp|di|si}}],0x13
define internal void @rmw_add_i8_var(i32 %addr_arg, i8 %var) {
entry:
%addr = inttoptr i32 %addr_arg to i8*
%val = load i8, i8* %addr, align 1
%rmw = add i8 %val, %var
store i8 %rmw, i8* %addr, align 1
ret void
}
; Look for something like: add BYTE PTR [eax],cl
; CHECK-LABEL: rmw_add_i8_var
; CHECK: add BYTE PTR [e{{ax|bx|cx|dx|bp|di|si}}],{{al|bl|cl|dl}}
define internal void @rmw_add_i8_imm(i32 %addr_arg) {
entry:
%addr = inttoptr i32 %addr_arg to i8*
%val = load i8, i8* %addr, align 1
%rmw = add i8 %val, 19
store i8 %rmw, i8* %addr, align 1
ret void
}
; Look for something like: add BYTE PTR [eax],0x13
; CHECK-LABEL: rmw_add_i8_imm
; CHECK: add BYTE PTR [e{{ax|bx|cx|dx|bp|di|si}}],0x13
define internal void @rmw_add_i32_var_addropt(i32 %addr_arg, i32 %var) {
entry:
%addr_arg_plus_12 = add i32 %addr_arg, 12
%var_times_4 = mul i32 %var, 4
%addr_base = add i32 %addr_arg_plus_12 , %var_times_4
%addr = inttoptr i32 %addr_base to i32*
%val = load i32, i32* %addr, align 1
%rmw = add i32 %val, %var
store i32 %rmw, i32* %addr, align 1
ret void
}
; Look for something like: add DWORD PTR [eax+ecx*4+12],ecx
; CHECK-LABEL: rmw_add_i32_var_addropt
; CHECK: add DWORD PTR [e{{..}}+e{{..}}*4+0xc],e{{ax|bx|cx|dx|bp|di|si}}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment