Commit 00741a00 by Andrew Scull

Improve use of CfgLocalAllocator and introduce containers that use it.

This doesn't make a big difference but does reduce the proportion of time spent in malloc and free.

BUG=
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1349833005 .
parent 385351ba
......@@ -121,6 +121,7 @@ ifdef ASAN
endif
ifdef MSAN
# TODO(ascull): this has an as yet undiagnosed uninitialized memory access
OBJDIR := $(OBJDIR)+MSan
CXX_EXTRA += -fsanitize=memory
LD_EXTRA += -fsanitize=memory
......
......@@ -314,13 +314,13 @@ void Cfg::advancedPhiLowering() {
void Cfg::reorderNodes() {
// TODO(ascull): it would be nice if the switch tests were always followed by
// the default case to allow for fall through.
using PlacedList = std::list<CfgNode *>;
using PlacedList = CfgList<CfgNode *>;
PlacedList Placed; // Nodes with relative placement locked down
PlacedList Unreachable; // Unreachable nodes
PlacedList::iterator NoPlace = Placed.end();
// Keep track of where each node has been tentatively placed so that we can
// manage insertions into the middle.
std::vector<PlacedList::iterator> PlaceIndex(Nodes.size(), NoPlace);
CfgVector<PlacedList::iterator> PlaceIndex(Nodes.size(), NoPlace);
for (CfgNode *Node : Nodes) {
// The "do ... while(0);" construct is to factor out the --PlaceIndex and
// assert() statements before moving to the next node.
......
......@@ -273,7 +273,7 @@ private:
std::unique_ptr<Assembler> TargetAssembler;
/// Globals required by this CFG. Mostly used for the profiler's globals.
std::unique_ptr<VariableDeclarationList> GlobalInits;
std::vector<InstJumpTable *> JumpTables;
CfgVector<InstJumpTable *> JumpTables;
/// CurrentNode is maintained during dumping/emitting just for validating
/// Variable::DefNode. Normally, a traversal over CfgNodes maintains this, but
......
......@@ -827,7 +827,7 @@ namespace {
// Helper functions for emit().
void emitRegisterUsage(Ostream &Str, const Cfg *Func, const CfgNode *Node,
bool IsLiveIn, std::vector<SizeT> &LiveRegCount) {
bool IsLiveIn, CfgVector<SizeT> &LiveRegCount) {
if (!BuildDefs::dump())
return;
Liveness *Liveness = Func->getLiveness();
......@@ -840,7 +840,7 @@ void emitRegisterUsage(Ostream &Str, const Cfg *Func, const CfgNode *Node,
Str << "\t\t\t\t# LiveOut=";
}
if (!Live->empty()) {
std::vector<Variable *> LiveRegs;
CfgVector<Variable *> LiveRegs;
for (SizeT i = 0; i < Live->size(); ++i) {
if ((*Live)[i]) {
Variable *Var = Liveness->getVariable(i, Node);
......@@ -869,7 +869,7 @@ void emitRegisterUsage(Ostream &Str, const Cfg *Func, const CfgNode *Node,
}
void emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
std::vector<SizeT> &LiveRegCount) {
CfgVector<SizeT> &LiveRegCount) {
if (!BuildDefs::dump())
return;
bool First = true;
......@@ -935,7 +935,7 @@ void CfgNode::emit(Cfg *Func) const {
// each register is assigned to. Normally that would be only 0 or 1, but the
// register allocator's AllowOverlap inference allows it to be greater than 1
// for short periods.
std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
CfgVector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
if (DecorateAsm) {
constexpr bool IsLiveIn = true;
emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
......
......@@ -145,10 +145,14 @@ using InstList = llvm::ilist<Inst>;
using PhiList = InstList;
using AssignList = InstList;
// Standard library containers with CfgLocalAllocator.
template <typename T> using CfgVector = std::vector<T, CfgLocalAllocator<T>>;
template <typename T> using CfgList = std::list<T, CfgLocalAllocator<T>>;
// Containers that are arena-allocated from the Cfg's allocator.
using OperandList = std::vector<Operand *, CfgLocalAllocator<Operand *>>;
using VarList = std::vector<Variable *, CfgLocalAllocator<Variable *>>;
using NodeList = std::vector<CfgNode *, CfgLocalAllocator<CfgNode *>>;
using OperandList = CfgVector<Operand *>;
using VarList = CfgVector<Variable *>;
using NodeList = CfgVector<CfgNode *>;
// Containers that use the default (global) allocator.
using ConstantList = std::vector<Constant *>;
......@@ -168,8 +172,7 @@ using InstNumberT = int32_t;
/// value, giving the instruction number that begins or ends a variable's live
/// range.
using LiveBeginEndMapEntry = std::pair<SizeT, InstNumberT>;
using LiveBeginEndMap =
std::vector<LiveBeginEndMapEntry, CfgLocalAllocator<LiveBeginEndMapEntry>>;
using LiveBeginEndMap = CfgVector<LiveBeginEndMapEntry>;
using LivenessBV = llvm::BitVector;
using TimerStackIdT = uint32_t;
......
......@@ -722,10 +722,8 @@ GlobalContext::~GlobalContext() {
llvm::DeleteContainerPointers(AllThreadContexts);
LockedPtr<DestructorArray> Dtors = getDestructors();
// Destructors are invoked in the opposite object construction order.
for (auto DtorIter = Dtors->crbegin(); DtorIter != Dtors->crend();
++DtorIter) {
(*DtorIter)();
}
for (const auto &Dtor : reverse_range(*Dtors))
Dtor();
}
// TODO(stichnot): Consider adding thread-local caches of constant pool entries
......
......@@ -161,10 +161,7 @@ public:
void dumpDest(const Cfg *Func) const;
virtual bool isRedundantAssign() const { return false; }
// TODO(jpp): Insts should not have non-trivial destructors, but they
// currently do. This dtor is marked final as a multi-step refactor that
// will eventually fix this problem.
virtual ~Inst() = default;
~Inst() = default;
protected:
Inst(Cfg *Func, InstKind Kind, SizeT MaxSrcs, Variable *Dest);
......
......@@ -47,7 +47,7 @@ class Liveness {
// LiveToVarMap maps a liveness bitvector index to a Variable. This is
// generally just for printing/dumping. The index should be less than
// NumLocals + Liveness::NumGlobals.
std::vector<Variable *> LiveToVarMap;
CfgVector<Variable *> LiveToVarMap;
// LiveIn and LiveOut track the in- and out-liveness of the global
// variables. The size of each vector is LivenessNode::NumGlobals.
LivenessBV LiveIn, LiveOut;
......@@ -107,13 +107,13 @@ private:
LivenessMode Mode;
SizeT NumGlobals = 0;
/// Size of Nodes is Cfg::Nodes.size().
std::vector<LivenessNode> Nodes;
CfgVector<LivenessNode> Nodes;
/// VarToLiveMap maps a Variable's Variable::Number to its live index within
/// its basic block.
std::vector<SizeT> VarToLiveMap;
CfgVector<SizeT> VarToLiveMap;
/// LiveToVarMap is analogous to LivenessNode::LiveToVarMap, but for non-local
/// variables.
std::vector<Variable *> LiveToVarMap;
CfgVector<Variable *> LiveToVarMap;
/// RangeMask[Variable::Number] indicates whether we want to track that
/// Variable's live range.
llvm::BitVector RangeMask;
......
......@@ -88,9 +88,8 @@ private:
bool Deleted = false;
};
using LoopNodeList = std::vector<LoopNode, CfgLocalAllocator<LoopNode>>;
using LoopNodePtrList =
std::vector<LoopNode *, CfgLocalAllocator<LoopNode *>>;
using LoopNodeList = CfgVector<LoopNode>;
using LoopNodePtrList = CfgVector<LoopNode *>;
/// Process the node as part of Tarjan's algorithm and return either a
/// node to recurse into or nullptr when the node has been fully processed.
......
......@@ -85,12 +85,13 @@ public:
}
/// @}
~Operand() = default;
protected:
Operand(OperandKind Kind, Type Ty) : Ty(Ty), Kind(Kind) {
// It is undefined behavior to have a larger value in the enum
assert(Kind <= kTarget_Max);
}
virtual ~Operand() = default;
const Type Ty;
const OperandKind Kind;
......@@ -354,7 +355,7 @@ public:
LiveRange() = default;
/// Special constructor for building a kill set. The advantage is that we can
/// reserve the right amount of space in advance.
explicit LiveRange(const std::vector<InstNumberT> &Kills) {
explicit LiveRange(const CfgVector<InstNumberT> &Kills) {
Range.reserve(Kills.size());
for (InstNumberT I : Kills)
addSegment(I, I);
......@@ -388,8 +389,7 @@ public:
private:
using RangeElementType = std::pair<InstNumberT, InstNumberT>;
/// RangeType is arena-allocated from the Cfg's allocator.
using RangeType =
std::vector<RangeElementType, CfgLocalAllocator<RangeElementType>>;
using RangeType = CfgVector<RangeElementType>;
RangeType Range;
/// TrimmedBegin is an optimization for the overlaps() computation. Since the
/// linear-scan algorithm always calls it as overlaps(Cur) and Cur advances
......@@ -556,7 +556,7 @@ enum MetadataKind {
VMK_SingleDefs, /// Track uses+defs, but only record single def
VMK_All /// Track uses+defs, including full def list
};
using InstDefList = std::vector<const Inst *, CfgLocalAllocator<const Inst *>>;
using InstDefList = CfgVector<const Inst *>;
/// VariableTracking tracks the metadata for a single variable. It is
/// only meant to be used internally by VariablesMetadata.
......@@ -652,7 +652,7 @@ public:
private:
const Cfg *Func;
MetadataKind Kind;
std::vector<VariableTracking> Metadata;
CfgVector<VariableTracking> Metadata;
const static InstDefList NoDefinitions;
};
......
......@@ -166,8 +166,8 @@ void LinearScan::initForInfOnly() {
// Iterate across all instructions and record the begin and end of the live
// range for each variable that is pre-colored or infinite weight.
std::vector<InstNumberT> LRBegin(Vars.size(), Inst::NumberSentinel);
std::vector<InstNumberT> LREnd(Vars.size(), Inst::NumberSentinel);
CfgVector<InstNumberT> LRBegin(Vars.size(), Inst::NumberSentinel);
CfgVector<InstNumberT> LREnd(Vars.size(), Inst::NumberSentinel);
for (CfgNode *Node : Func->getNodes()) {
for (Inst &Inst : Node->getInsts()) {
if (Inst.isDeleted())
......
......@@ -39,8 +39,8 @@ public:
static constexpr size_t REGS_SIZE = 32;
private:
using OrderedRanges = std::vector<Variable *>;
using UnorderedRanges = std::vector<Variable *>;
using OrderedRanges = CfgVector<Variable *>;
using UnorderedRanges = CfgVector<Variable *>;
class IterationState {
IterationState(const IterationState &) = delete;
......@@ -103,7 +103,7 @@ private:
/// faster processing.
OrderedRanges UnhandledPrecolored;
UnorderedRanges Active, Inactive, Handled;
std::vector<InstNumberT> Kills;
CfgVector<InstNumberT> Kills;
RegAllocKind Kind = RAK_Unknown;
/// RegUses[I] is the number of live ranges (variables) that register I is
/// currently assigned to. It can be greater than 1 as a result of
......
......@@ -20,8 +20,7 @@ namespace Ice {
class CaseCluster;
using CaseClusterArray =
std::vector<CaseCluster, CfgLocalAllocator<CaseCluster>>;
using CaseClusterArray = CfgVector<CaseCluster>;
/// A cluster of cases can be tested by a common method during switch lowering.
class CaseCluster {
......
......@@ -116,7 +116,6 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
// the document "OS X ABI Function Call Guide" by Apple.
NeedsStackAlignment = true;
using OperandList = std::vector<Operand *>;
OperandList XmmArgs;
OperandList StackArgs, StackArgLocations;
uint32_t ParameterAreaSizeBytes = 0;
......
......@@ -491,8 +491,8 @@ bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
template <class Machine> void TargetX86Base<Machine>::findRMW() {
Func->dump("Before RMW");
OstreamLocker L(Func->getContext());
Ostream &Str = Func->getContext()->getStrDump();
if (Func->isVerbose(IceV_RMW))
Func->getContext()->lockStr();
for (CfgNode *Node : Func->getNodes()) {
// Walk through the instructions, considering each sequence of 3
// instructions, and look for the particular RMW pattern. Note that this
......@@ -510,75 +510,76 @@ template <class Machine> void TargetX86Base<Machine>::findRMW() {
assert(!I1->isDeleted());
assert(!I2->isDeleted());
assert(!I3->isDeleted());
if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) {
if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) {
if (auto *Store = llvm::dyn_cast<InstStore>(I3)) {
// Look for:
// a = Load addr
// b = <op> a, other
// Store b, addr
// Change to:
// a = Load addr
// b = <op> a, other
// x = FakeDef
// RMW <op>, addr, other, x
// b = Store b, addr, x
// Note that inferTwoAddress() makes sure setDestNonKillable() gets
// called on the updated Store instruction, to avoid liveness
// problems later.
//
// With this transformation, the Store instruction acquires a Dest
// variable and is now subject to dead code elimination if there
// are no more uses of "b". Variable "x" is a beacon for
// determining whether the Store instruction gets dead-code
// eliminated. If the Store instruction is eliminated, then it
// must be the case that the RMW instruction ends x's live range,
// and therefore the RMW instruction will be retained and later
// lowered. On the other hand, if the RMW instruction does not end
// x's live range, then the Store instruction must still be
// present, and therefore the RMW instruction is ignored during
// lowering because it is redundant with the Store instruction.
//
// Note that if "a" has further uses, the RMW transformation may
// still trigger, resulting in two loads and one store, which is
// worse than the original one load and one store. However, this
// is probably rare, and caching probably keeps it just as fast.
if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(),
Store->getAddr()))
continue;
Operand *ArithSrcFromLoad = Arith->getSrc(0);
Operand *ArithSrcOther = Arith->getSrc(1);
if (ArithSrcFromLoad != Load->getDest()) {
if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
continue;
std::swap(ArithSrcFromLoad, ArithSrcOther);
}
if (Arith->getDest() != Store->getData())
continue;
if (!canRMW(Arith))
continue;
if (Func->isVerbose(IceV_RMW)) {
Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
Load->dump(Func);
Str << "\n ";
Arith->dump(Func);
Str << "\n ";
Store->dump(Func);
Str << "\n";
}
Variable *Beacon = Func->makeVariable(IceType_i32);
Beacon->setMustNotHaveReg();
Store->setRmwBeacon(Beacon);
InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
Node->getInsts().insert(I3, BeaconDef);
auto *RMW = Traits::Insts::FakeRMW::create(
Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
Node->getInsts().insert(I3, RMW);
}
}
auto *Load = llvm::dyn_cast<InstLoad>(I1);
auto *Arith = llvm::dyn_cast<InstArithmetic>(I2);
auto *Store = llvm::dyn_cast<InstStore>(I3);
if (!Load || !Arith || !Store)
continue;
// Look for:
// a = Load addr
// b = <op> a, other
// Store b, addr
// Change to:
// a = Load addr
// b = <op> a, other
// x = FakeDef
// RMW <op>, addr, other, x
// b = Store b, addr, x
// Note that inferTwoAddress() makes sure setDestNonKillable() gets
// called on the updated Store instruction, to avoid liveness problems
// later.
//
// With this transformation, the Store instruction acquires a Dest
// variable and is now subject to dead code elimination if there are no
// more uses of "b". Variable "x" is a beacon for determining whether
// the Store instruction gets dead-code eliminated. If the Store
// instruction is eliminated, then it must be the case that the RMW
// instruction ends x's live range, and therefore the RMW instruction
// will be retained and later lowered. On the other hand, if the RMW
// instruction does not end x's live range, then the Store instruction
// must still be present, and therefore the RMW instruction is ignored
// during lowering because it is redundant with the Store instruction.
//
// Note that if "a" has further uses, the RMW transformation may still
// trigger, resulting in two loads and one store, which is worse than the
// original one load and one store. However, this is probably rare, and
// caching probably keeps it just as fast.
if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(),
Store->getAddr()))
continue;
Operand *ArithSrcFromLoad = Arith->getSrc(0);
Operand *ArithSrcOther = Arith->getSrc(1);
if (ArithSrcFromLoad != Load->getDest()) {
if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
continue;
std::swap(ArithSrcFromLoad, ArithSrcOther);
}
if (Arith->getDest() != Store->getData())
continue;
if (!canRMW(Arith))
continue;
if (Func->isVerbose(IceV_RMW)) {
Ostream &Str = Func->getContext()->getStrDump();
Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
Load->dump(Func);
Str << "\n ";
Arith->dump(Func);
Str << "\n ";
Store->dump(Func);
Str << "\n";
}
Variable *Beacon = Func->makeVariable(IceType_i32);
Beacon->setMustNotHaveReg();
Store->setRmwBeacon(Beacon);
InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
Node->getInsts().insert(I3, BeaconDef);
auto *RMW = Traits::Insts::FakeRMW::create(
Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
Node->getInsts().insert(I3, RMW);
}
}
if (Func->isVerbose(IceV_RMW))
Func->getContext()->unlockStr();
}
// Converts a ConstantInteger32 operand into its constant value, or
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment