Commit c4554d78 by Jim Stichnoth

Subzero: Rewrite the pass timing infrastructure.

This makes it much more useful for individual analysis and long-term translation performance tracking. 1. Collect and report aggregated across the entire translation, instead of function-by-function. If you really care about a single function, just extract it and translate it separately for analysis. 2. Remove "-verbose time" and just use -timing. 3. Collects two kinds of timings: cumulative and flat. Cumulative measures the total time, even if a callee also times itself. Flat only measures the currently active timer at the top of the stack. The flat times should add up to 100%, but cumulative will usually add up to much more than 100%. BUG= none R=jvoung@chromium.org Review URL: https://codereview.chromium.org/610813002
parent 118ca798
......@@ -80,9 +80,10 @@ SRCS= \
IceMemoryRegion.cpp \
IceOperand.cpp \
IceRegAlloc.cpp \
IceRNG.cpp \
IceRNG.cpp \
IceTargetLowering.cpp \
IceTargetLoweringX8632.cpp \
IceTimerTree.cpp \
IceTranslator.cpp \
IceTypeConverter.cpp \
IceTypes.cpp \
......@@ -113,7 +114,7 @@ $(OBJS): | $(OBJDIR)
$(OBJDIR):
@mkdir -p $@
check-lit: llvm2ice
check-lit: llvm2ice make_symlink
LLVM_BIN_PATH=$(LLVM_BIN_PATH) \
$(LLVM_SRC_PATH)/utils/lit/lit.py -sv tests_lit
......
......@@ -69,14 +69,14 @@ bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); }
void Cfg::translate() {
if (hasError())
return;
static TimerIdT IDtranslate = GlobalContext::getTimerID("translate");
TimerMarker T(IDtranslate, getContext());
dump("Initial CFG");
Timer T_translate;
// The set of translation passes and their order are determined by
// the target.
getTarget()->translate();
T_translate.printElapsedUs(getContext(), "translate()");
dump("Final output");
}
......@@ -88,6 +88,9 @@ void Cfg::computePredecessors() {
}
void Cfg::renumberInstructions() {
static TimerIdT IDrenumberInstructions =
GlobalContext::getTimerID("renumberInstructions");
TimerMarker T(IDrenumberInstructions, getContext());
NextInstNumber = 1;
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->renumberInstructions();
......@@ -96,6 +99,8 @@ void Cfg::renumberInstructions() {
// placePhiLoads() must be called before placePhiStores().
void Cfg::placePhiLoads() {
static TimerIdT IDplacePhiLoads = GlobalContext::getTimerID("placePhiLoads");
TimerMarker T(IDplacePhiLoads, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->placePhiLoads();
}
......@@ -103,34 +108,48 @@ void Cfg::placePhiLoads() {
// placePhiStores() must be called after placePhiLoads().
void Cfg::placePhiStores() {
static TimerIdT IDplacePhiStores =
GlobalContext::getTimerID("placePhiStores");
TimerMarker T(IDplacePhiStores, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->placePhiStores();
}
}
void Cfg::deletePhis() {
static TimerIdT IDdeletePhis = GlobalContext::getTimerID("deletePhis");
TimerMarker T(IDdeletePhis, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->deletePhis();
}
}
void Cfg::doArgLowering() {
static TimerIdT IDdoArgLowering = GlobalContext::getTimerID("doArgLowering");
TimerMarker T(IDdoArgLowering, getContext());
getTarget()->lowerArguments();
}
void Cfg::doAddressOpt() {
static TimerIdT IDdoAddressOpt = GlobalContext::getTimerID("doAddressOpt");
TimerMarker T(IDdoAddressOpt, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->doAddressOpt();
}
}
void Cfg::doNopInsertion() {
static TimerIdT IDdoNopInsertion =
GlobalContext::getTimerID("doNopInsertion");
TimerMarker T(IDdoNopInsertion, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->doNopInsertion();
}
}
void Cfg::genCode() {
static TimerIdT IDgenCode = GlobalContext::getTimerID("genCode");
TimerMarker T(IDgenCode, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->genCode();
}
......@@ -138,6 +157,8 @@ void Cfg::genCode() {
// Compute the stack frame layout.
void Cfg::genFrame() {
static TimerIdT IDgenFrame = GlobalContext::getTimerID("genFrame");
TimerMarker T(IDgenFrame, getContext());
getTarget()->addProlog(Entry);
// TODO: Consider folding epilog generation into the final
// emission/assembly pass to avoid an extra iteration over the node
......@@ -154,6 +175,9 @@ void Cfg::genFrame() {
// completely with a single block. It is a quick single pass and
// doesn't need to iterate until convergence.
void Cfg::livenessLightweight() {
static TimerIdT IDlivenessLightweight =
GlobalContext::getTimerID("livenessLightweight");
TimerMarker T(IDlivenessLightweight, getContext());
getVMetadata()->init();
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->livenessLightweight();
......@@ -161,6 +185,8 @@ void Cfg::livenessLightweight() {
}
void Cfg::liveness(LivenessMode Mode) {
static TimerIdT IDliveness = GlobalContext::getTimerID("liveness");
TimerMarker T(IDliveness, getContext());
Live.reset(new Liveness(this, Mode));
getVMetadata()->init();
Live->init();
......@@ -199,9 +225,10 @@ void Cfg::liveness(LivenessMode Mode) {
// Collect timing for just the portion that constructs the live
// range intervals based on the end-of-live-range computation, for a
// finer breakdown of the cost.
Timer T_liveRange;
// Make a final pass over instructions to delete dead instructions
// and build each Variable's live range.
static TimerIdT IDliveRange = GlobalContext::getTimerID("liveRange");
TimerMarker T1(IDliveRange, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->livenessPostprocess(Mode, getLiveness());
}
......@@ -241,7 +268,6 @@ void Cfg::liveness(LivenessMode Mode) {
if (Var->getWeight().isInf())
Var->setLiveRangeInfiniteWeight();
}
T_liveRange.printElapsedUs(getContext(), "live range construction");
dump();
}
}
......@@ -249,6 +275,9 @@ void Cfg::liveness(LivenessMode Mode) {
// Traverse every Variable of every Inst and verify that it
// appears within the Variable's computed live range.
bool Cfg::validateLiveness() const {
static TimerIdT IDvalidateLiveness =
GlobalContext::getTimerID("validateLiveness");
TimerMarker T(IDvalidateLiveness, getContext());
bool Valid = true;
Ostream &Str = Ctx->getStrDump();
for (NodeList::const_iterator I1 = Nodes.begin(), E1 = Nodes.end(); I1 != E1;
......@@ -296,18 +325,21 @@ bool Cfg::validateLiveness() const {
}
void Cfg::doBranchOpt() {
static TimerIdT IDdoBranchOpt = GlobalContext::getTimerID("doBranchOpt");
TimerMarker T(IDdoBranchOpt, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
NodeList::iterator NextNode = I;
++NextNode;
(*I)->doBranchOpt(*NextNode);
(*I)->doBranchOpt(NextNode == E ? NULL : *NextNode);
}
}
// ======================== Dump routines ======================== //
void Cfg::emit() {
static TimerIdT IDemit = GlobalContext::getTimerID("emit");
TimerMarker T(IDemit, getContext());
Ostream &Str = Ctx->getStrEmit();
Timer T_emit;
if (!Ctx->testAndSetHasEmittedFirstMethod()) {
// Print a helpful command for assembling the output.
// TODO: have the Target emit the header
......@@ -339,7 +371,6 @@ void Cfg::emit() {
(*I)->emit(this);
}
Str << "\n";
T_emit.printElapsedUs(Ctx, "emit()");
}
// Dumps the IR with an optional introductory message.
......
......@@ -11,11 +11,10 @@
//
//===----------------------------------------------------------------------===//
#include "IceConverter.h"
#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceConverter.h"
#include "IceDefs.h"
#include "IceGlobalContext.h"
#include "IceInst.h"
......@@ -60,6 +59,9 @@ public:
// Caller is expected to delete the returned Ice::Cfg object.
Ice::Cfg *convertFunction(const Function *F) {
static Ice::TimerIdT IDllvmConvert =
Ice::GlobalContext::getTimerID("llvmConvert");
Ice::TimerMarker T(IDllvmConvert, Ctx);
VarMap.clear();
NodeMap.clear();
Func = new Ice::Cfg(Ctx);
......@@ -621,6 +623,8 @@ private:
namespace Ice {
void Converter::convertToIce() {
static TimerIdT IDconvertToIce = GlobalContext::getTimerID("convertToIce");
TimerMarker T(IDconvertToIce, Ctx);
nameUnnamedGlobalAddresses(Mod);
if (!Ctx->getFlags().DisableGlobals)
convertGlobals(Mod);
......@@ -635,11 +639,6 @@ void Converter::convertFunctions() {
Timer TConvert;
Cfg *Fcn = FunctionConverter.convertFunction(I);
if (Ctx->getFlags().SubzeroTimingEnabled) {
std::cerr << "[Subzero timing] Convert function "
<< Fcn->getFunctionName() << ": " << TConvert.getElapsedSec()
<< " sec\n";
}
translateFcn(Fcn);
}
......
......@@ -78,6 +78,8 @@ typedef uint32_t SizeT;
// numbers are used for representing Variable live ranges.
typedef int32_t InstNumberT;
typedef uint32_t TimerIdT;
enum LivenessMode {
// Basic version of live-range-end calculation. Marks the last uses
// of variables based on dataflow analysis. Records the set of
......@@ -103,34 +105,14 @@ enum VerboseItem {
IceV_RegOrigins = 1 << 7,
IceV_LinearScan = 1 << 8,
IceV_Frame = 1 << 9,
IceV_Timing = 1 << 10,
IceV_AddrOpt = 1 << 11,
IceV_AddrOpt = 1 << 10,
IceV_All = ~IceV_None,
IceV_Most = IceV_All & ~(IceV_Timing | IceV_LinearScan)
IceV_Most = IceV_All & ~IceV_LinearScan
};
typedef uint32_t VerboseMask;
typedef llvm::raw_ostream Ostream;
// TODO: Implement in terms of std::chrono after switching to C++11.
class Timer {
public:
Timer() : Start(llvm::TimeRecord::getCurrentTime(false)) {}
uint64_t getElapsedNs() const { return getElapsedSec() * 1000 * 1000 * 1000; }
uint64_t getElapsedUs() const { return getElapsedSec() * 1000 * 1000; }
uint64_t getElapsedMs() const { return getElapsedSec() * 1000; }
double getElapsedSec() const {
llvm::TimeRecord End = llvm::TimeRecord::getCurrentTime(false);
return End.getWallTime() - Start.getWallTime();
}
void printElapsedUs(GlobalContext *Ctx, const IceString &Tag) const;
private:
const llvm::TimeRecord Start;
Timer(const Timer &) LLVM_DELETED_FUNCTION;
Timer &operator=(const Timer &) LLVM_DELETED_FUNCTION;
};
} // end of namespace Ice
#endif // SUBZERO_SRC_ICEDEFS_H
//===- subzero/src/IceGlobalContext.cpp - Global context defs ---*- C++ -*-===//
//===- subzero/src/IceGlobalContext.cpp - Global context defs -------------===//
//
// The Subzero Code Generator
//
......@@ -22,6 +22,7 @@
#include "IceGlobalContext.h"
#include "IceOperand.h"
#include "IceTargetLowering.h"
#include "IceTimerTree.h"
namespace Ice {
......@@ -122,7 +123,7 @@ GlobalContext::GlobalContext(llvm::raw_ostream *OsDump,
: StrDump(OsDump), StrEmit(OsEmit), VMask(Mask),
ConstPool(new ConstantPool()), Arch(Arch), Opt(Opt),
TestPrefix(TestPrefix), Flags(Flags), HasEmittedFirstMethod(false),
RNG("") {}
RNG(""), Timers(new TimerStack("main")) {}
// Scan a string for S[0-9A-Z]*_ patterns and replace them with
// S<num>_ where <num> is the next base-36 value. If a type name
......@@ -384,6 +385,14 @@ ConstantList GlobalContext::getConstantPool(Type Ty) const {
llvm_unreachable("Unknown type");
}
TimerIdT GlobalContext::getTimerID(const IceString &Name) {
return TimerStack::getTimerID(Name);
}
void GlobalContext::pushTimer(TimerIdT ID) { Timers->push(ID); }
void GlobalContext::popTimer(TimerIdT ID) { Timers->pop(ID); }
void GlobalContext::dumpStats(const IceString &Name, bool Final) {
if (Flags.DumpStats) {
if (Final) {
......@@ -395,12 +404,6 @@ void GlobalContext::dumpStats(const IceString &Name, bool Final) {
}
}
void Timer::printElapsedUs(GlobalContext *Ctx, const IceString &Tag) const {
if (Ctx->isVerbose(IceV_Timing)) {
// Prefixing with '#' allows timing strings to be included
// without error in textual assembly output.
Ctx->getStrDump() << "# " << getElapsedUs() << " usec " << Tag << "\n";
}
}
void GlobalContext::dumpTimers() { Timers->dump(getStrDump()); }
} // end of namespace Ice
......@@ -69,12 +69,8 @@ public:
// Returns true if any of the specified options in the verbose mask
// are set. If the argument is omitted, it checks if any verbose
// options at all are set. IceV_Timing is treated specially, so
// that running with just IceV_Timing verbosity doesn't trigger an
// avalanche of extra output.
bool isVerbose(VerboseMask Mask = (IceV_All & ~IceV_Timing)) const {
return VMask & Mask;
}
// options at all are set.
bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; }
void setVerbose(VerboseMask Mask) { VMask = Mask; }
void addVerbose(VerboseMask Mask) { VMask |= Mask; }
void subVerbose(VerboseMask Mask) { VMask &= ~Mask; }
......@@ -154,6 +150,11 @@ public:
StatsCumulative.updateFills();
}
static TimerIdT getTimerID(const IceString &Name);
void pushTimer(TimerIdT ID);
void popTimer(TimerIdT ID);
void dumpTimers();
private:
Ostream *StrDump; // Stream for dumping / diagnostics
Ostream *StrEmit; // Stream for code emission
......@@ -170,6 +171,7 @@ private:
RandomNumberGenerator RNG;
CodeStats StatsFunction;
CodeStats StatsCumulative;
llvm::OwningPtr<class TimerStack> Timers;
GlobalContext(const GlobalContext &) LLVM_DELETED_FUNCTION;
GlobalContext &operator=(const GlobalContext &) LLVM_DELETED_FUNCTION;
......@@ -178,6 +180,30 @@ private:
void incrementSubstitutions(ManglerVector &OldName) const;
};
// Helper class to push and pop a timer marker. The constructor
// pushes a marker, and the destructor pops it. This is for
// convenient timing of regions of code.
class TimerMarker {
TimerMarker(const TimerMarker &) LLVM_DELETED_FUNCTION;
TimerMarker &operator=(const TimerMarker &) LLVM_DELETED_FUNCTION;
public:
TimerMarker(TimerIdT ID, GlobalContext *Ctx)
: ID(ID), Ctx(Ctx), Active(Ctx->getFlags().SubzeroTimingEnabled) {
if (Active)
Ctx->pushTimer(ID);
}
~TimerMarker() {
if (Active)
Ctx->popTimer(ID);
}
private:
TimerIdT ID;
GlobalContext *const Ctx;
const bool Active;
};
} // end of namespace Ice
#endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H
......@@ -509,9 +509,9 @@ private:
class InstLoad : public InstHighLevel {
public:
static InstLoad *create(Cfg *Func, Variable *Dest, Operand *SourceAddr,
uint32_t align = 1) {
uint32_t Align = 1) {
// TODO(kschimpf) Stop ignoring alignment specification.
(void)align;
(void)Align;
return new (Func->allocateInst<InstLoad>())
InstLoad(Func, Dest, SourceAddr);
}
......
......@@ -122,9 +122,26 @@ bool LiveRange::overlaps(const LiveRange &Other) const {
}
bool LiveRange::overlaps(InstNumberT OtherBegin) const {
bool Result = false;
for (RangeType::const_iterator I = Range.begin(), E = Range.end(); I != E;
++I) {
if (OtherBegin < I->first) {
Result = false;
break;
}
if (OtherBegin < I->second) {
Result = true;
break;
}
}
#if 0
// An equivalent but less inefficient implementation:
LiveRange Temp;
Temp.addSegment(OtherBegin, OtherBegin + 1);
return overlaps(Temp);
bool Validation = overlaps(Temp);
assert(Result == Validation);
#endif
return Result;
}
// Returns true if the live range contains the given instruction
......@@ -259,6 +276,8 @@ const Inst *VariableTracking::getSingleDefinition() const {
}
void VariablesMetadata::init() {
static TimerIdT IDvmetadata = GlobalContext::getTimerID("vmetadata");
TimerMarker T(IDvmetadata, Func->getContext());
Metadata.clear();
Metadata.resize(Func->getNumVariables());
......
......@@ -64,12 +64,15 @@ void dumpDisableOverlap(const Cfg *Func, const Variable *Var,
// preparation. Results are assigned to Variable::RegNum for each
// Variable.
void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
static TimerIdT IDscan = GlobalContext::getTimerID("linearScan");
TimerMarker T(IDscan, Func->getContext());
assert(RegMaskFull.any()); // Sanity check
Unhandled.clear();
Handled.clear();
Inactive.clear();
Active.clear();
Ostream &Str = Func->getContext()->getStrDump();
bool Verbose = Func->getContext()->isVerbose(IceV_LinearScan);
Func->resetCurrentNode();
VariablesMetadata *VMetadata = Func->getVMetadata();
......@@ -81,26 +84,32 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
// a result, it may be useful to design a better data structure for
// storing Func->getVariables().
const VarList &Vars = Func->getVariables();
for (VarList::const_iterator I = Vars.begin(), E = Vars.end(); I != E; ++I) {
Variable *Var = *I;
// Explicitly don't consider zero-weight variables, which are
// meant to be spill slots.
if (Var->getWeight() == RegWeight::Zero)
continue;
// Don't bother if the variable has a null live range, which means
// it was never referenced.
if (Var->getLiveRange().isEmpty())
continue;
Unhandled.insert(LiveRangeWrapper(Var));
if (Var->hasReg()) {
Var->setRegNumTmp(Var->getRegNum());
Var->setLiveRangeInfiniteWeight();
{
static TimerIdT IDinitUnhandled =
GlobalContext::getTimerID("initUnhandled");
TimerMarker T(IDinitUnhandled, Func->getContext());
for (VarList::const_iterator I = Vars.begin(), E = Vars.end(); I != E;
++I) {
Variable *Var = *I;
// Explicitly don't consider zero-weight variables, which are
// meant to be spill slots.
if (Var->getWeight() == RegWeight::Zero)
continue;
// Don't bother if the variable has a null live range, which means
// it was never referenced.
if (Var->getLiveRange().isEmpty())
continue;
Unhandled.insert(LiveRangeWrapper(Var));
if (Var->hasReg()) {
Var->setRegNumTmp(Var->getRegNum());
Var->setLiveRangeInfiniteWeight();
}
}
}
// RegUses[I] is the number of live ranges (variables) that register
// I is currently assigned to. It can be greater than 1 as a result
// of Variable::AllowRegisterOverlap.
// of AllowOverlap inference below.
std::vector<int> RegUses(RegMaskFull.size());
// Unhandled is already set to all ranges in increasing order of
// start points.
......@@ -112,7 +121,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
while (!Unhandled.empty()) {
LiveRangeWrapper Cur = *Unhandled.begin();
Unhandled.erase(Unhandled.begin());
if (Func->getContext()->isVerbose(IceV_LinearScan)) {
if (Verbose) {
Str << "\nConsidering ";
Cur.dump(Func);
Str << "\n";
......@@ -130,7 +139,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
int32_t RegNum = Cur.Var->getRegNum();
// RegNumTmp should have already been set above.
assert(Cur.Var->getRegNumTmp() == RegNum);
if (Func->getContext()->isVerbose(IceV_LinearScan)) {
if (Verbose) {
Str << "Precoloring ";
Cur.dump(Func);
Str << "\n";
......@@ -150,7 +159,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
bool Moved = false;
if (Item.endsBefore(Cur)) {
// Move Item from Active to Handled list.
if (Func->getContext()->isVerbose(IceV_LinearScan)) {
if (Verbose) {
Str << "Expiring ";
Item.dump(Func);
Str << "\n";
......@@ -160,7 +169,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
Moved = true;
} else if (!Item.overlapsStart(Cur)) {
// Move Item from Active to Inactive list.
if (Func->getContext()->isVerbose(IceV_LinearScan)) {
if (Verbose) {
Str << "Inactivating ";
Item.dump(Func);
Str << "\n";
......@@ -186,7 +195,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
LiveRangeWrapper Item = *I;
if (Item.endsBefore(Cur)) {
// Move Item from Inactive to Handled list.
if (Func->getContext()->isVerbose(IceV_LinearScan)) {
if (Verbose) {
Str << "Expiring ";
Item.dump(Func);
Str << "\n";
......@@ -195,7 +204,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
Handled.push_back(Item);
} else if (Item.overlapsStart(Cur)) {
// Move Item from Inactive to Active list.
if (Func->getContext()->isVerbose(IceV_LinearScan)) {
if (Verbose) {
Str << "Reactivating ";
Item.dump(Func);
Str << "\n";
......@@ -261,7 +270,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
}
}
}
if (Func->getContext()->isVerbose(IceV_LinearScan)) {
if (Verbose) {
if (Prefer) {
Str << "Initial Prefer=" << *Prefer << " R=" << PreferReg
<< " LIVE=" << Prefer->getLiveRange() << " Overlap=" << AllowOverlap
......@@ -279,8 +288,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
int32_t RegNum = Item.Var->getRegNumTmp();
// Don't assert(Free[RegNum]) because in theory (though
// probably never in practice) there could be two inactive
// variables that were allowed marked with
// AllowRegisterOverlap.
// variables that were marked with AllowOverlap.
Free[RegNum] = false;
// Disable AllowOverlap if an Inactive variable, which is not
// Prefer, shares Prefer's register, and has a definition
......@@ -331,7 +339,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
}
// Print info about physical register availability.
if (Func->getContext()->isVerbose(IceV_LinearScan)) {
if (Verbose) {
for (SizeT i = 0; i < RegMask.size(); ++i) {
if (RegMask[i]) {
Str << Func->getTarget()->getRegName(i, IceType_i32)
......@@ -346,7 +354,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
// First choice: a preferred register that is either free or is
// allowed to overlap with its linked variable.
Cur.Var->setRegNumTmp(PreferReg);
if (Func->getContext()->isVerbose(IceV_LinearScan)) {
if (Verbose) {
Str << "Preferring ";
Cur.dump(Func);
Str << "\n";
......@@ -360,7 +368,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
// picking the lowest-numbered available register?
int32_t RegNum = Free.find_first();
Cur.Var->setRegNumTmp(RegNum);
if (Func->getContext()->isVerbose(IceV_LinearScan)) {
if (Verbose) {
Str << "Allocating ";
Cur.dump(Func);
Str << "\n";
......@@ -434,7 +442,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
++Next;
LiveRangeWrapper Item = *I;
if (Item.Var->getRegNumTmp() == MinWeightIndex) {
if (Func->getContext()->isVerbose(IceV_LinearScan)) {
if (Verbose) {
Str << "Evicting ";
Item.dump(Func);
Str << "\n";
......@@ -463,7 +471,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
// instructions.
if (Item.Var->getRegNumTmp() == MinWeightIndex &&
Item.overlaps(Cur)) {
if (Func->getContext()->isVerbose(IceV_LinearScan)) {
if (Verbose) {
Str << "Evicting ";
Item.dump(Func);
Str << "\n";
......@@ -478,7 +486,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
assert(RegUses[MinWeightIndex] >= 0);
++RegUses[MinWeightIndex];
Active.push_back(Cur);
if (Func->getContext()->isVerbose(IceV_LinearScan)) {
if (Verbose) {
Str << "Allocating ";
Cur.dump(Func);
Str << "\n";
......@@ -509,7 +517,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
I != E; ++I) {
LiveRangeWrapper Item = *I;
int32_t RegNum = Item.Var->getRegNumTmp();
if (Func->getContext()->isVerbose(IceV_LinearScan)) {
if (Verbose) {
if (!Item.Var->hasRegTmp()) {
Str << "Not assigning ";
Item.Var->dump(Func);
......
......@@ -229,6 +229,8 @@ void TargetLowering::lower() {
// registers could potentially be parameterized if we want to restrict
// registers e.g. for performance testing.
void TargetLowering::regAlloc() {
static TimerIdT IDregAlloc = GlobalContext::getTimerID("regAlloc");
TimerMarker T(IDregAlloc, Ctx);
LinearScan LinearScan(Func);
RegSetMask RegInclude = RegSet_None;
RegSetMask RegExclude = RegSet_None;
......
......@@ -302,35 +302,27 @@ TargetX8632::TargetX8632(Cfg *Func)
void TargetX8632::translateO2() {
GlobalContext *Context = Func->getContext();
static TimerIdT IDO2 = GlobalContext::getTimerID("O2");
TimerMarker T(IDO2, Context);
// Lower Phi instructions.
Timer T_placePhiLoads;
Func->placePhiLoads();
if (Func->hasError())
return;
T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
Timer T_placePhiStores;
Func->placePhiStores();
if (Func->hasError())
return;
T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
Timer T_deletePhis;
Func->deletePhis();
if (Func->hasError())
return;
T_deletePhis.printElapsedUs(Context, "deletePhis()");
Func->dump("After Phi lowering");
// Address mode optimization.
Timer T_doAddressOpt;
Func->getVMetadata()->init();
Func->doAddressOpt();
T_doAddressOpt.printElapsedUs(Context, "doAddressOpt()");
// Argument lowering
Timer T_argLowering;
Func->doArgLowering();
T_argLowering.printElapsedUs(Context, "lowerArguments()");
// Target lowering. This requires liveness analysis for some parts
// of the lowering decisions, such as compare/branch fusing. If
......@@ -338,40 +330,30 @@ void TargetX8632::translateO2() {
// to be renumbered first. TODO: This renumbering should only be
// necessary if we're actually calculating live intervals, which we
// only do for register allocation.
Timer T_renumber1;
Func->renumberInstructions();
if (Func->hasError())
return;
T_renumber1.printElapsedUs(Context, "renumberInstructions()");
// TODO: It should be sufficient to use the fastest liveness
// calculation, i.e. livenessLightweight(). However, for some
// reason that slows down the rest of the translation. Investigate.
Timer T_liveness1;
Func->liveness(Liveness_Basic);
if (Func->hasError())
return;
T_liveness1.printElapsedUs(Context, "liveness()");
Func->dump("After x86 address mode opt");
Timer T_genCode;
Func->genCode();
if (Func->hasError())
return;
T_genCode.printElapsedUs(Context, "genCode()");
// Register allocation. This requires instruction renumbering and
// full liveness analysis.
Timer T_renumber2;
Func->renumberInstructions();
if (Func->hasError())
return;
T_renumber2.printElapsedUs(Context, "renumberInstructions()");
Timer T_liveness2;
Func->liveness(Liveness_Intervals);
if (Func->hasError())
return;
T_liveness2.printElapsedUs(Context, "liveness()");
// Validate the live range computations. Do it outside the timing
// code. TODO: Put this under a flag.
bool ValidLiveness = Func->validateLiveness();
......@@ -381,20 +363,16 @@ void TargetX8632::translateO2() {
// The post-codegen dump is done here, after liveness analysis and
// associated cleanup, to make the dump cleaner and more useful.
Func->dump("After initial x8632 codegen");
Timer T_regAlloc;
Func->getVMetadata()->init();
regAlloc();
if (Func->hasError())
return;
T_regAlloc.printElapsedUs(Context, "regAlloc()");
Func->dump("After linear scan regalloc");
// Stack frame mapping.
Timer T_genFrame;
Func->genFrame();
if (Func->hasError())
return;
T_genFrame.printElapsedUs(Context, "genFrame()");
Func->dump("After stack frame mapping");
// Branch optimization. This needs to be done just before code
......@@ -413,39 +391,29 @@ void TargetX8632::translateO2() {
void TargetX8632::translateOm1() {
GlobalContext *Context = Func->getContext();
Timer T_placePhiLoads;
static TimerIdT IDOm1 = GlobalContext::getTimerID("Om1");
TimerMarker T(IDOm1, Context);
Func->placePhiLoads();
if (Func->hasError())
return;
T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
Timer T_placePhiStores;
Func->placePhiStores();
if (Func->hasError())
return;
T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
Timer T_deletePhis;
Func->deletePhis();
if (Func->hasError())
return;
T_deletePhis.printElapsedUs(Context, "deletePhis()");
Func->dump("After Phi lowering");
Timer T_argLowering;
Func->doArgLowering();
T_argLowering.printElapsedUs(Context, "lowerArguments()");
Timer T_genCode;
Func->genCode();
if (Func->hasError())
return;
T_genCode.printElapsedUs(Context, "genCode()");
Func->dump("After initial x8632 codegen");
Timer T_genFrame;
Func->genFrame();
if (Func->hasError())
return;
T_genFrame.printElapsedUs(Context, "genFrame()");
Func->dump("After stack frame mapping");
// Nop insertion
......
//===- subzero/src/IceTimerTree.cpp - Pass timer defs ---------------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the TimerTree class, which tracks flat and
// cumulative execution time collection of call chains.
//
//===----------------------------------------------------------------------===//
#include "IceDefs.h"
#include "IceTimerTree.h"
namespace Ice {
std::vector<IceString> TimerStack::IDs;
TimerStack::TimerStack(const IceString &TopLevelName)
: FirstTimestamp(timestamp()), LastTimestamp(FirstTimestamp),
StateChangeCount(0), StackTop(0) {
Nodes.resize(1); // Reserve Nodes[0] for the root node.
push(getTimerID(TopLevelName));
}
// Returns the unique timer ID for the given Name, creating a new ID
// if needed. For performance reasons, it's best to make only one
// call per Name and cache the result, e.g. via a static initializer.
TimerIdT TimerStack::getTimerID(const IceString &Name) {
TimerIdT Size = IDs.size();
for (TimerIdT i = 0; i < Size; ++i) {
if (IDs[i] == Name)
return i;
}
IDs.push_back(Name);
return Size;
}
// Pushes a new marker onto the timer stack.
void TimerStack::push(TimerIdT ID) {
update();
if (Nodes[StackTop].Children.size() <= ID)
Nodes[StackTop].Children.resize(ID + 1);
if (Nodes[StackTop].Children[ID] == 0) {
TTindex Size = Nodes.size();
Nodes[StackTop].Children[ID] = Size;
Nodes.resize(Size + 1);
Nodes[Size].Parent = StackTop;
Nodes[Size].Interior = ID;
}
StackTop = Nodes[StackTop].Children[ID];
}
// Pop the top marker from the timer stack. Validates via assert()
// that the expected marker is popped.
void TimerStack::pop(TimerIdT ID) {
update();
assert(StackTop);
assert(Nodes[StackTop].Parent < StackTop);
// Verify that the expected ID is being popped.
assert(Nodes[StackTop].Interior == ID);
(void)ID;
// Verify that the parent's child points to the current stack top.
assert(Nodes[Nodes[StackTop].Parent].Children[ID] == StackTop);
StackTop = Nodes[StackTop].Parent;
}
// At a state change (e.g. push or pop), updates the flat and
// cumulative timings for everything on the timer stack.
void TimerStack::update() {
++StateChangeCount;
// Whenever the stack is about to change, we grab the time delta
// since the last change and add it to all active cumulative
// elements and to the flat element for the top of the stack.
double Current = timestamp();
double Delta = Current - LastTimestamp;
LastTimestamp = Current;
if (StackTop) {
TimerIdT Leaf = Nodes[StackTop].Interior;
if (Leaf >= LeafTimes.size())
LeafTimes.resize(Leaf + 1);
LeafTimes[Leaf] += Delta;
}
TTindex Prefix = StackTop;
while (Prefix) {
Nodes[Prefix].Time += Delta;
TTindex Next = Nodes[Prefix].Parent;
assert(Next < Prefix);
Prefix = Next;
}
}
namespace {
typedef std::multimap<double, IceString> DumpMapType;
// Dump the Map items in reverse order of their time contribution.
void dumpHelper(Ostream &Str, const DumpMapType &Map, double TotalTime) {
for (DumpMapType::const_reverse_iterator I = Map.rbegin(), E = Map.rend();
I != E; ++I) {
char buf[80];
snprintf(buf, llvm::array_lengthof(buf), " %10.6f (%4.1f%%): ", I->first,
I->first * 100 / TotalTime);
Str << buf << I->second << "\n";
}
}
} // end of anonymous namespace
void TimerStack::dump(Ostream &Str) {
update();
double TotalTime = LastTimestamp - FirstTimestamp;
assert(TotalTime);
Str << "Cumulative function times:\n";
DumpMapType CumulativeMap;
for (TTindex i = 1; i < Nodes.size(); ++i) {
TTindex Prefix = i;
IceString Suffix = "";
while (Prefix) {
if (Suffix.empty())
Suffix = IDs[Nodes[Prefix].Interior];
else
Suffix = IDs[Nodes[Prefix].Interior] + "." + Suffix;
assert(Nodes[Prefix].Parent < Prefix);
Prefix = Nodes[Prefix].Parent;
}
CumulativeMap.insert(std::make_pair(Nodes[i].Time, Suffix));
}
dumpHelper(Str, CumulativeMap, TotalTime);
Str << "Flat function times:\n";
DumpMapType FlatMap;
for (TimerIdT i = 0; i < LeafTimes.size(); ++i) {
FlatMap.insert(std::make_pair(LeafTimes[i], IDs[i]));
}
dumpHelper(Str, FlatMap, TotalTime);
Str << "Number of timer updates: " << StateChangeCount << "\n";
}
} // end of namespace Ice
//===- subzero/src/IceTimerTree.h - Pass timer defs -------------*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the TimerTree class, which allows flat and
// cumulative execution time collection of call chains.
//
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICETIMERTREE_H
#define SUBZERO_SRC_ICETIMERTREE_H
namespace Ice {
class TimerTreeNode;
// Timer tree index type
typedef std::vector<TimerTreeNode>::size_type TTindex;
// TimerTreeNode represents an interior or leaf node in the call tree.
// It contains a list of children, a pointer to its parent, and the
// timer ID for the node. It also holds the cumulative time spent at
// this node and below. The children are always at a higher index in
// the TimerTreeNode::Nodes array, and the parent is always at a lower
// index.
class TimerTreeNode {
public:
TimerTreeNode() : Parent(0), Interior(0), Time(0) {}
std::vector<TTindex> Children; // indexed by TimerIdT
TTindex Parent;
TimerIdT Interior;
double Time;
};
class TimerStack {
TimerStack(const TimerStack &) LLVM_DELETED_FUNCTION;
TimerStack &operator=(const TimerStack &) LLVM_DELETED_FUNCTION;
public:
TimerStack(const IceString &TopLevelName);
static TimerIdT getTimerID(const IceString &Name);
void push(TimerIdT ID);
void pop(TimerIdT ID);
void dump(Ostream &Str);
private:
void update();
static double timestamp() {
// TODO: Implement in terms of std::chrono for C++11.
return llvm::TimeRecord::getCurrentTime(false).getWallTime();
}
const double FirstTimestamp;
double LastTimestamp;
uint64_t StateChangeCount;
static std::vector<IceString> IDs; // indexed by TimerIdT
std::vector<TimerTreeNode> Nodes; // indexed by TTindex
std::vector<double> LeafTimes; // indexed by TimerIdT
TTindex StackTop;
};
} // end of namespace Ice
#endif // SUBZERO_SRC_ICETIMERTREE_H
......@@ -12,12 +12,11 @@
//
//===----------------------------------------------------------------------===//
#include "IceTranslator.h"
#include "IceCfg.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceTargetLowering.h"
#include "IceTranslator.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
......@@ -80,24 +79,13 @@ void Translator::translateFcn(Cfg *Fcn) {
if (Ctx->getFlags().DisableTranslation) {
Func->dump();
} else {
Timer TTranslate;
Func->translate();
if (Ctx->getFlags().SubzeroTimingEnabled) {
std::cerr << "[Subzero timing] Translate function "
<< Func->getFunctionName() << ": " << TTranslate.getElapsedSec()
<< " sec\n";
}
if (Func->hasError()) {
std::cerr << "ICE translation error: " << Func->getError() << "\n";
ErrorStatus = true;
}
Timer TEmit;
Func->emit();
if (Ctx->getFlags().SubzeroTimingEnabled) {
std::cerr << "[Subzero timing] Emit function " << Func->getFunctionName()
<< ": " << TEmit.getElapsedSec() << " sec\n";
}
Ctx->dumpStats(Func->getFunctionName());
}
}
......
......@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
#include "PNaClTranslator.h"
#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
......@@ -20,6 +19,7 @@
#include "IceInst.h"
#include "IceOperand.h"
#include "IceTypeConverter.h"
#include "PNaClTranslator.h"
#include "llvm/Analysis/NaCl/PNaClABIProps.h"
#include "llvm/Bitcode/NaCl/NaClBitcodeDecoders.h"
#include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h"
......@@ -852,7 +852,7 @@ public:
}
}
~FunctionParser() override;
~FunctionParser() override {};
// Set the next constant ID to the given constant C.
void setNextConstantID(Ice::Constant *C) {
......@@ -860,8 +860,6 @@ public:
}
private:
// Timer for reading function bitcode and converting to ICE.
Ice::Timer TConvert;
// The corresponding ICE function defined by the function block.
Ice::Cfg *Func;
// The index to the current basic block being built.
......@@ -1376,13 +1374,6 @@ private:
}
};
FunctionParser::~FunctionParser() {
if (getFlags().SubzeroTimingEnabled) {
errs() << "[Subzero timing] Convert function " << Func->getFunctionName()
<< ": " << TConvert.getElapsedSec() << " sec\n";
}
}
void FunctionParser::ReportInvalidBinopOpcode(unsigned Opcode, Ice::Type Ty) {
std::string Buffer;
raw_string_ostream StrBuf(Buffer);
......
......@@ -44,7 +44,6 @@ static cl::list<Ice::VerboseItem> VerboseList(
clEnumValN(Ice::IceV_RegOrigins, "orig", "Physical register origins"),
clEnumValN(Ice::IceV_LinearScan, "regalloc", "Linear scan details"),
clEnumValN(Ice::IceV_Frame, "frame", "Stack frame layout details"),
clEnumValN(Ice::IceV_Timing, "time", "Pass timing details"),
clEnumValN(Ice::IceV_AddrOpt, "addropt", "Address mode optimization"),
clEnumValN(Ice::IceV_All, "all", "Use all verbose options"),
clEnumValN(Ice::IceV_Most, "most",
......@@ -175,6 +174,8 @@ int main(int argc, char **argv) {
Ice::GlobalContext Ctx(Ls, Os, VMask, TargetArch, OptLevel, TestPrefix,
Flags);
static Ice::TimerIdT IDszmain = Ice::GlobalContext::getTimerID("szmain");
Ice::TimerMarker T(IDszmain, &Ctx);
int ErrorStatus = 0;
if (BuildOnRead) {
......@@ -184,15 +185,11 @@ int main(int argc, char **argv) {
} else {
// Parse the input LLVM IR file into a module.
SMDiagnostic Err;
Ice::Timer T;
static Ice::TimerIdT IDparse = Ice::GlobalContext::getTimerID("parse");
Ice::TimerMarker T1(IDparse, &Ctx);
Module *Mod =
NaClParseIRFile(IRFilename, InputFileFormat, Err, getGlobalContext());
if (SubzeroTimingEnabled) {
std::cerr << "[Subzero timing] IR Parsing: " << T.getElapsedSec()
<< " sec\n";
}
if (!Mod) {
Err.print(argv[0], errs());
return 1;
......@@ -202,6 +199,8 @@ int main(int argc, char **argv) {
Converter.convertToIce();
ErrorStatus = Converter.getErrorStatus();
}
if (SubzeroTimingEnabled)
Ctx.dumpTimers();
const bool FinalStats = true;
Ctx.dumpStats("_FINAL_", FinalStats);
return ErrorStatus;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment