Commit c4554d78 by Jim Stichnoth

Subzero: Rewrite the pass timing infrastructure.

This makes it much more useful for individual analysis and long-term translation performance tracking. 1. Collect and report aggregated across the entire translation, instead of function-by-function. If you really care about a single function, just extract it and translate it separately for analysis. 2. Remove "-verbose time" and just use -timing. 3. Collects two kinds of timings: cumulative and flat. Cumulative measures the total time, even if a callee also times itself. Flat only measures the currently active timer at the top of the stack. The flat times should add up to 100%, but cumulative will usually add up to much more than 100%. BUG= none R=jvoung@chromium.org Review URL: https://codereview.chromium.org/610813002
parent 118ca798
...@@ -80,9 +80,10 @@ SRCS= \ ...@@ -80,9 +80,10 @@ SRCS= \
IceMemoryRegion.cpp \ IceMemoryRegion.cpp \
IceOperand.cpp \ IceOperand.cpp \
IceRegAlloc.cpp \ IceRegAlloc.cpp \
IceRNG.cpp \ IceRNG.cpp \
IceTargetLowering.cpp \ IceTargetLowering.cpp \
IceTargetLoweringX8632.cpp \ IceTargetLoweringX8632.cpp \
IceTimerTree.cpp \
IceTranslator.cpp \ IceTranslator.cpp \
IceTypeConverter.cpp \ IceTypeConverter.cpp \
IceTypes.cpp \ IceTypes.cpp \
...@@ -113,7 +114,7 @@ $(OBJS): | $(OBJDIR) ...@@ -113,7 +114,7 @@ $(OBJS): | $(OBJDIR)
$(OBJDIR): $(OBJDIR):
@mkdir -p $@ @mkdir -p $@
check-lit: llvm2ice check-lit: llvm2ice make_symlink
LLVM_BIN_PATH=$(LLVM_BIN_PATH) \ LLVM_BIN_PATH=$(LLVM_BIN_PATH) \
$(LLVM_SRC_PATH)/utils/lit/lit.py -sv tests_lit $(LLVM_SRC_PATH)/utils/lit/lit.py -sv tests_lit
......
...@@ -69,14 +69,14 @@ bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); } ...@@ -69,14 +69,14 @@ bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); }
void Cfg::translate() { void Cfg::translate() {
if (hasError()) if (hasError())
return; return;
static TimerIdT IDtranslate = GlobalContext::getTimerID("translate");
TimerMarker T(IDtranslate, getContext());
dump("Initial CFG"); dump("Initial CFG");
Timer T_translate;
// The set of translation passes and their order are determined by // The set of translation passes and their order are determined by
// the target. // the target.
getTarget()->translate(); getTarget()->translate();
T_translate.printElapsedUs(getContext(), "translate()");
dump("Final output"); dump("Final output");
} }
...@@ -88,6 +88,9 @@ void Cfg::computePredecessors() { ...@@ -88,6 +88,9 @@ void Cfg::computePredecessors() {
} }
void Cfg::renumberInstructions() { void Cfg::renumberInstructions() {
static TimerIdT IDrenumberInstructions =
GlobalContext::getTimerID("renumberInstructions");
TimerMarker T(IDrenumberInstructions, getContext());
NextInstNumber = 1; NextInstNumber = 1;
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->renumberInstructions(); (*I)->renumberInstructions();
...@@ -96,6 +99,8 @@ void Cfg::renumberInstructions() { ...@@ -96,6 +99,8 @@ void Cfg::renumberInstructions() {
// placePhiLoads() must be called before placePhiStores(). // placePhiLoads() must be called before placePhiStores().
void Cfg::placePhiLoads() { void Cfg::placePhiLoads() {
static TimerIdT IDplacePhiLoads = GlobalContext::getTimerID("placePhiLoads");
TimerMarker T(IDplacePhiLoads, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->placePhiLoads(); (*I)->placePhiLoads();
} }
...@@ -103,34 +108,48 @@ void Cfg::placePhiLoads() { ...@@ -103,34 +108,48 @@ void Cfg::placePhiLoads() {
// placePhiStores() must be called after placePhiLoads(). // placePhiStores() must be called after placePhiLoads().
void Cfg::placePhiStores() { void Cfg::placePhiStores() {
static TimerIdT IDplacePhiStores =
GlobalContext::getTimerID("placePhiStores");
TimerMarker T(IDplacePhiStores, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->placePhiStores(); (*I)->placePhiStores();
} }
} }
void Cfg::deletePhis() { void Cfg::deletePhis() {
static TimerIdT IDdeletePhis = GlobalContext::getTimerID("deletePhis");
TimerMarker T(IDdeletePhis, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->deletePhis(); (*I)->deletePhis();
} }
} }
void Cfg::doArgLowering() { void Cfg::doArgLowering() {
static TimerIdT IDdoArgLowering = GlobalContext::getTimerID("doArgLowering");
TimerMarker T(IDdoArgLowering, getContext());
getTarget()->lowerArguments(); getTarget()->lowerArguments();
} }
void Cfg::doAddressOpt() { void Cfg::doAddressOpt() {
static TimerIdT IDdoAddressOpt = GlobalContext::getTimerID("doAddressOpt");
TimerMarker T(IDdoAddressOpt, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->doAddressOpt(); (*I)->doAddressOpt();
} }
} }
void Cfg::doNopInsertion() { void Cfg::doNopInsertion() {
static TimerIdT IDdoNopInsertion =
GlobalContext::getTimerID("doNopInsertion");
TimerMarker T(IDdoNopInsertion, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->doNopInsertion(); (*I)->doNopInsertion();
} }
} }
void Cfg::genCode() { void Cfg::genCode() {
static TimerIdT IDgenCode = GlobalContext::getTimerID("genCode");
TimerMarker T(IDgenCode, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->genCode(); (*I)->genCode();
} }
...@@ -138,6 +157,8 @@ void Cfg::genCode() { ...@@ -138,6 +157,8 @@ void Cfg::genCode() {
// Compute the stack frame layout. // Compute the stack frame layout.
void Cfg::genFrame() { void Cfg::genFrame() {
static TimerIdT IDgenFrame = GlobalContext::getTimerID("genFrame");
TimerMarker T(IDgenFrame, getContext());
getTarget()->addProlog(Entry); getTarget()->addProlog(Entry);
// TODO: Consider folding epilog generation into the final // TODO: Consider folding epilog generation into the final
// emission/assembly pass to avoid an extra iteration over the node // emission/assembly pass to avoid an extra iteration over the node
...@@ -154,6 +175,9 @@ void Cfg::genFrame() { ...@@ -154,6 +175,9 @@ void Cfg::genFrame() {
// completely with a single block. It is a quick single pass and // completely with a single block. It is a quick single pass and
// doesn't need to iterate until convergence. // doesn't need to iterate until convergence.
void Cfg::livenessLightweight() { void Cfg::livenessLightweight() {
static TimerIdT IDlivenessLightweight =
GlobalContext::getTimerID("livenessLightweight");
TimerMarker T(IDlivenessLightweight, getContext());
getVMetadata()->init(); getVMetadata()->init();
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->livenessLightweight(); (*I)->livenessLightweight();
...@@ -161,6 +185,8 @@ void Cfg::livenessLightweight() { ...@@ -161,6 +185,8 @@ void Cfg::livenessLightweight() {
} }
void Cfg::liveness(LivenessMode Mode) { void Cfg::liveness(LivenessMode Mode) {
static TimerIdT IDliveness = GlobalContext::getTimerID("liveness");
TimerMarker T(IDliveness, getContext());
Live.reset(new Liveness(this, Mode)); Live.reset(new Liveness(this, Mode));
getVMetadata()->init(); getVMetadata()->init();
Live->init(); Live->init();
...@@ -199,9 +225,10 @@ void Cfg::liveness(LivenessMode Mode) { ...@@ -199,9 +225,10 @@ void Cfg::liveness(LivenessMode Mode) {
// Collect timing for just the portion that constructs the live // Collect timing for just the portion that constructs the live
// range intervals based on the end-of-live-range computation, for a // range intervals based on the end-of-live-range computation, for a
// finer breakdown of the cost. // finer breakdown of the cost.
Timer T_liveRange;
// Make a final pass over instructions to delete dead instructions // Make a final pass over instructions to delete dead instructions
// and build each Variable's live range. // and build each Variable's live range.
static TimerIdT IDliveRange = GlobalContext::getTimerID("liveRange");
TimerMarker T1(IDliveRange, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->livenessPostprocess(Mode, getLiveness()); (*I)->livenessPostprocess(Mode, getLiveness());
} }
...@@ -241,7 +268,6 @@ void Cfg::liveness(LivenessMode Mode) { ...@@ -241,7 +268,6 @@ void Cfg::liveness(LivenessMode Mode) {
if (Var->getWeight().isInf()) if (Var->getWeight().isInf())
Var->setLiveRangeInfiniteWeight(); Var->setLiveRangeInfiniteWeight();
} }
T_liveRange.printElapsedUs(getContext(), "live range construction");
dump(); dump();
} }
} }
...@@ -249,6 +275,9 @@ void Cfg::liveness(LivenessMode Mode) { ...@@ -249,6 +275,9 @@ void Cfg::liveness(LivenessMode Mode) {
// Traverse every Variable of every Inst and verify that it // Traverse every Variable of every Inst and verify that it
// appears within the Variable's computed live range. // appears within the Variable's computed live range.
bool Cfg::validateLiveness() const { bool Cfg::validateLiveness() const {
static TimerIdT IDvalidateLiveness =
GlobalContext::getTimerID("validateLiveness");
TimerMarker T(IDvalidateLiveness, getContext());
bool Valid = true; bool Valid = true;
Ostream &Str = Ctx->getStrDump(); Ostream &Str = Ctx->getStrDump();
for (NodeList::const_iterator I1 = Nodes.begin(), E1 = Nodes.end(); I1 != E1; for (NodeList::const_iterator I1 = Nodes.begin(), E1 = Nodes.end(); I1 != E1;
...@@ -296,18 +325,21 @@ bool Cfg::validateLiveness() const { ...@@ -296,18 +325,21 @@ bool Cfg::validateLiveness() const {
} }
void Cfg::doBranchOpt() { void Cfg::doBranchOpt() {
static TimerIdT IDdoBranchOpt = GlobalContext::getTimerID("doBranchOpt");
TimerMarker T(IDdoBranchOpt, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
NodeList::iterator NextNode = I; NodeList::iterator NextNode = I;
++NextNode; ++NextNode;
(*I)->doBranchOpt(*NextNode); (*I)->doBranchOpt(NextNode == E ? NULL : *NextNode);
} }
} }
// ======================== Dump routines ======================== // // ======================== Dump routines ======================== //
void Cfg::emit() { void Cfg::emit() {
static TimerIdT IDemit = GlobalContext::getTimerID("emit");
TimerMarker T(IDemit, getContext());
Ostream &Str = Ctx->getStrEmit(); Ostream &Str = Ctx->getStrEmit();
Timer T_emit;
if (!Ctx->testAndSetHasEmittedFirstMethod()) { if (!Ctx->testAndSetHasEmittedFirstMethod()) {
// Print a helpful command for assembling the output. // Print a helpful command for assembling the output.
// TODO: have the Target emit the header // TODO: have the Target emit the header
...@@ -339,7 +371,6 @@ void Cfg::emit() { ...@@ -339,7 +371,6 @@ void Cfg::emit() {
(*I)->emit(this); (*I)->emit(this);
} }
Str << "\n"; Str << "\n";
T_emit.printElapsedUs(Ctx, "emit()");
} }
// Dumps the IR with an optional introductory message. // Dumps the IR with an optional introductory message.
......
...@@ -11,11 +11,10 @@ ...@@ -11,11 +11,10 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "IceConverter.h"
#include "IceCfg.h" #include "IceCfg.h"
#include "IceCfgNode.h" #include "IceCfgNode.h"
#include "IceClFlags.h" #include "IceClFlags.h"
#include "IceConverter.h"
#include "IceDefs.h" #include "IceDefs.h"
#include "IceGlobalContext.h" #include "IceGlobalContext.h"
#include "IceInst.h" #include "IceInst.h"
...@@ -60,6 +59,9 @@ public: ...@@ -60,6 +59,9 @@ public:
// Caller is expected to delete the returned Ice::Cfg object. // Caller is expected to delete the returned Ice::Cfg object.
Ice::Cfg *convertFunction(const Function *F) { Ice::Cfg *convertFunction(const Function *F) {
static Ice::TimerIdT IDllvmConvert =
Ice::GlobalContext::getTimerID("llvmConvert");
Ice::TimerMarker T(IDllvmConvert, Ctx);
VarMap.clear(); VarMap.clear();
NodeMap.clear(); NodeMap.clear();
Func = new Ice::Cfg(Ctx); Func = new Ice::Cfg(Ctx);
...@@ -621,6 +623,8 @@ private: ...@@ -621,6 +623,8 @@ private:
namespace Ice { namespace Ice {
void Converter::convertToIce() { void Converter::convertToIce() {
static TimerIdT IDconvertToIce = GlobalContext::getTimerID("convertToIce");
TimerMarker T(IDconvertToIce, Ctx);
nameUnnamedGlobalAddresses(Mod); nameUnnamedGlobalAddresses(Mod);
if (!Ctx->getFlags().DisableGlobals) if (!Ctx->getFlags().DisableGlobals)
convertGlobals(Mod); convertGlobals(Mod);
...@@ -635,11 +639,6 @@ void Converter::convertFunctions() { ...@@ -635,11 +639,6 @@ void Converter::convertFunctions() {
Timer TConvert; Timer TConvert;
Cfg *Fcn = FunctionConverter.convertFunction(I); Cfg *Fcn = FunctionConverter.convertFunction(I);
if (Ctx->getFlags().SubzeroTimingEnabled) {
std::cerr << "[Subzero timing] Convert function "
<< Fcn->getFunctionName() << ": " << TConvert.getElapsedSec()
<< " sec\n";
}
translateFcn(Fcn); translateFcn(Fcn);
} }
......
...@@ -78,6 +78,8 @@ typedef uint32_t SizeT; ...@@ -78,6 +78,8 @@ typedef uint32_t SizeT;
// numbers are used for representing Variable live ranges. // numbers are used for representing Variable live ranges.
typedef int32_t InstNumberT; typedef int32_t InstNumberT;
typedef uint32_t TimerIdT;
enum LivenessMode { enum LivenessMode {
// Basic version of live-range-end calculation. Marks the last uses // Basic version of live-range-end calculation. Marks the last uses
// of variables based on dataflow analysis. Records the set of // of variables based on dataflow analysis. Records the set of
...@@ -103,34 +105,14 @@ enum VerboseItem { ...@@ -103,34 +105,14 @@ enum VerboseItem {
IceV_RegOrigins = 1 << 7, IceV_RegOrigins = 1 << 7,
IceV_LinearScan = 1 << 8, IceV_LinearScan = 1 << 8,
IceV_Frame = 1 << 9, IceV_Frame = 1 << 9,
IceV_Timing = 1 << 10, IceV_AddrOpt = 1 << 10,
IceV_AddrOpt = 1 << 11,
IceV_All = ~IceV_None, IceV_All = ~IceV_None,
IceV_Most = IceV_All & ~(IceV_Timing | IceV_LinearScan) IceV_Most = IceV_All & ~IceV_LinearScan
}; };
typedef uint32_t VerboseMask; typedef uint32_t VerboseMask;
typedef llvm::raw_ostream Ostream; typedef llvm::raw_ostream Ostream;
// TODO: Implement in terms of std::chrono after switching to C++11.
class Timer {
public:
Timer() : Start(llvm::TimeRecord::getCurrentTime(false)) {}
uint64_t getElapsedNs() const { return getElapsedSec() * 1000 * 1000 * 1000; }
uint64_t getElapsedUs() const { return getElapsedSec() * 1000 * 1000; }
uint64_t getElapsedMs() const { return getElapsedSec() * 1000; }
double getElapsedSec() const {
llvm::TimeRecord End = llvm::TimeRecord::getCurrentTime(false);
return End.getWallTime() - Start.getWallTime();
}
void printElapsedUs(GlobalContext *Ctx, const IceString &Tag) const;
private:
const llvm::TimeRecord Start;
Timer(const Timer &) LLVM_DELETED_FUNCTION;
Timer &operator=(const Timer &) LLVM_DELETED_FUNCTION;
};
} // end of namespace Ice } // end of namespace Ice
#endif // SUBZERO_SRC_ICEDEFS_H #endif // SUBZERO_SRC_ICEDEFS_H
//===- subzero/src/IceGlobalContext.cpp - Global context defs ---*- C++ -*-===// //===- subzero/src/IceGlobalContext.cpp - Global context defs -------------===//
// //
// The Subzero Code Generator // The Subzero Code Generator
// //
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "IceGlobalContext.h" #include "IceGlobalContext.h"
#include "IceOperand.h" #include "IceOperand.h"
#include "IceTargetLowering.h" #include "IceTargetLowering.h"
#include "IceTimerTree.h"
namespace Ice { namespace Ice {
...@@ -122,7 +123,7 @@ GlobalContext::GlobalContext(llvm::raw_ostream *OsDump, ...@@ -122,7 +123,7 @@ GlobalContext::GlobalContext(llvm::raw_ostream *OsDump,
: StrDump(OsDump), StrEmit(OsEmit), VMask(Mask), : StrDump(OsDump), StrEmit(OsEmit), VMask(Mask),
ConstPool(new ConstantPool()), Arch(Arch), Opt(Opt), ConstPool(new ConstantPool()), Arch(Arch), Opt(Opt),
TestPrefix(TestPrefix), Flags(Flags), HasEmittedFirstMethod(false), TestPrefix(TestPrefix), Flags(Flags), HasEmittedFirstMethod(false),
RNG("") {} RNG(""), Timers(new TimerStack("main")) {}
// Scan a string for S[0-9A-Z]*_ patterns and replace them with // Scan a string for S[0-9A-Z]*_ patterns and replace them with
// S<num>_ where <num> is the next base-36 value. If a type name // S<num>_ where <num> is the next base-36 value. If a type name
...@@ -384,6 +385,14 @@ ConstantList GlobalContext::getConstantPool(Type Ty) const { ...@@ -384,6 +385,14 @@ ConstantList GlobalContext::getConstantPool(Type Ty) const {
llvm_unreachable("Unknown type"); llvm_unreachable("Unknown type");
} }
TimerIdT GlobalContext::getTimerID(const IceString &Name) {
return TimerStack::getTimerID(Name);
}
void GlobalContext::pushTimer(TimerIdT ID) { Timers->push(ID); }
void GlobalContext::popTimer(TimerIdT ID) { Timers->pop(ID); }
void GlobalContext::dumpStats(const IceString &Name, bool Final) { void GlobalContext::dumpStats(const IceString &Name, bool Final) {
if (Flags.DumpStats) { if (Flags.DumpStats) {
if (Final) { if (Final) {
...@@ -395,12 +404,6 @@ void GlobalContext::dumpStats(const IceString &Name, bool Final) { ...@@ -395,12 +404,6 @@ void GlobalContext::dumpStats(const IceString &Name, bool Final) {
} }
} }
void Timer::printElapsedUs(GlobalContext *Ctx, const IceString &Tag) const { void GlobalContext::dumpTimers() { Timers->dump(getStrDump()); }
if (Ctx->isVerbose(IceV_Timing)) {
// Prefixing with '#' allows timing strings to be included
// without error in textual assembly output.
Ctx->getStrDump() << "# " << getElapsedUs() << " usec " << Tag << "\n";
}
}
} // end of namespace Ice } // end of namespace Ice
...@@ -69,12 +69,8 @@ public: ...@@ -69,12 +69,8 @@ public:
// Returns true if any of the specified options in the verbose mask // Returns true if any of the specified options in the verbose mask
// are set. If the argument is omitted, it checks if any verbose // are set. If the argument is omitted, it checks if any verbose
// options at all are set. IceV_Timing is treated specially, so // options at all are set.
// that running with just IceV_Timing verbosity doesn't trigger an bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; }
// avalanche of extra output.
bool isVerbose(VerboseMask Mask = (IceV_All & ~IceV_Timing)) const {
return VMask & Mask;
}
void setVerbose(VerboseMask Mask) { VMask = Mask; } void setVerbose(VerboseMask Mask) { VMask = Mask; }
void addVerbose(VerboseMask Mask) { VMask |= Mask; } void addVerbose(VerboseMask Mask) { VMask |= Mask; }
void subVerbose(VerboseMask Mask) { VMask &= ~Mask; } void subVerbose(VerboseMask Mask) { VMask &= ~Mask; }
...@@ -154,6 +150,11 @@ public: ...@@ -154,6 +150,11 @@ public:
StatsCumulative.updateFills(); StatsCumulative.updateFills();
} }
static TimerIdT getTimerID(const IceString &Name);
void pushTimer(TimerIdT ID);
void popTimer(TimerIdT ID);
void dumpTimers();
private: private:
Ostream *StrDump; // Stream for dumping / diagnostics Ostream *StrDump; // Stream for dumping / diagnostics
Ostream *StrEmit; // Stream for code emission Ostream *StrEmit; // Stream for code emission
...@@ -170,6 +171,7 @@ private: ...@@ -170,6 +171,7 @@ private:
RandomNumberGenerator RNG; RandomNumberGenerator RNG;
CodeStats StatsFunction; CodeStats StatsFunction;
CodeStats StatsCumulative; CodeStats StatsCumulative;
llvm::OwningPtr<class TimerStack> Timers;
GlobalContext(const GlobalContext &) LLVM_DELETED_FUNCTION; GlobalContext(const GlobalContext &) LLVM_DELETED_FUNCTION;
GlobalContext &operator=(const GlobalContext &) LLVM_DELETED_FUNCTION; GlobalContext &operator=(const GlobalContext &) LLVM_DELETED_FUNCTION;
...@@ -178,6 +180,30 @@ private: ...@@ -178,6 +180,30 @@ private:
void incrementSubstitutions(ManglerVector &OldName) const; void incrementSubstitutions(ManglerVector &OldName) const;
}; };
// Helper class to push and pop a timer marker. The constructor
// pushes a marker, and the destructor pops it. This is for
// convenient timing of regions of code.
class TimerMarker {
TimerMarker(const TimerMarker &) LLVM_DELETED_FUNCTION;
TimerMarker &operator=(const TimerMarker &) LLVM_DELETED_FUNCTION;
public:
TimerMarker(TimerIdT ID, GlobalContext *Ctx)
: ID(ID), Ctx(Ctx), Active(Ctx->getFlags().SubzeroTimingEnabled) {
if (Active)
Ctx->pushTimer(ID);
}
~TimerMarker() {
if (Active)
Ctx->popTimer(ID);
}
private:
TimerIdT ID;
GlobalContext *const Ctx;
const bool Active;
};
} // end of namespace Ice } // end of namespace Ice
#endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H
...@@ -509,9 +509,9 @@ private: ...@@ -509,9 +509,9 @@ private:
class InstLoad : public InstHighLevel { class InstLoad : public InstHighLevel {
public: public:
static InstLoad *create(Cfg *Func, Variable *Dest, Operand *SourceAddr, static InstLoad *create(Cfg *Func, Variable *Dest, Operand *SourceAddr,
uint32_t align = 1) { uint32_t Align = 1) {
// TODO(kschimpf) Stop ignoring alignment specification. // TODO(kschimpf) Stop ignoring alignment specification.
(void)align; (void)Align;
return new (Func->allocateInst<InstLoad>()) return new (Func->allocateInst<InstLoad>())
InstLoad(Func, Dest, SourceAddr); InstLoad(Func, Dest, SourceAddr);
} }
......
...@@ -122,9 +122,26 @@ bool LiveRange::overlaps(const LiveRange &Other) const { ...@@ -122,9 +122,26 @@ bool LiveRange::overlaps(const LiveRange &Other) const {
} }
bool LiveRange::overlaps(InstNumberT OtherBegin) const { bool LiveRange::overlaps(InstNumberT OtherBegin) const {
bool Result = false;
for (RangeType::const_iterator I = Range.begin(), E = Range.end(); I != E;
++I) {
if (OtherBegin < I->first) {
Result = false;
break;
}
if (OtherBegin < I->second) {
Result = true;
break;
}
}
#if 0
// An equivalent but less inefficient implementation:
LiveRange Temp; LiveRange Temp;
Temp.addSegment(OtherBegin, OtherBegin + 1); Temp.addSegment(OtherBegin, OtherBegin + 1);
return overlaps(Temp); bool Validation = overlaps(Temp);
assert(Result == Validation);
#endif
return Result;
} }
// Returns true if the live range contains the given instruction // Returns true if the live range contains the given instruction
...@@ -259,6 +276,8 @@ const Inst *VariableTracking::getSingleDefinition() const { ...@@ -259,6 +276,8 @@ const Inst *VariableTracking::getSingleDefinition() const {
} }
void VariablesMetadata::init() { void VariablesMetadata::init() {
static TimerIdT IDvmetadata = GlobalContext::getTimerID("vmetadata");
TimerMarker T(IDvmetadata, Func->getContext());
Metadata.clear(); Metadata.clear();
Metadata.resize(Func->getNumVariables()); Metadata.resize(Func->getNumVariables());
......
...@@ -64,12 +64,15 @@ void dumpDisableOverlap(const Cfg *Func, const Variable *Var, ...@@ -64,12 +64,15 @@ void dumpDisableOverlap(const Cfg *Func, const Variable *Var,
// preparation. Results are assigned to Variable::RegNum for each // preparation. Results are assigned to Variable::RegNum for each
// Variable. // Variable.
void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
static TimerIdT IDscan = GlobalContext::getTimerID("linearScan");
TimerMarker T(IDscan, Func->getContext());
assert(RegMaskFull.any()); // Sanity check assert(RegMaskFull.any()); // Sanity check
Unhandled.clear(); Unhandled.clear();
Handled.clear(); Handled.clear();
Inactive.clear(); Inactive.clear();
Active.clear(); Active.clear();
Ostream &Str = Func->getContext()->getStrDump(); Ostream &Str = Func->getContext()->getStrDump();
bool Verbose = Func->getContext()->isVerbose(IceV_LinearScan);
Func->resetCurrentNode(); Func->resetCurrentNode();
VariablesMetadata *VMetadata = Func->getVMetadata(); VariablesMetadata *VMetadata = Func->getVMetadata();
...@@ -81,26 +84,32 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -81,26 +84,32 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
// a result, it may be useful to design a better data structure for // a result, it may be useful to design a better data structure for
// storing Func->getVariables(). // storing Func->getVariables().
const VarList &Vars = Func->getVariables(); const VarList &Vars = Func->getVariables();
for (VarList::const_iterator I = Vars.begin(), E = Vars.end(); I != E; ++I) { {
Variable *Var = *I; static TimerIdT IDinitUnhandled =
// Explicitly don't consider zero-weight variables, which are GlobalContext::getTimerID("initUnhandled");
// meant to be spill slots. TimerMarker T(IDinitUnhandled, Func->getContext());
if (Var->getWeight() == RegWeight::Zero) for (VarList::const_iterator I = Vars.begin(), E = Vars.end(); I != E;
continue; ++I) {
// Don't bother if the variable has a null live range, which means Variable *Var = *I;
// it was never referenced. // Explicitly don't consider zero-weight variables, which are
if (Var->getLiveRange().isEmpty()) // meant to be spill slots.
continue; if (Var->getWeight() == RegWeight::Zero)
Unhandled.insert(LiveRangeWrapper(Var)); continue;
if (Var->hasReg()) { // Don't bother if the variable has a null live range, which means
Var->setRegNumTmp(Var->getRegNum()); // it was never referenced.
Var->setLiveRangeInfiniteWeight(); if (Var->getLiveRange().isEmpty())
continue;
Unhandled.insert(LiveRangeWrapper(Var));
if (Var->hasReg()) {
Var->setRegNumTmp(Var->getRegNum());
Var->setLiveRangeInfiniteWeight();
}
} }
} }
// RegUses[I] is the number of live ranges (variables) that register // RegUses[I] is the number of live ranges (variables) that register
// I is currently assigned to. It can be greater than 1 as a result // I is currently assigned to. It can be greater than 1 as a result
// of Variable::AllowRegisterOverlap. // of AllowOverlap inference below.
std::vector<int> RegUses(RegMaskFull.size()); std::vector<int> RegUses(RegMaskFull.size());
// Unhandled is already set to all ranges in increasing order of // Unhandled is already set to all ranges in increasing order of
// start points. // start points.
...@@ -112,7 +121,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -112,7 +121,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
while (!Unhandled.empty()) { while (!Unhandled.empty()) {
LiveRangeWrapper Cur = *Unhandled.begin(); LiveRangeWrapper Cur = *Unhandled.begin();
Unhandled.erase(Unhandled.begin()); Unhandled.erase(Unhandled.begin());
if (Func->getContext()->isVerbose(IceV_LinearScan)) { if (Verbose) {
Str << "\nConsidering "; Str << "\nConsidering ";
Cur.dump(Func); Cur.dump(Func);
Str << "\n"; Str << "\n";
...@@ -130,7 +139,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -130,7 +139,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
int32_t RegNum = Cur.Var->getRegNum(); int32_t RegNum = Cur.Var->getRegNum();
// RegNumTmp should have already been set above. // RegNumTmp should have already been set above.
assert(Cur.Var->getRegNumTmp() == RegNum); assert(Cur.Var->getRegNumTmp() == RegNum);
if (Func->getContext()->isVerbose(IceV_LinearScan)) { if (Verbose) {
Str << "Precoloring "; Str << "Precoloring ";
Cur.dump(Func); Cur.dump(Func);
Str << "\n"; Str << "\n";
...@@ -150,7 +159,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -150,7 +159,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
bool Moved = false; bool Moved = false;
if (Item.endsBefore(Cur)) { if (Item.endsBefore(Cur)) {
// Move Item from Active to Handled list. // Move Item from Active to Handled list.
if (Func->getContext()->isVerbose(IceV_LinearScan)) { if (Verbose) {
Str << "Expiring "; Str << "Expiring ";
Item.dump(Func); Item.dump(Func);
Str << "\n"; Str << "\n";
...@@ -160,7 +169,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -160,7 +169,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
Moved = true; Moved = true;
} else if (!Item.overlapsStart(Cur)) { } else if (!Item.overlapsStart(Cur)) {
// Move Item from Active to Inactive list. // Move Item from Active to Inactive list.
if (Func->getContext()->isVerbose(IceV_LinearScan)) { if (Verbose) {
Str << "Inactivating "; Str << "Inactivating ";
Item.dump(Func); Item.dump(Func);
Str << "\n"; Str << "\n";
...@@ -186,7 +195,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -186,7 +195,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
LiveRangeWrapper Item = *I; LiveRangeWrapper Item = *I;
if (Item.endsBefore(Cur)) { if (Item.endsBefore(Cur)) {
// Move Item from Inactive to Handled list. // Move Item from Inactive to Handled list.
if (Func->getContext()->isVerbose(IceV_LinearScan)) { if (Verbose) {
Str << "Expiring "; Str << "Expiring ";
Item.dump(Func); Item.dump(Func);
Str << "\n"; Str << "\n";
...@@ -195,7 +204,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -195,7 +204,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
Handled.push_back(Item); Handled.push_back(Item);
} else if (Item.overlapsStart(Cur)) { } else if (Item.overlapsStart(Cur)) {
// Move Item from Inactive to Active list. // Move Item from Inactive to Active list.
if (Func->getContext()->isVerbose(IceV_LinearScan)) { if (Verbose) {
Str << "Reactivating "; Str << "Reactivating ";
Item.dump(Func); Item.dump(Func);
Str << "\n"; Str << "\n";
...@@ -261,7 +270,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -261,7 +270,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
} }
} }
} }
if (Func->getContext()->isVerbose(IceV_LinearScan)) { if (Verbose) {
if (Prefer) { if (Prefer) {
Str << "Initial Prefer=" << *Prefer << " R=" << PreferReg Str << "Initial Prefer=" << *Prefer << " R=" << PreferReg
<< " LIVE=" << Prefer->getLiveRange() << " Overlap=" << AllowOverlap << " LIVE=" << Prefer->getLiveRange() << " Overlap=" << AllowOverlap
...@@ -279,8 +288,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -279,8 +288,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
int32_t RegNum = Item.Var->getRegNumTmp(); int32_t RegNum = Item.Var->getRegNumTmp();
// Don't assert(Free[RegNum]) because in theory (though // Don't assert(Free[RegNum]) because in theory (though
// probably never in practice) there could be two inactive // probably never in practice) there could be two inactive
// variables that were allowed marked with // variables that were marked with AllowOverlap.
// AllowRegisterOverlap.
Free[RegNum] = false; Free[RegNum] = false;
// Disable AllowOverlap if an Inactive variable, which is not // Disable AllowOverlap if an Inactive variable, which is not
// Prefer, shares Prefer's register, and has a definition // Prefer, shares Prefer's register, and has a definition
...@@ -331,7 +339,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -331,7 +339,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
} }
// Print info about physical register availability. // Print info about physical register availability.
if (Func->getContext()->isVerbose(IceV_LinearScan)) { if (Verbose) {
for (SizeT i = 0; i < RegMask.size(); ++i) { for (SizeT i = 0; i < RegMask.size(); ++i) {
if (RegMask[i]) { if (RegMask[i]) {
Str << Func->getTarget()->getRegName(i, IceType_i32) Str << Func->getTarget()->getRegName(i, IceType_i32)
...@@ -346,7 +354,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -346,7 +354,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
// First choice: a preferred register that is either free or is // First choice: a preferred register that is either free or is
// allowed to overlap with its linked variable. // allowed to overlap with its linked variable.
Cur.Var->setRegNumTmp(PreferReg); Cur.Var->setRegNumTmp(PreferReg);
if (Func->getContext()->isVerbose(IceV_LinearScan)) { if (Verbose) {
Str << "Preferring "; Str << "Preferring ";
Cur.dump(Func); Cur.dump(Func);
Str << "\n"; Str << "\n";
...@@ -360,7 +368,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -360,7 +368,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
// picking the lowest-numbered available register? // picking the lowest-numbered available register?
int32_t RegNum = Free.find_first(); int32_t RegNum = Free.find_first();
Cur.Var->setRegNumTmp(RegNum); Cur.Var->setRegNumTmp(RegNum);
if (Func->getContext()->isVerbose(IceV_LinearScan)) { if (Verbose) {
Str << "Allocating "; Str << "Allocating ";
Cur.dump(Func); Cur.dump(Func);
Str << "\n"; Str << "\n";
...@@ -434,7 +442,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -434,7 +442,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
++Next; ++Next;
LiveRangeWrapper Item = *I; LiveRangeWrapper Item = *I;
if (Item.Var->getRegNumTmp() == MinWeightIndex) { if (Item.Var->getRegNumTmp() == MinWeightIndex) {
if (Func->getContext()->isVerbose(IceV_LinearScan)) { if (Verbose) {
Str << "Evicting "; Str << "Evicting ";
Item.dump(Func); Item.dump(Func);
Str << "\n"; Str << "\n";
...@@ -463,7 +471,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -463,7 +471,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
// instructions. // instructions.
if (Item.Var->getRegNumTmp() == MinWeightIndex && if (Item.Var->getRegNumTmp() == MinWeightIndex &&
Item.overlaps(Cur)) { Item.overlaps(Cur)) {
if (Func->getContext()->isVerbose(IceV_LinearScan)) { if (Verbose) {
Str << "Evicting "; Str << "Evicting ";
Item.dump(Func); Item.dump(Func);
Str << "\n"; Str << "\n";
...@@ -478,7 +486,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -478,7 +486,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
assert(RegUses[MinWeightIndex] >= 0); assert(RegUses[MinWeightIndex] >= 0);
++RegUses[MinWeightIndex]; ++RegUses[MinWeightIndex];
Active.push_back(Cur); Active.push_back(Cur);
if (Func->getContext()->isVerbose(IceV_LinearScan)) { if (Verbose) {
Str << "Allocating "; Str << "Allocating ";
Cur.dump(Func); Cur.dump(Func);
Str << "\n"; Str << "\n";
...@@ -509,7 +517,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -509,7 +517,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
I != E; ++I) { I != E; ++I) {
LiveRangeWrapper Item = *I; LiveRangeWrapper Item = *I;
int32_t RegNum = Item.Var->getRegNumTmp(); int32_t RegNum = Item.Var->getRegNumTmp();
if (Func->getContext()->isVerbose(IceV_LinearScan)) { if (Verbose) {
if (!Item.Var->hasRegTmp()) { if (!Item.Var->hasRegTmp()) {
Str << "Not assigning "; Str << "Not assigning ";
Item.Var->dump(Func); Item.Var->dump(Func);
......
...@@ -229,6 +229,8 @@ void TargetLowering::lower() { ...@@ -229,6 +229,8 @@ void TargetLowering::lower() {
// registers could potentially be parameterized if we want to restrict // registers could potentially be parameterized if we want to restrict
// registers e.g. for performance testing. // registers e.g. for performance testing.
void TargetLowering::regAlloc() { void TargetLowering::regAlloc() {
static TimerIdT IDregAlloc = GlobalContext::getTimerID("regAlloc");
TimerMarker T(IDregAlloc, Ctx);
LinearScan LinearScan(Func); LinearScan LinearScan(Func);
RegSetMask RegInclude = RegSet_None; RegSetMask RegInclude = RegSet_None;
RegSetMask RegExclude = RegSet_None; RegSetMask RegExclude = RegSet_None;
......
...@@ -302,35 +302,27 @@ TargetX8632::TargetX8632(Cfg *Func) ...@@ -302,35 +302,27 @@ TargetX8632::TargetX8632(Cfg *Func)
void TargetX8632::translateO2() { void TargetX8632::translateO2() {
GlobalContext *Context = Func->getContext(); GlobalContext *Context = Func->getContext();
static TimerIdT IDO2 = GlobalContext::getTimerID("O2");
TimerMarker T(IDO2, Context);
// Lower Phi instructions. // Lower Phi instructions.
Timer T_placePhiLoads;
Func->placePhiLoads(); Func->placePhiLoads();
if (Func->hasError()) if (Func->hasError())
return; return;
T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
Timer T_placePhiStores;
Func->placePhiStores(); Func->placePhiStores();
if (Func->hasError()) if (Func->hasError())
return; return;
T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
Timer T_deletePhis;
Func->deletePhis(); Func->deletePhis();
if (Func->hasError()) if (Func->hasError())
return; return;
T_deletePhis.printElapsedUs(Context, "deletePhis()");
Func->dump("After Phi lowering"); Func->dump("After Phi lowering");
// Address mode optimization. // Address mode optimization.
Timer T_doAddressOpt;
Func->getVMetadata()->init(); Func->getVMetadata()->init();
Func->doAddressOpt(); Func->doAddressOpt();
T_doAddressOpt.printElapsedUs(Context, "doAddressOpt()");
// Argument lowering // Argument lowering
Timer T_argLowering;
Func->doArgLowering(); Func->doArgLowering();
T_argLowering.printElapsedUs(Context, "lowerArguments()");
// Target lowering. This requires liveness analysis for some parts // Target lowering. This requires liveness analysis for some parts
// of the lowering decisions, such as compare/branch fusing. If // of the lowering decisions, such as compare/branch fusing. If
...@@ -338,40 +330,30 @@ void TargetX8632::translateO2() { ...@@ -338,40 +330,30 @@ void TargetX8632::translateO2() {
// to be renumbered first. TODO: This renumbering should only be // to be renumbered first. TODO: This renumbering should only be
// necessary if we're actually calculating live intervals, which we // necessary if we're actually calculating live intervals, which we
// only do for register allocation. // only do for register allocation.
Timer T_renumber1;
Func->renumberInstructions(); Func->renumberInstructions();
if (Func->hasError()) if (Func->hasError())
return; return;
T_renumber1.printElapsedUs(Context, "renumberInstructions()");
// TODO: It should be sufficient to use the fastest liveness // TODO: It should be sufficient to use the fastest liveness
// calculation, i.e. livenessLightweight(). However, for some // calculation, i.e. livenessLightweight(). However, for some
// reason that slows down the rest of the translation. Investigate. // reason that slows down the rest of the translation. Investigate.
Timer T_liveness1;
Func->liveness(Liveness_Basic); Func->liveness(Liveness_Basic);
if (Func->hasError()) if (Func->hasError())
return; return;
T_liveness1.printElapsedUs(Context, "liveness()");
Func->dump("After x86 address mode opt"); Func->dump("After x86 address mode opt");
Timer T_genCode;
Func->genCode(); Func->genCode();
if (Func->hasError()) if (Func->hasError())
return; return;
T_genCode.printElapsedUs(Context, "genCode()");
// Register allocation. This requires instruction renumbering and // Register allocation. This requires instruction renumbering and
// full liveness analysis. // full liveness analysis.
Timer T_renumber2;
Func->renumberInstructions(); Func->renumberInstructions();
if (Func->hasError()) if (Func->hasError())
return; return;
T_renumber2.printElapsedUs(Context, "renumberInstructions()");
Timer T_liveness2;
Func->liveness(Liveness_Intervals); Func->liveness(Liveness_Intervals);
if (Func->hasError()) if (Func->hasError())
return; return;
T_liveness2.printElapsedUs(Context, "liveness()");
// Validate the live range computations. Do it outside the timing // Validate the live range computations. Do it outside the timing
// code. TODO: Put this under a flag. // code. TODO: Put this under a flag.
bool ValidLiveness = Func->validateLiveness(); bool ValidLiveness = Func->validateLiveness();
...@@ -381,20 +363,16 @@ void TargetX8632::translateO2() { ...@@ -381,20 +363,16 @@ void TargetX8632::translateO2() {
// The post-codegen dump is done here, after liveness analysis and // The post-codegen dump is done here, after liveness analysis and
// associated cleanup, to make the dump cleaner and more useful. // associated cleanup, to make the dump cleaner and more useful.
Func->dump("After initial x8632 codegen"); Func->dump("After initial x8632 codegen");
Timer T_regAlloc;
Func->getVMetadata()->init(); Func->getVMetadata()->init();
regAlloc(); regAlloc();
if (Func->hasError()) if (Func->hasError())
return; return;
T_regAlloc.printElapsedUs(Context, "regAlloc()");
Func->dump("After linear scan regalloc"); Func->dump("After linear scan regalloc");
// Stack frame mapping. // Stack frame mapping.
Timer T_genFrame;
Func->genFrame(); Func->genFrame();
if (Func->hasError()) if (Func->hasError())
return; return;
T_genFrame.printElapsedUs(Context, "genFrame()");
Func->dump("After stack frame mapping"); Func->dump("After stack frame mapping");
// Branch optimization. This needs to be done just before code // Branch optimization. This needs to be done just before code
...@@ -413,39 +391,29 @@ void TargetX8632::translateO2() { ...@@ -413,39 +391,29 @@ void TargetX8632::translateO2() {
void TargetX8632::translateOm1() { void TargetX8632::translateOm1() {
GlobalContext *Context = Func->getContext(); GlobalContext *Context = Func->getContext();
Timer T_placePhiLoads; static TimerIdT IDOm1 = GlobalContext::getTimerID("Om1");
TimerMarker T(IDOm1, Context);
Func->placePhiLoads(); Func->placePhiLoads();
if (Func->hasError()) if (Func->hasError())
return; return;
T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
Timer T_placePhiStores;
Func->placePhiStores(); Func->placePhiStores();
if (Func->hasError()) if (Func->hasError())
return; return;
T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
Timer T_deletePhis;
Func->deletePhis(); Func->deletePhis();
if (Func->hasError()) if (Func->hasError())
return; return;
T_deletePhis.printElapsedUs(Context, "deletePhis()");
Func->dump("After Phi lowering"); Func->dump("After Phi lowering");
Timer T_argLowering;
Func->doArgLowering(); Func->doArgLowering();
T_argLowering.printElapsedUs(Context, "lowerArguments()");
Timer T_genCode;
Func->genCode(); Func->genCode();
if (Func->hasError()) if (Func->hasError())
return; return;
T_genCode.printElapsedUs(Context, "genCode()");
Func->dump("After initial x8632 codegen"); Func->dump("After initial x8632 codegen");
Timer T_genFrame;
Func->genFrame(); Func->genFrame();
if (Func->hasError()) if (Func->hasError())
return; return;
T_genFrame.printElapsedUs(Context, "genFrame()");
Func->dump("After stack frame mapping"); Func->dump("After stack frame mapping");
// Nop insertion // Nop insertion
......
//===- subzero/src/IceTimerTree.cpp - Pass timer defs ---------------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the TimerTree class, which tracks flat and
// cumulative execution time collection of call chains.
//
//===----------------------------------------------------------------------===//
#include "IceDefs.h"
#include "IceTimerTree.h"
namespace Ice {
std::vector<IceString> TimerStack::IDs;
TimerStack::TimerStack(const IceString &TopLevelName)
: FirstTimestamp(timestamp()), LastTimestamp(FirstTimestamp),
StateChangeCount(0), StackTop(0) {
Nodes.resize(1); // Reserve Nodes[0] for the root node.
push(getTimerID(TopLevelName));
}
// Returns the unique timer ID for the given Name, creating a new ID
// if needed. For performance reasons, it's best to make only one
// call per Name and cache the result, e.g. via a static initializer.
TimerIdT TimerStack::getTimerID(const IceString &Name) {
TimerIdT Size = IDs.size();
for (TimerIdT i = 0; i < Size; ++i) {
if (IDs[i] == Name)
return i;
}
IDs.push_back(Name);
return Size;
}
// Pushes a new marker onto the timer stack.
void TimerStack::push(TimerIdT ID) {
update();
if (Nodes[StackTop].Children.size() <= ID)
Nodes[StackTop].Children.resize(ID + 1);
if (Nodes[StackTop].Children[ID] == 0) {
TTindex Size = Nodes.size();
Nodes[StackTop].Children[ID] = Size;
Nodes.resize(Size + 1);
Nodes[Size].Parent = StackTop;
Nodes[Size].Interior = ID;
}
StackTop = Nodes[StackTop].Children[ID];
}
// Pop the top marker from the timer stack. Validates via assert()
// that the expected marker is popped.
void TimerStack::pop(TimerIdT ID) {
update();
assert(StackTop);
assert(Nodes[StackTop].Parent < StackTop);
// Verify that the expected ID is being popped.
assert(Nodes[StackTop].Interior == ID);
(void)ID;
// Verify that the parent's child points to the current stack top.
assert(Nodes[Nodes[StackTop].Parent].Children[ID] == StackTop);
StackTop = Nodes[StackTop].Parent;
}
// At a state change (e.g. push or pop), updates the flat and
// cumulative timings for everything on the timer stack.
void TimerStack::update() {
++StateChangeCount;
// Whenever the stack is about to change, we grab the time delta
// since the last change and add it to all active cumulative
// elements and to the flat element for the top of the stack.
double Current = timestamp();
double Delta = Current - LastTimestamp;
LastTimestamp = Current;
if (StackTop) {
TimerIdT Leaf = Nodes[StackTop].Interior;
if (Leaf >= LeafTimes.size())
LeafTimes.resize(Leaf + 1);
LeafTimes[Leaf] += Delta;
}
TTindex Prefix = StackTop;
while (Prefix) {
Nodes[Prefix].Time += Delta;
TTindex Next = Nodes[Prefix].Parent;
assert(Next < Prefix);
Prefix = Next;
}
}
namespace {
typedef std::multimap<double, IceString> DumpMapType;
// Dump the Map items in reverse order of their time contribution.
void dumpHelper(Ostream &Str, const DumpMapType &Map, double TotalTime) {
for (DumpMapType::const_reverse_iterator I = Map.rbegin(), E = Map.rend();
I != E; ++I) {
char buf[80];
snprintf(buf, llvm::array_lengthof(buf), " %10.6f (%4.1f%%): ", I->first,
I->first * 100 / TotalTime);
Str << buf << I->second << "\n";
}
}
} // end of anonymous namespace
void TimerStack::dump(Ostream &Str) {
update();
double TotalTime = LastTimestamp - FirstTimestamp;
assert(TotalTime);
Str << "Cumulative function times:\n";
DumpMapType CumulativeMap;
for (TTindex i = 1; i < Nodes.size(); ++i) {
TTindex Prefix = i;
IceString Suffix = "";
while (Prefix) {
if (Suffix.empty())
Suffix = IDs[Nodes[Prefix].Interior];
else
Suffix = IDs[Nodes[Prefix].Interior] + "." + Suffix;
assert(Nodes[Prefix].Parent < Prefix);
Prefix = Nodes[Prefix].Parent;
}
CumulativeMap.insert(std::make_pair(Nodes[i].Time, Suffix));
}
dumpHelper(Str, CumulativeMap, TotalTime);
Str << "Flat function times:\n";
DumpMapType FlatMap;
for (TimerIdT i = 0; i < LeafTimes.size(); ++i) {
FlatMap.insert(std::make_pair(LeafTimes[i], IDs[i]));
}
dumpHelper(Str, FlatMap, TotalTime);
Str << "Number of timer updates: " << StateChangeCount << "\n";
}
} // end of namespace Ice
//===- subzero/src/IceTimerTree.h - Pass timer defs -------------*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the TimerTree class, which allows flat and
// cumulative execution time collection of call chains.
//
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICETIMERTREE_H
#define SUBZERO_SRC_ICETIMERTREE_H
namespace Ice {
class TimerTreeNode;
// Timer tree index type
typedef std::vector<TimerTreeNode>::size_type TTindex;
// TimerTreeNode represents an interior or leaf node in the call tree.
// It contains a list of children, a pointer to its parent, and the
// timer ID for the node. It also holds the cumulative time spent at
// this node and below. The children are always at a higher index in
// the TimerTreeNode::Nodes array, and the parent is always at a lower
// index.
class TimerTreeNode {
public:
TimerTreeNode() : Parent(0), Interior(0), Time(0) {}
std::vector<TTindex> Children; // indexed by TimerIdT
TTindex Parent;
TimerIdT Interior;
double Time;
};
class TimerStack {
TimerStack(const TimerStack &) LLVM_DELETED_FUNCTION;
TimerStack &operator=(const TimerStack &) LLVM_DELETED_FUNCTION;
public:
TimerStack(const IceString &TopLevelName);
static TimerIdT getTimerID(const IceString &Name);
void push(TimerIdT ID);
void pop(TimerIdT ID);
void dump(Ostream &Str);
private:
void update();
static double timestamp() {
// TODO: Implement in terms of std::chrono for C++11.
return llvm::TimeRecord::getCurrentTime(false).getWallTime();
}
const double FirstTimestamp;
double LastTimestamp;
uint64_t StateChangeCount;
static std::vector<IceString> IDs; // indexed by TimerIdT
std::vector<TimerTreeNode> Nodes; // indexed by TTindex
std::vector<double> LeafTimes; // indexed by TimerIdT
TTindex StackTop;
};
} // end of namespace Ice
#endif // SUBZERO_SRC_ICETIMERTREE_H
...@@ -12,12 +12,11 @@ ...@@ -12,12 +12,11 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "IceTranslator.h"
#include "IceCfg.h" #include "IceCfg.h"
#include "IceClFlags.h" #include "IceClFlags.h"
#include "IceDefs.h" #include "IceDefs.h"
#include "IceTargetLowering.h" #include "IceTargetLowering.h"
#include "IceTranslator.h"
#include "llvm/IR/Module.h" #include "llvm/IR/Module.h"
#include "llvm/IR/Constant.h" #include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h" #include "llvm/IR/Constants.h"
...@@ -80,24 +79,13 @@ void Translator::translateFcn(Cfg *Fcn) { ...@@ -80,24 +79,13 @@ void Translator::translateFcn(Cfg *Fcn) {
if (Ctx->getFlags().DisableTranslation) { if (Ctx->getFlags().DisableTranslation) {
Func->dump(); Func->dump();
} else { } else {
Timer TTranslate;
Func->translate(); Func->translate();
if (Ctx->getFlags().SubzeroTimingEnabled) {
std::cerr << "[Subzero timing] Translate function "
<< Func->getFunctionName() << ": " << TTranslate.getElapsedSec()
<< " sec\n";
}
if (Func->hasError()) { if (Func->hasError()) {
std::cerr << "ICE translation error: " << Func->getError() << "\n"; std::cerr << "ICE translation error: " << Func->getError() << "\n";
ErrorStatus = true; ErrorStatus = true;
} }
Timer TEmit;
Func->emit(); Func->emit();
if (Ctx->getFlags().SubzeroTimingEnabled) {
std::cerr << "[Subzero timing] Emit function " << Func->getFunctionName()
<< ": " << TEmit.getElapsedSec() << " sec\n";
}
Ctx->dumpStats(Func->getFunctionName()); Ctx->dumpStats(Func->getFunctionName());
} }
} }
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "PNaClTranslator.h"
#include "IceCfg.h" #include "IceCfg.h"
#include "IceCfgNode.h" #include "IceCfgNode.h"
#include "IceClFlags.h" #include "IceClFlags.h"
...@@ -20,6 +19,7 @@ ...@@ -20,6 +19,7 @@
#include "IceInst.h" #include "IceInst.h"
#include "IceOperand.h" #include "IceOperand.h"
#include "IceTypeConverter.h" #include "IceTypeConverter.h"
#include "PNaClTranslator.h"
#include "llvm/Analysis/NaCl/PNaClABIProps.h" #include "llvm/Analysis/NaCl/PNaClABIProps.h"
#include "llvm/Bitcode/NaCl/NaClBitcodeDecoders.h" #include "llvm/Bitcode/NaCl/NaClBitcodeDecoders.h"
#include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h" #include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h"
...@@ -852,7 +852,7 @@ public: ...@@ -852,7 +852,7 @@ public:
} }
} }
~FunctionParser() override; ~FunctionParser() override {};
// Set the next constant ID to the given constant C. // Set the next constant ID to the given constant C.
void setNextConstantID(Ice::Constant *C) { void setNextConstantID(Ice::Constant *C) {
...@@ -860,8 +860,6 @@ public: ...@@ -860,8 +860,6 @@ public:
} }
private: private:
// Timer for reading function bitcode and converting to ICE.
Ice::Timer TConvert;
// The corresponding ICE function defined by the function block. // The corresponding ICE function defined by the function block.
Ice::Cfg *Func; Ice::Cfg *Func;
// The index to the current basic block being built. // The index to the current basic block being built.
...@@ -1376,13 +1374,6 @@ private: ...@@ -1376,13 +1374,6 @@ private:
} }
}; };
FunctionParser::~FunctionParser() {
if (getFlags().SubzeroTimingEnabled) {
errs() << "[Subzero timing] Convert function " << Func->getFunctionName()
<< ": " << TConvert.getElapsedSec() << " sec\n";
}
}
void FunctionParser::ReportInvalidBinopOpcode(unsigned Opcode, Ice::Type Ty) { void FunctionParser::ReportInvalidBinopOpcode(unsigned Opcode, Ice::Type Ty) {
std::string Buffer; std::string Buffer;
raw_string_ostream StrBuf(Buffer); raw_string_ostream StrBuf(Buffer);
......
...@@ -44,7 +44,6 @@ static cl::list<Ice::VerboseItem> VerboseList( ...@@ -44,7 +44,6 @@ static cl::list<Ice::VerboseItem> VerboseList(
clEnumValN(Ice::IceV_RegOrigins, "orig", "Physical register origins"), clEnumValN(Ice::IceV_RegOrigins, "orig", "Physical register origins"),
clEnumValN(Ice::IceV_LinearScan, "regalloc", "Linear scan details"), clEnumValN(Ice::IceV_LinearScan, "regalloc", "Linear scan details"),
clEnumValN(Ice::IceV_Frame, "frame", "Stack frame layout details"), clEnumValN(Ice::IceV_Frame, "frame", "Stack frame layout details"),
clEnumValN(Ice::IceV_Timing, "time", "Pass timing details"),
clEnumValN(Ice::IceV_AddrOpt, "addropt", "Address mode optimization"), clEnumValN(Ice::IceV_AddrOpt, "addropt", "Address mode optimization"),
clEnumValN(Ice::IceV_All, "all", "Use all verbose options"), clEnumValN(Ice::IceV_All, "all", "Use all verbose options"),
clEnumValN(Ice::IceV_Most, "most", clEnumValN(Ice::IceV_Most, "most",
...@@ -175,6 +174,8 @@ int main(int argc, char **argv) { ...@@ -175,6 +174,8 @@ int main(int argc, char **argv) {
Ice::GlobalContext Ctx(Ls, Os, VMask, TargetArch, OptLevel, TestPrefix, Ice::GlobalContext Ctx(Ls, Os, VMask, TargetArch, OptLevel, TestPrefix,
Flags); Flags);
static Ice::TimerIdT IDszmain = Ice::GlobalContext::getTimerID("szmain");
Ice::TimerMarker T(IDszmain, &Ctx);
int ErrorStatus = 0; int ErrorStatus = 0;
if (BuildOnRead) { if (BuildOnRead) {
...@@ -184,15 +185,11 @@ int main(int argc, char **argv) { ...@@ -184,15 +185,11 @@ int main(int argc, char **argv) {
} else { } else {
// Parse the input LLVM IR file into a module. // Parse the input LLVM IR file into a module.
SMDiagnostic Err; SMDiagnostic Err;
Ice::Timer T; static Ice::TimerIdT IDparse = Ice::GlobalContext::getTimerID("parse");
Ice::TimerMarker T1(IDparse, &Ctx);
Module *Mod = Module *Mod =
NaClParseIRFile(IRFilename, InputFileFormat, Err, getGlobalContext()); NaClParseIRFile(IRFilename, InputFileFormat, Err, getGlobalContext());
if (SubzeroTimingEnabled) {
std::cerr << "[Subzero timing] IR Parsing: " << T.getElapsedSec()
<< " sec\n";
}
if (!Mod) { if (!Mod) {
Err.print(argv[0], errs()); Err.print(argv[0], errs());
return 1; return 1;
...@@ -202,6 +199,8 @@ int main(int argc, char **argv) { ...@@ -202,6 +199,8 @@ int main(int argc, char **argv) {
Converter.convertToIce(); Converter.convertToIce();
ErrorStatus = Converter.getErrorStatus(); ErrorStatus = Converter.getErrorStatus();
} }
if (SubzeroTimingEnabled)
Ctx.dumpTimers();
const bool FinalStats = true; const bool FinalStats = true;
Ctx.dumpStats("_FINAL_", FinalStats); Ctx.dumpStats("_FINAL_", FinalStats);
return ErrorStatus; return ErrorStatus;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment