Commit 8363a066 by Jim Stichnoth

Subzero: Add a few performance measurement tools.

--timing-funcs - Produces a sorted list of total time spent translating each function.

--timing-focus=<F> - Turns on the --timing equivalent just for one function. Use '*' to do this for all functions, i.e. get complete timing breakdowns across all functions.

--verbose-focus=<F> - Temporarily turns on --verbose=all for one function.

BUG= none
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/620373004
parent e4dc61bf
...@@ -25,8 +25,8 @@ namespace Ice { ...@@ -25,8 +25,8 @@ namespace Ice {
Cfg::Cfg(GlobalContext *Ctx) Cfg::Cfg(GlobalContext *Ctx)
: Ctx(Ctx), FunctionName(""), ReturnType(IceType_void), : Ctx(Ctx), FunctionName(""), ReturnType(IceType_void),
IsInternalLinkage(false), HasError(false), ErrorMessage(""), Entry(NULL), IsInternalLinkage(false), HasError(false), FocusedTiming(false),
NextInstNumber(1), Live(nullptr), ErrorMessage(""), Entry(NULL), NextInstNumber(1), Live(nullptr),
Target(TargetLowering::createLowering(Ctx->getTargetArch(), this)), Target(TargetLowering::createLowering(Ctx->getTargetArch(), this)),
VMetadata(new VariablesMetadata(this)), VMetadata(new VariablesMetadata(this)),
TargetAssembler( TargetAssembler(
...@@ -69,8 +69,15 @@ bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); } ...@@ -69,8 +69,15 @@ bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); }
void Cfg::translate() { void Cfg::translate() {
if (hasError()) if (hasError())
return; return;
static TimerIdT IDtranslate = GlobalContext::getTimerID("translate"); VerboseMask OldVerboseMask = getContext()->getVerbose();
TimerMarker T(IDtranslate, getContext()); const IceString &TimingFocusOn = getContext()->getFlags().TimingFocusOn;
if (TimingFocusOn == "*" || TimingFocusOn == getFunctionName())
setFocusedTiming();
bool VerboseFocus =
(getContext()->getFlags().VerboseFocusOn == getFunctionName());
if (VerboseFocus)
getContext()->setVerbose(IceV_All);
TimerMarker T(TimerStack::TT_translate, this);
dump("Initial CFG"); dump("Initial CFG");
...@@ -79,6 +86,10 @@ void Cfg::translate() { ...@@ -79,6 +86,10 @@ void Cfg::translate() {
getTarget()->translate(); getTarget()->translate();
dump("Final output"); dump("Final output");
if (getFocusedTiming())
getContext()->dumpTimers();
if (VerboseFocus)
getContext()->setVerbose(OldVerboseMask);
} }
void Cfg::computePredecessors() { void Cfg::computePredecessors() {
...@@ -87,9 +98,7 @@ void Cfg::computePredecessors() { ...@@ -87,9 +98,7 @@ void Cfg::computePredecessors() {
} }
void Cfg::renumberInstructions() { void Cfg::renumberInstructions() {
static TimerIdT IDrenumberInstructions = TimerMarker T(TimerStack::TT_renumberInstructions, this);
GlobalContext::getTimerID("renumberInstructions");
TimerMarker T(IDrenumberInstructions, getContext());
NextInstNumber = 1; NextInstNumber = 1;
for (CfgNode *Node : Nodes) for (CfgNode *Node : Nodes)
Node->renumberInstructions(); Node->renumberInstructions();
...@@ -97,60 +106,50 @@ void Cfg::renumberInstructions() { ...@@ -97,60 +106,50 @@ void Cfg::renumberInstructions() {
// placePhiLoads() must be called before placePhiStores(). // placePhiLoads() must be called before placePhiStores().
void Cfg::placePhiLoads() { void Cfg::placePhiLoads() {
static TimerIdT IDplacePhiLoads = GlobalContext::getTimerID("placePhiLoads"); TimerMarker T(TimerStack::TT_placePhiLoads, this);
TimerMarker T(IDplacePhiLoads, getContext());
for (CfgNode *Node : Nodes) for (CfgNode *Node : Nodes)
Node->placePhiLoads(); Node->placePhiLoads();
} }
// placePhiStores() must be called after placePhiLoads(). // placePhiStores() must be called after placePhiLoads().
void Cfg::placePhiStores() { void Cfg::placePhiStores() {
static TimerIdT IDplacePhiStores = TimerMarker T(TimerStack::TT_placePhiStores, this);
GlobalContext::getTimerID("placePhiStores");
TimerMarker T(IDplacePhiStores, getContext());
for (CfgNode *Node : Nodes) for (CfgNode *Node : Nodes)
Node->placePhiStores(); Node->placePhiStores();
} }
void Cfg::deletePhis() { void Cfg::deletePhis() {
static TimerIdT IDdeletePhis = GlobalContext::getTimerID("deletePhis"); TimerMarker T(TimerStack::TT_deletePhis, this);
TimerMarker T(IDdeletePhis, getContext());
for (CfgNode *Node : Nodes) for (CfgNode *Node : Nodes)
Node->deletePhis(); Node->deletePhis();
} }
void Cfg::doArgLowering() { void Cfg::doArgLowering() {
static TimerIdT IDdoArgLowering = GlobalContext::getTimerID("doArgLowering"); TimerMarker T(TimerStack::TT_doArgLowering, this);
TimerMarker T(IDdoArgLowering, getContext());
getTarget()->lowerArguments(); getTarget()->lowerArguments();
} }
void Cfg::doAddressOpt() { void Cfg::doAddressOpt() {
static TimerIdT IDdoAddressOpt = GlobalContext::getTimerID("doAddressOpt"); TimerMarker T(TimerStack::TT_doAddressOpt, this);
TimerMarker T(IDdoAddressOpt, getContext());
for (CfgNode *Node : Nodes) for (CfgNode *Node : Nodes)
Node->doAddressOpt(); Node->doAddressOpt();
} }
void Cfg::doNopInsertion() { void Cfg::doNopInsertion() {
static TimerIdT IDdoNopInsertion = TimerMarker T(TimerStack::TT_doNopInsertion, this);
GlobalContext::getTimerID("doNopInsertion");
TimerMarker T(IDdoNopInsertion, getContext());
for (CfgNode *Node : Nodes) for (CfgNode *Node : Nodes)
Node->doNopInsertion(); Node->doNopInsertion();
} }
void Cfg::genCode() { void Cfg::genCode() {
static TimerIdT IDgenCode = GlobalContext::getTimerID("genCode"); TimerMarker T(TimerStack::TT_genCode, this);
TimerMarker T(IDgenCode, getContext());
for (CfgNode *Node : Nodes) for (CfgNode *Node : Nodes)
Node->genCode(); Node->genCode();
} }
// Compute the stack frame layout. // Compute the stack frame layout.
void Cfg::genFrame() { void Cfg::genFrame() {
static TimerIdT IDgenFrame = GlobalContext::getTimerID("genFrame"); TimerMarker T(TimerStack::TT_genFrame, this);
TimerMarker T(IDgenFrame, getContext());
getTarget()->addProlog(Entry); getTarget()->addProlog(Entry);
// TODO: Consider folding epilog generation into the final // TODO: Consider folding epilog generation into the final
// emission/assembly pass to avoid an extra iteration over the node // emission/assembly pass to avoid an extra iteration over the node
...@@ -165,17 +164,14 @@ void Cfg::genFrame() { ...@@ -165,17 +164,14 @@ void Cfg::genFrame() {
// completely with a single block. It is a quick single pass and // completely with a single block. It is a quick single pass and
// doesn't need to iterate until convergence. // doesn't need to iterate until convergence.
void Cfg::livenessLightweight() { void Cfg::livenessLightweight() {
static TimerIdT IDlivenessLightweight = TimerMarker T(TimerStack::TT_livenessLightweight, this);
GlobalContext::getTimerID("livenessLightweight");
TimerMarker T(IDlivenessLightweight, getContext());
getVMetadata()->init(); getVMetadata()->init();
for (CfgNode *Node : Nodes) for (CfgNode *Node : Nodes)
Node->livenessLightweight(); Node->livenessLightweight();
} }
void Cfg::liveness(LivenessMode Mode) { void Cfg::liveness(LivenessMode Mode) {
static TimerIdT IDliveness = GlobalContext::getTimerID("liveness"); TimerMarker T(TimerStack::TT_liveness, this);
TimerMarker T(IDliveness, getContext());
Live.reset(new Liveness(this, Mode)); Live.reset(new Liveness(this, Mode));
getVMetadata()->init(); getVMetadata()->init();
Live->init(); Live->init();
...@@ -208,8 +204,7 @@ void Cfg::liveness(LivenessMode Mode) { ...@@ -208,8 +204,7 @@ void Cfg::liveness(LivenessMode Mode) {
// finer breakdown of the cost. // finer breakdown of the cost.
// Make a final pass over instructions to delete dead instructions // Make a final pass over instructions to delete dead instructions
// and build each Variable's live range. // and build each Variable's live range.
static TimerIdT IDliveRange = GlobalContext::getTimerID("liveRange"); TimerMarker T1(TimerStack::TT_liveRange, this);
TimerMarker T1(IDliveRange, getContext());
for (CfgNode *Node : Nodes) for (CfgNode *Node : Nodes)
Node->livenessPostprocess(Mode, getLiveness()); Node->livenessPostprocess(Mode, getLiveness());
if (Mode == Liveness_Intervals) { if (Mode == Liveness_Intervals) {
...@@ -255,9 +250,7 @@ void Cfg::liveness(LivenessMode Mode) { ...@@ -255,9 +250,7 @@ void Cfg::liveness(LivenessMode Mode) {
// Traverse every Variable of every Inst and verify that it // Traverse every Variable of every Inst and verify that it
// appears within the Variable's computed live range. // appears within the Variable's computed live range.
bool Cfg::validateLiveness() const { bool Cfg::validateLiveness() const {
static TimerIdT IDvalidateLiveness = TimerMarker T(TimerStack::TT_validateLiveness, this);
GlobalContext::getTimerID("validateLiveness");
TimerMarker T(IDvalidateLiveness, getContext());
bool Valid = true; bool Valid = true;
Ostream &Str = Ctx->getStrDump(); Ostream &Str = Ctx->getStrDump();
for (CfgNode *Node : Nodes) { for (CfgNode *Node : Nodes) {
...@@ -300,8 +293,7 @@ bool Cfg::validateLiveness() const { ...@@ -300,8 +293,7 @@ bool Cfg::validateLiveness() const {
} }
void Cfg::doBranchOpt() { void Cfg::doBranchOpt() {
static TimerIdT IDdoBranchOpt = GlobalContext::getTimerID("doBranchOpt"); TimerMarker T(TimerStack::TT_doBranchOpt, this);
TimerMarker T(IDdoBranchOpt, getContext());
for (auto I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { for (auto I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
auto NextNode = I; auto NextNode = I;
++NextNode; ++NextNode;
...@@ -312,8 +304,7 @@ void Cfg::doBranchOpt() { ...@@ -312,8 +304,7 @@ void Cfg::doBranchOpt() {
// ======================== Dump routines ======================== // // ======================== Dump routines ======================== //
void Cfg::emit() { void Cfg::emit() {
static TimerIdT IDemit = GlobalContext::getTimerID("emit"); TimerMarker T(TimerStack::TT_emit, this);
TimerMarker T(IDemit, getContext());
Ostream &Str = Ctx->getStrEmit(); Ostream &Str = Ctx->getStrEmit();
if (!Ctx->testAndSetHasEmittedFirstMethod()) { if (!Ctx->testAndSetHasEmittedFirstMethod()) {
// Print a helpful command for assembling the output. // Print a helpful command for assembling the output.
......
...@@ -96,6 +96,8 @@ public: ...@@ -96,6 +96,8 @@ public:
return getContext()->getFlags().UseIntegratedAssembler; return getContext()->getFlags().UseIntegratedAssembler;
} }
bool hasComputedFrame() const; bool hasComputedFrame() const;
bool getFocusedTiming() const { return FocusedTiming; }
void setFocusedTiming() { FocusedTiming = true; }
// Passes over the CFG. // Passes over the CFG.
void translate(); void translate();
...@@ -165,6 +167,7 @@ private: ...@@ -165,6 +167,7 @@ private:
Type ReturnType; Type ReturnType;
bool IsInternalLinkage; bool IsInternalLinkage;
bool HasError; bool HasError;
bool FocusedTiming;
IceString ErrorMessage; IceString ErrorMessage;
CfgNode *Entry; // entry basic block CfgNode *Entry; // entry basic block
NodeList Nodes; // linearized node list; Entry should be first NodeList Nodes; // linearized node list; Entry should be first
......
...@@ -495,7 +495,7 @@ void CfgNode::dump(Cfg *Func) const { ...@@ -495,7 +495,7 @@ void CfgNode::dump(Cfg *Func) const {
Str << " // preds = "; Str << " // preds = ";
bool First = true; bool First = true;
for (CfgNode *I : InEdges) { for (CfgNode *I : InEdges) {
if (First) if (!First)
Str << ", "; Str << ", ";
First = false; First = false;
Str << "%" << I->getName(); Str << "%" << I->getName();
...@@ -540,7 +540,7 @@ void CfgNode::dump(Cfg *Func) const { ...@@ -540,7 +540,7 @@ void CfgNode::dump(Cfg *Func) const {
Str << " // succs = "; Str << " // succs = ";
bool First = true; bool First = true;
for (CfgNode *I : OutEdges) { for (CfgNode *I : OutEdges) {
if (First) if (!First)
Str << ", "; Str << ", ";
First = false; First = false;
Str << "%" << I->getName(); Str << "%" << I->getName();
......
...@@ -25,8 +25,9 @@ public: ...@@ -25,8 +25,9 @@ public:
: DisableInternal(false), SubzeroTimingEnabled(false), : DisableInternal(false), SubzeroTimingEnabled(false),
DisableTranslation(false), DisableGlobals(false), DisableTranslation(false), DisableGlobals(false),
FunctionSections(false), UseIntegratedAssembler(false), FunctionSections(false), UseIntegratedAssembler(false),
UseSandboxing(false), DumpStats(false), DefaultGlobalPrefix(""), UseSandboxing(false), DumpStats(false), TimeEachFunction(false),
DefaultFunctionPrefix("") {} DefaultGlobalPrefix(""), DefaultFunctionPrefix(""), TimingFocusOn(""),
VerboseFocusOn("") {}
bool DisableInternal; bool DisableInternal;
bool SubzeroTimingEnabled; bool SubzeroTimingEnabled;
bool DisableTranslation; bool DisableTranslation;
...@@ -35,8 +36,11 @@ public: ...@@ -35,8 +36,11 @@ public:
bool UseIntegratedAssembler; bool UseIntegratedAssembler;
bool UseSandboxing; bool UseSandboxing;
bool DumpStats; bool DumpStats;
bool TimeEachFunction;
IceString DefaultGlobalPrefix; IceString DefaultGlobalPrefix;
IceString DefaultFunctionPrefix; IceString DefaultFunctionPrefix;
IceString TimingFocusOn;
IceString VerboseFocusOn;
}; };
} // end of namespace Ice } // end of namespace Ice
......
...@@ -59,15 +59,13 @@ public: ...@@ -59,15 +59,13 @@ public:
// Caller is expected to delete the returned Ice::Cfg object. // Caller is expected to delete the returned Ice::Cfg object.
Ice::Cfg *convertFunction(const Function *F) { Ice::Cfg *convertFunction(const Function *F) {
static Ice::TimerIdT IDllvmConvert =
Ice::GlobalContext::getTimerID("llvmConvert");
Ice::TimerMarker T(IDllvmConvert, Ctx);
VarMap.clear(); VarMap.clear();
NodeMap.clear(); NodeMap.clear();
Func = new Ice::Cfg(Ctx); Func = new Ice::Cfg(Ctx);
Func->setFunctionName(F->getName()); Func->setFunctionName(F->getName());
Func->setReturnType(convertToIceType(F->getReturnType())); Func->setReturnType(convertToIceType(F->getReturnType()));
Func->setInternal(F->hasInternalLinkage()); Func->setInternal(F->hasInternalLinkage());
Ice::TimerMarker T(Ice::TimerStack::TT_llvmConvert, Func);
// The initial definition/use of each arg is the entry node. // The initial definition/use of each arg is the entry node.
for (auto ArgI = F->arg_begin(), ArgE = F->arg_end(); ArgI != ArgE; for (auto ArgI = F->arg_begin(), ArgE = F->arg_end(); ArgI != ArgE;
...@@ -617,8 +615,7 @@ private: ...@@ -617,8 +615,7 @@ private:
namespace Ice { namespace Ice {
void Converter::convertToIce() { void Converter::convertToIce() {
static TimerIdT IDconvertToIce = GlobalContext::getTimerID("convertToIce"); TimerMarker T(TimerStack::TT_convertToIce, Ctx);
TimerMarker T(IDconvertToIce, Ctx);
nameUnnamedGlobalAddresses(Mod); nameUnnamedGlobalAddresses(Mod);
if (!Ctx->getFlags().DisableGlobals) if (!Ctx->getFlags().DisableGlobals)
convertGlobals(Mod); convertGlobals(Mod);
...@@ -626,13 +623,21 @@ void Converter::convertToIce() { ...@@ -626,13 +623,21 @@ void Converter::convertToIce() {
} }
void Converter::convertFunctions() { void Converter::convertFunctions() {
TimerStackIdT StackID = GlobalContext::TSK_Funcs;
for (const Function &I : *Mod) { for (const Function &I : *Mod) {
if (I.empty()) if (I.empty())
continue; continue;
TimerIdT TimerID = 0;
if (Ctx->getFlags().TimeEachFunction) {
TimerID = Ctx->getTimerID(StackID, I.getName());
Ctx->pushTimer(TimerID, StackID);
}
LLVM2ICEConverter FunctionConverter(Ctx, Mod->getContext()); LLVM2ICEConverter FunctionConverter(Ctx, Mod->getContext());
Cfg *Fcn = FunctionConverter.convertFunction(&I); Cfg *Fcn = FunctionConverter.convertFunction(&I);
translateFcn(Fcn); translateFcn(Fcn);
if (Ctx->getFlags().TimeEachFunction)
Ctx->popTimer(TimerID, StackID);
} }
emitConstants(); emitConstants();
......
...@@ -69,6 +69,7 @@ typedef uint32_t SizeT; ...@@ -69,6 +69,7 @@ typedef uint32_t SizeT;
// numbers are used for representing Variable live ranges. // numbers are used for representing Variable live ranges.
typedef int32_t InstNumberT; typedef int32_t InstNumberT;
typedef uint32_t TimerStackIdT;
typedef uint32_t TimerIdT; typedef uint32_t TimerIdT;
enum LivenessMode { enum LivenessMode {
......
...@@ -119,7 +119,11 @@ GlobalContext::GlobalContext(llvm::raw_ostream *OsDump, ...@@ -119,7 +119,11 @@ GlobalContext::GlobalContext(llvm::raw_ostream *OsDump,
: StrDump(OsDump), StrEmit(OsEmit), VMask(Mask), : StrDump(OsDump), StrEmit(OsEmit), VMask(Mask),
ConstPool(new ConstantPool()), Arch(Arch), Opt(Opt), ConstPool(new ConstantPool()), Arch(Arch), Opt(Opt),
TestPrefix(TestPrefix), Flags(Flags), HasEmittedFirstMethod(false), TestPrefix(TestPrefix), Flags(Flags), HasEmittedFirstMethod(false),
RNG(""), Timers(new TimerStack("main")) {} RNG("") {
// Pre-register built-in stack names.
newTimerStackID("Total across all functions");
newTimerStackID("Per-function summary");
}
// Scan a string for S[0-9A-Z]*_ patterns and replace them with // Scan a string for S[0-9A-Z]*_ patterns and replace them with
// S<num>_ where <num> is the next base-36 value. If a type name // S<num>_ where <num> is the next base-36 value. If a type name
...@@ -381,13 +385,27 @@ ConstantList GlobalContext::getConstantPool(Type Ty) const { ...@@ -381,13 +385,27 @@ ConstantList GlobalContext::getConstantPool(Type Ty) const {
llvm_unreachable("Unknown type"); llvm_unreachable("Unknown type");
} }
TimerIdT GlobalContext::getTimerID(const IceString &Name) { TimerIdT GlobalContext::getTimerID(TimerStackIdT StackID,
return TimerStack::getTimerID(Name); const IceString &Name) {
assert(StackID < Timers.size());
return Timers[StackID].getTimerID(Name);
}
TimerStackIdT GlobalContext::newTimerStackID(const IceString &Name) {
TimerStackIdT NewID = Timers.size();
Timers.push_back(TimerStack(Name));
return NewID;
} }
void GlobalContext::pushTimer(TimerIdT ID) { Timers->push(ID); } void GlobalContext::pushTimer(TimerIdT ID, TimerStackIdT StackID) {
assert(StackID < Timers.size());
Timers[StackID].push(ID);
}
void GlobalContext::popTimer(TimerIdT ID) { Timers->pop(ID); } void GlobalContext::popTimer(TimerIdT ID, TimerStackIdT StackID) {
assert(StackID < Timers.size());
Timers[StackID].pop(ID);
}
void GlobalContext::dumpStats(const IceString &Name, bool Final) { void GlobalContext::dumpStats(const IceString &Name, bool Final) {
if (Flags.DumpStats) { if (Flags.DumpStats) {
...@@ -400,6 +418,16 @@ void GlobalContext::dumpStats(const IceString &Name, bool Final) { ...@@ -400,6 +418,16 @@ void GlobalContext::dumpStats(const IceString &Name, bool Final) {
} }
} }
void GlobalContext::dumpTimers() { Timers->dump(getStrDump()); } void GlobalContext::dumpTimers(TimerStackIdT StackID, bool DumpCumulative) {
assert(Timers.size() > StackID);
Timers[StackID].dump(getStrDump(), DumpCumulative);
}
TimerMarker::TimerMarker(TimerIdT ID, const Cfg *Func)
: ID(ID), Ctx(Func->getContext()),
Active(Func->getFocusedTiming() || Ctx->getFlags().SubzeroTimingEnabled) {
if (Active)
Ctx->pushTimer(ID);
}
} // end of namespace Ice } // end of namespace Ice
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "IceDefs.h" #include "IceDefs.h"
#include "IceIntrinsics.h" #include "IceIntrinsics.h"
#include "IceRNG.h" #include "IceRNG.h"
#include "IceTimerTree.h"
#include "IceTypes.h" #include "IceTypes.h"
namespace Ice { namespace Ice {
...@@ -71,6 +72,7 @@ public: ...@@ -71,6 +72,7 @@ public:
// Returns true if any of the specified options in the verbose mask // Returns true if any of the specified options in the verbose mask
// are set. If the argument is omitted, it checks if any verbose // are set. If the argument is omitted, it checks if any verbose
// options at all are set. // options at all are set.
VerboseMask getVerbose() const { return VMask; }
bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; } bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; }
void setVerbose(VerboseMask Mask) { VMask = Mask; } void setVerbose(VerboseMask Mask) { VMask = Mask; }
void addVerbose(VerboseMask Mask) { VMask |= Mask; } void addVerbose(VerboseMask Mask) { VMask |= Mask; }
...@@ -151,10 +153,19 @@ public: ...@@ -151,10 +153,19 @@ public:
StatsCumulative.updateFills(); StatsCumulative.updateFills();
} }
static TimerIdT getTimerID(const IceString &Name); // These are predefined TimerStackIdT values.
void pushTimer(TimerIdT ID); enum TimerStackKind {
void popTimer(TimerIdT ID); TSK_Default = 0,
void dumpTimers(); TSK_Funcs,
TSK_Num
};
TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);
TimerStackIdT newTimerStackID(const IceString &Name);
void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
void dumpTimers(TimerStackIdT StackID = TSK_Default,
bool DumpCumulative = true);
private: private:
Ostream *StrDump; // Stream for dumping / diagnostics Ostream *StrDump; // Stream for dumping / diagnostics
...@@ -172,7 +183,7 @@ private: ...@@ -172,7 +183,7 @@ private:
RandomNumberGenerator RNG; RandomNumberGenerator RNG;
CodeStats StatsFunction; CodeStats StatsFunction;
CodeStats StatsCumulative; CodeStats StatsCumulative;
std::unique_ptr<class TimerStack> Timers; std::vector<TimerStack> Timers;
GlobalContext(const GlobalContext &) = delete; GlobalContext(const GlobalContext &) = delete;
GlobalContext &operator=(const GlobalContext &) = delete; GlobalContext &operator=(const GlobalContext &) = delete;
...@@ -194,6 +205,8 @@ public: ...@@ -194,6 +205,8 @@ public:
if (Active) if (Active)
Ctx->pushTimer(ID); Ctx->pushTimer(ID);
} }
TimerMarker(TimerIdT ID, const Cfg *Func);
~TimerMarker() { ~TimerMarker() {
if (Active) if (Active)
Ctx->popTimer(ID); Ctx->popTimer(ID);
......
...@@ -782,7 +782,8 @@ const x86::AssemblerX86::GPREmitterRegOp InstX8632Lea::Emitter = { ...@@ -782,7 +782,8 @@ const x86::AssemblerX86::GPREmitterRegOp InstX8632Lea::Emitter = {
// Unary XMM ops // Unary XMM ops
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Sqrtss::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Sqrtss::Emitter = {
&x86::AssemblerX86::sqrtss, &x86::AssemblerX86::sqrtss}; &x86::AssemblerX86::sqrtss, &x86::AssemblerX86::sqrtss
};
// Binary GPR ops // Binary GPR ops
template <> template <>
...@@ -824,58 +825,76 @@ const x86::AssemblerX86::GPREmitterShiftOp InstX8632Shr::Emitter = { ...@@ -824,58 +825,76 @@ const x86::AssemblerX86::GPREmitterShiftOp InstX8632Shr::Emitter = {
// Binary XMM ops // Binary XMM ops
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Addss::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Addss::Emitter = {
&x86::AssemblerX86::addss, &x86::AssemblerX86::addss}; &x86::AssemblerX86::addss, &x86::AssemblerX86::addss
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Addps::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Addps::Emitter = {
&x86::AssemblerX86::addps, &x86::AssemblerX86::addps}; &x86::AssemblerX86::addps, &x86::AssemblerX86::addps
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Divss::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Divss::Emitter = {
&x86::AssemblerX86::divss, &x86::AssemblerX86::divss}; &x86::AssemblerX86::divss, &x86::AssemblerX86::divss
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Divps::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Divps::Emitter = {
&x86::AssemblerX86::divps, &x86::AssemblerX86::divps}; &x86::AssemblerX86::divps, &x86::AssemblerX86::divps
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Mulss::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Mulss::Emitter = {
&x86::AssemblerX86::mulss, &x86::AssemblerX86::mulss}; &x86::AssemblerX86::mulss, &x86::AssemblerX86::mulss
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Mulps::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Mulps::Emitter = {
&x86::AssemblerX86::mulps, &x86::AssemblerX86::mulps}; &x86::AssemblerX86::mulps, &x86::AssemblerX86::mulps
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Padd::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Padd::Emitter = {
&x86::AssemblerX86::padd, &x86::AssemblerX86::padd}; &x86::AssemblerX86::padd, &x86::AssemblerX86::padd
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pand::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pand::Emitter = {
&x86::AssemblerX86::pand, &x86::AssemblerX86::pand}; &x86::AssemblerX86::pand, &x86::AssemblerX86::pand
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pandn::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pandn::Emitter = {
&x86::AssemblerX86::pandn, &x86::AssemblerX86::pandn}; &x86::AssemblerX86::pandn, &x86::AssemblerX86::pandn
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pcmpeq::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pcmpeq::Emitter = {
&x86::AssemblerX86::pcmpeq, &x86::AssemblerX86::pcmpeq}; &x86::AssemblerX86::pcmpeq, &x86::AssemblerX86::pcmpeq
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pcmpgt::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pcmpgt::Emitter = {
&x86::AssemblerX86::pcmpgt, &x86::AssemblerX86::pcmpgt}; &x86::AssemblerX86::pcmpgt, &x86::AssemblerX86::pcmpgt
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pmull::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pmull::Emitter = {
&x86::AssemblerX86::pmull, &x86::AssemblerX86::pmull}; &x86::AssemblerX86::pmull, &x86::AssemblerX86::pmull
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pmuludq::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pmuludq::Emitter = {
&x86::AssemblerX86::pmuludq, &x86::AssemblerX86::pmuludq}; &x86::AssemblerX86::pmuludq, &x86::AssemblerX86::pmuludq
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Por::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Por::Emitter = {
&x86::AssemblerX86::por, &x86::AssemblerX86::por}; &x86::AssemblerX86::por, &x86::AssemblerX86::por
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Psub::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Psub::Emitter = {
&x86::AssemblerX86::psub, &x86::AssemblerX86::psub}; &x86::AssemblerX86::psub, &x86::AssemblerX86::psub
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pxor::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pxor::Emitter = {
&x86::AssemblerX86::pxor, &x86::AssemblerX86::pxor}; &x86::AssemblerX86::pxor, &x86::AssemblerX86::pxor
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Subss::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Subss::Emitter = {
&x86::AssemblerX86::subss, &x86::AssemblerX86::subss}; &x86::AssemblerX86::subss, &x86::AssemblerX86::subss
};
template <> template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Subps::Emitter = { const x86::AssemblerX86::XmmEmitterRegOp InstX8632Subps::Emitter = {
&x86::AssemblerX86::subps, &x86::AssemblerX86::subps}; &x86::AssemblerX86::subps, &x86::AssemblerX86::subps
};
// Binary XMM Shift ops // Binary XMM Shift ops
template <> template <>
...@@ -1427,10 +1446,11 @@ void InstX8632Icmp::emitIAS(const Cfg *Func) const { ...@@ -1427,10 +1446,11 @@ void InstX8632Icmp::emitIAS(const Cfg *Func) const {
const Operand *Src1 = getSrc(1); const Operand *Src1 = getSrc(1);
Type Ty = Src0->getType(); Type Ty = Src0->getType();
static const x86::AssemblerX86::GPREmitterRegOp RegEmitter = { static const x86::AssemblerX86::GPREmitterRegOp RegEmitter = {
&x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp
&x86::AssemblerX86::cmp}; };
static const x86::AssemblerX86::GPREmitterAddrOp AddrEmitter = { static const x86::AssemblerX86::GPREmitterAddrOp AddrEmitter = {
&x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp}; &x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp
};
if (const Variable *SrcVar0 = llvm::dyn_cast<Variable>(Src0)) { if (const Variable *SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
if (SrcVar0->hasReg()) { if (SrcVar0->hasReg()) {
emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter); emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter);
...@@ -1471,7 +1491,8 @@ void InstX8632Ucomiss::emitIAS(const Cfg *Func) const { ...@@ -1471,7 +1491,8 @@ void InstX8632Ucomiss::emitIAS(const Cfg *Func) const {
const Variable *Src0 = llvm::cast<Variable>(getSrc(0)); const Variable *Src0 = llvm::cast<Variable>(getSrc(0));
Type Ty = Src0->getType(); Type Ty = Src0->getType();
const static x86::AssemblerX86::XmmEmitterRegOp Emitter = { const static x86::AssemblerX86::XmmEmitterRegOp Emitter = {
&x86::AssemblerX86::ucomiss, &x86::AssemblerX86::ucomiss}; &x86::AssemblerX86::ucomiss, &x86::AssemblerX86::ucomiss
};
emitIASRegOpTyXMM(Func, Ty, Src0, getSrc(1), Emitter); emitIASRegOpTyXMM(Func, Ty, Src0, getSrc(1), Emitter);
} }
...@@ -1517,9 +1538,11 @@ void InstX8632Test::emitIAS(const Cfg *Func) const { ...@@ -1517,9 +1538,11 @@ void InstX8632Test::emitIAS(const Cfg *Func) const {
Type Ty = Src0->getType(); Type Ty = Src0->getType();
// The Reg/Addr form of test is not encodeable. // The Reg/Addr form of test is not encodeable.
static const x86::AssemblerX86::GPREmitterRegOp RegEmitter = { static const x86::AssemblerX86::GPREmitterRegOp RegEmitter = {
&x86::AssemblerX86::test, NULL, &x86::AssemblerX86::test}; &x86::AssemblerX86::test, NULL, &x86::AssemblerX86::test
};
static const x86::AssemblerX86::GPREmitterAddrOp AddrEmitter = { static const x86::AssemblerX86::GPREmitterAddrOp AddrEmitter = {
&x86::AssemblerX86::test, &x86::AssemblerX86::test}; &x86::AssemblerX86::test, &x86::AssemblerX86::test
};
if (const Variable *SrcVar0 = llvm::dyn_cast<Variable>(Src0)) { if (const Variable *SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
if (SrcVar0->hasReg()) { if (SrcVar0->hasReg()) {
emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter); emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter);
...@@ -1770,8 +1793,9 @@ template <> void InstX8632Movp::emitIAS(const Cfg *Func) const { ...@@ -1770,8 +1793,9 @@ template <> void InstX8632Movp::emitIAS(const Cfg *Func) const {
const Variable *Dest = getDest(); const Variable *Dest = getDest();
const Operand *Src = getSrc(0); const Operand *Src = getSrc(0);
const static x86::AssemblerX86::XmmEmitterMovOps Emitter = { const static x86::AssemblerX86::XmmEmitterMovOps Emitter = {
&x86::AssemblerX86::movups, &x86::AssemblerX86::movups, &x86::AssemblerX86::movups, &x86::AssemblerX86::movups,
&x86::AssemblerX86::movups}; &x86::AssemblerX86::movups
};
emitIASMovlikeXMM(Func, Dest, Src, Emitter); emitIASMovlikeXMM(Func, Dest, Src, Emitter);
} }
...@@ -1794,8 +1818,8 @@ template <> void InstX8632Movq::emitIAS(const Cfg *Func) const { ...@@ -1794,8 +1818,8 @@ template <> void InstX8632Movq::emitIAS(const Cfg *Func) const {
const Variable *Dest = getDest(); const Variable *Dest = getDest();
const Operand *Src = getSrc(0); const Operand *Src = getSrc(0);
const static x86::AssemblerX86::XmmEmitterMovOps Emitter = { const static x86::AssemblerX86::XmmEmitterMovOps Emitter = {
&x86::AssemblerX86::movq, &x86::AssemblerX86::movq, &x86::AssemblerX86::movq, &x86::AssemblerX86::movq, &x86::AssemblerX86::movq
&x86::AssemblerX86::movq}; };
emitIASMovlikeXMM(Func, Dest, Src, Emitter); emitIASMovlikeXMM(Func, Dest, Src, Emitter);
} }
......
...@@ -278,8 +278,7 @@ const Inst *VariableTracking::getSingleDefinition() const { ...@@ -278,8 +278,7 @@ const Inst *VariableTracking::getSingleDefinition() const {
} }
void VariablesMetadata::init() { void VariablesMetadata::init() {
static TimerIdT IDvmetadata = GlobalContext::getTimerID("vmetadata"); TimerMarker T(TimerStack::TT_vmetadata, Func);
TimerMarker T(IDvmetadata, Func->getContext());
Metadata.clear(); Metadata.clear();
Metadata.resize(Func->getNumVariables()); Metadata.resize(Func->getNumVariables());
...@@ -438,7 +437,7 @@ void LiveRange::dump(Ostream &Str) const { ...@@ -438,7 +437,7 @@ void LiveRange::dump(Ostream &Str) const {
Str << "(weight=" << Weight << ") "; Str << "(weight=" << Weight << ") ";
bool First = true; bool First = true;
for (const RangeElementType &I : Range) { for (const RangeElementType &I : Range) {
if (First) if (!First)
Str << ", "; Str << ", ";
First = false; First = false;
Str << "[" << I.first << ":" << I.second << ")"; Str << "[" << I.first << ":" << I.second << ")";
......
...@@ -64,8 +64,7 @@ void dumpDisableOverlap(const Cfg *Func, const Variable *Var, ...@@ -64,8 +64,7 @@ void dumpDisableOverlap(const Cfg *Func, const Variable *Var,
// preparation. Results are assigned to Variable::RegNum for each // preparation. Results are assigned to Variable::RegNum for each
// Variable. // Variable.
void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
static TimerIdT IDscan = GlobalContext::getTimerID("linearScan"); TimerMarker T(TimerStack::TT_linearScan, Func);
TimerMarker T(IDscan, Func->getContext());
assert(RegMaskFull.any()); // Sanity check assert(RegMaskFull.any()); // Sanity check
Unhandled.clear(); Unhandled.clear();
UnhandledPrecolored.clear(); UnhandledPrecolored.clear();
...@@ -86,9 +85,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -86,9 +85,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
// storing Func->getVariables(). // storing Func->getVariables().
const VarList &Vars = Func->getVariables(); const VarList &Vars = Func->getVariables();
{ {
static TimerIdT IDinitUnhandled = TimerMarker T(TimerStack::TT_initUnhandled, Func);
GlobalContext::getTimerID("initUnhandled");
TimerMarker T(IDinitUnhandled, Func->getContext());
for (Variable *Var : Vars) { for (Variable *Var : Vars) {
// Explicitly don't consider zero-weight variables, which are // Explicitly don't consider zero-weight variables, which are
// meant to be spill slots. // meant to be spill slots.
......
...@@ -229,8 +229,7 @@ void TargetLowering::lower() { ...@@ -229,8 +229,7 @@ void TargetLowering::lower() {
// registers could potentially be parameterized if we want to restrict // registers could potentially be parameterized if we want to restrict
// registers e.g. for performance testing. // registers e.g. for performance testing.
void TargetLowering::regAlloc() { void TargetLowering::regAlloc() {
static TimerIdT IDregAlloc = GlobalContext::getTimerID("regAlloc"); TimerMarker T(TimerStack::TT_regAlloc, Func);
TimerMarker T(IDregAlloc, Ctx);
LinearScan LinearScan(Func); LinearScan LinearScan(Func);
RegSetMask RegInclude = RegSet_None; RegSetMask RegInclude = RegSet_None;
RegSetMask RegExclude = RegSet_None; RegSetMask RegExclude = RegSet_None;
......
...@@ -313,9 +313,7 @@ TargetX8632::TargetX8632(Cfg *Func) ...@@ -313,9 +313,7 @@ TargetX8632::TargetX8632(Cfg *Func)
} }
void TargetX8632::translateO2() { void TargetX8632::translateO2() {
GlobalContext *Context = Func->getContext(); TimerMarker T(TimerStack::TT_O2, Func);
static TimerIdT IDO2 = GlobalContext::getTimerID("O2");
TimerMarker T(IDO2, Context);
// Lower Phi instructions. // Lower Phi instructions.
Func->placePhiLoads(); Func->placePhiLoads();
...@@ -400,9 +398,7 @@ void TargetX8632::translateO2() { ...@@ -400,9 +398,7 @@ void TargetX8632::translateO2() {
} }
void TargetX8632::translateOm1() { void TargetX8632::translateOm1() {
GlobalContext *Context = Func->getContext(); TimerMarker T(TimerStack::TT_Om1, Func);
static TimerIdT IDOm1 = GlobalContext::getTimerID("Om1");
TimerMarker T(IDOm1, Context);
Func->placePhiLoads(); Func->placePhiLoads();
if (Func->hasError()) if (Func->hasError())
return; return;
...@@ -4305,8 +4301,7 @@ Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { ...@@ -4305,8 +4301,7 @@ Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
void TargetX8632::postLower() { void TargetX8632::postLower() {
if (Ctx->getOptLevel() != Opt_m1) if (Ctx->getOptLevel() != Opt_m1)
return; return;
static TimerIdT IDpostLower = GlobalContext::getTimerID("postLower"); TimerMarker T(TimerStack::TT_postLower, Func);
TimerMarker T(IDpostLower, Ctx);
// TODO: Avoid recomputing WhiteList every instruction. // TODO: Avoid recomputing WhiteList every instruction.
RegSetMask RegInclude = RegSet_All; RegSetMask RegInclude = RegSet_All;
RegSetMask RegExclude = RegSet_StackPointer; RegSetMask RegExclude = RegSet_StackPointer;
......
...@@ -19,26 +19,28 @@ ...@@ -19,26 +19,28 @@
namespace Ice { namespace Ice {
std::vector<IceString> TimerStack::IDs; TimerStack::TimerStack(const IceString &Name)
: Name(Name), FirstTimestamp(timestamp()), LastTimestamp(FirstTimestamp),
TimerStack::TimerStack(const IceString &TopLevelName)
: FirstTimestamp(timestamp()), LastTimestamp(FirstTimestamp),
StateChangeCount(0), StackTop(0) { StateChangeCount(0), StackTop(0) {
Nodes.resize(1); // Reserve Nodes[0] for the root node. Nodes.resize(1); // Reserve Nodes[0] for the root node.
push(getTimerID(TopLevelName)); IDs.resize(TT__num);
#define STR(s) #s
#define X(tag) \
IDs[TT_##tag] = STR(tag); \
IDsIndex[STR(tag)] = TT_##tag;
TIMERTREE_TABLE;
#undef X
#undef STR
} }
// Returns the unique timer ID for the given Name, creating a new ID // Returns the unique timer ID for the given Name, creating a new ID
// if needed. For performance reasons, it's best to make only one // if needed.
// call per Name and cache the result, e.g. via a static initializer.
TimerIdT TimerStack::getTimerID(const IceString &Name) { TimerIdT TimerStack::getTimerID(const IceString &Name) {
TimerIdT Size = IDs.size(); if (IDsIndex.find(Name) == IDsIndex.end()) {
for (TimerIdT i = 0; i < Size; ++i) { IDsIndex[Name] = IDs.size();
if (IDs[i] == Name) IDs.push_back(Name);
return i;
} }
IDs.push_back(Name); return IDsIndex[Name];
return Size;
} }
// Pushes a new marker onto the timer stack. // Pushes a new marker onto the timer stack.
...@@ -112,27 +114,29 @@ void dumpHelper(Ostream &Str, const DumpMapType &Map, double TotalTime) { ...@@ -112,27 +114,29 @@ void dumpHelper(Ostream &Str, const DumpMapType &Map, double TotalTime) {
} // end of anonymous namespace } // end of anonymous namespace
void TimerStack::dump(Ostream &Str) { void TimerStack::dump(Ostream &Str, bool DumpCumulative) {
update(); update();
double TotalTime = LastTimestamp - FirstTimestamp; double TotalTime = LastTimestamp - FirstTimestamp;
assert(TotalTime); assert(TotalTime);
Str << "Cumulative function times:\n"; if (DumpCumulative) {
DumpMapType CumulativeMap; Str << Name << " - Cumulative times:\n";
for (TTindex i = 1; i < Nodes.size(); ++i) { DumpMapType CumulativeMap;
TTindex Prefix = i; for (TTindex i = 1; i < Nodes.size(); ++i) {
IceString Suffix = ""; TTindex Prefix = i;
while (Prefix) { IceString Suffix = "";
if (Suffix.empty()) while (Prefix) {
Suffix = IDs[Nodes[Prefix].Interior]; if (Suffix.empty())
else Suffix = IDs[Nodes[Prefix].Interior];
Suffix = IDs[Nodes[Prefix].Interior] + "." + Suffix; else
assert(Nodes[Prefix].Parent < Prefix); Suffix = IDs[Nodes[Prefix].Interior] + "." + Suffix;
Prefix = Nodes[Prefix].Parent; assert(Nodes[Prefix].Parent < Prefix);
Prefix = Nodes[Prefix].Parent;
}
CumulativeMap.insert(std::make_pair(Nodes[i].Time, Suffix));
} }
CumulativeMap.insert(std::make_pair(Nodes[i].Time, Suffix)); dumpHelper(Str, CumulativeMap, TotalTime);
} }
dumpHelper(Str, CumulativeMap, TotalTime); Str << Name << " - Flat times:\n";
Str << "Flat function times:\n";
DumpMapType FlatMap; DumpMapType FlatMap;
for (TimerIdT i = 0; i < LeafTimes.size(); ++i) { for (TimerIdT i = 0; i < LeafTimes.size(); ++i) {
FlatMap.insert(std::make_pair(LeafTimes[i], IDs[i])); FlatMap.insert(std::make_pair(LeafTimes[i], IDs[i]));
......
//===- subzero/src/IceTimerTree.def - X-macros for timing -------*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file lists predefined timing tags. New tags can be added to
// avoid a runtime string lookup.
//
//===----------------------------------------------------------------------===//
// Include guard.  The guard macro is tested here and defined just before the
// matching #endif at the bottom of the file, so TIMERTREE_TABLE is defined
// only the first time this file is included.
#ifndef SUBZERO_SRC_ICETIMERTREE_DEF
// TIMERTREE_TABLE is an X-macro list of the predefined timing tags, with one
// X(tag) entry per tag.  This file does not define X itself: code that
// expands TIMERTREE_TABLE must #define X(tag) to whatever it wants generated
// for each tag before the expansion (and would normally #undef X afterwards).
// To add a predefined tag, add another X(...) line to the list, keeping the
// trailing backslash on every line except the last.
#define TIMERTREE_TABLE \
/* enum value */ \
X(O2) \
X(Om1) \
X(convertToIce) \
X(deletePhis) \
X(doAddressOpt) \
X(doArgLowering) \
X(doBranchOpt) \
X(doNopInsertion) \
X(emit) \
X(genCode) \
X(genFrame) \
X(initUnhandled) \
X(linearScan) \
X(liveRange) \
X(liveness) \
X(livenessLightweight) \
X(llvmConvert) \
X(parse) \
X(placePhiLoads) \
X(placePhiStores) \
X(postLower) \
X(regAlloc) \
X(renumberInstructions) \
X(szmain) \
X(translate) \
X(validateLiveness) \
X(vmetadata)
// Shape of the X macro that a user of TIMERTREE_TABLE is expected to supply
// (intentionally left commented out; it is not defined by this file):
//#define X(tag)
#define SUBZERO_SRC_ICETIMERTREE_DEF
#endif // SUBZERO_SRC_ICETIMERTREE_DEF
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
#ifndef SUBZERO_SRC_ICETIMERTREE_H #ifndef SUBZERO_SRC_ICETIMERTREE_H
#define SUBZERO_SRC_ICETIMERTREE_H #define SUBZERO_SRC_ICETIMERTREE_H
#include "IceTimerTree.def"
namespace Ice { namespace Ice {
class TimerTreeNode; class TimerTreeNode;
...@@ -38,23 +40,32 @@ public: ...@@ -38,23 +40,32 @@ public:
}; };
class TimerStack { class TimerStack {
TimerStack(const TimerStack &) = delete; // TimerStack(const TimerStack &) = delete;
TimerStack &operator=(const TimerStack &) = delete; TimerStack &operator=(const TimerStack &) = delete;
public: public:
TimerStack(const IceString &TopLevelName); enum TimerTag {
static TimerIdT getTimerID(const IceString &Name); #define X(tag) TT_##tag,
TIMERTREE_TABLE
#undef X
TT__num
};
TimerStack(const IceString &Name);
TimerIdT getTimerID(const IceString &Name);
void push(TimerIdT ID); void push(TimerIdT ID);
void pop(TimerIdT ID); void pop(TimerIdT ID);
void dump(Ostream &Str); void dump(Ostream &Str, bool DumpCumulative);
private: private:
void update(); void update();
static double timestamp(); static double timestamp();
const IceString Name;
const double FirstTimestamp; const double FirstTimestamp;
double LastTimestamp; double LastTimestamp;
uint64_t StateChangeCount; uint64_t StateChangeCount;
static std::vector<IceString> IDs; // indexed by TimerIdT // IDsIndex maps a symbolic timer name to its integer ID.
std::map<IceString, TimerIdT> IDsIndex;
std::vector<IceString> IDs; // indexed by TimerIdT
std::vector<TimerTreeNode> Nodes; // indexed by TTindex std::vector<TimerTreeNode> Nodes; // indexed by TTindex
std::vector<double> LeafTimes; // indexed by TimerIdT std::vector<double> LeafTimes; // indexed by TimerIdT
TTindex StackTop; TTindex StackTop;
......
...@@ -843,6 +843,11 @@ public: ...@@ -843,6 +843,11 @@ public:
NextLocalInstIndex(Context->getNumGlobalValueIDs()), NextLocalInstIndex(Context->getNumGlobalValueIDs()),
InstIsTerminating(false) { InstIsTerminating(false) {
Func->setFunctionName(LLVMFunc->getName()); Func->setFunctionName(LLVMFunc->getName());
if (getFlags().TimeEachFunction)
getTranslator().getContext()->pushTimer(
getTranslator().getContext()->getTimerID(
Ice::GlobalContext::TSK_Funcs, Func->getFunctionName()),
Ice::GlobalContext::TSK_Funcs);
Func->setReturnType(Context->convertToIceType(LLVMFunc->getReturnType())); Func->setReturnType(Context->convertToIceType(LLVMFunc->getReturnType()));
Func->setInternal(LLVMFunc->hasInternalLinkage()); Func->setInternal(LLVMFunc->hasInternalLinkage());
CurrentNode = InstallNextBasicBlock(); CurrentNode = InstallNextBasicBlock();
...@@ -1404,6 +1409,11 @@ void FunctionParser::ExitBlock() { ...@@ -1404,6 +1409,11 @@ void FunctionParser::ExitBlock() {
// for such parsing errors. // for such parsing errors.
if (Context->getNumErrors() == 0) if (Context->getNumErrors() == 0)
getTranslator().translateFcn(Func); getTranslator().translateFcn(Func);
if (getFlags().TimeEachFunction)
getTranslator().getContext()->popTimer(
getTranslator().getContext()->getTimerID(Ice::GlobalContext::TSK_Funcs,
Func->getFunctionName()),
Ice::GlobalContext::TSK_Funcs);
} }
void FunctionParser::ReportInvalidBinaryOp(Ice::InstArithmetic::OpKind Op, void FunctionParser::ReportInvalidBinaryOp(Ice::InstArithmetic::OpKind Op,
......
...@@ -95,8 +95,22 @@ static cl::opt<bool> SubzeroTimingEnabled( ...@@ -95,8 +95,22 @@ static cl::opt<bool> SubzeroTimingEnabled(
"timing", cl::desc("Enable breakdown timing of Subzero translation")); "timing", cl::desc("Enable breakdown timing of Subzero translation"));
static cl::opt<bool> static cl::opt<bool>
DisableGlobals("disable-globals", TimeEachFunction("timing-funcs",
cl::desc("Disable global initializer translation")); cl::desc("Print total translation time for each function"));
static cl::opt<std::string> TimingFocusOn(
"timing-focus",
cl::desc("Break down timing for a specific function (use '*' for all)"),
cl::init(""));
static cl::opt<std::string> VerboseFocusOn(
"verbose-focus",
cl::desc("Temporarily enable full verbosity for a specific function"),
cl::init(""));
static cl::opt<bool>
DisableGlobals("disable-globals",
cl::desc("Disable global initializer translation"));
// This is currently unused, and is a placeholder for lit tests. // This is currently unused, and is a placeholder for lit tests.
static cl::opt<bool> static cl::opt<bool>
...@@ -169,13 +183,15 @@ int main(int argc, char **argv) { ...@@ -169,13 +183,15 @@ int main(int argc, char **argv) {
Flags.UseIntegratedAssembler = UseIntegratedAssembler; Flags.UseIntegratedAssembler = UseIntegratedAssembler;
Flags.UseSandboxing = UseSandboxing; Flags.UseSandboxing = UseSandboxing;
Flags.DumpStats = DumpStats; Flags.DumpStats = DumpStats;
Flags.TimeEachFunction = TimeEachFunction;
Flags.DefaultGlobalPrefix = DefaultGlobalPrefix; Flags.DefaultGlobalPrefix = DefaultGlobalPrefix;
Flags.DefaultFunctionPrefix = DefaultFunctionPrefix; Flags.DefaultFunctionPrefix = DefaultFunctionPrefix;
Flags.TimingFocusOn = TimingFocusOn;
Flags.VerboseFocusOn = VerboseFocusOn;
Ice::GlobalContext Ctx(Ls, Os, VMask, TargetArch, OptLevel, TestPrefix, Ice::GlobalContext Ctx(Ls, Os, VMask, TargetArch, OptLevel, TestPrefix,
Flags); Flags);
static Ice::TimerIdT IDszmain = Ice::GlobalContext::getTimerID("szmain"); Ice::TimerMarker T(Ice::TimerStack::TT_szmain, &Ctx);
Ice::TimerMarker T(IDszmain, &Ctx);
int ErrorStatus = 0; int ErrorStatus = 0;
if (BuildOnRead) { if (BuildOnRead) {
...@@ -185,8 +201,7 @@ int main(int argc, char **argv) { ...@@ -185,8 +201,7 @@ int main(int argc, char **argv) {
} else { } else {
// Parse the input LLVM IR file into a module. // Parse the input LLVM IR file into a module.
SMDiagnostic Err; SMDiagnostic Err;
static Ice::TimerIdT IDparse = Ice::GlobalContext::getTimerID("parse"); Ice::TimerMarker T1(Ice::TimerStack::TT_parse, &Ctx);
Ice::TimerMarker T1(IDparse, &Ctx);
Module *Mod = Module *Mod =
NaClParseIRFile(IRFilename, InputFileFormat, Err, getGlobalContext()); NaClParseIRFile(IRFilename, InputFileFormat, Err, getGlobalContext());
...@@ -199,6 +214,10 @@ int main(int argc, char **argv) { ...@@ -199,6 +214,10 @@ int main(int argc, char **argv) {
Converter.convertToIce(); Converter.convertToIce();
ErrorStatus = Converter.getErrorStatus(); ErrorStatus = Converter.getErrorStatus();
} }
if (TimeEachFunction) {
const bool DumpCumulative = false;
Ctx.dumpTimers(Ice::GlobalContext::TSK_Funcs, DumpCumulative);
}
if (SubzeroTimingEnabled) if (SubzeroTimingEnabled)
Ctx.dumpTimers(); Ctx.dumpTimers();
const bool FinalStats = true; const bool FinalStats = true;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment