Commit 8363a066 by Jim Stichnoth

Subzero: Add a few performance measurement tools.

--timing-funcs - Produces a sorted list of total time spent translating each function.

--timing-focus=<F> - Turns on the --timing equivalent just for one function. Use '*' to do this for all functions, i.e. get complete timing breakdowns across all functions.

--verbose-focus=<F> - Temporarily turns on --verbose=all for one function.

BUG= none
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/620373004
parent e4dc61bf
......@@ -25,8 +25,8 @@ namespace Ice {
Cfg::Cfg(GlobalContext *Ctx)
: Ctx(Ctx), FunctionName(""), ReturnType(IceType_void),
IsInternalLinkage(false), HasError(false), ErrorMessage(""), Entry(NULL),
NextInstNumber(1), Live(nullptr),
IsInternalLinkage(false), HasError(false), FocusedTiming(false),
ErrorMessage(""), Entry(NULL), NextInstNumber(1), Live(nullptr),
Target(TargetLowering::createLowering(Ctx->getTargetArch(), this)),
VMetadata(new VariablesMetadata(this)),
TargetAssembler(
......@@ -69,8 +69,15 @@ bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); }
void Cfg::translate() {
if (hasError())
return;
static TimerIdT IDtranslate = GlobalContext::getTimerID("translate");
TimerMarker T(IDtranslate, getContext());
VerboseMask OldVerboseMask = getContext()->getVerbose();
const IceString &TimingFocusOn = getContext()->getFlags().TimingFocusOn;
if (TimingFocusOn == "*" || TimingFocusOn == getFunctionName())
setFocusedTiming();
bool VerboseFocus =
(getContext()->getFlags().VerboseFocusOn == getFunctionName());
if (VerboseFocus)
getContext()->setVerbose(IceV_All);
TimerMarker T(TimerStack::TT_translate, this);
dump("Initial CFG");
......@@ -79,6 +86,10 @@ void Cfg::translate() {
getTarget()->translate();
dump("Final output");
if (getFocusedTiming())
getContext()->dumpTimers();
if (VerboseFocus)
getContext()->setVerbose(OldVerboseMask);
}
void Cfg::computePredecessors() {
......@@ -87,9 +98,7 @@ void Cfg::computePredecessors() {
}
void Cfg::renumberInstructions() {
static TimerIdT IDrenumberInstructions =
GlobalContext::getTimerID("renumberInstructions");
TimerMarker T(IDrenumberInstructions, getContext());
TimerMarker T(TimerStack::TT_renumberInstructions, this);
NextInstNumber = 1;
for (CfgNode *Node : Nodes)
Node->renumberInstructions();
......@@ -97,60 +106,50 @@ void Cfg::renumberInstructions() {
// placePhiLoads() must be called before placePhiStores().
void Cfg::placePhiLoads() {
static TimerIdT IDplacePhiLoads = GlobalContext::getTimerID("placePhiLoads");
TimerMarker T(IDplacePhiLoads, getContext());
TimerMarker T(TimerStack::TT_placePhiLoads, this);
for (CfgNode *Node : Nodes)
Node->placePhiLoads();
}
// placePhiStores() must be called after placePhiLoads().
void Cfg::placePhiStores() {
static TimerIdT IDplacePhiStores =
GlobalContext::getTimerID("placePhiStores");
TimerMarker T(IDplacePhiStores, getContext());
TimerMarker T(TimerStack::TT_placePhiStores, this);
for (CfgNode *Node : Nodes)
Node->placePhiStores();
}
void Cfg::deletePhis() {
static TimerIdT IDdeletePhis = GlobalContext::getTimerID("deletePhis");
TimerMarker T(IDdeletePhis, getContext());
TimerMarker T(TimerStack::TT_deletePhis, this);
for (CfgNode *Node : Nodes)
Node->deletePhis();
}
void Cfg::doArgLowering() {
static TimerIdT IDdoArgLowering = GlobalContext::getTimerID("doArgLowering");
TimerMarker T(IDdoArgLowering, getContext());
TimerMarker T(TimerStack::TT_doArgLowering, this);
getTarget()->lowerArguments();
}
void Cfg::doAddressOpt() {
static TimerIdT IDdoAddressOpt = GlobalContext::getTimerID("doAddressOpt");
TimerMarker T(IDdoAddressOpt, getContext());
TimerMarker T(TimerStack::TT_doAddressOpt, this);
for (CfgNode *Node : Nodes)
Node->doAddressOpt();
}
void Cfg::doNopInsertion() {
static TimerIdT IDdoNopInsertion =
GlobalContext::getTimerID("doNopInsertion");
TimerMarker T(IDdoNopInsertion, getContext());
TimerMarker T(TimerStack::TT_doNopInsertion, this);
for (CfgNode *Node : Nodes)
Node->doNopInsertion();
}
void Cfg::genCode() {
static TimerIdT IDgenCode = GlobalContext::getTimerID("genCode");
TimerMarker T(IDgenCode, getContext());
TimerMarker T(TimerStack::TT_genCode, this);
for (CfgNode *Node : Nodes)
Node->genCode();
}
// Compute the stack frame layout.
void Cfg::genFrame() {
static TimerIdT IDgenFrame = GlobalContext::getTimerID("genFrame");
TimerMarker T(IDgenFrame, getContext());
TimerMarker T(TimerStack::TT_genFrame, this);
getTarget()->addProlog(Entry);
// TODO: Consider folding epilog generation into the final
// emission/assembly pass to avoid an extra iteration over the node
......@@ -165,17 +164,14 @@ void Cfg::genFrame() {
// completely with a single block. It is a quick single pass and
// doesn't need to iterate until convergence.
void Cfg::livenessLightweight() {
static TimerIdT IDlivenessLightweight =
GlobalContext::getTimerID("livenessLightweight");
TimerMarker T(IDlivenessLightweight, getContext());
TimerMarker T(TimerStack::TT_livenessLightweight, this);
getVMetadata()->init();
for (CfgNode *Node : Nodes)
Node->livenessLightweight();
}
void Cfg::liveness(LivenessMode Mode) {
static TimerIdT IDliveness = GlobalContext::getTimerID("liveness");
TimerMarker T(IDliveness, getContext());
TimerMarker T(TimerStack::TT_liveness, this);
Live.reset(new Liveness(this, Mode));
getVMetadata()->init();
Live->init();
......@@ -208,8 +204,7 @@ void Cfg::liveness(LivenessMode Mode) {
// finer breakdown of the cost.
// Make a final pass over instructions to delete dead instructions
// and build each Variable's live range.
static TimerIdT IDliveRange = GlobalContext::getTimerID("liveRange");
TimerMarker T1(IDliveRange, getContext());
TimerMarker T1(TimerStack::TT_liveRange, this);
for (CfgNode *Node : Nodes)
Node->livenessPostprocess(Mode, getLiveness());
if (Mode == Liveness_Intervals) {
......@@ -255,9 +250,7 @@ void Cfg::liveness(LivenessMode Mode) {
// Traverse every Variable of every Inst and verify that it
// appears within the Variable's computed live range.
bool Cfg::validateLiveness() const {
static TimerIdT IDvalidateLiveness =
GlobalContext::getTimerID("validateLiveness");
TimerMarker T(IDvalidateLiveness, getContext());
TimerMarker T(TimerStack::TT_validateLiveness, this);
bool Valid = true;
Ostream &Str = Ctx->getStrDump();
for (CfgNode *Node : Nodes) {
......@@ -300,8 +293,7 @@ bool Cfg::validateLiveness() const {
}
void Cfg::doBranchOpt() {
static TimerIdT IDdoBranchOpt = GlobalContext::getTimerID("doBranchOpt");
TimerMarker T(IDdoBranchOpt, getContext());
TimerMarker T(TimerStack::TT_doBranchOpt, this);
for (auto I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
auto NextNode = I;
++NextNode;
......@@ -312,8 +304,7 @@ void Cfg::doBranchOpt() {
// ======================== Dump routines ======================== //
void Cfg::emit() {
static TimerIdT IDemit = GlobalContext::getTimerID("emit");
TimerMarker T(IDemit, getContext());
TimerMarker T(TimerStack::TT_emit, this);
Ostream &Str = Ctx->getStrEmit();
if (!Ctx->testAndSetHasEmittedFirstMethod()) {
// Print a helpful command for assembling the output.
......
......@@ -96,6 +96,8 @@ public:
return getContext()->getFlags().UseIntegratedAssembler;
}
bool hasComputedFrame() const;
bool getFocusedTiming() const { return FocusedTiming; }
void setFocusedTiming() { FocusedTiming = true; }
// Passes over the CFG.
void translate();
......@@ -165,6 +167,7 @@ private:
Type ReturnType;
bool IsInternalLinkage;
bool HasError;
bool FocusedTiming;
IceString ErrorMessage;
CfgNode *Entry; // entry basic block
NodeList Nodes; // linearized node list; Entry should be first
......
......@@ -495,7 +495,7 @@ void CfgNode::dump(Cfg *Func) const {
Str << " // preds = ";
bool First = true;
for (CfgNode *I : InEdges) {
if (First)
if (!First)
Str << ", ";
First = false;
Str << "%" << I->getName();
......@@ -540,7 +540,7 @@ void CfgNode::dump(Cfg *Func) const {
Str << " // succs = ";
bool First = true;
for (CfgNode *I : OutEdges) {
if (First)
if (!First)
Str << ", ";
First = false;
Str << "%" << I->getName();
......
......@@ -25,8 +25,9 @@ public:
: DisableInternal(false), SubzeroTimingEnabled(false),
DisableTranslation(false), DisableGlobals(false),
FunctionSections(false), UseIntegratedAssembler(false),
UseSandboxing(false), DumpStats(false), DefaultGlobalPrefix(""),
DefaultFunctionPrefix("") {}
UseSandboxing(false), DumpStats(false), TimeEachFunction(false),
DefaultGlobalPrefix(""), DefaultFunctionPrefix(""), TimingFocusOn(""),
VerboseFocusOn("") {}
bool DisableInternal;
bool SubzeroTimingEnabled;
bool DisableTranslation;
......@@ -35,8 +36,11 @@ public:
bool UseIntegratedAssembler;
bool UseSandboxing;
bool DumpStats;
bool TimeEachFunction;
IceString DefaultGlobalPrefix;
IceString DefaultFunctionPrefix;
IceString TimingFocusOn;
IceString VerboseFocusOn;
};
} // end of namespace Ice
......
......@@ -59,15 +59,13 @@ public:
// Caller is expected to delete the returned Ice::Cfg object.
Ice::Cfg *convertFunction(const Function *F) {
static Ice::TimerIdT IDllvmConvert =
Ice::GlobalContext::getTimerID("llvmConvert");
Ice::TimerMarker T(IDllvmConvert, Ctx);
VarMap.clear();
NodeMap.clear();
Func = new Ice::Cfg(Ctx);
Func->setFunctionName(F->getName());
Func->setReturnType(convertToIceType(F->getReturnType()));
Func->setInternal(F->hasInternalLinkage());
Ice::TimerMarker T(Ice::TimerStack::TT_llvmConvert, Func);
// The initial definition/use of each arg is the entry node.
for (auto ArgI = F->arg_begin(), ArgE = F->arg_end(); ArgI != ArgE;
......@@ -617,8 +615,7 @@ private:
namespace Ice {
void Converter::convertToIce() {
static TimerIdT IDconvertToIce = GlobalContext::getTimerID("convertToIce");
TimerMarker T(IDconvertToIce, Ctx);
TimerMarker T(TimerStack::TT_convertToIce, Ctx);
nameUnnamedGlobalAddresses(Mod);
if (!Ctx->getFlags().DisableGlobals)
convertGlobals(Mod);
......@@ -626,13 +623,21 @@ void Converter::convertToIce() {
}
void Converter::convertFunctions() {
TimerStackIdT StackID = GlobalContext::TSK_Funcs;
for (const Function &I : *Mod) {
if (I.empty())
continue;
TimerIdT TimerID = 0;
if (Ctx->getFlags().TimeEachFunction) {
TimerID = Ctx->getTimerID(StackID, I.getName());
Ctx->pushTimer(TimerID, StackID);
}
LLVM2ICEConverter FunctionConverter(Ctx, Mod->getContext());
Cfg *Fcn = FunctionConverter.convertFunction(&I);
translateFcn(Fcn);
if (Ctx->getFlags().TimeEachFunction)
Ctx->popTimer(TimerID, StackID);
}
emitConstants();
......
......@@ -69,6 +69,7 @@ typedef uint32_t SizeT;
// numbers are used for representing Variable live ranges.
typedef int32_t InstNumberT;
typedef uint32_t TimerStackIdT;
typedef uint32_t TimerIdT;
enum LivenessMode {
......
......@@ -119,7 +119,11 @@ GlobalContext::GlobalContext(llvm::raw_ostream *OsDump,
: StrDump(OsDump), StrEmit(OsEmit), VMask(Mask),
ConstPool(new ConstantPool()), Arch(Arch), Opt(Opt),
TestPrefix(TestPrefix), Flags(Flags), HasEmittedFirstMethod(false),
RNG(""), Timers(new TimerStack("main")) {}
RNG("") {
// Pre-register built-in stack names.
newTimerStackID("Total across all functions");
newTimerStackID("Per-function summary");
}
// Scan a string for S[0-9A-Z]*_ patterns and replace them with
// S<num>_ where <num> is the next base-36 value. If a type name
......@@ -381,13 +385,27 @@ ConstantList GlobalContext::getConstantPool(Type Ty) const {
llvm_unreachable("Unknown type");
}
TimerIdT GlobalContext::getTimerID(const IceString &Name) {
return TimerStack::getTimerID(Name);
TimerIdT GlobalContext::getTimerID(TimerStackIdT StackID,
const IceString &Name) {
assert(StackID < Timers.size());
return Timers[StackID].getTimerID(Name);
}
// Registers a new timer stack labelled Name and returns its ID.  The
// ID is the stack's index within Timers, so IDs are handed out
// sequentially in registration order.
TimerStackIdT GlobalContext::newTimerStackID(const IceString &Name) {
  // Capture the index before growing the vector; make the narrowing
  // from size_t to the 32-bit ID type explicit.
  const TimerStackIdT NewID = static_cast<TimerStackIdT>(Timers.size());
  // Construct the stack in place instead of building a temporary
  // TimerStack and copying it (and its containers) into the vector.
  Timers.emplace_back(Name);
  return NewID;
}
void GlobalContext::pushTimer(TimerIdT ID) { Timers->push(ID); }
// Pushes timer ID onto the timer stack selected by StackID.  StackID
// must name an already-registered stack (checked by assertion).
void GlobalContext::pushTimer(TimerIdT ID, TimerStackIdT StackID) {
  assert(Timers.size() > StackID);
  TimerStack &Stack = Timers[StackID];
  Stack.push(ID);
}
void GlobalContext::popTimer(TimerIdT ID) { Timers->pop(ID); }
// Pops timer ID from the timer stack selected by StackID.  StackID
// must name an already-registered stack (checked by assertion).
void GlobalContext::popTimer(TimerIdT ID, TimerStackIdT StackID) {
  assert(Timers.size() > StackID);
  TimerStack &Stack = Timers[StackID];
  Stack.pop(ID);
}
void GlobalContext::dumpStats(const IceString &Name, bool Final) {
if (Flags.DumpStats) {
......@@ -400,6 +418,16 @@ void GlobalContext::dumpStats(const IceString &Name, bool Final) {
}
}
void GlobalContext::dumpTimers() { Timers->dump(getStrDump()); }
// Writes the timing report for the stack selected by StackID to the
// dump stream.  DumpCumulative is forwarded to TimerStack::dump().
// StackID must name an already-registered stack (checked by assertion).
void GlobalContext::dumpTimers(TimerStackIdT StackID, bool DumpCumulative) {
  assert(StackID < Timers.size());
  TimerStack &Stack = Timers[StackID];
  Stack.dump(getStrDump(), DumpCumulative);
}
// Creates a timer marker scoped to the translation of Func.  The
// marker is active when Func has focused timing turned on or when the
// SubzeroTimingEnabled flag is set; an active marker pushes ID onto
// the context's default timer stack.
TimerMarker::TimerMarker(TimerIdT ID, const Cfg *Func)
    : ID(ID), Ctx(Func->getContext()),
      // Query the context through Func rather than through the Ctx
      // member: members are initialized in declaration order, so
      // reading Ctx here is only valid if it is declared before Active.
      Active(Func->getFocusedTiming() ||
             Func->getContext()->getFlags().SubzeroTimingEnabled) {
  if (Active)
    Ctx->pushTimer(ID);
}
} // end of namespace Ice
......@@ -23,6 +23,7 @@
#include "IceDefs.h"
#include "IceIntrinsics.h"
#include "IceRNG.h"
#include "IceTimerTree.h"
#include "IceTypes.h"
namespace Ice {
......@@ -71,6 +72,7 @@ public:
// Returns true if any of the specified options in the verbose mask
// are set. If the argument is omitted, it checks if any verbose
// options at all are set.
VerboseMask getVerbose() const { return VMask; }
bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; }
void setVerbose(VerboseMask Mask) { VMask = Mask; }
void addVerbose(VerboseMask Mask) { VMask |= Mask; }
......@@ -151,10 +153,19 @@ public:
StatsCumulative.updateFills();
}
static TimerIdT getTimerID(const IceString &Name);
void pushTimer(TimerIdT ID);
void popTimer(TimerIdT ID);
void dumpTimers();
// These are predefined TimerStackIdT values.  They correspond to the
// timer stacks that the GlobalContext constructor registers, in this
// order, via newTimerStackID().
enum TimerStackKind {
// Stack used when pushTimer()/popTimer()/dumpTimers() are called
// without an explicit stack ID; registered under the name
// "Total across all functions".
TSK_Default = 0,
// Stack holding one timer per translated function (used when the
// TimeEachFunction flag is set); registered under the name
// "Per-function summary".
TSK_Funcs,
// Presumably the number of predefined stacks -- not referenced in the
// code visible here; confirm before relying on it.
TSK_Num
};
TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);
TimerStackIdT newTimerStackID(const IceString &Name);
void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
void dumpTimers(TimerStackIdT StackID = TSK_Default,
bool DumpCumulative = true);
private:
Ostream *StrDump; // Stream for dumping / diagnostics
......@@ -172,7 +183,7 @@ private:
RandomNumberGenerator RNG;
CodeStats StatsFunction;
CodeStats StatsCumulative;
std::unique_ptr<class TimerStack> Timers;
std::vector<TimerStack> Timers;
GlobalContext(const GlobalContext &) = delete;
GlobalContext &operator=(const GlobalContext &) = delete;
......@@ -194,6 +205,8 @@ public:
if (Active)
Ctx->pushTimer(ID);
}
TimerMarker(TimerIdT ID, const Cfg *Func);
~TimerMarker() {
if (Active)
Ctx->popTimer(ID);
......
......@@ -782,7 +782,8 @@ const x86::AssemblerX86::GPREmitterRegOp InstX8632Lea::Emitter = {
// Unary XMM ops
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Sqrtss::Emitter = {
&x86::AssemblerX86::sqrtss, &x86::AssemblerX86::sqrtss};
&x86::AssemblerX86::sqrtss, &x86::AssemblerX86::sqrtss
};
// Binary GPR ops
template <>
......@@ -824,58 +825,76 @@ const x86::AssemblerX86::GPREmitterShiftOp InstX8632Shr::Emitter = {
// Binary XMM ops
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Addss::Emitter = {
&x86::AssemblerX86::addss, &x86::AssemblerX86::addss};
&x86::AssemblerX86::addss, &x86::AssemblerX86::addss
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Addps::Emitter = {
&x86::AssemblerX86::addps, &x86::AssemblerX86::addps};
&x86::AssemblerX86::addps, &x86::AssemblerX86::addps
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Divss::Emitter = {
&x86::AssemblerX86::divss, &x86::AssemblerX86::divss};
&x86::AssemblerX86::divss, &x86::AssemblerX86::divss
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Divps::Emitter = {
&x86::AssemblerX86::divps, &x86::AssemblerX86::divps};
&x86::AssemblerX86::divps, &x86::AssemblerX86::divps
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Mulss::Emitter = {
&x86::AssemblerX86::mulss, &x86::AssemblerX86::mulss};
&x86::AssemblerX86::mulss, &x86::AssemblerX86::mulss
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Mulps::Emitter = {
&x86::AssemblerX86::mulps, &x86::AssemblerX86::mulps};
&x86::AssemblerX86::mulps, &x86::AssemblerX86::mulps
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Padd::Emitter = {
&x86::AssemblerX86::padd, &x86::AssemblerX86::padd};
&x86::AssemblerX86::padd, &x86::AssemblerX86::padd
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pand::Emitter = {
&x86::AssemblerX86::pand, &x86::AssemblerX86::pand};
&x86::AssemblerX86::pand, &x86::AssemblerX86::pand
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pandn::Emitter = {
&x86::AssemblerX86::pandn, &x86::AssemblerX86::pandn};
&x86::AssemblerX86::pandn, &x86::AssemblerX86::pandn
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pcmpeq::Emitter = {
&x86::AssemblerX86::pcmpeq, &x86::AssemblerX86::pcmpeq};
&x86::AssemblerX86::pcmpeq, &x86::AssemblerX86::pcmpeq
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pcmpgt::Emitter = {
&x86::AssemblerX86::pcmpgt, &x86::AssemblerX86::pcmpgt};
&x86::AssemblerX86::pcmpgt, &x86::AssemblerX86::pcmpgt
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pmull::Emitter = {
&x86::AssemblerX86::pmull, &x86::AssemblerX86::pmull};
&x86::AssemblerX86::pmull, &x86::AssemblerX86::pmull
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pmuludq::Emitter = {
&x86::AssemblerX86::pmuludq, &x86::AssemblerX86::pmuludq};
&x86::AssemblerX86::pmuludq, &x86::AssemblerX86::pmuludq
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Por::Emitter = {
&x86::AssemblerX86::por, &x86::AssemblerX86::por};
&x86::AssemblerX86::por, &x86::AssemblerX86::por
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Psub::Emitter = {
&x86::AssemblerX86::psub, &x86::AssemblerX86::psub};
&x86::AssemblerX86::psub, &x86::AssemblerX86::psub
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pxor::Emitter = {
&x86::AssemblerX86::pxor, &x86::AssemblerX86::pxor};
&x86::AssemblerX86::pxor, &x86::AssemblerX86::pxor
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Subss::Emitter = {
&x86::AssemblerX86::subss, &x86::AssemblerX86::subss};
&x86::AssemblerX86::subss, &x86::AssemblerX86::subss
};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Subps::Emitter = {
&x86::AssemblerX86::subps, &x86::AssemblerX86::subps};
&x86::AssemblerX86::subps, &x86::AssemblerX86::subps
};
// Binary XMM Shift ops
template <>
......@@ -1427,10 +1446,11 @@ void InstX8632Icmp::emitIAS(const Cfg *Func) const {
const Operand *Src1 = getSrc(1);
Type Ty = Src0->getType();
static const x86::AssemblerX86::GPREmitterRegOp RegEmitter = {
&x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp,
&x86::AssemblerX86::cmp};
&x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp
};
static const x86::AssemblerX86::GPREmitterAddrOp AddrEmitter = {
&x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp};
&x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp
};
if (const Variable *SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
if (SrcVar0->hasReg()) {
emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter);
......@@ -1471,7 +1491,8 @@ void InstX8632Ucomiss::emitIAS(const Cfg *Func) const {
const Variable *Src0 = llvm::cast<Variable>(getSrc(0));
Type Ty = Src0->getType();
const static x86::AssemblerX86::XmmEmitterRegOp Emitter = {
&x86::AssemblerX86::ucomiss, &x86::AssemblerX86::ucomiss};
&x86::AssemblerX86::ucomiss, &x86::AssemblerX86::ucomiss
};
emitIASRegOpTyXMM(Func, Ty, Src0, getSrc(1), Emitter);
}
......@@ -1517,9 +1538,11 @@ void InstX8632Test::emitIAS(const Cfg *Func) const {
Type Ty = Src0->getType();
// The Reg/Addr form of test is not encodeable.
static const x86::AssemblerX86::GPREmitterRegOp RegEmitter = {
&x86::AssemblerX86::test, NULL, &x86::AssemblerX86::test};
&x86::AssemblerX86::test, NULL, &x86::AssemblerX86::test
};
static const x86::AssemblerX86::GPREmitterAddrOp AddrEmitter = {
&x86::AssemblerX86::test, &x86::AssemblerX86::test};
&x86::AssemblerX86::test, &x86::AssemblerX86::test
};
if (const Variable *SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
if (SrcVar0->hasReg()) {
emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter);
......@@ -1770,8 +1793,9 @@ template <> void InstX8632Movp::emitIAS(const Cfg *Func) const {
const Variable *Dest = getDest();
const Operand *Src = getSrc(0);
const static x86::AssemblerX86::XmmEmitterMovOps Emitter = {
&x86::AssemblerX86::movups, &x86::AssemblerX86::movups,
&x86::AssemblerX86::movups};
&x86::AssemblerX86::movups, &x86::AssemblerX86::movups,
&x86::AssemblerX86::movups
};
emitIASMovlikeXMM(Func, Dest, Src, Emitter);
}
......@@ -1794,8 +1818,8 @@ template <> void InstX8632Movq::emitIAS(const Cfg *Func) const {
const Variable *Dest = getDest();
const Operand *Src = getSrc(0);
const static x86::AssemblerX86::XmmEmitterMovOps Emitter = {
&x86::AssemblerX86::movq, &x86::AssemblerX86::movq,
&x86::AssemblerX86::movq};
&x86::AssemblerX86::movq, &x86::AssemblerX86::movq, &x86::AssemblerX86::movq
};
emitIASMovlikeXMM(Func, Dest, Src, Emitter);
}
......
......@@ -278,8 +278,7 @@ const Inst *VariableTracking::getSingleDefinition() const {
}
void VariablesMetadata::init() {
static TimerIdT IDvmetadata = GlobalContext::getTimerID("vmetadata");
TimerMarker T(IDvmetadata, Func->getContext());
TimerMarker T(TimerStack::TT_vmetadata, Func);
Metadata.clear();
Metadata.resize(Func->getNumVariables());
......@@ -438,7 +437,7 @@ void LiveRange::dump(Ostream &Str) const {
Str << "(weight=" << Weight << ") ";
bool First = true;
for (const RangeElementType &I : Range) {
if (First)
if (!First)
Str << ", ";
First = false;
Str << "[" << I.first << ":" << I.second << ")";
......
......@@ -64,8 +64,7 @@ void dumpDisableOverlap(const Cfg *Func, const Variable *Var,
// preparation. Results are assigned to Variable::RegNum for each
// Variable.
void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
static TimerIdT IDscan = GlobalContext::getTimerID("linearScan");
TimerMarker T(IDscan, Func->getContext());
TimerMarker T(TimerStack::TT_linearScan, Func);
assert(RegMaskFull.any()); // Sanity check
Unhandled.clear();
UnhandledPrecolored.clear();
......@@ -86,9 +85,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
// storing Func->getVariables().
const VarList &Vars = Func->getVariables();
{
static TimerIdT IDinitUnhandled =
GlobalContext::getTimerID("initUnhandled");
TimerMarker T(IDinitUnhandled, Func->getContext());
TimerMarker T(TimerStack::TT_initUnhandled, Func);
for (Variable *Var : Vars) {
// Explicitly don't consider zero-weight variables, which are
// meant to be spill slots.
......
......@@ -229,8 +229,7 @@ void TargetLowering::lower() {
// registers could potentially be parameterized if we want to restrict
// registers e.g. for performance testing.
void TargetLowering::regAlloc() {
static TimerIdT IDregAlloc = GlobalContext::getTimerID("regAlloc");
TimerMarker T(IDregAlloc, Ctx);
TimerMarker T(TimerStack::TT_regAlloc, Func);
LinearScan LinearScan(Func);
RegSetMask RegInclude = RegSet_None;
RegSetMask RegExclude = RegSet_None;
......
......@@ -313,9 +313,7 @@ TargetX8632::TargetX8632(Cfg *Func)
}
void TargetX8632::translateO2() {
GlobalContext *Context = Func->getContext();
static TimerIdT IDO2 = GlobalContext::getTimerID("O2");
TimerMarker T(IDO2, Context);
TimerMarker T(TimerStack::TT_O2, Func);
// Lower Phi instructions.
Func->placePhiLoads();
......@@ -400,9 +398,7 @@ void TargetX8632::translateO2() {
}
void TargetX8632::translateOm1() {
GlobalContext *Context = Func->getContext();
static TimerIdT IDOm1 = GlobalContext::getTimerID("Om1");
TimerMarker T(IDOm1, Context);
TimerMarker T(TimerStack::TT_Om1, Func);
Func->placePhiLoads();
if (Func->hasError())
return;
......@@ -4305,8 +4301,7 @@ Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
void TargetX8632::postLower() {
if (Ctx->getOptLevel() != Opt_m1)
return;
static TimerIdT IDpostLower = GlobalContext::getTimerID("postLower");
TimerMarker T(IDpostLower, Ctx);
TimerMarker T(TimerStack::TT_postLower, Func);
// TODO: Avoid recomputing WhiteList every instruction.
RegSetMask RegInclude = RegSet_All;
RegSetMask RegExclude = RegSet_StackPointer;
......
......@@ -19,26 +19,28 @@
namespace Ice {
std::vector<IceString> TimerStack::IDs;
TimerStack::TimerStack(const IceString &TopLevelName)
: FirstTimestamp(timestamp()), LastTimestamp(FirstTimestamp),
TimerStack::TimerStack(const IceString &Name)
: Name(Name), FirstTimestamp(timestamp()), LastTimestamp(FirstTimestamp),
StateChangeCount(0), StackTop(0) {
Nodes.resize(1); // Reserve Nodes[0] for the root node.
push(getTimerID(TopLevelName));
IDs.resize(TT__num);
#define STR(s) #s
#define X(tag) \
IDs[TT_##tag] = STR(tag); \
IDsIndex[STR(tag)] = TT_##tag;
TIMERTREE_TABLE;
#undef X
#undef STR
}
// Returns the unique timer ID for the given Name, creating a new ID
// if needed. For performance reasons, it's best to make only one
// call per Name and cache the result, e.g. via a static initializer.
// if needed.
TimerIdT TimerStack::getTimerID(const IceString &Name) {
TimerIdT Size = IDs.size();
for (TimerIdT i = 0; i < Size; ++i) {
if (IDs[i] == Name)
return i;
if (IDsIndex.find(Name) == IDsIndex.end()) {
IDsIndex[Name] = IDs.size();
IDs.push_back(Name);
}
IDs.push_back(Name);
return Size;
return IDsIndex[Name];
}
// Pushes a new marker onto the timer stack.
......@@ -112,27 +114,29 @@ void dumpHelper(Ostream &Str, const DumpMapType &Map, double TotalTime) {
} // end of anonymous namespace
void TimerStack::dump(Ostream &Str) {
void TimerStack::dump(Ostream &Str, bool DumpCumulative) {
update();
double TotalTime = LastTimestamp - FirstTimestamp;
assert(TotalTime);
Str << "Cumulative function times:\n";
DumpMapType CumulativeMap;
for (TTindex i = 1; i < Nodes.size(); ++i) {
TTindex Prefix = i;
IceString Suffix = "";
while (Prefix) {
if (Suffix.empty())
Suffix = IDs[Nodes[Prefix].Interior];
else
Suffix = IDs[Nodes[Prefix].Interior] + "." + Suffix;
assert(Nodes[Prefix].Parent < Prefix);
Prefix = Nodes[Prefix].Parent;
if (DumpCumulative) {
Str << Name << " - Cumulative times:\n";
DumpMapType CumulativeMap;
for (TTindex i = 1; i < Nodes.size(); ++i) {
TTindex Prefix = i;
IceString Suffix = "";
while (Prefix) {
if (Suffix.empty())
Suffix = IDs[Nodes[Prefix].Interior];
else
Suffix = IDs[Nodes[Prefix].Interior] + "." + Suffix;
assert(Nodes[Prefix].Parent < Prefix);
Prefix = Nodes[Prefix].Parent;
}
CumulativeMap.insert(std::make_pair(Nodes[i].Time, Suffix));
}
CumulativeMap.insert(std::make_pair(Nodes[i].Time, Suffix));
dumpHelper(Str, CumulativeMap, TotalTime);
}
dumpHelper(Str, CumulativeMap, TotalTime);
Str << "Flat function times:\n";
Str << Name << " - Flat times:\n";
DumpMapType FlatMap;
for (TimerIdT i = 0; i < LeafTimes.size(); ++i) {
FlatMap.insert(std::make_pair(LeafTimes[i], IDs[i]));
......
//===- subzero/src/IceTimerTree.def - X-macros for timing -------*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file lists predefined timing tags. New tags can be added to
// avoid a runtime string lookup.
//
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICETIMERTREE_DEF
#define TIMERTREE_TABLE \
/* enum value */ \
X(O2) \
X(Om1) \
X(convertToIce) \
X(deletePhis) \
X(doAddressOpt) \
X(doArgLowering) \
X(doBranchOpt) \
X(doNopInsertion) \
X(emit) \
X(genCode) \
X(genFrame) \
X(initUnhandled) \
X(linearScan) \
X(liveRange) \
X(liveness) \
X(livenessLightweight) \
X(llvmConvert) \
X(parse) \
X(placePhiLoads) \
X(placePhiStores) \
X(postLower) \
X(regAlloc) \
X(renumberInstructions) \
X(szmain) \
X(translate) \
X(validateLiveness) \
X(vmetadata)
//#define X(tag)
#define SUBZERO_SRC_ICETIMERTREE_DEF
#endif // SUBZERO_SRC_ICETIMERTREE_DEF
......@@ -15,6 +15,8 @@
#ifndef SUBZERO_SRC_ICETIMERTREE_H
#define SUBZERO_SRC_ICETIMERTREE_H
#include "IceTimerTree.def"
namespace Ice {
class TimerTreeNode;
......@@ -38,23 +40,32 @@ public:
};
class TimerStack {
TimerStack(const TimerStack &) = delete;
// TimerStack(const TimerStack &) = delete;
TimerStack &operator=(const TimerStack &) = delete;
public:
TimerStack(const IceString &TopLevelName);
static TimerIdT getTimerID(const IceString &Name);
enum TimerTag {
#define X(tag) TT_##tag,
TIMERTREE_TABLE
#undef X
TT__num
};
TimerStack(const IceString &Name);
TimerIdT getTimerID(const IceString &Name);
void push(TimerIdT ID);
void pop(TimerIdT ID);
void dump(Ostream &Str);
void dump(Ostream &Str, bool DumpCumulative);
private:
void update();
static double timestamp();
const IceString Name;
const double FirstTimestamp;
double LastTimestamp;
uint64_t StateChangeCount;
static std::vector<IceString> IDs; // indexed by TimerIdT
// IDsIndex maps a symbolic timer name to its integer ID.
std::map<IceString, TimerIdT> IDsIndex;
std::vector<IceString> IDs; // indexed by TimerIdT
std::vector<TimerTreeNode> Nodes; // indexed by TTindex
std::vector<double> LeafTimes; // indexed by TimerIdT
TTindex StackTop;
......
......@@ -843,6 +843,11 @@ public:
NextLocalInstIndex(Context->getNumGlobalValueIDs()),
InstIsTerminating(false) {
Func->setFunctionName(LLVMFunc->getName());
if (getFlags().TimeEachFunction)
getTranslator().getContext()->pushTimer(
getTranslator().getContext()->getTimerID(
Ice::GlobalContext::TSK_Funcs, Func->getFunctionName()),
Ice::GlobalContext::TSK_Funcs);
Func->setReturnType(Context->convertToIceType(LLVMFunc->getReturnType()));
Func->setInternal(LLVMFunc->hasInternalLinkage());
CurrentNode = InstallNextBasicBlock();
......@@ -1404,6 +1409,11 @@ void FunctionParser::ExitBlock() {
// for such parsing errors.
if (Context->getNumErrors() == 0)
getTranslator().translateFcn(Func);
if (getFlags().TimeEachFunction)
getTranslator().getContext()->popTimer(
getTranslator().getContext()->getTimerID(Ice::GlobalContext::TSK_Funcs,
Func->getFunctionName()),
Ice::GlobalContext::TSK_Funcs);
}
void FunctionParser::ReportInvalidBinaryOp(Ice::InstArithmetic::OpKind Op,
......
......@@ -95,8 +95,22 @@ static cl::opt<bool> SubzeroTimingEnabled(
"timing", cl::desc("Enable breakdown timing of Subzero translation"));
static cl::opt<bool>
DisableGlobals("disable-globals",
cl::desc("Disable global initializer translation"));
TimeEachFunction("timing-funcs",
cl::desc("Print total translation time for each function"));
static cl::opt<std::string> TimingFocusOn(
"timing-focus",
cl::desc("Break down timing for a specific function (use '*' for all)"),
cl::init(""));
static cl::opt<std::string> VerboseFocusOn(
"verbose-focus",
cl::desc("Temporarily enable full verbosity for a specific function"),
cl::init(""));
static cl::opt<bool>
DisableGlobals("disable-globals",
cl::desc("Disable global initializer translation"));
// This is currently unused, and is a placeholder for lit tests.
static cl::opt<bool>
......@@ -169,13 +183,15 @@ int main(int argc, char **argv) {
Flags.UseIntegratedAssembler = UseIntegratedAssembler;
Flags.UseSandboxing = UseSandboxing;
Flags.DumpStats = DumpStats;
Flags.TimeEachFunction = TimeEachFunction;
Flags.DefaultGlobalPrefix = DefaultGlobalPrefix;
Flags.DefaultFunctionPrefix = DefaultFunctionPrefix;
Flags.TimingFocusOn = TimingFocusOn;
Flags.VerboseFocusOn = VerboseFocusOn;
Ice::GlobalContext Ctx(Ls, Os, VMask, TargetArch, OptLevel, TestPrefix,
Flags);
static Ice::TimerIdT IDszmain = Ice::GlobalContext::getTimerID("szmain");
Ice::TimerMarker T(IDszmain, &Ctx);
Ice::TimerMarker T(Ice::TimerStack::TT_szmain, &Ctx);
int ErrorStatus = 0;
if (BuildOnRead) {
......@@ -185,8 +201,7 @@ int main(int argc, char **argv) {
} else {
// Parse the input LLVM IR file into a module.
SMDiagnostic Err;
static Ice::TimerIdT IDparse = Ice::GlobalContext::getTimerID("parse");
Ice::TimerMarker T1(IDparse, &Ctx);
Ice::TimerMarker T1(Ice::TimerStack::TT_parse, &Ctx);
Module *Mod =
NaClParseIRFile(IRFilename, InputFileFormat, Err, getGlobalContext());
......@@ -199,6 +214,10 @@ int main(int argc, char **argv) {
Converter.convertToIce();
ErrorStatus = Converter.getErrorStatus();
}
if (TimeEachFunction) {
const bool DumpCumulative = false;
Ctx.dumpTimers(Ice::GlobalContext::TSK_Funcs, DumpCumulative);
}
if (SubzeroTimingEnabled)
Ctx.dumpTimers();
const bool FinalStats = true;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment