Commit b88d8c87 by Jim Stichnoth

Subzero: Improve the use of timers.

Several things are done here: 1. Move timer support to be guarded by the ALLOW_TIMERS define, or the BuildDefs::timers() constexpr method. 2. Add a NODUMP build configuration to control whether dump support is built in. So "make -f Makefile.standalone NODUMP=1 NOASSERT=1" is pretty close to a MINIMAL build with timer support. 3. Add some missing timers: alloca analysis, RMW analysis, helper call pre-lowering, load optimization analysis. These omitted pass timings were being rolled up into the "O2" bucket. 4. Add timers around push and pop operations on the translate queue and the emit queue. 5. Refactor the clumsy code to push/pop function timers (as opposed to pass timers), so that it fits into the nice RAII TimerMarker class like the pass timers. 6. It turns out that even with MINIMAL or NODUMP builds, we still construct a longish std::string every time Cfg::dump() is called, even though the string isn't used in MINIMAL/NODUMP mode. The dump() arg might as well be a const char * arg instead. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4360 R=kschimpf@google.com Review URL: https://codereview.chromium.org/1784243006 .
parent 2f00bf6d
......@@ -84,12 +84,20 @@ LD_EXTRA =
ifdef MINIMAL
NOASSERT = 1
NODUMP = 1
OBJDIR := $(OBJDIR)+Min
BASE_CXX_DEFINES += -DALLOW_DUMP=0 -DALLOW_LLVM_CL=0 -DALLOW_LLVM_IR=0 \
-DALLOW_LLVM_IR_AS_INPUT=0 -DALLOW_MINIMAL_BUILD=1
BASE_CXX_DEFINES += -DALLOW_LLVM_CL=0 -DALLOW_LLVM_IR=0 \
-DALLOW_LLVM_IR_AS_INPUT=0 -DALLOW_TIMERS=0 -DALLOW_MINIMAL_BUILD=1
else
BASE_CXX_DEFINES += -DALLOW_DUMP=1 -DALLOW_LLVM_CL=1 -DALLOW_LLVM_IR=1 \
-DALLOW_LLVM_IR_AS_INPUT=1 -DALLOW_MINIMAL_BUILD=0
BASE_CXX_DEFINES += -DALLOW_LLVM_CL=1 -DALLOW_LLVM_IR=1 \
-DALLOW_LLVM_IR_AS_INPUT=1 -DALLOW_TIMERS=1 -DALLOW_MINIMAL_BUILD=0
endif
ifdef NODUMP
OBJDIR := $(OBJDIR)+NoDump
BASE_CXX_DEFINES += -DALLOW_DUMP=0
else
BASE_CXX_DEFINES += -DALLOW_DUMP=1
endif
CXX_DEFINES := $(BASE_CXX_DEFINES) -DPNACL_BROWSER_TRANSLATOR=0
......
......@@ -77,6 +77,8 @@ namespace BuildDefs {
// The ALLOW_* etc. symbols must be #defined to zero or non-zero.
/// Return true if ALLOW_DUMP is defined as a non-zero value
constexpr bool dump() { return ALLOW_DUMP; }
/// Return true if ALLOW_TIMERS is defined as a non-zero value
constexpr bool timers() { return ALLOW_TIMERS; }
/// Return true if ALLOW_LLVM_CL is defined as a non-zero value
// TODO(stichnot): this ALLOW_LLVM_CL is a TBD option which will
// allow for replacement of llvm:cl command line processor with a
......
......@@ -187,9 +187,6 @@ void Cfg::addCallToProfileSummary() {
void Cfg::translate() {
if (hasError())
return;
// FunctionTimer conditionally pushes/pops a TimerMarker if TimeEachFunction
// is enabled.
std::unique_ptr<TimerMarker> FunctionTimer;
if (BuildDefs::dump()) {
const IceString &TimingFocusOn =
getContext()->getFlags().getTimingFocusOn();
......@@ -199,15 +196,12 @@ void Cfg::translate() {
getContext()->resetTimer(GlobalContext::TSK_Default);
getContext()->setTimerName(GlobalContext::TSK_Default, Name);
}
if (getContext()->getFlags().getTimeEachFunction())
FunctionTimer.reset(new TimerMarker(
getContext()->getTimerID(GlobalContext::TSK_Funcs, Name),
getContext(), GlobalContext::TSK_Funcs));
if (isVerbose(IceV_Status)) {
getContext()->getStrDump() << ">>>Translating "
<< getFunctionNameAndSize() << "\n";
}
}
TimerMarker T_func(getContext(), getFunctionName());
TimerMarker T(TimerStack::TT_translate, this);
dump("Initial CFG");
......@@ -573,6 +567,7 @@ void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
}
void Cfg::processAllocas(bool SortAndCombine) {
TimerMarker _(TimerStack::TT_alloca, this);
const uint32_t StackAlignment = getTarget()->getStackAlignment();
CfgNode *EntryNode = getEntryNode();
// LLVM enforces power of 2 alignment.
......@@ -1083,14 +1078,14 @@ size_t Cfg::getTotalMemoryMB() {
}
// Dumps the IR with an optional introductory message.
void Cfg::dump(const IceString &Message) {
void Cfg::dump(const char *Message) {
if (!BuildDefs::dump())
return;
if (!isVerbose())
return;
OstreamLocker L(Ctx);
Ostream &Str = Ctx->getStrDump();
if (!Message.empty())
if (Message[0])
Str << "================ " << Message << " ================\n";
if (isVerbose(IceV_Mem)) {
Str << "Memory size = " << getTotalMemoryMB() << " MB\n";
......
......@@ -227,7 +227,7 @@ public:
void emitIAS();
static void emitTextHeader(const IceString &Name, GlobalContext *Ctx,
const Assembler *Asm);
void dump(const IceString &Message = "");
void dump(const char *Message = "");
/// Allocate data of type T using the per-Cfg allocator.
template <typename T> T *allocate() { return Allocator->Allocate<T>(); }
......
......@@ -139,8 +139,8 @@ void Compiler::run(const Ice::ClFlagsExtra &ExtraFlags, GlobalContext &Ctx,
Ctx.dumpTimers();
if (Ctx.getFlags().getTimeEachFunction()) {
constexpr bool DumpCumulative = false;
Ctx.dumpTimers(GlobalContext::TSK_Funcs, DumpCumulative);
constexpr bool NoDumpCumulative = false;
Ctx.dumpTimers(GlobalContext::TSK_Funcs, NoDumpCumulative);
}
constexpr bool FinalStats = true;
Ctx.dumpStats("_FINAL_", FinalStats);
......
......@@ -913,21 +913,12 @@ void Converter::convertGlobals(Module *Mod) {
}
void Converter::convertFunctions() {
const TimerStackIdT StackID = GlobalContext::TSK_Funcs;
for (const Function &I : *Mod) {
if (I.empty())
continue;
TimerIdT TimerID = 0;
const bool TimeThisFunction = Ctx->getFlags().getTimeEachFunction();
if (TimeThisFunction) {
TimerID = Ctx->getTimerID(StackID, I.getName());
Ctx->pushTimer(TimerID, StackID);
}
TimerMarker _(Ctx, I.getName());
LLVM2ICEFunctionConverter FunctionConverter(*this);
FunctionConverter.convertFunction(&I);
if (TimeThisFunction)
Ctx->popTimer(TimerID, StackID);
}
}
......
......@@ -219,14 +219,8 @@ Elf64_Off ELFObjectWriter::alignFileOffset(Elf64_Xword Align) {
void ELFObjectWriter::writeFunctionCode(const IceString &FuncName,
bool IsInternal, Assembler *Asm) {
assert(!SectionNumbersAssigned);
TimerMarker T_func(&Ctx, FuncName);
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
constexpr TimerStackIdT StackID = GlobalContext::TSK_Funcs;
TimerIdT TimerID = 0;
bool TimeThisFunction = Ctx.getFlags().getTimeEachFunction();
if (TimeThisFunction) {
TimerID = Ctx.getTimerID(StackID, FuncName);
Ctx.pushTimer(TimerID, StackID);
}
ELFTextSection *Section = nullptr;
ELFRelocationSection *RelSection = nullptr;
const bool FunctionSections = Ctx.getFlags().getFunctionSections();
......@@ -278,8 +272,6 @@ void ELFObjectWriter::writeFunctionCode(const IceString &FuncName,
RelSection->addRelocations(OffsetInSection, Asm->fixups(), SymTab);
}
Section->appendData(Str, Asm->getBufferView());
if (TimeThisFunction)
Ctx.popTimer(TimerID, StackID);
}
namespace {
......
......@@ -276,7 +276,7 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError,
AllThreadContexts.push_back(MyTLS);
ICE_TLS_SET_FIELD(TLS, MyTLS);
// Pre-register built-in stack names.
if (BuildDefs::dump()) {
if (BuildDefs::timers()) {
// TODO(stichnot): There needs to be a strong relationship between
// the newTimerStackID() return values and TSK_Default/TSK_Funcs.
newTimerStackID("Total across all functions");
......@@ -807,7 +807,7 @@ GlobalContext::addJumpTable(const IceString &FuncName, SizeT Id,
}
TimerStackIdT GlobalContext::newTimerStackID(const IceString &Name) {
if (!BuildDefs::dump())
if (!BuildDefs::timers())
return 0;
auto Timers = getTimers();
TimerStackIdT NewID = Timers->size();
......@@ -853,23 +853,31 @@ void GlobalContext::setTimerName(TimerStackIdT StackID,
// such as the use of atomics to modify queue elements.
void GlobalContext::optQueueBlockingPush(std::unique_ptr<Cfg> Func) {
assert(Func);
OptQ.blockingPush(Func.release());
{
TimerMarker _(TimerStack::TT_qTransPush, this);
OptQ.blockingPush(Func.release());
}
if (getFlags().isSequential())
translateFunctions();
}
std::unique_ptr<Cfg> GlobalContext::optQueueBlockingPop() {
TimerMarker _(TimerStack::TT_qTransPop, this);
return std::unique_ptr<Cfg>(OptQ.blockingPop());
}
void GlobalContext::emitQueueBlockingPush(EmitterWorkItem *Item) {
assert(Item);
EmitQ.blockingPush(Item);
{
TimerMarker _(TimerStack::TT_qEmitPush, this);
EmitQ.blockingPush(Item);
}
if (getFlags().isSequential())
emitItems();
}
EmitterWorkItem *GlobalContext::emitQueueBlockingPop() {
TimerMarker _(TimerStack::TT_qEmitPop, this);
return EmitQ.blockingPop();
}
......@@ -884,7 +892,7 @@ void GlobalContext::dumpStats(const IceString &Name, bool Final) {
}
void GlobalContext::dumpTimers(TimerStackIdT StackID, bool DumpCumulative) {
if (!BuildDefs::dump())
if (!BuildDefs::timers())
return;
auto Timers = getTimers();
assert(Timers->size() > StackID);
......@@ -895,6 +903,15 @@ void GlobalContext::dumpTimers(TimerStackIdT StackID, bool DumpCumulative) {
ClFlags GlobalContext::Flags;
ClFlagsExtra GlobalContext::ExtraFlags;
TimerIdT TimerMarker::getTimerIdFromFuncName(GlobalContext *Ctx,
const IceString &FuncName) {
if (!BuildDefs::timers())
return 0;
if (!Ctx->getFlags().getTimeEachFunction())
return 0;
return Ctx->getTimerID(GlobalContext::TSK_Funcs, FuncName);
}
void TimerMarker::push() {
switch (StackID) {
case GlobalContext::TSK_Default:
......
......@@ -118,7 +118,7 @@ class GlobalContext {
/// timers, in the same order, with the same names, but initially
/// empty of timing data.
void initInto(TimerList &Dest) const {
if (!BuildDefs::dump())
if (!BuildDefs::timers())
return;
Dest.clear();
for (const TimerStack &Stack : *this) {
......@@ -126,7 +126,7 @@ class GlobalContext {
}
}
void mergeFrom(TimerList &Src) {
if (!BuildDefs::dump())
if (!BuildDefs::timers())
return;
assert(size() == Src.size());
size_type i = 0;
......@@ -381,7 +381,7 @@ public:
}
EmitterThreads.clear();
if (BuildDefs::dump()) {
if (BuildDefs::timers()) {
auto Timers = getTimers();
for (ThreadContext *TLS : AllThreadContexts)
Timers->mergeFrom(TLS->Timers);
......@@ -560,25 +560,33 @@ public:
TimerMarker(TimerIdT ID, GlobalContext *Ctx,
TimerStackIdT StackID = GlobalContext::TSK_Default)
: ID(ID), Ctx(Ctx), StackID(StackID) {
if (BuildDefs::dump())
if (BuildDefs::timers())
push();
}
TimerMarker(TimerIdT ID, const Cfg *Func,
TimerStackIdT StackID = GlobalContext::TSK_Default)
: ID(ID), Ctx(nullptr), StackID(StackID) {
// Ctx gets set at the beginning of pushCfg().
if (BuildDefs::dump())
if (BuildDefs::timers())
pushCfg(Func);
}
TimerMarker(GlobalContext *Ctx, const IceString &FuncName)
: ID(getTimerIdFromFuncName(Ctx, FuncName)), Ctx(Ctx),
StackID(GlobalContext::TSK_Funcs) {
if (BuildDefs::timers())
push();
}
~TimerMarker() {
if (BuildDefs::dump() && Active)
if (BuildDefs::timers() && Active)
Ctx->popTimer(ID, StackID);
}
private:
void push();
void pushCfg(const Cfg *Func);
static TimerIdT getTimerIdFromFuncName(GlobalContext *Ctx,
const IceString &FuncName);
const TimerIdT ID;
GlobalContext *Ctx;
const TimerStackIdT StackID;
......
......@@ -313,6 +313,7 @@ TargetLowering::AutoBundle::~AutoBundle() {
}
void TargetLowering::genTargetHelperCalls() {
TimerMarker T(TimerStack::TT_genHelpers, Func);
Utils::BoolFlagSaver _(GeneratingTargetHelpers, true);
for (CfgNode *Node : Func->getNodes()) {
Context.init(Node);
......
......@@ -615,6 +615,7 @@ bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
}
template <typename TraitsType> void TargetX86Base<TraitsType>::findRMW() {
TimerMarker _(TimerStack::TT_findRMW, Func);
Func->dump("Before RMW");
if (Func->isVerbose(IceV_RMW))
Func->getContext()->lockStr();
......@@ -732,6 +733,7 @@ inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
}
template <typename TraitsType> void TargetX86Base<TraitsType>::doLoadOpt() {
TimerMarker _(TimerStack::TT_loadOpt, Func);
for (CfgNode *Node : Func->getNodes()) {
Context.init(Node);
while (!Context.atEnd()) {
......
......@@ -32,7 +32,7 @@ namespace Ice {
TimerStack::TimerStack(const IceString &Name)
: Name(Name), FirstTimestamp(timestamp()), LastTimestamp(FirstTimestamp) {
if (!BuildDefs::dump())
if (!BuildDefs::timers())
return;
Nodes.resize(1); // Reserve Nodes[0] for the root node (sentinel).
IDs.resize(TT__num);
......@@ -49,7 +49,7 @@ TimerStack::TimerStack(const IceString &Name)
// Returns the unique timer ID for the given Name, creating a new ID if needed.
TimerIdT TimerStack::getTimerID(const IceString &Name) {
if (!BuildDefs::dump())
if (!BuildDefs::timers())
return 0;
if (IDsIndex.find(Name) == IDsIndex.end()) {
IDsIndex[Name] = IDs.size();
......@@ -76,7 +76,7 @@ TimerStack::translateIDsFrom(const TimerStack &Src) {
// Merges two timer stacks, by combining and summing corresponding entries.
// This timer stack is updated from Src.
void TimerStack::mergeFrom(const TimerStack &Src) {
if (!BuildDefs::dump())
if (!BuildDefs::timers())
return;
TranslationType Mapping = translateIDsFrom(Src);
TTindex SrcIndex = 0;
......@@ -146,7 +146,7 @@ TimerStack::TTindex TimerStack::findPath(const PathType &Path) {
// Pushes a new marker onto the timer stack.
void TimerStack::push(TimerIdT ID) {
if (!BuildDefs::dump())
if (!BuildDefs::timers())
return;
constexpr bool UpdateCounts = false;
update(UpdateCounts);
......@@ -157,7 +157,7 @@ void TimerStack::push(TimerIdT ID) {
// Pops the top marker from the timer stack. Validates via assert() that the
// expected marker is popped.
void TimerStack::pop(TimerIdT ID) {
if (!BuildDefs::dump())
if (!BuildDefs::timers())
return;
constexpr bool UpdateCounts = true;
update(UpdateCounts);
......@@ -174,7 +174,7 @@ void TimerStack::pop(TimerIdT ID) {
// At a state change (e.g. push or pop), updates the flat and cumulative
// timings for everything on the timer stack.
void TimerStack::update(bool UpdateCounts) {
if (!BuildDefs::dump())
if (!BuildDefs::timers())
return;
++StateChangeCount;
// Whenever the stack is about to change, we grab the time delta since the
......@@ -210,7 +210,7 @@ void TimerStack::update(bool UpdateCounts) {
}
void TimerStack::reset() {
if (!BuildDefs::dump())
if (!BuildDefs::timers())
return;
StateChangeCount = 0;
FirstTimestamp = LastTimestamp = timestamp();
......@@ -228,7 +228,7 @@ using DumpMapType = std::multimap<double, IceString>;
// Dump the Map items in reverse order of their time contribution.
void dumpHelper(Ostream &Str, const DumpMapType &Map, double TotalTime) {
if (!BuildDefs::dump())
if (!BuildDefs::timers())
return;
for (auto &I : reverse_range(Map)) {
char buf[80];
......@@ -244,7 +244,7 @@ void dumpHelper(Ostream &Str, const DumpMapType &Map, double TotalTime) {
// MaxVal=5 ==> "[%1lu] "
// MaxVal=9876 ==> "[%4lu] "
void makePrintfFormatString(char *Buf, size_t BufLen, size_t MaxVal) {
if (!BuildDefs::dump())
if (!BuildDefs::timers())
return;
int NumDigits = 0;
do {
......@@ -257,7 +257,7 @@ void makePrintfFormatString(char *Buf, size_t BufLen, size_t MaxVal) {
} // end of anonymous namespace
void TimerStack::dump(Ostream &Str, bool DumpCumulative) {
if (!BuildDefs::dump())
if (!BuildDefs::timers())
return;
constexpr bool UpdateCounts = true;
update(UpdateCounts);
......
......@@ -20,6 +20,7 @@
X(O2) \
X(Om1) \
X(advancedPhiLowering) \
X(alloca) \
X(computeLoopNestDepth) \
X(convertToIce) \
X(deletePhis) \
......@@ -29,14 +30,17 @@
X(doNopInsertion) \
X(emitAsm) \
X(emitGlobalInitializers) \
X(findRMW) \
X(genCode) \
X(genFrame) \
X(genHelpers) \
X(initUnhandled) \
X(linearScan) \
X(liveRange) \
X(liveness) \
X(livenessLightweight) \
X(llvmConvert) \
X(loadOpt) \
X(lowerPhiAssignments) \
X(parse) \
X(parseConstants) \
......@@ -49,6 +53,10 @@
X(phiValidation) \
X(placePhiLoads) \
X(placePhiStores) \
X(qEmitPop) \
X(qEmitPush) \
X(qTransPop) \
X(qTransPush) \
X(regAlloc) \
X(renumberInstructions) \
X(szmain) \
......
......@@ -1339,21 +1339,14 @@ public:
NextLocalInstIndex(Context->getNumGlobalIDs()) {}
bool convertFunction() {
const Ice::TimerStackIdT StackID = Ice::GlobalContext::TSK_Funcs;
Ice::TimerIdT TimerID = 0;
const bool TimeThisFunction = getFlags().getTimeEachFunction();
if (TimeThisFunction) {
TimerID = getTranslator().getContext()->getTimerID(StackID,
FuncDecl->getName());
getTranslator().getContext()->pushTimer(TimerID, StackID);
}
// Note: The Cfg is created, even when IR generation is disabled. This is
// done to install a CfgLocalAllocator for various internal containers.
Func = Ice::Cfg::create(getTranslator().getContext(),
getTranslator().getNextSequenceNumber());
bool ParserResult;
{
Ice::TimerMarker T(getTranslator().getContext(), FuncDecl->getName());
// Note: The Cfg is created, even when IR generation is disabled. This is
// done to install a CfgLocalAllocator for various internal containers.
Func = Ice::Cfg::create(getTranslator().getContext(),
getTranslator().getNextSequenceNumber());
Ice::CfgLocalAllocatorScope _(Func.get());
// TODO(kschimpf) Clean up API to add a function signature to a CFG.
......@@ -1370,12 +1363,6 @@ public:
ParserResult = ParseThisBlock();
// Temporarily end per-function timing, which will be resumed by the
// translator function. This is because translation may be done
// asynchronously in a separate thread.
if (TimeThisFunction)
getTranslator().getContext()->popTimer(TimerID, StackID);
// Note: Once any errors have been found, we turn off all translation of
// all remaining functions. This allows successive parsing errors to be
// reported, without adding extra checks to the translator for such
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment