Commit 380d7b96 by Jim Stichnoth

Subzero: Fix timers for multithreaded translation.

Now that multithreaded parsing and translation is in place, timer operations have to be made thread-local. After the non-main threads end, their thread-local timer data needs to be merged into the global timer data, which resides in the GlobalContext object. The merge is a bit tricky because the internal timer stack structure is built up dynamically as items are pushed and popped. Two threads may have radically different timing data: 1. The parser thread profile is completely different from a translator thread. 2. For -timing-funcs, two translator threads hold data for entirely different sets of functions. A bit more tweaking will need to be done to make the timing output fully usable in a multithreaded run. Because of multiple threads, times may add up to >100%. Also, time spent blocked is being "unfairly" attributed to the caller of the blocking operation - we should either count the user time instead of wall-clock time, or add a special timer marker for blocking locking operations. BUG= none R=jvoung@chromium.org Review URL: https://codereview.chromium.org/878383004
parent 51d00936
......@@ -81,13 +81,21 @@ bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); }
void Cfg::translate() {
if (hasError())
return;
// FunctionTimer conditionally pushes/pops a TimerMarker if
// TimeEachFunction is enabled.
std::unique_ptr<TimerMarker> FunctionTimer;
if (ALLOW_DUMP) {
const IceString &TimingFocusOn = getContext()->getFlags().TimingFocusOn;
if (TimingFocusOn == "*" || TimingFocusOn == getFunctionName()) {
const IceString &Name = getFunctionName();
if (TimingFocusOn == "*" || TimingFocusOn == Name) {
setFocusedTiming();
getContext()->resetTimer(GlobalContext::TSK_Default);
getContext()->setTimerName(GlobalContext::TSK_Default, getFunctionName());
getContext()->setTimerName(GlobalContext::TSK_Default, Name);
}
if (getContext()->getFlags().TimeEachFunction)
FunctionTimer.reset(new TimerMarker(
getContext()->getTimerID(GlobalContext::TSK_Funcs, Name),
getContext(), GlobalContext::TSK_Funcs));
}
TimerMarker T(TimerStack::TT_translate, this);
......
......@@ -143,8 +143,9 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit,
// Create a new ThreadContext for the current thread. No need to
// lock AllThreadContexts at this point since no other threads have
// access yet to this GlobalContext object.
AllThreadContexts.push_back(new ThreadContext());
ICE_TLS_SET_FIELD(TLS, AllThreadContexts.back());
ThreadContext *MyTLS = new ThreadContext();
AllThreadContexts.push_back(MyTLS);
ICE_TLS_SET_FIELD(TLS, MyTLS);
// Pre-register built-in stack names.
if (ALLOW_DUMP) {
// TODO(stichnot): There needs to be a strong relationship between
......@@ -152,6 +153,7 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit,
newTimerStackID("Total across all functions");
newTimerStackID("Per-function summary");
}
Timers.initInto(MyTLS->Timers);
if (Flags.UseELFWriter) {
ObjectWriter.reset(new ELFObjectWriter(*this, *ELFStr));
}
......@@ -503,38 +505,32 @@ TimerStackIdT GlobalContext::newTimerStackID(const IceString &Name) {
TimerIdT GlobalContext::getTimerID(TimerStackIdT StackID,
const IceString &Name) {
auto Timers = getTimers();
auto Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
assert(StackID < Timers->size());
return Timers->at(StackID).getTimerID(Name);
}
void GlobalContext::pushTimer(TimerIdT ID, TimerStackIdT StackID) {
// TODO(stichnot): Timers are completely broken for multithreading; fix.
if (getFlags().NumTranslationThreads)
llvm::report_fatal_error("Timers and multithreading are currently broken");
auto Timers = getTimers();
auto Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
assert(StackID < Timers->size());
Timers->at(StackID).push(ID);
}
void GlobalContext::popTimer(TimerIdT ID, TimerStackIdT StackID) {
// TODO(stichnot): Timers are completely broken for multithreading; fix.
if (getFlags().NumTranslationThreads)
llvm::report_fatal_error("Timers and multithreading are currently broken");
auto Timers = getTimers();
auto Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
assert(StackID < Timers->size());
Timers->at(StackID).pop(ID);
}
void GlobalContext::resetTimer(TimerStackIdT StackID) {
auto Timers = getTimers();
auto Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
assert(StackID < Timers->size());
Timers->at(StackID).reset();
}
void GlobalContext::setTimerName(TimerStackIdT StackID,
const IceString &NewName) {
auto Timers = getTimers();
auto Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
assert(StackID < Timers->size());
Timers->at(StackID).setName(NewName);
}
......@@ -560,13 +556,26 @@ void GlobalContext::dumpTimers(TimerStackIdT StackID, bool DumpCumulative) {
Timers->at(StackID).dump(getStrDump(), DumpCumulative);
}
TimerMarker::TimerMarker(TimerIdT ID, const Cfg *Func)
: ID(ID), Ctx(Func->getContext()), Active(false) {
if (ALLOW_DUMP) {
Active = Func->getFocusedTiming() || Ctx->getFlags().SubzeroTimingEnabled;
if (Active)
Ctx->pushTimer(ID);
void TimerMarker::push() {
switch (StackID) {
case GlobalContext::TSK_Default:
Active = Ctx->getFlags().SubzeroTimingEnabled;
break;
case GlobalContext::TSK_Funcs:
Active = Ctx->getFlags().TimeEachFunction;
break;
default:
break;
}
if (Active)
Ctx->pushTimer(ID, StackID);
}
void TimerMarker::pushCfg(const Cfg *Func) {
Ctx = Func->getContext();
Active = Func->getFocusedTiming() || Ctx->getFlags().SubzeroTimingEnabled;
if (Active)
Ctx->pushTimer(ID, StackID);
}
ICE_TLS_DEFINE_FIELD(GlobalContext::ThreadContext *, GlobalContext, TLS);
......
......@@ -83,6 +83,35 @@ class GlobalContext {
uint32_t Fills;
};
// TimerList is a vector of TimerStack objects, with extra methods
// to initialize and merge these vectors.
class TimerList : public std::vector<TimerStack> {
public:
// initInto() initializes a target list of timers based on the
// current list. In particular, it creates the same number of
// timers, in the same order, with the same names, but initially
// empty of timing data.
void initInto(TimerList &Dest) const {
if (!ALLOW_DUMP)
return;
Dest.clear();
for (const TimerStack &Stack : *this) {
Dest.push_back(TimerStack(Stack.getName()));
}
}
void mergeFrom(TimerList &Src) {
if (!ALLOW_DUMP)
return;
assert(size() == Src.size());
size_type i = 0;
for (TimerStack &Stack : *this) {
assert(Stack.getName() == Src[i].getName());
Stack.mergeFrom(Src[i]);
++i;
}
}
};
// ThreadContext contains thread-local data. This data can be
// combined/reduced as needed after all threads complete.
class ThreadContext {
......@@ -92,7 +121,7 @@ class GlobalContext {
public:
ThreadContext() {}
CodeStats StatsFunction;
std::vector<TimerStack> Timers;
TimerList Timers;
};
public:
......@@ -211,14 +240,20 @@ public:
// These are predefined TimerStackIdT values.
enum TimerStackKind { TSK_Default = 0, TSK_Funcs, TSK_Num };
// newTimerStackID() creates a new TimerStack in the global space.
// It does not affect any TimerStack objects in TLS.
TimerStackIdT newTimerStackID(const IceString &Name);
// dumpTimers() dumps the global timer data. As such, one probably
// wants to call mergeTimerStacks() as a prerequisite.
void dumpTimers(TimerStackIdT StackID = TSK_Default,
bool DumpCumulative = true);
// The following methods affect only the calling thread's TLS timer
// data.
TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);
void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
void pushTimer(TimerIdT ID, TimerStackIdT StackID);
void popTimer(TimerIdT ID, TimerStackIdT StackID);
void resetTimer(TimerStackIdT StackID);
void setTimerName(TimerStackIdT StackID, const IceString &NewName);
void dumpTimers(TimerStackIdT StackID = TSK_Default,
bool DumpCumulative = true);
// Adds a newly parsed and constructed function to the Cfg work
// queue. Notifies any idle workers that a new function is
......@@ -235,8 +270,10 @@ public:
void startWorkerThreads() {
size_t NumWorkers = getFlags().NumTranslationThreads;
auto Timers = getTimers();
for (size_t i = 0; i < NumWorkers; ++i) {
ThreadContext *WorkerTLS = new ThreadContext();
Timers->initInto(WorkerTLS->Timers);
AllThreadContexts.push_back(WorkerTLS);
TranslationThreads.push_back(std::thread(
&GlobalContext::translateFunctionsWrapper, this, WorkerTLS));
......@@ -254,6 +291,11 @@ public:
}
TranslationThreads.clear();
// TODO(stichnot): join the emitter thread.
if (ALLOW_DUMP) {
auto Timers = getTimers();
for (ThreadContext *TLS : AllThreadContexts)
Timers->mergeFrom(TLS->Timers);
}
}
// Translation thread startup routine.
......@@ -301,7 +343,7 @@ private:
ICE_CACHELINE_BOUNDARY;
// Managed by getTimers()
GlobalLockType TimerLock;
std::vector<TimerStack> Timers;
TimerList Timers;
ICE_CACHELINE_BOUNDARY;
// StrLock is a global lock on the dump and emit output streams.
......@@ -331,8 +373,8 @@ private:
LockedPtr<CodeStats> getStatsCumulative() {
return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock);
}
LockedPtr<std::vector<TimerStack>> getTimers() {
return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock);
LockedPtr<TimerList> getTimers() {
return LockedPtr<TimerList>(&Timers, &TimerLock);
}
std::vector<ThreadContext *> AllThreadContexts;
......@@ -357,24 +399,31 @@ class TimerMarker {
TimerMarker &operator=(const TimerMarker &) = delete;
public:
TimerMarker(TimerIdT ID, GlobalContext *Ctx)
: ID(ID), Ctx(Ctx), Active(false) {
if (ALLOW_DUMP) {
Active = Ctx->getFlags().SubzeroTimingEnabled;
if (Active)
Ctx->pushTimer(ID);
}
TimerMarker(TimerIdT ID, GlobalContext *Ctx,
TimerStackIdT StackID = GlobalContext::TSK_Default)
: ID(ID), Ctx(Ctx), StackID(StackID), Active(false) {
if (ALLOW_DUMP)
push();
}
TimerMarker(TimerIdT ID, const Cfg *Func,
TimerStackIdT StackID = GlobalContext::TSK_Default)
: ID(ID), Ctx(nullptr), StackID(StackID), Active(false) {
// Ctx gets set at the beginning of pushCfg().
if (ALLOW_DUMP)
pushCfg(Func);
}
TimerMarker(TimerIdT ID, const Cfg *Func);
~TimerMarker() {
if (ALLOW_DUMP && Active)
Ctx->popTimer(ID);
Ctx->popTimer(ID, StackID);
}
private:
TimerIdT ID;
GlobalContext *const Ctx;
void push();
void pushCfg(const Cfg *Func);
const TimerIdT ID;
GlobalContext *Ctx;
const TimerStackIdT StackID;
bool Active;
};
......
......@@ -24,8 +24,10 @@ TimerStack::TimerStack(const IceString &Name)
StateChangeCount(0), StackTop(0) {
if (!ALLOW_DUMP)
return;
Nodes.resize(1); // Reserve Nodes[0] for the root node.
Nodes.resize(1); // Reserve Nodes[0] for the root node (sentinel).
IDs.resize(TT__num);
LeafTimes.resize(TT__num);
LeafCounts.resize(TT__num);
#define STR(s) #s
#define X(tag) \
IDs[TT_##tag] = STR(tag); \
......@@ -43,29 +45,108 @@ TimerIdT TimerStack::getTimerID(const IceString &Name) {
if (IDsIndex.find(Name) == IDsIndex.end()) {
IDsIndex[Name] = IDs.size();
IDs.push_back(Name);
LeafTimes.push_back(decltype(LeafTimes)::value_type());
LeafCounts.push_back(decltype(LeafCounts)::value_type());
}
return IDsIndex[Name];
}
// Pushes a new marker onto the timer stack.
void TimerStack::push(TimerIdT ID) {
// Creates a mapping from TimerIdT (leaf) values in the Src timer
// stack into TimerIdT values in this timer stack. Creates new
// entries in this timer stack as needed.
TimerStack::TranslationType
TimerStack::translateIDsFrom(const TimerStack &Src) {
size_t Size = Src.IDs.size();
TranslationType Mapping(Size);
for (TimerIdT i = 0; i < Size; ++i) {
Mapping[i] = getTimerID(Src.IDs[i]);
}
return Mapping;
}
// Merges two timer stacks, by combining and summing corresponding
// entries. This timer stack is updated from Src.
void TimerStack::mergeFrom(const TimerStack &Src) {
if (!ALLOW_DUMP)
return;
const bool UpdateCounts = false;
update(UpdateCounts);
if (Nodes[StackTop].Children.size() <= ID)
Nodes[StackTop].Children.resize(ID + 1);
if (Nodes[StackTop].Children[ID] == 0) {
TranslationType Mapping = translateIDsFrom(Src);
TTindex SrcIndex = 0;
for (const TimerTreeNode &SrcNode : Src.Nodes) {
// The first node is reserved as a sentinel, so avoid it.
if (SrcIndex > 0) {
// Find the full path to the Src node, translated to path
// components corresponding to this timer stack.
PathType MyPath = Src.getPath(SrcIndex, Mapping);
// Find a node in this timer stack corresponding to the given
// path, creating new interior nodes as necessary.
TTindex MyIndex = findPath(MyPath);
Nodes[MyIndex].Time += SrcNode.Time;
Nodes[MyIndex].UpdateCount += SrcNode.UpdateCount;
}
++SrcIndex;
}
for (TimerIdT i = 0; i < Src.LeafTimes.size(); ++i) {
LeafTimes[Mapping[i]] += Src.LeafTimes[i];
LeafCounts[Mapping[i]] += Src.LeafCounts[i];
}
StateChangeCount += Src.StateChangeCount;
}
// Constructs a path consisting of the sequence of leaf values leading
// to a given node, with the Mapping translation applied to the leaf
// values. The path ends up being in "reverse" order, i.e. from leaf
// to root.
TimerStack::PathType TimerStack::getPath(TTindex Index,
const TranslationType &Mapping) const {
PathType Path;
while (Index) {
Path.push_back(Mapping[Nodes[Index].Interior]);
assert(Nodes[Index].Parent < Index);
Index = Nodes[Index].Parent;
}
return Path;
}
// Given a parent node and a leaf ID, returns the index of the
// parent's child ID, creating a new node for the child as necessary.
TimerStack::TTindex TimerStack::getChildIndex(TimerStack::TTindex Parent,
TimerIdT ID) {
if (Nodes[Parent].Children.size() <= ID)
Nodes[Parent].Children.resize(ID + 1);
if (Nodes[Parent].Children[ID] == 0) {
TTindex Size = Nodes.size();
Nodes[StackTop].Children[ID] = Size;
Nodes[Parent].Children[ID] = Size;
Nodes.resize(Size + 1);
Nodes[Size].Parent = StackTop;
Nodes[Size].Parent = Parent;
Nodes[Size].Interior = ID;
}
StackTop = Nodes[StackTop].Children[ID];
return Nodes[Parent].Children[ID];
}
// Finds a node in the timer stack corresponding to the given path,
// creating new interior nodes as necessary.
TimerStack::TTindex TimerStack::findPath(const PathType &Path) {
TTindex CurIndex = 0;
// The path is in reverse order (leaf to root), so it needs to be
// followed in reverse.
for (TTindex Index : reverse_range(Path)) {
CurIndex = getChildIndex(CurIndex, Index);
}
assert(CurIndex); // shouldn't be the sentinel node
return CurIndex;
}
// Pushes a new marker onto the timer stack.
void TimerStack::push(TimerIdT ID) {
if (!ALLOW_DUMP)
return;
const bool UpdateCounts = false;
update(UpdateCounts);
StackTop = getChildIndex(StackTop, ID);
assert(StackTop);
}
// Pop the top marker from the timer stack. Validates via assert()
// Pops the top marker from the timer stack. Validates via assert()
// that the expected marker is popped.
void TimerStack::pop(TimerIdT ID) {
if (!ALLOW_DUMP)
......
......@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICETIMERTREE_DEF
#define SUBZERO_SRC_ICETIMERTREE_DEF
#define TIMERTREE_TABLE \
/* enum value */ \
......@@ -53,5 +54,4 @@
X(vmetadata)
//#define X(tag)
#define SUBZERO_SRC_ICETIMERTREE_DEF
#endif // SUBZERO_SRC_ICETIMERTREE_DEF
......@@ -22,8 +22,16 @@ namespace Ice {
class TimerStack {
TimerStack &operator=(const TimerStack &) = delete;
// Timer tree index type
// Timer tree index type. A variable of this type is used to access
// an interior, not-necessarily-leaf node of the tree.
typedef std::vector<class TimerTreeNode>::size_type TTindex;
// Representation of a path of leaf values leading to a particular
// node. The representation happens to be in "reverse" order,
// i.e. from leaf/interior to root, for implementation efficiency.
typedef llvm::SmallVector<TTindex, 8> PathType;
// Representation of a mapping of leaf node indexes from one timer
// stack to another.
typedef std::vector<TimerIdT> TranslationType;
// TimerTreeNode represents an interior or leaf node in the call tree.
// It contains a list of children, a pointer to its parent, and the
......@@ -54,7 +62,9 @@ public:
TimerStack(const IceString &Name);
TimerStack(const TimerStack &) = default;
TimerIdT getTimerID(const IceString &Name);
void mergeFrom(const TimerStack &Src);
void setName(const IceString &NewName) { Name = NewName; }
const IceString &getName() const { return Name; }
void push(TimerIdT ID);
void pop(TimerIdT ID);
void reset();
......@@ -63,6 +73,10 @@ public:
private:
void update(bool UpdateCounts);
static double timestamp();
TranslationType translateIDsFrom(const TimerStack &Src);
PathType getPath(TTindex Index, const TranslationType &Mapping) const;
TTindex getChildIndex(TTindex Parent, TimerIdT ID);
TTindex findPath(const PathType &Path);
IceString Name;
double FirstTimestamp;
double LastTimestamp;
......
......@@ -1157,7 +1157,7 @@ private:
if (ALLOW_DUMP && getFlags().TimeEachFunction) {
getTranslator().getContext()->popTimer(
getTranslator().getContext()->getTimerID(
Ice::GlobalContext::TSK_Funcs, Func->getFunctionName()),
Ice::GlobalContext::TSK_Funcs, FuncDecl->getName()),
Ice::GlobalContext::TSK_Funcs);
}
}
......@@ -1827,6 +1827,7 @@ private:
void FunctionParser::ExitBlock() {
if (isIRGenerationDisabled()) {
popTimerIfTimingEachFunction();
delete Func;
return;
}
// Before translating, check for blocks without instructions, and
......@@ -1844,13 +1845,16 @@ void FunctionParser::ExitBlock() {
++Index;
}
Func->computePredecessors();
// Temporarily end per-function timing, which will be resumed by the
// translator function. This is because translation may be done
// asynchronously in a separate thread.
popTimerIfTimingEachFunction();
// Note: Once any errors have been found, we turn off all
// translation of all remaining functions. This allows use to see
// multiple errors, without adding extra checks to the translator
// for such parsing errors.
if (Context->getNumErrors() == 0)
getTranslator().translateFcn(Func);
popTimerIfTimingEachFunction();
}
void FunctionParser::ReportInvalidBinaryOp(Ice::InstArithmetic::OpKind Op,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment