Commit a1dd3cc8 by Jim Stichnoth

Subzero: Fix stats collection and output for multithreading.

Updates of the current-function and cumulative stats are now done entirely in thread-local storage (TLS). At the end, the cumulative stats from every thread's TLS are merged into the global cumulative stats.

Printing of cumulative stats after every function is removed, since it provides very little value. It was probably done originally just to give partial cumulative information in the face of crashes or assertion failures.

BUG= none
R=jfb@chromium.org

Review URL: https://codereview.chromium.org/887213002
parent ae6e12ca
......@@ -111,12 +111,12 @@ public:
void GlobalContext::CodeStats::dump(const IceString &Name, Ostream &Str) {
if (!ALLOW_DUMP)
return;
Str << "|" << Name << "|Inst Count |" << InstructionsEmitted << "\n";
Str << "|" << Name << "|Regs Saved |" << RegistersSaved << "\n";
Str << "|" << Name << "|Frame Bytes |" << FrameBytes << "\n";
Str << "|" << Name << "|Spills |" << Spills << "\n";
Str << "|" << Name << "|Fills |" << Fills << "\n";
Str << "|" << Name << "|Spills+Fills|" << Spills + Fills << "\n";
#define X(str, tag) \
Str << "|" << Name << "|" str "|" << Stats[CS_##tag] << "\n";
CODESTATS_TABLE
#undef X
Str << "|" << Name << "|Spills+Fills|"
<< Stats[CS_NumSpills] + Stats[CS_NumFills] << "\n";
Str << "|" << Name << "|Memory Usage|";
if (ssize_t MemUsed = llvm::TimeRecord::getCurrentTime(false).getMemUsed())
Str << MemUsed;
......@@ -543,7 +543,6 @@ void GlobalContext::dumpStats(const IceString &Name, bool Final) {
getStatsCumulative()->dump(Name, getStrDump());
} else {
ICE_TLS_GET_FIELD(TLS)->StatsFunction.dump(Name, getStrDump());
getStatsCumulative()->dump("_TOTAL_", getStrDump());
}
}
......
......@@ -15,6 +15,7 @@
#ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H
#define SUBZERO_SRC_ICEGLOBALCONTEXT_H
#include <array>
#include <mutex>
#include <thread>
......@@ -62,25 +63,36 @@ class GlobalContext {
// CodeStats holds a small set of uint32_t code-generation counters
// (instruction count, registers saved, frame bytes, spills, fills).
// NOTE(review): this text is a diff rendering with the +/- markers
// stripped, so the pre-change implementation (five named scalar fields
// with one update method each) and the post-change implementation (a
// table-driven std::array indexed by CSTag) both appear below — confirm
// which lines belong to which side before treating this as a header.
class CodeStats {
CodeStats(const CodeStats &) = delete;
CodeStats &operator=(const CodeStats &) = default;
// CODESTATS_TABLE lists every counter once as X(dump string, enum tag).
// Users define X, expand the table, then #undef X; the CSTag enum below
// is generated this way.
#define CODESTATS_TABLE \
/* dump string, enum value */ \
X("Inst Count ", InstCount) \
X("Regs Saved ", RegsSaved) \
X("Frame Bytes ", FrameByte) \
X("Spills ", NumSpills) \
X("Fills ", NumFills)
//#define X(str, tag)
public:
// Scalar-field version: zero-initializes each named counter.
CodeStats()
: InstructionsEmitted(0), RegistersSaved(0), FrameBytes(0), Spills(0),
Fills(0) {}
// Scalar-field version: resets by assigning a freshly constructed object.
void reset() { *this = CodeStats(); }
// Scalar-field version: one update method per counter.
void updateEmitted(uint32_t InstCount) { InstructionsEmitted += InstCount; }
void updateRegistersSaved(uint32_t Num) { RegistersSaved += Num; }
void updateFrameBytes(uint32_t Bytes) { FrameBytes += Bytes; }
void updateSpills() { ++Spills; }
void updateFills() { ++Fills; }
// One CS_<tag> enumerator per CODESTATS_TABLE entry, in table order.
// CS_NUM comes last, so it equals the number of counters and is used as
// the size of the Stats array.
enum CSTag {
#define X(str, tag) CS_##tag,
CODESTATS_TABLE
#undef X
CS_NUM
};
// Array version: construction sets every counter to zero via reset().
CodeStats() { reset(); }
// Array version: sets every element of Stats to 0.
void reset() { Stats.fill(0); }
// Adds Count (1 when omitted) to the counter selected by Tag. The assert
// checks that Tag is a valid index into Stats.
void update(CSTag Tag, uint32_t Count = 1) {
assert(Tag < Stats.size());
Stats[Tag] += Count;
}
// Adds each of Other's counters to the corresponding counter of this
// object; Other is not modified.
void add(const CodeStats &Other) {
for (uint32_t i = 0; i < Stats.size(); ++i)
Stats[i] += Other.Stats[i];
}
// Writes the counters to Str with Name as the label; defined out of line.
void dump(const IceString &Name, Ostream &Str);
private:
// Scalar-field version: one named member per counter.
uint32_t InstructionsEmitted;
uint32_t RegistersSaved;
uint32_t FrameBytes;
uint32_t Spills;
uint32_t Fills;
// Array version: counter storage, indexed by CSTag.
std::array<uint32_t, CS_NUM> Stats;
};
// TimerList is a vector of TimerStack objects, with extra methods
......@@ -121,6 +133,7 @@ class GlobalContext {
public:
ThreadContext() {}
CodeStats StatsFunction;
CodeStats StatsCumulative;
TimerList Timers;
};
......@@ -209,32 +222,37 @@ public:
// Adds InstCount to the instruction-count statistic. Returns without
// recording anything unless ALLOW_DUMP is set and the DumpStats flag is on.
// NOTE(review): diff view with +/- markers stripped. The two statements
// right after the guard (calling updateEmitted(), one of them through
// getStatsCumulative()) look like the removed pre-change lines; the three
// that follow update both CodeStats objects of the current thread's
// ThreadContext and look like their replacement — confirm.
void statsUpdateEmitted(uint32_t InstCount) {
if (!ALLOW_DUMP || !getFlags().DumpStats)
return;
ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateEmitted(InstCount);
getStatsCumulative()->updateEmitted(InstCount);
ThreadContext *TLS = ICE_TLS_GET_FIELD(TLS);
TLS->StatsFunction.update(CodeStats::CS_InstCount, InstCount);
TLS->StatsCumulative.update(CodeStats::CS_InstCount, InstCount);
}
// Adds Num to the registers-saved statistic. Returns without recording
// anything unless ALLOW_DUMP is set and the DumpStats flag is on.
// NOTE(review): diff view with +/- markers stripped. The two statements
// right after the guard (calling updateRegistersSaved(), one of them
// through getStatsCumulative()) look like the removed pre-change lines;
// the three that follow update both CodeStats objects of the current
// thread's ThreadContext and look like their replacement — confirm.
void statsUpdateRegistersSaved(uint32_t Num) {
if (!ALLOW_DUMP || !getFlags().DumpStats)
return;
ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateRegistersSaved(Num);
getStatsCumulative()->updateRegistersSaved(Num);
ThreadContext *TLS = ICE_TLS_GET_FIELD(TLS);
TLS->StatsFunction.update(CodeStats::CS_RegsSaved, Num);
TLS->StatsCumulative.update(CodeStats::CS_RegsSaved, Num);
}
// Adds Bytes to the frame-bytes statistic. Returns without recording
// anything unless ALLOW_DUMP is set and the DumpStats flag is on.
// NOTE(review): diff view with +/- markers stripped. The two statements
// right after the guard (calling updateFrameBytes(), one of them through
// getStatsCumulative()) look like the removed pre-change lines; the three
// that follow update both CodeStats objects of the current thread's
// ThreadContext and look like their replacement — confirm.
void statsUpdateFrameBytes(uint32_t Bytes) {
if (!ALLOW_DUMP || !getFlags().DumpStats)
return;
ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateFrameBytes(Bytes);
getStatsCumulative()->updateFrameBytes(Bytes);
ThreadContext *TLS = ICE_TLS_GET_FIELD(TLS);
TLS->StatsFunction.update(CodeStats::CS_FrameByte, Bytes);
TLS->StatsCumulative.update(CodeStats::CS_FrameByte, Bytes);
}
// Counts one spill. Returns without recording anything unless ALLOW_DUMP
// is set and the DumpStats flag is on.
// NOTE(review): diff view with +/- markers stripped. The two statements
// right after the guard (calling updateSpills(), one of them through
// getStatsCumulative()) look like the removed pre-change lines; the three
// that follow update both CodeStats objects of the current thread's
// ThreadContext and look like their replacement — confirm.
void statsUpdateSpills() {
if (!ALLOW_DUMP || !getFlags().DumpStats)
return;
ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateSpills();
getStatsCumulative()->updateSpills();
ThreadContext *TLS = ICE_TLS_GET_FIELD(TLS);
// No count argument is passed, so update() is called with its default.
TLS->StatsFunction.update(CodeStats::CS_NumSpills);
TLS->StatsCumulative.update(CodeStats::CS_NumSpills);
}
// Counts one fill. Returns without recording anything unless ALLOW_DUMP
// is set and the DumpStats flag is on.
// NOTE(review): diff view with +/- markers stripped. The two statements
// right after the guard (calling updateFills(), one of them through
// getStatsCumulative()) look like the removed pre-change lines; the three
// that follow update both CodeStats objects of the current thread's
// ThreadContext and look like their replacement — confirm.
void statsUpdateFills() {
if (!ALLOW_DUMP || !getFlags().DumpStats)
return;
ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateFills();
getStatsCumulative()->updateFills();
ThreadContext *TLS = ICE_TLS_GET_FIELD(TLS);
// No count argument is passed, so update() is called with its default.
TLS->StatsFunction.update(CodeStats::CS_NumFills);
TLS->StatsCumulative.update(CodeStats::CS_NumFills);
}
// These are predefined TimerStackIdT values.
......@@ -296,6 +314,13 @@ public:
for (ThreadContext *TLS : AllThreadContexts)
Timers->mergeFrom(TLS->Timers);
}
if (ALLOW_DUMP) {
// Do a separate loop over AllThreadContexts to avoid holding
// two locks at once.
auto Stats = getStatsCumulative();
for (ThreadContext *TLS : AllThreadContexts)
Stats->add(TLS->StatsCumulative);
}
}
// Translation thread startup routine.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment