Commit a1dd3cc8 by Jim Stichnoth

Subzero: Fix stats collection and output for multithreading.

Updates to the current-function and cumulative stats are now done entirely in thread-local storage (TLS). At the end, the cumulative stats are merged from every thread's TLS into the global cumulative stats. Printing the cumulative stats after every function is removed, since it provides very little value; it was probably done originally only to give partial cumulative information in the face of crashes or assertion failures.

BUG= none
R=jfb@chromium.org
Review URL: https://codereview.chromium.org/887213002
parent ae6e12ca
...@@ -111,12 +111,12 @@ public: ...@@ -111,12 +111,12 @@ public:
void GlobalContext::CodeStats::dump(const IceString &Name, Ostream &Str) { void GlobalContext::CodeStats::dump(const IceString &Name, Ostream &Str) {
if (!ALLOW_DUMP) if (!ALLOW_DUMP)
return; return;
Str << "|" << Name << "|Inst Count |" << InstructionsEmitted << "\n"; #define X(str, tag) \
Str << "|" << Name << "|Regs Saved |" << RegistersSaved << "\n"; Str << "|" << Name << "|" str "|" << Stats[CS_##tag] << "\n";
Str << "|" << Name << "|Frame Bytes |" << FrameBytes << "\n"; CODESTATS_TABLE
Str << "|" << Name << "|Spills |" << Spills << "\n"; #undef X
Str << "|" << Name << "|Fills |" << Fills << "\n"; Str << "|" << Name << "|Spills+Fills|"
Str << "|" << Name << "|Spills+Fills|" << Spills + Fills << "\n"; << Stats[CS_NumSpills] + Stats[CS_NumFills] << "\n";
Str << "|" << Name << "|Memory Usage|"; Str << "|" << Name << "|Memory Usage|";
if (ssize_t MemUsed = llvm::TimeRecord::getCurrentTime(false).getMemUsed()) if (ssize_t MemUsed = llvm::TimeRecord::getCurrentTime(false).getMemUsed())
Str << MemUsed; Str << MemUsed;
...@@ -543,7 +543,6 @@ void GlobalContext::dumpStats(const IceString &Name, bool Final) { ...@@ -543,7 +543,6 @@ void GlobalContext::dumpStats(const IceString &Name, bool Final) {
getStatsCumulative()->dump(Name, getStrDump()); getStatsCumulative()->dump(Name, getStrDump());
} else { } else {
ICE_TLS_GET_FIELD(TLS)->StatsFunction.dump(Name, getStrDump()); ICE_TLS_GET_FIELD(TLS)->StatsFunction.dump(Name, getStrDump());
getStatsCumulative()->dump("_TOTAL_", getStrDump());
} }
} }
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H
#define SUBZERO_SRC_ICEGLOBALCONTEXT_H #define SUBZERO_SRC_ICEGLOBALCONTEXT_H
#include <array>
#include <mutex> #include <mutex>
#include <thread> #include <thread>
...@@ -62,25 +63,36 @@ class GlobalContext { ...@@ -62,25 +63,36 @@ class GlobalContext {
class CodeStats { class CodeStats {
CodeStats(const CodeStats &) = delete; CodeStats(const CodeStats &) = delete;
CodeStats &operator=(const CodeStats &) = default; CodeStats &operator=(const CodeStats &) = default;
#define CODESTATS_TABLE \
/* dump string, enum value */ \
X("Inst Count ", InstCount) \
X("Regs Saved ", RegsSaved) \
X("Frame Bytes ", FrameByte) \
X("Spills ", NumSpills) \
X("Fills ", NumFills)
//#define X(str, tag)
public: public:
CodeStats() enum CSTag {
: InstructionsEmitted(0), RegistersSaved(0), FrameBytes(0), Spills(0), #define X(str, tag) CS_##tag,
Fills(0) {} CODESTATS_TABLE
void reset() { *this = CodeStats(); } #undef X
void updateEmitted(uint32_t InstCount) { InstructionsEmitted += InstCount; } CS_NUM
void updateRegistersSaved(uint32_t Num) { RegistersSaved += Num; } };
void updateFrameBytes(uint32_t Bytes) { FrameBytes += Bytes; } CodeStats() { reset(); }
void updateSpills() { ++Spills; } void reset() { Stats.fill(0); }
void updateFills() { ++Fills; } void update(CSTag Tag, uint32_t Count = 1) {
assert(Tag < Stats.size());
Stats[Tag] += Count;
}
void add(const CodeStats &Other) {
for (uint32_t i = 0; i < Stats.size(); ++i)
Stats[i] += Other.Stats[i];
}
void dump(const IceString &Name, Ostream &Str); void dump(const IceString &Name, Ostream &Str);
private: private:
uint32_t InstructionsEmitted; std::array<uint32_t, CS_NUM> Stats;
uint32_t RegistersSaved;
uint32_t FrameBytes;
uint32_t Spills;
uint32_t Fills;
}; };
// TimerList is a vector of TimerStack objects, with extra methods // TimerList is a vector of TimerStack objects, with extra methods
...@@ -121,6 +133,7 @@ class GlobalContext { ...@@ -121,6 +133,7 @@ class GlobalContext {
public: public:
ThreadContext() {} ThreadContext() {}
CodeStats StatsFunction; CodeStats StatsFunction;
CodeStats StatsCumulative;
TimerList Timers; TimerList Timers;
}; };
...@@ -209,32 +222,37 @@ public: ...@@ -209,32 +222,37 @@ public:
void statsUpdateEmitted(uint32_t InstCount) { void statsUpdateEmitted(uint32_t InstCount) {
if (!ALLOW_DUMP || !getFlags().DumpStats) if (!ALLOW_DUMP || !getFlags().DumpStats)
return; return;
ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateEmitted(InstCount); ThreadContext *TLS = ICE_TLS_GET_FIELD(TLS);
getStatsCumulative()->updateEmitted(InstCount); TLS->StatsFunction.update(CodeStats::CS_InstCount, InstCount);
TLS->StatsCumulative.update(CodeStats::CS_InstCount, InstCount);
} }
void statsUpdateRegistersSaved(uint32_t Num) { void statsUpdateRegistersSaved(uint32_t Num) {
if (!ALLOW_DUMP || !getFlags().DumpStats) if (!ALLOW_DUMP || !getFlags().DumpStats)
return; return;
ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateRegistersSaved(Num); ThreadContext *TLS = ICE_TLS_GET_FIELD(TLS);
getStatsCumulative()->updateRegistersSaved(Num); TLS->StatsFunction.update(CodeStats::CS_RegsSaved, Num);
TLS->StatsCumulative.update(CodeStats::CS_RegsSaved, Num);
} }
void statsUpdateFrameBytes(uint32_t Bytes) { void statsUpdateFrameBytes(uint32_t Bytes) {
if (!ALLOW_DUMP || !getFlags().DumpStats) if (!ALLOW_DUMP || !getFlags().DumpStats)
return; return;
ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateFrameBytes(Bytes); ThreadContext *TLS = ICE_TLS_GET_FIELD(TLS);
getStatsCumulative()->updateFrameBytes(Bytes); TLS->StatsFunction.update(CodeStats::CS_FrameByte, Bytes);
TLS->StatsCumulative.update(CodeStats::CS_FrameByte, Bytes);
} }
void statsUpdateSpills() { void statsUpdateSpills() {
if (!ALLOW_DUMP || !getFlags().DumpStats) if (!ALLOW_DUMP || !getFlags().DumpStats)
return; return;
ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateSpills(); ThreadContext *TLS = ICE_TLS_GET_FIELD(TLS);
getStatsCumulative()->updateSpills(); TLS->StatsFunction.update(CodeStats::CS_NumSpills);
TLS->StatsCumulative.update(CodeStats::CS_NumSpills);
} }
void statsUpdateFills() { void statsUpdateFills() {
if (!ALLOW_DUMP || !getFlags().DumpStats) if (!ALLOW_DUMP || !getFlags().DumpStats)
return; return;
ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateFills(); ThreadContext *TLS = ICE_TLS_GET_FIELD(TLS);
getStatsCumulative()->updateFills(); TLS->StatsFunction.update(CodeStats::CS_NumFills);
TLS->StatsCumulative.update(CodeStats::CS_NumFills);
} }
// These are predefined TimerStackIdT values. // These are predefined TimerStackIdT values.
...@@ -296,6 +314,13 @@ public: ...@@ -296,6 +314,13 @@ public:
for (ThreadContext *TLS : AllThreadContexts) for (ThreadContext *TLS : AllThreadContexts)
Timers->mergeFrom(TLS->Timers); Timers->mergeFrom(TLS->Timers);
} }
if (ALLOW_DUMP) {
// Do a separate loop over AllThreadContexts to avoid holding
// two locks at once.
auto Stats = getStatsCumulative();
for (ThreadContext *TLS : AllThreadContexts)
Stats->add(TLS->StatsCumulative);
}
} }
// Translation thread startup routine. // Translation thread startup routine.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment