Commit 1873560a by Jim Stichnoth

Subzero: Add rudimentary statistics on generated code.

The following are collected:
- Number of machine instructions emitted
- Number of registers saved/restored in prolog/epilog
- Number of stack frame bytes (non-alloca) allocated
- Number of "spills", or stores to stack slots
- Number of "fills", or loads/operations from stack slots
- Fill+Spill count (sum of above two)

These are somewhat reasonable approximations of code quality, and the primary intention is to compare before-and-after when trying out an optimization.

The statistics are dumped after translating each function. Per-function and cumulative statistics are collected. The output lines have a prefix that is easy to filter.

BUG= none
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/580633002
parent bc004630
...@@ -483,6 +483,21 @@ void CfgNode::emit(Cfg *Func) const { ...@@ -483,6 +483,21 @@ void CfgNode::emit(Cfg *Func) const {
if (Inst->isRedundantAssign()) if (Inst->isRedundantAssign())
continue; continue;
(*I)->emit(Func); (*I)->emit(Func);
// Update emitted instruction count, plus fill/spill count for
// Variable operands without a physical register.  The statistics
// define a "spill" as a store to a stack slot and a "fill" as a
// load/operation from a stack slot, so a register-less destination
// (which the instruction writes) counts as a spill and a
// register-less source (which the instruction reads) counts as a
// fill.  Instructions that report zero emitted native instructions
// are not counted at all.
if (uint32_t Count = (*I)->getEmitInstCount()) {
  Func->getContext()->statsUpdateEmitted(Count);
  if (Variable *Dest = (*I)->getDest()) {
    if (!Dest->hasReg())
      Func->getContext()->statsUpdateSpills();
  }
  for (SizeT S = 0; S < (*I)->getSrcSize(); ++S) {
    // Only Variable sources can be register-less; other operand
    // kinds are skipped by the dyn_cast.
    if (Variable *Src = llvm::dyn_cast<Variable>((*I)->getSrc(S))) {
      if (!Src->hasReg())
        Func->getContext()->statsUpdateFills();
    }
  }
}
} }
} }
......
...@@ -24,14 +24,15 @@ public: ...@@ -24,14 +24,15 @@ public:
ClFlags() ClFlags()
: DisableInternal(false), SubzeroTimingEnabled(false), : DisableInternal(false), SubzeroTimingEnabled(false),
DisableTranslation(false), DisableGlobals(false), DisableTranslation(false), DisableGlobals(false),
FunctionSections(false), UseSandboxing(false), DefaultGlobalPrefix(""), FunctionSections(false), UseSandboxing(false), DumpStats(false),
DefaultFunctionPrefix("") {} DefaultGlobalPrefix(""), DefaultFunctionPrefix("") {}
bool DisableInternal; bool DisableInternal;
bool SubzeroTimingEnabled; bool SubzeroTimingEnabled;
bool DisableTranslation; bool DisableTranslation;
bool DisableGlobals; bool DisableGlobals;
bool FunctionSections; bool FunctionSections;
bool UseSandboxing; bool UseSandboxing;
bool DumpStats;
IceString DefaultGlobalPrefix; IceString DefaultGlobalPrefix;
IceString DefaultFunctionPrefix; IceString DefaultFunctionPrefix;
}; };
......
...@@ -384,6 +384,13 @@ ConstantList GlobalContext::getConstantPool(Type Ty) const { ...@@ -384,6 +384,13 @@ ConstantList GlobalContext::getConstantPool(Type Ty) const {
llvm_unreachable("Unknown type"); llvm_unreachable("Unknown type");
} }
// Writes the per-function statistics, labeled with Name, and then the
// cumulative statistics, labeled "_TOTAL_", to the dump stream.
// Nothing is written unless the DumpStats flag is set.
void GlobalContext::dumpStats(const IceString &Name) {
  if (!Flags.DumpStats)
    return;
  StatsFunction.dump(Name, getStrDump());
  StatsCumulative.dump("_TOTAL_", getStrDump());
}
void Timer::printElapsedUs(GlobalContext *Ctx, const IceString &Tag) const { void Timer::printElapsedUs(GlobalContext *Ctx, const IceString &Tag) const {
if (Ctx->isVerbose(IceV_Timing)) { if (Ctx->isVerbose(IceV_Timing)) {
// Prefixing with '#' allows timing strings to be included // Prefixing with '#' allows timing strings to be included
......
...@@ -28,6 +28,35 @@ namespace Ice { ...@@ -28,6 +28,35 @@ namespace Ice {
class ClFlags; class ClFlags;
// This class collects rudimentary statistics during translation.
// It is a plain bundle of unsigned counters: every counter starts at
// zero, the update*() methods only ever increase them, and reset()
// returns them all to zero.
class CodeStats {
public:
  CodeStats()
      : InstructionsEmitted(0), RegistersSaved(0), FrameBytes(0), Spills(0),
        Fills(0) {}
  // Zeroes all counters by assigning a freshly constructed object.
  void reset() { *this = CodeStats(); }
  // Adds InstCount to the number of emitted instructions.
  void updateEmitted(uint32_t InstCount) { InstructionsEmitted += InstCount; }
  // Adds Num to the number of saved registers.
  void updateRegistersSaved(uint32_t Num) { RegistersSaved += Num; }
  // Adds Bytes to the number of stack frame bytes.
  void updateFrameBytes(uint32_t Bytes) { FrameBytes += Bytes; }
  // Counts one more spill.
  void updateSpills() { ++Spills; }
  // Counts one more fill.
  void updateFills() { ++Fills; }
  // Writes one line per statistic to Str, each of the form
  // "|<Name>|<label>|<value>", ending with the sum of spills and
  // fills.  The leading '|' and the Name field give every line a
  // common prefix.  Dumping only reads the counters, so the method
  // is const.
  void dump(const IceString &Name, Ostream &Str) const {
    Str << "|" << Name << "|Inst Count |" << InstructionsEmitted << "\n";
    Str << "|" << Name << "|Regs Saved |" << RegistersSaved << "\n";
    Str << "|" << Name << "|Frame Bytes |" << FrameBytes << "\n";
    Str << "|" << Name << "|Spills |" << Spills << "\n";
    Str << "|" << Name << "|Fills |" << Fills << "\n";
    Str << "|" << Name << "|Spills+Fills|" << Spills + Fills << "\n";
  }

private:
  uint32_t InstructionsEmitted;
  uint32_t RegistersSaved;
  uint32_t FrameBytes;
  uint32_t Spills;
  uint32_t Fills;
};
// TODO: Accesses to all non-const fields of GlobalContext need to // TODO: Accesses to all non-const fields of GlobalContext need to
// be synchronized, especially the constant pool, the allocator, and // be synchronized, especially the constant pool, the allocator, and
// the output streams. // the output streams.
...@@ -101,6 +130,30 @@ public: ...@@ -101,6 +130,30 @@ public:
// translation. // translation.
RandomNumberGenerator &getRNG() { return RNG; } RandomNumberGenerator &getRNG() { return RNG; }
// Reset the per-function stats at the beginning of a function.  Only
// StatsFunction is cleared; the cumulative stats keep accumulating.
void resetStats() { StatsFunction.reset(); }
// Dump the per-function stats, labeled with Name, followed by the
// cumulative stats.  Produces no output unless the DumpStats flag is
// set.
void dumpStats(const IceString &Name);
// Add InstCount emitted instructions to both the per-function and
// the cumulative stats.
void statsUpdateEmitted(uint32_t InstCount) {
StatsFunction.updateEmitted(InstCount);
StatsCumulative.updateEmitted(InstCount);
}
// Add Num saved registers to both the per-function and the
// cumulative stats.
void statsUpdateRegistersSaved(uint32_t Num) {
StatsFunction.updateRegistersSaved(Num);
StatsCumulative.updateRegistersSaved(Num);
}
// Add Bytes stack frame bytes to both the per-function and the
// cumulative stats.
void statsUpdateFrameBytes(uint32_t Bytes) {
StatsFunction.updateFrameBytes(Bytes);
StatsCumulative.updateFrameBytes(Bytes);
}
// Count one spill in both the per-function and the cumulative stats.
void statsUpdateSpills() {
StatsFunction.updateSpills();
StatsCumulative.updateSpills();
}
// Count one fill in both the per-function and the cumulative stats.
void statsUpdateFills() {
StatsFunction.updateFills();
StatsCumulative.updateFills();
}
private: private:
Ostream *StrDump; // Stream for dumping / diagnostics Ostream *StrDump; // Stream for dumping / diagnostics
Ostream *StrEmit; // Stream for code emission Ostream *StrEmit; // Stream for code emission
...@@ -115,6 +168,8 @@ private: ...@@ -115,6 +168,8 @@ private:
const ClFlags &Flags; const ClFlags &Flags;
bool HasEmittedFirstMethod; bool HasEmittedFirstMethod;
RandomNumberGenerator RNG; RandomNumberGenerator RNG;
CodeStats StatsFunction;
CodeStats StatsCumulative;
GlobalContext(const GlobalContext &) LLVM_DELETED_FUNCTION; GlobalContext(const GlobalContext &) LLVM_DELETED_FUNCTION;
GlobalContext &operator=(const GlobalContext &) LLVM_DELETED_FUNCTION; GlobalContext &operator=(const GlobalContext &) LLVM_DELETED_FUNCTION;
......
...@@ -101,6 +101,12 @@ public: ...@@ -101,6 +101,12 @@ public:
void livenessLightweight(llvm::BitVector &Live); void livenessLightweight(llvm::BitVector &Live);
void liveness(InstNumberT InstNumber, llvm::BitVector &Live, void liveness(InstNumberT InstNumber, llvm::BitVector &Live,
Liveness *Liveness, const CfgNode *Node); Liveness *Liveness, const CfgNode *Node);
// Get the number of native instructions that this instruction
// ultimately emits.  By default, high-level instructions don't
// result in any native instructions, and a target-specific
// instruction results in a single native instruction.  This base
// implementation therefore returns 0; the method is virtual so that
// derived instruction classes can report a different count.
virtual uint32_t getEmitInstCount() const { return 0; }
virtual void emit(const Cfg *Func) const; virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const; virtual void dump(const Cfg *Func) const;
virtual void dumpExtras(const Cfg *Func) const; virtual void dumpExtras(const Cfg *Func) const;
...@@ -744,6 +750,7 @@ private: ...@@ -744,6 +750,7 @@ private:
// instructions. // instructions.
class InstTarget : public Inst { class InstTarget : public Inst {
public: public:
// A target-specific instruction counts as a single native
// instruction unless a derived class overrides this.
virtual uint32_t getEmitInstCount() const { return 1; }
virtual void emit(const Cfg *Func) const = 0; virtual void emit(const Cfg *Func) const = 0;
virtual void dump(const Cfg *Func) const; virtual void dump(const Cfg *Func) const;
virtual void dumpExtras(const Cfg *Func) const; virtual void dumpExtras(const Cfg *Func) const;
......
...@@ -290,6 +290,7 @@ public: ...@@ -290,6 +290,7 @@ public:
static InstX8632Label *create(Cfg *Func, TargetX8632 *Target) { static InstX8632Label *create(Cfg *Func, TargetX8632 *Target) {
return new (Func->allocate<InstX8632Label>()) InstX8632Label(Func, Target); return new (Func->allocate<InstX8632Label>()) InstX8632Label(Func, Target);
} }
// A label is not counted as an emitted native instruction.
virtual uint32_t getEmitInstCount() const { return 0; }
IceString getName(const Cfg *Func) const; IceString getName(const Cfg *Func) const;
virtual void emit(const Cfg *Func) const; virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const; virtual void dump(const Cfg *Func) const;
...@@ -324,7 +325,7 @@ public: ...@@ -324,7 +325,7 @@ public:
InstX8632Br(Func, Target, NULL, NULL, Condition); InstX8632Br(Func, Target, NULL, NULL, Condition);
} }
// Create a conditional intra-block branch (or unconditional, if // Create a conditional intra-block branch (or unconditional, if
// Condition==None) to a label in the current block. // Condition==Br_None) to a label in the current block.
static InstX8632Br *create(Cfg *Func, InstX8632Label *Label, static InstX8632Br *create(Cfg *Func, InstX8632Label *Label,
BrCond Condition) { BrCond Condition) {
return new (Func->allocate<InstX8632Br>()) return new (Func->allocate<InstX8632Br>())
...@@ -332,6 +333,15 @@ public: ...@@ -332,6 +333,15 @@ public:
} }
CfgNode *getTargetTrue() const { return TargetTrue; } CfgNode *getTargetTrue() const { return TargetTrue; }
CfgNode *getTargetFalse() const { return TargetFalse; } CfgNode *getTargetFalse() const { return TargetFalse; }
// Number of native instructions counted for this branch.  A branch
// to a label, an unconditional branch, or a branch without a false
// target counts as one instruction; only a conditional, non-label
// branch that also has a false target counts as two.
// NOTE(review): presumably the second instruction is the extra jump
// to the false target -- confirm against emit().
virtual uint32_t getEmitInstCount() const {
  if (Label || Condition == Br_None || !getTargetFalse())
    return 1;
  return 2;
}
virtual void emit(const Cfg *Func) const; virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const; virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Br); } static bool classof(const Inst *Inst) { return isClassof(Inst, Br); }
......
...@@ -606,6 +606,11 @@ void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, ...@@ -606,6 +606,11 @@ void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
} else { } else {
_mov(Arg, Mem); _mov(Arg, Mem);
} }
// This argument-copying instruction uses an explicit
// OperandX8632Mem operand instead of a Variable, so its
// fill-from-stack operation has to be tracked separately for
// statistics.
Ctx->statsUpdateFills();
} }
} }
...@@ -746,13 +751,16 @@ void TargetX8632::addProlog(CfgNode *Node) { ...@@ -746,13 +751,16 @@ void TargetX8632::addProlog(CfgNode *Node) {
SpillAreaSizeBytes += GlobalsSize; SpillAreaSizeBytes += GlobalsSize;
// Add push instructions for preserved registers. // Add push instructions for preserved registers.
uint32_t NumCallee = 0;
for (SizeT i = 0; i < CalleeSaves.size(); ++i) { for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
if (CalleeSaves[i] && RegsUsed[i]) { if (CalleeSaves[i] && RegsUsed[i]) {
++NumCallee;
PreservedRegsSizeBytes += 4; PreservedRegsSizeBytes += 4;
const bool SuppressStackAdjustment = true; const bool SuppressStackAdjustment = true;
_push(getPhysicalRegister(i), SuppressStackAdjustment); _push(getPhysicalRegister(i), SuppressStackAdjustment);
} }
} }
Ctx->statsUpdateRegistersSaved(NumCallee);
// Generate "push ebp; mov ebp, esp" // Generate "push ebp; mov ebp, esp"
if (IsEbpBasedFrame) { if (IsEbpBasedFrame) {
...@@ -800,6 +808,7 @@ void TargetX8632::addProlog(CfgNode *Node) { ...@@ -800,6 +808,7 @@ void TargetX8632::addProlog(CfgNode *Node) {
if (SpillAreaSizeBytes) if (SpillAreaSizeBytes)
_sub(getPhysicalRegister(Reg_esp), _sub(getPhysicalRegister(Reg_esp),
Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes)); Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));
Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
resetStackAdjustment(); resetStackAdjustment();
......
...@@ -71,6 +71,7 @@ void Translator::nameUnnamedGlobalAddresses(llvm::Module *Mod) { ...@@ -71,6 +71,7 @@ void Translator::nameUnnamedGlobalAddresses(llvm::Module *Mod) {
} }
void Translator::translateFcn(Cfg *Fcn) { void Translator::translateFcn(Cfg *Fcn) {
Ctx->resetStats();
Func.reset(Fcn); Func.reset(Fcn);
if (Ctx->getFlags().DisableInternal) if (Ctx->getFlags().DisableInternal)
Func->setInternal(false); Func->setInternal(false);
...@@ -95,6 +96,7 @@ void Translator::translateFcn(Cfg *Fcn) { ...@@ -95,6 +96,7 @@ void Translator::translateFcn(Cfg *Fcn) {
std::cerr << "[Subzero timing] Emit function " << Func->getFunctionName() std::cerr << "[Subzero timing] Emit function " << Func->getFunctionName()
<< ": " << TEmit.getElapsedSec() << " sec\n"; << ": " << TEmit.getElapsedSec() << " sec\n";
} }
Ctx->dumpStats(Func->getFunctionName());
} }
} }
......
...@@ -102,6 +102,10 @@ static cl::opt<bool> ...@@ -102,6 +102,10 @@ static cl::opt<bool>
DisablePhiEdgeSplit("no-phi-edge-split", DisablePhiEdgeSplit("no-phi-edge-split",
cl::desc("Disable edge splitting for Phi lowering")); cl::desc("Disable edge splitting for Phi lowering"));
static cl::opt<bool>
DumpStats("stats",
cl::desc("Print statistics after translating each function"));
static cl::opt<NaClFileFormat> InputFileFormat( static cl::opt<NaClFileFormat> InputFileFormat(
"bitcode-format", cl::desc("Define format of input file:"), "bitcode-format", cl::desc("Define format of input file:"),
cl::values(clEnumValN(LLVMFormat, "llvm", "LLVM file (default)"), cl::values(clEnumValN(LLVMFormat, "llvm", "LLVM file (default)"),
...@@ -155,6 +159,7 @@ int main(int argc, char **argv) { ...@@ -155,6 +159,7 @@ int main(int argc, char **argv) {
Flags.DisableGlobals = DisableGlobals; Flags.DisableGlobals = DisableGlobals;
Flags.FunctionSections = FunctionSections; Flags.FunctionSections = FunctionSections;
Flags.UseSandboxing = UseSandboxing; Flags.UseSandboxing = UseSandboxing;
Flags.DumpStats = DumpStats;
Flags.DefaultGlobalPrefix = DefaultGlobalPrefix; Flags.DefaultGlobalPrefix = DefaultGlobalPrefix;
Flags.DefaultFunctionPrefix = DefaultFunctionPrefix; Flags.DefaultFunctionPrefix = DefaultFunctionPrefix;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment