Commit 1873560a by Jim Stichnoth

Subzero: Add rudimentary statistics on generated code.

The following are collected:
- Number of machine instructions emitted
- Number of registers saved/restored in prolog/epilog
- Number of stack frame bytes (non-alloca) allocated
- Number of "spills", or stores to stack slots
- Number of "fills", or loads/operations from stack slots
- Fill+Spill count (sum of above two)

These are somewhat reasonable approximations of code quality, and the primary intention is to compare before-and-after when trying out an optimization.

The statistics are dumped after translating each function. Per-function and cumulative statistics are collected. The output lines have a prefix that is easy to filter.

BUG= none
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/580633002
parent bc004630
......@@ -483,6 +483,21 @@ void CfgNode::emit(Cfg *Func) const {
if (Inst->isRedundantAssign())
continue;
(*I)->emit(Func);
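// Update emitted instruction count, plus fill/spill count for
// Variable operands without a physical register.
// NOTE(review): the commit message defines a "spill" as a store to a
// stack slot and a "fill" as a load from one, yet below a stack-resident
// Dest (written) bumps Fills and a stack-resident Src (read) bumps
// Spills. The two counters look swapped; only their sum is unaffected.
// Confirm and exchange the two calls if so.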
if (uint32_t Count = (*I)->getEmitInstCount()) {
Func->getContext()->statsUpdateEmitted(Count);
if (Variable *Dest = (*I)->getDest()) {
if (!Dest->hasReg())
Func->getContext()->statsUpdateFills();
}
for (SizeT S = 0; S < (*I)->getSrcSize(); ++S) {
if (Variable *Src = llvm::dyn_cast<Variable>((*I)->getSrc(S))) {
if (!Src->hasReg())
Func->getContext()->statsUpdateSpills();
}
}
}
}
}
......
......@@ -24,14 +24,15 @@ public:
ClFlags()
: DisableInternal(false), SubzeroTimingEnabled(false),
DisableTranslation(false), DisableGlobals(false),
FunctionSections(false), UseSandboxing(false), DefaultGlobalPrefix(""),
DefaultFunctionPrefix("") {}
FunctionSections(false), UseSandboxing(false), DumpStats(false),
DefaultGlobalPrefix(""), DefaultFunctionPrefix("") {}
bool DisableInternal;
bool SubzeroTimingEnabled;
bool DisableTranslation;
bool DisableGlobals;
bool FunctionSections;
bool UseSandboxing;
bool DumpStats;
IceString DefaultGlobalPrefix;
IceString DefaultFunctionPrefix;
};
......
......@@ -384,6 +384,13 @@ ConstantList GlobalContext::getConstantPool(Type Ty) const {
llvm_unreachable("Unknown type");
}
void GlobalContext::dumpStats(const IceString &Name) {
if (Flags.DumpStats) {
StatsFunction.dump(Name, getStrDump());
StatsCumulative.dump("_TOTAL_", getStrDump());
}
}
void Timer::printElapsedUs(GlobalContext *Ctx, const IceString &Tag) const {
if (Ctx->isVerbose(IceV_Timing)) {
// Prefixing with '#' allows timing strings to be included
......
......@@ -28,6 +28,35 @@ namespace Ice {
class ClFlags;
// This class collects rudimentary statistics during translation.
// Every counter starts at zero and only grows until reset() is called.
class CodeStats {
public:
  CodeStats()
      : InstructionsEmitted(0), RegistersSaved(0), FrameBytes(0), Spills(0),
        Fills(0) {}

  // Returns every counter to zero.
  void reset() { *this = CodeStats(); }

  // Adds InstCount to the number of machine instructions emitted.
  void updateEmitted(uint32_t InstCount) { InstructionsEmitted += InstCount; }
  // Adds Num to the number of registers saved/restored in prolog/epilog.
  void updateRegistersSaved(uint32_t Num) { RegistersSaved += Num; }
  // Adds Bytes to the number of stack frame bytes allocated.
  void updateFrameBytes(uint32_t Bytes) { FrameBytes += Bytes; }
  // Records one spill (a store to a stack slot).
  void updateSpills() { ++Spills; }
  // Records one fill (a load/operation from a stack slot).
  void updateFills() { ++Fills; }

  // Writes one line per statistic to Str.  Each line has the form
  // "|<Name>|<label>|<value>", so the output is easy to filter by
  // prefix.  Dumping only reads the counters, hence the const
  // qualifier.
  void dump(const IceString &Name, Ostream &Str) const {
    Str << "|" << Name << "|Inst Count |" << InstructionsEmitted << "\n";
    Str << "|" << Name << "|Regs Saved |" << RegistersSaved << "\n";
    Str << "|" << Name << "|Frame Bytes |" << FrameBytes << "\n";
    Str << "|" << Name << "|Spills |" << Spills << "\n";
    Str << "|" << Name << "|Fills |" << Fills << "\n";
    Str << "|" << Name << "|Spills+Fills|" << Spills + Fills << "\n";
  }

private:
  uint32_t InstructionsEmitted;
  uint32_t RegistersSaved;
  uint32_t FrameBytes;
  uint32_t Spills;
  uint32_t Fills;
};
// TODO: Accesses to all non-const fields of GlobalContext need to
// be synchronized, especially the constant pool, the allocator, and
// the output streams.
......@@ -101,6 +130,30 @@ public:
// translation.
RandomNumberGenerator &getRNG() { return RNG; }
// Reset stats at the beginning of a function.  Only the per-function
// counters (StatsFunction) are cleared; StatsCumulative is left
// untouched so it keeps accumulating across functions.
void resetStats() { StatsFunction.reset(); }
// If the DumpStats flag is set, dumps the per-function statistics
// labeled with Name, followed by the cumulative statistics.
void dumpStats(const IceString &Name);
// Adds InstCount emitted instructions to both the per-function and
// the cumulative statistics.
void statsUpdateEmitted(uint32_t InstCount) {
StatsFunction.updateEmitted(InstCount);
StatsCumulative.updateEmitted(InstCount);
}
// Adds Num to the saved-register count of both the per-function and
// the cumulative statistics.
void statsUpdateRegistersSaved(uint32_t Num) {
StatsFunction.updateRegistersSaved(Num);
StatsCumulative.updateRegistersSaved(Num);
}
// Adds Bytes to the frame-byte count of both the per-function and
// the cumulative statistics.
void statsUpdateFrameBytes(uint32_t Bytes) {
StatsFunction.updateFrameBytes(Bytes);
StatsCumulative.updateFrameBytes(Bytes);
}
// Counts one spill in both the per-function and the cumulative
// statistics.
void statsUpdateSpills() {
StatsFunction.updateSpills();
StatsCumulative.updateSpills();
}
// Counts one fill in both the per-function and the cumulative
// statistics.
void statsUpdateFills() {
StatsFunction.updateFills();
StatsCumulative.updateFills();
}
private:
Ostream *StrDump; // Stream for dumping / diagnostics
Ostream *StrEmit; // Stream for code emission
......@@ -115,6 +168,8 @@ private:
const ClFlags &Flags;
bool HasEmittedFirstMethod;
RandomNumberGenerator RNG;
CodeStats StatsFunction;
CodeStats StatsCumulative;
GlobalContext(const GlobalContext &) LLVM_DELETED_FUNCTION;
GlobalContext &operator=(const GlobalContext &) LLVM_DELETED_FUNCTION;
......
......@@ -101,6 +101,12 @@ public:
void livenessLightweight(llvm::BitVector &Live);
void liveness(InstNumberT InstNumber, llvm::BitVector &Live,
Liveness *Liveness, const CfgNode *Node);
// Get the number of native instructions that this instruction
// ultimately emits.  By default, high-level instructions don't
// result in any native instructions, and a target-specific
// instruction results in a single native instruction.  A return
// value of 0 also makes CfgNode::emit() skip the fill/spill
// accounting for this instruction.
virtual uint32_t getEmitInstCount() const { return 0; }
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
virtual void dumpExtras(const Cfg *Func) const;
......@@ -744,6 +750,7 @@ private:
// instructions.
class InstTarget : public Inst {
public:
// A target instruction counts as one native instruction unless a
// subclass overrides this with a different count.
virtual uint32_t getEmitInstCount() const { return 1; }
virtual void emit(const Cfg *Func) const = 0;
virtual void dump(const Cfg *Func) const;
virtual void dumpExtras(const Cfg *Func) const;
......
......@@ -290,6 +290,7 @@ public:
// Obtains storage for a label from Func's allocator and constructs
// the InstX8632Label in place there.
static InstX8632Label *create(Cfg *Func, TargetX8632 *Target) {
  void *Storage = Func->allocate<InstX8632Label>();
  return new (Storage) InstX8632Label(Func, Target);
}
// A label is not counted as a native instruction.
virtual uint32_t getEmitInstCount() const { return 0; }
IceString getName(const Cfg *Func) const;
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
......@@ -324,7 +325,7 @@ public:
InstX8632Br(Func, Target, NULL, NULL, Condition);
}
// Create a conditional intra-block branch (or unconditional, if
// Condition==None) to a label in the current block.
// Condition==Br_None) to a label in the current block.
static InstX8632Br *create(Cfg *Func, InstX8632Label *Label,
BrCond Condition) {
return new (Func->allocate<InstX8632Br>())
......@@ -332,6 +333,15 @@ public:
}
// Returns the stored TargetTrue node pointer.
CfgNode *getTargetTrue() const { return TargetTrue; }
// Returns the stored TargetFalse node pointer.
CfgNode *getTargetFalse() const { return TargetFalse; }
// Reports how many native instructions this branch counts as: two
// when it has no Label, its Condition is not Br_None, and it has a
// false target; otherwise one.
virtual uint32_t getEmitInstCount() const {
  const bool NeedsSecondBranch =
      !Label && Condition != Br_None && getTargetFalse();
  return NeedsSecondBranch ? 2 : 1;
}
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Br); }
......
......@@ -606,6 +606,11 @@ void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
} else {
_mov(Arg, Mem);
}
// This argument-copying instruction uses an explicit
// OperandX8632Mem operand instead of a Variable, so its
// fill-from-stack operation has to be tracked separately for
// statistics.
Ctx->statsUpdateFills();
}
}
......@@ -746,13 +751,16 @@ void TargetX8632::addProlog(CfgNode *Node) {
SpillAreaSizeBytes += GlobalsSize;
// Add push instructions for preserved registers.
uint32_t NumCallee = 0;
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
if (CalleeSaves[i] && RegsUsed[i]) {
++NumCallee;
PreservedRegsSizeBytes += 4;
const bool SuppressStackAdjustment = true;
_push(getPhysicalRegister(i), SuppressStackAdjustment);
}
}
Ctx->statsUpdateRegistersSaved(NumCallee);
// Generate "push ebp; mov ebp, esp"
if (IsEbpBasedFrame) {
......@@ -800,6 +808,7 @@ void TargetX8632::addProlog(CfgNode *Node) {
if (SpillAreaSizeBytes)
_sub(getPhysicalRegister(Reg_esp),
Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));
Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
resetStackAdjustment();
......
......@@ -71,6 +71,7 @@ void Translator::nameUnnamedGlobalAddresses(llvm::Module *Mod) {
}
void Translator::translateFcn(Cfg *Fcn) {
Ctx->resetStats();
Func.reset(Fcn);
if (Ctx->getFlags().DisableInternal)
Func->setInternal(false);
......@@ -95,6 +96,7 @@ void Translator::translateFcn(Cfg *Fcn) {
std::cerr << "[Subzero timing] Emit function " << Func->getFunctionName()
<< ": " << TEmit.getElapsedSec() << " sec\n";
}
Ctx->dumpStats(Func->getFunctionName());
}
}
......
......@@ -102,6 +102,10 @@ static cl::opt<bool>
DisablePhiEdgeSplit("no-phi-edge-split",
cl::desc("Disable edge splitting for Phi lowering"));
// Command-line option "stats".  main() copies its value into
// ClFlags::DumpStats, which gates GlobalContext::dumpStats().
static cl::opt<bool>
DumpStats("stats",
cl::desc("Print statistics after translating each function"));
static cl::opt<NaClFileFormat> InputFileFormat(
"bitcode-format", cl::desc("Define format of input file:"),
cl::values(clEnumValN(LLVMFormat, "llvm", "LLVM file (default)"),
......@@ -155,6 +159,7 @@ int main(int argc, char **argv) {
Flags.DisableGlobals = DisableGlobals;
Flags.FunctionSections = FunctionSections;
Flags.UseSandboxing = UseSandboxing;
Flags.DumpStats = DumpStats;
Flags.DefaultGlobalPrefix = DefaultGlobalPrefix;
Flags.DefaultFunctionPrefix = DefaultFunctionPrefix;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment