Commit b6e9b897 by Karl Schimpf

Fix timing of parseFunctions.

The previous implementation was charging about 24% more time than it should to the function parser. The cause was that the time to "queue" the parsed functions, and the time to emit the assembled code (again including "queue" time), were not accounted for. About 15% was going to queuing costs, and 7% to emitting the ELF file. This CL adds timing of the function translateFunctions, which captures most of the queueing costs, and timing for each of the major ELF emission functions (emitELF). This allows the corresponding costs to be better bucketed, and not charged to the time it takes to parse functions in bitcode files. Bug=None R=jpp@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/1775603002 .
parent 324334e5
......@@ -1027,7 +1027,7 @@ void Cfg::emitJumpTables() {
void Cfg::emit() {
if (!BuildDefs::dump())
return;
TimerMarker T(TimerStack::TT_emit, this);
TimerMarker T(TimerStack::TT_emitAsm, this);
if (Ctx->getFlags().getDecorateAsm()) {
renumberInstructions();
getVMetadata()->init(VMK_Uses);
......@@ -1061,7 +1061,7 @@ void Cfg::emit() {
}
void Cfg::emitIAS() {
TimerMarker T(TimerStack::TT_emit, this);
TimerMarker T(TimerStack::TT_emitAsm, this);
// The emitIAS() routines emit into the internal assembler buffer, so there's
// no need to lock the streams.
deleteJumpTableInsts();
......
......@@ -129,7 +129,7 @@ void Compiler::run(const Ice::ClFlagsExtra &ExtraFlags, GlobalContext &Ctx,
Ctx.lowerJumpTables();
if (Ctx.getFlags().getOutFileType() == FT_Elf) {
TimerMarker T1(Ice::TimerStack::TT_emit, &Ctx);
TimerMarker T1(Ice::TimerStack::TT_emitAsm, &Ctx);
Ctx.getObjectWriter()->setUndefinedSyms(Ctx.getConstantExternSyms());
Ctx.getObjectWriter()->writeNonUserSections();
}
......
......@@ -219,6 +219,14 @@ Elf64_Off ELFObjectWriter::alignFileOffset(Elf64_Xword Align) {
void ELFObjectWriter::writeFunctionCode(const IceString &FuncName,
bool IsInternal, Assembler *Asm) {
assert(!SectionNumbersAssigned);
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
constexpr TimerStackIdT StackID = GlobalContext::TSK_Funcs;
TimerIdT TimerID = 0;
bool TimeThisFunction = Ctx.getFlags().getTimeEachFunction();
if (TimeThisFunction) {
TimerID = Ctx.getTimerID(StackID, FuncName);
Ctx.pushTimer(TimerID, StackID);
}
ELFTextSection *Section = nullptr;
ELFRelocationSection *RelSection = nullptr;
const bool FunctionSections = Ctx.getFlags().getFunctionSections();
......@@ -270,6 +278,8 @@ void ELFObjectWriter::writeFunctionCode(const IceString &FuncName,
RelSection->addRelocations(OffsetInSection, Asm->fixups());
}
Section->appendData(Str, Asm->getBufferView());
if (TimeThisFunction)
Ctx.popTimer(TimerID, StackID);
}
namespace {
......@@ -303,6 +313,7 @@ void ELFObjectWriter::writeDataSection(const VariableDeclarationList &Vars,
FixupKind RelocationKind,
const IceString &SectionSuffix,
bool IsPIC) {
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
assert(!SectionNumbersAssigned);
VariableDeclarationList VarsBySection[ELFObjectWriter::NumSectionTypes];
for (auto &SectionList : VarsBySection)
......@@ -439,6 +450,7 @@ void ELFObjectWriter::writeDataOfType(SectionType ST,
}
void ELFObjectWriter::writeInitialELFHeader() {
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
assert(!SectionNumbersAssigned);
constexpr Elf64_Off DummySHOffset = 0;
constexpr SizeT DummySHStrIndex = 0;
......@@ -500,6 +512,7 @@ void ELFObjectWriter::writeELFHeaderInternal(Elf64_Off SectionHeaderOffset,
}
template <typename ConstType> void ELFObjectWriter::writeConstantPool(Type Ty) {
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
ConstantList Pool = Ctx.getConstantPool(Ty);
if (Pool.empty()) {
return;
......@@ -576,6 +589,7 @@ void ELFObjectWriter::writeAllRelocationSections() {
void ELFObjectWriter::writeJumpTable(const JumpTableData &JT,
FixupKind RelocationKind, bool IsPIC) {
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
ELFDataSection *Section;
ELFRelocationSection *RelSection;
const Elf64_Xword PointerSize = typeWidthInBytes(getPointerType());
......@@ -611,6 +625,7 @@ void ELFObjectWriter::writeJumpTable(const JumpTableData &JT,
}
void ELFObjectWriter::setUndefinedSyms(const ConstantList &UndefSyms) {
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
for (const Constant *S : UndefSyms) {
const auto *Sym = llvm::cast<ConstantRelocatable>(S);
const IceString &Name = Sym->getName();
......@@ -642,6 +657,8 @@ void ELFObjectWriter::writeRelocationSections(RelSectionList &RelSections) {
}
void ELFObjectWriter::writeNonUserSections() {
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
// Write out the shstrtab now that all sections are known.
ShStrTab->doLayout();
ShStrTab->setSize(ShStrTab->getSectionDataSize());
......
......@@ -307,6 +307,7 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError,
}
void GlobalContext::translateFunctions() {
TimerMarker Timer(TimerStack::TT_translateFunctions, this);
while (std::unique_ptr<Cfg> Func = optQueueBlockingPop()) {
// Install Func in TLS for Cfg-specific container allocators.
CfgLocalAllocatorScope _(Func.get());
......@@ -380,7 +381,7 @@ void resizePending(std::vector<EmitterWorkItem *> &Pending, uint32_t Index) {
} // end of anonymous namespace
void GlobalContext::emitFileHeader() {
TimerMarker T1(Ice::TimerStack::TT_emit, this);
TimerMarker T1(Ice::TimerStack::TT_emitAsm, this);
if (getFlags().getOutFileType() == FT_Elf) {
getObjectWriter()->writeInitialELFHeader();
} else {
......
......@@ -27,7 +27,7 @@
X(doArgLowering) \
X(doBranchOpt) \
X(doNopInsertion) \
X(emit) \
X(emitAsm) \
X(emitGlobalInitializers) \
X(genCode) \
X(genFrame) \
......@@ -53,8 +53,10 @@
X(renumberInstructions) \
X(szmain) \
X(translate) \
X(translateFunctions) \
X(validateLiveness) \
X(vmetadata)
X(vmetadata) \
X(writeELF)
//#define X(tag)
#endif // SUBZERO_SRC_ICETIMERTREE_DEF
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment