Commit b6e9b897 by Karl Schimpf

Fix timing of parseFunctions.

The previous implementation was charging about 24% more time than it should to the function parser. The cause was that the time to "queue" the parsed functions, and the time to emit the assembled code (again including "queue" time), were not accounted for. About 15% was going to queuing costs, and 7% to emitting the ELF file. This CL adds timing of function translateFunctions, which captures most of the queuing costs, and timing for each of the major ELF emission functions (emitELF). This allows the corresponding costs to be better bucketed, and not charged to the time it takes to parse functions in bitcode files. Bug=None R=jpp@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/1775603002 .
parent 324334e5
...@@ -1027,7 +1027,7 @@ void Cfg::emitJumpTables() { ...@@ -1027,7 +1027,7 @@ void Cfg::emitJumpTables() {
void Cfg::emit() { void Cfg::emit() {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
TimerMarker T(TimerStack::TT_emit, this); TimerMarker T(TimerStack::TT_emitAsm, this);
if (Ctx->getFlags().getDecorateAsm()) { if (Ctx->getFlags().getDecorateAsm()) {
renumberInstructions(); renumberInstructions();
getVMetadata()->init(VMK_Uses); getVMetadata()->init(VMK_Uses);
...@@ -1061,7 +1061,7 @@ void Cfg::emit() { ...@@ -1061,7 +1061,7 @@ void Cfg::emit() {
} }
void Cfg::emitIAS() { void Cfg::emitIAS() {
TimerMarker T(TimerStack::TT_emit, this); TimerMarker T(TimerStack::TT_emitAsm, this);
// The emitIAS() routines emit into the internal assembler buffer, so there's // The emitIAS() routines emit into the internal assembler buffer, so there's
// no need to lock the streams. // no need to lock the streams.
deleteJumpTableInsts(); deleteJumpTableInsts();
......
...@@ -129,7 +129,7 @@ void Compiler::run(const Ice::ClFlagsExtra &ExtraFlags, GlobalContext &Ctx, ...@@ -129,7 +129,7 @@ void Compiler::run(const Ice::ClFlagsExtra &ExtraFlags, GlobalContext &Ctx,
Ctx.lowerJumpTables(); Ctx.lowerJumpTables();
if (Ctx.getFlags().getOutFileType() == FT_Elf) { if (Ctx.getFlags().getOutFileType() == FT_Elf) {
TimerMarker T1(Ice::TimerStack::TT_emit, &Ctx); TimerMarker T1(Ice::TimerStack::TT_emitAsm, &Ctx);
Ctx.getObjectWriter()->setUndefinedSyms(Ctx.getConstantExternSyms()); Ctx.getObjectWriter()->setUndefinedSyms(Ctx.getConstantExternSyms());
Ctx.getObjectWriter()->writeNonUserSections(); Ctx.getObjectWriter()->writeNonUserSections();
} }
......
...@@ -219,6 +219,14 @@ Elf64_Off ELFObjectWriter::alignFileOffset(Elf64_Xword Align) { ...@@ -219,6 +219,14 @@ Elf64_Off ELFObjectWriter::alignFileOffset(Elf64_Xword Align) {
void ELFObjectWriter::writeFunctionCode(const IceString &FuncName, void ELFObjectWriter::writeFunctionCode(const IceString &FuncName,
bool IsInternal, Assembler *Asm) { bool IsInternal, Assembler *Asm) {
assert(!SectionNumbersAssigned); assert(!SectionNumbersAssigned);
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
constexpr TimerStackIdT StackID = GlobalContext::TSK_Funcs;
TimerIdT TimerID = 0;
bool TimeThisFunction = Ctx.getFlags().getTimeEachFunction();
if (TimeThisFunction) {
TimerID = Ctx.getTimerID(StackID, FuncName);
Ctx.pushTimer(TimerID, StackID);
}
ELFTextSection *Section = nullptr; ELFTextSection *Section = nullptr;
ELFRelocationSection *RelSection = nullptr; ELFRelocationSection *RelSection = nullptr;
const bool FunctionSections = Ctx.getFlags().getFunctionSections(); const bool FunctionSections = Ctx.getFlags().getFunctionSections();
...@@ -270,6 +278,8 @@ void ELFObjectWriter::writeFunctionCode(const IceString &FuncName, ...@@ -270,6 +278,8 @@ void ELFObjectWriter::writeFunctionCode(const IceString &FuncName,
RelSection->addRelocations(OffsetInSection, Asm->fixups()); RelSection->addRelocations(OffsetInSection, Asm->fixups());
} }
Section->appendData(Str, Asm->getBufferView()); Section->appendData(Str, Asm->getBufferView());
if (TimeThisFunction)
Ctx.popTimer(TimerID, StackID);
} }
namespace { namespace {
...@@ -303,6 +313,7 @@ void ELFObjectWriter::writeDataSection(const VariableDeclarationList &Vars, ...@@ -303,6 +313,7 @@ void ELFObjectWriter::writeDataSection(const VariableDeclarationList &Vars,
FixupKind RelocationKind, FixupKind RelocationKind,
const IceString &SectionSuffix, const IceString &SectionSuffix,
bool IsPIC) { bool IsPIC) {
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
assert(!SectionNumbersAssigned); assert(!SectionNumbersAssigned);
VariableDeclarationList VarsBySection[ELFObjectWriter::NumSectionTypes]; VariableDeclarationList VarsBySection[ELFObjectWriter::NumSectionTypes];
for (auto &SectionList : VarsBySection) for (auto &SectionList : VarsBySection)
...@@ -439,6 +450,7 @@ void ELFObjectWriter::writeDataOfType(SectionType ST, ...@@ -439,6 +450,7 @@ void ELFObjectWriter::writeDataOfType(SectionType ST,
} }
void ELFObjectWriter::writeInitialELFHeader() { void ELFObjectWriter::writeInitialELFHeader() {
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
assert(!SectionNumbersAssigned); assert(!SectionNumbersAssigned);
constexpr Elf64_Off DummySHOffset = 0; constexpr Elf64_Off DummySHOffset = 0;
constexpr SizeT DummySHStrIndex = 0; constexpr SizeT DummySHStrIndex = 0;
...@@ -500,6 +512,7 @@ void ELFObjectWriter::writeELFHeaderInternal(Elf64_Off SectionHeaderOffset, ...@@ -500,6 +512,7 @@ void ELFObjectWriter::writeELFHeaderInternal(Elf64_Off SectionHeaderOffset,
} }
template <typename ConstType> void ELFObjectWriter::writeConstantPool(Type Ty) { template <typename ConstType> void ELFObjectWriter::writeConstantPool(Type Ty) {
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
ConstantList Pool = Ctx.getConstantPool(Ty); ConstantList Pool = Ctx.getConstantPool(Ty);
if (Pool.empty()) { if (Pool.empty()) {
return; return;
...@@ -576,6 +589,7 @@ void ELFObjectWriter::writeAllRelocationSections() { ...@@ -576,6 +589,7 @@ void ELFObjectWriter::writeAllRelocationSections() {
void ELFObjectWriter::writeJumpTable(const JumpTableData &JT, void ELFObjectWriter::writeJumpTable(const JumpTableData &JT,
FixupKind RelocationKind, bool IsPIC) { FixupKind RelocationKind, bool IsPIC) {
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
ELFDataSection *Section; ELFDataSection *Section;
ELFRelocationSection *RelSection; ELFRelocationSection *RelSection;
const Elf64_Xword PointerSize = typeWidthInBytes(getPointerType()); const Elf64_Xword PointerSize = typeWidthInBytes(getPointerType());
...@@ -611,6 +625,7 @@ void ELFObjectWriter::writeJumpTable(const JumpTableData &JT, ...@@ -611,6 +625,7 @@ void ELFObjectWriter::writeJumpTable(const JumpTableData &JT,
} }
void ELFObjectWriter::setUndefinedSyms(const ConstantList &UndefSyms) { void ELFObjectWriter::setUndefinedSyms(const ConstantList &UndefSyms) {
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
for (const Constant *S : UndefSyms) { for (const Constant *S : UndefSyms) {
const auto *Sym = llvm::cast<ConstantRelocatable>(S); const auto *Sym = llvm::cast<ConstantRelocatable>(S);
const IceString &Name = Sym->getName(); const IceString &Name = Sym->getName();
...@@ -642,6 +657,8 @@ void ELFObjectWriter::writeRelocationSections(RelSectionList &RelSections) { ...@@ -642,6 +657,8 @@ void ELFObjectWriter::writeRelocationSections(RelSectionList &RelSections) {
} }
void ELFObjectWriter::writeNonUserSections() { void ELFObjectWriter::writeNonUserSections() {
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
// Write out the shstrtab now that all sections are known. // Write out the shstrtab now that all sections are known.
ShStrTab->doLayout(); ShStrTab->doLayout();
ShStrTab->setSize(ShStrTab->getSectionDataSize()); ShStrTab->setSize(ShStrTab->getSectionDataSize());
......
...@@ -307,6 +307,7 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError, ...@@ -307,6 +307,7 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError,
} }
void GlobalContext::translateFunctions() { void GlobalContext::translateFunctions() {
TimerMarker Timer(TimerStack::TT_translateFunctions, this);
while (std::unique_ptr<Cfg> Func = optQueueBlockingPop()) { while (std::unique_ptr<Cfg> Func = optQueueBlockingPop()) {
// Install Func in TLS for Cfg-specific container allocators. // Install Func in TLS for Cfg-specific container allocators.
CfgLocalAllocatorScope _(Func.get()); CfgLocalAllocatorScope _(Func.get());
...@@ -380,7 +381,7 @@ void resizePending(std::vector<EmitterWorkItem *> &Pending, uint32_t Index) { ...@@ -380,7 +381,7 @@ void resizePending(std::vector<EmitterWorkItem *> &Pending, uint32_t Index) {
} // end of anonymous namespace } // end of anonymous namespace
void GlobalContext::emitFileHeader() { void GlobalContext::emitFileHeader() {
TimerMarker T1(Ice::TimerStack::TT_emit, this); TimerMarker T1(Ice::TimerStack::TT_emitAsm, this);
if (getFlags().getOutFileType() == FT_Elf) { if (getFlags().getOutFileType() == FT_Elf) {
getObjectWriter()->writeInitialELFHeader(); getObjectWriter()->writeInitialELFHeader();
} else { } else {
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
X(doArgLowering) \ X(doArgLowering) \
X(doBranchOpt) \ X(doBranchOpt) \
X(doNopInsertion) \ X(doNopInsertion) \
X(emit) \ X(emitAsm) \
X(emitGlobalInitializers) \ X(emitGlobalInitializers) \
X(genCode) \ X(genCode) \
X(genFrame) \ X(genFrame) \
...@@ -53,8 +53,10 @@ ...@@ -53,8 +53,10 @@
X(renumberInstructions) \ X(renumberInstructions) \
X(szmain) \ X(szmain) \
X(translate) \ X(translate) \
X(translateFunctions) \
X(validateLiveness) \ X(validateLiveness) \
X(vmetadata) X(vmetadata) \
X(writeELF)
//#define X(tag) //#define X(tag)
#endif // SUBZERO_SRC_ICETIMERTREE_DEF #endif // SUBZERO_SRC_ICETIMERTREE_DEF
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment