Commit f8b4cc84 by John Porto

Subzero: Basic Block Profiler.

BUG= None R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1147023007.
parent cbb1d3d7
...@@ -276,7 +276,7 @@ $(SB_OBJDIR): ...@@ -276,7 +276,7 @@ $(SB_OBJDIR):
$(OBJDIR)/unittest: $(OBJDIR) $(OBJDIR)/unittest: $(OBJDIR)
@mkdir -p $@ @mkdir -p $@
RT_SRC := runtime/szrt.c runtime/szrt_ll.ll RT_SRC := runtime/szrt.c runtime/szrt_ll.ll runtime/szrt_profiler.c
RT_OBJ := build/runtime/szrt_native_x8632.o build/runtime/szrt_sb_x8632.o RT_OBJ := build/runtime/szrt_native_x8632.o build/runtime/szrt_sb_x8632.o
runtime: $(RT_OBJ) runtime: $(RT_OBJ)
...@@ -285,7 +285,7 @@ runtime: $(RT_OBJ) ...@@ -285,7 +285,7 @@ runtime: $(RT_OBJ)
# even in a parallel build. # even in a parallel build.
.INTERMEDIATE: runtime.is.built .INTERMEDIATE: runtime.is.built
$(RT_OBJ): runtime.is.built $(RT_OBJ): runtime.is.built
runtime.is.built: $(RT_SRC) runtime.is.built: $(RT_SRC) pydir/build-runtime.py
@echo ================ Building Subzero runtime ================ @echo ================ Building Subzero runtime ================
./pydir/build-runtime.py -v --pnacl-root $(PNACL_TOOLCHAIN_ROOT) ./pydir/build-runtime.py -v --pnacl-root $(PNACL_TOOLCHAIN_ROOT)
......
...@@ -23,11 +23,18 @@ def Translate(ll_files, extra_args, obj, verbose): ...@@ -23,11 +23,18 @@ def Translate(ll_files, extra_args, obj, verbose):
'-o', obj '-o', obj
] + extra_args, echo=verbose) ] + extra_args, echo=verbose)
shellcmd(['objcopy', shellcmd(['objcopy',
'--localize-symbol=nacl_tp_tdb_offset', '--strip-symbol=nacl_tp_tdb_offset',
'--localize-symbol=nacl_tp_tls_offset', '--strip-symbol=nacl_tp_tls_offset',
obj obj
], echo=verbose) ], echo=verbose)
def PartialLink(obj_files, extra_args, lib, verbose):
  """Runs `ld -r` to combine obj_files into the single relocatable object lib.

  extra_args are placed on the command line before the input objects; the
  command is echoed when verbose is set.
  """
  relocatable_link = ['ld', '-o', lib, '-r']
  command = relocatable_link + extra_args + obj_files
  shellcmd(command, echo=verbose)
def main(): def main():
"""Build the Subzero runtime support library for all architectures. """Build the Subzero runtime support library for all architectures.
""" """
...@@ -72,12 +79,30 @@ def main(): ...@@ -72,12 +79,30 @@ def main():
], echo=args.verbose) ], echo=args.verbose)
ll_files = ['{dir}/szrt.ll'.format(dir=tempdir), ll_files = ['{dir}/szrt.ll'.format(dir=tempdir),
'{srcdir}/szrt_ll.ll'.format(srcdir=srcdir)] '{srcdir}/szrt_ll.ll'.format(srcdir=srcdir)]
# Translate tempdir/szrt.ll and srcdir/szrt_ll.ll to szrt_native_x8632.o
# Translate tempdir/szrt.ll and srcdir/szrt_ll.ll to
# szrt_native_x8632.tmp.o.
Translate(ll_files, Translate(ll_files,
['-mtriple=i686', '-mcpu=pentium4m'], ['-mtriple=i686', '-mcpu=pentium4m'],
'{rtdir}/szrt_native_x8632.o'.format(rtdir=rtdir), '{dir}/szrt_native_x8632.tmp.o'.format(dir=tempdir),
args.verbose) args.verbose)
# Translate tempdir/szrt.ll and srcdir/szrt_ll.ll to szrt_sb_x8632.o # Compile srcdir/szrt_profiler.c to tempdir/szrt_profiler_native_i686.o
shellcmd(['clang',
'-O2',
'-target=i686',
'-c',
'{srcdir}/szrt_profiler.c'.format(srcdir=srcdir),
'-o', '{dir}/szrt_profiler_native_x8632.o'.format(dir=tempdir)
], echo=args.verbose)
# Write the full szrt_native_x8632.o.
PartialLink(['{dir}/szrt_native_x8632.tmp.o'.format(dir=tempdir),
'{dir}/szrt_profiler_native_x8632.o'.format(dir=tempdir)
], ['-m elf_i386'],
'{rtdir}/szrt_native_x8632.o'.format(rtdir=rtdir), args.verbose)
# Translate tempdir/szrt.ll and srcdir/szrt_ll.ll to szrt_sb_x8632.o
# The sandboxed library does not get the profiler helper function as the
# binaries are linked with -nostdlib.
Translate(ll_files, Translate(ll_files,
['-mtriple=i686-nacl', '-mcpu=pentium4m'], ['-mtriple=i686-nacl', '-mcpu=pentium4m'],
'{rtdir}/szrt_sb_x8632.o'.format(rtdir=rtdir), '{rtdir}/szrt_sb_x8632.o'.format(rtdir=rtdir),
......
...@@ -84,7 +84,10 @@ def AddOptionalArgs(argparser): ...@@ -84,7 +84,10 @@ def AddOptionalArgs(argparser):
choices=['obj', 'asm', 'iasm'], choices=['obj', 'asm', 'iasm'],
help='Output file type. Default %(default)s.') help='Output file type. Default %(default)s.')
argparser.add_argument('--sandbox', dest='sandbox', action='store_true', argparser.add_argument('--sandbox', dest='sandbox', action='store_true',
help='Enabled sandboxing in the translator') help='Enable sandboxing in the translator')
argparser.add_argument('--enable-block-profile',
dest='enable_block_profile', action='store_true',
help='Enable basic block profiling.')
argparser.add_argument('--verbose', '-v', dest='verbose', argparser.add_argument('--verbose', '-v', dest='verbose',
action='store_true', action='store_true',
help='Display some extra debugging output') help='Display some extra debugging output')
...@@ -217,6 +220,8 @@ def ProcessPexe(args, pexe, exe): ...@@ -217,6 +220,8 @@ def ProcessPexe(args, pexe, exe):
'-ffunction-sections', '-ffunction-sections',
'-fdata-sections'] if hybrid else []) + '-fdata-sections'] if hybrid else []) +
(['-sandbox'] if args.sandbox else []) + (['-sandbox'] if args.sandbox else []) +
(['-enable-block-profile'] if
args.enable_block_profile and not args.sandbox else []) +
args.sz_args + args.sz_args +
[pexe], [pexe],
echo=args.verbose) echo=args.verbose)
......
#include <stdint.h>
#include <stdio.h>
// One profiling record as read by this runtime helper: a 64-bit execution
// counter followed by a pointer to the block's name string. The record is
// declared with 8-byte alignment.
// NOTE(review): this layout has to agree with the globals the translator
// emits for each block -- confirm against the code that builds them.
struct BlockProfileInfo {
  uint64_t Counter;
  const char *const BlockName;
} __attribute__((aligned(8)));
// Declared here as a single pointer; __Sz_profile_summary takes its address
// and reads consecutive pointers from there until it finds a NULL entry.
extern const struct BlockProfileInfo *__Sz_block_profile_info;
// ASCII-art banner that __Sz_profile_summary prints before the per-block
// counters. The text is built from adjacent string literals, so every "\\"
// pair below is a single backslash in the output.
static const char SubzeroLogo[] =
"\n"
"\n"
"__________________________________________________________________________"
"____________________________\n"
" _____/\\\\\\\\\\\\\\\\\\\\\\__________________/"
"\\\\\\_______________________________________________________________\n"
" "
"___/\\\\\\/////////\\\\\\_______________\\/"
"\\\\\\_______________________________________________________________\n"
" "
"__\\//\\\\\\______\\///________________\\/"
"\\\\\\_______________________________________________________________\n"
" "
"___\\////\\\\\\__________/\\\\\\____/\\\\\\_\\/\\\\\\_________/"
"\\\\\\\\\\\\\\\\\\\\\\_____/\\\\\\\\\\\\\\\\___/\\\\/\\\\\\\\\\\\\\____/"
"\\\\\\\\\\____\n"
" "
"______\\////\\\\\\______\\/\\\\\\___\\/\\\\\\_\\/\\\\\\\\\\\\\\\\\\__\\///"
"////\\\\\\/____/\\\\\\/////\\\\\\_\\/\\\\\\/////\\\\\\_/\\\\\\///"
"\\\\\\__\n"
" "
"_________\\////\\\\\\___\\/\\\\\\___\\/\\\\\\_\\/\\\\\\////\\\\\\______/"
"\\\\\\/_____/\\\\\\\\\\\\\\\\\\\\\\__\\/\\\\\\__\\///__/\\\\\\__\\//"
"\\\\\\_\n"
" "
"__/\\\\\\______\\//\\\\\\__\\/\\\\\\___\\/\\\\\\_\\/\\\\\\__\\/\\\\\\____/"
"\\\\\\/______\\//\\\\///////___\\/\\\\\\_______\\//\\\\\\__/\\\\\\__\n"
" "
"_\\///\\\\\\\\\\\\\\\\\\\\\\/___\\//\\\\\\\\\\\\\\\\\\__\\/"
"\\\\\\\\\\\\\\\\\\___/\\\\\\\\\\\\\\\\\\\\\\__\\//\\\\\\\\\\\\\\\\\\\\_\\/"
"\\\\\\________\\///\\\\\\\\\\/___\n"
" "
"___\\///////////______\\/////////___\\/////////___\\///////////____\\/////"
"/////__\\///___________\\/////_____\n"
" "
"__________________________________________________________________________"
"____________________________\n"
"\n"
"\n";
// Prints the Subzero banner followed by one "<count>\t<block name>" line per
// profiled block, then flushes stdout.
//
// __Sz_block_profile_info is declared as a single pointer, so its address is
// used as the start of a sequence of record pointers; the walk stops at the
// first NULL entry.
void __Sz_profile_summary(void) {
  printf("%s", SubzeroLogo);
  for (const struct BlockProfileInfo **curr = &__Sz_block_profile_info;
       *curr != NULL; ++curr) {
    // Counter is a uint64_t: print it as unsigned, with an explicit cast so
    // the argument type always matches the %llu conversion.
    printf("%llu\t%s\n", (unsigned long long)(*curr)->Counter,
           (*curr)->BlockName);
  }
  fflush(stdout);
}
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "IceClFlags.h" #include "IceClFlags.h"
#include "IceDefs.h" #include "IceDefs.h"
#include "IceELFObjectWriter.h" #include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInst.h" #include "IceInst.h"
#include "IceLiveness.h" #include "IceLiveness.h"
#include "IceOperand.h" #include "IceOperand.h"
...@@ -75,6 +76,69 @@ void Cfg::addImplicitArg(Variable *Arg) { ...@@ -75,6 +76,69 @@ void Cfg::addImplicitArg(Variable *Arg) {
// is used for dumping the stack frame location of Variables. // is used for dumping the stack frame location of Variables.
bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); } bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); }
namespace {

// Name prefixes of the two globals generated for every profiled block: the
// first marks the global holding the block's name, the second marks the
// record holding its counter. Cfg::isProfileGlobal matches on the second.
constexpr char BlockNameGlobalPrefix[] = ".L$profiler$block_name$";
constexpr char BlockStatsGlobalPrefix[] = ".L$profiler$block_info$";

// Creates the constant global whose contents are the characters of
// NodeAsmName.
VariableDeclaration *nodeNameDeclaration(const IceString &NodeAsmName) {
  auto *NameVar = VariableDeclaration::create();
  NameVar->setName(BlockNameGlobalPrefix + NodeAsmName);
  NameVar->setIsConstant(true);
  // size() + 1 bytes are copied -- presumably so that the string's
  // terminating NUL is part of the emitted data.
  NameVar->addInitializer(new VariableDeclaration::DataInitializer(
      NodeAsmName.data(), NodeAsmName.size() + 1));
  // Wasteful, 32-bit could use 4 bytes.
  NameVar->setAlignment(typeWidthInBytes(IceType_i64));
  return NameVar;
}

// Creates the per-block profiling record: 8 zeroed bytes (the counter)
// followed by a relocation to NodeNameDeclaration, aligned to 8 bytes.
VariableDeclaration *
blockProfilingInfoDeclaration(const IceString &NodeAsmName,
                              VariableDeclaration *NodeNameDeclaration) {
  const SizeT CounterBytes = typeWidthInBytes(IceType_i64);
  constexpr RelocOffsetT NameOffset = 0;

  auto *InfoVar = VariableDeclaration::create();
  InfoVar->setName(BlockStatsGlobalPrefix + NodeAsmName);
  InfoVar->addInitializer(
      new VariableDeclaration::ZeroInitializer(CounterBytes));
  InfoVar->addInitializer(
      new VariableDeclaration::RelocInitializer(NodeNameDeclaration,
                                                NameOffset));
  InfoVar->setAlignment(CounterBytes);
  return InfoVar;
}

} // end of anonymous namespace
// Instruments every node of this CFG. For each node, a name global and a
// profiling-record global are appended to GlobalInits (created on first use),
// and the node is asked to count its executions in that record.
void Cfg::profileBlocks() {
  if (!GlobalInits)
    GlobalInits.reset(new VariableDeclarationList());

  for (CfgNode *Node : Nodes) {
    const IceString NodeAsmName = Node->getAsmName();

    VariableDeclaration *NameDecl = nodeNameDeclaration(NodeAsmName);
    GlobalInits->push_back(NameDecl);

    VariableDeclaration *InfoDecl =
        blockProfilingInfoDeclaration(NodeAsmName, NameDecl);
    GlobalInits->push_back(InfoDecl);

    Node->profileExecutionCount(InfoDecl);
  }
}
// Returns true when Var's name starts with the prefix given to the per-block
// profiling records.
bool Cfg::isProfileGlobal(const VariableDeclaration &Var) {
  // rfind anchored at position 0 can only match at the very start of the
  // name, so a non-matching name is rejected without scanning all of it.
  return Var.getName().rfind(BlockStatsGlobalPrefix, 0) == 0;
}
void Cfg::addCallToProfileSummary() {
// The call(s) to __Sz_profile_summary are added by the profiler in functions
// that cause the program to exit. This function is defined in
// runtime/szrt_profiler.c.
Constant *ProfileSummarySym =
Ctx->getConstantExternSym("__Sz_profile_summary");
constexpr SizeT NumArgs = 0;
constexpr Variable *Void = nullptr;
constexpr bool HasTailCall = false;
auto *Call =
InstCall::create(this, NumArgs, Void, ProfileSummarySym, HasTailCall);
getEntryNode()->getInsts().push_front(Call);
}
void Cfg::translate() { void Cfg::translate() {
if (hasError()) if (hasError())
return; return;
...@@ -99,6 +163,16 @@ void Cfg::translate() { ...@@ -99,6 +163,16 @@ void Cfg::translate() {
dump("Initial CFG"); dump("Initial CFG");
if (getContext()->getFlags().getEnableBlockProfile()) {
profileBlocks();
// TODO(jpp): this is fragile, at best. Figure out a better way of detecting
// exit functions.
if (GlobalContext::matchSymbolName(getFunctionName(), "exit")) {
addCallToProfileSummary();
}
dump("Profiled CFG");
}
// The set of translation passes and their order are determined by // The set of translation passes and their order are determined by
// the target. // the target.
getTarget()->translate(); getTarget()->translate();
......
...@@ -128,10 +128,17 @@ public: ...@@ -128,10 +128,17 @@ public:
return static_cast<T *>(TargetAssembler.get()); return static_cast<T *>(TargetAssembler.get());
} }
Assembler *releaseAssembler() { return TargetAssembler.release(); } Assembler *releaseAssembler() { return TargetAssembler.release(); }
// Hands the list of globals required by this CFG over to the caller. The
// member is moved from, so this Cfg no longer holds the list afterwards; the
// result is null if no list was ever created.
std::unique_ptr<VariableDeclarationList> getGlobalInits() {
  return std::move(GlobalInits);
}
bool hasComputedFrame() const; bool hasComputedFrame() const;
bool getFocusedTiming() const { return FocusedTiming; } bool getFocusedTiming() const { return FocusedTiming; }
void setFocusedTiming() { FocusedTiming = true; } void setFocusedTiming() { FocusedTiming = true; }
// Returns true if Var is a global variable that is used by the profiling
// code.
static bool isProfileGlobal(const VariableDeclaration &Var);
// Passes over the CFG. // Passes over the CFG.
void translate(); void translate();
// After the CFG is fully constructed, iterate over the nodes and // After the CFG is fully constructed, iterate over the nodes and
...@@ -188,6 +195,15 @@ public: ...@@ -188,6 +195,15 @@ public:
private: private:
Cfg(GlobalContext *Ctx, uint32_t SequenceNumber); Cfg(GlobalContext *Ctx, uint32_t SequenceNumber);
// Adds a call to the ProfileSummary runtime function as the first instruction
// in this CFG's entry block.
void addCallToProfileSummary();
// Iterates over the basic blocks in this CFG, adding profiling code to each
// one of them. The globals that the profiling code needs are appended to
// GlobalInits.
void profileBlocks();
GlobalContext *Ctx; GlobalContext *Ctx;
uint32_t SequenceNumber; // output order for emission uint32_t SequenceNumber; // output order for emission
VerboseMask VMask; VerboseMask VMask;
...@@ -209,6 +225,8 @@ private: ...@@ -209,6 +225,8 @@ private:
std::unique_ptr<TargetLowering> Target; std::unique_ptr<TargetLowering> Target;
std::unique_ptr<VariablesMetadata> VMetadata; std::unique_ptr<VariablesMetadata> VMetadata;
std::unique_ptr<Assembler> TargetAssembler; std::unique_ptr<Assembler> TargetAssembler;
// Globals required by this CFG. Mostly used for the profiler's globals.
std::unique_ptr<VariableDeclarationList> GlobalInits;
// CurrentNode is maintained during dumping/emitting just for // CurrentNode is maintained during dumping/emitting just for
// validating Variable::DefNode. Normally, a traversal over // validating Variable::DefNode. Normally, a traversal over
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "assembler.h" #include "assembler.h"
#include "IceCfg.h" #include "IceCfg.h"
#include "IceCfgNode.h" #include "IceCfgNode.h"
#include "IceGlobalInits.h"
#include "IceInst.h" #include "IceInst.h"
#include "IceLiveness.h" #include "IceLiveness.h"
#include "IceOperand.h" #include "IceOperand.h"
...@@ -1243,4 +1244,31 @@ void CfgNode::dump(Cfg *Func) const { ...@@ -1243,4 +1244,31 @@ void CfgNode::dump(Cfg *Func) const {
} }
} }
// Prepends to this node a call to the llvm.nacl.atomic.rmw.i64 intrinsic that
// atomically adds 1 (acquire-release ordering) to the symbol named after Var.
void CfgNode::profileExecutionCount(VariableDeclaration *Var) {
  constexpr char RMW_I64[] = "llvm.nacl.atomic.rmw.i64";
  // NOTE(review): five operand slots are requested but only four arguments
  // are added below -- confirm whether the count is meant to include the
  // call target.
  constexpr SizeT NumOperandSlots = 5;

  GlobalContext *GlobalCtx = Func->getContext();

  bool BadIntrinsic = false;
  const Intrinsics::FullIntrinsicInfo *Info =
      GlobalCtx->getIntrinsicsInfo().find(RMW_I64, BadIntrinsic);
  assert(!BadIntrinsic);
  assert(Info != nullptr);

  // Operands, created in the order: callee, counter symbol, operation,
  // increment, memory ordering.
  Operand *Callee = GlobalCtx->getConstantExternSym(RMW_I64);
  Constant *CounterSym = GlobalCtx->getConstantExternSym(Var->getName());
  Constant *AddOp = GlobalCtx->getConstantInt32(Intrinsics::AtomicAdd);
  Constant *Increment = GlobalCtx->getConstantInt64(1);
  Constant *Ordering =
      GlobalCtx->getConstantInt32(Intrinsics::MemoryOrderAcquireRelease);

  // Fresh i64 variable that receives the intrinsic's result; nothing in this
  // function reads it afterwards.
  auto *Dest = Func->makeVariable(IceType_i64);
  InstIntrinsicCall *RMWCall = InstIntrinsicCall::create(
      Func, NumOperandSlots, Dest, Callee, Info->Info);
  RMWCall->addArg(AddOp);
  RMWCall->addArg(CounterSym);
  RMWCall->addArg(Increment);
  RMWCall->addArg(Ordering);
  Insts.push_front(RMWCall);
}
} // end of namespace Ice } // end of namespace Ice
...@@ -91,6 +91,8 @@ public: ...@@ -91,6 +91,8 @@ public:
void emitIAS(Cfg *Func) const; void emitIAS(Cfg *Func) const;
void dump(Cfg *Func) const; void dump(Cfg *Func) const;
void profileExecutionCount(VariableDeclaration *Var);
private: private:
CfgNode(Cfg *Func, SizeT LabelIndex); CfgNode(Cfg *Func, SizeT LabelIndex);
Cfg *const Func; Cfg *const Func;
......
...@@ -65,6 +65,12 @@ cl::opt<bool> ...@@ -65,6 +65,12 @@ cl::opt<bool>
DumpStats("szstats", DumpStats("szstats",
cl::desc("Print statistics after translating each function")); cl::desc("Print statistics after translating each function"));
// -enable-block-profile: off by default. ClFlags::getParsedClFlags copies the
// parsed value into the ClFlags object via setEnableBlockProfile.
cl::opt<bool> EnableBlockProfile(
    "enable-block-profile",
    cl::desc("If true, instrument basic blocks, and output profiling "
             "information to stdout at the end of program execution."),
    cl::init(false));
cl::opt<bool> cl::opt<bool>
FunctionSections("ffunction-sections", FunctionSections("ffunction-sections",
cl::desc("Emit functions into separate sections")); cl::desc("Emit functions into separate sections"));
...@@ -261,6 +267,7 @@ void ClFlags::resetClFlags(ClFlags &OutFlags) { ...@@ -261,6 +267,7 @@ void ClFlags::resetClFlags(ClFlags &OutFlags) {
OutFlags.DisableIRGeneration = false; OutFlags.DisableIRGeneration = false;
OutFlags.DisableTranslation = false; OutFlags.DisableTranslation = false;
OutFlags.DumpStats = false; OutFlags.DumpStats = false;
OutFlags.EnableBlockProfile = false;
OutFlags.FunctionSections = false; OutFlags.FunctionSections = false;
OutFlags.GenerateUnitTestMessages = false; OutFlags.GenerateUnitTestMessages = false;
OutFlags.PhiEdgeSplit = false; OutFlags.PhiEdgeSplit = false;
...@@ -311,6 +318,7 @@ void ClFlags::getParsedClFlags(ClFlags &OutFlags) { ...@@ -311,6 +318,7 @@ void ClFlags::getParsedClFlags(ClFlags &OutFlags) {
OutFlags.setDisableIRGeneration(::DisableIRGeneration); OutFlags.setDisableIRGeneration(::DisableIRGeneration);
OutFlags.setDisableTranslation(::DisableTranslation); OutFlags.setDisableTranslation(::DisableTranslation);
OutFlags.setDumpStats(::DumpStats); OutFlags.setDumpStats(::DumpStats);
OutFlags.setEnableBlockProfile(::EnableBlockProfile);
OutFlags.setFunctionSections(::FunctionSections); OutFlags.setFunctionSections(::FunctionSections);
OutFlags.setNumTranslationThreads(::NumThreads); OutFlags.setNumTranslationThreads(::NumThreads);
OutFlags.setOptLevel(::OLevel); OutFlags.setOptLevel(::OLevel);
......
...@@ -65,6 +65,9 @@ public: ...@@ -65,6 +65,9 @@ public:
bool getDumpStats() const { return ALLOW_DUMP && DumpStats; } bool getDumpStats() const { return ALLOW_DUMP && DumpStats; }
void setDumpStats(bool NewValue) { DumpStats = NewValue; } void setDumpStats(bool NewValue) { DumpStats = NewValue; }
// Returns the stored EnableBlockProfile flag.
bool getEnableBlockProfile() const { return EnableBlockProfile; }
// Overwrites the stored EnableBlockProfile flag with NewValue.
void setEnableBlockProfile(bool NewValue) { EnableBlockProfile = NewValue; }
bool getFunctionSections() const { return FunctionSections; } bool getFunctionSections() const { return FunctionSections; }
void setFunctionSections(bool NewValue) { FunctionSections = NewValue; } void setFunctionSections(bool NewValue) { FunctionSections = NewValue; }
...@@ -182,6 +185,7 @@ private: ...@@ -182,6 +185,7 @@ private:
bool DisableIRGeneration; bool DisableIRGeneration;
bool DisableTranslation; bool DisableTranslation;
bool DumpStats; bool DumpStats;
bool EnableBlockProfile;
bool FunctionSections; bool FunctionSections;
bool GenerateUnitTestMessages; bool GenerateUnitTestMessages;
bool PhiEdgeSplit; bool PhiEdgeSplit;
......
...@@ -383,9 +383,8 @@ void ELFObjectWriter::writeDataOfType(SectionType ST, ...@@ -383,9 +383,8 @@ void ELFObjectWriter::writeDataOfType(SectionType ST,
for (VariableDeclaration::Initializer *Init : Var->getInitializers()) { for (VariableDeclaration::Initializer *Init : Var->getInitializers()) {
switch (Init->getKind()) { switch (Init->getKind()) {
case VariableDeclaration::Initializer::DataInitializerKind: { case VariableDeclaration::Initializer::DataInitializerKind: {
const auto Data = const auto Data = llvm::cast<VariableDeclaration::DataInitializer>(
llvm::cast<VariableDeclaration::DataInitializer>(Init) Init)->getContents();
->getContents();
Section->appendData(Str, llvm::StringRef(Data.data(), Data.size())); Section->appendData(Str, llvm::StringRef(Data.data(), Data.size()));
break; break;
} }
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "llvm/Support/Timer.h" #include "llvm/Support/Timer.h"
#include "IceCfg.h" #include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h" #include "IceClFlags.h"
#include "IceDefs.h" #include "IceDefs.h"
#include "IceELFObjectWriter.h" #include "IceELFObjectWriter.h"
...@@ -277,6 +278,7 @@ void GlobalContext::translateFunctions() { ...@@ -277,6 +278,7 @@ void GlobalContext::translateFunctions() {
Cfg::setCurrentCfg(nullptr); Cfg::setCurrentCfg(nullptr);
continue; // Func goes out of scope and gets deleted continue; // Func goes out of scope and gets deleted
} }
Func->translate(); Func->translate();
EmitterWorkItem *Item = nullptr; EmitterWorkItem *Item = nullptr;
if (Func->hasError()) { if (Func->hasError()) {
...@@ -285,6 +287,7 @@ void GlobalContext::translateFunctions() { ...@@ -285,6 +287,7 @@ void GlobalContext::translateFunctions() {
getStrError() << "ICE translation error: " << Func->getFunctionName() getStrError() << "ICE translation error: " << Func->getFunctionName()
<< ": " << Func->getError() << "\n"; << ": " << Func->getError() << "\n";
Item = new EmitterWorkItem(Func->getSequenceNumber()); Item = new EmitterWorkItem(Func->getSequenceNumber());
Item->setGlobalInits(Func->getGlobalInits());
} else { } else {
Func->getAssembler<>()->setInternal(Func->getInternal()); Func->getAssembler<>()->setInternal(Func->getInternal());
switch (getFlags().getOutFileType()) { switch (getFlags().getOutFileType()) {
...@@ -299,11 +302,15 @@ void GlobalContext::translateFunctions() { ...@@ -299,11 +302,15 @@ void GlobalContext::translateFunctions() {
// Copy relevant fields into Asm before Func is deleted. // Copy relevant fields into Asm before Func is deleted.
Asm->setFunctionName(Func->getFunctionName()); Asm->setFunctionName(Func->getFunctionName());
Item = new EmitterWorkItem(Func->getSequenceNumber(), Asm); Item = new EmitterWorkItem(Func->getSequenceNumber(), Asm);
Item->setGlobalInits(Func->getGlobalInits());
} break; } break;
case FT_Asm: case FT_Asm:
// The Cfg has not been emitted yet, so stats are not ready // The Cfg has not been emitted yet, so stats are not ready
// to be dumped. // to be dumped.
std::unique_ptr<VariableDeclarationList> GlobalInits =
Func->getGlobalInits();
Item = new EmitterWorkItem(Func->getSequenceNumber(), Func.release()); Item = new EmitterWorkItem(Func->getSequenceNumber(), Func.release());
Item->setGlobalInits(std::move(GlobalInits));
break; break;
} }
} }
...@@ -316,6 +323,43 @@ void GlobalContext::translateFunctions() { ...@@ -316,6 +323,43 @@ void GlobalContext::translateFunctions() {
namespace { namespace {
// Builds the constant, externally visible __Sz_block_profile_info global: one
// relocation for every profiler-generated global found in Globals, in list
// order, followed by a zeroed terminator. The __Sz_profile_summary runtime
// function iterates over this array to print the profiling counters.
VariableDeclaration *blockProfileInfo(const VariableDeclarationList &Globals) {
  constexpr RelocOffsetT BlockExecutionCounterOffset = 0;
  const SizeT Int64Bytes = typeWidthInBytes(IceType_i64);

  auto *InfoArray = VariableDeclaration::create();
  InfoArray->setAlignment(Int64Bytes);
  InfoArray->setIsConstant(true);
  // Note: if you change this symbol, make sure to update
  // runtime/szrt_profiler.c as well.
  InfoArray->setName("__Sz_block_profile_info");
  InfoArray->setLinkage(llvm::GlobalValue::ExternalLinkage);

  for (const VariableDeclaration *Global : Globals) {
    if (!Cfg::isProfileGlobal(*Global))
      continue;
    InfoArray->addInitializer(new VariableDeclaration::RelocInitializer(
        Global, BlockExecutionCounterOffset));
  }

  // The array ends with a 64-bit zero sentinel. For 32-bit architectures
  // this wastes 4 bytes.
  InfoArray->addInitializer(
      new VariableDeclaration::ZeroInitializer(Int64Bytes));
  return InfoArray;
}
// Appends the __Sz_block_profile_info array, built from the current contents
// of Globals, to Globals itself.
void addBlockProfileInfoArrayToGlobals(VariableDeclarationList *Globals) {
  // The array is deliberately built into a named temporary first, so that
  // blockProfileInfo has finished reading Globals before the list is
  // extended.
  VariableDeclaration *InfoArray = blockProfileInfo(*Globals);
  Globals->push_back(InfoArray);
}
void lowerGlobals(GlobalContext *Ctx, void lowerGlobals(GlobalContext *Ctx,
std::unique_ptr<VariableDeclarationList> VariableDeclarations, std::unique_ptr<VariableDeclarationList> VariableDeclarations,
TargetDataLowering *DataLowering) { TargetDataLowering *DataLowering) {
...@@ -331,6 +375,13 @@ void lowerGlobals(GlobalContext *Ctx, ...@@ -331,6 +375,13 @@ void lowerGlobals(GlobalContext *Ctx,
} }
if (Ctx->getFlags().getDisableTranslation()) if (Ctx->getFlags().getDisableTranslation())
return; return;
// There should be no need to emit the block_profile_info array if profiling
// is disabled. In practice, given that szrt_profiler.o will always be
// embedded in the application, we need to add it. In a non-profiled build
// this array will only contain the nullptr terminator.
addBlockProfileInfoArrayToGlobals(VariableDeclarations.get());
DataLowering->lowerGlobals(std::move(VariableDeclarations)); DataLowering->lowerGlobals(std::move(VariableDeclarations));
} }
...@@ -340,6 +391,13 @@ void resizePending(std::vector<EmitterWorkItem *> &Pending, uint32_t Index) { ...@@ -340,6 +391,13 @@ void resizePending(std::vector<EmitterWorkItem *> &Pending, uint32_t Index) {
Pending.resize(Index + 1); Pending.resize(Index + 1);
} }
// Appends every element of src to the end of dst. A null src is accepted and
// leaves dst untouched. src is taken by value, so the source list object is
// destroyed when this function returns.
void addAllIfNotNull(std::unique_ptr<VariableDeclarationList> src,
                     VariableDeclarationList *dst) {
  if (!src)
    return;
  dst->insert(dst->end(), src->begin(), src->end());
}
} // end of anonymous namespace } // end of anonymous namespace
void GlobalContext::emitItems() { void GlobalContext::emitItems() {
...@@ -350,6 +408,8 @@ void GlobalContext::emitItems() { ...@@ -350,6 +408,8 @@ void GlobalContext::emitItems() {
// the work queue, and if it's not the item we're waiting for, we // the work queue, and if it's not the item we're waiting for, we
// insert it into Pending and repeat. The work item is deleted // insert it into Pending and repeat. The work item is deleted
// after it is processed. // after it is processed.
std::unique_ptr<VariableDeclarationList> GlobalInits(
new VariableDeclarationList());
std::vector<EmitterWorkItem *> Pending; std::vector<EmitterWorkItem *> Pending;
uint32_t DesiredSequenceNumber = getFirstSequenceNumber(); uint32_t DesiredSequenceNumber = getFirstSequenceNumber();
while (true) { while (true) {
...@@ -359,7 +419,7 @@ void GlobalContext::emitItems() { ...@@ -359,7 +419,7 @@ void GlobalContext::emitItems() {
if (RawItem == nullptr) if (RawItem == nullptr)
RawItem = emitQueueBlockingPop(); RawItem = emitQueueBlockingPop();
if (RawItem == nullptr) if (RawItem == nullptr)
return; break;
uint32_t ItemSeq = RawItem->getSequenceNumber(); uint32_t ItemSeq = RawItem->getSequenceNumber();
if (Threaded && ItemSeq != DesiredSequenceNumber) { if (Threaded && ItemSeq != DesiredSequenceNumber) {
resizePending(Pending, ItemSeq); resizePending(Pending, ItemSeq);
...@@ -373,10 +433,10 @@ void GlobalContext::emitItems() { ...@@ -373,10 +433,10 @@ void GlobalContext::emitItems() {
case EmitterWorkItem::WI_Nop: case EmitterWorkItem::WI_Nop:
break; break;
case EmitterWorkItem::WI_GlobalInits: { case EmitterWorkItem::WI_GlobalInits: {
lowerGlobals(this, Item->getGlobalInits(), addAllIfNotNull(Item->getGlobalInits(), GlobalInits.get());
TargetDataLowering::createLowering(this).get());
} break; } break;
case EmitterWorkItem::WI_Asm: { case EmitterWorkItem::WI_Asm: {
addAllIfNotNull(Item->getGlobalInits(), GlobalInits.get());
std::unique_ptr<Assembler> Asm = Item->getAsm(); std::unique_ptr<Assembler> Asm = Item->getAsm();
Asm->alignFunction(); Asm->alignFunction();
IceString MangledName = mangleName(Asm->getFunctionName()); IceString MangledName = mangleName(Asm->getFunctionName());
...@@ -398,6 +458,9 @@ void GlobalContext::emitItems() { ...@@ -398,6 +458,9 @@ void GlobalContext::emitItems() {
case EmitterWorkItem::WI_Cfg: { case EmitterWorkItem::WI_Cfg: {
if (!ALLOW_DUMP) if (!ALLOW_DUMP)
llvm::report_fatal_error("WI_Cfg work item created inappropriately"); llvm::report_fatal_error("WI_Cfg work item created inappropriately");
addAllIfNotNull(Item->getGlobalInits(), GlobalInits.get());
assert(getFlags().getOutFileType() == FT_Asm); assert(getFlags().getOutFileType() == FT_Asm);
std::unique_ptr<Cfg> Func = Item->getCfg(); std::unique_ptr<Cfg> Func = Item->getCfg();
// Unfortunately, we have to temporarily install the Cfg in TLS // Unfortunately, we have to temporarily install the Cfg in TLS
...@@ -410,6 +473,9 @@ void GlobalContext::emitItems() { ...@@ -410,6 +473,9 @@ void GlobalContext::emitItems() {
} break; } break;
} }
} }
lowerGlobals(this, std::move(GlobalInits),
TargetDataLowering::createLowering(this).get());
} }
// Scan a string for S[0-9A-Z]*_ patterns and replace them with // Scan a string for S[0-9A-Z]*_ patterns and replace them with
......
...@@ -3119,10 +3119,9 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -3119,10 +3119,9 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
Func->setError("Unexpected memory ordering for AtomicRMW"); Func->setError("Unexpected memory ordering for AtomicRMW");
return; return;
} }
lowerAtomicRMW( lowerAtomicRMW(Instr->getDest(),
Instr->getDest(), static_cast<uint32_t>(llvm::cast<ConstantInteger32>(
static_cast<uint32_t>( Instr->getArg(0))->getValue()),
llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
Instr->getArg(1), Instr->getArg(2)); Instr->getArg(1), Instr->getArg(2));
return; return;
case Intrinsics::AtomicStore: { case Intrinsics::AtomicStore: {
......
...@@ -30,8 +30,15 @@ EmitterWorkItem::EmitterWorkItem(uint32_t Seq, Cfg *F) ...@@ -30,8 +30,15 @@ EmitterWorkItem::EmitterWorkItem(uint32_t Seq, Cfg *F)
: Sequence(Seq), Kind(WI_Cfg), GlobalInits(nullptr), Function(nullptr), : Sequence(Seq), Kind(WI_Cfg), GlobalInits(nullptr), Function(nullptr),
RawFunc(F) {} RawFunc(F) {}
// Takes ownership of the list of global declarations that accompanies this
// work item. Asserts that the item's kind is WI_Asm or WI_Cfg.
// NOTE(review): the error path in GlobalContext::translateFunctions also
// calls this on an item built with the sequence-number-only constructor --
// confirm that such items have one of the kinds accepted here.
void EmitterWorkItem::setGlobalInits(
    std::unique_ptr<VariableDeclarationList> GloblInits) {
  assert(getKind() == WI_Asm || getKind() == WI_Cfg);
  GlobalInits = std::move(GloblInits);
}
std::unique_ptr<VariableDeclarationList> EmitterWorkItem::getGlobalInits() { std::unique_ptr<VariableDeclarationList> EmitterWorkItem::getGlobalInits() {
assert(getKind() == WI_GlobalInits); assert(getKind() == WI_GlobalInits || getKind() == WI_Asm ||
getKind() == WI_Cfg);
return std::move(GlobalInits); return std::move(GlobalInits);
} }
......
...@@ -190,6 +190,7 @@ public: ...@@ -190,6 +190,7 @@ public:
EmitterWorkItem(uint32_t Seq, Cfg *F); EmitterWorkItem(uint32_t Seq, Cfg *F);
uint32_t getSequenceNumber() const { return Sequence; } uint32_t getSequenceNumber() const { return Sequence; }
ItemKind getKind() const { return Kind; } ItemKind getKind() const { return Kind; }
void setGlobalInits(std::unique_ptr<VariableDeclarationList> GloblInits);
std::unique_ptr<VariableDeclarationList> getGlobalInits(); std::unique_ptr<VariableDeclarationList> getGlobalInits();
std::unique_ptr<Assembler> getAsm(); std::unique_ptr<Assembler> getAsm();
std::unique_ptr<Cfg> getCfg(); std::unique_ptr<Cfg> getCfg();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment