Commit bbca754a by Jim Stichnoth

Subzero: Emit functions and global initializers in a separate thread.

(This is a continuation of https://codereview.chromium.org/876083007/ .)

Emission is done in a separate thread when -threads=N with N>0 is specified. This includes both functions and global initializers.

Emission is deterministic. The parser assigns sequence numbers, and the emitter thread reassembles work units into their original order, regardless of the number of threads.

Dump output, however, is not intended to be in deterministic, reassembled order. As such, lit tests that test dump output (i.e., '-verbose inst') are explicitly run with -threads=0.

For -elf-writer and -ias=1, the translator thread invokes Cfg::emitIAS() and the assembler buffer is passed to the emitter thread. For -ias=0, the translator thread passes the Cfg to the emitter thread, which then invokes Cfg::emit() to produce the textual asm.

Minor cleanup along the way:
* Removed Flags from the Ice::Translator object and ctor, since it was redundant with Ctx->getFlags().
* Cfg::getAssembler<> is the same as Cfg::getAssembler<Assembler> and is useful for just passing the assembler around.
* Removed the redundant Ctx argument from TargetDataLowering::lowerConstants() .

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4075
R=jvoung@chromium.org
Review URL: https://codereview.chromium.org/916653004
parent 6ca7d2b6
...@@ -118,6 +118,7 @@ SRCS = \ ...@@ -118,6 +118,7 @@ SRCS = \
IceRNG.cpp \ IceRNG.cpp \
IceTargetLowering.cpp \ IceTargetLowering.cpp \
IceTargetLoweringX8632.cpp \ IceTargetLoweringX8632.cpp \
IceThreading.cpp \
IceTimerTree.cpp \ IceTimerTree.cpp \
IceTranslator.cpp \ IceTranslator.cpp \
IceTypes.cpp \ IceTypes.cpp \
......
...@@ -70,7 +70,10 @@ def main(): ...@@ -70,7 +70,10 @@ def main():
cmd += ['|'] cmd += ['|']
cmd += [args.llvm2ice] cmd += [args.llvm2ice]
if args.insts: if args.insts:
cmd += ['-verbose', 'inst', '-notranslate'] # If the tests are based on '-verbose inst' output, force
# single-threaded translation because dump output does not get
# reassembled into order.
cmd += ['-verbose', 'inst', '-notranslate', '-threads=0']
if not args.llvm_source: if not args.llvm_source:
cmd += ['--bitcode-format=pnacl'] cmd += ['--bitcode-format=pnacl']
if not args.no_local_syms: if not args.no_local_syms:
......
...@@ -31,10 +31,10 @@ ArenaAllocator<> *getCurrentCfgAllocator() { ...@@ -31,10 +31,10 @@ ArenaAllocator<> *getCurrentCfgAllocator() {
return Cfg::getCurrentCfgAllocator(); return Cfg::getCurrentCfgAllocator();
} }
Cfg::Cfg(GlobalContext *Ctx) Cfg::Cfg(GlobalContext *Ctx, uint32_t SequenceNumber)
: Ctx(Ctx), VMask(Ctx->getVerbose()), FunctionName(""), : Ctx(Ctx), SequenceNumber(SequenceNumber), VMask(Ctx->getVerbose()),
ReturnType(IceType_void), IsInternalLinkage(false), HasError(false), FunctionName(""), ReturnType(IceType_void), IsInternalLinkage(false),
FocusedTiming(false), ErrorMessage(""), Entry(nullptr), HasError(false), FocusedTiming(false), ErrorMessage(""), Entry(nullptr),
NextInstNumber(Inst::NumberInitial), Allocator(new ArenaAllocator<>()), NextInstNumber(Inst::NumberInitial), Allocator(new ArenaAllocator<>()),
Live(nullptr), Live(nullptr),
Target(TargetLowering::createLowering(Ctx->getTargetArch(), this)), Target(TargetLowering::createLowering(Ctx->getTargetArch(), this)),
...@@ -418,17 +418,20 @@ void Cfg::doBranchOpt() { ...@@ -418,17 +418,20 @@ void Cfg::doBranchOpt() {
// ======================== Dump routines ======================== // // ======================== Dump routines ======================== //
void Cfg::emitTextHeader(const IceString &MangledName) { // emitTextHeader() is not target-specific (apart from what is
// abstracted by the Assembler), so it is defined here rather than in
// the target lowering class.
void Cfg::emitTextHeader(const IceString &MangledName, GlobalContext *Ctx,
const Assembler *Asm) {
// Note: Still used by emit IAS. // Note: Still used by emit IAS.
Ostream &Str = Ctx->getStrEmit(); Ostream &Str = Ctx->getStrEmit();
Str << "\t.text\n"; Str << "\t.text\n";
if (Ctx->getFlags().getFunctionSections()) if (Ctx->getFlags().getFunctionSections())
Str << "\t.section\t.text." << MangledName << ",\"ax\",@progbits\n"; Str << "\t.section\t.text." << MangledName << ",\"ax\",@progbits\n";
if (!getInternal() || Ctx->getFlags().getDisableInternal()) { if (!Asm->getInternal() || Ctx->getFlags().getDisableInternal()) {
Str << "\t.globl\t" << MangledName << "\n"; Str << "\t.globl\t" << MangledName << "\n";
Str << "\t.type\t" << MangledName << ",@function\n"; Str << "\t.type\t" << MangledName << ",@function\n";
} }
Assembler *Asm = getAssembler<Assembler>();
Str << "\t.p2align " << Asm->getBundleAlignLog2Bytes() << ",0x"; Str << "\t.p2align " << Asm->getBundleAlignLog2Bytes() << ",0x";
for (uint8_t I : Asm->getNonExecBundlePadding()) for (uint8_t I : Asm->getNonExecBundlePadding())
Str.write_hex(I); Str.write_hex(I);
...@@ -449,7 +452,7 @@ void Cfg::emit() { ...@@ -449,7 +452,7 @@ void Cfg::emit() {
OstreamLocker L(Ctx); OstreamLocker L(Ctx);
Ostream &Str = Ctx->getStrEmit(); Ostream &Str = Ctx->getStrEmit();
IceString MangledName = getContext()->mangleName(getFunctionName()); IceString MangledName = getContext()->mangleName(getFunctionName());
emitTextHeader(MangledName); emitTextHeader(MangledName, Ctx, getAssembler<>());
for (CfgNode *Node : Nodes) for (CfgNode *Node : Nodes)
Node->emit(this); Node->emit(this);
Str << "\n"; Str << "\n";
...@@ -458,22 +461,10 @@ void Cfg::emit() { ...@@ -458,22 +461,10 @@ void Cfg::emit() {
void Cfg::emitIAS() { void Cfg::emitIAS() {
TimerMarker T(TimerStack::TT_emit, this); TimerMarker T(TimerStack::TT_emit, this);
assert(!Ctx->getFlags().getDecorateAsm()); assert(!Ctx->getFlags().getDecorateAsm());
IceString MangledName = getContext()->mangleName(getFunctionName());
// The emitIAS() routines emit into the internal assembler buffer, // The emitIAS() routines emit into the internal assembler buffer,
// so there's no need to lock the streams until we're ready to call // so there's no need to lock the streams.
// emitIASBytes().
for (CfgNode *Node : Nodes) for (CfgNode *Node : Nodes)
Node->emitIAS(this); Node->emitIAS(this);
// Now write the function to the file and track.
if (Ctx->getFlags().getUseELFWriter()) {
getAssembler<Assembler>()->alignFunction();
Ctx->getObjectWriter()->writeFunctionCode(MangledName, getInternal(),
getAssembler<Assembler>());
} else {
OstreamLocker L(Ctx);
emitTextHeader(MangledName);
getAssembler<Assembler>()->emitIASBytes(Ctx);
}
} }
// Dumps the IR with an optional introductory message. // Dumps the IR with an optional introductory message.
......
...@@ -30,8 +30,9 @@ class Cfg { ...@@ -30,8 +30,9 @@ class Cfg {
public: public:
~Cfg(); ~Cfg();
static std::unique_ptr<Cfg> create(GlobalContext *Ctx) { static std::unique_ptr<Cfg> create(GlobalContext *Ctx,
return std::unique_ptr<Cfg>(new Cfg(Ctx)); uint32_t SequenceNumber) {
return std::unique_ptr<Cfg>(new Cfg(Ctx, SequenceNumber));
} }
// Gets a pointer to the current thread's Cfg. // Gets a pointer to the current thread's Cfg.
static const Cfg *getCurrentCfg() { return ICE_TLS_GET_FIELD(CurrentCfg); } static const Cfg *getCurrentCfg() { return ICE_TLS_GET_FIELD(CurrentCfg); }
...@@ -45,6 +46,7 @@ public: ...@@ -45,6 +46,7 @@ public:
} }
GlobalContext *getContext() const { return Ctx; } GlobalContext *getContext() const { return Ctx; }
// Returns the sequence number this Cfg was constructed with.  The
// emitter uses it to restore the original output order.
uint32_t getSequenceNumber() const { return SequenceNumber; }
// Returns true if any of the specified options in the verbose mask // Returns true if any of the specified options in the verbose mask
// are set. If the argument is omitted, it checks if any verbose // are set. If the argument is omitted, it checks if any verbose
...@@ -121,9 +123,10 @@ public: ...@@ -121,9 +123,10 @@ public:
TargetLowering *getTarget() const { return Target.get(); } TargetLowering *getTarget() const { return Target.get(); }
VariablesMetadata *getVMetadata() const { return VMetadata.get(); } VariablesMetadata *getVMetadata() const { return VMetadata.get(); }
Liveness *getLiveness() const { return Live.get(); } Liveness *getLiveness() const { return Live.get(); }
template <typename T> T *getAssembler() const { template <typename T = Assembler> T *getAssembler() const {
return static_cast<T *>(TargetAssembler.get()); return static_cast<T *>(TargetAssembler.get());
} }
// Releases ownership of the target assembler and returns the raw
// pointer.  After this call the Cfg no longer owns the assembler, so
// the caller becomes responsible for its lifetime.
Assembler *releaseAssembler() { return TargetAssembler.release(); }
bool hasComputedFrame() const; bool hasComputedFrame() const;
bool getFocusedTiming() const { return FocusedTiming; } bool getFocusedTiming() const { return FocusedTiming; }
void setFocusedTiming() { FocusedTiming = true; } void setFocusedTiming() { FocusedTiming = true; }
...@@ -159,7 +162,8 @@ public: ...@@ -159,7 +162,8 @@ public:
void emit(); void emit();
void emitIAS(); void emitIAS();
void emitTextHeader(const IceString &MangledName); static void emitTextHeader(const IceString &MangledName, GlobalContext *Ctx,
const Assembler *Asm);
void dump(const IceString &Message = ""); void dump(const IceString &Message = "");
// Allocate data of type T using the per-Cfg allocator. // Allocate data of type T using the per-Cfg allocator.
...@@ -181,9 +185,10 @@ public: ...@@ -181,9 +185,10 @@ public:
} }
private: private:
Cfg(GlobalContext *Ctx); Cfg(GlobalContext *Ctx, uint32_t SequenceNumber);
GlobalContext *Ctx; GlobalContext *Ctx;
uint32_t SequenceNumber; // output order for emission
VerboseMask VMask; VerboseMask VMask;
IceString FunctionName; IceString FunctionName;
Type ReturnType; Type ReturnType;
......
...@@ -893,7 +893,7 @@ void CfgNode::emit(Cfg *Func) const { ...@@ -893,7 +893,7 @@ void CfgNode::emit(Cfg *Func) const {
void CfgNode::emitIAS(Cfg *Func) const { void CfgNode::emitIAS(Cfg *Func) const {
Func->setCurrentNode(this); Func->setCurrentNode(this);
Assembler *Asm = Func->getAssembler<Assembler>(); Assembler *Asm = Func->getAssembler<>();
Asm->BindCfgNodeLabel(getIndex()); Asm->BindCfgNodeLabel(getIndex());
for (const Inst &I : Phis) { for (const Inst &I : Phis) {
if (I.isDeleted()) if (I.isDeleted())
......
...@@ -140,6 +140,7 @@ public: ...@@ -140,6 +140,7 @@ public:
// size_t accessors. // size_t accessors.
size_t getNumTranslationThreads() const { return NumTranslationThreads; } size_t getNumTranslationThreads() const { return NumTranslationThreads; }
// Returns true when no translation threads were requested, i.e. the
// number of translation threads is zero.
bool isSequential() const { return NumTranslationThreads == 0; }
void setNumTranslationThreads(size_t NewValue) { void setNumTranslationThreads(size_t NewValue) {
NumTranslationThreads = NewValue; NumTranslationThreads = NewValue;
} }
......
...@@ -84,7 +84,9 @@ public: ...@@ -84,7 +84,9 @@ public:
: LLVM2ICEConverter(Converter), Func(nullptr) {} : LLVM2ICEConverter(Converter), Func(nullptr) {}
void convertFunction(const Function *F) { void convertFunction(const Function *F) {
Func = Ice::Cfg::create(Ctx); if (Ctx->isIRGenerationDisabled())
return;
Func = Ice::Cfg::create(Ctx, Converter.getNextSequenceNumber());
Ice::Cfg::setCurrentCfg(Func.get()); Ice::Cfg::setCurrentCfg(Func.get());
VarMap.clear(); VarMap.clear();
...@@ -658,10 +660,10 @@ public: ...@@ -658,10 +660,10 @@ public:
: LLVM2ICEConverter(Converter) {} : LLVM2ICEConverter(Converter) {}
/// Converts global variables, and their initializers into ICE /// Converts global variables, and their initializers into ICE
/// global variable declarations, for module Mod. Puts corresponding /// global variable declarations, for module Mod. Returns the set of
/// converted declarations into VariableDeclarations. /// converted declarations.
void convertGlobalsToIce(Module *Mod, std::unique_ptr<Ice::VariableDeclarationList>
Ice::VariableDeclarationList &VariableDeclarations); convertGlobalsToIce(Module *Mod);
private: private:
// Adds the Initializer to the list of initializers for the Global // Adds the Initializer to the list of initializers for the Global
...@@ -696,8 +698,10 @@ private: ...@@ -696,8 +698,10 @@ private:
} }
}; };
void LLVM2ICEGlobalsConverter::convertGlobalsToIce( std::unique_ptr<Ice::VariableDeclarationList>
Module *Mod, Ice::VariableDeclarationList &VariableDeclarations) { LLVM2ICEGlobalsConverter::convertGlobalsToIce(Module *Mod) {
std::unique_ptr<Ice::VariableDeclarationList> VariableDeclarations(
new Ice::VariableDeclarationList);
for (Module::const_global_iterator I = Mod->global_begin(), for (Module::const_global_iterator I = Mod->global_begin(),
E = Mod->global_end(); E = Mod->global_end();
I != E; ++I) { I != E; ++I) {
...@@ -706,7 +710,7 @@ void LLVM2ICEGlobalsConverter::convertGlobalsToIce( ...@@ -706,7 +710,7 @@ void LLVM2ICEGlobalsConverter::convertGlobalsToIce(
Ice::GlobalDeclaration *Var = getConverter().getGlobalDeclaration(GV); Ice::GlobalDeclaration *Var = getConverter().getGlobalDeclaration(GV);
Ice::VariableDeclaration *VarDecl = cast<Ice::VariableDeclaration>(Var); Ice::VariableDeclaration *VarDecl = cast<Ice::VariableDeclaration>(Var);
VariableDeclarations.push_back(VarDecl); VariableDeclarations->push_back(VarDecl);
if (!GV->hasInternalLinkage() && GV->hasInitializer()) { if (!GV->hasInternalLinkage() && GV->hasInitializer()) {
std::string Buffer; std::string Buffer;
...@@ -739,6 +743,7 @@ void LLVM2ICEGlobalsConverter::convertGlobalsToIce( ...@@ -739,6 +743,7 @@ void LLVM2ICEGlobalsConverter::convertGlobalsToIce(
addGlobalInitializer(*VarDecl, Initializer); addGlobalInitializer(*VarDecl, Initializer);
} }
} }
return std::move(VariableDeclarations);
} }
void LLVM2ICEGlobalsConverter::addGlobalInitializer( void LLVM2ICEGlobalsConverter::addGlobalInitializer(
...@@ -801,7 +806,7 @@ void LLVM2ICEGlobalsConverter::addGlobalInitializer( ...@@ -801,7 +806,7 @@ void LLVM2ICEGlobalsConverter::addGlobalInitializer(
namespace Ice { namespace Ice {
void Converter::nameUnnamedGlobalVariables(Module *Mod) { void Converter::nameUnnamedGlobalVariables(Module *Mod) {
const IceString &GlobalPrefix = Flags.getDefaultGlobalPrefix(); const IceString &GlobalPrefix = Ctx->getFlags().getDefaultGlobalPrefix();
if (GlobalPrefix.empty()) if (GlobalPrefix.empty())
return; return;
uint32_t NameIndex = 0; uint32_t NameIndex = 0;
...@@ -816,7 +821,7 @@ void Converter::nameUnnamedGlobalVariables(Module *Mod) { ...@@ -816,7 +821,7 @@ void Converter::nameUnnamedGlobalVariables(Module *Mod) {
} }
void Converter::nameUnnamedFunctions(Module *Mod) { void Converter::nameUnnamedFunctions(Module *Mod) {
const IceString &FunctionPrefix = Flags.getDefaultFunctionPrefix(); const IceString &FunctionPrefix = Ctx->getFlags().getDefaultFunctionPrefix();
if (FunctionPrefix.empty()) if (FunctionPrefix.empty())
return; return;
uint32_t NameIndex = 0; uint32_t NameIndex = 0;
...@@ -882,10 +887,7 @@ void Converter::installGlobalDeclarations(Module *Mod) { ...@@ -882,10 +887,7 @@ void Converter::installGlobalDeclarations(Module *Mod) {
} }
void Converter::convertGlobals(Module *Mod) { void Converter::convertGlobals(Module *Mod) {
LLVM2ICEGlobalsConverter GlobalsConverter(*this); lowerGlobals(LLVM2ICEGlobalsConverter(*this).convertGlobalsToIce(Mod));
VariableDeclarationList VariableDeclarations;
GlobalsConverter.convertGlobalsToIce(Mod, VariableDeclarations);
lowerGlobals(VariableDeclarations);
} }
void Converter::convertFunctions() { void Converter::convertFunctions() {
......
...@@ -29,8 +29,8 @@ class Converter : public Translator { ...@@ -29,8 +29,8 @@ class Converter : public Translator {
Converter &operator=(const Converter &) = delete; Converter &operator=(const Converter &) = delete;
public: public:
Converter(llvm::Module *Mod, GlobalContext *Ctx, const Ice::ClFlags &Flags) Converter(llvm::Module *Mod, GlobalContext *Ctx)
: Translator(Ctx, Flags), Mod(Mod) {} : Translator(Ctx), Mod(Mod) {}
~Converter() {} ~Converter() {}
......
...@@ -134,8 +134,10 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, ...@@ -134,8 +134,10 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit,
: ConstPool(new ConstantPool()), ErrorStatus(), StrDump(OsDump), : ConstPool(new ConstantPool()), ErrorStatus(), StrDump(OsDump),
StrEmit(OsEmit), VMask(Mask), Arch(Arch), Opt(Opt), StrEmit(OsEmit), VMask(Mask), Arch(Arch), Opt(Opt),
TestPrefix(TestPrefix), Flags(Flags), RNG(""), ObjectWriter(), TestPrefix(TestPrefix), Flags(Flags), RNG(""), ObjectWriter(),
CfgQ(/*MaxSize=*/Flags.getNumTranslationThreads(), OptQ(/*Sequential=*/Flags.isSequential(),
/*Sequential=*/(Flags.getNumTranslationThreads() == 0)) { /*MaxSize=*/Flags.getNumTranslationThreads()),
// EmitQ is allowed unlimited size.
EmitQ(/*Sequential=*/Flags.isSequential()) {
// Make sure thread_local fields are properly initialized before any // Make sure thread_local fields are properly initialized before any
// accesses are made. Do this here instead of at the start of // accesses are made. Do this here instead of at the start of
// main() so that all clients (e.g. unit tests) can benefit for // main() so that all clients (e.g. unit tests) can benefit for
...@@ -162,7 +164,7 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, ...@@ -162,7 +164,7 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit,
} }
void GlobalContext::translateFunctions() { void GlobalContext::translateFunctions() {
while (std::unique_ptr<Cfg> Func = cfgQueueBlockingPop()) { while (std::unique_ptr<Cfg> Func = optQueueBlockingPop()) {
// Install Func in TLS for Cfg-specific container allocators. // Install Func in TLS for Cfg-specific container allocators.
Cfg::setCurrentCfg(Func.get()); Cfg::setCurrentCfg(Func.get());
// Reset per-function stats being accumulated in TLS. // Reset per-function stats being accumulated in TLS.
...@@ -180,26 +182,133 @@ void GlobalContext::translateFunctions() { ...@@ -180,26 +182,133 @@ void GlobalContext::translateFunctions() {
!matchSymbolName(Func->getFunctionName(), !matchSymbolName(Func->getFunctionName(),
getFlags().getTranslateOnly())) { getFlags().getTranslateOnly())) {
Func->dump(); Func->dump();
Cfg::setCurrentCfg(nullptr);
continue; // Func goes out of scope and gets deleted
}
Func->translate();
EmitterWorkItem *Item = nullptr;
if (Func->hasError()) {
getErrorStatus()->assign(EC_Translation);
OstreamLocker L(this);
getStrDump() << "ICE translation error: " << Func->getError() << "\n";
Item = new EmitterWorkItem(Func->getSequenceNumber());
} else { } else {
Func->translate(); if (getFlags().getUseIntegratedAssembler()) {
if (Func->hasError()) { Func->emitIAS();
getErrorStatus()->assign(EC_Translation); // The Cfg has already emitted into the assembly buffer, so
OstreamLocker L(this); // stats have been fully collected into this thread's TLS.
getStrDump() << "ICE translation error: " << Func->getError() << "\n"; // Dump them before TLS is reset for the next Cfg.
dumpStats(Func->getFunctionName());
Assembler *Asm = Func->releaseAssembler();
// Copy relevant fields into Asm before Func is deleted.
Asm->setFunctionName(Func->getFunctionName());
Asm->setInternal(Func->getInternal());
Item = new EmitterWorkItem(Func->getSequenceNumber(), Asm);
} else { } else {
if (getFlags().getUseIntegratedAssembler()) // The Cfg has not been emitted yet, so stats are not ready
Func->emitIAS(); // to be dumped.
else Item = new EmitterWorkItem(Func->getSequenceNumber(), Func.release());
Func->emit();
// TODO(stichnot): actually add to emit queue
} }
dumpStats(Func->getFunctionName());
} }
Cfg::setCurrentCfg(nullptr); Cfg::setCurrentCfg(nullptr);
assert(Item);
emitQueueBlockingPush(Item);
// The Cfg now gets deleted as Func goes out of scope. // The Cfg now gets deleted as Func goes out of scope.
} }
} }
namespace {
// Optionally dumps the given global variable declarations and then
// hands them to DataLowering for lowering.
//
// The declarations are dumped only when dumping is compiled in, some
// verbose option is set, and no verbose-focus function was requested.
// Nothing is lowered when translation is disabled.  The whole routine
// is timed under TT_emitGlobalInitializers.
void lowerGlobals(GlobalContext *Ctx,
                  std::unique_ptr<VariableDeclarationList> VariableDeclarations,
                  TargetDataLowering *DataLowering) {
  TimerMarker T(TimerStack::TT_emitGlobalInitializers, Ctx);

  if (ALLOW_DUMP && Ctx->getVerbose() &&
      Ctx->getFlags().getVerboseFocusOn().empty()) {
    // Hold the stream lock for the whole dump so that the output of
    // this list is not interleaved with other threads' output.
    OstreamLocker L(Ctx);
    Ostream &DumpStream = Ctx->getStrDump();
    for (const Ice::VariableDeclaration *Decl : *VariableDeclarations)
      Decl->dump(Ctx, DumpStream);
  }

  if (!Ctx->getFlags().getDisableTranslation())
    DataLowering->lowerGlobals(std::move(VariableDeclarations));
}
// Grows Pending, if necessary, so that Pending[Index] is a valid
// element.  Newly added slots are value-initialized (null).  The
// vector is never shrunk.
void resizePending(std::vector<EmitterWorkItem *> &Pending, uint32_t Index) {
  const bool IndexIsValid = Index < Pending.size();
  if (IndexIsValid)
    return;
  Pending.resize(Index + 1);
}
} // end of anonymous namespace
void GlobalContext::emitItems() {
const bool Threaded = !getFlags().isSequential();
// Pending is a vector containing the reassembled, ordered list of
// work items. When we're ready for the next item, we first check
// whether it's in the Pending list. If not, we take an item from
// the work queue, and if it's not the item we're waiting for, we
// insert it into Pending and repeat. The work item is deleted
// after it is processed.
std::vector<EmitterWorkItem *> Pending;
uint32_t DesiredSequenceNumber = getFirstSequenceNumber();
while (true) {
resizePending(Pending, DesiredSequenceNumber);
// See if Pending contains DesiredSequenceNumber.
EmitterWorkItem *RawItem = Pending[DesiredSequenceNumber];
if (RawItem == nullptr)
RawItem = emitQueueBlockingPop();
if (RawItem == nullptr)
return;
uint32_t ItemSeq = RawItem->getSequenceNumber();
if (Threaded && ItemSeq != DesiredSequenceNumber) {
resizePending(Pending, ItemSeq);
Pending[ItemSeq] = RawItem;
continue;
}
std::unique_ptr<EmitterWorkItem> Item(RawItem);
++DesiredSequenceNumber;
switch (Item->getKind()) {
case EmitterWorkItem::WI_Nop:
break;
case EmitterWorkItem::WI_GlobalInits: {
lowerGlobals(this, Item->getGlobalInits(),
TargetDataLowering::createLowering(this).get());
} break;
case EmitterWorkItem::WI_Asm: {
std::unique_ptr<Assembler> Asm = Item->getAsm();
Asm->alignFunction();
IceString MangledName = mangleName(Asm->getFunctionName());
if (getFlags().getUseELFWriter()) {
getObjectWriter()->writeFunctionCode(MangledName, Asm->getInternal(),
Asm.get());
} else {
OstreamLocker L(this);
Cfg::emitTextHeader(MangledName, this, Asm.get());
Asm->emitIASBytes(this);
}
} break;
case EmitterWorkItem::WI_Cfg: {
if (!ALLOW_DUMP)
llvm::report_fatal_error("WI_Cfg work item created inappropriately");
assert(!getFlags().getUseIntegratedAssembler());
std::unique_ptr<Cfg> Func = Item->getCfg();
// Unfortunately, we have to temporarily install the Cfg in TLS
// because Variable::asType() uses the allocator to create the
// differently-typed copy.
Cfg::setCurrentCfg(Func.get());
Func->emit();
Cfg::setCurrentCfg(nullptr);
dumpStats(Func->getFunctionName());
} break;
}
}
}
// Scan a string for S[0-9A-Z]*_ patterns and replace them with // Scan a string for S[0-9A-Z]*_ patterns and replace them with
// S<num>_ where <num> is the next base-36 value. If a type name // S<num>_ where <num> is the next base-36 value. If a type name
// legitimately contains that pattern, then the substitution will be // legitimately contains that pattern, then the substitution will be
...@@ -550,17 +659,31 @@ void GlobalContext::setTimerName(TimerStackIdT StackID, ...@@ -550,17 +659,31 @@ void GlobalContext::setTimerName(TimerStackIdT StackID,
Timers->at(StackID).setName(NewName); Timers->at(StackID).setName(NewName);
} }
// Note: cfgQueueBlockingPush and cfgQueueBlockingPop use unique_ptr // Note: optQueueBlockingPush and optQueueBlockingPop use unique_ptr
// at the interface to take and transfer ownership, but they // at the interface to take and transfer ownership, but they
// internally store the raw Cfg pointer in the work queue. This // internally store the raw Cfg pointer in the work queue. This
// allows e.g. future queue optimizations such as the use of atomics // allows e.g. future queue optimizations such as the use of atomics
// to modify queue elements. // to modify queue elements.
void GlobalContext::cfgQueueBlockingPush(std::unique_ptr<Cfg> Func) { void GlobalContext::optQueueBlockingPush(std::unique_ptr<Cfg> Func) {
CfgQ.blockingPush(Func.release()); assert(Func);
OptQ.blockingPush(Func.release());
if (getFlags().isSequential())
translateFunctions();
}
std::unique_ptr<Cfg> GlobalContext::optQueueBlockingPop() {
return std::unique_ptr<Cfg>(OptQ.blockingPop());
}
// Adds a non-null work item to the emitter queue.  When running
// sequentially, the item is emitted right away on the calling thread
// by running emitItems().
void GlobalContext::emitQueueBlockingPush(EmitterWorkItem *Item) {
  assert(Item);
  EmitQ.blockingPush(Item);
  if (!getFlags().isSequential())
    return;
  emitItems();
}
std::unique_ptr<Cfg> GlobalContext::cfgQueueBlockingPop() { EmitterWorkItem *GlobalContext::emitQueueBlockingPop() {
return std::unique_ptr<Cfg>(CfgQ.blockingPop()); return EmitQ.blockingPop();
} }
void GlobalContext::dumpStats(const IceString &Name, bool Final) { void GlobalContext::dumpStats(const IceString &Name, bool Final) {
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "IceClFlags.h" #include "IceClFlags.h"
#include "IceIntrinsics.h" #include "IceIntrinsics.h"
#include "IceRNG.h" #include "IceRNG.h"
#include "IceThreading.h"
#include "IceTimerTree.h" #include "IceTimerTree.h"
#include "IceTypes.h" #include "IceTypes.h"
#include "IceUtils.h" #include "IceUtils.h"
...@@ -31,6 +32,7 @@ namespace Ice { ...@@ -31,6 +32,7 @@ namespace Ice {
class ClFlags; class ClFlags;
class ConstantPool; class ConstantPool;
class EmitterWorkItem;
class FuncSigType; class FuncSigType;
// LockedPtr is a way to provide automatically locked access to some object. // LockedPtr is a way to provide automatically locked access to some object.
...@@ -276,18 +278,28 @@ public: ...@@ -276,18 +278,28 @@ public:
void resetTimer(TimerStackIdT StackID); void resetTimer(TimerStackIdT StackID);
void setTimerName(TimerStackIdT StackID, const IceString &NewName); void setTimerName(TimerStackIdT StackID, const IceString &NewName);
// This is the first work item sequence number that the parser
// produces, and correspondingly the first sequence number that the
// emitter thread will wait for. Start numbering at 1 to leave room
// for a sentinel, in case e.g. we wish to inject items with a
// special sequence number that may be executed out of order.
static uint32_t getFirstSequenceNumber() { return 1; }
// Adds a newly parsed and constructed function to the Cfg work // Adds a newly parsed and constructed function to the Cfg work
// queue. Notifies any idle workers that a new function is // queue. Notifies any idle workers that a new function is
// available for translating. May block if the work queue is too // available for translating. May block if the work queue is too
// large, in order to control memory footprint. // large, in order to control memory footprint.
void cfgQueueBlockingPush(std::unique_ptr<Cfg> Func); void optQueueBlockingPush(std::unique_ptr<Cfg> Func);
// Takes a Cfg from the work queue for translating. May block if // Takes a Cfg from the work queue for translating. May block if
// the work queue is currently empty. Returns nullptr if there is // the work queue is currently empty. Returns nullptr if there is
// no more work - the queue is empty and either end() has been // no more work - the queue is empty and either end() has been
// called or the Sequential flag was set. // called or the Sequential flag was set.
std::unique_ptr<Cfg> cfgQueueBlockingPop(); std::unique_ptr<Cfg> optQueueBlockingPop();
// Notifies that no more work will be added to the work queue. // Notifies that no more work will be added to the work queue.
void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); } void optQueueNotifyEnd() { OptQ.notifyEnd(); }
// Adds a work item to the emitter queue.  Item must be non-null.  In
// sequential mode the item is also emitted immediately on the calling
// thread.
void emitQueueBlockingPush(EmitterWorkItem *Item);
// Takes the next work item from the emitter queue.  A nullptr result
// is treated by emitItems() as "no more work".
EmitterWorkItem *emitQueueBlockingPop();
// Notifies the emitter queue that no more work items will be added.
void emitQueueNotifyEnd() { EmitQ.notifyEnd(); }
void startWorkerThreads() { void startWorkerThreads() {
size_t NumWorkers = getFlags().getNumTranslationThreads(); size_t NumWorkers = getFlags().getNumTranslationThreads();
...@@ -300,18 +312,29 @@ public: ...@@ -300,18 +312,29 @@ public:
&GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); &GlobalContext::translateFunctionsWrapper, this, WorkerTLS));
} }
if (NumWorkers) { if (NumWorkers) {
// TODO(stichnot): start a new thread for the emitter queue worker. ThreadContext *WorkerTLS = new ThreadContext();
Timers->initInto(WorkerTLS->Timers);
AllThreadContexts.push_back(WorkerTLS);
EmitterThreads.push_back(
std::thread(&GlobalContext::emitterWrapper, this, WorkerTLS));
} }
} }
void waitForWorkerThreads() { void waitForWorkerThreads() {
cfgQueueNotifyEnd(); optQueueNotifyEnd();
// TODO(stichnot): call end() on the emitter work queue.
for (std::thread &Worker : TranslationThreads) { for (std::thread &Worker : TranslationThreads) {
Worker.join(); Worker.join();
} }
TranslationThreads.clear(); TranslationThreads.clear();
// TODO(stichnot): join the emitter thread.
// Only notify the emit queue to end after all the translation
// threads have ended.
emitQueueNotifyEnd();
for (std::thread &Worker : EmitterThreads) {
Worker.join();
}
EmitterThreads.clear();
if (ALLOW_DUMP) { if (ALLOW_DUMP) {
auto Timers = getTimers(); auto Timers = getTimers();
for (ThreadContext *TLS : AllThreadContexts) for (ThreadContext *TLS : AllThreadContexts)
...@@ -334,6 +357,15 @@ public: ...@@ -334,6 +357,15 @@ public:
// Translate functions from the Cfg queue until the queue is empty. // Translate functions from the Cfg queue until the queue is empty.
void translateFunctions(); void translateFunctions();
// Emitter thread startup routine.  Installs MyTLS as this thread's
// ThreadContext and then runs emitItems() until it returns.
void emitterWrapper(ThreadContext *MyTLS) {
// The TLS pointer must be set before emitItems() runs.
ICE_TLS_SET_FIELD(TLS, MyTLS);
emitItems();
}
// Emit functions and global initializers from the emitter queue
// until the queue is empty.
void emitItems();
// Utility function to match a symbol name against a match string. // Utility function to match a symbol name against a match string.
// This is used in a few cases where we want to take some action on // This is used in a few cases where we want to take some action on
// a particular function or symbol based on a command-line argument, // a particular function or symbol based on a command-line argument,
...@@ -390,7 +422,8 @@ private: ...@@ -390,7 +422,8 @@ private:
const ClFlags &Flags; const ClFlags &Flags;
RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg.
std::unique_ptr<ELFObjectWriter> ObjectWriter; std::unique_ptr<ELFObjectWriter> ObjectWriter;
BoundedProducerConsumerQueue<Cfg> CfgQ; BoundedProducerConsumerQueue<Cfg> OptQ;
BoundedProducerConsumerQueue<EmitterWorkItem> EmitQ;
LockedPtr<ArenaAllocator<>> getAllocator() { LockedPtr<ArenaAllocator<>> getAllocator() {
return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock);
...@@ -405,8 +438,9 @@ private: ...@@ -405,8 +438,9 @@ private:
return LockedPtr<TimerList>(&Timers, &TimerLock); return LockedPtr<TimerList>(&Timers, &TimerLock);
} }
std::vector<ThreadContext *> AllThreadContexts; llvm::SmallVector<ThreadContext *, 128> AllThreadContexts;
std::vector<std::thread> TranslationThreads; llvm::SmallVector<std::thread, 128> TranslationThreads;
llvm::SmallVector<std::thread, 128> EmitterThreads;
// Each thread has its own TLS pointer which is also held in // Each thread has its own TLS pointer which is also held in
// AllThreadContexts. // AllThreadContexts.
ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS);
......
...@@ -252,19 +252,20 @@ void TargetLowering::regAlloc(RegAllocKind Kind) { ...@@ -252,19 +252,20 @@ void TargetLowering::regAlloc(RegAllocKind Kind) {
LinearScan.scan(RegMask, RandomizeRegisterAllocation); LinearScan.scan(RegMask, RandomizeRegisterAllocation);
} }
TargetDataLowering *TargetDataLowering::createLowering(GlobalContext *Ctx) { std::unique_ptr<TargetDataLowering>
TargetDataLowering::createLowering(GlobalContext *Ctx) {
// These statements can be #ifdef'd to specialize the code generator // These statements can be #ifdef'd to specialize the code generator
// to a subset of the available targets. TODO: use CRTP. // to a subset of the available targets. TODO: use CRTP.
TargetArch Target = Ctx->getTargetArch(); TargetArch Target = Ctx->getTargetArch();
if (Target == Target_X8632) if (Target == Target_X8632)
return TargetDataX8632::create(Ctx); return std::unique_ptr<TargetDataLowering>(TargetDataX8632::create(Ctx));
#if 0 #if 0
if (Target == Target_X8664) if (Target == Target_X8664)
return TargetDataX8664::create(Ctx); return std::unique_ptr<TargetDataLowering>(TargetDataX8664::create(Ctx));
if (Target == Target_ARM32) if (Target == Target_ARM32)
return TargetDataARM32::create(Ctx); return std::unique_ptr<TargetDataLowering>(TargetDataARM32::create(Ctx));
if (Target == Target_ARM64) if (Target == Target_ARM64)
return TargetDataARM64::create(Ctx); return std::unique_ptr<TargetDataLowering>(TargetDataARM64::create(Ctx));
#endif #endif
llvm_unreachable("Unsupported target"); llvm_unreachable("Unsupported target");
return nullptr; return nullptr;
......
...@@ -7,11 +7,14 @@ ...@@ -7,11 +7,14 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// //
// This file declares the TargetLowering and LoweringContext // This file declares the TargetLowering, LoweringContext, and
// classes. TargetLowering is an abstract class used to drive the // TargetDataLowering classes. TargetLowering is an abstract class
// translation/lowering process. LoweringContext maintains a // used to drive the translation/lowering process. LoweringContext
// context for lowering each instruction, offering conveniences such // maintains a context for lowering each instruction, offering
// as iterating over non-deleted instructions. // conveniences such as iterating over non-deleted instructions.
// TargetDataLowering is an abstract class used to drive the
// lowering/emission of global initializers, external global
// declarations, and internal constant pools.
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
...@@ -247,12 +250,12 @@ class TargetDataLowering { ...@@ -247,12 +250,12 @@ class TargetDataLowering {
TargetDataLowering &operator=(const TargetDataLowering &) = delete; TargetDataLowering &operator=(const TargetDataLowering &) = delete;
public: public:
static TargetDataLowering *createLowering(GlobalContext *Ctx); static std::unique_ptr<TargetDataLowering> createLowering(GlobalContext *Ctx);
virtual ~TargetDataLowering(); virtual ~TargetDataLowering();
virtual void lowerGlobal(const VariableDeclaration &Var) const = 0; virtual void
virtual void lowerGlobalsELF(const VariableDeclarationList &Vars) const = 0; lowerGlobals(std::unique_ptr<VariableDeclarationList> Vars) const = 0;
virtual void lowerConstants(GlobalContext *Ctx) const = 0; virtual void lowerConstants() const = 0;
protected: protected:
TargetDataLowering(GlobalContext *Ctx) : Ctx(Ctx) {} TargetDataLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
......
...@@ -4639,10 +4639,20 @@ void TargetDataX8632::lowerGlobal(const VariableDeclaration &Var) const { ...@@ -4639,10 +4639,20 @@ void TargetDataX8632::lowerGlobal(const VariableDeclaration &Var) const {
Str << "\t.size\t" << MangledName << ", " << Size << "\n"; Str << "\t.size\t" << MangledName << ", " << Size << "\n";
} }
void void TargetDataX8632::lowerGlobals(
TargetDataX8632::lowerGlobalsELF(const VariableDeclarationList &Vars) const { std::unique_ptr<VariableDeclarationList> Vars) const {
ELFObjectWriter *Writer = Ctx->getObjectWriter(); if (Ctx->getFlags().getUseELFWriter()) {
Writer->writeDataSection(Vars, llvm::ELF::R_386_32); ELFObjectWriter *Writer = Ctx->getObjectWriter();
Writer->writeDataSection(*Vars, llvm::ELF::R_386_32);
} else {
const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
OstreamLocker L(Ctx);
for (const VariableDeclaration *Var : *Vars) {
if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
lowerGlobal(*Var);
}
}
}
} }
template <typename T> struct PoolTypeConverter {}; template <typename T> struct PoolTypeConverter {};
...@@ -4701,7 +4711,7 @@ void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) { ...@@ -4701,7 +4711,7 @@ void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {
} }
} }
void TargetDataX8632::lowerConstants(GlobalContext *Ctx) const { void TargetDataX8632::lowerConstants() const {
if (Ctx->getFlags().getDisableTranslation()) if (Ctx->getFlags().getDisableTranslation())
return; return;
// No need to emit constants from the int pool since (for x86) they // No need to emit constants from the int pool since (for x86) they
......
...@@ -497,14 +497,14 @@ public: ...@@ -497,14 +497,14 @@ public:
return new TargetDataX8632(Ctx); return new TargetDataX8632(Ctx);
} }
void lowerGlobal(const VariableDeclaration &Var) const final; void lowerGlobals(std::unique_ptr<VariableDeclarationList> Vars) const final;
void lowerGlobalsELF(const VariableDeclarationList &Vars) const final; void lowerConstants() const final;
void lowerConstants(GlobalContext *Ctx) const final;
protected: protected:
TargetDataX8632(GlobalContext *Ctx); TargetDataX8632(GlobalContext *Ctx);
private: private:
void lowerGlobal(const VariableDeclaration &Var) const;
~TargetDataX8632() override {} ~TargetDataX8632() override {}
template <typename T> static void emitConstantPool(GlobalContext *Ctx); template <typename T> static void emitConstantPool(GlobalContext *Ctx);
}; };
......
//===- subzero/src/IceThreading.cpp - Threading function definitions ------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines threading-related functions.
//
//===----------------------------------------------------------------------===//
#include "IceCfg.h"
#include "IceDefs.h"
#include "IceThreading.h"
namespace Ice {
// Builds a WI_Nop placeholder item that only carries the sequence
// number Seq. The three payload pointers are default-constructed,
// i.e. left empty (null).
EmitterWorkItem::EmitterWorkItem(uint32_t Seq)
    : Sequence(Seq), Kind(WI_Nop) {}
// Builds a WI_GlobalInits item with sequence number Seq. The raw
// pointer D is stored in the GlobalInits unique_ptr, so the work item
// becomes its owner; the other payload pointers stay empty (null).
EmitterWorkItem::EmitterWorkItem(uint32_t Seq, VariableDeclarationList *D)
    : Sequence(Seq), Kind(WI_GlobalInits), GlobalInits(D) {}
// Builds a WI_Asm item with sequence number Seq. The raw pointer A is
// stored in the Function unique_ptr, so the work item becomes its
// owner; the other payload pointers stay empty (null).
EmitterWorkItem::EmitterWorkItem(uint32_t Seq, Assembler *A)
    : Sequence(Seq), Kind(WI_Asm), Function(A) {}
// Builds a WI_Cfg item with sequence number Seq. The raw pointer F is
// stored in the RawFunc unique_ptr, so the work item becomes its
// owner; the other payload pointers stay empty (null).
EmitterWorkItem::EmitterWorkItem(uint32_t Seq, Cfg *F)
    : Sequence(Seq), Kind(WI_Cfg), RawFunc(F) {}
std::unique_ptr<VariableDeclarationList> EmitterWorkItem::getGlobalInits() {
assert(getKind() == WI_GlobalInits);
return std::move(GlobalInits);
}
std::unique_ptr<Assembler> EmitterWorkItem::getAsm() {
assert(getKind() == WI_Asm);
return std::move(Function);
}
std::unique_ptr<Cfg> EmitterWorkItem::getCfg() {
assert(getKind() == WI_Cfg);
return std::move(RawFunc);
}
} // end of namespace Ice
//===- subzero/src/IceThreading.h - Threading functions ---------*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares threading-related functions.
//
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICETHREADING_H
#define SUBZERO_SRC_ICETHREADING_H
#include <condition_variable>
#include <mutex>
#include "IceDefs.h"
namespace Ice {
// BoundedProducerConsumerQueue is a work queue that allows multiple
// producers and multiple consumers. A producer adds entries using
// blockingPush(), and may block if the queue is "full". A producer
// uses notifyEnd() to indicate that no more entries will be added. A
// consumer removes an item using blockingPop(), which will return
// nullptr if notifyEnd() has been called and the queue is empty (it
// never returns nullptr if the queue contained any items).
//
// The MaxSize ctor arg controls the maximum size the queue can grow
// to (subject to a hard limit of MaxStaticSize). The Sequential
// arg indicates purely sequential execution in which the single
// thread should never wait().
//
// Two condition variables are used in the implementation.
// GrewOrEnded signals a waiting worker that a producer has changed
// the state of the queue. Shrunk signals a blocked producer that a
// consumer has changed the state of the queue.
//
// In the Sequential case the wait predicates are always satisfied,
// so the single thread never blocks on a condition variable; the
// lock is still acquired.
//
// Internally, the queue is implemented as a circular array of size
// MaxStaticSize, where the queue boundaries are denoted by the Front
// and Back fields. Front==Back indicates an empty queue.
template <typename T, size_t MaxStaticSize = 128>
class BoundedProducerConsumerQueue {
// Default construction, copy construction, and copy assignment are
// disabled.
BoundedProducerConsumerQueue() = delete;
BoundedProducerConsumerQueue(const BoundedProducerConsumerQueue &) = delete;
BoundedProducerConsumerQueue &
operator=(const BoundedProducerConsumerQueue &) = delete;
public:
// Creates an empty queue that has not ended. Sequential=true makes
// blockingPush() and blockingPop() never wait. MaxSize is the
// capacity at which blockingPush() starts waiting; it is clamped to
// MaxStaticSize.
BoundedProducerConsumerQueue(bool Sequential, size_t MaxSize = MaxStaticSize)
: Back(0), Front(0), MaxSize(std::min(MaxSize, MaxStaticSize)),
Sequential(Sequential), IsEnded(false) {}
// Appends Item to the back of the queue. Waits while the queue
// already holds MaxSize items, unless Sequential is set. One waiting
// consumer is notified after the lock has been released.
void blockingPush(T *Item) {
{
std::unique_lock<GlobalLockType> L(Lock);
// If the work queue is already "full", wait for a consumer to
// grab an element and shrink the queue.
Shrunk.wait(L, [this] { return size() < MaxSize || Sequential; });
push(Item);
}
GrewOrEnded.notify_one();
}
// Removes and returns the oldest item. Waits until the queue is
// non-empty or notifyEnd() has been called, unless Sequential is
// set. Returns nullptr if the queue is empty once the wait is over.
T *blockingPop() {
T *Item = nullptr;
bool ShouldNotifyProducer = false;
{
std::unique_lock<GlobalLockType> L(Lock);
GrewOrEnded.wait(L, [this] { return IsEnded || !empty() || Sequential; });
if (!empty()) {
Item = pop();
// Once the queue has ended, producers are no longer notified.
ShouldNotifyProducer = !IsEnded;
}
}
// Notify outside of the locked region.
if (ShouldNotifyProducer)
Shrunk.notify_one();
return Item;
}
// Marks the queue as ended (under the lock) and then wakes every
// consumer waiting in blockingPop().
void notifyEnd() {
{
std::lock_guard<GlobalLockType> L(Lock);
IsEnded = true;
}
GrewOrEnded.notify_all();
}
private:
// Mask that maps the ever-increasing Front/Back counters onto
// WorkItems[] slots; valid because MaxStaticSize is a power of 2.
const static size_t MaxStaticSizeMask = MaxStaticSize - 1;
static_assert(!(MaxStaticSize & (MaxStaticSize - 1)),
"MaxStaticSize must be a power of 2");
// NOTE(review): ICE_CACHELINE_BOUNDARY is defined outside this
// block; presumably it places the following fields on a separate
// cache line -- confirm against its definition.
// WorkItems and Lock are read/written by all.
ICE_CACHELINE_BOUNDARY;
T *WorkItems[MaxStaticSize];
ICE_CACHELINE_BOUNDARY;
// Lock guards access to WorkItems, Front, Back, and IsEnded.
GlobalLockType Lock;
ICE_CACHELINE_BOUNDARY;
// GrewOrEnded is written by the producers and read by the
// consumers. It is notified (by the producer) when something is
// added to the queue, in case consumers are waiting for a non-empty
// queue. It is also notified by notifyEnd().
std::condition_variable GrewOrEnded;
// Back is the index into WorkItems[] of where the next element will
// be pushed. (More precisely, Back&MaxStaticSizeMask is the index.)
// It is written by the producers, and read by all via size() and
// empty().
size_t Back;
ICE_CACHELINE_BOUNDARY;
// Shrunk is notified (by the consumer) when something is removed
// from the queue, in case a producer is waiting for the queue to
// drop below maximum capacity. It is written by the consumers and
// read by the producers.
std::condition_variable Shrunk;
// Front is the index into WorkItems[] of the oldest element,
// i.e. the next to be popped. (More precisely
// Front&MaxStaticSizeMask is the index.) It is written by the
// consumers, and read by all via size() and empty().
size_t Front;
ICE_CACHELINE_BOUNDARY;
// MaxSize and Sequential are read by all and written by none.
const size_t MaxSize;
const bool Sequential;
// IsEnded is read by the consumers, and only written once by the
// producer.
bool IsEnded;
// The lock must be held when the following methods are called.
// Front and Back are unmasked counters, so Front==Back means empty
// and Back-Front is the number of queued items.
bool empty() const { return Front == Back; }
size_t size() const { return Back - Front; }
// Stores Item in the slot selected by Back and advances Back. The
// assert runs after the store and checks that the circular array
// has not been overrun.
void push(T *Item) {
WorkItems[Back++ & MaxStaticSizeMask] = Item;
assert(size() <= MaxStaticSize);
}
// Returns the item in the slot selected by Front and advances Front.
// The queue must not be empty.
T *pop() {
assert(!empty());
return WorkItems[Front++ & MaxStaticSizeMask];
}
};
// EmitterWorkItem is a simple wrapper around a pointer that
// represents a work item to be emitted, i.e. a function or a set of
// global declarations and initializers, and it includes a sequence
// number so that work items can be emitted in a particular order for
// deterministic output. It acts like an interface class, but instead
// of making the classes of interest inherit from EmitterWorkItem, it
// wraps pointers to these classes. Some space is wasted compared to
// storing the pointers in a union, but not too much due to the work
// granularity.
class EmitterWorkItem {
  // Work items are neither default-constructible nor copyable.
  EmitterWorkItem() = delete;
  EmitterWorkItem(const EmitterWorkItem &) = delete;
  EmitterWorkItem &operator=(const EmitterWorkItem &) = delete;

public:
  // Identifies which payload, if any, a work item carries.
  enum ItemKind {
    // No actual work. This is a placeholder to maintain sequence
    // numbers in case there is a translation error.
    WI_Nop,
    // A list of global declarations and initializers.
    WI_GlobalInits,
    // A function that has already had emitIAS() called on it. The
    // work is transferred via the Assembler buffer, and the
    // originating Cfg has been deleted (to recover lots of memory).
    WI_Asm,
    // A Cfg that has not yet had emit() or emitIAS() called on it.
    // This is only used as a debugging configuration when we want to
    // emit "readable" assembly code, possibly annotated with liveness
    // and other information only available in the Cfg and not in the
    // Assembler buffer.
    WI_Cfg
  };

  // Each constructor fixes the item's kind and sequence number. The
  // pointer-taking constructors store the raw pointer in the matching
  // unique_ptr member, so the work item becomes the owner.

  // WI_Nop work item.
  explicit EmitterWorkItem(uint32_t Seq);
  // WI_GlobalInits work item.
  EmitterWorkItem(uint32_t Seq, VariableDeclarationList *D);
  // WI_Asm work item.
  EmitterWorkItem(uint32_t Seq, Assembler *A);
  // WI_Cfg work item.
  EmitterWorkItem(uint32_t Seq, Cfg *F);

  ItemKind getKind() const { return Kind; }
  uint32_t getSequenceNumber() const { return Sequence; }

  // Payload accessors. Each one returns the corresponding unique_ptr
  // payload to the caller.
  std::unique_ptr<VariableDeclarationList> getGlobalInits();
  std::unique_ptr<Assembler> getAsm();
  std::unique_ptr<Cfg> getCfg();

private:
  // Position of this item in the emission order.
  const uint32_t Sequence;
  // Which of the payload members below is meaningful.
  const ItemKind Kind;
  // Payload for WI_GlobalInits.
  std::unique_ptr<VariableDeclarationList> GlobalInits;
  // Payload for WI_Asm.
  std::unique_ptr<Assembler> Function;
  // Payload for WI_Cfg.
  std::unique_ptr<Cfg> RawFunc;
};
} // end of namespace Ice
#endif // SUBZERO_SRC_ICETHREADING_H
...@@ -21,9 +21,9 @@ ...@@ -21,9 +21,9 @@
using namespace Ice; using namespace Ice;
Translator::Translator(GlobalContext *Ctx, const ClFlags &Flags) Translator::Translator(GlobalContext *Ctx)
: Ctx(Ctx), Flags(Flags), : Ctx(Ctx), NextSequenceNumber(GlobalContext::getFirstSequenceNumber()),
DataLowering(TargetDataLowering::createLowering(Ctx)), ErrorStatus() {} ErrorStatus() {}
Translator::~Translator() {} Translator::~Translator() {}
...@@ -54,15 +54,12 @@ bool Translator::checkIfUnnamedNameSafe(const IceString &Name, const char *Kind, ...@@ -54,15 +54,12 @@ bool Translator::checkIfUnnamedNameSafe(const IceString &Name, const char *Kind,
} }
void Translator::translateFcn(std::unique_ptr<Cfg> Func) { void Translator::translateFcn(std::unique_ptr<Cfg> Func) {
Ctx->cfgQueueBlockingPush(std::move(Func)); Ctx->optQueueBlockingPush(std::move(Func));
if (Ctx->getFlags().getNumTranslationThreads() == 0) {
Ctx->translateFunctions();
}
} }
void Translator::emitConstants() { void Translator::emitConstants() {
if (!getErrorStatus()) if (!getErrorStatus())
DataLowering->lowerConstants(Ctx); TargetDataLowering::createLowering(Ctx)->lowerConstants();
} }
void Translator::transferErrorCode() const { void Translator::transferErrorCode() const {
...@@ -70,33 +67,9 @@ void Translator::transferErrorCode() const { ...@@ -70,33 +67,9 @@ void Translator::transferErrorCode() const {
Ctx->getErrorStatus()->assign(getErrorStatus().value()); Ctx->getErrorStatus()->assign(getErrorStatus().value());
} }
void void Translator::lowerGlobals(
Translator::lowerGlobals(const VariableDeclarationList &VariableDeclarations) { std::unique_ptr<VariableDeclarationList> VariableDeclarations) {
TimerMarker T(TimerStack::TT_emitGlobalInitializers, Ctx); EmitterWorkItem *Item = new EmitterWorkItem(getNextSequenceNumber(),
bool DisableTranslation = Ctx->getFlags().getDisableTranslation(); VariableDeclarations.release());
const bool DumpGlobalVariables = ALLOW_DUMP && Ctx->getVerbose() && Ctx->emitQueueBlockingPush(Item);
Ctx->getFlags().getVerboseFocusOn().empty();
if (Ctx->getFlags().getUseELFWriter()) {
// Dump all globals if requested, but don't interleave w/ emission.
if (DumpGlobalVariables) {
OstreamLocker L(Ctx);
Ostream &Stream = Ctx->getStrDump();
for (const Ice::VariableDeclaration *Global : VariableDeclarations) {
Global->dump(getContext(), Stream);
}
}
DataLowering->lowerGlobalsELF(VariableDeclarations);
} else {
const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
OstreamLocker L(Ctx);
Ostream &Stream = Ctx->getStrDump();
for (const Ice::VariableDeclaration *Global : VariableDeclarations) {
// Interleave dump output w/ emit output.
if (DumpGlobalVariables)
Global->dump(getContext(), Stream);
if (!DisableTranslation &&
GlobalContext::matchSymbolName(Global->getName(), TranslateOnly))
DataLowering->lowerGlobal(*Global);
}
}
} }
...@@ -34,14 +34,14 @@ class Translator { ...@@ -34,14 +34,14 @@ class Translator {
Translator &operator=(const Translator &) = delete; Translator &operator=(const Translator &) = delete;
public: public:
Translator(GlobalContext *Ctx, const ClFlags &Flags); Translator(GlobalContext *Ctx);
~Translator(); ~Translator();
const ErrorCode &getErrorStatus() const { return ErrorStatus; } const ErrorCode &getErrorStatus() const { return ErrorStatus; }
GlobalContext *getContext() const { return Ctx; } GlobalContext *getContext() const { return Ctx; }
const ClFlags &getFlags() const { return Flags; } const ClFlags &getFlags() const { return Ctx->getFlags(); }
/// Translates the constructed ICE function Fcn to machine code. /// Translates the constructed ICE function Fcn to machine code.
/// Takes ownership of Func. /// Takes ownership of Func.
...@@ -56,7 +56,8 @@ public: ...@@ -56,7 +56,8 @@ public:
/// Lowers the given list of global addresses to target. Generates /// Lowers the given list of global addresses to target. Generates
/// list of corresponding variable declarations. /// list of corresponding variable declarations.
void lowerGlobals(const VariableDeclarationList &VariableDeclarations); void
lowerGlobals(std::unique_ptr<VariableDeclarationList> VariableDeclarations);
/// Creates a name using the given prefix and corresponding index. /// Creates a name using the given prefix and corresponding index.
std::string createUnnamedName(const IceString &Prefix, SizeT Index); std::string createUnnamedName(const IceString &Prefix, SizeT Index);
...@@ -67,10 +68,11 @@ public: ...@@ -67,10 +68,11 @@ public:
bool checkIfUnnamedNameSafe(const IceString &Name, const char *Kind, bool checkIfUnnamedNameSafe(const IceString &Name, const char *Kind,
const IceString &Prefix); const IceString &Prefix);
uint32_t getNextSequenceNumber() { return NextSequenceNumber++; }
protected: protected:
GlobalContext *Ctx; GlobalContext *Ctx;
const ClFlags &Flags; uint32_t NextSequenceNumber;
std::unique_ptr<TargetDataLowering> DataLowering;
// Exit status of the translation. False is successful. True otherwise. // Exit status of the translation. False is successful. True otherwise.
ErrorCode ErrorStatus; ErrorCode ErrorStatus;
}; };
......
...@@ -13,9 +13,7 @@ ...@@ -13,9 +13,7 @@
#ifndef SUBZERO_SRC_ICEUTILS_H #ifndef SUBZERO_SRC_ICEUTILS_H
#define SUBZERO_SRC_ICEUTILS_H #define SUBZERO_SRC_ICEUTILS_H
#include <climits> #include <climits>
#include <condition_variable>
namespace Ice { namespace Ice {
...@@ -63,133 +61,6 @@ public: ...@@ -63,133 +61,6 @@ public:
} }
}; };
// BoundedProducerConsumerQueue is a work queue that allows multiple
// producers and multiple consumers. A producer adds entries using
// blockingPush(), and may block if the queue is "full". A producer
// uses notifyEnd() to indicate that no more entries will be added. A
// consumer removes an item using blockingPop(), which will return
// nullptr if notifyEnd() has been called and the queue is empty (it
// never returns nullptr if the queue contained any items).
//
// The MaxSize ctor arg controls the maximum size the queue can grow
// to (subject to a hard limit of MaxStaticSize). The Sequential
// arg indicates purely sequential execution in which the single
// thread should never wait().
//
// Two condition variables are used in the implementation.
// GrewOrEnded signals a waiting worker that a producer has changed
// the state of the queue. Shrunk signals a blocked producer that a
// consumer has changed the state of the queue.
//
// In the Sequential case the wait predicates are always satisfied,
// so the single thread never blocks on a condition variable; the
// lock is still acquired.
//
// Internally, the queue is implemented as a circular array of size
// MaxStaticSize, where the queue boundaries are denoted by the Front
// and Back fields. Front==Back indicates an empty queue.
template <typename T, size_t MaxStaticSize = 128>
class BoundedProducerConsumerQueue {
// Default construction, copy construction, and copy assignment are
// disabled.
BoundedProducerConsumerQueue() = delete;
BoundedProducerConsumerQueue(const BoundedProducerConsumerQueue &) = delete;
BoundedProducerConsumerQueue &
operator=(const BoundedProducerConsumerQueue &) = delete;
public:
// Creates an empty queue that has not ended. MaxSize is the
// capacity at which blockingPush() starts waiting; it is clamped to
// MaxStaticSize. Sequential=true makes blockingPush() and
// blockingPop() never wait.
BoundedProducerConsumerQueue(size_t MaxSize, bool Sequential)
: Back(0), Front(0), MaxSize(std::min(MaxSize, MaxStaticSize)),
Sequential(Sequential), IsEnded(false) {}
// Appends Item to the back of the queue. Waits while the queue
// already holds MaxSize items, unless Sequential is set. One waiting
// consumer is notified after the lock has been released.
void blockingPush(T *Item) {
{
std::unique_lock<GlobalLockType> L(Lock);
// If the work queue is already "full", wait for a consumer to
// grab an element and shrink the queue.
Shrunk.wait(L, [this] { return size() < MaxSize || Sequential; });
push(Item);
}
GrewOrEnded.notify_one();
}
// Removes and returns the oldest item. Waits until the queue is
// non-empty or notifyEnd() has been called, unless Sequential is
// set. Returns nullptr if the queue is empty once the wait is over.
T *blockingPop() {
T *Item = nullptr;
bool ShouldNotifyProducer = false;
{
std::unique_lock<GlobalLockType> L(Lock);
GrewOrEnded.wait(L, [this] { return IsEnded || !empty() || Sequential; });
if (!empty()) {
Item = pop();
// Once the queue has ended, producers are no longer notified.
ShouldNotifyProducer = !IsEnded;
}
}
// Notify outside of the locked region.
if (ShouldNotifyProducer)
Shrunk.notify_one();
return Item;
}
// Marks the queue as ended (under the lock) and then wakes every
// consumer waiting in blockingPop().
void notifyEnd() {
{
std::lock_guard<GlobalLockType> L(Lock);
IsEnded = true;
}
GrewOrEnded.notify_all();
}
private:
// Mask that maps the ever-increasing Front/Back counters onto
// WorkItems[] slots; valid because MaxStaticSize is a power of 2.
const static size_t MaxStaticSizeMask = MaxStaticSize - 1;
static_assert(!(MaxStaticSize & (MaxStaticSize - 1)),
"MaxStaticSize must be a power of 2");
// NOTE(review): ICE_CACHELINE_BOUNDARY is defined outside this
// block; presumably it places the following fields on a separate
// cache line -- confirm against its definition.
// WorkItems and Lock are read/written by all.
ICE_CACHELINE_BOUNDARY;
T *WorkItems[MaxStaticSize];
ICE_CACHELINE_BOUNDARY;
// Lock guards access to WorkItems, Front, Back, and IsEnded.
GlobalLockType Lock;
ICE_CACHELINE_BOUNDARY;
// GrewOrEnded is written by the producers and read by the
// consumers. It is notified (by the producer) when something is
// added to the queue, in case consumers are waiting for a non-empty
// queue. It is also notified by notifyEnd().
std::condition_variable GrewOrEnded;
// Back is the index into WorkItems[] of where the next element will
// be pushed. (More precisely, Back&MaxStaticSizeMask is the index.)
// It is written by the producers, and read by all via size() and
// empty().
size_t Back;
ICE_CACHELINE_BOUNDARY;
// Shrunk is notified (by the consumer) when something is removed
// from the queue, in case a producer is waiting for the queue to
// drop below maximum capacity. It is written by the consumers and
// read by the producers.
std::condition_variable Shrunk;
// Front is the index into WorkItems[] of the oldest element,
// i.e. the next to be popped. (More precisely
// Front&MaxStaticSizeMask is the index.) It is written by the
// consumers, and read by all via size() and empty().
size_t Front;
ICE_CACHELINE_BOUNDARY;
// MaxSize and Sequential are read by all and written by none.
const size_t MaxSize;
const bool Sequential;
// IsEnded is read by the consumers, and only written once by the
// producer.
bool IsEnded;
// The lock must be held when the following methods are called.
// Front and Back are unmasked counters, so Front==Back means empty
// and Back-Front is the number of queued items.
bool empty() const { return Front == Back; }
size_t size() const { return Back - Front; }
// Stores Item in the slot selected by Back and advances Back. The
// assert runs after the store and checks that the circular array
// has not been overrun.
void push(T *Item) {
WorkItems[Back++ & MaxStaticSizeMask] = Item;
assert(size() <= MaxStaticSize);
}
// Returns the item in the slot selected by Front and advances Front.
// The queue must not be empty.
T *pop() {
assert(!empty());
return WorkItems[Front++ & MaxStaticSizeMask];
}
};
} // end of namespace Ice } // end of namespace Ice
#endif // SUBZERO_SRC_ICEUTILS_H #endif // SUBZERO_SRC_ICEUTILS_H
...@@ -1150,7 +1150,8 @@ public: ...@@ -1150,7 +1150,8 @@ public:
} }
if (!isIRGenerationDisabled()) if (!isIRGenerationDisabled())
Func = Ice::Cfg::create(getTranslator().getContext()); Func = Ice::Cfg::create(getTranslator().getContext(),
getTranslator().getNextSequenceNumber());
Ice::Cfg::setCurrentCfg(Func.get()); Ice::Cfg::setCurrentCfg(Func.get());
// TODO(kschimpf) Clean up API to add a function signature to // TODO(kschimpf) Clean up API to add a function signature to
...@@ -1185,7 +1186,7 @@ public: ...@@ -1185,7 +1186,7 @@ public:
// translation of all remaining functions. This allows successive // translation of all remaining functions. This allows successive
// parsing errors to be reported, without adding extra checks to // parsing errors to be reported, without adding extra checks to
// the translator for such parsing errors. // the translator for such parsing errors.
if (Context->getNumErrors() == 0) { if (Context->getNumErrors() == 0 && Func) {
getTranslator().translateFcn(std::move(Func)); getTranslator().translateFcn(std::move(Func));
// The translator now has ownership of Func. // The translator now has ownership of Func.
} else { } else {
...@@ -2863,10 +2864,7 @@ private: ...@@ -2863,10 +2864,7 @@ private:
if (!GlobalDeclarationNamesAndInitializersInstalled) { if (!GlobalDeclarationNamesAndInitializersInstalled) {
Context->installGlobalNames(); Context->installGlobalNames();
Context->createValueIDs(); Context->createValueIDs();
std::unique_ptr<Ice::VariableDeclarationList> DeclsPtr = getTranslator().lowerGlobals(Context->getGlobalVariables());
Context->getGlobalVariables();
const Ice::VariableDeclarationList &Decls = *DeclsPtr;
getTranslator().lowerGlobals(Decls);
GlobalDeclarationNamesAndInitializersInstalled = true; GlobalDeclarationNamesAndInitializersInstalled = true;
} }
} }
......
...@@ -30,8 +30,7 @@ class PNaClTranslator : public Translator { ...@@ -30,8 +30,7 @@ class PNaClTranslator : public Translator {
PNaClTranslator &operator=(const PNaClTranslator &) = delete; PNaClTranslator &operator=(const PNaClTranslator &) = delete;
public: public:
PNaClTranslator(GlobalContext *Ctx, const ClFlags &Flags) PNaClTranslator(GlobalContext *Ctx) : Translator(Ctx) {}
: Translator(Ctx, Flags) {}
// Reads the PNaCl bitcode file and translates to ICE, which is then // Reads the PNaCl bitcode file and translates to ICE, which is then
// converted to machine code. Sets ErrorStatus to 1 if any errors // converted to machine code. Sets ErrorStatus to 1 if any errors
......
...@@ -149,7 +149,7 @@ class Assembler { ...@@ -149,7 +149,7 @@ class Assembler {
Assembler &operator=(const Assembler &) = delete; Assembler &operator=(const Assembler &) = delete;
public: public:
Assembler() : buffer_(*this) {} Assembler() : FunctionName(""), IsInternal(false), buffer_(*this) {}
virtual ~Assembler() {} virtual ~Assembler() {}
// Allocate a chunk of bytes using the per-Assembler allocator. // Allocate a chunk of bytes using the per-Assembler allocator.
...@@ -190,9 +190,18 @@ public: ...@@ -190,9 +190,18 @@ public:
} }
void emitIASBytes(GlobalContext *Ctx) const; void emitIASBytes(GlobalContext *Ctx) const;
bool getInternal() const { return IsInternal; }
void setInternal(bool Internal) { IsInternal = Internal; }
const IceString &getFunctionName() { return FunctionName; }
void setFunctionName(const IceString &NewName) { FunctionName = NewName; }
private: private:
ArenaAllocator<32 * 1024> Allocator; ArenaAllocator<32 * 1024> Allocator;
// FunctionName and IsInternal are transferred from the original Cfg
// object, since the Cfg object may be deleted by the time the
// assembler buffer is emitted.
IceString FunctionName;
bool IsInternal;
protected: protected:
AssemblerBuffer buffer_; AssemblerBuffer buffer_;
......
...@@ -374,7 +374,7 @@ int main(int argc, char **argv) { ...@@ -374,7 +374,7 @@ int main(int argc, char **argv) {
std::unique_ptr<Ice::Translator> Translator; std::unique_ptr<Ice::Translator> Translator;
if (BuildOnRead) { if (BuildOnRead) {
std::unique_ptr<Ice::PNaClTranslator> PTranslator( std::unique_ptr<Ice::PNaClTranslator> PTranslator(
new Ice::PNaClTranslator(&Ctx, Flags)); new Ice::PNaClTranslator(&Ctx));
PTranslator->translate(IRFilename); PTranslator->translate(IRFilename);
Translator.reset(PTranslator.release()); Translator.reset(PTranslator.release());
} else if (ALLOW_LLVM_IR) { } else if (ALLOW_LLVM_IR) {
...@@ -390,8 +390,7 @@ int main(int argc, char **argv) { ...@@ -390,8 +390,7 @@ int main(int argc, char **argv) {
return GetReturnValue(Ice::EC_Bitcode); return GetReturnValue(Ice::EC_Bitcode);
} }
std::unique_ptr<Ice::Converter> Converter( std::unique_ptr<Ice::Converter> Converter(new Ice::Converter(Mod, &Ctx));
new Ice::Converter(Mod, &Ctx, Flags));
Converter->convertToIce(); Converter->convertToIce();
Translator.reset(Converter.release()); Translator.reset(Converter.release());
} else { } else {
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
; REQUIRES: allow_dump ; REQUIRES: allow_dump
; RUN: %p2i -i %s --args --verbose inst -ias=0 | FileCheck %s ; RUN: %p2i -i %s --args --verbose inst -threads=0 -ias=0 | FileCheck %s
define i32 @Add(i32 %a, i32 %b) { define i32 @Add(i32 %a, i32 %b) {
; CHECK: define i32 @Add ; CHECK: define i32 @Add
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
; REQUIRES: allow_dump ; REQUIRES: allow_dump
; RUN: %p2i -i %s --args -O2 --verbose inst | FileCheck %s ; RUN: %p2i -i %s --args -O2 --verbose inst -threads=0 | FileCheck %s
; RUN: %p2i -i %s --args -Om1 --verbose inst | FileCheck %s ; RUN: %p2i -i %s --args -Om1 --verbose inst -threads=0 | FileCheck %s
define i32 @simple_cond_branch(i32 %foo, i32 %bar) { define i32 @simple_cond_branch(i32 %foo, i32 %bar) {
entry: entry:
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
; REQUIRES: allow_dump ; REQUIRES: allow_dump
; RUN: %p2i -i %s --args --verbose inst | FileCheck %s ; RUN: %p2i -i %s --args --verbose inst -threads=0 | FileCheck %s
define void @load_i64(i32 %addr_arg) { define void @load_i64(i32 %addr_arg) {
entry: entry:
......
; Test that some errors trigger when the usage of NaCl atomic ; Test that some errors trigger when the usage of NaCl atomic
; intrinsics does not match the required ABI. ; intrinsics does not match the required ABI.
; RUN: %p2i -i %s --args --verbose none --exit-success 2>&1 | FileCheck %s ; RUN: %p2i -i %s --args --verbose none --exit-success -threads=0 2>&1 \
; RUN: | FileCheck %s
declare i8 @llvm.nacl.atomic.load.i8(i8*, i32) declare i8 @llvm.nacl.atomic.load.i8(i8*, i32)
declare i16 @llvm.nacl.atomic.load.i16(i16*, i32) declare i16 @llvm.nacl.atomic.load.i16(i16*, i32)
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
; REQUIRES: allow_dump ; REQUIRES: allow_dump
; RUN: %p2i -i %s --args --verbose inst | FileCheck %s ; RUN: %p2i -i %s --args --verbose inst -threads=0 | FileCheck %s
define void @store_i64(i32 %addr_arg) { define void @store_i64(i32 %addr_arg) {
entry: entry:
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
; TODO(kschimpf) Find out why lc2i is needed. ; TODO(kschimpf) Find out why lc2i is needed.
; REQUIRES: allow_llvm_ir_as_input ; REQUIRES: allow_llvm_ir_as_input
; RUN: %lc2i -i %s --args --verbose inst | FileCheck %s ; RUN: %lc2i -i %s --args --verbose inst -threads=0 | FileCheck %s
define internal i32 @compute_important_function(i32 %v1, i32 %v2) { define internal i32 @compute_important_function(i32 %v1, i32 %v2) {
entry: entry:
......
...@@ -28,10 +28,11 @@ bool IceTest::SubzeroBitcodeMunger::runTest(const char *TestName, ...@@ -28,10 +28,11 @@ bool IceTest::SubzeroBitcodeMunger::runTest(const char *TestName,
Ice::ClFlags Flags; Ice::ClFlags Flags;
Flags.setAllowErrorRecovery(true); Flags.setAllowErrorRecovery(true);
Flags.setGenerateUnitTestMessages(true); Flags.setGenerateUnitTestMessages(true);
Flags.setUseIntegratedAssembler(true); // for the MINIMAL build
Ice::GlobalContext Ctx(DumpStream, DumpStream, nullptr, Ice::GlobalContext Ctx(DumpStream, DumpStream, nullptr,
Ice::IceV_Instructions, Ice::Target_X8632, Ice::Opt_m1, Ice::IceV_Instructions, Ice::Target_X8632, Ice::Opt_m1,
"", Flags); "", Flags);
Ice::PNaClTranslator Translator(&Ctx, Flags); Ice::PNaClTranslator Translator(&Ctx);
Translator.translateBuffer(TestName, MungedInput.get()); Translator.translateBuffer(TestName, MungedInput.get());
cleanupTest(); cleanupTest();
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment