Commit e8457a26 by Karl Schimpf

Allow Subzero to parse function blocks in parallel.

This CL modifies the code so that we can do sequential or parallel parsing of function blocks in bitcode files, based on a command line argument. The command line argument was added because, during testing, I had one (transient) compilation failure whose cause I do not know. Hence, I was reluctant to install this CL without a command-line flag. To test the new parallel parser, the easiest solution is to edit IceClFlags.def and set the default value of ParseParallel to true. This CL also fixes up the unit parsing tests, as well as one parsing test. The cause of these problems was the implicit assumption that function blocks are parsed sequentially, which no longer applies when function blocks are parsed in parallel. To fix this, the "threads=0" command line argument was added. This CL also adds the starting up of worker threads to the unit tests, since parsing of function blocks will happen in the translation thread if parallel parsing is turned on. The OptQ queue was modified to contain OptWorkItem instances with a single virtual method to get the parsed code. This allows the IceConverter to continue to work, by simply passing the generated Cfg as a work item. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4363 R=jpp@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/1834473002 .
parent bbd449d2
...@@ -94,6 +94,9 @@ def main(): ...@@ -94,6 +94,9 @@ def main():
help='Input is textual bitcode (not .ll)') help='Input is textual bitcode (not .ll)')
argparser.add_argument('--expect-fail', required=False, action='store_true', argparser.add_argument('--expect-fail', required=False, action='store_true',
help='Negate success of run by using LLVM not') help='Negate success of run by using LLVM not')
argparser.add_argument('--allow-pnacl-reader-error-recovery',
action='store_true',
help='Continue parsing after first error')
argparser.add_argument('--args', '-a', nargs=argparse.REMAINDER, argparser.add_argument('--args', '-a', nargs=argparse.REMAINDER,
default=[], default=[],
help='Remaining arguments are passed to pnacl-sz') help='Remaining arguments are passed to pnacl-sz')
...@@ -118,7 +121,9 @@ def main(): ...@@ -118,7 +121,9 @@ def main():
raise RuntimeError("Can't specify both '--tbc' and '--llvm'") raise RuntimeError("Can't specify both '--tbc' and '--llvm'")
if args.forceasm: if args.forceasm:
if args.filetype == 'asm': if args.expect_fail:
args.forceasm = False
elif args.filetype == 'asm':
pass pass
elif args.filetype == 'iasm': elif args.filetype == 'iasm':
# TODO(sehr) implement forceasm for iasm. # TODO(sehr) implement forceasm for iasm.
...@@ -148,6 +153,8 @@ def main(): ...@@ -148,6 +153,8 @@ def main():
# single-threaded translation because dump output does not get # single-threaded translation because dump output does not get
# reassembled into order. # reassembled into order.
cmd += ['-verbose', 'inst,global_init', '-notranslate', '-threads=0'] cmd += ['-verbose', 'inst,global_init', '-notranslate', '-threads=0']
elif args.allow_pnacl_reader_error_recovery:
cmd += ['-allow-pnacl-reader-error-recovery', '-threads=0']
if not args.llvm_source: if not args.llvm_source:
cmd += ['--bitcode-format=pnacl'] cmd += ['--bitcode-format=pnacl']
if not args.no_local_syms: if not args.no_local_syms:
......
...@@ -196,6 +196,9 @@ struct dev_list_flag {}; ...@@ -196,6 +196,9 @@ struct dev_list_flag {};
"Low-level integrated assembly ('.s') file"), \ "Low-level integrated assembly ('.s') file"), \
clEnumValEnd)) \ clEnumValEnd)) \
\ \
X(ParseParallel, bool, dev_opt_flag, "parse-parallel", \
cl::desc("Parse function blocks in parallel"), cl::init(true)) \
\
X(RandomizeAndPoolImmediatesOption, Ice::RandomizeAndPoolImmediatesEnum, \ X(RandomizeAndPoolImmediatesOption, Ice::RandomizeAndPoolImmediatesEnum, \
dev_opt_flag, "randomize-pool-immediates", \ dev_opt_flag, "randomize-pool-immediates", \
cl::desc("Randomize or pooling the representation of immediates"), \ cl::desc("Randomize or pooling the representation of immediates"), \
......
...@@ -62,8 +62,6 @@ void Compiler::run(const Ice::ClFlags &Flags, GlobalContext &Ctx, ...@@ -62,8 +62,6 @@ void Compiler::run(const Ice::ClFlags &Flags, GlobalContext &Ctx,
// allows only --filetype=obj. Check here to avoid cryptic error messages // allows only --filetype=obj. Check here to avoid cryptic error messages
// downstream. // downstream.
if (!BuildDefs::dump() && Ctx.getFlags().getOutFileType() != FT_Elf) { if (!BuildDefs::dump() && Ctx.getFlags().getOutFileType() != FT_Elf) {
// TODO(stichnot): Access the actual command-line argument via
// llvm::Option.ArgStr and .ValueStr .
Ctx.getStrError() Ctx.getStrError()
<< "Error: only --filetype=obj is supported in this build.\n"; << "Error: only --filetype=obj is supported in this build.\n";
Ctx.getErrorStatus()->assign(EC_Args); Ctx.getErrorStatus()->assign(EC_Args);
...@@ -89,6 +87,7 @@ void Compiler::run(const Ice::ClFlags &Flags, GlobalContext &Ctx, ...@@ -89,6 +87,7 @@ void Compiler::run(const Ice::ClFlags &Flags, GlobalContext &Ctx,
Ctx.getStrError() Ctx.getStrError()
<< "non BuildOnRead is not supported w/ PNACL_BROWSER_TRANSLATOR\n"; << "non BuildOnRead is not supported w/ PNACL_BROWSER_TRANSLATOR\n";
Ctx.getErrorStatus()->assign(EC_Args); Ctx.getErrorStatus()->assign(EC_Args);
Ctx.waitForWorkerThreads();
return; return;
} }
// Globals must be kept alive after lowering when converting from LLVM to // Globals must be kept alive after lowering when converting from LLVM to
...@@ -107,6 +106,7 @@ void Compiler::run(const Ice::ClFlags &Flags, GlobalContext &Ctx, ...@@ -107,6 +106,7 @@ void Compiler::run(const Ice::ClFlags &Flags, GlobalContext &Ctx,
if (!Mod) { if (!Mod) {
Err.print(Flags.getAppName().c_str(), llvm::errs()); Err.print(Flags.getAppName().c_str(), llvm::errs());
Ctx.getErrorStatus()->assign(EC_Bitcode); Ctx.getErrorStatus()->assign(EC_Bitcode);
Ctx.waitForWorkerThreads();
return; return;
} }
...@@ -117,6 +117,7 @@ void Compiler::run(const Ice::ClFlags &Flags, GlobalContext &Ctx, ...@@ -117,6 +117,7 @@ void Compiler::run(const Ice::ClFlags &Flags, GlobalContext &Ctx,
Ctx.getStrError() << "Error: Build doesn't allow LLVM IR, " Ctx.getStrError() << "Error: Build doesn't allow LLVM IR, "
<< "--build-on-read=0 not allowed\n"; << "--build-on-read=0 not allowed\n";
Ctx.getErrorStatus()->assign(EC_Args); Ctx.getErrorStatus()->assign(EC_Args);
Ctx.waitForWorkerThreads();
return; return;
} }
......
...@@ -213,6 +213,37 @@ public: ...@@ -213,6 +213,37 @@ public:
UndefPool Undefs; UndefPool Undefs;
}; };
/// Drains both work queues, joins every translation and emitter thread, and
/// then folds the per-thread timers and statistics into the global totals.
///
/// Only the first call does any work: the atomic exchange below returns the
/// previous value of the flag, so any later call returns immediately.
void GlobalContext::waitForWorkerThreads() {
if (WaitForWorkerThreadsCalled.exchange(true))
return;
// Tell the translation threads that no more work will be queued, then wait
// for each of them to finish.
optQueueNotifyEnd();
for (std::thread &Worker : TranslationThreads) {
Worker.join();
}
TranslationThreads.clear();
// Only notify the emit queue to end after all the translation threads have
// ended.
emitQueueNotifyEnd();
for (std::thread &Worker : EmitterThreads) {
Worker.join();
}
EmitterThreads.clear();
// All workers have been joined at this point; merge the timers that each
// thread accumulated in its ThreadContext.
if (BuildDefs::timers()) {
auto Timers = getTimers();
for (ThreadContext *TLS : AllThreadContexts)
Timers->mergeFrom(TLS->Timers);
}
if (BuildDefs::dump()) {
// Do a separate loop over AllThreadContexts to avoid holding two locks at
// once.
// NOTE(review): presumably getTimers() and getStatsCumulative() each return
// a handle that holds a lock for its lifetime, which is why the two merges
// live in separate scopes -- confirm against their declarations.
auto Stats = getStatsCumulative();
for (ThreadContext *TLS : AllThreadContexts)
Stats->add(TLS->StatsCumulative);
}
}
void GlobalContext::CodeStats::dump(const std::string &Name, void GlobalContext::CodeStats::dump(const std::string &Name,
GlobalContext *Ctx) { GlobalContext *Ctx) {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
...@@ -252,7 +283,10 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError, ...@@ -252,7 +283,10 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError,
: Strings(new StringPool()), ConstPool(new ConstantPool()), ErrorStatus(), : Strings(new StringPool()), ConstPool(new ConstantPool()), ErrorStatus(),
StrDump(OsDump), StrEmit(OsEmit), StrError(OsError), IntrinsicsInfo(this), StrDump(OsDump), StrEmit(OsEmit), StrError(OsError), IntrinsicsInfo(this),
ObjectWriter(), OptQ(/*Sequential=*/Flags.isSequential(), ObjectWriter(), OptQ(/*Sequential=*/Flags.isSequential(),
/*MaxSize=*/Flags.getNumTranslationThreads()), /*MaxSize=*/
(Flags.getParseParallel() && Flags.getBuildOnRead())
? MaxOptQSize
: Flags.getNumTranslationThreads()),
// EmitQ is allowed unlimited size. // EmitQ is allowed unlimited size.
EmitQ(/*Sequential=*/Flags.isSequential()), EmitQ(/*Sequential=*/Flags.isSequential()),
DataLowering(TargetDataLowering::createLowering(this)) { DataLowering(TargetDataLowering::createLowering(this)) {
...@@ -305,7 +339,8 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError, ...@@ -305,7 +339,8 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError,
void GlobalContext::translateFunctions() { void GlobalContext::translateFunctions() {
TimerMarker Timer(TimerStack::TT_translateFunctions, this); TimerMarker Timer(TimerStack::TT_translateFunctions, this);
while (std::unique_ptr<Cfg> Func = optQueueBlockingPop()) { while (std::unique_ptr<OptWorkItem> OptItem = optQueueBlockingPop()) {
auto Func = OptItem->getParsedCfg();
// Install Func in TLS for Cfg-specific container allocators. // Install Func in TLS for Cfg-specific container allocators.
CfgLocalAllocatorScope _(Func.get()); CfgLocalAllocatorScope _(Func.get());
// Reset per-function stats being accumulated in TLS. // Reset per-function stats being accumulated in TLS.
...@@ -878,19 +913,19 @@ void GlobalContext::setTimerName(TimerStackIdT StackID, ...@@ -878,19 +913,19 @@ void GlobalContext::setTimerName(TimerStackIdT StackID,
// interface to take and transfer ownership, but they internally store the raw // interface to take and transfer ownership, but they internally store the raw
// Cfg pointer in the work queue. This allows e.g. future queue optimizations // Cfg pointer in the work queue. This allows e.g. future queue optimizations
// such as the use of atomics to modify queue elements. // such as the use of atomics to modify queue elements.
void GlobalContext::optQueueBlockingPush(std::unique_ptr<Cfg> Func) { void GlobalContext::optQueueBlockingPush(std::unique_ptr<OptWorkItem> Item) {
assert(Func); assert(Item);
{ {
TimerMarker _(TimerStack::TT_qTransPush, this); TimerMarker _(TimerStack::TT_qTransPush, this);
OptQ.blockingPush(std::move(Func)); OptQ.blockingPush(std::move(Item));
} }
if (getFlags().isSequential()) if (getFlags().isSequential())
translateFunctions(); translateFunctions();
} }
std::unique_ptr<Cfg> GlobalContext::optQueueBlockingPop() { std::unique_ptr<OptWorkItem> GlobalContext::optQueueBlockingPop() {
TimerMarker _(TimerStack::TT_qTransPop, this); TimerMarker _(TimerStack::TT_qTransPop, this);
return std::unique_ptr<Cfg>(OptQ.blockingPop()); return std::unique_ptr<OptWorkItem>(OptQ.blockingPop());
} }
void GlobalContext::emitQueueBlockingPush( void GlobalContext::emitQueueBlockingPush(
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "IceUtils.h" #include "IceUtils.h"
#include <array> #include <array>
#include <atomic>
#include <cassert> #include <cassert>
#include <functional> #include <functional>
#include <memory> #include <memory>
...@@ -54,6 +55,21 @@ enum class RuntimeHelper { ...@@ -54,6 +55,21 @@ enum class RuntimeHelper {
H_Num H_Num
}; };
/// OptWorkItem is a simple wrapper used to pass parse information on a function
/// block, to a translator thread.
///
/// This is an abstract interface: instances cannot be copied or assigned, and
/// can only be constructed through a derived class that implements
/// getParsedCfg().
class OptWorkItem {
OptWorkItem(const OptWorkItem &) = delete;
OptWorkItem &operator=(const OptWorkItem &) = delete;
public:
// Get the Cfg for the function to translate. Ownership of the returned Cfg
// is handed to the caller.
virtual std::unique_ptr<Cfg> getParsedCfg() = 0;
// Virtual so that a derived work item can be destroyed through an
// OptWorkItem pointer.
virtual ~OptWorkItem() = default;
protected:
// Only derived classes may construct a work item.
OptWorkItem() = default;
};
class GlobalContext { class GlobalContext {
GlobalContext() = delete; GlobalContext() = delete;
GlobalContext(const GlobalContext &) = delete; GlobalContext(const GlobalContext &) = delete;
...@@ -358,12 +374,12 @@ public: ...@@ -358,12 +374,12 @@ public:
/// Notifies any idle workers that a new function is available for /// Notifies any idle workers that a new function is available for
/// translating. May block if the work queue is too large, in order to control /// translating. May block if the work queue is too large, in order to control
/// memory footprint. /// memory footprint.
void optQueueBlockingPush(std::unique_ptr<Cfg> Func); void optQueueBlockingPush(std::unique_ptr<OptWorkItem> Item);
/// Takes a Cfg from the work queue for translating. May block if the work /// Takes a Cfg from the work queue for translating. May block if the work
/// queue is currently empty. Returns nullptr if there is no more work - the /// queue is currently empty. Returns nullptr if there is no more work - the
/// queue is empty and either end() has been called or the Sequential flag was /// queue is empty and either end() has been called or the Sequential flag was
/// set. /// set.
std::unique_ptr<Cfg> optQueueBlockingPop(); std::unique_ptr<OptWorkItem> optQueueBlockingPop();
/// Notifies that no more work will be added to the work queue. /// Notifies that no more work will be added to the work queue.
void optQueueNotifyEnd() { OptQ.notifyEnd(); } void optQueueNotifyEnd() { OptQ.notifyEnd(); }
...@@ -405,34 +421,7 @@ public: ...@@ -405,34 +421,7 @@ public:
} }
} }
void waitForWorkerThreads() { void waitForWorkerThreads();
optQueueNotifyEnd();
for (std::thread &Worker : TranslationThreads) {
Worker.join();
}
TranslationThreads.clear();
// Only notify the emit queue to end after all the translation threads have
// ended.
emitQueueNotifyEnd();
for (std::thread &Worker : EmitterThreads) {
Worker.join();
}
EmitterThreads.clear();
if (BuildDefs::timers()) {
auto Timers = getTimers();
for (ThreadContext *TLS : AllThreadContexts)
Timers->mergeFrom(TLS->Timers);
}
if (BuildDefs::dump()) {
// Do a separate loop over AllThreadContexts to avoid holding two locks
// at once.
auto Stats = getStatsCumulative();
for (ThreadContext *TLS : AllThreadContexts)
Stats->add(TLS->StatsCumulative);
}
}
/// Translation thread startup routine. /// Translation thread startup routine.
void translateFunctionsWrapper(ThreadContext *MyTLS) { void translateFunctionsWrapper(ThreadContext *MyTLS) {
...@@ -545,12 +534,16 @@ private: ...@@ -545,12 +534,16 @@ private:
Ostream *StrEmit; /// Stream for code emission Ostream *StrEmit; /// Stream for code emission
Ostream *StrError; /// Stream for logging errors. Ostream *StrError; /// Stream for logging errors.
// True if waitForWorkerThreads() has been called.
std::atomic_bool WaitForWorkerThreadsCalled;
ICE_CACHELINE_BOUNDARY; ICE_CACHELINE_BOUNDARY;
Intrinsics IntrinsicsInfo; Intrinsics IntrinsicsInfo;
// TODO(jpp): move to EmitterContext. // TODO(jpp): move to EmitterContext.
std::unique_ptr<ELFObjectWriter> ObjectWriter; std::unique_ptr<ELFObjectWriter> ObjectWriter;
BoundedProducerConsumerQueue<Cfg> OptQ; static constexpr size_t MaxOptQSize = 1 << 16;
BoundedProducerConsumerQueue<OptWorkItem, MaxOptQSize> OptQ;
BoundedProducerConsumerQueue<EmitterWorkItem> EmitQ; BoundedProducerConsumerQueue<EmitterWorkItem> EmitQ;
// DataLowering is only ever used by a single thread at a time (either in // DataLowering is only ever used by a single thread at a time (either in
// emitItems(), or in IceCompiler::run before the compilation is over.) // emitItems(), or in IceCompiler::run before the compilation is over.)
......
...@@ -24,6 +24,20 @@ ...@@ -24,6 +24,20 @@
namespace Ice { namespace Ice {
/// CfgOptWorkItem is an OptWorkItem that wraps an already-built Cfg, so that a
/// finished Cfg can be placed on the optimization queue as a work item.
class CfgOptWorkItem final : public OptWorkItem {
  CfgOptWorkItem() = delete;
  CfgOptWorkItem(const CfgOptWorkItem &) = delete;
  CfgOptWorkItem &operator=(const CfgOptWorkItem &) = delete;

public:
  /// Takes ownership of Func. Marked explicit so that a std::unique_ptr<Cfg>
  /// is never silently converted into a work item.
  explicit CfgOptWorkItem(std::unique_ptr<Cfg> Func) : Func(std::move(Func)) {}

  /// Hands the wrapped Cfg over to the caller. The Cfg is moved out, so this
  /// yields the Cfg only once; any later call returns a null pointer.
  std::unique_ptr<Cfg> getParsedCfg() override { return std::move(Func); }

  ~CfgOptWorkItem() override = default;

private:
  /// The Cfg being carried; null once getParsedCfg() has been called.
  std::unique_ptr<Cfg> Func;
};
Translator::Translator(GlobalContext *Ctx) Translator::Translator(GlobalContext *Ctx)
: Ctx(Ctx), NextSequenceNumber(GlobalContext::getFirstSequenceNumber()), : Ctx(Ctx), NextSequenceNumber(GlobalContext::getFirstSequenceNumber()),
ErrorStatus() {} ErrorStatus() {}
...@@ -57,7 +71,7 @@ bool Translator::checkIfUnnamedNameSafe(const std::string &Name, ...@@ -57,7 +71,7 @@ bool Translator::checkIfUnnamedNameSafe(const std::string &Name,
} }
void Translator::translateFcn(std::unique_ptr<Cfg> Func) { void Translator::translateFcn(std::unique_ptr<Cfg> Func) {
Ctx->optQueueBlockingPush(std::move(Func)); Ctx->optQueueBlockingPush(makeUnique<CfgOptWorkItem>(std::move(Func)));
} }
void Translator::lowerGlobals( void Translator::lowerGlobals(
......
; Tests malformed insertelement and extractelement vector instructions. ; Tests malformed insertelement and extractelement vector instructions.
; RUN: %if --need=allow_dump --command llvm-as < %s \ ; RUN: %if --need=allow_dump --command \
; RUN: | %if --need=allow_dump --command pnacl-freeze \ ; RUN: %p2i --expect-fail -i %s --allow-pnacl-reader-error-recovery \
; RUN: | %if --need=allow_dump --command not %pnacl_sz -notranslate \ ; RUN: --filetype=obj -o /dev/null --args -notranslate \
; RUN: -build-on-read -allow-pnacl-reader-error-recovery \
; RUN: -filetype=obj -o /dev/null \
; RUN: | %if --need=allow_dump --command FileCheck %s ; RUN: | %if --need=allow_dump --command FileCheck %s
; RUN: %if --need=no_dump --command llvm-as < %s \ ; RUN: %if --need=no_dump --command \
; RUN: | %if --need=no_dump --command pnacl-freeze \ ; RUN: %p2i --expect-fail -i %s --allow-pnacl-reader-error-recovery \
; RUN: | %if --need=no_dump --command not %pnacl_sz -notranslate \ ; RUN: --filetype=obj -o /dev/null --args -notranslate \
; RUN: -build-on-read -allow-pnacl-reader-error-recovery \
; RUN: -filetype=obj -o /dev/null \
; RUN: | %if --need=no_dump --command FileCheck %s --check-prefix=MIN ; RUN: | %if --need=no_dump --command FileCheck %s --check-prefix=MIN
define void @ExtractV4xi1(<4 x i1> %v, i32 %i) { define void @ExtractV4xi1(<4 x i1> %v, i32 %i) {
......
...@@ -25,7 +25,8 @@ void IceTest::SubzeroBitcodeMunger::resetMungeFlags() { ...@@ -25,7 +25,8 @@ void IceTest::SubzeroBitcodeMunger::resetMungeFlags() {
Flags.setOptLevel(Ice::Opt_m1); Flags.setOptLevel(Ice::Opt_m1);
Flags.setOutFileType(Ice::FT_Iasm); Flags.setOutFileType(Ice::FT_Iasm);
Flags.setTargetArch(Ice::Target_X8632); Flags.setTargetArch(Ice::Target_X8632);
Flags.setVerbose(Ice::IceV_Instructions); Flags.setNumTranslationThreads(0);
Flags.setParseParallel(false);
} }
bool IceTest::SubzeroBitcodeMunger::runTest(const uint64_t Munges[], bool IceTest::SubzeroBitcodeMunger::runTest(const uint64_t Munges[],
...@@ -34,10 +35,12 @@ bool IceTest::SubzeroBitcodeMunger::runTest(const uint64_t Munges[], ...@@ -34,10 +35,12 @@ bool IceTest::SubzeroBitcodeMunger::runTest(const uint64_t Munges[],
const bool AddHeader = true; const bool AddHeader = true;
setupTest(Munges, MungeSize, AddHeader); setupTest(Munges, MungeSize, AddHeader);
Ice::GlobalContext Ctx(DumpStream, DumpStream, DumpStream, nullptr); Ice::GlobalContext Ctx(DumpStream, DumpStream, DumpStream, nullptr);
Ctx.startWorkerThreads();
Ice::PNaClTranslator Translator(&Ctx); Ice::PNaClTranslator Translator(&Ctx);
const char *BufferName = "Test"; const char *BufferName = "Test";
Flags.setDisableTranslation(DisableTranslation); Flags.setDisableTranslation(DisableTranslation);
Translator.translateBuffer(BufferName, MungedInput.get()); Translator.translateBuffer(BufferName, MungedInput.get());
Ctx.waitForWorkerThreads();
cleanupTest(); cleanupTest();
return Translator.getErrorStatus().value() == 0; return Translator.getErrorStatus().value() == 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment