Commit e8457a26 by Karl Schimpf

Allow Subzero to parse function blocks in parallel.

This CL modifies the code so that we can do sequential and parallel parsing of function blocks in bitcode files, based on a command line argument.

The command line argument was added because during testing, I had one compilation failure (transient), and I do not know the cause. Hence, I was reluctant to install this CL without a command-line flag. To test the new parallel parser, the easiest solution is to edit IceClFlags.def and set the default value of ParseParallel to true.

This code also fixes up unit parsing tests, as well as one parsing test. The cause of these problems was the implicit assumption that function blocks are parsed sequentially, which no longer applies when function blocks are parsed in parallel. To fix this, the "threads=0" command line argument was added. It also added the starting up of worker threads, since parsing of function blocks will happen in the translation thread if parallel parsing is turned on.

The OptQ queue was modified to contain OptWorkItem instances with a single virtual method to get the parsed code. This allows the IceConverter to continue to work, by simply passing the generated Cfg as a work item.

BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4363
R=jpp@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/1834473002 .
parent bbd449d2
......@@ -94,6 +94,9 @@ def main():
help='Input is textual bitcode (not .ll)')
argparser.add_argument('--expect-fail', required=False, action='store_true',
help='Negate success of run by using LLVM not')
argparser.add_argument('--allow-pnacl-reader-error-recovery',
action='store_true',
help='Continue parsing after first error')
argparser.add_argument('--args', '-a', nargs=argparse.REMAINDER,
default=[],
help='Remaining arguments are passed to pnacl-sz')
......@@ -118,7 +121,9 @@ def main():
raise RuntimeError("Can't specify both '--tbc' and '--llvm'")
if args.forceasm:
if args.filetype == 'asm':
if args.expect_fail:
args.forceasm = False
elif args.filetype == 'asm':
pass
elif args.filetype == 'iasm':
# TODO(sehr) implement forceasm for iasm.
......@@ -148,6 +153,8 @@ def main():
# single-threaded translation because dump output does not get
# reassembled into order.
cmd += ['-verbose', 'inst,global_init', '-notranslate', '-threads=0']
elif args.allow_pnacl_reader_error_recovery:
cmd += ['-allow-pnacl-reader-error-recovery', '-threads=0']
if not args.llvm_source:
cmd += ['--bitcode-format=pnacl']
if not args.no_local_syms:
......
......@@ -196,6 +196,9 @@ struct dev_list_flag {};
"Low-level integrated assembly ('.s') file"), \
clEnumValEnd)) \
\
X(ParseParallel, bool, dev_opt_flag, "parse-parallel", \
cl::desc("Parse function blocks in parallel"), cl::init(true)) \
\
X(RandomizeAndPoolImmediatesOption, Ice::RandomizeAndPoolImmediatesEnum, \
dev_opt_flag, "randomize-pool-immediates", \
cl::desc("Randomize or pooling the representation of immediates"), \
......
......@@ -62,8 +62,6 @@ void Compiler::run(const Ice::ClFlags &Flags, GlobalContext &Ctx,
// allows only --filetype=obj. Check here to avoid cryptic error messages
// downstream.
if (!BuildDefs::dump() && Ctx.getFlags().getOutFileType() != FT_Elf) {
// TODO(stichnot): Access the actual command-line argument via
// llvm::Option.ArgStr and .ValueStr .
Ctx.getStrError()
<< "Error: only --filetype=obj is supported in this build.\n";
Ctx.getErrorStatus()->assign(EC_Args);
......@@ -89,6 +87,7 @@ void Compiler::run(const Ice::ClFlags &Flags, GlobalContext &Ctx,
Ctx.getStrError()
<< "non BuildOnRead is not supported w/ PNACL_BROWSER_TRANSLATOR\n";
Ctx.getErrorStatus()->assign(EC_Args);
Ctx.waitForWorkerThreads();
return;
}
// Globals must be kept alive after lowering when converting from LLVM to
......@@ -107,6 +106,7 @@ void Compiler::run(const Ice::ClFlags &Flags, GlobalContext &Ctx,
if (!Mod) {
Err.print(Flags.getAppName().c_str(), llvm::errs());
Ctx.getErrorStatus()->assign(EC_Bitcode);
Ctx.waitForWorkerThreads();
return;
}
......@@ -117,6 +117,7 @@ void Compiler::run(const Ice::ClFlags &Flags, GlobalContext &Ctx,
Ctx.getStrError() << "Error: Build doesn't allow LLVM IR, "
<< "--build-on-read=0 not allowed\n";
Ctx.getErrorStatus()->assign(EC_Args);
Ctx.waitForWorkerThreads();
return;
}
......
......@@ -213,6 +213,37 @@ public:
UndefPool Undefs;
};
void GlobalContext::waitForWorkerThreads() {
if (WaitForWorkerThreadsCalled.exchange(true))
return;
optQueueNotifyEnd();
for (std::thread &Worker : TranslationThreads) {
Worker.join();
}
TranslationThreads.clear();
// Only notify the emit queue to end after all the translation threads have
// ended.
emitQueueNotifyEnd();
for (std::thread &Worker : EmitterThreads) {
Worker.join();
}
EmitterThreads.clear();
if (BuildDefs::timers()) {
auto Timers = getTimers();
for (ThreadContext *TLS : AllThreadContexts)
Timers->mergeFrom(TLS->Timers);
}
if (BuildDefs::dump()) {
// Do a separate loop over AllThreadContexts to avoid holding two locks at
// once.
auto Stats = getStatsCumulative();
for (ThreadContext *TLS : AllThreadContexts)
Stats->add(TLS->StatsCumulative);
}
}
void GlobalContext::CodeStats::dump(const std::string &Name,
GlobalContext *Ctx) {
if (!BuildDefs::dump())
......@@ -252,7 +283,10 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError,
: Strings(new StringPool()), ConstPool(new ConstantPool()), ErrorStatus(),
StrDump(OsDump), StrEmit(OsEmit), StrError(OsError), IntrinsicsInfo(this),
ObjectWriter(), OptQ(/*Sequential=*/Flags.isSequential(),
/*MaxSize=*/Flags.getNumTranslationThreads()),
/*MaxSize=*/
(Flags.getParseParallel() && Flags.getBuildOnRead())
? MaxOptQSize
: Flags.getNumTranslationThreads()),
// EmitQ is allowed unlimited size.
EmitQ(/*Sequential=*/Flags.isSequential()),
DataLowering(TargetDataLowering::createLowering(this)) {
......@@ -305,7 +339,8 @@ GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError,
void GlobalContext::translateFunctions() {
TimerMarker Timer(TimerStack::TT_translateFunctions, this);
while (std::unique_ptr<Cfg> Func = optQueueBlockingPop()) {
while (std::unique_ptr<OptWorkItem> OptItem = optQueueBlockingPop()) {
auto Func = OptItem->getParsedCfg();
// Install Func in TLS for Cfg-specific container allocators.
CfgLocalAllocatorScope _(Func.get());
// Reset per-function stats being accumulated in TLS.
......@@ -878,19 +913,19 @@ void GlobalContext::setTimerName(TimerStackIdT StackID,
// interface to take and transfer ownership, but they internally store the raw
// Cfg pointer in the work queue. This allows e.g. future queue optimizations
// such as the use of atomics to modify queue elements.
//
// optQueueBlockingPush: asserts that the argument is non-null, pushes it onto
// OptQ (timed under TT_qTransPush), and, when the flags report sequential
// mode, immediately runs translateFunctions() on the calling thread.
//
// NOTE(review): the signature, the assert and the blockingPush each appear
// twice below (a Cfg/Func form followed by an OptWorkItem/Item form). This
// looks like the before/after lines of a flattened diff; presumably only the
// OptWorkItem form is live in the real file -- confirm.
void GlobalContext::optQueueBlockingPush(std::unique_ptr<Cfg> Func) {
assert(Func);
void GlobalContext::optQueueBlockingPush(std::unique_ptr<OptWorkItem> Item) {
assert(Item);
{
// Inner scope limits the lifetime of the push timer to the push itself.
TimerMarker _(TimerStack::TT_qTransPush, this);
OptQ.blockingPush(std::move(Func));
OptQ.blockingPush(std::move(Item));
}
if (getFlags().isSequential())
translateFunctions();
}
// optQueueBlockingPop: takes the pointer returned by OptQ.blockingPop() (timed
// under TT_qTransPop) and returns it wrapped in a std::unique_ptr.
//
// NOTE(review): the signature and the return statement each appear twice (a
// Cfg form followed by an OptWorkItem form). This looks like the before/after
// lines of a flattened diff; presumably only the OptWorkItem form is live in
// the real file -- confirm.
std::unique_ptr<Cfg> GlobalContext::optQueueBlockingPop() {
std::unique_ptr<OptWorkItem> GlobalContext::optQueueBlockingPop() {
TimerMarker _(TimerStack::TT_qTransPop, this);
return std::unique_ptr<Cfg>(OptQ.blockingPop());
return std::unique_ptr<OptWorkItem>(OptQ.blockingPop());
}
void GlobalContext::emitQueueBlockingPush(
......
......@@ -29,6 +29,7 @@
#include "IceUtils.h"
#include <array>
#include <atomic>
#include <cassert>
#include <functional>
#include <memory>
......@@ -54,6 +55,21 @@ enum class RuntimeHelper {
H_Num
};
/// OptWorkItem is a simple wrapper used to pass parse information on a function
/// block, to a translator thread.
/// Abstract work item used to pass the parse information for one function
/// block to a translator thread. The class is non-copyable and can only be
/// constructed through a derived class.
class OptWorkItem {
public:
  /// Returns the Cfg for the function to translate.
  virtual std::unique_ptr<Cfg> getParsedCfg() = 0;

  /// Virtual so that derived items can be destroyed through an OptWorkItem
  /// pointer.
  virtual ~OptWorkItem() = default;

protected:
  OptWorkItem() = default;

private:
  // Copying is disabled.
  OptWorkItem(const OptWorkItem &) = delete;
  OptWorkItem &operator=(const OptWorkItem &) = delete;
};
class GlobalContext {
GlobalContext() = delete;
GlobalContext(const GlobalContext &) = delete;
......@@ -358,12 +374,12 @@ public:
/// Notifies any idle workers that a new function is available for
/// translating. May block if the work queue is too large, in order to control
/// memory footprint.
void optQueueBlockingPush(std::unique_ptr<Cfg> Func);
void optQueueBlockingPush(std::unique_ptr<OptWorkItem> Item);
/// Takes a Cfg from the work queue for translating. May block if the work
/// queue is currently empty. Returns nullptr if there is no more work - the
/// queue is empty and either end() has been called or the Sequential flag was
/// set.
std::unique_ptr<Cfg> optQueueBlockingPop();
std::unique_ptr<OptWorkItem> optQueueBlockingPop();
/// Notifies that no more work will be added to the work queue.
void optQueueNotifyEnd() { OptQ.notifyEnd(); }
......@@ -405,34 +421,7 @@ public:
}
}
void waitForWorkerThreads() {
optQueueNotifyEnd();
for (std::thread &Worker : TranslationThreads) {
Worker.join();
}
TranslationThreads.clear();
// Only notify the emit queue to end after all the translation threads have
// ended.
emitQueueNotifyEnd();
for (std::thread &Worker : EmitterThreads) {
Worker.join();
}
EmitterThreads.clear();
if (BuildDefs::timers()) {
auto Timers = getTimers();
for (ThreadContext *TLS : AllThreadContexts)
Timers->mergeFrom(TLS->Timers);
}
if (BuildDefs::dump()) {
// Do a separate loop over AllThreadContexts to avoid holding two locks
// at once.
auto Stats = getStatsCumulative();
for (ThreadContext *TLS : AllThreadContexts)
Stats->add(TLS->StatsCumulative);
}
}
void waitForWorkerThreads();
/// Translation thread startup routine.
void translateFunctionsWrapper(ThreadContext *MyTLS) {
......@@ -545,12 +534,16 @@ private:
Ostream *StrEmit; /// Stream for code emission
Ostream *StrError; /// Stream for logging errors.
// True if waitForWorkerThreads() has been called.
std::atomic_bool WaitForWorkerThreadsCalled;
ICE_CACHELINE_BOUNDARY;
Intrinsics IntrinsicsInfo;
// TODO(jpp): move to EmitterContext.
std::unique_ptr<ELFObjectWriter> ObjectWriter;
BoundedProducerConsumerQueue<Cfg> OptQ;
static constexpr size_t MaxOptQSize = 1 << 16;
BoundedProducerConsumerQueue<OptWorkItem, MaxOptQSize> OptQ;
BoundedProducerConsumerQueue<EmitterWorkItem> EmitQ;
// DataLowering is only ever used by a single thread at a time (either in
// emitItems(), or in IceCompiler::run before the compilation is over.)
......
......@@ -24,6 +24,20 @@
namespace Ice {
/// OptWorkItem that simply carries an already-built Cfg, so that a Cfg can be
/// placed on the optimization queue as a work item.
class CfgOptWorkItem final : public OptWorkItem {
  CfgOptWorkItem() = delete;
  CfgOptWorkItem(const CfgOptWorkItem &) = delete;
  CfgOptWorkItem &operator=(const CfgOptWorkItem &) = delete;

public:
  /// Takes ownership of \p ParsedFunc. The constructor is explicit so that a
  /// std::unique_ptr<Cfg> is never silently converted into a work item.
  explicit CfgOptWorkItem(std::unique_ptr<Cfg> ParsedFunc)
      : Func(std::move(ParsedFunc)) {}

  /// Transfers ownership of the wrapped Cfg to the caller. The item holds a
  /// null pointer afterwards, so a second call returns nullptr.
  std::unique_ptr<Cfg> getParsedCfg() override { return std::move(Func); }

  ~CfgOptWorkItem() override = default;

private:
  /// The wrapped function; null once getParsedCfg() has been called.
  std::unique_ptr<Cfg> Func;
};
/// Constructs a Translator that uses \p Ctx as its global context.
/// NextSequenceNumber is seeded from GlobalContext::getFirstSequenceNumber()
/// and ErrorStatus is value-initialized.
Translator::Translator(GlobalContext *Ctx)
: Ctx(Ctx), NextSequenceNumber(GlobalContext::getFirstSequenceNumber()),
ErrorStatus() {}
......@@ -57,7 +71,7 @@ bool Translator::checkIfUnnamedNameSafe(const std::string &Name,
}
/// Hands \p Func to the global context's optimization queue.
// NOTE(review): Func is pushed twice below, once directly and once wrapped in
// a CfgOptWorkItem. After the first std::move the unique_ptr is empty, so the
// second push would wrap a null Cfg. This looks like the before/after lines of
// a flattened diff; presumably only the CfgOptWorkItem push is live in the
// real file -- confirm.
void Translator::translateFcn(std::unique_ptr<Cfg> Func) {
Ctx->optQueueBlockingPush(std::move(Func));
Ctx->optQueueBlockingPush(makeUnique<CfgOptWorkItem>(std::move(Func)));
}
void Translator::lowerGlobals(
......
; Tests malformed insertelement and extractelement vector instructions.
; RUN: %if --need=allow_dump --command llvm-as < %s \
; RUN: | %if --need=allow_dump --command pnacl-freeze \
; RUN: | %if --need=allow_dump --command not %pnacl_sz -notranslate \
; RUN: -build-on-read -allow-pnacl-reader-error-recovery \
; RUN: -filetype=obj -o /dev/null \
; RUN: %if --need=allow_dump --command \
; RUN: %p2i --expect-fail -i %s --allow-pnacl-reader-error-recovery \
; RUN: --filetype=obj -o /dev/null --args -notranslate \
; RUN: | %if --need=allow_dump --command FileCheck %s
; RUN: %if --need=no_dump --command llvm-as < %s \
; RUN: | %if --need=no_dump --command pnacl-freeze \
; RUN: | %if --need=no_dump --command not %pnacl_sz -notranslate \
; RUN: -build-on-read -allow-pnacl-reader-error-recovery \
; RUN: -filetype=obj -o /dev/null \
; RUN: %if --need=no_dump --command \
; RUN: %p2i --expect-fail -i %s --allow-pnacl-reader-error-recovery \
; RUN: --filetype=obj -o /dev/null --args -notranslate \
; RUN: | %if --need=no_dump --command FileCheck %s --check-prefix=MIN
define void @ExtractV4xi1(<4 x i1> %v, i32 %i) {
......
......@@ -25,7 +25,8 @@ void IceTest::SubzeroBitcodeMunger::resetMungeFlags() {
Flags.setOptLevel(Ice::Opt_m1);
Flags.setOutFileType(Ice::FT_Iasm);
Flags.setTargetArch(Ice::Target_X8632);
Flags.setVerbose(Ice::IceV_Instructions);
Flags.setNumTranslationThreads(0);
Flags.setParseParallel(false);
}
bool IceTest::SubzeroBitcodeMunger::runTest(const uint64_t Munges[],
......@@ -34,10 +35,12 @@ bool IceTest::SubzeroBitcodeMunger::runTest(const uint64_t Munges[],
const bool AddHeader = true;
setupTest(Munges, MungeSize, AddHeader);
Ice::GlobalContext Ctx(DumpStream, DumpStream, DumpStream, nullptr);
Ctx.startWorkerThreads();
Ice::PNaClTranslator Translator(&Ctx);
const char *BufferName = "Test";
Flags.setDisableTranslation(DisableTranslation);
Translator.translateBuffer(BufferName, MungedInput.get());
Ctx.waitForWorkerThreads();
cleanupTest();
return Translator.getErrorStatus().value() == 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment