Commit 3018cf2b by Karl Schimpf

Reduce wait times for very large PEXE files.

Investigated how many parser waits occur when the OptQ fills up. The current implementation has 64k entries, which for 10MB examples never fill up (but do come close to filling up). To test, I dropped the queue size down. The numbers I got were that the queue size plus the number of parse waits was within 2% of the total number of function blocks. Hence, once the OptQ fills up, a lot of slow notifies get applied. Hence, for scaling, I modified the code to not wake up the parse thread (during a pop) until the OptQ got half empty. The results were that once the OptQ got up to size 1024, fewer than 100 notifies would be issued. From 1024 on, as the queue size doubled, the number of notifies would drop roughly in half. Based on this, I decided to add the feature that the OptQ does not wake up the waiting parse thread until it is half empty. Since the queue size was not shrunk, this CL shouldn't add any overhead for the PEXEs we have, and should incur very few waits even for PEXEs significantly larger than the current (10MB) ones. BUG=None R=jpp@chromium.org Review URL: https://codereview.chromium.org/1877873002 .
parent b627f094
...@@ -289,15 +289,25 @@ void GlobalContext::CodeStats::dump(const Cfg *Func, GlobalContext *Ctx) { ...@@ -289,15 +289,25 @@ void GlobalContext::CodeStats::dump(const Cfg *Func, GlobalContext *Ctx) {
} }
} }
namespace {
// By default, wake up the main parser thread only once the OptQ has drained
// to half of its capacity (rather than on every pop); this keeps the number
// of slow condition-variable notifies small when translating large inputs.
// Note: `static` is redundant inside an anonymous namespace (internal
// linkage is already implied), so it is omitted here.
constexpr size_t DefaultOptQWakeupSize = GlobalContext::MaxOptQSize >> 1;
} // end of anonymous namespace
GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError, GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError,
ELFStreamer *ELFStr) ELFStreamer *ELFStr)
: Strings(new StringPool()), ConstPool(new ConstantPool()), ErrorStatus(), : Strings(new StringPool()), ConstPool(new ConstantPool()), ErrorStatus(),
StrDump(OsDump), StrEmit(OsEmit), StrError(OsError), IntrinsicsInfo(this), StrDump(OsDump), StrEmit(OsEmit), StrError(OsError), IntrinsicsInfo(this),
ObjectWriter(), OptQ(/*Sequential=*/getFlags().isSequential(), ObjectWriter(),
/*MaxSize=*/ OptQWakeupSize(std::max(DefaultOptQWakeupSize,
getFlags().isParseParallel() size_t(getFlags().getNumTranslationThreads()))),
? MaxOptQSize OptQ(/*Sequential=*/getFlags().isSequential(),
: getFlags().getNumTranslationThreads()), /*MaxSize=*/
getFlags().isParseParallel()
? MaxOptQSize
: getFlags().getNumTranslationThreads()),
// EmitQ is allowed unlimited size. // EmitQ is allowed unlimited size.
EmitQ(/*Sequential=*/getFlags().isSequential()), EmitQ(/*Sequential=*/getFlags().isSequential()),
DataLowering(TargetDataLowering::createLowering(this)) { DataLowering(TargetDataLowering::createLowering(this)) {
...@@ -939,7 +949,7 @@ void GlobalContext::optQueueBlockingPush(std::unique_ptr<OptWorkItem> Item) { ...@@ -939,7 +949,7 @@ void GlobalContext::optQueueBlockingPush(std::unique_ptr<OptWorkItem> Item) {
std::unique_ptr<OptWorkItem> GlobalContext::optQueueBlockingPop() { std::unique_ptr<OptWorkItem> GlobalContext::optQueueBlockingPop() {
TimerMarker _(TimerStack::TT_qTransPop, this); TimerMarker _(TimerStack::TT_qTransPop, this);
return std::unique_ptr<OptWorkItem>(OptQ.blockingPop()); return OptQ.blockingPop(OptQWakeupSize);
} }
void GlobalContext::emitQueueBlockingPush( void GlobalContext::emitQueueBlockingPush(
......
...@@ -477,6 +477,11 @@ public: ...@@ -477,6 +477,11 @@ public:
return LockedPtr<StringPool>(Strings.get(), &StringsLock); return LockedPtr<StringPool>(Strings.get(), &StringsLock);
} }
/// Number of function blocks that can be queued before waiting for
/// translation threads to consume them.
static constexpr size_t MaxOptQSize = 1 << 16;
private: private:
// Try to ensure mutexes are allocated on separate cache lines. // Try to ensure mutexes are allocated on separate cache lines.
...@@ -543,7 +548,8 @@ private: ...@@ -543,7 +548,8 @@ private:
Intrinsics IntrinsicsInfo; Intrinsics IntrinsicsInfo;
// TODO(jpp): move to EmitterContext. // TODO(jpp): move to EmitterContext.
std::unique_ptr<ELFObjectWriter> ObjectWriter; std::unique_ptr<ELFObjectWriter> ObjectWriter;
static constexpr size_t MaxOptQSize = 1 << 16; // Value defining when to wake up the main parse thread.
const size_t OptQWakeupSize;
BoundedProducerConsumerQueue<OptWorkItem, MaxOptQSize> OptQ; BoundedProducerConsumerQueue<OptWorkItem, MaxOptQSize> OptQ;
BoundedProducerConsumerQueue<EmitterWorkItem> EmitQ; BoundedProducerConsumerQueue<EmitterWorkItem> EmitQ;
// DataLowering is only ever used by a single thread at a time (either in // DataLowering is only ever used by a single thread at a time (either in
......
...@@ -67,7 +67,7 @@ public: ...@@ -67,7 +67,7 @@ public:
} }
GrewOrEnded.notify_one(); GrewOrEnded.notify_one();
} }
std::unique_ptr<T> blockingPop() { std::unique_ptr<T> blockingPop(size_t NotifyWhenDownToSize = MaxStaticSize) {
std::unique_ptr<T> Item; std::unique_ptr<T> Item;
bool ShouldNotifyProducer = false; bool ShouldNotifyProducer = false;
{ {
...@@ -75,7 +75,7 @@ public: ...@@ -75,7 +75,7 @@ public:
GrewOrEnded.wait(L, [this] { return IsEnded || !empty() || Sequential; }); GrewOrEnded.wait(L, [this] { return IsEnded || !empty() || Sequential; });
if (!empty()) { if (!empty()) {
Item = pop(); Item = pop();
ShouldNotifyProducer = !IsEnded; ShouldNotifyProducer = (size() < NotifyWhenDownToSize) && !IsEnded;
} }
} }
if (ShouldNotifyProducer) if (ShouldNotifyProducer)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment