Commit 36d6aa65 by John Porto

Subzero. Moar performance tweaks.

1) Clones llvm::BitVector and makes it allocator-aware (using CfgLocalAllocator<>).
2) Uses mallopt to set the malloc granularity. The default granularity is too small, which forces too many mmap calls.

BUG=
R=sehr@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/1738683003 .
parent 8159aae6
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "IceCfg.h" #include "IceCfg.h"
#include "IceAssembler.h" #include "IceAssembler.h"
#include "IceBitVector.h"
#include "IceCfgNode.h" #include "IceCfgNode.h"
#include "IceClFlags.h" #include "IceClFlags.h"
#include "IceDefs.h" #include "IceDefs.h"
...@@ -237,8 +238,8 @@ void Cfg::computeInOutEdges() { ...@@ -237,8 +238,8 @@ void Cfg::computeInOutEdges() {
// Prune any unreachable nodes before computing in-edges. // Prune any unreachable nodes before computing in-edges.
SizeT NumNodes = getNumNodes(); SizeT NumNodes = getNumNodes();
llvm::BitVector Reachable(NumNodes); BitVector Reachable(NumNodes);
llvm::BitVector Pending(NumNodes); BitVector Pending(NumNodes);
Pending.set(getEntryNode()->getIndex()); Pending.set(getEntryNode()->getIndex());
while (true) { while (true) {
int Index = Pending.find_first(); int Index = Pending.find_first();
...@@ -427,7 +428,7 @@ void Cfg::reorderNodes() { ...@@ -427,7 +428,7 @@ void Cfg::reorderNodes() {
} }
namespace { namespace {
void getRandomPostOrder(CfgNode *Node, llvm::BitVector &ToVisit, void getRandomPostOrder(CfgNode *Node, BitVector &ToVisit,
Ice::NodeList &PostOrder, Ice::NodeList &PostOrder,
Ice::RandomNumberGenerator *RNG) { Ice::RandomNumberGenerator *RNG) {
assert(ToVisit[Node->getIndex()]); assert(ToVisit[Node->getIndex()]);
...@@ -449,7 +450,7 @@ void Cfg::shuffleNodes() { ...@@ -449,7 +450,7 @@ void Cfg::shuffleNodes() {
NodeList ReversedReachable; NodeList ReversedReachable;
NodeList Unreachable; NodeList Unreachable;
llvm::BitVector ToVisit(Nodes.size(), true); BitVector ToVisit(Nodes.size(), true);
// Create Random number generator for function reordering // Create Random number generator for function reordering
RandomNumberGenerator RNG(Ctx->getFlags().getRandomSeed(), RandomNumberGenerator RNG(Ctx->getFlags().getRandomSeed(),
RPE_BasicBlockReordering, SequenceNumber); RPE_BasicBlockReordering, SequenceNumber);
...@@ -813,7 +814,7 @@ void Cfg::liveness(LivenessMode Mode) { ...@@ -813,7 +814,7 @@ void Cfg::liveness(LivenessMode Mode) {
getVMetadata()->init(VMK_Uses); getVMetadata()->init(VMK_Uses);
Live->init(); Live->init();
// Initialize with all nodes needing to be processed. // Initialize with all nodes needing to be processed.
llvm::BitVector NeedToProcess(Nodes.size(), true); BitVector NeedToProcess(Nodes.size(), true);
while (NeedToProcess.any()) { while (NeedToProcess.any()) {
// Iterate in reverse topological order to speed up convergence. // Iterate in reverse topological order to speed up convergence.
for (CfgNode *Node : reverse_range(Nodes)) { for (CfgNode *Node : reverse_range(Nodes)) {
......
...@@ -21,7 +21,6 @@ ...@@ -21,7 +21,6 @@
#include "IceTLS.h" #include "IceTLS.h"
#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/iterator_range.h"
...@@ -50,6 +49,7 @@ ...@@ -50,6 +49,7 @@
namespace Ice { namespace Ice {
class Assembler; class Assembler;
class BitVector;
class Cfg; class Cfg;
class CfgNode; class CfgNode;
class Constant; class Constant;
...@@ -155,7 +155,7 @@ using InstNumberT = int32_t; ...@@ -155,7 +155,7 @@ using InstNumberT = int32_t;
/// range. /// range.
using LiveBeginEndMapEntry = std::pair<SizeT, InstNumberT>; using LiveBeginEndMapEntry = std::pair<SizeT, InstNumberT>;
using LiveBeginEndMap = CfgVector<LiveBeginEndMapEntry>; using LiveBeginEndMap = CfgVector<LiveBeginEndMapEntry>;
using LivenessBV = llvm::BitVector; using LivenessBV = BitVector;
using TimerStackIdT = uint32_t; using TimerStackIdT = uint32_t;
using TimerIdT = uint32_t; using TimerIdT = uint32_t;
......
...@@ -21,8 +21,9 @@ ...@@ -21,8 +21,9 @@
#ifndef SUBZERO_SRC_ICELIVENESS_H #ifndef SUBZERO_SRC_ICELIVENESS_H
#define SUBZERO_SRC_ICELIVENESS_H #define SUBZERO_SRC_ICELIVENESS_H
#include "IceCfgNode.h"
#include "IceDefs.h" #include "IceDefs.h"
#include "IceBitVector.h"
#include "IceCfgNode.h"
#include "IceTypes.h" #include "IceTypes.h"
namespace Ice { namespace Ice {
...@@ -117,7 +118,7 @@ private: ...@@ -117,7 +118,7 @@ private:
CfgVector<Variable *> LiveToVarMap; CfgVector<Variable *> LiveToVarMap;
/// RangeMask[Variable::Number] indicates whether we want to track that /// RangeMask[Variable::Number] indicates whether we want to track that
/// Variable's live range. /// Variable's live range.
llvm::BitVector RangeMask; LivenessBV RangeMask;
}; };
} // end of namespace Ice } // end of namespace Ice
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "IceRNG.h" #include "IceRNG.h"
#include <climits>
#include <ctime> #include <ctime>
namespace Ice { namespace Ice {
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "IceTargetLowering.h" #include "IceTargetLowering.h"
#include "IceBitVector.h"
#include "IceCfg.h" // setError() #include "IceCfg.h" // setError()
#include "IceCfgNode.h" #include "IceCfgNode.h"
#include "IceGlobalContext.h" #include "IceGlobalContext.h"
...@@ -543,7 +544,7 @@ void TargetLowering::getVarStackSlotParams( ...@@ -543,7 +544,7 @@ void TargetLowering::getVarStackSlotParams(
uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes, uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes,
std::function<bool(Variable *)> TargetVarHook) { std::function<bool(Variable *)> TargetVarHook) {
const VariablesMetadata *VMetadata = Func->getVMetadata(); const VariablesMetadata *VMetadata = Func->getVMetadata();
llvm::BitVector IsVarReferenced(Func->getNumVariables()); BitVector IsVarReferenced(Func->getNumVariables());
for (CfgNode *Node : Func->getNodes()) { for (CfgNode *Node : Func->getNodes()) {
for (Inst &Instr : Node->getInsts()) { for (Inst &Instr : Node->getInsts()) {
if (Instr.isDeleted()) if (Instr.isDeleted())
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
#include "IceDefs.h" #include "IceDefs.h"
#include <climits>
namespace Ice { namespace Ice {
namespace { namespace {
......
...@@ -17,6 +17,10 @@ ...@@ -17,6 +17,10 @@
#include "IceBuildDefs.h" #include "IceBuildDefs.h"
#include "IceCompileServer.h" #include "IceCompileServer.h"
#ifdef __pnacl__
#include <malloc.h>
#endif // __pnacl__
/// Depending on whether we are building the compiler for the browser or /// Depending on whether we are building the compiler for the browser or
/// standalone, we will end up creating a Ice::BrowserCompileServer or /// standalone, we will end up creating a Ice::BrowserCompileServer or
/// Ice::CLCompileServer object. Method /// Ice::CLCompileServer object. Method
...@@ -26,6 +30,15 @@ ...@@ -26,6 +30,15 @@
/// We can only compile the Ice::BrowserCompileServer object with the PNaCl /// We can only compile the Ice::BrowserCompileServer object with the PNaCl
/// compiler toolchain, when building Subzero as a sandboxed translator. /// compiler toolchain, when building Subzero as a sandboxed translator.
int main(int argc, char **argv) { int main(int argc, char **argv) {
#ifdef __pnacl__
#define M_GRANULARITY (-2)
// PNaCl's default malloc implementation grabs small chunks of memory with
// mmap at a time, hence causing significant slowdowns. This call ensures that
// mmap is used to allocate 16MB at a time, to amortize the system call cost.
mallopt(M_GRANULARITY, 16 * 1024 * 1024);
#undef M_GRANULARITY
#endif // __pnacl__
if (Ice::BuildDefs::browser()) { if (Ice::BuildDefs::browser()) {
assert(argc == 1); assert(argc == 1);
return Ice::BrowserCompileServer().runAndReturnErrorCode(); return Ice::BrowserCompileServer().runAndReturnErrorCode();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment