Subzero. Moar performance tweaks.

1) Clones llvm::BitVector, and makes it Allocator aware (using the CfgLocalAllocator<>) 2) Uses mallopt to set the malloc granularity. The default granularity is too small, which forces too many mmap calls. BUG= R=sehr@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/1738683003 .

Subzero. Moar performance tweaks.
36d6aa65 · John Porto · 8159aae6 · 36d6aa65 · 36d6aa65 · 36d6aa65
Commit 36d6aa65 authored Feb 26, 2016 by John Porto
8 changed files
--- a/src/IceBitVector.h
+++ b/src/IceBitVector.h
@@ -8,24 +8,30 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// \brief Defines and implements a bit vector with inline storage. It is a drop
+/// \brief Defines and implements bit vector classes.
-/// in replacement for llvm::SmallBitVector in subzero -- i.e., not all of
+///
-/// llvm::SmallBitVector interface is implemented.
+/// SmallBitVector is a drop in replacement for llvm::SmallBitVector. It uses
+/// inline storage, at the expense of limited, static size.
+///
+/// BitVector is an allocator-aware version of llvm::BitVector. Its
+/// implementation was copied verbatim (ipsis litteris) from llvm.
 ///
 //===----------------------------------------------------------------------===//
 #ifndef SUBZERO_SRC_ICEBITVECTOR_H
 #define SUBZERO_SRC_ICEBITVECTOR_H
-#include "IceDefs.h"
+#include "IceMemory.h"
 #include "IceOperand.h"
 #include "llvm/Support/MathExtras.h"
 #include <algorithm>
+#include <cassert>
 #include <climits>
+#include <cstring>
 #include <memory>
 #include <type_traits>
+#include <utility>
 namespace Ice {
 class SmallBitVector {
@@ -240,6 +246,566 @@ private:
  }
 };
+/// BitVector is a dynamically sized vector of bits whose backing storage is
+/// obtained from a CfgLocalAllocator<BitWord>. Bits are packed into BitWord
+/// sized words; bit 0 is the least significant bit of word 0.
+///
+/// Invariant maintained by the mutating operations: every bit at an index
+/// >= Size inside the allocated words is zero ("unused bits are clear").
+/// Several queries (all(), count(), operator==) rely on that invariant.
+class BitVector {
+  typedef unsigned long BitWord;
+  using Allocator = CfgLocalAllocator<BitWord>;
+  enum { BITWORD_SIZE = (unsigned)sizeof(BitWord) * CHAR_BIT };
+  static_assert(BITWORD_SIZE == 64 || BITWORD_SIZE == 32,
+                "Unsupported word size");
+  BitWord *Bits;     // Actual bits.
+  unsigned Size;     // Size of bitvector in bits.
+  unsigned Capacity; // Size of allocated memory in BitWord.
+  Allocator Alloc;
+public:
+  typedef unsigned size_type;
+  /// Encapsulation of a single bit: a proxy that remembers the word holding
+  /// the bit and the bit's position inside that word. It stays valid only as
+  /// long as the BitVector's storage is not reallocated.
+  class reference {
+    friend class BitVector;
+    BitWord *WordRef;
+    unsigned BitPos;
+    reference(); // Undefined
+  public:
+    reference(BitVector &b, unsigned Idx) {
+      WordRef = &b.Bits[Idx / BITWORD_SIZE];
+      BitPos = Idx % BITWORD_SIZE;
+    }
+    reference(const reference &) = default;
+    /// Assigns the *value* of the bit referenced by t (not the reference).
+    reference &operator=(reference t) {
+      *this = bool(t);
+      return *this;
+    }
+    /// Sets (t == true) or clears (t == false) the referenced bit.
+    reference &operator=(bool t) {
+      if (t)
+        *WordRef |= BitWord(1) << BitPos;
+      else
+        *WordRef &= ~(BitWord(1) << BitPos);
+      return *this;
+    }
+    /// Reads the referenced bit.
+    operator bool() const {
+      return ((*WordRef) & (BitWord(1) << BitPos)) ? true : false;
+    }
+  };
+  /// BitVector default ctor - Creates an empty bitvector. No storage is
+  /// allocated until the vector grows.
+  BitVector(Allocator A = Allocator())
+      : Size(0), Capacity(0), Alloc(std::move(A)) {
+    Bits = nullptr;
+  }
+  /// BitVector ctor - Creates a bitvector of specified number of bits. All
+  /// bits are initialized to the specified value.
+  explicit BitVector(unsigned s, bool t = false, Allocator A = Allocator())
+      : Size(s), Alloc(std::move(A)) {
+    Capacity = NumBitWords(s);
+    // allocate()/deallocate() take a number of BitWord elements (C++
+    // Allocator requirements), not a number of bytes.
+    Bits = Capacity ? Alloc.allocate(Capacity) : nullptr;
+    init_words(Bits, Capacity, t);
+    if (t)
+      clear_unused_bits();
+  }
+  /// BitVector copy ctor. The copy gets exactly enough words to hold
+  /// RHS.size() bits and a copy of RHS's allocator.
+  BitVector(const BitVector &RHS) : Size(RHS.size()), Alloc(RHS.Alloc) {
+    if (Size == 0) {
+      Bits = nullptr;
+      Capacity = 0;
+      return;
+    }
+    Capacity = NumBitWords(RHS.size());
+    Bits = Alloc.allocate(Capacity);
+    std::memcpy(Bits, RHS.Bits, Capacity * sizeof(BitWord));
+  }
+  /// BitVector move ctor. Steals RHS's storage and leaves RHS as a valid,
+  /// empty bitvector (no storage, Size == 0, Capacity == 0).
+  BitVector(BitVector &&RHS)
+      : Bits(RHS.Bits), Size(RHS.Size), Capacity(RHS.Capacity),
+        Alloc(std::move(RHS.Alloc)) {
+    RHS.Bits = nullptr;
+    // Keep the moved-from object self-consistent: with Bits == nullptr a
+    // non-zero Size would make every query dereference a null pointer.
+    RHS.Size = 0;
+    RHS.Capacity = 0;
+  }
+  ~BitVector() {
+    if (Bits != nullptr) {
+      Alloc.deallocate(Bits, Capacity);
+    }
+  }
+  /// empty - Tests whether there are no bits in this bitvector.
+  bool empty() const { return Size == 0; }
+  /// size - Returns the number of bits in this bitvector.
+  size_type size() const { return Size; }
+  /// count - Returns the number of bits which are set.
+  size_type count() const {
+    unsigned NumBits = 0;
+    for (unsigned i = 0; i < NumBitWords(size()); ++i)
+      NumBits += llvm::countPopulation(Bits[i]);
+    return NumBits;
+  }
+  /// any - Returns true if any bit is set.
+  bool any() const {
+    for (unsigned i = 0; i < NumBitWords(size()); ++i)
+      if (Bits[i] != 0)
+        return true;
+    return false;
+  }
+  /// all - Returns true if all bits are set.
+  bool all() const {
+    for (unsigned i = 0; i < Size / BITWORD_SIZE; ++i)
+      if (Bits[i] != ~0UL)
+        return false;
+    // If bits remain check that they are ones. The unused bits are always zero.
+    if (unsigned Remainder = Size % BITWORD_SIZE)
+      return Bits[Size / BITWORD_SIZE] == (1UL << Remainder) - 1;
+    return true;
+  }
+  /// none - Returns true if none of the bits are set.
+  bool none() const { return !any(); }
+  /// find_first - Returns the index of the first set bit, -1 if none
+  /// of the bits are set.
+  int find_first() const {
+    for (unsigned i = 0; i < NumBitWords(size()); ++i)
+      if (Bits[i] != 0)
+        return i * BITWORD_SIZE + llvm::countTrailingZeros(Bits[i]);
+    return -1;
+  }
+  /// find_next - Returns the index of the next set bit following the
+  /// "Prev" bit. Returns -1 if the next set bit is not found.
+  int find_next(unsigned Prev) const {
+    ++Prev;
+    if (Prev >= Size)
+      return -1;
+    unsigned WordPos = Prev / BITWORD_SIZE;
+    unsigned BitPos = Prev % BITWORD_SIZE;
+    BitWord Copy = Bits[WordPos];
+    // Mask off previous bits.
+    Copy &= ~0UL << BitPos;
+    if (Copy != 0)
+      return WordPos * BITWORD_SIZE + llvm::countTrailingZeros(Copy);
+    // Check subsequent words.
+    for (unsigned i = WordPos + 1; i < NumBitWords(size()); ++i)
+      if (Bits[i] != 0)
+        return i * BITWORD_SIZE + llvm::countTrailingZeros(Bits[i]);
+    return -1;
+  }
+  /// clear - Clear all bits. Only the logical size is reset to zero; the
+  /// allocated storage (Capacity) is kept.
+  void clear() { Size = 0; }
+  /// resize - Grow or shrink the bitvector to N bits. Bits that become newly
+  /// visible are initialized to t; existing bits keep their value.
+  void resize(unsigned N, bool t = false) {
+    if (N > Capacity * BITWORD_SIZE) {
+      unsigned OldCapacity = Capacity;
+      grow(N);
+      init_words(&Bits[OldCapacity], (Capacity - OldCapacity), t);
+    }
+    // Set any old unused bits that are now included in the BitVector. This
+    // may set bits that are not included in the new vector, but we will clear
+    // them back out below.
+    if (N > Size)
+      set_unused_bits(t);
+    // Update the size, and clear out any bits that are now unused
+    unsigned OldSize = Size;
+    Size = N;
+    if (t || N < OldSize)
+      clear_unused_bits();
+  }
+  /// reserve - Makes sure storage for at least N bits is allocated. The
+  /// logical size is not changed.
+  void reserve(unsigned N) {
+    if (N > Capacity * BITWORD_SIZE)
+      grow(N);
+  }
+  // Set, reset, flip
+  /// set - Sets every bit in [0, size()).
+  BitVector &set() {
+    init_words(Bits, Capacity, true);
+    clear_unused_bits();
+    return *this;
+  }
+  /// set - Sets bit Idx. Idx is not range-checked against size().
+  BitVector &set(unsigned Idx) {
+    assert(Bits && "Bits never allocated");
+    Bits[Idx / BITWORD_SIZE] |= BitWord(1) << (Idx % BITWORD_SIZE);
+    return *this;
+  }
+  /// set - Efficiently set a range of bits in [I, E)
+  BitVector &set(unsigned I, unsigned E) {
+    assert(I <= E && "Attempted to set backwards range!");
+    assert(E <= size() && "Attempted to set out-of-bounds range!");
+    if (I == E)
+      return *this;
+    // Range lies inside a single word: build the mask as a difference of
+    // two powers of two.
+    if (I / BITWORD_SIZE == E / BITWORD_SIZE) {
+      BitWord EMask = 1UL << (E % BITWORD_SIZE);
+      BitWord IMask = 1UL << (I % BITWORD_SIZE);
+      BitWord Mask = EMask - IMask;
+      Bits[I / BITWORD_SIZE] |= Mask;
+      return *this;
+    }
+    // Otherwise: partial first word, whole middle words, partial last word.
+    BitWord PrefixMask = ~0UL << (I % BITWORD_SIZE);
+    Bits[I / BITWORD_SIZE] |= PrefixMask;
+    I = llvm::RoundUpToAlignment(I, BITWORD_SIZE);
+    for (; I + BITWORD_SIZE <= E; I += BITWORD_SIZE)
+      Bits[I / BITWORD_SIZE] = ~0UL;
+    BitWord PostfixMask = (1UL << (E % BITWORD_SIZE)) - 1;
+    if (I < E)
+      Bits[I / BITWORD_SIZE] |= PostfixMask;
+    return *this;
+  }
+  /// reset - Clears every bit (all allocated words are zeroed).
+  BitVector &reset() {
+    init_words(Bits, Capacity, false);
+    return *this;
+  }
+  /// reset - Clears bit Idx. Idx is not range-checked against size().
+  BitVector &reset(unsigned Idx) {
+    Bits[Idx / BITWORD_SIZE] &= ~(BitWord(1) << (Idx % BITWORD_SIZE));
+    return *this;
+  }
+  /// reset - Efficiently reset a range of bits in [I, E)
+  BitVector &reset(unsigned I, unsigned E) {
+    assert(I <= E && "Attempted to reset backwards range!");
+    assert(E <= size() && "Attempted to reset out-of-bounds range!");
+    if (I == E)
+      return *this;
+    // Range lies inside a single word.
+    if (I / BITWORD_SIZE == E / BITWORD_SIZE) {
+      BitWord EMask = 1UL << (E % BITWORD_SIZE);
+      BitWord IMask = 1UL << (I % BITWORD_SIZE);
+      BitWord Mask = EMask - IMask;
+      Bits[I / BITWORD_SIZE] &= ~Mask;
+      return *this;
+    }
+    // Otherwise: partial first word, whole middle words, partial last word.
+    BitWord PrefixMask = ~0UL << (I % BITWORD_SIZE);
+    Bits[I / BITWORD_SIZE] &= ~PrefixMask;
+    I = llvm::RoundUpToAlignment(I, BITWORD_SIZE);
+    for (; I + BITWORD_SIZE <= E; I += BITWORD_SIZE)
+      Bits[I / BITWORD_SIZE] = 0UL;
+    BitWord PostfixMask = (1UL << (E % BITWORD_SIZE)) - 1;
+    if (I < E)
+      Bits[I / BITWORD_SIZE] &= ~PostfixMask;
+    return *this;
+  }
+  /// flip - Inverts every bit in [0, size()).
+  BitVector &flip() {
+    for (unsigned i = 0; i < NumBitWords(size()); ++i)
+      Bits[i] = ~Bits[i];
+    // Inverting whole words also set the unused high bits; restore them.
+    clear_unused_bits();
+    return *this;
+  }
+  /// flip - Inverts bit Idx. Idx is not range-checked against size().
+  BitVector &flip(unsigned Idx) {
+    Bits[Idx / BITWORD_SIZE] ^= BitWord(1) << (Idx % BITWORD_SIZE);
+    return *this;
+  }
+  // Indexing.
+  /// Returns a writable proxy for bit Idx.
+  reference operator[](unsigned Idx) {
+    assert(Idx < Size && "Out-of-bounds Bit access.");
+    return reference(*this, Idx);
+  }
+  /// Returns the value of bit Idx.
+  bool operator[](unsigned Idx) const {
+    assert(Idx < Size && "Out-of-bounds Bit access.");
+    BitWord Mask = BitWord(1) << (Idx % BITWORD_SIZE);
+    return (Bits[Idx / BITWORD_SIZE] & Mask) != 0;
+  }
+  /// test - Returns the value of bit Idx (same as the const operator[]).
+  bool test(unsigned Idx) const { return (*this)[Idx]; }
+  /// Test if any common bits are set.
+  bool anyCommon(const BitVector &RHS) const {
+    unsigned ThisWords = NumBitWords(size());
+    unsigned RHSWords = NumBitWords(RHS.size());
+    for (unsigned i = 0, e = std::min(ThisWords, RHSWords); i != e; ++i)
+      if (Bits[i] & RHS.Bits[i])
+        return true;
+    return false;
+  }
+  // Comparison operators.
+  /// Two bitvectors compare equal when their set bits coincide; vectors of
+  /// different sizes are equal if the longer one has no bit set beyond the
+  /// words shared with the shorter one.
+  bool operator==(const BitVector &RHS) const {
+    unsigned ThisWords = NumBitWords(size());
+    unsigned RHSWords = NumBitWords(RHS.size());
+    unsigned i;
+    for (i = 0; i != std::min(ThisWords, RHSWords); ++i)
+      if (Bits[i] != RHS.Bits[i])
+        return false;
+    // Verify that any extra words are all zeros.
+    if (i != ThisWords) {
+      for (; i != ThisWords; ++i)
+        if (Bits[i])
+          return false;
+    } else if (i != RHSWords) {
+      for (; i != RHSWords; ++i)
+        if (RHS.Bits[i])
+          return false;
+    }
+    return true;
+  }
+  bool operator!=(const BitVector &RHS) const { return !(*this == RHS); }
+  /// Intersection, union, disjoint union.
+  BitVector &operator&=(const BitVector &RHS) {
+    unsigned ThisWords = NumBitWords(size());
+    unsigned RHSWords = NumBitWords(RHS.size());
+    unsigned i;
+    for (i = 0; i != std::min(ThisWords, RHSWords); ++i)
+      Bits[i] &= RHS.Bits[i];
+    // Any bits that are just in this bitvector become zero, because they aren't
+    // in the RHS bit vector.  Any words only in RHS are ignored because they
+    // are already zero in the LHS.
+    for (; i != ThisWords; ++i)
+      Bits[i] = 0;
+    return *this;
+  }
+  /// reset - Reset bits that are set in RHS. Same as *this &= ~RHS.
+  BitVector &reset(const BitVector &RHS) {
+    unsigned ThisWords = NumBitWords(size());
+    unsigned RHSWords = NumBitWords(RHS.size());
+    unsigned i;
+    for (i = 0; i != std::min(ThisWords, RHSWords); ++i)
+      Bits[i] &= ~RHS.Bits[i];
+    return *this;
+  }
+  /// test - Check if (This - RHS) is zero.
+  /// This is the same as reset(RHS) and any().
+  bool test(const BitVector &RHS) const {
+    unsigned ThisWords = NumBitWords(size());
+    unsigned RHSWords = NumBitWords(RHS.size());
+    unsigned i;
+    for (i = 0; i != std::min(ThisWords, RHSWords); ++i)
+      if ((Bits[i] & ~RHS.Bits[i]) != 0)
+        return true;
+    for (; i != ThisWords; ++i)
+      if (Bits[i] != 0)
+        return true;
+    return false;
+  }
+  /// Union. This vector is first resized to RHS.size() if it is smaller.
+  BitVector &operator|=(const BitVector &RHS) {
+    if (size() < RHS.size())
+      resize(RHS.size());
+    for (size_t i = 0, e = NumBitWords(RHS.size()); i != e; ++i)
+      Bits[i] |= RHS.Bits[i];
+    return *this;
+  }
+  /// Disjoint union (xor). This vector is first resized to RHS.size() if it
+  /// is smaller.
+  BitVector &operator^=(const BitVector &RHS) {
+    if (size() < RHS.size())
+      resize(RHS.size());
+    for (size_t i = 0, e = NumBitWords(RHS.size()); i != e; ++i)
+      Bits[i] ^= RHS.Bits[i];
+    return *this;
+  }
+  // Assignment operator.
+  /// Copy assignment. Reuses the current storage when it is large enough,
+  /// otherwise allocates exactly enough words for RHS. Alloc is not changed.
+  const BitVector &operator=(const BitVector &RHS) {
+    if (this == &RHS)
+      return *this;
+    Size = RHS.size();
+    unsigned RHSWords = NumBitWords(Size);
+    if (Size <= Capacity * BITWORD_SIZE) {
+      if (Size)
+        std::memcpy(Bits, RHS.Bits, RHSWords * sizeof(BitWord));
+      clear_unused_bits();
+      return *this;
+    }
+    // Grow the bitvector to have enough elements.
+    const auto OldCapacity = Capacity;
+    Capacity = RHSWords;
+    assert(Capacity > 0 && "negative capacity?");
+    BitWord *NewBits = Alloc.allocate(Capacity);
+    std::memcpy(NewBits, RHS.Bits, Capacity * sizeof(BitWord));
+    // Destroy the old bits (there are none if nothing was ever allocated).
+    if (Bits != nullptr)
+      Alloc.deallocate(Bits, OldCapacity);
+    Bits = NewBits;
+    return *this;
+  }
+  /// Move assignment. Releases the current storage, takes over RHS's storage
+  /// and leaves RHS as a valid, empty bitvector. Alloc is not changed.
+  const BitVector &operator=(BitVector &&RHS) {
+    if (this == &RHS)
+      return *this;
+    if (Bits != nullptr)
+      Alloc.deallocate(Bits, Capacity);
+    Bits = RHS.Bits;
+    Size = RHS.Size;
+    Capacity = RHS.Capacity;
+    RHS.Bits = nullptr;
+    // Keep the moved-from object self-consistent (see the move ctor).
+    RHS.Size = 0;
+    RHS.Capacity = 0;
+    return *this;
+  }
+  /// swap - Exchanges storage, size and capacity with RHS. The allocators
+  /// are not exchanged.
+  void swap(BitVector &RHS) {
+    std::swap(Bits, RHS.Bits);
+    std::swap(Size, RHS.Size);
+    std::swap(Capacity, RHS.Capacity);
+  }
+  //===--------------------------------------------------------------------===//
+  // Portable bit mask operations.
+  //===--------------------------------------------------------------------===//
+  //
+  // These methods all operate on arrays of uint32_t, each holding 32 bits. The
+  // fixed word size makes it easier to work with literal bit vector constants
+  // in portable code.
+  //
+  // The LSB in each word is the lowest numbered bit.  The size of a portable
+  // bit mask is always a whole multiple of 32 bits.  If no bit mask size is
+  // given, the bit mask is assumed to cover the entire BitVector.
+  /// setBitsInMask - Add '1' bits from Mask to this vector. Don't resize.
+  /// This computes "*this |= Mask".
+  void setBitsInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) {
+    applyMask<true, false>(Mask, MaskWords);
+  }
+  /// clearBitsInMask - Clear any bits in this vector that are set in Mask.
+  /// Don't resize. This computes "*this &= ~Mask".
+  void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) {
+    applyMask<false, false>(Mask, MaskWords);
+  }
+  /// setBitsNotInMask - Add a bit to this vector for every '0' bit in Mask.
+  /// Don't resize.  This computes "*this |= ~Mask".
+  void setBitsNotInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) {
+    applyMask<true, true>(Mask, MaskWords);
+  }
+  /// clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
+  /// Don't resize.  This computes "*this &= Mask".
+  void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) {
+    applyMask<false, true>(Mask, MaskWords);
+  }
+private:
+  /// Number of BitWords needed to hold S bits (S rounded up to whole words).
+  unsigned NumBitWords(unsigned S) const {
+    return (S + BITWORD_SIZE - 1) / BITWORD_SIZE;
+  }
+  // Set the unused bits in the high words.
+  void set_unused_bits(bool t = true) {
+    //  Set high words first.
+    unsigned UsedWords = NumBitWords(Size);
+    if (Capacity > UsedWords)
+      init_words(&Bits[UsedWords], (Capacity - UsedWords), t);
+    //  Then set any stray high bits of the last used word.
+    unsigned ExtraBits = Size % BITWORD_SIZE;
+    if (ExtraBits) {
+      BitWord ExtraBitMask = ~0UL << ExtraBits;
+      if (t)
+        Bits[UsedWords - 1] |= ExtraBitMask;
+      else
+        Bits[UsedWords - 1] &= ~ExtraBitMask;
+    }
+  }
+  // Clear the unused bits in the high words.
+  void clear_unused_bits() { set_unused_bits(false); }
+  /// Reallocates the storage so that it can hold at least NewSize bits
+  /// (capacity at least doubles). The existing words are preserved, every
+  /// word past the currently used ones is zeroed, and Size is not changed.
+  void grow(unsigned NewSize) {
+    const auto OldCapacity = Capacity;
+    Capacity = std::max(NumBitWords(NewSize), Capacity * 2);
+    assert(Capacity > 0 && "realloc-ing zero space");
+    auto *NewBits = Alloc.allocate(Capacity);
+    if (Bits != nullptr) {
+      // Copy the old contents INTO the new buffer (destination first), then
+      // release the old buffer.
+      std::memcpy(NewBits, Bits, OldCapacity * sizeof(BitWord));
+      Alloc.deallocate(Bits, OldCapacity);
+    }
+    Bits = NewBits;
+    clear_unused_bits();
+  }
+  /// Fills NumWords words starting at B with all-ones (t) or all-zeros (!t).
+  void init_words(BitWord *B, unsigned NumWords, bool t) {
+    // Nothing to do for an empty range; this also avoids handing a null
+    // pointer to memset when no storage has been allocated yet.
+    if (NumWords == 0)
+      return;
+    // 0 - (int)t is 0 for false and -1 (every byte 0xFF) for true.
+    std::memset(B, 0 - (int)t, NumWords * sizeof(BitWord));
+  }
+  /// Applies a portable 32-bit-word mask to this vector. AddBits selects
+  /// or-ing (true) versus and-not-ing (false) the mask; InvertMask
+  /// complements each mask word first. At most enough mask words to cover
+  /// size() bits are consumed.
+  template <bool AddBits, bool InvertMask>
+  void applyMask(const uint32_t *Mask, unsigned MaskWords) {
+    static_assert(BITWORD_SIZE % 32 == 0, "Unsupported BitWord size.");
+    MaskWords = std::min(MaskWords, (size() + 31) / 32);
+    const unsigned Scale = BITWORD_SIZE / 32;
+    unsigned i;
+    // Whole BitWords: consume Scale mask words per BitWord.
+    for (i = 0; MaskWords >= Scale; ++i, MaskWords -= Scale) {
+      BitWord BW = Bits[i];
+      // This inner loop should unroll completely when BITWORD_SIZE > 32.
+      for (unsigned b = 0; b != BITWORD_SIZE; b += 32) {
+        uint32_t M = *Mask++;
+        if (InvertMask)
+          M = ~M;
+        if (AddBits)
+          BW |= BitWord(M) << b;
+        else
+          BW &= ~(BitWord(M) << b);
+      }
+      Bits[i] = BW;
+    }
+    // Leftover mask words that only partially cover the last BitWord.
+    for (unsigned b = 0; MaskWords; b += 32, --MaskWords) {
+      uint32_t M = *Mask++;
+      if (InvertMask)
+        M = ~M;
+      if (AddBits)
+        Bits[i] |= BitWord(M) << b;
+      else
+        Bits[i] &= ~(BitWord(M) << b);
+    }
+    // Or-ing may have set bits past size(); restore the invariant.
+    if (AddBits)
+      clear_unused_bits();
+  }
+};
 } // end of namespace Ice
+namespace std {
+/// Implement std::swap in terms of BitVector swap.
+/// Calls to std::swap on two Ice::BitVector objects resolve to this overload
+/// and forward to the member Ice::BitVector::swap().
+/// NOTE(review): this is a new overload declared inside namespace std rather
+/// than a template specialization; the C++ standard restricts what may be
+/// added to std -- confirm this is acceptable or provide a swap() in
+/// namespace Ice that is found by argument-dependent lookup.
+inline void swap(Ice::BitVector &LHS, Ice::BitVector &RHS) { LHS.swap(RHS); }
+} // end of namespace std
 #endif // SUBZERO_SRC_ICEBITVECTOR_H
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -15,6 +15,7 @@
 #include "IceCfg.h"
 #include "IceAssembler.h"
+#include "IceBitVector.h"
 #include "IceCfgNode.h"
 #include "IceClFlags.h"
 #include "IceDefs.h"
@@ -237,8 +238,8 @@ void Cfg::computeInOutEdges() {
  // Prune any unreachable nodes before computing in-edges.
  SizeT NumNodes = getNumNodes();
-  llvm::BitVector Reachable(NumNodes);
+  BitVector Reachable(NumNodes);
-  llvm::BitVector Pending(NumNodes);
+  BitVector Pending(NumNodes);
  Pending.set(getEntryNode()->getIndex());
  while (true) {
    int Index = Pending.find_first();
@@ -427,7 +428,7 @@ void Cfg::reorderNodes() {
 }
 namespace {
-void getRandomPostOrder(CfgNode *Node, llvm::BitVector &ToVisit,
+void getRandomPostOrder(CfgNode *Node, BitVector &ToVisit,
                        Ice::NodeList &PostOrder,
                        Ice::RandomNumberGenerator *RNG) {
  assert(ToVisit[Node->getIndex()]);
@@ -449,7 +450,7 @@ void Cfg::shuffleNodes() {
  NodeList ReversedReachable;
  NodeList Unreachable;
-  llvm::BitVector ToVisit(Nodes.size(), true);
+  BitVector ToVisit(Nodes.size(), true);
  // Create Random number generator for function reordering
  RandomNumberGenerator RNG(Ctx->getFlags().getRandomSeed(),
                            RPE_BasicBlockReordering, SequenceNumber);
@@ -813,7 +814,7 @@ void Cfg::liveness(LivenessMode Mode) {
  getVMetadata()->init(VMK_Uses);
  Live->init();
  // Initialize with all nodes needing to be processed.
-  llvm::BitVector NeedToProcess(Nodes.size(), true);
+  BitVector NeedToProcess(Nodes.size(), true);
  while (NeedToProcess.any()) {
    // Iterate in reverse topological order to speed up convergence.
    for (CfgNode *Node : reverse_range(Nodes)) {

--- a/src/IceDefs.h
+++ b/src/IceDefs.h
@@ -21,7 +21,6 @@
 #include "IceTLS.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/ilist_node.h"
 #include "llvm/ADT/iterator_range.h"
@@ -50,6 +49,7 @@
 namespace Ice {
 class Assembler;
+class BitVector;
 class Cfg;
 class CfgNode;
 class Constant;
@@ -155,7 +155,7 @@ using InstNumberT = int32_t;
 /// range.
 using LiveBeginEndMapEntry = std::pair<SizeT, InstNumberT>;
 using LiveBeginEndMap = CfgVector<LiveBeginEndMapEntry>;
-using LivenessBV = llvm::BitVector;
+using LivenessBV = BitVector;
 using TimerStackIdT = uint32_t;
 using TimerIdT = uint32_t;

--- a/src/IceLiveness.h
+++ b/src/IceLiveness.h
@@ -21,8 +21,9 @@
 #ifndef SUBZERO_SRC_ICELIVENESS_H
 #define SUBZERO_SRC_ICELIVENESS_H
-#include "IceCfgNode.h"
 #include "IceDefs.h"
+#include "IceBitVector.h"
+#include "IceCfgNode.h"
 #include "IceTypes.h"
 namespace Ice {
@@ -117,7 +118,7 @@ private:
  CfgVector<Variable *> LiveToVarMap;
  /// RangeMask[Variable::Number] indicates whether we want to track that
  /// Variable's live range.
-  llvm::BitVector RangeMask;
+  LivenessBV RangeMask;
 };
 } // end of namespace Ice

--- a/src/IceRNG.cpp
+++ b/src/IceRNG.cpp
@@ -14,6 +14,7 @@
 #include "IceRNG.h"
+#include <climits>
 #include <ctime>
 namespace Ice {

--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -18,6 +18,7 @@
 #include "IceTargetLowering.h"
+#include "IceBitVector.h"
 #include "IceCfg.h" // setError()
 #include "IceCfgNode.h"
 #include "IceGlobalContext.h"
@@ -543,7 +544,7 @@ void TargetLowering::getVarStackSlotParams(
    uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes,
    std::function<bool(Variable *)> TargetVarHook) {
  const VariablesMetadata *VMetadata = Func->getVMetadata();
-  llvm::BitVector IsVarReferenced(Func->getNumVariables());
+  BitVector IsVarReferenced(Func->getNumVariables());
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &Instr : Node->getInsts()) {
      if (Instr.isDeleted())

--- a/src/IceTypes.cpp
+++ b/src/IceTypes.cpp
@@ -16,6 +16,8 @@
 #include "IceDefs.h"
+#include <climits>
 namespace Ice {
 namespace {

--- a/src/main.cpp
+++ b/src/main.cpp
@@ -17,6 +17,10 @@
 #include "IceBuildDefs.h"
 #include "IceCompileServer.h"
+#ifdef __pnacl__
+#include <malloc.h>
+#endif // __pnacl__
 /// Depending on whether we are building the compiler for the browser or
 /// standalone, we will end up creating a Ice::BrowserCompileServer or
 /// Ice::CLCompileServer object. Method
@@ -26,6 +30,15 @@
 /// We can only compile the Ice::BrowserCompileServer object with the PNaCl
 /// compiler toolchain, when building Subzero as a sandboxed translator.
 int main(int argc, char **argv) {
+#ifdef __pnacl__
+#define M_GRANULARITY (-2)
+  // PNaCl's default malloc implementation grabs small chunks of memory with
+  // mmap at a time, hence causing significant slowdowns. This call ensures that
+  // mmap is used to allocate 16MB at a time, to amortize the system call cost.
+  mallopt(M_GRANULARITY, 16 * 1024 * 1024);
+#undef M_GRANULARITY
+#endif // __pnacl__
  if (Ice::BuildDefs::browser()) {
    assert(argc == 1);
    return Ice::BrowserCompileServer().runAndReturnErrorCode();