Commit 36d6aa65 by John Porto

Subzero. Moar performance tweaks.

1) Clones llvm::BitVector and makes it allocator-aware (using CfgLocalAllocator<>). 2) Uses mallopt to set the malloc granularity. The default granularity is too small, which forces too many mmap calls. BUG= R=sehr@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/1738683003 .
parent 8159aae6
......@@ -15,6 +15,7 @@
#include "IceCfg.h"
#include "IceAssembler.h"
#include "IceBitVector.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
......@@ -237,8 +238,8 @@ void Cfg::computeInOutEdges() {
// Prune any unreachable nodes before computing in-edges.
SizeT NumNodes = getNumNodes();
llvm::BitVector Reachable(NumNodes);
llvm::BitVector Pending(NumNodes);
BitVector Reachable(NumNodes);
BitVector Pending(NumNodes);
Pending.set(getEntryNode()->getIndex());
while (true) {
int Index = Pending.find_first();
......@@ -427,7 +428,7 @@ void Cfg::reorderNodes() {
}
namespace {
void getRandomPostOrder(CfgNode *Node, llvm::BitVector &ToVisit,
void getRandomPostOrder(CfgNode *Node, BitVector &ToVisit,
Ice::NodeList &PostOrder,
Ice::RandomNumberGenerator *RNG) {
assert(ToVisit[Node->getIndex()]);
......@@ -449,7 +450,7 @@ void Cfg::shuffleNodes() {
NodeList ReversedReachable;
NodeList Unreachable;
llvm::BitVector ToVisit(Nodes.size(), true);
BitVector ToVisit(Nodes.size(), true);
// Create Random number generator for function reordering
RandomNumberGenerator RNG(Ctx->getFlags().getRandomSeed(),
RPE_BasicBlockReordering, SequenceNumber);
......@@ -813,7 +814,7 @@ void Cfg::liveness(LivenessMode Mode) {
getVMetadata()->init(VMK_Uses);
Live->init();
// Initialize with all nodes needing to be processed.
llvm::BitVector NeedToProcess(Nodes.size(), true);
BitVector NeedToProcess(Nodes.size(), true);
while (NeedToProcess.any()) {
// Iterate in reverse topological order to speed up convergence.
for (CfgNode *Node : reverse_range(Nodes)) {
......
......@@ -21,7 +21,6 @@
#include "IceTLS.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
......@@ -50,6 +49,7 @@
namespace Ice {
class Assembler;
class BitVector;
class Cfg;
class CfgNode;
class Constant;
......@@ -155,7 +155,7 @@ using InstNumberT = int32_t;
/// range.
using LiveBeginEndMapEntry = std::pair<SizeT, InstNumberT>;
using LiveBeginEndMap = CfgVector<LiveBeginEndMapEntry>;
using LivenessBV = llvm::BitVector;
using LivenessBV = BitVector;
using TimerStackIdT = uint32_t;
using TimerIdT = uint32_t;
......
......@@ -21,8 +21,9 @@
#ifndef SUBZERO_SRC_ICELIVENESS_H
#define SUBZERO_SRC_ICELIVENESS_H
#include "IceCfgNode.h"
#include "IceDefs.h"
#include "IceBitVector.h"
#include "IceCfgNode.h"
#include "IceTypes.h"
namespace Ice {
......@@ -117,7 +118,7 @@ private:
CfgVector<Variable *> LiveToVarMap;
/// RangeMask[Variable::Number] indicates whether we want to track that
/// Variable's live range.
llvm::BitVector RangeMask;
LivenessBV RangeMask;
};
} // end of namespace Ice
......
......@@ -14,6 +14,7 @@
#include "IceRNG.h"
#include <climits>
#include <ctime>
namespace Ice {
......
......@@ -18,6 +18,7 @@
#include "IceTargetLowering.h"
#include "IceBitVector.h"
#include "IceCfg.h" // setError()
#include "IceCfgNode.h"
#include "IceGlobalContext.h"
......@@ -543,7 +544,7 @@ void TargetLowering::getVarStackSlotParams(
uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes,
std::function<bool(Variable *)> TargetVarHook) {
const VariablesMetadata *VMetadata = Func->getVMetadata();
llvm::BitVector IsVarReferenced(Func->getNumVariables());
BitVector IsVarReferenced(Func->getNumVariables());
for (CfgNode *Node : Func->getNodes()) {
for (Inst &Instr : Node->getInsts()) {
if (Instr.isDeleted())
......
......@@ -16,6 +16,8 @@
#include "IceDefs.h"
#include <climits>
namespace Ice {
namespace {
......
......@@ -17,6 +17,10 @@
#include "IceBuildDefs.h"
#include "IceCompileServer.h"
#ifdef __pnacl__
#include <malloc.h>
#endif // __pnacl__
/// Depending on whether we are building the compiler for the browser or
/// standalone, we will end up creating a Ice::BrowserCompileServer or
/// Ice::CLCompileServer object. Method
......@@ -26,6 +30,15 @@
/// We can only compile the Ice::BrowserCompileServer object with the PNaCl
/// compiler toolchain, when building Subzero as a sandboxed translator.
int main(int argc, char **argv) {
#ifdef __pnacl__
#define M_GRANULARITY (-2)
// PNaCl's default malloc implementation obtains memory from mmap in small
// chunks, which causes significant slowdowns. This call ensures that mmap is
// used to allocate 16MB at a time, to amortize the system call cost.
mallopt(M_GRANULARITY, 16 * 1024 * 1024);
#undef M_GRANULARITY
#endif // __pnacl__
if (Ice::BuildDefs::browser()) {
assert(argc == 1);
return Ice::BrowserCompileServer().runAndReturnErrorCode();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment