Subzero: Initial O2 lowering

Includes the following: 1. Liveness analysis. 2. Linear-scan register allocation. 3. Address mode optimization. 4. Compare-branch fusing. All of these depend on liveness analysis. There are three versions of liveness analysis (in order of increasing cost): 1. Lightweight. This computes last-uses for variables local to a single basic block. 2. Full. This computes last-uses for all variables based on global dataflow analysis. 3. Full live ranges. This computes all last-uses, plus calculates the live range intervals in terms of instruction numbers. (The live ranges are needed for register allocation.) For testing the full live range computation, Cfg::validateLiveness() checks every Variable of every Inst and verifies that the current Inst is contained within the Variable's live range. The cross tests are run with O2 in addition to Om1. Some of the lit tests (for what good they do) are updated with O2 code sequences. BUG= none R=jvoung@chromium.org Review URL: https://codereview.chromium.org/300563003

Subzero: Initial O2 lowering
d97c7df5 · Jim Stichnoth · 88a485ed · d97c7df5 · d97c7df5 · d97c7df5
Commit d97c7df5 authored Jun 04, 2014 by Jim Stichnoth
34 changed files
--- a/Makefile.standalone
+++ b/Makefile.standalone
@@ -39,7 +39,9 @@ SRCS= \
 	IceGlobalContext.cpp \
 	IceInst.cpp \
 	IceInstX8632.cpp \
+	IceLiveness.cpp \
 	IceOperand.cpp \
+	IceRegAlloc.cpp \
 	IceTargetLowering.cpp \
 	IceTargetLoweringX8632.cpp \
 	IceTypes.cpp \

--- a/crosstest/runtests.sh
+++ b/crosstest/runtests.sh
@@ -5,7 +5,7 @@

 set -eux

-OPTLEVELS="m1"
+OPTLEVELS="m1 2"
 OUTDIR=Output
 # Clean the output directory to avoid reusing stale results.
 rm -rf "${OUTDIR}"

--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -16,6 +16,7 @@
 #include "IceCfgNode.h"
 #include "IceDefs.h"
 #include "IceInst.h"
+#include "IceLiveness.h"
 #include "IceOperand.h"
 #include "IceTargetLowering.h"

@@ -24,7 +25,7 @@ namespace Ice {
 Cfg::Cfg(GlobalContext *Ctx)
    : Ctx(Ctx), FunctionName(""), ReturnType(IceType_void),
      IsInternalLinkage(false), HasError(false), ErrorMessage(""), Entry(NULL),
-      NextInstNumber(1),
+      NextInstNumber(1), Live(NULL),
      Target(TargetLowering::createLowering(Ctx->getTargetArch(), this)),
      CurrentNode(NULL) {}

@@ -62,14 +63,10 @@ void Cfg::addArg(Variable *Arg) {
 bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); }

 void Cfg::translate() {
-  Ostream &Str = Ctx->getStrDump();
  if (hasError())
    return;

-  if (Ctx->isVerbose()) {
-    Str << "================ Initial CFG ================\n";
-    dump();
-  }
+  dump("Initial CFG");

  Timer T_translate;
  // The set of translation passes and their order are determined by
@@ -77,10 +74,7 @@ void Cfg::translate() {
  getTarget()->translate();
  T_translate.printElapsedUs(getContext(), "translate()");

-  if (Ctx->isVerbose()) {
-    Str << "================ Final output ================\n";
-    dump();
-  }
+  dump("Final output");
 }

 void Cfg::computePredecessors() {
@@ -89,6 +83,13 @@ void Cfg::computePredecessors() {
  }
 }

+void Cfg::renumberInstructions() { // Restarts numbering at 1, then renumbers node by node in list order.
+  NextInstNumber = 1;
+  for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+    (*I)->renumberInstructions(); // per-node pass; CfgNode's implementation is not shown here
+  }
+}
+
 // placePhiLoads() must be called before placePhiStores().
 void Cfg::placePhiLoads() {
  for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
@@ -109,6 +110,12 @@ void Cfg::deletePhis() {
  }
 }

+void Cfg::doAddressOpt() { // Delegates to CfgNode::doAddressOpt() for each node in list order.
+  for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+    (*I)->doAddressOpt();
+  }
+}
+
 void Cfg::genCode() {
  for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
    (*I)->genCode();
@@ -128,6 +135,150 @@ void Cfg::genFrame() {
  }
 }

+// This is a lightweight version of live-range-end calculation.  Marks
+// the last use of only those variables whose definition and uses are
+// completely within a single block.  It is a quick single pass and
+// doesn't need to iterate until convergence.
+void Cfg::livenessLightweight() {
+  for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+    (*I)->livenessLightweight(); // each node is processed independently of the others
+  }
+}
+
+void Cfg::liveness(LivenessMode Mode) { // Global dataflow liveness; Liveness_Intervals additionally builds per-Variable live ranges.
+  Live.reset(new Liveness(this, Mode)); // fresh analysis state on every call
+  Live->init();
+  // Initialize with all nodes needing to be processed.
+  llvm::BitVector NeedToProcess(Nodes.size(), true);
+  while (NeedToProcess.any()) {
+    // Iterate over Nodes back to front (presumably a reverse topological order) to speed up convergence.
+    for (NodeList::reverse_iterator I = Nodes.rbegin(), E = Nodes.rend();
+         I != E; ++I) {
+      CfgNode *Node = *I;
+      if (NeedToProcess[Node->getIndex()]) {
+        NeedToProcess[Node->getIndex()] = false; // cleared before processing; may be set again by a successor
+        bool Changed = Node->liveness(getLiveness());
+        if (Changed) {
+          // If the beginning-of-block liveness changed since the last
+          // iteration, mark all in-edges as needing to be processed.
+          const NodeList &InEdges = Node->getInEdges();
+          for (NodeList::const_iterator I1 = InEdges.begin(),
+                                        E1 = InEdges.end();
+               I1 != E1; ++I1) {
+            CfgNode *Pred = *I1;
+            NeedToProcess[Pred->getIndex()] = true;
+          }
+        }
+      }
+    }
+  }
+  if (Mode == Liveness_Intervals) {
+    // Reset each variable's live range.
+    for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
+         I != E; ++I) {
+      if (Variable *Var = *I) // tolerates NULL entries in Variables
+        Var->resetLiveRange();
+    }
+  }
+  // Collect timing for just the portion that constructs the live
+  // range intervals based on the end-of-live-range computation, for a
+  // finer breakdown of the cost.
+  Timer T_liveRange; // only reported in the Liveness_Intervals branch below
+  // Make a final pass over instructions to delete dead instructions
+  // and build each Variable's live range.
+  for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+    (*I)->livenessPostprocess(Mode, getLiveness());
+  }
+  if (Mode == Liveness_Intervals) {
+    // Special treatment for live in-args.  Their liveness needs to
+    // extend beyond the beginning of the function, otherwise an arg
+    // whose only use is in the first instruction will end up having
+    // the trivial live range [1,1) and will *not* interfere with
+    // other arguments.  So if the first instruction of the method is
+    // "r=arg1+arg2", both args may be assigned the same register.
+    for (SizeT I = 0; I < Args.size(); ++I) {
+      Variable *Arg = Args[I];
+      if (!Live->getLiveRange(Arg).isEmpty()) { // args with no live range at all are left untouched
+        // Add live range [-1,0) with weight 0.  TODO: Here and below,
+        // use better symbolic constants along the lines of
+        // Inst::NumberDeleted and Inst::NumberSentinel instead of -1
+        // and 0.
+        Live->addLiveRange(Arg, -1, 0, 0);
+      }
+      // Do the same for i64 args that may have been lowered into i32
+      // Lo and Hi components.
+      Variable *Lo = Arg->getLo();
+      if (Lo && !Live->getLiveRange(Lo).isEmpty())
+        Live->addLiveRange(Lo, -1, 0, 0);
+      Variable *Hi = Arg->getHi();
+      if (Hi && !Live->getLiveRange(Hi).isEmpty())
+        Live->addLiveRange(Hi, -1, 0, 0);
+    }
+    // Copy Liveness::LiveRanges into individual variables.  TODO:
+    // Remove Variable::LiveRange and redirect to
+    // Liveness::LiveRanges.  TODO: make sure Variable weights
+    // are applied properly.
+    SizeT NumVars = Variables.size();
+    for (SizeT i = 0; i < NumVars; ++i) {
+      Variable *Var = Variables[i]; // NOTE(review): no NULL check here, unlike the reset loop above - confirm Variables never holds NULL
+      Var->setLiveRange(Live->getLiveRange(Var));
+      if (Var->getWeight().isInf())
+        Var->setLiveRangeInfiniteWeight();
+    }
+    T_liveRange.printElapsedUs(getContext(), "live range construction");
+    dump(); // prints only when the context is verbose
+  }
+}
+
+// Traverse every Variable of every Inst and verify that it
+// appears within the Variable's computed live range.  Returns true if no violation was found; each violation is written to the dump stream.
+bool Cfg::validateLiveness() const {
+  bool Valid = true;
+  Ostream &Str = Ctx->getStrDump();
+  for (NodeList::const_iterator I1 = Nodes.begin(), E1 = Nodes.end(); I1 != E1;
+       ++I1) {
+    CfgNode *Node = *I1;
+    InstList &Insts = Node->getInsts();
+    for (InstList::const_iterator I2 = Insts.begin(), E2 = Insts.end();
+         I2 != E2; ++I2) {
+      Inst *Inst = *I2;
+      if (Inst->isDeleted()) // deleted instructions are not checked
+        continue;
+      if (llvm::isa<InstFakeKill>(Inst)) // InstFakeKill is skipped here, as it is in Inst::liveness()
+        continue;
+      InstNumberT InstNumber = Inst->getNumber();
+      Variable *Dest = Inst->getDest();
+      if (Dest) {
+        // TODO: This instruction should actually begin Dest's live
+        // range, so we could probably test that this instruction is
+        // the beginning of some segment of Dest's live range.  But
+        // this wouldn't work with non-SSA temporaries during
+        // lowering.
+        if (!Dest->getLiveRange().containsValue(InstNumber)) {
+          Valid = false; // keep scanning so every violation gets reported
+          Str << "Liveness error: inst " << Inst->getNumber() << " dest ";
+          Dest->dump(this);
+          Str << " live range " << Dest->getLiveRange() << "\n";
+        }
+      }
+      for (SizeT I = 0; I < Inst->getSrcSize(); ++I) {
+        Operand *Src = Inst->getSrc(I);
+        SizeT NumVars = Src->getNumVars(); // an operand may contribute zero or more Variables
+        for (SizeT J = 0; J < NumVars; ++J) {
+          const Variable *Var = Src->getVar(J);
+          if (!Var->getLiveRange().containsValue(InstNumber)) {
+            Valid = false;
+            Str << "Liveness error: inst " << Inst->getNumber() << " var ";
+            Var->dump(this);
+            Str << " live range " << Var->getLiveRange() << "\n";
+          }
+        }
+      }
+    }
+  }
+  return Valid;
+}
+
 // ======================== Dump routines ======================== //

 void Cfg::emit() {
@@ -158,8 +309,13 @@ void Cfg::emit() {
  T_emit.printElapsedUs(Ctx, "emit()");
 }

-void Cfg::dump() {
+// Dumps the IR with an optional introductory message.
+void Cfg::dump(const IceString &Message) {
+  if (!Ctx->isVerbose())
+    return;
  Ostream &Str = Ctx->getStrDump();
+  if (!Message.empty())
+    Str << "================ " << Message << " ================\n";
  setCurrentNode(getEntryNode());
  // Print function name+args
  if (getContext()->isVerbose(IceV_Instructions)) {
@@ -176,6 +332,18 @@ void Cfg::dump() {
    Str << ") {\n";
  }
  setCurrentNode(NULL);
+  if (getContext()->isVerbose(IceV_Liveness)) {
+    // Print summary info about variables
+    for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
+         I != E; ++I) {
+      Variable *Var = *I;
+      Str << "//"
+          << " multiblock=" << Var->isMultiblockLife() << " "
+          << " weight=" << Var->getWeight() << " ";
+      Var->dump(this);
+      Str << " LIVE=" << Var->getLiveRange() << "\n";
+    }
+  }
  // Print each basic block
  for (NodeList::const_iterator I = Nodes.begin(), E = Nodes.end(); I != E;
       ++I) {

--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -58,7 +58,7 @@ public:
  const NodeList &getNodes() const { return Nodes; }

  // Manage instruction numbering.
-  int32_t newInstNumber() { return NextInstNumber++; }
+  InstNumberT newInstNumber() { return NextInstNumber++; }

  // Manage Variables.
  Variable *makeVariable(Type Ty, const CfgNode *Node,
@@ -72,6 +72,7 @@ public:

  // Miscellaneous accessors.
  TargetLowering *getTarget() const { return Target.get(); }
+  Liveness *getLiveness() const { return Live.get(); }
  bool hasComputedFrame() const;

  // Passes over the CFG.
@@ -80,11 +81,16 @@ public:
  // compute the predecessor edges, in the form of
  // CfgNode::InEdges[].
  void computePredecessors();
+  void renumberInstructions();
  void placePhiLoads();
  void placePhiStores();
  void deletePhis();
+  void doAddressOpt();
  void genCode();
  void genFrame();
+  void livenessLightweight();
+  void liveness(LivenessMode Mode);
+  bool validateLiveness() const;

  // Manage the CurrentNode field, which is used for validating the
  // Variable::DefNode field during dumping/emitting.
@@ -92,7 +98,7 @@ public:
  const CfgNode *getCurrentNode() const { return CurrentNode; }

  void emit();
-  void dump();
+  void dump(const IceString &Message = "");

  // Allocate data of type T using the per-Cfg allocator.
  template <typename T> T *allocate() { return Allocator.Allocate<T>(); }
@@ -136,9 +142,10 @@ private:
  IceString ErrorMessage;
  CfgNode *Entry; // entry basic block
  NodeList Nodes; // linearized node list; Entry should be first
-  int32_t NextInstNumber;
+  InstNumberT NextInstNumber;
  VarList Variables;
  VarList Args; // subset of Variables, in argument order
+  llvm::OwningPtr<Liveness> Live;
  llvm::OwningPtr<TargetLowering> Target;

  // CurrentNode is maintained during dumping/emitting just for

--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h
@@ -45,6 +45,7 @@ public:
  // Manage the instruction list.
  InstList &getInsts() { return Insts; }
  void appendInst(Inst *Inst);
+  void renumberInstructions();

  // Add a predecessor edge to the InEdges list for each of this
  // node's successors.
@@ -53,7 +54,11 @@ public:
  void placePhiLoads();
  void placePhiStores();
  void deletePhis();
+  void doAddressOpt();
  void genCode();
+  void livenessLightweight();
+  bool liveness(Liveness *Liveness);
+  void livenessPostprocess(LivenessMode Mode, Liveness *Liveness);
  void emit(Cfg *Func) const;
  void dump(Cfg *Func) const;


--- a/src/IceDefs.h
+++ b/src/IceDefs.h
@@ -50,6 +50,8 @@ class GlobalContext;
 class Inst;
 class InstPhi;
 class InstTarget;
+class LiveRange;
+class Liveness;
 class Operand;
 class TargetLowering;
 class Variable;
@@ -68,6 +70,23 @@ typedef std::vector<Constant *> ConstantList;
 // may be 64-bits wide) when we want to save space.
 typedef uint32_t SizeT;

+// InstNumberT is for holding an instruction number.  Instruction
+// numbers are used for representing Variable live ranges.
+typedef int32_t InstNumberT;
+
+enum LivenessMode { // Selects how much work Cfg::liveness() performs.
+  // Basic version of live-range-end calculation.  Marks the last uses
+  // of variables based on dataflow analysis.  Records the set of
+  // live-in and live-out variables for each block.  Identifies and
+  // deletes dead instructions (primarily stores).
+  Liveness_Basic,
+
+  // In addition to Liveness_Basic, also calculate the complete
+  // live range for each variable in a form suitable for interference
+  // calculation and register allocation.
+  Liveness_Intervals
+};
+
 enum VerboseItem {
  IceV_None = 0,
  IceV_Instructions = 1 << 0,

--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -12,6 +12,8 @@
 //
 //===----------------------------------------------------------------------===//

+#include <ctype.h> // isdigit()
+
 #include "IceDefs.h"
 #include "IceTypes.h"
 #include "IceCfg.h"
@@ -129,8 +131,14 @@ IceString GlobalContext::mangleName(const IceString &Name) const {
    return NewName;
  }

-  ItemsParsed = sscanf(Name.c_str(), "_Z%u%s", &BaseLength, NameBase);
-  if (ItemsParsed == 2 && BaseLength <= strlen(NameBase)) {
+  // Artificially limit BaseLength to 9 digits (less than 1 billion)
+  // because sscanf behavior is undefined on integer overflow.  If
+  // there are more than 9 digits (which we test by looking at the
+  // beginning of NameBase), then we consider this a failure to parse
+  // a namespace mangling, and fall back to the simple prefixing.
+  ItemsParsed = sscanf(Name.c_str(), "_Z%9u%s", &BaseLength, NameBase);
+  if (ItemsParsed == 2 && BaseLength <= strlen(NameBase) &&
+      !isdigit(NameBase[0])) {
    // Transform _Z3barxyz ==> _ZN6Prefix3barExyz
    //                           ^^^^^^^^    ^
    // (splice in "N6Prefix", and insert "E" after "3bar")

--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -15,6 +15,7 @@
 #include "IceCfg.h"
 #include "IceCfgNode.h"
 #include "IceInst.h"
+#include "IceLiveness.h"
 #include "IceOperand.h"

 namespace Ice {
@@ -74,25 +75,144 @@ const size_t InstIcmpAttributesSize = llvm::array_lengthof(InstIcmpAttributes);
 } // end of anonymous namespace

 Inst::Inst(Cfg *Func, InstKind Kind, SizeT MaxSrcs, Variable *Dest)
-    : Kind(Kind), Number(Func->newInstNumber()), Deleted(false),
+    : Kind(Kind), Number(Func->newInstNumber()), Deleted(false), Dead(false),
      HasSideEffects(false), Dest(Dest), MaxSrcs(MaxSrcs), NumSrcs(0),
-      Srcs(Func->allocateArrayOf<Operand *>(MaxSrcs)) {}
+      Srcs(Func->allocateArrayOf<Operand *>(MaxSrcs)), LiveRangesEnded(0) {}
+
+// Assign the instruction a new number.  A deleted instruction gets NumberDeleted and does not consume a fresh number.
+void Inst::renumber(Cfg *Func) {
+  Number = isDeleted() ? NumberDeleted : Func->newInstNumber();
+}
+
+// Delete the instruction if its tentative Dead flag is still set
+// after liveness analysis.
+void Inst::deleteIfDead() {
+  if (Dead) // Dead is recomputed on every Inst::liveness() call
+    setDeleted();
+}
+
+// If TestSrc is a Variable, it returns true if this instruction ends
+// TestSrc's live range.  Otherwise, returns false.
+bool Inst::isLastUse(const Operand *TestSrc) const {
+  if (LiveRangesEnded == 0)
+    return false; // early-exit optimization
+  if (const Variable *TestVar = llvm::dyn_cast<const Variable>(TestSrc)) {
+    LREndedBits Mask = LiveRangesEnded; // low bit always describes the next variable in flattened source order
+    for (SizeT I = 0; I < getSrcSize(); ++I) {
+      Operand *Src = getSrc(I);
+      SizeT NumVars = Src->getNumVars();
+      for (SizeT J = 0; J < NumVars; ++J) {
+        const Variable *Var = Src->getVar(J);
+        if (Var == TestVar) {
+          // We've found where the variable is used in the instruction.
+          return Mask & 1; // only the first occurrence of TestVar is consulted
+        }
+        Mask >>= 1;
+        if (Mask == 0)
+          return false; // another early-exit optimization
+      }
+    }
+  }
+  return false;
+}

 void Inst::updateVars(CfgNode *Node) {
  if (Dest)
    Dest->setDefinition(this, Node);

-  SizeT VarIndex = 0;
  for (SizeT I = 0; I < getSrcSize(); ++I) {
    Operand *Src = getSrc(I);
    SizeT NumVars = Src->getNumVars();
-    for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
+    for (SizeT J = 0; J < NumVars; ++J) {
      Variable *Var = Src->getVar(J);
      Var->setUse(this, Node);
    }
  }
 }

+void Inst::livenessLightweight(llvm::BitVector &Live) { // Marks last uses of single-block variables; Live is indexed by Variable::getIndex().
+  assert(!isDeleted());
+  if (llvm::isa<InstFakeKill>(this))
+    return;
+  resetLastUses();
+  SizeT VarIndex = 0; // position of the variable in the flattened source-variable list
+  for (SizeT I = 0; I < getSrcSize(); ++I) {
+    Operand *Src = getSrc(I);
+    SizeT NumVars = Src->getNumVars();
+    for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
+      const Variable *Var = Src->getVar(J);
+      if (Var->isMultiblockLife()) // multi-block variables are left to the full analysis
+        continue;
+      SizeT Index = Var->getIndex();
+      if (Live[Index]) // already seen; presumably the caller walks instructions backwards - TODO confirm in CfgNode
+        continue;
+      Live[Index] = true;
+      setLastUse(VarIndex);
+    }
+  }
+}
+
+void Inst::liveness(InstNumberT InstNumber, llvm::BitVector &Live,
+                    Liveness *Liveness, const CfgNode *Node) { // Per-instruction liveness step; Live is indexed by Liveness::getLiveIndex().
+  assert(!isDeleted());
+  if (llvm::isa<InstFakeKill>(this))
+    return;
+
+  std::vector<InstNumberT> &LiveBegin = Liveness->getLiveBegin(Node);
+  std::vector<InstNumberT> &LiveEnd = Liveness->getLiveEnd(Node);
+  Dead = false; // tentative flag, recomputed on every pass
+  if (Dest) {
+    SizeT VarNum = Liveness->getLiveIndex(Dest);
+    if (Live[VarNum]) {
+      Live[VarNum] = false; // Dest is defined here, so it is not live before this instruction
+      LiveBegin[VarNum] = InstNumber;
+    } else {
+      if (!hasSideEffects()) // Dest is unused afterwards; only side-effect-free instructions may be dropped
+        Dead = true;
+    }
+  }
+  if (Dead)
+    return; // a dead instruction's sources are not marked live
+  // Phi arguments only get added to Live in the predecessor node, but
+  // we still need to update LiveRangesEnded.
+  bool IsPhi = llvm::isa<InstPhi>(this);
+  resetLastUses();
+  SizeT VarIndex = 0;
+  for (SizeT I = 0; I < getSrcSize(); ++I) {
+    Operand *Src = getSrc(I);
+    SizeT NumVars = Src->getNumVars();
+    for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
+      const Variable *Var = Src->getVar(J);
+      SizeT VarNum = Liveness->getLiveIndex(Var);
+      if (!Live[VarNum]) {
+        setLastUse(VarIndex);
+        if (!IsPhi) {
+          Live[VarNum] = true;
+          // For a variable in SSA form, its live range can end at
+          // most once in a basic block.  However, after lowering to
+          // two-address instructions, we end up with sequences like
+          // "t=b;t+=c;a=t" where t's live range begins and ends
+          // twice.  ICE only allows a variable to have a single
+          // liveness interval in a basic block (except for blocks
+          // where a variable is live-in and live-out but there is a
+          // gap in the middle, and except for the special
+          // InstFakeKill instruction that can appear multiple
+          // times in the same block).  Therefore, this lowered
+          // sequence needs to represent a single conservative live
+          // range for t.  Since the instructions are being traversed
+          // backwards, we make sure LiveEnd is only set once by
+          // setting it only when LiveEnd[VarNum]==0 (sentinel value).
+          // Note that it's OK to set LiveBegin multiple times because
+          // of the backwards traversal.
+          if (LiveEnd[VarNum] == 0) { // 0 has the same value as Inst::NumberSentinel
+            LiveEnd[VarNum] = InstNumber;
+          }
+        }
+      }
+    }
+  }
+}
+
 InstAlloca::InstAlloca(Cfg *Func, Operand *ByteCount, uint32_t AlignInBytes,
                       Variable *Dest)
    : Inst(Func, Inst::Alloca, 1, Dest), AlignInBytes(AlignInBytes) {
@@ -192,6 +312,28 @@ Operand *InstPhi::getOperandForTarget(CfgNode *Target) const {
  return NULL;
 }

+// Updates liveness for a particular operand based on the given
+// predecessor edge.  Doesn't mark the operand as live if the Phi
+// instruction is dead or deleted.
+void InstPhi::livenessPhiOperand(llvm::BitVector &Live, CfgNode *Target,
+                                 Liveness *Liveness) {
+  if (isDeleted() || Dead)
+    return;
+  for (SizeT I = 0; I < getSrcSize(); ++I) {
+    if (Labels[I] == Target) { // Labels[I] is matched against the requested edge
+      if (Variable *Var = llvm::dyn_cast<Variable>(getSrc(I))) { // non-Variable operands need no liveness update
+        SizeT SrcIndex = Liveness->getLiveIndex(Var);
+        if (!Live[SrcIndex]) {
+          setLastUse(I); // source index doubles as variable index; assumes at most one Variable per Phi source - TODO confirm
+          Live[SrcIndex] = true;
+        }
+      }
+      return;
+    }
+  }
+  llvm_unreachable("Phi operand not found for specified target node");
+}
+
 // Change "a=phi(...)" to "a_phi=phi(...)" and return a new
 // instruction "a=a_phi".
 Inst *InstPhi::lower(Cfg *Func, CfgNode *Node) {
@@ -294,8 +436,8 @@ void Inst::dumpDecorated(const Cfg *Func) const {
    return;
  if (Func->getContext()->isVerbose(IceV_InstNumbers)) {
    char buf[30];
-    int32_t Number = getNumber();
-    if (Number < 0)
+    InstNumberT Number = getNumber();
+    if (Number == NumberDeleted)
      snprintf(buf, llvm::array_lengthof(buf), "[XXX]");
    else
      snprintf(buf, llvm::array_lengthof(buf), "[%3d]", Number);
@@ -305,6 +447,7 @@ void Inst::dumpDecorated(const Cfg *Func) const {
  if (isDeleted())
    Str << "  //";
  dump(Func);
+  dumpExtras(Func);
  Str << "\n";
 }

@@ -319,6 +462,32 @@ void Inst::dump(const Cfg *Func) const {
  dumpSources(Func);
 }

+void Inst::dumpExtras(const Cfg *Func) const { // Appends liveness annotations to the instruction's dump line.
+  Ostream &Str = Func->getContext()->getStrDump();
+  bool First = true; // true until the first LIVEEND entry has been printed
+  // Print "LIVEEND={a,b,c}" for all source operands whose live ranges
+  // are known to end at this instruction.
+  if (Func->getContext()->isVerbose(IceV_Liveness)) {
+    for (SizeT I = 0; I < getSrcSize(); ++I) {
+      Operand *Src = getSrc(I);
+      SizeT NumVars = Src->getNumVars();
+      for (SizeT J = 0; J < NumVars; ++J) {
+        const Variable *Var = Src->getVar(J);
+        if (isLastUse(Var)) {
+          if (First)
+            Str << " // LIVEEND={";
+          else
+            Str << ",";
+          Var->dump(Func);
+          First = false;
+        }
+      }
+    }
+    if (!First) // close the brace only if at least one entry was printed
+      Str << "}";
+  }
+}
+
 void Inst::dumpSources(const Cfg *Func) const {
  Ostream &Str = Func->getContext()->getStrDump();
  for (SizeT I = 0; I < getSrcSize(); ++I) {
@@ -553,4 +722,6 @@ void InstTarget::dump(const Cfg *Func) const {
  Inst::dump(Func);
 }

+void InstTarget::dumpExtras(const Cfg *Func) const { Inst::dumpExtras(Func); } // forwards to the base-class implementation
+
 } // end of namespace Ice
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -56,10 +56,14 @@ public:
  };
  InstKind getKind() const { return Kind; }

-  int32_t getNumber() const { return Number; }
+  InstNumberT getNumber() const { return Number; }
+  void renumber(Cfg *Func);
+  static const InstNumberT NumberDeleted = -1;
+  static const InstNumberT NumberSentinel = 0;

  bool isDeleted() const { return Deleted; }
  void setDeleted() { Deleted = true; }
+  void deleteIfDead();

  bool hasSideEffects() const { return HasSideEffects; }

@@ -71,6 +75,8 @@ public:
    return Srcs[I];
  }

+  bool isLastUse(const Operand *Src) const;
+
  // Returns a list of out-edges corresponding to a terminator
  // instruction, which is the last instruction of the block.
  virtual NodeList getTerminatorEdges() const {
@@ -88,8 +94,12 @@ public:
  // basic blocks, i.e. used in a different block from their definition.
  void updateVars(CfgNode *Node);

+  void livenessLightweight(llvm::BitVector &Live);
+  void liveness(InstNumberT InstNumber, llvm::BitVector &Live,
+                Liveness *Liveness, const CfgNode *Node);
  virtual void emit(const Cfg *Func) const;
  virtual void dump(const Cfg *Func) const;
+  virtual void dumpExtras(const Cfg *Func) const;
  void dumpDecorated(const Cfg *Func) const;
  void emitSources(const Cfg *Func) const;
  void dumpSources(const Cfg *Func) const;
@@ -105,15 +115,22 @@ protected:
    assert(NumSrcs < MaxSrcs);
    Srcs[NumSrcs++] = Src;
  }
+  void setLastUse(SizeT VarIndex) { // Records that the VarIndex-th flattened source variable's live range ends here.
+    if (VarIndex < CHAR_BIT * sizeof(LiveRangesEnded)) // indexes beyond the bit width are silently left untracked
+      LiveRangesEnded |= (((LREndedBits)1u) << VarIndex);
+  }
+  void resetLastUses() { LiveRangesEnded = 0; } // clears every last-use bit
  // The destroy() method lets the instruction cleanly release any
  // memory that was allocated via the Cfg's allocator.
  virtual void destroy(Cfg *Func) { Func->deallocateArrayOf<Operand *>(Srcs); }

  const InstKind Kind;
  // Number is the instruction number for describing live ranges.
-  int32_t Number;
+  InstNumberT Number;
  // Deleted means irrevocably deleted.
  bool Deleted;
+  // Dead means pending deletion after liveness analysis converges.
+  bool Dead;
  // HasSideEffects means the instruction is something like a function
  // call or a volatile load that can't be removed even if its Dest
  // variable is not live.
@@ -124,6 +141,18 @@ protected:
  SizeT NumSrcs;
  Operand **Srcs;

+  // LiveRangesEnded marks which Variables' live ranges end in this
+  // instruction.  An instruction can have an arbitrary number of
+  // source operands (e.g. a call instruction), and each source
+  // operand can contain 0 or 1 Variable (and target-specific operands
+  // could contain more than 1 Variable).  All the variables in an
+  // instruction are conceptually flattened and each variable is
+  // mapped to one bit position of the LiveRangesEnded bit vector.
+  // Only the first CHAR_BIT * sizeof(LREndedBits) variables are
+  // tracked this way.
+  typedef uint32_t LREndedBits; // only the first 32 flattened source variables are tracked
+  LREndedBits LiveRangesEnded;
+
 private:
  Inst(const Inst &) LLVM_DELETED_FUNCTION;
  Inst &operator=(const Inst &) LLVM_DELETED_FUNCTION;
@@ -393,6 +422,8 @@ public:
  }
  void addArgument(Operand *Source, CfgNode *Label);
  Operand *getOperandForTarget(CfgNode *Target) const;
+  void livenessPhiOperand(llvm::BitVector &Live, CfgNode *Target,
+                          Liveness *Liveness);
  Inst *lower(Cfg *Func, CfgNode *Node);
  virtual void dump(const Cfg *Func) const;
  static bool classof(const Inst *Inst) { return Inst->getKind() == Phi; }
@@ -626,6 +657,7 @@ class InstTarget : public Inst {
 public:
  virtual void emit(const Cfg *Func) const = 0;
  virtual void dump(const Cfg *Func) const;
+  virtual void dumpExtras(const Cfg *Func) const;
  static bool classof(const Inst *Inst) { return Inst->getKind() >= Target; }

 protected:

--- a/src/IceLiveness.cpp
+++ b/src/IceLiveness.cpp
+//===- subzero/src/IceLiveness.cpp - Liveness analysis implementation -----===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides some of the support for the Liveness class.  In
+// particular, it handles the sparsity representation of the mapping
+// between Variables and CfgNodes.  The idea is that since most
+// variables are used only within a single basic block, we can
+// partition the variables into "local" and "global" sets.  Instead of
+// sizing and indexing vectors according to Variable::Number, we
+// create a mapping such that global variables are mapped to low
+// indexes that are common across nodes, and local variables are
+// mapped to a higher index space that is shared across nodes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IceDefs.h"
+#include "IceCfg.h"
+#include "IceCfgNode.h"
+#include "IceInst.h"
+#include "IceLiveness.h"
+#include "IceOperand.h"
+
+namespace Ice {
+
+void Liveness::init() { // Builds the Variable <-> live-index mappings and sizes the per-node containers.
+  // Initialize most of the container sizes.
+  SizeT NumVars = Func->getVariables().size();
+  SizeT NumNodes = Func->getNumNodes();
+  Nodes.resize(NumNodes);
+  VarToLiveMap.resize(NumVars);
+  if (Mode == Liveness_Intervals) // LiveRanges stays empty in Liveness_Basic mode
+    LiveRanges.resize(NumVars);
+
+  // Count the number of globals, and the number of locals for each
+  // block.
+  for (SizeT i = 0; i < NumVars; ++i) {
+    Variable *Var = Func->getVariables()[i];
+    if (Var->isMultiblockLife()) {
+      ++NumGlobals; // NumGlobals starts at 0 in the constructor and is not reset here
+    } else {
+      SizeT Index = Var->getLocalUseNode()->getIndex(); // NOTE(review): assumes getLocalUseNode() is non-NULL for every single-block variable - confirm
+      ++Nodes[Index].NumLocals;
+    }
+  }
+
+  // Resize each LivenessNode::LiveToVarMap, and the global
+  // LiveToVarMap.  Reset the counts to 0.
+  for (SizeT i = 0; i < NumNodes; ++i) {
+    Nodes[i].LiveToVarMap.assign(Nodes[i].NumLocals, NULL);
+    Nodes[i].NumLocals = 0; // recounted in the sorting loop below
+  }
+  LiveToVarMap.assign(NumGlobals, NULL);
+
+  // Sort each variable into the appropriate LiveToVarMap.  Also set
+  // VarToLiveMap.
+  SizeT TmpNumGlobals = 0;
+  for (SizeT i = 0; i < NumVars; ++i) {
+    Variable *Var = Func->getVariables()[i];
+    SizeT VarIndex = Var->getIndex();
+    SizeT LiveIndex;
+    if (Var->isMultiblockLife()) {
+      LiveIndex = TmpNumGlobals++; // globals occupy live indexes [0, NumGlobals)
+      LiveToVarMap[LiveIndex] = Var;
+    } else {
+      SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
+      LiveIndex = Nodes[NodeIndex].NumLocals++;
+      Nodes[NodeIndex].LiveToVarMap[LiveIndex] = Var;
+      LiveIndex += NumGlobals; // locals are numbered per node, starting right after the globals
+    }
+    VarToLiveMap[VarIndex] = LiveIndex;
+  }
+  assert(NumGlobals == TmpNumGlobals);
+
+  // Process each node.
+  const NodeList &LNodes = Func->getNodes();
+  SizeT NumLNodes = LNodes.size();
+  for (SizeT i = 0; i < NumLNodes; ++i) {
+    LivenessNode &Node = Nodes[LNodes[i]->getIndex()];
+    // NumLocals, LiveToVarMap already initialized
+    Node.LiveIn.resize(NumGlobals); // LiveIn/LiveOut cover global variables only
+    Node.LiveOut.resize(NumGlobals);
+    // LiveBegin and LiveEnd are reinitialized before each pass over
+    // the block.
+  }
+}
+
+Variable *Liveness::getVariable(SizeT LiveIndex, const CfgNode *Node) const { // Inverse of getLiveIndex(); Node is consulted only for local indexes.
+  if (LiveIndex < NumGlobals)
+    return LiveToVarMap[LiveIndex]; // global: same mapping for every node
+  SizeT NodeIndex = Node->getIndex();
+  return Nodes[NodeIndex].LiveToVarMap[LiveIndex - NumGlobals]; // local: looked up in the node's own table
+}
+
+SizeT Liveness::getLiveIndex(const Variable *Var) const { // Maps a Variable to its liveness bit-vector index as assigned by init().
+  return VarToLiveMap[Var->getIndex()];
+}
+
+void Liveness::addLiveRange(Variable *Var, InstNumberT Start, InstNumberT End,
+                            uint32_t WeightDelta) { // Adds segment [Start,End) and WeightDelta to Var's live range.
+  LiveRange &LiveRange = LiveRanges[Var->getIndex()]; // indexed by variable index, not live index; LiveRanges is sized only in Liveness_Intervals mode
+  assert(WeightDelta != RegWeight::Inf);
+  LiveRange.addSegment(Start, End);
+  LiveRange.addWeight(WeightDelta);
+}
+
+LiveRange &Liveness::getLiveRange(Variable *Var) { // Returns a mutable reference; LiveRanges is sized only in Liveness_Intervals mode.
+  return LiveRanges[Var->getIndex()];
+}
+
+} // end of namespace Ice
--- a/src/IceLiveness.h
+++ b/src/IceLiveness.h
+//===- subzero/src/IceLiveness.h - Liveness analysis ------------*- C++ -*-===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Liveness and LivenessNode classes,
+// which are used for liveness analysis.  The node-specific
+// information tracked for each Variable includes whether it is
+// live on entry, whether it is live on exit, the instruction number
+// that starts its live range, and the instruction number that ends
+// its live range.  At the Cfg level, the actual live intervals are
+// recorded.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICELIVENESS_H
+#define SUBZERO_SRC_ICELIVENESS_H
+
+#include "IceDefs.h"
+#include "IceTypes.h"
+
+namespace Ice {
+
+class LivenessNode { // Per-CfgNode liveness bookkeeping; all data members are public and filled in by Liveness.
+public:
+  LivenessNode() : NumLocals(0) {}
+  // NumLocals is the number of Variables local to this block.
+  SizeT NumLocals;
+  // LiveToVarMap maps a liveness bitvector index to a Variable.  This
+  // is generally just for printing/dumping.  The index should be less
+  // than NumLocals + Liveness::NumGlobals.
+  std::vector<Variable *> LiveToVarMap;
+  // LiveIn and LiveOut track the in- and out-liveness of the global
+  // variables.  The size of each vector is
+  // Liveness::NumGlobals.
+  llvm::BitVector LiveIn, LiveOut;
+  // LiveBegin and LiveEnd track the instruction numbers of the start
+  // and end of each variable's live range within this block.  The
+  // size of each vector is NumLocals + Liveness::NumGlobals.
+  std::vector<InstNumberT> LiveBegin, LiveEnd;
+
+private:
+  // TODO: Disable these constructors when Liveness::Nodes is no
+  // longer an STL container.
+  // LivenessNode(const LivenessNode &) LLVM_DELETED_FUNCTION;
+  // LivenessNode &operator=(const LivenessNode &) LLVM_DELETED_FUNCTION;
+};
+
+class Liveness { // Function-wide liveness state: index mappings, per-node data, and (in Liveness_Intervals mode) live ranges.
+public:
+  Liveness(Cfg *Func, LivenessMode Mode)
+      : Func(Func), Mode(Mode), NumGlobals(0) {}
+  void init(); // must run before the accessors below are used; it sizes every container
+  Variable *getVariable(SizeT LiveIndex, const CfgNode *Node) const;
+  SizeT getLiveIndex(const Variable *Var) const;
+  SizeT getNumGlobalVars() const { return NumGlobals; }
+  SizeT getNumVarsInNode(const CfgNode *Node) const {
+    return NumGlobals + Nodes[Node->getIndex()].NumLocals;
+  }
+  llvm::BitVector &getLiveIn(const CfgNode *Node) {
+    return Nodes[Node->getIndex()].LiveIn;
+  }
+  llvm::BitVector &getLiveOut(const CfgNode *Node) {
+    return Nodes[Node->getIndex()].LiveOut;
+  }
+  std::vector<InstNumberT> &getLiveBegin(const CfgNode *Node) {
+    return Nodes[Node->getIndex()].LiveBegin;
+  }
+  std::vector<InstNumberT> &getLiveEnd(const CfgNode *Node) {
+    return Nodes[Node->getIndex()].LiveEnd;
+  }
+  LiveRange &getLiveRange(Variable *Var);
+  void addLiveRange(Variable *Var, InstNumberT Start, InstNumberT End,
+                    uint32_t WeightDelta);
+
+private:
+  Cfg *Func;
+  LivenessMode Mode;
+  SizeT NumGlobals; // count of variables for which isMultiblockLife() is true, computed by init()
+  // Size of Nodes is Cfg::Nodes.size().
+  std::vector<LivenessNode> Nodes;
+  // VarToLiveMap maps a Variable's Variable::Number to its live index
+  // within its basic block.
+  std::vector<SizeT> VarToLiveMap;
+  // LiveToVarMap is analogous to LivenessNode::LiveToVarMap, but for
+  // non-local variables.
+  std::vector<Variable *> LiveToVarMap;
+  // LiveRanges maps a Variable::Number to its live range.  It is resized only in Liveness_Intervals mode.
+  std::vector<LiveRange> LiveRanges;
+  Liveness(const Liveness &) LLVM_DELETED_FUNCTION;
+  Liveness &operator=(const Liveness &) LLVM_DELETED_FUNCTION;
+};
+
+} // end of namespace Ice
+
+#endif // SUBZERO_SRC_ICELIVENESS_H
--- a/src/IceOperand.cpp
+++ b/src/IceOperand.cpp
@@ -7,9 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the Operand class and its
-// target-independent subclasses, primarily for the methods of the
-// Variable class.
+// This file implements the Operand class and its target-independent
+// subclasses, primarily for the methods of the Variable class.
 //
 //===----------------------------------------------------------------------===//

@@ -36,6 +35,109 @@ bool operator==(const RegWeight &A, const RegWeight &B) {
  return !(B < A) && !(A < B);
 }

+// Adds the segment (Start, End) to this live range, merging it with a
+// neighboring segment when their endpoints coincide.  Two
+// implementations are selected by USE_SET:
+//  - USE_SET defined: Range is a std::set, so the new segment may
+//    land anywhere in the ordering.
+//  - USE_SET undefined: Range is a std::list, so the new segment must
+//    either follow the current last segment or (in-arg special case)
+//    precede the current first segment.
+void LiveRange::addSegment(InstNumberT Start, InstNumberT End) {
+#ifdef USE_SET
+  RangeElementType Element(Start, End);
+  RangeType::iterator Next = Range.lower_bound(Element);
+  assert(Next == Range.upper_bound(Element)); // Element not already present
+
+  // Beginning of code that merges contiguous segments.  TODO: change
+  // "if(true)" to "if(false)" to see if this extra optimization code
+  // gives any performance gain, or is just destabilizing.
+  if (true) {
+    // [FirstDelete, Next) is the run of existing segments that get
+    // absorbed into Element and are erased before Element is inserted.
+    RangeType::iterator FirstDelete = Next;
+    RangeType::iterator Prev = Next;
+    bool hasPrev = (Next != Range.begin());
+    bool hasNext = (Next != Range.end());
+    if (hasPrev)
+      --Prev;
+    // See if Element and Next should be joined.
+    if (hasNext && End == Next->first) {
+      Element.second = Next->second;
+      ++Next;
+    }
+    // See if Prev and Element should be joined.
+    if (hasPrev && Prev->second == Start) {
+      Element.first = Prev->first;
+      FirstDelete = Prev;
+    }
+    Range.erase(FirstDelete, Next);
+  }
+  // End of code that merges contiguous segments.
+
+  // Next is passed as the insertion position hint.
+  Range.insert(Next, Element);
+#else
+  // First segment: nothing to merge with.
+  if (Range.empty()) {
+    Range.push_back(RangeElementType(Start, End));
+    return;
+  }
+  // Special case for faking in-arg liveness.
+  // This is the only path that prepends; it requires a negative Start.
+  if (End < Range.front().first) {
+    assert(Start < 0);
+    Range.push_front(RangeElementType(Start, End));
+    return;
+  }
+  // Otherwise the new segment must not begin before the end of the
+  // current last segment.
+  InstNumberT CurrentEnd = Range.back().second;
+  assert(Start >= CurrentEnd);
+  // Check for merge opportunity.
+  // A segment starting exactly where the last one ends just extends it.
+  if (Start == CurrentEnd) {
+    Range.back().second = End;
+    return;
+  }
+  Range.push_back(RangeElementType(Start, End));
+#endif
+}
+
+// Reports whether this live range is finished by the time Other's
+// live range begins, i.e. the end of this range's last segment is
+// less than or equal to the start of Other's first segment.
+bool LiveRange::endsBefore(const LiveRange &Other) const {
+  // Empty ranges are not expected here; treat them gracefully as
+  // "ends before" rather than dereferencing an end iterator.
+  const bool EitherEmpty = Range.empty() || Other.Range.empty();
+  if (EitherEmpty)
+    return true;
+  const InstNumberT LastEnd = Range.rbegin()->second;
+  const InstNumberT FirstStart = Other.Range.begin()->first;
+  return !(OtherStartsEarlier(FirstStart, LastEnd));
+}
+
+// Reports whether any segment of this live range intersects any
+// segment of Other.  Both segment sequences are sorted, so a single
+// simultaneous pass over the two suffices.
+bool LiveRange::overlaps(const LiveRange &Other) const {
+  RangeType::const_iterator Mine = Range.begin();
+  RangeType::const_iterator Theirs = Other.Range.begin();
+  const RangeType::const_iterator MineEnd = Range.end();
+  const RangeType::const_iterator TheirsEnd = Other.Range.end();
+  while (Mine != MineEnd && Theirs != TheirsEnd) {
+    if (Mine->second <= Theirs->first) {
+      // My segment finishes before theirs starts; try my next one.
+      ++Mine;
+    } else if (Theirs->second <= Mine->first) {
+      // Their segment finishes before mine starts; try their next one.
+      ++Theirs;
+    } else {
+      // Neither segment ends before the other begins: they intersect.
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns true if the one-instruction segment starting at OtherBegin,
+// i.e. (OtherBegin, OtherBegin+1), overlaps any segment of this live
+// range.  The segments are scanned directly rather than through a
+// temporary LiveRange, which avoids a container allocation on every
+// query and never has to form OtherBegin+1.
+bool LiveRange::overlaps(InstNumberT OtherBegin) const {
+  for (RangeType::const_iterator I = Range.begin(), E = Range.end(); I != E;
+       ++I) {
+    // A segment that ends at or before OtherBegin cannot overlap it.
+    if (I->second <= OtherBegin)
+      continue;
+    // This is the first segment that ends after OtherBegin.  Since the
+    // segments are sorted, the answer is decided here: there is an
+    // overlap exactly when this segment has already started.
+    return I->first <= OtherBegin;
+  }
+  return false;
+}
+
+// Reports whether Value lies within some segment of this live range,
+// with both segment endpoints counted as inside.  Only the live range
+// validation code uses this.
+bool LiveRange::containsValue(InstNumberT Value) const {
+  RangeType::const_iterator Seg = Range.begin();
+  const RangeType::const_iterator SegEnd = Range.end();
+  while (Seg != SegEnd) {
+    const bool AtOrAfterStart = Seg->first <= Value;
+    const bool AtOrBeforeEnd = Value <= Seg->second;
+    if (AtOrAfterStart && AtOrBeforeEnd)
+      return true;
+    ++Seg;
+  }
+  return false;
+}
+
 void Variable::setUse(const Inst *Inst, const CfgNode *Node) {
  if (DefNode == NULL)
    return;
@@ -115,12 +217,12 @@ void Variable::dump(const Cfg *Func) const {
  }
 }

-void ConstantRelocatable::emit(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrEmit();
+void ConstantRelocatable::emit(GlobalContext *Ctx) const {
+  Ostream &Str = Ctx->getStrEmit();
  if (SuppressMangling)
    Str << Name;
  else
-    Str << Func->getContext()->mangleName(Name);
+    Str << Ctx->mangleName(Name);
  if (Offset) {
    if (Offset > 0)
      Str << "+";
@@ -128,13 +230,28 @@ void ConstantRelocatable::emit(const Cfg *Func) const {
  }
 }

-void ConstantRelocatable::dump(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrDump();
+void ConstantRelocatable::dump(GlobalContext *Ctx) const {
+  Ostream &Str = Ctx->getStrDump();
  Str << "@" << Name;
  if (Offset)
    Str << "+" << Offset;
 }

+// Writes the live range to Str as "(weight=W) " followed by the
+// segments in order, each printed as "[first:second)" and separated
+// by ", ".
+void LiveRange::dump(Ostream &Str) const {
+  Str << "(weight=" << Weight << ") ";
+  RangeType::const_iterator Seg = Range.begin();
+  const RangeType::const_iterator SegEnd = Range.end();
+  while (Seg != SegEnd) {
+    // Every segment except the first is preceded by a separator.
+    const bool IsFirst = (Seg == Range.begin());
+    if (!IsFirst)
+      Str << ", ";
+    Str << "[" << Seg->first << ":" << Seg->second << ")";
+    ++Seg;
+  }
+}
+
+// Stream insertion for LiveRange: forwards to LiveRange::dump() and
+// returns Str so insertions can be chained.
+Ostream &operator<<(Ostream &Str, const LiveRange &L) {
+  L.dump(Str);
+  return Str;
+}
+
 Ostream &operator<<(Ostream &Str, const RegWeight &W) {
  if (W.getWeight() == RegWeight::Inf)
    Str << "Inf";

--- a/src/IceOperand.h
+++ b/src/IceOperand.h
@@ -81,8 +81,10 @@ private:
 class Constant : public Operand {
 public:
  uint32_t getPoolEntryID() const { return PoolEntryID; }
-  virtual void emit(const Cfg *Func) const = 0;
-  virtual void dump(const Cfg *Func) const = 0;
+  virtual void emit(const Cfg *Func) const { emit(Func->getContext()); }
+  virtual void dump(const Cfg *Func) const { dump(Func->getContext()); }
+  virtual void emit(GlobalContext *Ctx) const = 0;
+  virtual void dump(GlobalContext *Ctx) const = 0;

  static bool classof(const Operand *Operand) {
    OperandKind Kind = Operand->getKind();
@@ -116,12 +118,14 @@ public:
        ConstantPrimitive(Ty, Value, PoolEntryID);
  }
  T getValue() const { return Value; }
-  virtual void emit(const Cfg *Func) const {
-    Ostream &Str = Func->getContext()->getStrEmit();
+  using Constant::emit;
+  virtual void emit(GlobalContext *Ctx) const {
+    Ostream &Str = Ctx->getStrEmit();
    Str << getValue();
  }
-  virtual void dump(const Cfg *Func) const {
-    Ostream &Str = Func->getContext()->getStrDump();
+  using Constant::dump;
+  virtual void dump(GlobalContext *Ctx) const {
+    Ostream &Str = Ctx->getStrDump();
    Str << getValue();
  }

@@ -178,8 +182,10 @@ public:
  IceString getName() const { return Name; }
  void setSuppressMangling(bool Value) { SuppressMangling = Value; }
  bool getSuppressMangling() const { return SuppressMangling; }
-  virtual void emit(const Cfg *Func) const;
-  virtual void dump(const Cfg *Func) const;
+  using Constant::emit;
+  using Constant::dump;
+  virtual void emit(GlobalContext *Ctx) const;
+  virtual void dump(GlobalContext *Ctx) const;

  static bool classof(const Operand *Operand) {
    OperandKind Kind = Operand->getKind();
@@ -228,6 +234,55 @@ bool operator<(const RegWeight &A, const RegWeight &B);
 bool operator<=(const RegWeight &A, const RegWeight &B);
 bool operator==(const RegWeight &A, const RegWeight &B);

+// LiveRange is a set of instruction number intervals representing
+// a variable's live range.  Generally there is one interval per basic
+// block where the variable is live, but adjacent intervals get
+// coalesced into a single interval.  LiveRange also includes a
+// weight, in case e.g. we want a live range to have higher weight
+// inside a loop.
+class LiveRange {
+public:
+  // Starts with no segments and a weight of 0.
+  LiveRange() : Weight(0) {}
+
+  // Returns the range to its freshly constructed state: no segments,
+  // weight 0.
+  void reset() {
+    Range.clear();
+    Weight.setWeight(0);
+  }
+  // Adds a segment, coalescing with an adjacent one where possible.
+  void addSegment(InstNumberT Start, InstNumberT End);
+
+  bool endsBefore(const LiveRange &Other) const;
+  bool overlaps(const LiveRange &Other) const;
+  bool overlaps(InstNumberT OtherBegin) const;
+  bool containsValue(InstNumberT Value) const;
+  bool isEmpty() const { return Range.empty(); }
+  // Returns the start of the first segment, or -1 if there are no
+  // segments.
+  InstNumberT getStart() const {
+    return Range.empty() ? -1 : Range.begin()->first;
+  }
+
+  RegWeight getWeight() const { return Weight; }
+  void setWeight(const RegWeight &NewWeight) { Weight = NewWeight; }
+  void addWeight(uint32_t Delta) { Weight.addWeight(Delta); }
+  void dump(Ostream &Str) const;
+
+  // Defining USE_SET uses std::set to hold the segments instead of
+  // std::list.  Using std::list will be slightly faster, but is more
+  // restrictive because new segments cannot be added in the middle.
+
+  //#define USE_SET
+
+private:
+  // A segment is a (start, end) pair of instruction numbers.
+  typedef std::pair<InstNumberT, InstNumberT> RangeElementType;
+#ifdef USE_SET
+  typedef std::set<RangeElementType> RangeType;
+#else
+  typedef std::list<RangeElementType> RangeType;
+#endif
+  // The segments, kept in increasing order.
+  RangeType Range;
+  RegWeight Weight;
+};
+
+Ostream &operator<<(Ostream &Str, const LiveRange &L);
+
 // Variable represents an operand that is register-allocated or
 // stack-allocated.  If it is register-allocated, it will ultimately
 // have a non-negative RegNum field.
@@ -263,6 +318,9 @@ public:
    assert(!hasReg() || RegNum == NewRegNum);
    RegNum = NewRegNum;
  }
+  // Accessors for RegNumTmp, the tentative register assignment used
+  // during register allocation.  hasRegTmp() is true when RegNumTmp
+  // is anything other than NoRegister.
+  bool hasRegTmp() const { return getRegNumTmp() != NoRegister; }
+  int32_t getRegNumTmp() const { return RegNumTmp; }
+  void setRegNumTmp(int32_t NewRegNum) { RegNumTmp = NewRegNum; }

  RegWeight getWeight() const { return Weight; }
  void setWeight(uint32_t NewWeight) { Weight = NewWeight; }
@@ -275,6 +333,19 @@ public:
    AllowRegisterOverlap = Overlap;
  }

+  // Read, replace (by copy), or clear this variable's live range.
+  // resetLiveRange() drops all segments and zeroes the range's weight.
+  const LiveRange &getLiveRange() const { return Live; }
+  void setLiveRange(const LiveRange &Range) { Live = Range; }
+  void resetLiveRange() { Live.reset(); }
+  // Appends the segment (Start, End) to this variable's live range and
+  // updates the range's weight.  A variable whose own weight is
+  // infinite forces the range weight to infinite; otherwise the range
+  // weight grows by WeightDelta scaled by the variable's weight.
+  // WeightDelta itself must not be RegWeight::Inf.
+  void addLiveRange(InstNumberT Start, InstNumberT End, uint32_t WeightDelta) {
+    assert(WeightDelta != RegWeight::Inf);
+    Live.addSegment(Start, End);
+    if (!Weight.isInf()) {
+      Live.addWeight(WeightDelta * Weight.getWeight());
+      return;
+    }
+    Live.setWeight(RegWeight::Inf);
+  }
+  // Forces the live range's weight to RegWeight::Inf.
+  void setLiveRangeInfiniteWeight() { Live.setWeight(RegWeight::Inf); }
+
  Variable *getLo() const { return LoVar; }
  Variable *getHi() const { return HiVar; }
  void setLoHi(Variable *Lo, Variable *Hi) {
@@ -304,8 +375,8 @@ private:
  Variable(Type Ty, const CfgNode *Node, SizeT Index, const IceString &Name)
      : Operand(kVariable, Ty), Number(Index), Name(Name), DefInst(NULL),
        DefNode(Node), IsArgument(false), StackOffset(0), RegNum(NoRegister),
-        Weight(1), RegisterPreference(NULL), AllowRegisterOverlap(false),
-        LoVar(NULL), HiVar(NULL) {
+        RegNumTmp(NoRegister), Weight(1), RegisterPreference(NULL),
+        AllowRegisterOverlap(false), LoVar(NULL), HiVar(NULL) {
    Vars = VarsReal;
    Vars[0] = this;
    NumVars = 1;
@@ -334,6 +405,8 @@ private:
  // RegNum is the allocated register, or NoRegister if it isn't
  // register-allocated.
  int32_t RegNum;
+  // RegNumTmp is the tentative assignment during register allocation.
+  int32_t RegNumTmp;
  RegWeight Weight; // Register allocation priority
  // RegisterPreference says that if possible, the register allocator
  // should prefer the register that was assigned to this linked
@@ -345,6 +418,7 @@ private:
  // RegisterPreference and "share" a register even if the two live
  // ranges overlap.
  bool AllowRegisterOverlap;
+  LiveRange Live;
  // LoVar and HiVar are needed for lowering from 64 to 32 bits.  When
  // lowering from I64 to I32 on a 32-bit architecture, we split the
  // variable into two machine-size pieces.  LoVar is the low-order

--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
--- a/src/IceRegAlloc.h
+++ b/src/IceRegAlloc.h
+//===- subzero/src/IceRegAlloc.h - Linear-scan reg. allocation --*- C++ -*-===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the data structures used during linear-scan
+// register allocation.  This includes LiveRangeWrapper which
+// encapsulates a variable and its live range, and LinearScan which
+// holds the various work queues for the linear-scan algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICEREGALLOC_H
+#define SUBZERO_SRC_ICEREGALLOC_H
+
+#include "IceDefs.h"
+#include "IceTypes.h"
+
+namespace Ice {
+
+// Currently this just wraps a Variable pointer, so in principle we
+// could use containers of Variable* instead of LiveRangeWrapper.  But
+// in the future, we may want to do more complex things such as live
+// range splitting, and keeping a wrapper should make that simpler.
+class LiveRangeWrapper {
+public:
+  // Wraps Var; the pointer is stored, not owned or copied.
+  LiveRangeWrapper(Variable *Var) : Var(Var) {}
+  // The wrapped variable's live range.
+  const LiveRange &range() const { return Var->getLiveRange(); }
+  // The queries below forward to the corresponding LiveRange methods
+  // on the two wrapped variables' ranges.
+  bool endsBefore(const LiveRangeWrapper &Other) const {
+    return range().endsBefore(Other.range());
+  }
+  bool overlaps(const LiveRangeWrapper &Other) const {
+    return range().overlaps(Other.range());
+  }
+  // True if this range overlaps the starting instruction number of
+  // Other's range.
+  bool overlapsStart(const LiveRangeWrapper &Other) const {
+    return range().overlaps(Other.range().getStart());
+  }
+  Variable *const Var;
+  void dump(const Cfg *Func) const;
+
+private:
+  // Copy construction is left enabled (presumably because wrappers are
+  // stored by value in STL containers); only assignment is disabled.
+  // LiveRangeWrapper(const LiveRangeWrapper &) LLVM_DELETED_FUNCTION;
+  LiveRangeWrapper &operator=(const LiveRangeWrapper &) LLVM_DELETED_FUNCTION;
+};
+
+// LinearScan holds the work queues for linear-scan register
+// allocation over a single function.
+class LinearScan {
+public:
+  LinearScan(Cfg *Func) : Func(Func) {}
+  // Runs the allocation; RegMask is the set of registers to consider
+  // (see the definition for details -- not visible in this header).
+  void scan(const llvm::SmallBitVector &RegMask);
+  void dump(Cfg *Func) const;
+
+private:
+  Cfg *const Func;
+  // RangeCompare is the comparator for sorting a LiveRangeWrapper
+  // by starting point in a std::set<>.  Ties are broken by variable
+  // number so that sorting is stable.
+  struct RangeCompare {
+    bool operator()(const LiveRangeWrapper &L,
+                    const LiveRangeWrapper &R) const {
+      InstNumberT Lstart = L.Var->getLiveRange().getStart();
+      InstNumberT Rstart = R.Var->getLiveRange().getStart();
+      if (Lstart == Rstart)
+        return L.Var->getIndex() < R.Var->getIndex();
+      return Lstart < Rstart;
+    }
+  };
+  // Unhandled is kept ordered by RangeCompare; the other queues are
+  // plain lists with no ordering imposed by their type.
+  typedef std::set<LiveRangeWrapper, RangeCompare> OrderedRanges;
+  typedef std::list<LiveRangeWrapper> UnorderedRanges;
+  OrderedRanges Unhandled;
+  UnorderedRanges Active, Inactive, Handled;
+  // Copying is disabled.
+  LinearScan(const LinearScan &) LLVM_DELETED_FUNCTION;
+  LinearScan &operator=(const LinearScan &) LLVM_DELETED_FUNCTION;
+};
+
+} // end of namespace Ice
+
+#endif // SUBZERO_SRC_ICEREGALLOC_H
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -18,6 +18,7 @@
 #include "IceCfg.h" // setError()
 #include "IceCfgNode.h"
 #include "IceOperand.h"
+#include "IceRegAlloc.h"
 #include "IceTargetLowering.h"
 #include "IceTargetLoweringX8632.h"

@@ -66,6 +67,15 @@ TargetLowering *TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
  return NULL;
 }

+// Attempts address mode optimization on the instruction at
+// Context.Cur: loads are handed to doAddressOptLoad(), stores to
+// doAddressOptStore(), and every other instruction kind is left
+// alone.  In all cases the context is then advanced past the
+// instruction.
+void TargetLowering::doAddressOpt() {
+  if (llvm::isa<InstLoad>(*Context.getCur()))
+    doAddressOptLoad();
+  else if (llvm::isa<InstStore>(*Context.getCur()))
+    doAddressOptStore();
+  // Step both the current and the next cursor forward.
+  Context.advanceCur();
+  Context.advanceNext();
+}
+
 // Lowers a single instruction according to the information in
 // Context, by checking the Context.Cur instruction kind and calling
 // the appropriate lowering method.  The lowering method should insert
@@ -144,4 +154,21 @@ void TargetLowering::lower() {
  Context.advanceNext();
 }

+// Runs linear-scan register allocation over Func.  Every caller-save
+// and callee-save register is made available, minus the stack pointer
+// and, when the function uses one, the frame pointer.  The register
+// set is built here so that it could later be parameterized, e.g. to
+// restrict registers for performance testing.
+void TargetLowering::regAlloc() {
+  // Registers the allocator may hand out.
+  RegSetMask Allowed = RegSet_None;
+  Allowed |= RegSet_CallerSave;
+  Allowed |= RegSet_CalleeSave;
+
+  // Registers that must never be handed out.
+  RegSetMask Forbidden = RegSet_None;
+  Forbidden |= RegSet_StackPointer;
+  if (hasFramePointer())
+    Forbidden |= RegSet_FramePointer;
+
+  llvm::SmallBitVector RegMask = getRegisterSet(Allowed, Forbidden);
+  LinearScan Allocator(Func);
+  Allocator.scan(RegMask);
+}
+
 } // end of namespace Ice
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -109,6 +109,8 @@ public:
    Func->setError("Target doesn't specify O2 lowering steps.");
  }

+  // Tries to do address mode optimization on a single instruction.
+  void doAddressOpt();
  // Lowers a single instruction.
  void lower();

@@ -173,6 +175,8 @@ protected:
  virtual void lowerSwitch(const InstSwitch *Inst) = 0;
  virtual void lowerUnreachable(const InstUnreachable *Inst) = 0;

+  // Per-target hooks for address mode optimization of the current
+  // load / store instruction.  The defaults do nothing; a target
+  // overrides them to opt in.
+  virtual void doAddressOptLoad() {}
+  virtual void doAddressOptStore() {}
  // This gives the target an opportunity to post-process the lowered
  // expansion before returning.  The primary intention is to do some
  // Register Manager activity as necessary, specifically to eagerly

--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -27,6 +27,7 @@ public:
  static TargetX8632 *create(Cfg *Func) { return new TargetX8632(Func); }

  virtual void translateOm1();
+  virtual void translateO2();

  virtual Variable *getPhysicalRegister(SizeT RegNum);
  virtual IceString getRegName(SizeT RegNum, Type Ty) const;
@@ -56,7 +57,7 @@ public:
  // latter could be done by directly writing to the stack).
  void split64(Variable *Var);
  void setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,
-                           int32_t BasicFrameOffset, int32_t &InArgsSizeBytes);
+                           size_t BasicFrameOffset, size_t &InArgsSizeBytes);
  Operand *loOperand(Operand *Operand);
  Operand *hiOperand(Operand *Operand);

@@ -89,6 +90,8 @@ protected:
  virtual void lowerStore(const InstStore *Inst);
  virtual void lowerSwitch(const InstSwitch *Inst);
  virtual void lowerUnreachable(const InstUnreachable *Inst);
+  virtual void doAddressOptLoad();
+  virtual void doAddressOptStore();

  // Operand legalization helpers.  To deal with address mode
  // constraints, the helpers will create a new Operand and emit
@@ -248,8 +251,8 @@ protected:
  }

  bool IsEbpBasedFrame;
-  int32_t FrameSizeLocals;
-  int32_t LocalsSizeBytes;
+  size_t FrameSizeLocals;
+  size_t LocalsSizeBytes;
  llvm::SmallBitVector TypeToRegisterSet[IceType_NUM];
  llvm::SmallBitVector ScratchRegs;
  llvm::SmallBitVector RegsUsed;
@@ -265,8 +268,8 @@ private:
  template <typename T> void emitConstantPool() const;
 };

-template <> void ConstantFloat::emit(const Cfg *Func) const;
-template <> void ConstantDouble::emit(const Cfg *Func) const;
+template <> void ConstantFloat::emit(GlobalContext *Ctx) const;
+template <> void ConstantDouble::emit(GlobalContext *Ctx) const;

 } // end of namespace Ice


--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -100,6 +100,29 @@ public:
    return Func;
  }

+  // Translates an LLVM constant into the matching Ice::Constant.
+  // Func is neither read nor required to be valid here, so this can be
+  // used outside of function translation, e.g. when constructing
+  // global initializers.  Handles global values (as symbols), integer
+  // constants, and float/double constants; anything else is flagged
+  // as unreachable.
+  Ice::Constant *convertConstant(const Constant *Const) {
+    if (const GlobalValue *GV = dyn_cast<GlobalValue>(Const))
+      return Ctx->getConstantSym(convertType(GV->getType()), 0, GV->getName());
+
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(Const))
+      return Ctx->getConstantInt(convertIntegerType(CI->getType()),
+                                 CI->getZExtValue());
+
+    const ConstantFP *CFP = dyn_cast<ConstantFP>(Const);
+    if (CFP == NULL) {
+      llvm_unreachable("Unhandled constant type");
+      return NULL;
+    }
+
+    // Floating point: only f32 and f64 are supported.
+    switch (convertType(CFP->getType())) {
+    case Ice::IceType_f32:
+      return Ctx->getConstantFloat(CFP->getValueAPF().convertToFloat());
+    case Ice::IceType_f64:
+      return Ctx->getConstantDouble(CFP->getValueAPF().convertToDouble());
+    default:
+      llvm_unreachable("Unexpected floating point type");
+      return NULL;
+    }
+  }
+
 private:
  // LLVM values (instructions, etc.) are mapped directly to ICE variables.
  // mapValueToIceVar has a version that forces an ICE type on the variable,
@@ -180,24 +203,7 @@ private:

  Ice::Operand *convertValue(const Value *Op) {
    if (const Constant *Const = dyn_cast<Constant>(Op)) {
-      if (const GlobalValue *GV = dyn_cast<GlobalValue>(Const)) {
-        return Ctx->getConstantSym(convertType(GV->getType()), 0,
-                                   GV->getName());
-      } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(Const)) {
-        return Ctx->getConstantInt(convertIntegerType(CI->getType()),
-                                   CI->getZExtValue());
-      } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Const)) {
-        Ice::Type Type = convertType(CFP->getType());
-        if (Type == Ice::IceType_f32)
-          return Ctx->getConstantFloat(CFP->getValueAPF().convertToFloat());
-        else if (Type == Ice::IceType_f64)
-          return Ctx->getConstantDouble(CFP->getValueAPF().convertToDouble());
-        llvm_unreachable("Unexpected floating point type");
-        return NULL;
-      } else {
-        llvm_unreachable("Unhandled constant type");
-        return NULL;
-      }
+      return convertConstant(Const);
    } else {
      return mapValueToIceVar(Op);
    }

--- a/tests_lit/llvm2ice_tests/64bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
@@ -2,7 +2,7 @@
 ; particular the patterns for lowering i64 operations into constituent
 ; i32 operations on x86-32.

-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s

--- a/tests_lit/llvm2ice_tests/alloc.ll
+++ b/tests_lit/llvm2ice_tests/alloc.ll
 ; This is a basic test of the alloca instruction - one test for alloca
 ; of a fixed size, and one test for variable size.

-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s

--- a/tests_lit/llvm2ice_tests/bitcast.ll
+++ b/tests_lit/llvm2ice_tests/bitcast.ll
 ; Trivial smoke test of bitcast between integer and FP types.

-; RUN: %llvm2ice --verbose inst %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
 ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
@@ -9,30 +9,38 @@
 define internal i32 @cast_f2i(float %f) {
 entry:
  %v0 = bitcast float %f to i32
-; CHECK: bitcast
  ret i32 %v0
 }

+; CHECK: mov eax
+; CHECK: ret
+
 define internal float @cast_i2f(i32 %i) {
 entry:
  %v0 = bitcast i32 %i to float
-; CHECK: bitcast
  ret float %v0
 }

+; CHECK: fld dword ptr
+; CHECK: ret
+
 define internal i64 @cast_d2ll(double %d) {
 entry:
  %v0 = bitcast double %d to i64
-; CHECK: bitcast
  ret i64 %v0
 }

+; CHECK: mov edx
+; CHECK: ret
+
 define internal double @cast_ll2d(i64 %ll) {
 entry:
  %v0 = bitcast i64 %ll to double
-; CHECK: bitcast
  ret double %v0
 }

+; CHECK: fld qword ptr
+; CHECK: ret
+
 ; ERRORS-NOT: ICE translation error
 ; DUMP-NOT: SZ
--- a/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
@@ -2,7 +2,7 @@
 ; should be to the same operand, whether it's in a register or on the
 ; stack.

-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s

--- a/tests_lit/llvm2ice_tests/casts.ll
+++ b/tests_lit/llvm2ice_tests/casts.ll
-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice --verbose inst %s | FileCheck %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
 ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \

--- a/tests_lit/llvm2ice_tests/cmp-opt.ll
+++ b/tests_lit/llvm2ice_tests/cmp-opt.ll
 ; Simple test of non-fused compare/branch.

-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s

--- a/tests_lit/llvm2ice_tests/convert.ll
+++ b/tests_lit/llvm2ice_tests/convert.ll
 ; Simple test of signed and unsigned integer conversions.

-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
-; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
 ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
@@ -32,25 +32,14 @@ entry:
  ret void
 }
 ; CHECK: from_int8:
-; CHECK: mov al, byte ptr [
-; CHECK-NEXT: movsx cx, al
-; CHECK-NEXT: mov word ptr [
-; CHECK-NEXT: movsx ecx, al
-; CHECK-NEXT: mov dword ptr [
-; CHECK-NEXT: movsx ecx, al
-; CHECK-NEXT: sar eax, 31
-; CHECK-NEXT: mov dword ptr [i64v+4],
-; CHECK-NEXT: mov dword ptr [i64v],
-;
-; OPTM1: from_int8:
-; OPTM1: mov {{.*}}, byte ptr [
-; OPTM1: movsx
-; OPTM1: mov word ptr [
-; OPTM1: movsx
-; OPTM1: mov dword ptr [
-; OPTM1: movsx
-; OPTM1: sar {{.*}}, 31
-; OPTM1: i64v
+; CHECK: mov {{.*}}, byte ptr [
+; CHECK: movsx
+; CHECK: mov word ptr [
+; CHECK: movsx
+; CHECK: mov dword ptr [
+; CHECK: movsx
+; CHECK: sar {{.*}}, 31
+; CHECK: i64v

 define void @from_int16() {
 entry:
@@ -68,24 +57,13 @@ entry:
  ret void
 }
 ; CHECK: from_int16:
-; CHECK: mov ax, word ptr [
-; CHECK-NEXT: mov cx, ax
-; CHECK-NEXT: mov byte ptr [
-; CHECK-NEXT: movsx ecx, ax
-; CHECK-NEXT: mov dword ptr [
-; CHECK-NEXT: movsx ecx, ax
-; CHECK-NEXT: sar eax, 31
-; CHECK-NEXT: mov dword ptr [i64v+4],
-; CHECK-NEXT: mov dword ptr [i64v],
-;
-; OPTM1: from_int16:
-; OPTM1: mov {{.*}}, word ptr [
-; OPTM1: i8v
-; OPTM1: movsx
-; OPTM1: i32v
-; OPTM1: movsx
-; OPTM1: sar {{.*}}, 31
-; OPTM1: i64v
+; CHECK: mov {{.*}}, word ptr [
+; CHECK: i8v
+; CHECK: movsx
+; CHECK: i32v
+; CHECK: movsx
+; CHECK: sar {{.*}}, 31
+; CHECK: i64v

 define void @from_int32() {
 entry:
@@ -103,22 +81,11 @@ entry:
  ret void
 }
 ; CHECK: from_int32:
-; CHECK: mov eax, dword ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov byte ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov word ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: sar eax, 31
-; CHECK-NEXT: mov dword ptr [i64v+4],
-; CHECK-NEXT: mov dword ptr [i64v],
-;
-; OPTM1: from_int32:
-; OPTM1: i32v
-; OPTM1: i8v
-; OPTM1: i16v
-; OPTM1: sar {{.*}}, 31
-; OPTM1: i64v
+; CHECK: i32v
+; CHECK: i8v
+; CHECK: i16v
+; CHECK: sar {{.*}}, 31
+; CHECK: i64v

 define void @from_int64() {
 entry:
@@ -136,18 +103,10 @@ entry:
  ret void
 }
 ; CHECK: from_int64:
-; CHECK: mov eax, dword ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov byte ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov word ptr [
-; CHECK-NEXT: mov dword ptr [
-;
-; OPTM1: from_int64:
-; OPTM1: i64v
-; OPTM1: i8v
-; OPTM1: i16v
-; OPTM1: i32v
+; CHECK: i64v
+; CHECK: i8v
+; CHECK: i16v
+; CHECK: i32v

 define void @from_uint8() {
 entry:
@@ -165,25 +124,14 @@ entry:
  ret void
 }
 ; CHECK: from_uint8:
-; CHECK: mov al, byte ptr [
-; CHECK-NEXT: movzx cx, al
-; CHECK-NEXT: mov word ptr [
-; CHECK-NEXT: movzx ecx, al
-; CHECK-NEXT: mov dword ptr [
-; CHECK-NEXT: movzx eax, al
-; CHECK-NEXT: mov ecx, 0
-; CHECK-NEXT: mov dword ptr [i64v+4],
-; CHECK-NEXT: mov dword ptr [i64v],
-;
-; OPTM1: from_uint8:
-; OPTM1: u8v
-; OPTM1: movzx
-; OPTM1: i16v
-; OPTM1: movzx
-; OPTM1: i32v
-; OPTM1: movzx
-; OPTM1: mov {{.*}}, 0
-; OPTM1: i64v
+; CHECK: u8v
+; CHECK: movzx
+; CHECK: i16v
+; CHECK: movzx
+; CHECK: i32v
+; CHECK: movzx
+; CHECK: mov {{.*}}, 0
+; CHECK: i64v

 define void @from_uint16() {
 entry:
@@ -201,24 +149,13 @@ entry:
  ret void
 }
 ; CHECK: from_uint16:
-; CHECK: mov ax, word ptr [
-; CHECK-NEXT: mov cx, ax
-; CHECK-NEXT: mov byte ptr [
-; CHECK-NEXT: movzx ecx, ax
-; CHECK-NEXT: mov dword ptr [
-; CHECK-NEXT: movzx eax, ax
-; CHECK-NEXT: mov ecx, 0
-; CHECK-NEXT: mov dword ptr [i64v+4],
-; CHECK-NEXT: mov dword ptr [i64v],
-;
-; OPTM1: from_uint16:
-; OPTM1: u16v
-; OPTM1: i8v
-; OPTM1: movzx
-; OPTM1: i32v
-; OPTM1: movzx
-; OPTM1: mov {{.*}}, 0
-; OPTM1: i64v
+; CHECK: u16v
+; CHECK: i8v
+; CHECK: movzx
+; CHECK: i32v
+; CHECK: movzx
+; CHECK: mov {{.*}}, 0
+; CHECK: i64v

 define void @from_uint32() {
 entry:
@@ -236,21 +173,11 @@ entry:
  ret void
 }
 ; CHECK: from_uint32:
-; CHECK: mov eax, dword ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov byte ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov word ptr [
-; CHECK-NEXT: mov ecx, 0
-; CHECK-NEXT: mov dword ptr [i64v+4],
-; CHECK-NEXT: mov dword ptr [i64v],
-;
-; OPTM1: from_uint32:
-; OPTM1: u32v
-; OPTM1: i8v
-; OPTM1: i16v
-; OPTM1: mov {{.*}}, 0
-; OPTM1: i64v
+; CHECK: u32v
+; CHECK: i8v
+; CHECK: i16v
+; CHECK: mov {{.*}}, 0
+; CHECK: i64v

 define void @from_uint64() {
 entry:
@@ -268,18 +195,10 @@ entry:
  ret void
 }
 ; CHECK: from_uint64:
-; CHECK: mov eax, dword ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov byte ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov word ptr [
-; CHECK-NEXT: mov dword ptr [
-;
-; OPTM1: from_uint64:
-; OPTM1: u64v
-; OPTM1: i8v
-; OPTM1: i16v
-; OPTM1: i32v
+; CHECK: u64v
+; CHECK: i8v
+; CHECK: i16v
+; CHECK: i32v

 ; ERRORS-NOT: ICE translation error
 ; DUMP-NOT: SZ
--- a/tests_lit/llvm2ice_tests/fp.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/fp.pnacl.ll
@@ -3,7 +3,7 @@
 ; that should be present regardless of the optimization level, so
 ; there are no special OPTM1 match lines.

-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s

--- a/tests_lit/llvm2ice_tests/fpconst.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/fpconst.pnacl.ll
@@ -6,6 +6,7 @@
 ; number in a reasonable number of digits".  See
 ; http://llvm.org/docs/LangRef.html#simple-constants .

+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s

--- a/tests_lit/llvm2ice_tests/select-opt.ll
+++ b/tests_lit/llvm2ice_tests/select-opt.ll
@@ -3,7 +3,7 @@
 ; regardless of the optimization level, so there are no special OPTM1
 ; match lines.

-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s

--- a/tests_lit/llvm2ice_tests/shift.ll
+++ b/tests_lit/llvm2ice_tests/shift.ll
 ; This is a test of C-level conversion operations that clang lowers
 ; into pairs of shifts.

-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s

--- a/tests_lit/llvm2ice_tests/simple-loop.ll
+++ b/tests_lit/llvm2ice_tests/simple-loop.ll
 ; This tests a simple loop that sums the elements of an input array.
 ; The O2 check patterns represent the best code currently achieved.

-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s

--- a/tests_lit/llvm2ice_tests/unreachable.ll
+++ b/tests_lit/llvm2ice_tests/unreachable.ll
@@ -20,5 +20,11 @@ return:                                           ; preds = %entry
  ret i32 %div
 }

+; CHECK: cmp
+; CHECK: call ice_unreachable
+; CHECK: cdq
+; CHECK: idiv
+; CHECK: ret
+
 ; ERRORS-NOT: ICE translation error
 ; DUMP-NOT: SZ