Commit 8acded03 by Jan Voung

Add initial integrated assembler w/ some Xmm ops.

Add a flag to use the integrated assembler. Handle simple XMM binary op instructions as an initial example of how instructions might be handled. This tests fixups in a very limited sense -- Track buffer locations of fixups for floating point immediates. Patchset one shows the original dart assembler code (revision 39313), so that it can be diffed. BUG=none R=stichnot@chromium.org Review URL: https://codereview.chromium.org/574133002
parent 144cdcea
......@@ -60,6 +60,8 @@ CXXFLAGS := $(LLVM_CXXFLAGS) -Wall -Wextra -Werror -fno-rtti \
LDFLAGS := $(HOST_FLAGS) -L$(LIBCXX_INSTALL_PATH)/lib
SRCS= \
assembler.cpp \
assembler_ia32.cpp \
IceCfg.cpp \
IceCfgNode.cpp \
IceConverter.cpp \
......@@ -68,6 +70,7 @@ SRCS= \
IceInstX8632.cpp \
IceIntrinsics.cpp \
IceLiveness.cpp \
IceMemoryRegion.cpp \
IceOperand.cpp \
IceRegAlloc.cpp \
IceRNG.cpp \
......
......@@ -28,7 +28,10 @@ Cfg::Cfg(GlobalContext *Ctx)
IsInternalLinkage(false), HasError(false), ErrorMessage(""), Entry(NULL),
NextInstNumber(1), Live(NULL),
Target(TargetLowering::createLowering(Ctx->getTargetArch(), this)),
VMetadata(new VariablesMetadata(this)), CurrentNode(NULL) {}
VMetadata(new VariablesMetadata(this)),
TargetAssembler(
TargetLowering::createAssembler(Ctx->getTargetArch(), this)),
CurrentNode(NULL) {}
Cfg::~Cfg() {}
......
......@@ -17,6 +17,9 @@
#include "IceDefs.h"
#include "IceTypes.h"
#include "assembler.h"
#include "IceClFlags.h"
#include "IceGlobalContext.h"
#include "llvm/ADT/OwningPtr.h"
......@@ -86,6 +89,12 @@ public:
TargetLowering *getTarget() const { return Target.get(); }
VariablesMetadata *getVMetadata() const { return VMetadata.get(); }
Liveness *getLiveness() const { return Live.get(); }
// Returns the per-function assembler, downcast to the requested type T.
// T must be the concrete Assembler subclass produced by
// TargetLowering::createAssembler for the current target (the
// static_cast is unchecked).
template <typename T> T *getAssembler() const {
return static_cast<T *>(TargetAssembler.get());
}
// Returns the UseIntegratedAssembler flag from the global context's
// command-line flags.  When true, node emission dispatches to
// Inst::emitIAS() (binary encoding) instead of Inst::emit() (textual
// assembly).
bool UseIntegratedAssembler() const {
return getContext()->getFlags().UseIntegratedAssembler;
}
bool hasComputedFrame() const;
// Passes over the CFG.
......@@ -166,6 +175,7 @@ private:
llvm::OwningPtr<Liveness> Live;
llvm::OwningPtr<TargetLowering> Target;
llvm::OwningPtr<VariablesMetadata> VMetadata;
llvm::OwningPtr<Assembler> TargetAssembler;
// CurrentNode is maintained during dumping/emitting just for
// validating Variable::DefNode. Normally, a traversal over
......
......@@ -492,7 +492,11 @@ void CfgNode::emit(Cfg *Func) const {
// suppress them.
if (Inst->isRedundantAssign())
continue;
(*I)->emit(Func);
if (Func->UseIntegratedAssembler()) {
(*I)->emitIAS(Func);
} else {
(*I)->emit(Func);
}
// Update emitted instruction count, plus fill/spill count for
// Variable operands without a physical register.
if (uint32_t Count = (*I)->getEmitInstCount()) {
......
......@@ -24,13 +24,15 @@ public:
ClFlags()
: DisableInternal(false), SubzeroTimingEnabled(false),
DisableTranslation(false), DisableGlobals(false),
FunctionSections(false), UseSandboxing(false), DumpStats(false),
DefaultGlobalPrefix(""), DefaultFunctionPrefix("") {}
FunctionSections(false), UseIntegratedAssembler(false),
UseSandboxing(false), DumpStats(false), DefaultGlobalPrefix(""),
DefaultFunctionPrefix("") {}
bool DisableInternal;
bool SubzeroTimingEnabled;
bool DisableTranslation;
bool DisableGlobals;
bool FunctionSections;
bool UseIntegratedAssembler;
bool UseSandboxing;
bool DumpStats;
IceString DefaultGlobalPrefix;
......
......@@ -130,11 +130,6 @@ private:
Timer &operator=(const Timer &) LLVM_DELETED_FUNCTION;
};
template <typename T> bool WouldOverflowAdd(T X, T Y) {
return ((X > 0 && Y > 0 && (X > std::numeric_limits<T>::max() - Y)) ||
(X < 0 && Y < 0 && (X < std::numeric_limits<T>::min() - Y)));
}
} // end of namespace Ice
#endif // SUBZERO_SRC_ICEDEFS_H
//===- subzero/src/IceFixups.h - Assembler fixup kinds ----------*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares generic fixup types.
//
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICEFIXUPS_H
#define SUBZERO_SRC_ICEFIXUPS_H
#include "IceTypes.def"
namespace Ice {
// Generic relocation kinds that an AssemblerFixup can carry.  Values
// below FK_FirstTargetSpecific are target-independent; targets number
// their own kinds starting at FK_FirstTargetSpecific so they cannot
// collide with the common ones.
enum FixupKind {
  // Specify some of the most common relocation types.
  FK_Abs_4 = 0,   // 4-byte absolute address.
  FK_PcRel_4 = 1, // 4-byte PC-relative displacement.
  // Target specific relocation types follow this.
  FK_FirstTargetSpecific = 1 << 4
};
} // end of namespace Ice
#endif // SUBZERO_SRC_ICEFIXUPS_H
......@@ -458,6 +458,8 @@ void Inst::emit(const Cfg * /*Func*/) const {
llvm_unreachable("emit() called on a non-lowered instruction");
}
// Default integrated-assembler emission: fall back to textual emission
// until a subclass provides a real binary encoding.
void Inst::emitIAS(const Cfg *Func) const { emit(Func); }
void Inst::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
......
......@@ -102,6 +102,7 @@ public:
// instruction results in a single native instruction.
virtual uint32_t getEmitInstCount() const { return 0; }
virtual void emit(const Cfg *Func) const;
virtual void emitIAS(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
virtual void dumpExtras(const Cfg *Func) const;
void dumpDecorated(const Cfg *Func) const;
......
......@@ -16,6 +16,7 @@
#ifndef SUBZERO_SRC_ICEINSTX8632_H
#define SUBZERO_SRC_ICEINSTX8632_H
#include "assembler_ia32.h"
#include "IceDefs.h"
#include "IceInst.h"
#include "IceConditionCodesX8632.h"
......@@ -75,6 +76,7 @@ public:
Variable *getIndex() const { return Index; }
uint16_t getShift() const { return Shift; }
SegmentRegisters getSegmentRegister() const { return SegmentReg; }
x86::Address toAsmAddress(Assembler *Asm) const;
virtual void emit(const Cfg *Func) const;
using OperandX8632::dump;
virtual void dump(const Cfg *Func, Ostream &Str) const;
......@@ -396,6 +398,7 @@ public:
InstX8632AdjustStack(Func, Amount, Esp);
}
virtual void emit(const Cfg *Func) const;
virtual void emitIAS(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Adjuststack); }
......@@ -478,6 +481,7 @@ public:
getSrc(0)->emit(Func);
Str << "\n";
}
// No binary encoding for this instruction yet; emit textual assembly.
virtual void emitIAS(const Cfg *Func) const { emit(Func); }
virtual void dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
......@@ -497,6 +501,52 @@ private:
static const char *Opcode;
};
void emitIASVarOperandTyXMM(const Cfg *Func, Type Ty, const Variable *Var,
const Operand *Src,
const x86::AssemblerX86::TypedXmmEmitters &Emitter);
// An XMM unary-op instruction (e.g. sqrtss), templated on the
// instruction kind K.  Each instantiation supplies its own textual
// Opcode string and its table of binary emitter functions (Emitter).
template <InstX8632::InstKindX8632 K>
class InstX8632UnaryopXmm : public InstX8632 {
public:
// Creates "Dest = op Src" with a single source operand.
static InstX8632UnaryopXmm *create(Cfg *Func, Variable *Dest, Operand *Src) {
return new (Func->allocate<InstX8632UnaryopXmm>())
InstX8632UnaryopXmm(Func, Dest, Src);
}
// Emits textual assembly: "\t<opcode>\t<dest>, <src>\n".
virtual void emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
Str << "\t" << Opcode << "\t";
getDest()->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
Str << "\n";
}
// Emits binary code via the integrated assembler, selecting the
// encoding from the Emitter table by the destination's type.
virtual void emitIAS(const Cfg *Func) const {
Type Ty = getDest()->getType();
assert(getSrcSize() == 1);
emitIASVarOperandTyXMM(Func, Ty, getDest(), getSrc(0), Emitter);
}
virtual void dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = " << Opcode << "." << getDest()->getType() << " ";
dumpSources(Func);
}
static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
private:
InstX8632UnaryopXmm(Cfg *Func, Variable *Dest, Operand *Src)
: InstX8632(Func, K, 1, Dest) {
addSource(Src);
}
InstX8632UnaryopXmm(const InstX8632UnaryopXmm &) LLVM_DELETED_FUNCTION;
InstX8632UnaryopXmm &
operator=(const InstX8632UnaryopXmm &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632UnaryopXmm() {}
// Per-instantiation textual mnemonic and binary-emitter table.
static const char *Opcode;
static const x86::AssemblerX86::TypedXmmEmitters Emitter;
};
// See the definition of emitTwoAddress() for a description of
// ShiftHack.
void emitTwoAddress(const char *Opcode, const Inst *Inst, const Cfg *Func,
......@@ -533,6 +583,46 @@ private:
static const char *Opcode;
};
// A two-address XMM binary-op instruction (e.g. addss or addps),
// templated on the instruction kind K.  NeedsElementType selects, for
// packed ops, the binary encoding by the vector's element type rather
// than by the vector type itself.
template <InstX8632::InstKindX8632 K, bool NeedsElementType>
class InstX8632BinopXmm : public InstX8632 {
public:
// Create an XMM binary-op instruction like addss or addps.
static InstX8632BinopXmm *create(Cfg *Func, Variable *Dest, Operand *Source) {
return new (Func->allocate<InstX8632BinopXmm>())
InstX8632BinopXmm(Func, Dest, Source);
}
// Emits textual assembly in two-address form (Dest is also src0).
virtual void emit(const Cfg *Func) const {
const bool ShiftHack = false;
emitTwoAddress(Opcode, this, Func, ShiftHack);
}
// Emits binary code; getSrc(1) is the right-hand operand because the
// constructor registered Dest as source 0 (two-address form).
virtual void emitIAS(const Cfg *Func) const {
Type Ty = getDest()->getType();
if (NeedsElementType)
Ty = typeElementType(Ty);
assert(getSrcSize() == 2);
emitIASVarOperandTyXMM(Func, Ty, getDest(), getSrc(1), Emitter);
}
virtual void dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = " << Opcode << "." << getDest()->getType() << " ";
dumpSources(Func);
}
static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
private:
// Dest is added as the first source to model the x86 two-address
// constraint (dest register is also an input).
InstX8632BinopXmm(Cfg *Func, Variable *Dest, Operand *Source)
: InstX8632(Func, K, 2, Dest) {
addSource(Dest);
addSource(Source);
}
InstX8632BinopXmm(const InstX8632BinopXmm &) LLVM_DELETED_FUNCTION;
InstX8632BinopXmm &operator=(const InstX8632BinopXmm &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632BinopXmm() {}
// Per-instantiation textual mnemonic and binary-emitter table.
static const char *Opcode;
static const x86::AssemblerX86::TypedXmmEmitters Emitter;
};
template <InstX8632::InstKindX8632 K> class InstX8632Ternop : public InstX8632 {
public:
// Create a ternary-op instruction like div or idiv.
......@@ -657,7 +747,7 @@ typedef InstX8632Unaryop<InstX8632::Bsf> InstX8632Bsf;
typedef InstX8632Unaryop<InstX8632::Bsr> InstX8632Bsr;
typedef InstX8632Unaryop<InstX8632::Lea> InstX8632Lea;
typedef InstX8632Unaryop<InstX8632::Movd> InstX8632Movd;
typedef InstX8632Unaryop<InstX8632::Sqrtss> InstX8632Sqrtss;
typedef InstX8632UnaryopXmm<InstX8632::Sqrtss> InstX8632Sqrtss;
// Cbwdq instruction - wrapper for cbw, cwd, and cdq
typedef InstX8632Unaryop<InstX8632::Cbwdq> InstX8632Cbwdq;
// Move/assignment instruction - wrapper for mov/movss/movsd.
......@@ -668,29 +758,29 @@ typedef InstX8632Movlike<InstX8632::Movp> InstX8632Movp;
// Movq - copy between XMM registers, or mem64 and XMM registers.
typedef InstX8632Movlike<InstX8632::Movq> InstX8632Movq;
typedef InstX8632Binop<InstX8632::Add> InstX8632Add;
typedef InstX8632Binop<InstX8632::Addps> InstX8632Addps;
typedef InstX8632BinopXmm<InstX8632::Addps, true> InstX8632Addps;
typedef InstX8632Binop<InstX8632::Adc> InstX8632Adc;
typedef InstX8632Binop<InstX8632::Addss> InstX8632Addss;
typedef InstX8632Binop<InstX8632::Padd> InstX8632Padd;
typedef InstX8632BinopXmm<InstX8632::Addss, false> InstX8632Addss;
typedef InstX8632BinopXmm<InstX8632::Padd, true> InstX8632Padd;
typedef InstX8632Binop<InstX8632::Sub> InstX8632Sub;
typedef InstX8632Binop<InstX8632::Subps> InstX8632Subps;
typedef InstX8632Binop<InstX8632::Subss> InstX8632Subss;
typedef InstX8632BinopXmm<InstX8632::Subps, true> InstX8632Subps;
typedef InstX8632BinopXmm<InstX8632::Subss, false> InstX8632Subss;
typedef InstX8632Binop<InstX8632::Sbb> InstX8632Sbb;
typedef InstX8632Binop<InstX8632::Psub> InstX8632Psub;
typedef InstX8632BinopXmm<InstX8632::Psub, true> InstX8632Psub;
typedef InstX8632Binop<InstX8632::And> InstX8632And;
typedef InstX8632Binop<InstX8632::Pand> InstX8632Pand;
typedef InstX8632Binop<InstX8632::Pandn> InstX8632Pandn;
typedef InstX8632BinopXmm<InstX8632::Pand, false> InstX8632Pand;
typedef InstX8632BinopXmm<InstX8632::Pandn, false> InstX8632Pandn;
typedef InstX8632Binop<InstX8632::Or> InstX8632Or;
typedef InstX8632Binop<InstX8632::Por> InstX8632Por;
typedef InstX8632BinopXmm<InstX8632::Por, false> InstX8632Por;
typedef InstX8632Binop<InstX8632::Xor> InstX8632Xor;
typedef InstX8632Binop<InstX8632::Pxor> InstX8632Pxor;
typedef InstX8632BinopXmm<InstX8632::Pxor, false> InstX8632Pxor;
typedef InstX8632Binop<InstX8632::Imul> InstX8632Imul;
typedef InstX8632Binop<InstX8632::Mulps> InstX8632Mulps;
typedef InstX8632Binop<InstX8632::Mulss> InstX8632Mulss;
typedef InstX8632BinopXmm<InstX8632::Mulps, true> InstX8632Mulps;
typedef InstX8632BinopXmm<InstX8632::Mulss, false> InstX8632Mulss;
typedef InstX8632Binop<InstX8632::Pmull> InstX8632Pmull;
typedef InstX8632Binop<InstX8632::Pmuludq> InstX8632Pmuludq;
typedef InstX8632Binop<InstX8632::Divps> InstX8632Divps;
typedef InstX8632Binop<InstX8632::Divss> InstX8632Divss;
typedef InstX8632BinopXmm<InstX8632::Pmuludq, false> InstX8632Pmuludq;
typedef InstX8632BinopXmm<InstX8632::Divps, true> InstX8632Divps;
typedef InstX8632BinopXmm<InstX8632::Divss, false> InstX8632Divss;
typedef InstX8632Binop<InstX8632::Rol, true> InstX8632Rol;
typedef InstX8632Binop<InstX8632::Shl, true> InstX8632Shl;
typedef InstX8632Binop<InstX8632::Psll> InstX8632Psll;
......@@ -828,6 +918,7 @@ public:
InstX8632Cmpps(Func, Dest, Source, Condition);
}
virtual void emit(const Cfg *Func) const;
virtual void emitIAS(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Cmpps); }
......@@ -941,6 +1032,7 @@ public:
InstX8632Ucomiss(Func, Src1, Src2);
}
virtual void emit(const Cfg *Func) const;
virtual void emitIAS(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Ucomiss); }
......@@ -1108,6 +1200,7 @@ public:
return new (Func->allocate<InstX8632Nop>()) InstX8632Nop(Func, Variant);
}
virtual void emit(const Cfg *Func) const;
virtual void emitIAS(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Nop); }
......@@ -1160,6 +1253,7 @@ public:
return new (Func->allocate<InstX8632Pop>()) InstX8632Pop(Func, Dest);
}
virtual void emit(const Cfg *Func) const;
virtual void emitIAS(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Pop); }
......@@ -1199,6 +1293,7 @@ public:
return new (Func->allocate<InstX8632Ret>()) InstX8632Ret(Func, Source);
}
virtual void emit(const Cfg *Func) const;
virtual void emitIAS(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Ret); }
......
// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
//
// Modified by the Subzero authors.
//
//===- subzero/src/IceMemoryRegion.cpp - Memory region --------------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the MemoryRegion class. It tracks a pointer plus its
// bounds for bounds-checking in debug mode.
//===----------------------------------------------------------------------===//
#include "IceMemoryRegion.h"

namespace Ice {

// Copies the full contents of |from| into this region, starting at byte
// |offset| within this region.  Debug-mode asserts verify that |from| is
// non-empty and that the copy fits entirely inside this region.
//
// The parameter type is size_t to match the declaration in
// IceMemoryRegion.h (it was previously spelled uintptr_t here, which only
// compiled because the two types coincide on supported targets).
// memmove is used so the copy is well-defined even if the two regions
// happen to overlap.
void MemoryRegion::CopyFrom(size_t offset, const MemoryRegion &from) const {
  assert(from.pointer() != NULL && from.size() > 0);
  assert(this->size() >= from.size());
  assert(offset <= this->size() - from.size());
  memmove(reinterpret_cast<void *>(start() + offset), from.pointer(),
          from.size());
}

} // end of namespace Ice
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
//
// Modified by the Subzero authors.
//
//===- subzero/src/IceMemoryRegion.h - Memory region ------------*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the MemoryRegion class. It tracks a pointer
// plus its bounds for bounds-checking in debug mode.
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICE_MEMORY_REGION_H_
#define SUBZERO_SRC_ICE_MEMORY_REGION_H_

#include "IceDefs.h"
#include "IceUtils.h"

namespace Ice {

// A MemoryRegion is a non-owning view of a chunk of memory: a base
// pointer plus a byte count.  It lets loads, stores, and copies be
// bounds-checked with asserts in debug mode.  Regions are cheap to
// copy by value and never free the memory they describe.
class MemoryRegion {
public:
  MemoryRegion() : pointer_(NULL), size_(0) {}
  MemoryRegion(void *pointer, size_t size) : pointer_(pointer), size_(size) {}
  MemoryRegion(const MemoryRegion &other)
      : pointer_(other.pointer_), size_(other.size_) {}
  MemoryRegion &operator=(const MemoryRegion &other) {
    pointer_ = other.pointer_;
    size_ = other.size_;
    return *this;
  }

  // Raw base pointer and extent accessors.
  void *pointer() const { return pointer_; }
  size_t size() const { return size_; }
  size_t start() const { return reinterpret_cast<size_t>(pointer_); }
  size_t end() const { return start() + size_; }

  // Typed, bounds-checked access at a byte offset into the region.
  template <typename T> T Load(size_t offset) const {
    return *ComputeInternalPointer<T>(offset);
  }
  template <typename T> void Store(size_t offset, T value) const {
    *ComputeInternalPointer<T>(offset) = value;
  }
  template <typename T> T *PointerTo(size_t offset) const {
    return ComputeInternalPointer<T>(offset);
  }

  // Returns true when address falls in the half-open range
  // [start(), end()).
  bool Contains(size_t address) const {
    return start() <= address && address < end();
  }

  void CopyFrom(size_t offset, const MemoryRegion &from) const;

  // Re-point this region at a sub-range of an existing one.
  void Subregion(const MemoryRegion &from, size_t offset, size_t size) {
    assert(from.size() >= size);
    assert(offset <= (from.size() - size));
    pointer_ = reinterpret_cast<void *>(from.start() + offset);
    size_ = size;
  }

  // Re-point this region at an existing one, grown by extra bytes.
  void Extend(const MemoryRegion &region, size_t extra) {
    pointer_ = region.pointer();
    size_ = (region.size() + extra);
  }

private:
  // Bounds-checked conversion of a byte offset into a typed pointer.
  template <typename T> T *ComputeInternalPointer(size_t offset) const {
    assert(size() >= sizeof(T));
    assert(offset <= size() - sizeof(T));
    return reinterpret_cast<T *>(start() + offset);
  }

  void *pointer_;
  size_t size_;
};

} // end of namespace Ice

#endif // SUBZERO_SRC_ICE_MEMORY_REGION_H_
......@@ -19,67 +19,71 @@
namespace Ice {
class RegX8632 {
public:
// An enum of every register. The enum value may not match the encoding
// used to binary encode register operands in instructions.
enum AllRegisters {
namespace RegX8632 {
// An enum of every register. The enum value may not match the encoding
// used to binary encode register operands in instructions.
enum AllRegisters {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
frameptr, isI8, isInt, isFP) \
val,
REGX8632_TABLE
REGX8632_TABLE
#undef X
Reg_NUM,
Reg_NUM,
#define X(val, init) val init,
REGX8632_TABLE_BOUNDS
REGX8632_TABLE_BOUNDS
#undef X
};
};
// An enum of GPR Registers. The enum value does match encoding used
// to binary encode register operands in instructions.
enum GPRRegister {
// An enum of GPR Registers. The enum value does match encoding used
// to binary encode register operands in instructions.
enum GPRRegister {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
frameptr, isI8, isInt, isFP) \
Encoded_##val encode,
REGX8632_GPR_TABLE
REGX8632_GPR_TABLE
#undef X
};
Encoded_Not_GPR = -1
};
// An enum of XMM Registers. The enum value does match encoding used
// to binary encode register operands in instructions.
enum XmmRegister {
// An enum of XMM Registers. The enum value does match encoding used
// to binary encode register operands in instructions.
enum XmmRegister {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
frameptr, isI8, isInt, isFP) \
Encoded_##val encode,
REGX8632_XMM_TABLE
REGX8632_XMM_TABLE
#undef X
};
Encoded_Not_Xmm = -1
};
// An enum of Byte Registers. The enum value does match encoding used
// to binary encode register operands in instructions.
enum ByteRegister {
// An enum of Byte Registers. The enum value does match encoding used
// to binary encode register operands in instructions.
enum ByteRegister {
#define X(val, encode) Encoded_##val encode,
REGX8632_BYTEREG_TABLE
REGX8632_BYTEREG_TABLE
#undef X
};
Encoded_Not_ByteReg = -1
};
static GPRRegister getEncodedGPR(int32_t RegNum) {
assert(Reg_GPR_First <= RegNum && RegNum <= Reg_GPR_Last);
return GPRRegister(RegNum - Reg_GPR_First);
}
static inline GPRRegister getEncodedGPR(int32_t RegNum) {
assert(Reg_GPR_First <= RegNum && RegNum <= Reg_GPR_Last);
return GPRRegister(RegNum - Reg_GPR_First);
}
static XmmRegister getEncodedXmm(int32_t RegNum) {
assert(Reg_XMM_First <= RegNum && RegNum <= Reg_XMM_Last);
return XmmRegister(RegNum - Reg_XMM_First);
}
static inline XmmRegister getEncodedXmm(int32_t RegNum) {
assert(Reg_XMM_First <= RegNum && RegNum <= Reg_XMM_Last);
return XmmRegister(RegNum - Reg_XMM_First);
}
static ByteRegister getEncodedByteReg(int32_t RegNum) {
assert(RegNum == Reg_ah || (Reg_GPR_First <= RegNum && RegNum <= Reg_ebx));
if (RegNum == Reg_ah)
return Encoded_Reg_ah;
return ByteRegister(RegNum - Reg_GPR_First);
}
};
static inline ByteRegister getEncodedByteReg(int32_t RegNum) {
assert(RegNum == Reg_ah || (Reg_GPR_First <= RegNum && RegNum <= Reg_ebx));
if (RegNum == Reg_ah)
return Encoded_Reg_ah;
return ByteRegister(RegNum - Reg_GPR_First);
}
} // end of namespace RegX8632
} // end of namespace Ice
......
......@@ -15,6 +15,7 @@
//
//===----------------------------------------------------------------------===//
#include "assembler_ia32.h"
#include "IceCfg.h" // setError()
#include "IceCfgNode.h"
#include "IceOperand.h"
......@@ -97,6 +98,15 @@ TargetLowering *TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
return NULL;
}
// Factory for the per-function integrated assembler, keyed by target
// architecture.  On an unsupported target this records an error on the
// Cfg and returns NULL instead of an assembler.
Assembler *TargetLowering::createAssembler(TargetArch Target, Cfg *Func) {
  // These statements can be #ifdef'd to specialize the assembler
  // to a subset of the available targets.  TODO: use CRTP.
  if (Target != Target_X8632) {
    Func->setError("Unsupported target");
    return NULL;
  }
  return new x86::AssemblerX86();
}
void TargetLowering::doAddressOpt() {
if (llvm::isa<InstLoad>(*Context.getCur()))
doAddressOptLoad();
......
......@@ -25,6 +25,8 @@
namespace Ice {
class Assembler;
// LoweringContext makes it easy to iterate through non-deleted
// instructions in a node, and insert new (lowered) instructions at
// the current point. Along with the instruction list container and
......@@ -87,6 +89,7 @@ private:
class TargetLowering {
public:
static TargetLowering *createLowering(TargetArch Target, Cfg *Func);
static Assembler *createAssembler(TargetArch Target, Cfg *Func);
void translate() {
switch (Ctx->getOptLevel()) {
case Opt_m1:
......
......@@ -24,6 +24,7 @@
#include "IceRegistersX8632.h"
#include "IceTargetLoweringX8632.def"
#include "IceTargetLoweringX8632.h"
#include "IceUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/CommandLine.h"
......@@ -528,6 +529,14 @@ void TargetX8632::emitVariable(const Variable *Var) const {
Str << "]";
}
// Converts a stack-allocated Variable (one with no physical register)
// into an x86 memory operand of the form [frame-or-stack-reg + offset].
x86::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const {
assert(!Var->hasReg());
int32_t Offset = Var->getStackOffset();
// When addressing relative to the stack pointer (no frame pointer),
// fold in the current stack adjustment so the offset stays correct as
// esp moves.
if (!hasFramePointer())
Offset += getStackAdjustment();
return x86::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset);
}
void TargetX8632::lowerArguments() {
VarList &Args = Func->getArgs();
// The first four arguments of vector type, regardless of their
......@@ -3710,7 +3719,7 @@ bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
if (Var == NULL || Const == NULL || VMetadata->isMultiDef(Var))
return false;
int32_t MoreOffset = IsAdd ? Const->getValue() : -Const->getValue();
if (WouldOverflowAdd(Offset, MoreOffset))
if (Utils::WouldOverflowAdd(Offset, MoreOffset))
return false;
Base = Var;
Offset += MoreOffset;
......
......@@ -18,6 +18,7 @@
#include "IceDefs.h"
#include "IceTargetLowering.h"
#include "assembler_ia32.h"
#include "IceInstX8632.h"
#include "IceRegistersX8632.h"
......@@ -68,6 +69,7 @@ public:
size_t BasicFrameOffset, size_t &InArgsSizeBytes);
Operand *loOperand(Operand *Operand);
Operand *hiOperand(Operand *Operand);
x86::Address stackVarToAsmOperand(const Variable *Var) const;
enum X86InstructionSet {
// SSE2 is the PNaCl baseline instruction set.
......
//===- subzero/src/IceUtils.h - Utility functions ---------------*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares some utility functions
//
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICEUTILS_H
#define SUBZERO_SRC_ICEUTILS_H

// Include what we use: this header previously included only <climits>
// and relied on transitive includes for assert(), memcpy(), and
// std::numeric_limits.
#include <cassert> // assert()
#include <climits> // CHAR_BIT
#include <cstring> // memcpy()
#include <limits>  // std::numeric_limits

namespace Ice {

// Similar to bit_cast, but allows copying from types of unrelated
// sizes. This method was introduced to enable the strict aliasing
// optimizations of GCC 4.4. Basically, GCC mindlessly relies on
// obscure details in the C++ standard that make reinterpret_cast
// virtually useless.
template <class D, class S> inline D bit_copy(const S &source) {
  D destination;
  // This use of memcpy is safe: source and destination cannot overlap.
  memcpy(&destination, reinterpret_cast<const void *>(&source),
         sizeof(destination));
  return destination;
}

class Utils {
public:
  // Check whether an N-bit two's-complement representation can hold value.
  template <typename T> static inline bool IsInt(int N, T value) {
    assert((0 < N) &&
           (static_cast<unsigned int>(N) < (CHAR_BIT * sizeof(value))));
    T limit = static_cast<T>(1) << (N - 1);
    return (-limit <= value) && (value < limit);
  }

  // Check whether an N-bit unsigned representation can hold value.
  template <typename T> static inline bool IsUint(int N, T value) {
    assert((0 < N) &&
           (static_cast<unsigned int>(N) < (CHAR_BIT * sizeof(value))));
    T limit = static_cast<T>(1) << N;
    return (0 <= value) && (value < limit);
  }

  // Check whether X + Y would overflow (or underflow) the signed type T.
  template <typename T> static inline bool WouldOverflowAdd(T X, T Y) {
    return ((X > 0 && Y > 0 && (X > std::numeric_limits<T>::max() - Y)) ||
            (X < 0 && Y < 0 && (X < std::numeric_limits<T>::min() - Y)));
  }
};

} // end of namespace Ice

#endif // SUBZERO_SRC_ICEUTILS_H
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
//
// Modified by the Subzero authors.
//
//===- subzero/src/assembler.cpp - Assembler base class -------------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Assembler class.
//
//===----------------------------------------------------------------------===//
#include "assembler.h"
#include "IceMemoryRegion.h"
namespace Ice {
// Allocates a fresh buffer data area of the given capacity from the
// Assembler's allocator and returns its base address.
static uintptr_t NewContents(Assembler &assembler, intptr_t capacity) {
  return assembler.AllocateBytes(capacity);
}
#if defined(DEBUG)
// Debug-only RAII guard placed around each instruction emission.  The
// constructor grows the buffer if the cursor has reached the limit and
// records the remaining gap; the destructor checks that the emitted
// instruction fit within the minimum gap.
AssemblerBuffer::EnsureCapacity::EnsureCapacity(AssemblerBuffer *buffer) {
if (buffer->cursor() >= buffer->limit())
buffer->ExtendCapacity();
// In debug mode, we save the assembler buffer along with the gap
// size before we start emitting to the buffer. This allows us to
// check that any single generated instruction doesn't overflow the
// limit implied by the minimum gap size.
buffer_ = buffer;
gap_ = ComputeGap();
// Make sure that extending the capacity leaves a big enough gap
// for any kind of instruction.
assert(gap_ >= kMinimumGap);
// Mark the buffer as having ensured the capacity.
assert(!buffer->HasEnsuredCapacity()); // Cannot nest.
buffer->has_ensured_capacity_ = true;
}
AssemblerBuffer::EnsureCapacity::~EnsureCapacity() {
// Unmark the buffer, so we cannot emit after this.
buffer_->has_ensured_capacity_ = false;
// Make sure the generated instruction doesn't take up more
// space than the minimum gap.
intptr_t delta = gap_ - ComputeGap();
assert(delta <= kMinimumGap);
}
#endif
// Constructs an empty code buffer with a 4KB initial data area drawn
// from the owning Assembler's allocator.  The cursor starts at the
// beginning of the data area; limit_ is derived from the capacity via
// ComputeLimit() (defined in assembler.h).
AssemblerBuffer::AssemblerBuffer(Assembler &assembler) : assembler_(assembler) {
const intptr_t OneKB = 1024;
static const intptr_t kInitialBufferCapacity = 4 * OneKB;
contents_ = NewContents(assembler_, kInitialBufferCapacity);
cursor_ = contents_;
limit_ = ComputeLimit(contents_, kInitialBufferCapacity);
#if defined(DEBUG)
has_ensured_capacity_ = false;
fixups_processed_ = false;
#endif
// Verify internal state.
assert(Capacity() == kInitialBufferCapacity);
assert(Size() == 0);
}
// No cleanup needed: buffer storage is owned by the Assembler's
// allocator, not by this object.
AssemblerBuffer::~AssemblerBuffer() {}
// Returns the most recently emitted fixup, or NULL when no fixups have
// been recorded yet.
AssemblerFixup *AssemblerBuffer::GetLatestFixup() const {
  return fixups_.empty() ? NULL : fixups_.back();
}
void AssemblerBuffer::ProcessFixups(const MemoryRegion &region) {
for (SizeT I = 0; I < fixups_.size(); ++I) {
AssemblerFixup *fixup = fixups_[I];
fixup->Process(region, fixup->position());
}
}
// Copies the assembled bytes into |instructions| and then applies all
// recorded fixups against that destination region.
void AssemblerBuffer::FinalizeInstructions(const MemoryRegion &instructions) {
// Copy the instructions from the buffer.
MemoryRegion from(reinterpret_cast<void *>(contents()), Size());
instructions.CopyFrom(0, from);
// Process fixups in the instructions.
ProcessFixups(instructions);
#if defined(DEBUG)
fixups_processed_ = true;
#endif
}
// Grows the buffer's data area, preserving its contents and the
// cursor's logical position.  Growth policy: double the capacity, but
// never grow by more than 1MB in one step.
void AssemblerBuffer::ExtendCapacity() {
intptr_t old_size = Size();
intptr_t old_capacity = Capacity();
const intptr_t OneMB = 1 << 20;
intptr_t new_capacity = std::min(old_capacity * 2, old_capacity + OneMB);
// Guard against signed overflow of old_capacity * 2.
if (new_capacity < old_capacity) {
// FATAL
llvm_unreachable("Unexpected overflow in AssemblerBuffer::ExtendCapacity");
}
// Allocate the new data area and copy contents of the old one to it.
uintptr_t new_contents = NewContents(assembler_, new_capacity);
memmove(reinterpret_cast<void *>(new_contents),
reinterpret_cast<void *>(contents_), old_size);
// Compute the relocation delta and switch to the new contents area.
intptr_t delta = new_contents - contents_;
contents_ = new_contents;
// Update the cursor and recompute the limit.
cursor_ += delta;
limit_ = ComputeLimit(new_contents, new_capacity);
// Verify internal state.
assert(Capacity() == new_capacity);
assert(Size() == old_size);
}
} // end of namespace Ice
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
//
// Modified by the Subzero authors.
//
//===- subzero/src/assembler.h - Integrated assembler -----------*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the Assembler base class. Instructions are assembled
// by architecture-specific assemblers that derive from this base class.
// This base class manages buffers and fixups for emitting code, etc.
//
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ASSEMBLER_H
#define SUBZERO_SRC_ASSEMBLER_H
#include "IceDefs.h"
#include "IceFixups.h"
#include "llvm/Support/Allocator.h"
namespace Ice {
// Forward declarations.
class Assembler;
class AssemblerFixup;
class AssemblerBuffer;
class ConstantRelocatable;
class MemoryRegion;
// Assembler fixups are positions in generated code that hold relocation
// information that needs to be processed before finalizing the code
// into executable memory.
class AssemblerFixup {
public:
// Applies this fixup's relocation to the finalized code in |region|,
// at byte offset |position|.  Implemented by target/kind-specific
// subclasses.
virtual void Process(const MemoryRegion &region, intptr_t position) = 0;
// It would be ideal if the destructor method could be made private,
// but the g++ compiler complains when this is subclassed.
virtual ~AssemblerFixup() { llvm_unreachable("~AssemblerFixup used"); }
// Byte offset in the assembler buffer at which this fixup was emitted
// (set by AssemblerBuffer::EmitFixup).
intptr_t position() const { return position_; }
// Relocation kind (see IceFixups.h).
FixupKind kind() const { return kind_; }
// The relocatable constant this fixup refers to.
const ConstantRelocatable *value() const { return value_; }
protected:
AssemblerFixup(FixupKind Kind, const ConstantRelocatable *Value)
: position_(0), kind_(Kind), value_(Value) {}
private:
intptr_t position_;
FixupKind kind_;
const ConstantRelocatable *value_;
// Only AssemblerBuffer (a friend) may stamp the position, when the
// fixup is emitted.
void set_position(intptr_t position) { position_ = position; }
AssemblerFixup(const AssemblerFixup &) LLVM_DELETED_FUNCTION;
AssemblerFixup &operator=(const AssemblerFixup &) LLVM_DELETED_FUNCTION;
friend class AssemblerBuffer;
};
// Assembler buffers are used to emit binary code. They grow on demand.
class AssemblerBuffer {
public:
  AssemblerBuffer(Assembler &);
  ~AssemblerBuffer();

  // Basic support for emitting, loading, and storing.
  // Writes |value| at the cursor and advances the cursor.  The caller
  // must first guarantee space via an EnsureCapacity scope (asserted
  // in debug builds).
  template <typename T> void Emit(T value) {
    assert(HasEnsuredCapacity());
    *reinterpret_cast<T *>(cursor_) = value;
    cursor_ += sizeof(T);
  }
  // Reads a T from byte offset |position| of the emitted code.
  template <typename T> T Load(intptr_t position) const {
    assert(position >= 0 &&
           position <= (Size() - static_cast<intptr_t>(sizeof(T))));
    return *reinterpret_cast<T *>(contents_ + position);
  }
  // Overwrites previously emitted code at byte offset |position|.
  template <typename T> void Store(intptr_t position, T value) {
    assert(position >= 0 &&
           position <= (Size() - static_cast<intptr_t>(sizeof(T))));
    *reinterpret_cast<T *>(contents_ + position) = value;
  }

  // Emit a fixup at the current location.
  void EmitFixup(AssemblerFixup *fixup) {
    fixup->set_position(Size());
    fixups_.push_back(fixup);
  }

  // Get the size of the emitted code.
  intptr_t Size() const { return cursor_ - contents_; }
  uintptr_t contents() const { return contents_; }

  // Copy the assembled instructions into the specified memory block
  // and apply all fixups.
  // TODO(jvoung): This will be different. We'll be writing the text
  // and reloc section to a file?
  void FinalizeInstructions(const MemoryRegion &region);

  // To emit an instruction to the assembler buffer, the EnsureCapacity helper
  // must be used to guarantee that the underlying data area is big enough to
  // hold the emitted instruction. Usage:
  //
  //     AssemblerBuffer buffer;
  //     AssemblerBuffer::EnsureCapacity ensured(&buffer);
  //     ... emit bytes for single instruction ...
#if defined(DEBUG)
  // Debug flavor: tracks that capacity was ensured and (in the
  // destructor) that the emitted bytes stayed within the gap.
  class EnsureCapacity {
  public:
    explicit EnsureCapacity(AssemblerBuffer *buffer);
    ~EnsureCapacity();

  private:
    AssemblerBuffer *buffer_;
    intptr_t gap_;
    intptr_t ComputeGap() { return buffer_->Capacity() - buffer_->Size(); }
  };

  bool has_ensured_capacity_;
  bool HasEnsuredCapacity() const { return has_ensured_capacity_; }
#else
  // Release flavor: only grows the buffer when the cursor reaches the
  // limit; no bookkeeping.
  class EnsureCapacity {
  public:
    explicit EnsureCapacity(AssemblerBuffer *buffer) {
      if (buffer->cursor() >= buffer->limit())
        buffer->ExtendCapacity();
    }
  };

  // When building the C++ tests, assertion code is enabled. To allow
  // asserting that the user of the assembler buffer has ensured the
  // capacity needed for emitting, we add a dummy method in non-debug mode.
  bool HasEnsuredCapacity() const { return true; }
#endif

  // Returns the position in the instruction stream.
  intptr_t GetPosition() const { return cursor_ - contents_; }

  // For bringup only.
  AssemblerFixup *GetLatestFixup() const;

private:
  // The limit is set to kMinimumGap bytes before the end of the data area.
  // This leaves enough space for the longest possible instruction and allows
  // for a single, fast space check per instruction.
  static const intptr_t kMinimumGap = 32;

  uintptr_t contents_; // Start of the data area.
  uintptr_t cursor_;   // Next byte to be emitted.
  uintptr_t limit_;    // contents_ + Capacity() - kMinimumGap.
  Assembler &assembler_;
  std::vector<AssemblerFixup *> fixups_;
#if defined(DEBUG)
  bool fixups_processed_;
#endif

  uintptr_t cursor() const { return cursor_; }
  uintptr_t limit() const { return limit_; }
  intptr_t Capacity() const {
    assert(limit_ >= contents_);
    return (limit_ - contents_) + kMinimumGap;
  }

  // Process the fixup chain.
  void ProcessFixups(const MemoryRegion &region);

  // Compute the limit based on the data area and the capacity. See
  // description of kMinimumGap for the reasoning behind the value.
  static uintptr_t ComputeLimit(uintptr_t data, intptr_t capacity) {
    return data + capacity - kMinimumGap;
  }

  void ExtendCapacity();

  friend class AssemblerFixup;
};
// Base class for the architecture-specific assemblers (see the file
// header comment: instructions are assembled by subclasses that derive
// from this class).  Owns a per-assembler bump allocator used for
// buffers, fixups, etc.
class Assembler {
public:
  Assembler() {}
  // Fix: the destructor must be virtual.  This class is a polymorphic
  // base -- architecture-specific assemblers derive from it, and Cfg
  // owns its target assembler through an Assembler pointer (see
  // Cfg::getAssembler) -- so deleting through the base pointer with a
  // non-virtual destructor is undefined behavior and would skip
  // derived-class cleanup.
  virtual ~Assembler() {}

  // Allocate a chunk of bytes using the per-Assembler allocator.
  uintptr_t AllocateBytes(size_t bytes) {
    // For now, alignment is not related to NaCl bundle alignment, since
    // the buffer's GetPosition is relative to the base. So NaCl bundle
    // alignment checks can be relative to that base. Later, the buffer
    // will be copied out to a ".text" section (or an in memory-buffer
    // that can be mprotect'ed with executable permission), and that
    // second buffer should be aligned for NaCl.
    const size_t Alignment = 16;
    return reinterpret_cast<uintptr_t>(Allocator.Allocate(bytes, Alignment));
  }

  // Allocate data of type T using the per-Assembler allocator.
  template <typename T> T *Allocate() { return Allocator.Allocate<T>(); }

private:
  llvm::BumpPtrAllocator Allocator;
  Assembler(const Assembler &) LLVM_DELETED_FUNCTION;
  Assembler &operator=(const Assembler &) LLVM_DELETED_FUNCTION;
};
} // end of namespace Ice
#endif // SUBZERO_SRC_ASSEMBLER_H
......@@ -130,6 +130,11 @@ BuildOnRead("build-on-read",
cl::desc("Build ICE instructions when reading bitcode"),
cl::init(false));
static cl::opt<bool>
UseIntegratedAssembler("integrated-as",
cl::desc("Use integrated assembler (default yes)"),
cl::init(true));
int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv);
......@@ -158,6 +163,7 @@ int main(int argc, char **argv) {
Flags.DisableTranslation = DisableTranslation;
Flags.DisableGlobals = DisableGlobals;
Flags.FunctionSections = FunctionSections;
Flags.UseIntegratedAssembler = UseIntegratedAssembler;
Flags.UseSandboxing = UseSandboxing;
Flags.DumpStats = DumpStats;
Flags.DefaultGlobalPrefix = DefaultGlobalPrefix;
......
; This checks to ensure that Subzero aligns spill slots.
; RUN: %llvm2ice --verbose none %s | FileCheck %s
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
; RUN: %llvm2ice --verbose none %s \
; RUN: | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \
; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - | FileCheck %s
; RUN: %llvm2ice -O2 --verbose none %s \
; RUN: | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \
; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - | FileCheck %s
; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
; The location of the stack slot for a variable is inferred from the
......@@ -48,7 +52,7 @@ block:
call void @ForceXmmSpillsAndUseAlloca(i8* %alloc)
ret <4 x i32> %vec.global
; CHECK-LABEL: align_global_vector_ebp_based:
; CHECK: movups xmm0, xmmword ptr [ebp-24]
; CHECK: movups xmm0, xmmword ptr [ebp - 24]
; CHECK-NEXT: mov esp, ebp
; CHECK-NEXT: pop ebp
; CHECK: ret
......@@ -61,7 +65,7 @@ entry:
call void @ForceXmmSpillsAndUseAlloca(i8* %alloc)
ret <4 x i32> %vec.local
; CHECK-LABEL: align_local_vector_ebp_based:
; CHECK: movups xmm0, xmmword ptr [ebp-24]
; CHECK: movups xmm0, xmmword ptr [ebp - 24]
; CHECK-NEXT: mov esp, ebp
; CHECK-NEXT: pop ebp
; CHECK: ret
......@@ -78,8 +82,8 @@ block:
ret <4 x i32> %vec.local
; CHECK-LABEL: align_local_vector_and_global_float:
; CHECK: cvtsi2ss xmm0, eax
; CHECK-NEXT: movss dword ptr [esp+{{12|28}}], xmm0
; CHECK: movups xmm0, xmmword ptr [{{esp|esp\+16}}]
; CHECK-NEXT: movss dword ptr [esp + {{12|28}}], xmm0
; CHECK: movups xmm0, xmmword ptr [{{esp|esp \+ 16}}]
; CHECK-NEXT: add esp, 44
; CHECK-NEXT: ret
}
......
......@@ -3,7 +3,9 @@
; adjustment was incorrectly added to the stack/frame offset for
; ebp-based frames.
; RUN: %llvm2ice -Om1 --target=x8632 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -Om1 --target=x8632 --verbose none %s \
; RUN: | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \
; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - | FileCheck %s
declare i32 @memcpy_helper2(i32 %buf, i32 %buf2, i32 %n)
......@@ -25,19 +27,19 @@ entry:
; CHECK: push ebp
; CHECK: mov ebp, esp
; CHECK: sub esp, 24
; CHECK: mov eax, dword ptr [ebp+12]
; CHECK: mov dword ptr [ebp-4], eax
; CHECK: mov eax, dword ptr [ebp + 12]
; CHECK: mov dword ptr [ebp - 4], eax
; CHECK: sub esp, 128
; CHECK: mov dword ptr [ebp-8], esp
; CHECK: mov eax, dword ptr [ebp-8]
; CHECK: mov dword ptr [ebp-12], eax
; CHECK: movzx eax, byte ptr [ebp-4]
; CHECK: mov dword ptr [ebp-16], eax
; CHECK: mov dword ptr [ebp - 8], esp
; CHECK: mov eax, dword ptr [ebp - 8]
; CHECK: mov dword ptr [ebp - 12], eax
; CHECK: movzx eax, byte ptr [ebp - 4]
; CHECK: mov dword ptr [ebp - 16], eax
; CHECK: sub esp, 16
; CHECK: mov ecx, dword ptr [ebp+8]
; CHECK: mov ecx, dword ptr [ebp + 8]
; CHECK: mov dword ptr [esp], ecx
; CHECK: mov ecx, dword ptr [ebp-12]
; CHECK: mov dword ptr [esp+4], ecx
; CHECK: mov ecx, dword ptr [ebp-16]
; CHECK: mov dword ptr [esp+8], ecx
; CHECK: call memcpy_helper2
; CHECK: mov ecx, dword ptr [ebp - 12]
; CHECK: mov dword ptr [esp + 4], ecx
; CHECK: mov ecx, dword ptr [ebp - 16]
; CHECK: mov dword ptr [esp + 8], ecx
; CHECK: call -4
; This is a smoke test of nop insertion.
; Don't use the integrated assembler, because this test currently depends
; on the "#"-style assembler comments it does not produce.
; RUN: %llvm2ice -rng-seed=1 -nop-insertion -nop-insertion-percentage=50 \
; RUN: -max-nops-per-instruction=1 %s | FileCheck %s --check-prefix=PROB50
; RUN: -max-nops-per-instruction=1 -integrated-as=false %s \
; RUN: | FileCheck %s --check-prefix=PROB50
; RUN: %llvm2ice -rng-seed=1 -nop-insertion -nop-insertion-percentage=90 \
; RUN: -max-nops-per-instruction=1 %s | FileCheck %s --check-prefix=PROB90
; RUN: -max-nops-per-instruction=1 -integrated-as=false %s \
; RUN: | FileCheck %s --check-prefix=PROB90
; RUN: %llvm2ice -rng-seed=1 -nop-insertion -nop-insertion-percentage=50 \
; RUN: -max-nops-per-instruction=2 %s | FileCheck %s --check-prefix=MAXNOPS2
; RUN: -max-nops-per-instruction=2 -integrated-as=false %s \
; RUN: | FileCheck %s --check-prefix=MAXNOPS2
define <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
entry:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment