Subzero. ARM32. Initial sandboxing code.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
Review URL: https://codereview.chromium.org/1491473002 .

Subzero. ARM32. Initial sandboxing code.
38ac6bee · John Porto · 866b6b19 · 38ac6bee · 38ac6bee · 38ac6bee
Commit 38ac6bee authored Dec 04, 2015 by John Porto
10 changed files
--- a/Makefile.standalone
+++ b/Makefile.standalone
@@ -404,7 +404,10 @@ check-xtest: $(OBJDIR)/pnacl-sz make_symlink runtime
          -e x8664,native,sse2,test_global \
          -i arm32,native,neon \
          -e arm32,native,neon,test_vector_ops \
-          -e arm32,native,neon,test_select
+          -e arm32,native,neon,test_select \
+          -i arm32,sandbox,neon \
+          -e arm32,sandbox,neon,test_vector_ops \
+          -e arm32,sandbox,neon,test_select
 	PNACL_BIN_PATH=$(PNACL_BIN_PATH) \
 	$(LLVM_SRC_PATH)/utils/lit/lit.py -sv crosstest/Output
 endif

--- a/pydir/crosstest.py
+++ b/pydir/crosstest.py
@@ -182,7 +182,7 @@ def main():
            ).format(root=nacl_root, sb='sb' if args.sandbox else 'native'))
    pure_c = os.path.splitext(args.driver)[1] == '.c'

-    # TargetX8664 is ilp32, but clang does not currently support such
+    # TargetX8664 is ilp32, but pnacl-clang does not currently support such
    # configuration. In order to run the crosstests we play nasty, dangerous
    # tricks with the stack pointer.
    needs_stack_hack = (args.target == 'x8664')
@@ -202,7 +202,7 @@ def main():
        bin=bindir, prefix='pnacl-' if args.sandbox else '',
        cc='clang' if pure_c else 'clang++')
    sb_native_args = (['-O0', '--pnacl-allow-native',
-                       '-arch', target_info.target,
+                       '-arch', target_info.compiler_arch,
                       '-Wn,-defsym=__Sz_AbsoluteZero=0']
                      if args.sandbox else
                      ['-g', '-target=' + triple,

--- a/pydir/crosstest_generator.py
+++ b/pydir/crosstest_generator.py
@@ -67,7 +67,7 @@ def main():
  arch_flags = { 'x8632': [],
                 'x8664': [],
                 # ARM doesn't have an ELF writer yet.
-                 'arm32': ['--filetype=iasm'] }
+                 'arm32': ['--filetype=asm'] }
  # all_keys is only used in the help text.
  all_keys = '; '.join([' '.join(targets), ' '.join(sandboxing),
                        ' '.join(opt_levels), ' '.join(flat_attrs)])

--- a/pydir/run-pnacl-sz.py
+++ b/pydir/run-pnacl-sz.py
@@ -11,14 +11,16 @@ import tempfile
 from utils import shellcmd


-def TargetAssemblerFlags(target):
+def TargetAssemblerFlags(target, sandboxed):
  # TODO(stichnot): -triple=i686-nacl should be used for a
  # sandboxing test.  This means there should be an args.sandbox
  # argument that also gets passed through to pnacl-sz.
  # TODO(reed kotler). Need to find out exactly we need to
  # add here for Mips32.
-  flags = { 'x8632': ['-triple=i686'],
-            'arm32': ['-triple=armv7a', '-mcpu=cortex-a9', '-mattr=+neon'],
+  flags = { 'x8632': ['-triple=%s' % ('i686' if not sandboxed else 'i686-nacl')],
+            'arm32': ['-triple=%s' % (
+                          'armv7a' if not sandboxed else 'armv7a-nacl'),
+                      '-mcpu=cortex-a9', '-mattr=+neon'],
            'mips32': ['-triple=mipsel' ] }
  return flags[target]

@@ -89,6 +91,8 @@ def main():
    argparser.add_argument('--args', '-a', nargs=argparse.REMAINDER,
                           default=[],
                           help='Remaining arguments are passed to pnacl-sz')
+    argparser.add_argument('--sandbox', required=False, action='store_true',
+                           help='Sanboxes the generated code.')

    args = argparser.parse_args()
    pnacl_bin_path = args.pnacl_bin_path
@@ -121,6 +125,8 @@ def main():
      cmd += [os.path.join(pnacl_bin_path, 'not')]
    cmd += [args.pnacl_sz]
    cmd += ['--target', args.target]
+    if args.sandbox:
+      cmd += ['-sandbox']
    if args.insts:
      # If the tests are based on '-verbose inst' output, force
      # single-threaded translation because dump output does not get
@@ -147,7 +153,7 @@ def main():
      asm_temp.close()
    if args.assemble and args.filetype != 'obj':
      cmd += (['|', os.path.join(pnacl_bin_path, 'llvm-mc')] +
-              TargetAssemblerFlags(args.target) +
+              TargetAssemblerFlags(args.target, args.sandbox) +
              ['-filetype=obj', '-o', asm_temp.name])
    elif asm_temp:
      cmd += ['-o', asm_temp.name]

--- a/pydir/szbuild.py
+++ b/pydir/szbuild.py
@@ -318,10 +318,14 @@ def ProcessPexe(args, pexe, exe):

    # Run the linker regardless of hybrid mode.
    if args.sandbox:
-        assert args.target in ['x8632'], \
+        assert args.target in ('x8632', 'arm32'), \
            '-sandbox is not available for %s' % args.target
+        target_lib_dir = {
+          'arm32': 'arm',
+          'x8632': 'x86-32',
+        }[args.target]
        linklib = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/translator/' +
-                   'x86-32/lib').format(root=nacl_root)
+                   '{target_dir}/lib').format(root=nacl_root, target_dir=target_lib_dir)
        shellcmd((
            '{gold} -nostdlib --no-fix-cortex-a8 --eh-frame-hdr -z text ' +
            '--build-id --entry=__pnacl_start -static ' +

--- a/pydir/targets.py
+++ b/pydir/targets.py
@@ -17,22 +17,25 @@ def FindARMCrossInclude():


 TargetInfo = namedtuple('TargetInfo',
-                        ['target', 'triple', 'llc_flags', 'ld_emu',
-                         'cross_headers'])
+                        ['target', 'compiler_arch', 'triple', 'llc_flags',
+                         'ld_emu', 'cross_headers'])

 X8632Target = TargetInfo(target='x8632',
+                         compiler_arch='x8632',
                         triple='i686-none-linux',
                         llc_flags=['-mcpu=pentium4m'],
                         ld_emu='elf_i386_nacl',
                         cross_headers=[])

 X8664Target = TargetInfo(target='x8664',
+                         compiler_arch='x8664',
                         triple='x86_64-none-linux',
                         llc_flags=['-mcpu=x86-64'],
                         ld_emu='elf_x86_64_nacl',
                         cross_headers=[])

 ARM32Target = TargetInfo(target='arm32',
+                         compiler_arch='armv7',
                         triple='armv7a-none-linux-gnueabihf',
                         llc_flags=['-mcpu=cortex-a9',
                                    '-float-abi=hard',

--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -161,7 +161,8 @@ TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
 }

 TargetARM32::TargetARM32(Cfg *Func)
-    : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {}
+    : TargetLowering(Func), NeedSandboxing(Ctx->getFlags().getUseSandboxing()),
+      CPUFeatures(Func->getContext()->getFlags()) {}

 void TargetARM32::staticInit() {
  // Limit this size (or do all bitsets need to be the same width)???
@@ -544,8 +545,7 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
      return;
    }
    case Intrinsics::NaClReadTP: {
-      if (Ctx->getFlags().getUseSandboxing()) {
-        UnimplementedError(Func->getContext()->getFlags());
+      if (NeedSandboxing) {
        return;
      }
      static constexpr SizeT MaxArgs = 0;
@@ -1120,6 +1120,10 @@ void TargetARM32::addProlog(CfgNode *Node) {
      continue;
    }
    if (CalleeSaves[i] && RegsUsed[i]) {
+      if (NeedSandboxing && i == RegARM32::Reg_r9) {
+        // r9 is never updated in sandboxed code.
+        continue;
+      }
      ++NumCallee;
      Variable *PhysicalRegister = getPhysicalRegister(i);
      PreservedRegsSizeBytes +=
@@ -1173,10 +1177,9 @@ void TargetARM32::addProlog(CfgNode *Node) {
    // Use the scratch register if needed to legalize the immediate.
    Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                  Legal_Reg | Legal_Flex, getReservedTmpReg());
-    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
-    _sub(SP, SP, SubAmount);
+    AutoSandboxer(this).sub_sp(SubAmount);
    if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
-      alignRegisterPow2(SP, FixedAllocaAlignBytes);
+      AutoSandboxer(this).align_sp(FixedAllocaAlignBytes);
    }
  }

@@ -1270,7 +1273,7 @@ void TargetARM32::addEpilog(CfgNode *Node) {
    // use of SP before the assignment of SP=FP keeps previous SP adjustments
    // from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, SP));
-    _mov(SP, FP);
+    AutoSandboxer(this).reset_sp(FP);
  } else {
    // add SP, SpillAreaSizeBytes
    if (SpillAreaSizeBytes) {
@@ -1278,7 +1281,7 @@ void TargetARM32::addEpilog(CfgNode *Node) {
      Operand *AddAmount =
          legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                   Legal_Reg | Legal_Flex, getReservedTmpReg());
-      _add(SP, SP, AddAmount);
+      AutoSandboxer(this).add_sp(AddAmount);
    }
  }

@@ -1302,6 +1305,9 @@ void TargetARM32::addEpilog(CfgNode *Node) {
    }

    if (CalleeSaves[i] && RegsUsed[i]) {
+      if (NeedSandboxing && i == RegARM32::Reg_r9) {
+        continue;
+      }
      GPRsToRestore.push_back(getPhysicalRegister(i));
    }
  }
@@ -1318,16 +1324,13 @@ void TargetARM32::addEpilog(CfgNode *Node) {
  // bundle_unlock
  // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
  // restrict to the lower 1GB as well.
-  Operand *RetMask =
-      legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex);
-  Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr);
+  Variable *LR = getPhysicalRegister(RegARM32::Reg_lr);
  Variable *RetValue = nullptr;
  if (RI->getSrcSize())
    RetValue = llvm::cast<Variable>(RI->getSrc(0));
-  _bundle_lock();
-  _bic(LR, LR, RetMask);
-  _ret(LR, RetValue);
-  _bundle_unlock();
+
+  AutoSandboxer(this).ret(LR, RetValue);
+
  RI->setDeleted();
 }

@@ -1378,7 +1381,7 @@ Variable *TargetARM32::PostLoweringLegalizer::newBaseRegister(
 OperandARM32Mem *TargetARM32::PostLoweringLegalizer::createMemOperand(
    Type Ty, Variable *Base, int32_t Offset, bool AllowOffsets) {
  assert(!Base->isRematerializable());
-  if (AllowOffsets && Target->isLegalMemOffset(Ty, Offset)) {
+  if (Offset == 0 || (AllowOffsets && Target->isLegalMemOffset(Ty, Offset))) {
    return OperandARM32Mem::create(
        Target->Func, Ty, Base,
        llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)),
@@ -1451,8 +1454,9 @@ void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
    assert(!SrcR->isRematerializable());
    const int32_t Offset = Dest->getStackOffset();
    // This is a _mov(Mem(), Variable), i.e., a store.
-    Target->_str(SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset),
-                 MovInstr->getPredicate());
+    TargetARM32::AutoSandboxer(Target)
+        .str(SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset),
+             MovInstr->getPredicate());
    // _str() does not have a Dest, so we add a fake-def(Dest).
    Target->Context.insert(InstFakeDef::create(Target->Func, Dest));
    Legalized = true;
@@ -1476,8 +1480,9 @@ void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
      if (!Var->hasReg()) {
        // This is a _mov(Variable, Mem()), i.e., a load.
        const int32_t Offset = Var->getStackOffset();
-        Target->_ldr(Dest, createMemOperand(DestTy, StackOrFrameReg, Offset),
-                     MovInstr->getPredicate());
+        TargetARM32::AutoSandboxer(Target)
+            .ldr(Dest, createMemOperand(DestTy, StackOrFrameReg, Offset),
+                 MovInstr->getPredicate());
        Legalized = true;
      }
    }
@@ -1542,7 +1547,15 @@ TargetARM32::PostLoweringLegalizer::legalizeMemOperand(OperandARM32Mem *Mem,
    Legalized = true;
  }

-  if (!Legalized) {
+  if (!Legalized && !Target->NeedSandboxing) {
+    return nullptr;
+  }
+
+  if (Target->NeedSandboxing && Base->getRegNum() == RegARM32::Reg_r9) {
+    if (Legalized) {
+      llvm::report_fatal_error("r9-based mem operand should not need to be "
+                               "legalized.");
+    }
    return nullptr;
  }

@@ -1550,6 +1563,7 @@ TargetARM32::PostLoweringLegalizer::legalizeMemOperand(OperandARM32Mem *Mem,
    return createMemOperand(Mem->getType(), Base, Offset, AllowOffsets);
  }

+  assert(!Target->NeedSandboxing);
  assert(MemTraits[Mem->getType()].CanHaveIndex);

  if (Offset != 0) {
@@ -1621,7 +1635,8 @@ void TargetARM32::postLowerLegalization() {
      } else if (auto *LdrInstr = llvm::dyn_cast<InstARM32Ldr>(CurInstr)) {
        if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
                llvm::cast<OperandARM32Mem>(LdrInstr->getSrc(0)))) {
-          _ldr(CurInstr->getDest(), LegalMem, LdrInstr->getPredicate());
+          AutoSandboxer(this)
+              .ldr(CurInstr->getDest(), LegalMem, LdrInstr->getPredicate());
          CurInstr->setDeleted();
        }
      } else if (auto *LdrexInstr = llvm::dyn_cast<InstARM32Ldrex>(CurInstr)) {
@@ -1629,14 +1644,16 @@ void TargetARM32::postLowerLegalization() {
        if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
                llvm::cast<OperandARM32Mem>(LdrexInstr->getSrc(0)),
                DisallowOffsetsBecauseLdrex)) {
-          _ldrex(CurInstr->getDest(), LegalMem, LdrexInstr->getPredicate());
+          AutoSandboxer(this)
+              .ldrex(CurInstr->getDest(), LegalMem, LdrexInstr->getPredicate());
          CurInstr->setDeleted();
        }
      } else if (auto *StrInstr = llvm::dyn_cast<InstARM32Str>(CurInstr)) {
+        AutoSandboxer Bundle(this);
        if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
                llvm::cast<OperandARM32Mem>(StrInstr->getSrc(1)))) {
-          _str(llvm::cast<Variable>(CurInstr->getSrc(0)), LegalMem,
-               StrInstr->getPredicate());
+          AutoSandboxer(this).str(llvm::cast<Variable>(CurInstr->getSrc(0)),
+                                  LegalMem, StrInstr->getPredicate());
          CurInstr->setDeleted();
        }
      } else if (auto *StrexInstr = llvm::dyn_cast<InstARM32Strex>(CurInstr)) {
@@ -1644,8 +1661,9 @@ void TargetARM32::postLowerLegalization() {
        if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
                llvm::cast<OperandARM32Mem>(StrexInstr->getSrc(1)),
                DisallowOffsetsBecauseStrex)) {
-          _strex(CurInstr->getDest(), llvm::cast<Variable>(CurInstr->getSrc(0)),
-                 LegalMem, StrexInstr->getPredicate());
+          AutoSandboxer(this).strex(CurInstr->getDest(),
+                                    llvm::cast<Variable>(CurInstr->getSrc(0)),
+                                    LegalMem, StrexInstr->getPredicate());
          CurInstr->setDeleted();
        }
      }
@@ -1803,7 +1821,7 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {

  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  if (OverAligned) {
-    alignRegisterPow2(SP, Alignment);
+    AutoSandboxer(this).align_sp(Alignment);
  }

  Variable *Dest = Inst->getDest();
@@ -1828,7 +1846,7 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
    // in Dest.
    Operand *SubAmountRF =
        legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
-    _sub(SP, SP, SubAmountRF);
+    AutoSandboxer(this).sub_sp(SubAmountRF);
  } else {
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
@@ -1838,7 +1856,7 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
    Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
    _add(T, T, AddAmount);
    alignRegisterPow2(T, Alignment);
-    _sub(SP, SP, T);
+    AutoSandboxer(this).sub_sp(T);
  }

  // Adds back a few bytes to SP to account for the out args area.
@@ -3249,8 +3267,6 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
      break;
    }
  }
-  // TODO(jvoung): Handle sandboxing. const bool NeedSandboxing =
-  // Ctx->getFlags().getUseSandboxing();

  // Allow ConstantRelocatable to be left alone as a direct call, but force
  // other constants like ConstantInteger32 to be in a register and make it an
@@ -3271,8 +3287,10 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
    // the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }
-  Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
-  Context.insert(NewCall);
+
+  InstARM32Call *NewCall = AutoSandboxer(this, InstBundleLock::Opt_AlignToEnd)
+                               .bl(ReturnReg, CallTarget);
+
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

@@ -4612,7 +4630,14 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
    llvm::report_fatal_error("memmove should have been prelowered.");
  }
  case Intrinsics::NaClReadTP: {
-    llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
+    if (!NeedSandboxing) {
+      llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
+    }
+    Variable *TP = legalizeToReg(OperandARM32Mem::create(
+        Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9),
+        llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
+    _mov(Dest, TP);
+    return;
  }
  case Intrinsics::Setjmp: {
    llvm::report_fatal_error("setjmp should have been prelowered.");
@@ -4630,9 +4655,8 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
    return;
  }
  case Intrinsics::Stackrestore: {
-    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
-    Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex);
-    _mov_redefined(SP, Val);
+    Variable *Val = legalizeToReg(Instr->getArg(0));
+    AutoSandboxer(this).reset_sp(Val);
    return;
  }
  case Intrinsics::Trap:
@@ -4987,8 +5011,9 @@ OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func,
  (void)MemTraitsSize;
  assert(Ty < MemTraitsSize);
  auto *TypeTraits = &MemTraits[Ty];
-  const bool CanHaveIndex = TypeTraits->CanHaveIndex;
-  const bool CanHaveShiftedIndex = TypeTraits->CanHaveShiftedIndex;
+  const bool CanHaveIndex = !NeedSandboxing && TypeTraits->CanHaveIndex;
+  const bool CanHaveShiftedIndex =
+      !NeedSandboxing && TypeTraits->CanHaveShiftedIndex;
  const bool CanHaveImm = TypeTraits->CanHaveImm;
  const int32_t ValidImmMask = TypeTraits->ValidImmMask;
  (void)ValidImmMask;
@@ -5160,6 +5185,7 @@ void TargetARM32::lowerRet(const InstRet *Inst) {
  // frame removal instructions. addEpilog is responsible for restoring the
  // "lr" register as needed prior to this ret instruction.
  _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
+
  // Add a fake use of sp to make sure sp stays alive for the entire function.
  // Otherwise post-call sp adjustments get dead-code eliminated.
  // TODO: Are there more places where the fake use should be inserted? E.g.

--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -162,6 +162,18 @@ public:
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F)));
  }

+  OperandARM32FlexImm *indirectBranchBicMask() const {
+    constexpr uint32_t Imm8 = 0xFC; // 0xC000000F
+    constexpr uint32_t RotateAmt = 2;
+    return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
+  }
+
+  OperandARM32FlexImm *memOpBicMask() const {
+    constexpr uint32_t Imm8 = 0x0C; // 0xC0000000
+    constexpr uint32_t RotateAmt = 2;
+    return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
+  }
+
  GlobalContext *getCtx() const { return Ctx; }

 protected:
@@ -822,6 +834,115 @@ protected:

  void postLowerLegalization();

+  class AutoSandboxer {
+  public:
+    explicit AutoSandboxer(
+        TargetARM32 *Target,
+        InstBundleLock::Option BundleOption = InstBundleLock::Opt_None)
+        : Target(Target) {
+      if (Target->NeedSandboxing) {
+        Target->_bundle_lock(BundleOption);
+      }
+    }
+
+    void add_sp(Operand *AddAmount) {
+      Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
+      Target->_add(SP, SP, AddAmount);
+      if (Target->NeedSandboxing) {
+        Target->_bic(SP, SP, Target->memOpBicMask());
+      }
+    }
+
+    void align_sp(size_t Alignment) {
+      Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
+      Target->alignRegisterPow2(SP, Alignment);
+      if (Target->NeedSandboxing) {
+        Target->_bic(SP, SP, Target->memOpBicMask());
+      }
+    }
+
+    InstARM32Call *bl(Variable *ReturnReg, Operand *CallTarget) {
+      if (Target->NeedSandboxing) {
+        if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
+          Target->_bic(CallTargetR, CallTargetR,
+                       Target->indirectBranchBicMask());
+        }
+      }
+      auto *Call = InstARM32Call::create(Target->Func, ReturnReg, CallTarget);
+      Target->Context.insert(Call);
+      return Call;
+    }
+
+    void ldr(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred) {
+      if (Target->NeedSandboxing) {
+        assert(!Mem->isRegReg());
+        Variable *MemBase = Mem->getBase();
+        Target->_bic(MemBase, MemBase, Target->memOpBicMask(), Pred);
+      }
+      Target->_ldr(Dest, Mem, Pred);
+    }
+
+    void ldrex(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred) {
+      if (Target->NeedSandboxing) {
+        assert(!Mem->isRegReg());
+        Variable *MemBase = Mem->getBase();
+        Target->_bic(MemBase, MemBase, Target->memOpBicMask(), Pred);
+      }
+      Target->_ldrex(Dest, Mem, Pred);
+    }
+
+    void reset_sp(Variable *Src) {
+      Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
+      Target->_mov_redefined(SP, Src);
+      if (Target->NeedSandboxing) {
+        Target->_bic(SP, SP, Target->memOpBicMask());
+      }
+    }
+
+    void ret(Variable *RetAddr, Variable *RetValue) {
+      if (Target->NeedSandboxing) {
+        Target->_bic(RetAddr, RetAddr, Target->indirectBranchBicMask());
+      }
+      Target->_ret(RetAddr, RetValue);
+    }
+
+    void str(Variable *Src, OperandARM32Mem *Mem, CondARM32::Cond Pred) {
+      if (Target->NeedSandboxing) {
+        assert(!Mem->isRegReg());
+        Variable *MemBase = Mem->getBase();
+        Target->_bic(MemBase, MemBase, Target->memOpBicMask(), Pred);
+      }
+      Target->_str(Src, Mem, Pred);
+    }
+
+    void strex(Variable *Dest, Variable *Src, OperandARM32Mem *Mem,
+               CondARM32::Cond Pred) {
+      if (Target->NeedSandboxing) {
+        assert(!Mem->isRegReg());
+        Variable *MemBase = Mem->getBase();
+        Target->_bic(MemBase, MemBase, Target->memOpBicMask(), Pred);
+      }
+      Target->_strex(Dest, Src, Mem, Pred);
+    }
+
+    void sub_sp(Operand *SubAmount) {
+      Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
+      Target->_sub(SP, SP, SubAmount);
+      if (Target->NeedSandboxing) {
+        Target->_bic(SP, SP, Target->memOpBicMask());
+      }
+    }
+
+    ~AutoSandboxer() {
+      if (Target->NeedSandboxing) {
+        Target->_bundle_unlock();
+      }
+    }
+
+  private:
+    TargetARM32 *Target;
+  };
+
  class PostLoweringLegalizer {
    PostLoweringLegalizer() = delete;
    PostLoweringLegalizer(const PostLoweringLegalizer &) = delete;
@@ -878,6 +999,7 @@ protected:
    int32_t TempBaseOffset = 0;
  };

+  const bool NeedSandboxing;
  TargetARM32Features CPUFeatures;
  bool UsesFramePointer = false;
  bool NeedsStackAlignment = false;

--- a/tests_lit/assembler/arm32/sandboxing.ll
+++ b/tests_lit/assembler/arm32/sandboxing.ll
+; Tests basics and corner cases of ARM32 sandboxing, using -Om1 in
+; the hope that the output will remain stable.  When packing bundles,
+; we try to limit to a few instructions with well known sizes and
+; minimal use of registers and stack slots in the lowering sequence.
+
+; RUN: %p2i -i %s --sandbox --filetype=asm --target=arm32 --assemble \
+; RUN:   --disassemble --args -Om1 -allow-externally-defined-symbols \
+; RUN:   -ffunction-sections  | FileCheck %s
+
+declare void @call_target()
+@global_short = internal global [2 x i8] zeroinitializer
+
+; A direct call sequence uses the right mask and register-call sequence.
+define internal void @test_direct_call() {
+entry:
+  call void @call_target()
+  ret void
+}
+; CHECK-LABEL: test_direct_call
+; CHECK: nop
+; CHECK: c: {{.*}} bl {{.*}} call_target
+; CHECK-NEXT: 10:
+
+; An indirect call sequence uses the right mask and register-call sequence.
+define internal void @test_indirect_call(i32 %target) {
+entry:
+  %__1 = inttoptr i32 %target to void ()*
+  call void %__1()
+  ret void
+}
+; CHECK-LABEL: test_indirect_call
+; CHECK: ldr [[REG:.*]], [sp, 
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK: 18: {{.*}} bic [[REG]], [[REG]], {{.*}} 0xc000000f
+; CHECK-NEXT: blx [[REG]]
+; CHECK-NEXT: 20:
+
+; A return sequence uses the right mask / bx sequence.
+define internal void @test_ret() {
+entry:
+  ret void
+}
+; CHECK-LABEL: test_ret
+; CHECK: 0: {{.*}} bic lr, lr, {{.*}} 0xc000000f
+; CHECK-NEXT: bx lr
+
+; Bundle lock without padding.
+define internal void @bundle_lock_without_padding() {
+entry:
+  %addr_short = bitcast [2 x i8]* @global_short to i16*
+  store i16 0, i16* %addr_short, align 1
+  ret void
+}
+; CHECK-LABEL: bundle_lock_without_padding
+; CHECK: 0: {{.*}} movw
+; CHECK-NEXT: movt
+; CHECK-NEXT: movw
+; CHECK-NEXT: strh
+; CHECK-NEXT: bic lr, lr, {{.*}} 0xc000000f
+; CHECK-NEXT: {{.*}} bx lr
+
+; Bundle lock with padding.
+define internal void @bundle_lock_with_padding() {
+entry:
+  call void @call_target()
+  ; bundle boundary
+  store i16 0, i16* undef, align 1   ; 3 insts
+  store i16 0, i16* undef, align 1   ; 3 insts
+  store i16 0, i16* undef, align 1   ; 3 insts
+                                     ; SP adjustment + pop
+  ; nop
+  ; bundle boundary
+  ret void
+}
+; CHECK-LABEL: bundle_lock_with_padding
+; CHECK: 38: {{.*}} pop
+; CHECK-NEXT: nop
+; CHECK-NEXT: bic lr, lr, {{.*}} 0xc000000f
+; CHECK-NEXT: {{.*}} bx lr
+
+; Bundle lock align_to_end without any padding.
+define internal void @bundle_lock_align_to_end_padding_0() {
+entry:
+  call void @call_target()
+  ; bundle boundary
+  store i16 0, i16* undef, align 1
+  call void @call_target()
+  ; bundle boundary
+  ret void
+}
+; CHECK-LABEL: bundle_lock_align_to_end_padding_0
+; CHECK: c: {{.*}} bl {{.*}} call_target
+; CHECK-NEXT: movw
+; CHECK-NEXT: movw
+; CHECK-NEXT: strh
+; CHECK-NEXT: bl {{.*}} call_target
+; CHECK-NEXT: add
+; CHECK-NEXT: pop
+; CHECK-NEXT: bic lr, lr, {{.*}} 0xc000000f
+; CHECK-NEXT: {{.*}} bx lr
+
+; Bundle lock align_to_end with one bunch of padding.
+define internal void @bundle_lock_align_to_end_padding_1() {
+entry:
+  call void @call_target()
+  ; bundle boundary
+  store i16 0, i16* undef, align 1
+  store i16 0, i16* undef, align 1
+  ; bundle boundary
+  call void @call_target()
+  ; bundle boundary
+  ret void
+}
+; CHECK-LABEL: bundle_lock_align_to_end_padding_1
+; CHECK: c: {{.*}} bl {{.*}} call_target
+; CHECK-NEXT: movw
+; CHECK-NEXT: movw
+; CHECK-NEXT: strh
+; CHECK-NEXT: movw
+; CHECK-NEXT: movw
+; CHECK-NEXT: strh
+; CHECK-NEXT: nop
+; CHECK-NEXT: bl {{.*}} call_target
+; CHECK-NEXT: add
+; CHECK-NEXT: pop
+; CHECK-NEXT: bic lr, lr, {{.*}} 0xc000000f
+; CHECK-NEXT: {{.*}} bx lr
+
+; Bundle lock align_to_end with two bunches of padding.
+define internal void @bundle_lock_align_to_end_padding_2(i32 %target) {
+entry:
+  call void @call_target()
+  ; bundle boundary
+  %__1 = inttoptr i32 %target to void ()*
+  store i8 0, i8* undef, align 1
+  call void %__1()
+  ret void
+}
+; CHECK-LABEL: bundle_lock_align_to_end_padding_2
+; CHECK: c: {{.*}} bl {{.*}} call_target
+; CHECK-NEXT: movw
+; CHECK-NEXT: movw
+; CHECK-NEXT: strb
+; CHECK: 20: {{.*}} nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: bic [[REG:r[0-9]+]], [[REG]], {{.*}} 0xc000000f
+; CHECK-NEXT: {{.*}} blx [[REG]]
+
--- a/tests_lit/assembler/x86/sandboxing.ll
+++ b/tests_lit/assembler/x86/sandboxing.ll
@@ -3,9 +3,9 @@
 ; we try to limit to a few instructions with well known sizes and
 ; minimal use of registers and stack slots in the lowering sequence.

-; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 \
+; RUN: %p2i -i %s --sandbox --filetype=obj --disassemble --args -Om1 \
 ; RUN:   -allow-externally-defined-symbols \
-; RUN:   -ffunction-sections -sandbox | FileCheck %s
+; RUN:   -ffunction-sections | FileCheck %s

 declare void @call_target()
 @global_byte = internal global [1 x i8] zeroinitializer