Commit e6e497db by Jan Voung

Fix bug when atomic load is fused with an arith op (and not in the entry BB)

Normally, the FakeUse for preserving the atomic load ends up on the load's Dest. However, for a fused load+add, the load is deleted and its Dest is no longer defined. This trips up the liveness analysis when it happens in a non-entry block, so in that case the FakeUse should be on the add's dest instead. We have no direct access to the add, so introduce a getLastInserted() helper. There are a couple of ways to implement it: (a) modify insert() to track the last insertion explicitly, or (b) rewind one step from Next. The alternative would be to disable fusing for atomic loads altogether. BUG= https://code.google.com/p/nativeclient/issues/detail?id=3882 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/417353003
parent d7ee9728
......@@ -26,11 +26,12 @@ namespace Ice {
// Bind this lowering context to node N and position the cursors:
// Cur at the first live (non-deleted) instruction, Next just past it.
void LoweringContext::init(CfgNode *N) {
  Node = N;
  // Cache both ends of the node's instruction list: Begin is needed so
  // iterators can later be moved backward (e.g. by getLastInserted()),
  // End bounds forward advancement.
  Begin = getNode()->getInsts().begin();
  Cur = Begin;
  End = getNode()->getInsts().end();
  skipDeleted(Cur);
  Next = Cur;
  advanceForward(Next);
}
void LoweringContext::insert(Inst *Inst) {
......@@ -43,13 +44,26 @@ void LoweringContext::skipDeleted(InstList::iterator &I) const {
++I;
}
void LoweringContext::advance(InstList::iterator &I) const {
void LoweringContext::advanceForward(InstList::iterator &I) const {
if (I != End) {
++I;
skipDeleted(I);
}
}
// Move I one step backward, then keep rewinding over deleted
// instructions.  The caller must not invoke this with I already at
// Begin; note that I may come to rest on Begin even if that
// instruction is deleted.
void LoweringContext::advanceBackward(InstList::iterator &I) const {
  assert(I != Begin);
  --I;
  while (I != Begin && (*I)->isDeleted())
    --I;
}
// Return the most recently inserted instruction: the last live
// instruction immediately preceding the insertion point Next.
Inst *LoweringContext::getLastInserted() const {
  InstList::iterator Prev = Next;
  advanceBackward(Prev);
  return *Prev;
}
TargetLowering *TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
// These statements can be #ifdef'd to specialize the code generator
// to a subset of the available targets. TODO: use CRTP.
......
......@@ -42,7 +42,7 @@ public:
return *Next;
}
Inst *getNextInst(InstList::iterator &Iter) const {
advance(Iter);
advanceForward(Iter);
if (Iter == End)
return NULL;
return *Iter;
......@@ -52,8 +52,9 @@ public:
// Returns the iterator designating the current instruction.
InstList::iterator getCur() const { return Cur; }
// Returns the cached end iterator of the node's instruction list.
InstList::iterator getEnd() const { return End; }
void insert(Inst *Inst);
Inst *getLastInserted() const;
// Move the current-instruction cursor up to the insertion point.
void advanceCur() { Cur = Next; }
// Advance the insertion point past the next live (non-deleted)
// instruction.  (The diff residue duplicating this line with a call to
// the old advance() name is removed; only advanceForward() exists.)
void advanceNext() { advanceForward(Next); }
// Reposition the insertion point; subsequent insertions occur at Position.
void setInsertPoint(const InstList::iterator &Position) { Next = Position; }
private:
......@@ -71,11 +72,14 @@ private:
// insertion point", to avoid confusion when previously-deleted
// instructions come between the two points.
InstList::iterator Next;
// Begin is a copy of Insts.begin(), used if iterators are moved backward.
InstList::iterator Begin;
// End is a copy of Insts.end(), used if Next needs to be advanced.
InstList::iterator End;
void skipDeleted(InstList::iterator &I) const;
void advance(InstList::iterator &I) const;
void advanceForward(InstList::iterator &I) const;
void advanceBackward(InstList::iterator &I) const;
LoweringContext(const LoweringContext &) LLVM_DELETED_FUNCTION;
LoweringContext &operator=(const LoweringContext &) LLVM_DELETED_FUNCTION;
};
......
......@@ -2724,15 +2724,18 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
// Then cast the bits back out of the XMM register to the i64 Dest.
InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
lowerCast(Cast);
// Make sure that the atomic load isn't elided.
// Make sure that the atomic load isn't elided when unused.
Context.insert(InstFakeUse::create(Func, Dest->getLo()));
Context.insert(InstFakeUse::create(Func, Dest->getHi()));
return;
}
InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
lowerLoad(Load);
// Make sure the atomic load isn't elided.
Context.insert(InstFakeUse::create(Func, Dest));
// Make sure the atomic load isn't elided when unused, by adding a FakeUse.
// Since lowerLoad may fuse the load w/ an arithmetic instruction,
// insert the FakeUse on the last-inserted instruction's dest.
Context.insert(InstFakeUse::create(Func,
Context.getLastInserted()->getDest()));
return;
}
case Intrinsics::AtomicRMW:
......
......@@ -88,6 +88,9 @@ entry:
define i32 @test_atomic_load_32_with_arith(i32 %iptr) {
entry:
br label %next
next:
%ptr = inttoptr i32 %iptr to i32*
%r = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
%r2 = add i32 %r, 32
......@@ -96,6 +99,11 @@ entry:
; CHECK-LABEL: test_atomic_load_32_with_arith
; CHECK: mov {{.*}}, dword
; The next instruction may be a separate load or folded into an add.
;
; In O2 mode, we know that the load and add are going to be fused.
; CHECKO2-LABEL: test_atomic_load_32_with_arith
; CHECKO2: mov {{.*}}, dword
; CHECKO2: add {{.*}}, dword
define i32 @test_atomic_load_32_ignored(i32 %iptr) {
entry:
......@@ -106,6 +114,9 @@ entry:
; CHECK-LABEL: test_atomic_load_32_ignored
; CHECK: mov {{.*}}, dword
; CHECK: mov {{.*}}, dword
; CHECKO2-LABEL: test_atomic_load_32_ignored
; CHECKO2: mov {{.*}}, dword
; CHECKO2: mov {{.*}}, dword
define i64 @test_atomic_load_64_ignored(i32 %iptr) {
entry:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment