Commit 8525c329 by Jim Stichnoth

Subzero: Apply commutativity to the RMW optimization.

The read-modify-write (RMW) optimization looks for patterns like this:

    a = Load addr
    b = <op> a, other
    Store b, addr

and essentially transforms them into this:

    RMW <op>, addr, other

This CL also applies the transformation when the middle instruction is

    b = <op> other, a

and <op> is commutative.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4095
R=jpp@chromium.org

Review URL: https://codereview.chromium.org/1193103005
parent eafb56cb
...@@ -711,10 +711,13 @@ void TargetX8632::findRMW() { ...@@ -711,10 +711,13 @@ void TargetX8632::findRMW() {
if (!isSameMemAddressOperand(Load->getSourceAddress(), if (!isSameMemAddressOperand(Load->getSourceAddress(),
Store->getAddr())) Store->getAddr()))
continue; continue;
if (false && Load->getSourceAddress() != Store->getAddr()) Operand *ArithSrcFromLoad = Arith->getSrc(0);
continue; Operand *ArithSrcOther = Arith->getSrc(1);
if (Arith->getSrc(0) != Load->getDest()) if (ArithSrcFromLoad != Load->getDest()) {
if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
continue; continue;
std::swap(ArithSrcFromLoad, ArithSrcOther);
}
if (Arith->getDest() != Store->getData()) if (Arith->getDest() != Store->getData())
continue; continue;
if (!canRMW(Arith)) if (!canRMW(Arith))
...@@ -734,8 +737,7 @@ void TargetX8632::findRMW() { ...@@ -734,8 +737,7 @@ void TargetX8632::findRMW() {
InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon); InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
Node->getInsts().insert(I3, BeaconDef); Node->getInsts().insert(I3, BeaconDef);
InstX8632FakeRMW *RMW = InstX8632FakeRMW::create( InstX8632FakeRMW *RMW = InstX8632FakeRMW::create(
Func, Arith->getSrc(1), Store->getAddr(), Beacon, Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
Arith->getOp());
Node->getInsts().insert(I3, RMW); Node->getInsts().insert(I3, RMW);
} }
} }
......
...@@ -102,3 +102,31 @@ entry: ...@@ -102,3 +102,31 @@ entry:
; Look for something like: add DWORD PTR [eax+ecx*4+12],ecx ; Look for something like: add DWORD PTR [eax+ecx*4+12],ecx
; CHECK-LABEL: rmw_add_i32_var_addropt ; CHECK-LABEL: rmw_add_i32_var_addropt
; CHECK: add DWORD PTR [e{{..}}+e{{..}}*4+0xc],e{{ax|bx|cx|dx|bp|di|si}} ; CHECK: add DWORD PTR [e{{..}}+e{{..}}*4+0xc],e{{ax|bx|cx|dx|bp|di|si}}
; Test for commutativity opportunities. This is the same as rmw_add_i32_var
; except with the "add" operands reversed.
; Loads an i32 from the address given by %addr_arg, adds %var to it with the
; loaded value as the SECOND operand of the add, and stores the sum back to
; the same address. Returns nothing.
define internal void @rmw_add_i32_var_comm(i32 %addr_arg, i32 %var) {
entry:
; The same %addr pointer feeds both the load and the store below.
%addr = inttoptr i32 %addr_arg to i32*
%val = load i32, i32* %addr, align 1
; Operand order is deliberately (%var, %val): the loaded value is on the
; right-hand side of a commutative operator.
%rmw = add i32 %var, %val
store i32 %rmw, i32* %addr, align 1
ret void
}
; Look for something like: add DWORD PTR [eax],ecx
; CHECK-LABEL: rmw_add_i32_var_comm
; CHECK: add DWORD PTR [e{{ax|bx|cx|dx|bp|di|si}}],e{{ax|bx|cx|dx|bp|di|si}}
; Test that commutativity isn't triggered for a non-commutative arithmetic
; operator (sub). This is the same as rmw_add_i32_var_comm except with a
; "sub" operation.
; Loads an i32 from the address given by %addr_arg, computes %var minus the
; loaded value, stores the difference back to the same address, and also
; returns the difference.
define internal i32 @no_rmw_sub_i32_var(i32 %addr_arg, i32 %var) {
entry:
; The same %addr pointer feeds both the load and the store below.
%addr = inttoptr i32 %addr_arg to i32*
%val = load i32, i32* %addr, align 1
; The loaded value is the right-hand operand of "sub", which is not
; commutative, so the operands cannot be exchanged.
%rmw = sub i32 %var, %val
store i32 %rmw, i32* %addr, align 1
; The computed value is returned in addition to being stored.
ret i32 %rmw
}
; CHECK-LABEL: no_rmw_sub_i32_var
; CHECK: sub e{{ax|bx|cx|dx|bp|di|si}},DWORD PTR [e{{ax|bx|cx|dx|bp|di|si}}]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment