Commit 55f2e6d3 by David Sehr

Optimize 64-bit shifts by constants for x86-32

This should improve the performance of fpclassifyd in the ammp SPEC test. BUG=none R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1351133003
parent 43632b95
...@@ -58,6 +58,7 @@ ...@@ -58,6 +58,7 @@
#define INT_VALUE_ARRAY \ #define INT_VALUE_ARRAY \
{ 0x0, 0x1, 0x7ffffffe, 0x7fffffff, \ { 0x0, 0x1, 0x7ffffffe, 0x7fffffff, \
0x80000000, 0x80000001, 0xfffffffe, 0xffffffff, \ 0x80000000, 0x80000001, 0xfffffffe, 0xffffffff, \
0x1e, 0x1f, 0x20, 0x21, 0x3e, 0x3f, 0x40, 0x41, \
0x7e, 0x7f, 0x80, 0x81, \ 0x7e, 0x7f, 0x80, 0x81, \
0xfe, 0xff, 0x100, 0x101, \ 0xfe, 0xff, 0x100, 0x101, \
0x7ffe, 0x7fff, 0x8000, 0x8001, \ 0x7ffe, 0x7fff, 0x8000, 0x8001, \
......
...@@ -2103,7 +2103,7 @@ template <class Machine> class InstX86Shld final : public InstX86Base<Machine> { ...@@ -2103,7 +2103,7 @@ template <class Machine> class InstX86Shld final : public InstX86Base<Machine> {
public: public:
static InstX86Shld *create(Cfg *Func, Variable *Dest, Variable *Source1, static InstX86Shld *create(Cfg *Func, Variable *Dest, Variable *Source1,
Variable *Source2) { Operand *Source2) {
return new (Func->allocate<InstX86Shld>()) return new (Func->allocate<InstX86Shld>())
InstX86Shld(Func, Dest, Source1, Source2); InstX86Shld(Func, Dest, Source1, Source2);
} }
...@@ -2115,7 +2115,7 @@ public: ...@@ -2115,7 +2115,7 @@ public:
} }
private: private:
InstX86Shld(Cfg *Func, Variable *Dest, Variable *Source1, Variable *Source2); InstX86Shld(Cfg *Func, Variable *Dest, Variable *Source1, Operand *Source2);
}; };
/// Shrd instruction - shift across a pair of operands. /// Shrd instruction - shift across a pair of operands.
...@@ -2126,7 +2126,7 @@ template <class Machine> class InstX86Shrd final : public InstX86Base<Machine> { ...@@ -2126,7 +2126,7 @@ template <class Machine> class InstX86Shrd final : public InstX86Base<Machine> {
public: public:
static InstX86Shrd *create(Cfg *Func, Variable *Dest, Variable *Source1, static InstX86Shrd *create(Cfg *Func, Variable *Dest, Variable *Source1,
Variable *Source2) { Operand *Source2) {
return new (Func->allocate<InstX86Shrd>()) return new (Func->allocate<InstX86Shrd>())
InstX86Shrd(Func, Dest, Source1, Source2); InstX86Shrd(Func, Dest, Source1, Source2);
} }
...@@ -2138,7 +2138,7 @@ public: ...@@ -2138,7 +2138,7 @@ public:
} }
private: private:
InstX86Shrd(Cfg *Func, Variable *Dest, Variable *Source1, Variable *Source2); InstX86Shrd(Cfg *Func, Variable *Dest, Variable *Source1, Operand *Source2);
}; };
/// Conditional move instruction. /// Conditional move instruction.
......
...@@ -74,7 +74,7 @@ InstX86Mul<Machine>::InstX86Mul(Cfg *Func, Variable *Dest, Variable *Source1, ...@@ -74,7 +74,7 @@ InstX86Mul<Machine>::InstX86Mul(Cfg *Func, Variable *Dest, Variable *Source1,
template <class Machine> template <class Machine>
InstX86Shld<Machine>::InstX86Shld(Cfg *Func, Variable *Dest, Variable *Source1, InstX86Shld<Machine>::InstX86Shld(Cfg *Func, Variable *Dest, Variable *Source1,
Variable *Source2) Operand *Source2)
: InstX86Base<Machine>(Func, InstX86Base<Machine>::Shld, 3, Dest) { : InstX86Base<Machine>(Func, InstX86Base<Machine>::Shld, 3, Dest) {
this->addSource(Dest); this->addSource(Dest);
this->addSource(Source1); this->addSource(Source1);
...@@ -83,7 +83,7 @@ InstX86Shld<Machine>::InstX86Shld(Cfg *Func, Variable *Dest, Variable *Source1, ...@@ -83,7 +83,7 @@ InstX86Shld<Machine>::InstX86Shld(Cfg *Func, Variable *Dest, Variable *Source1,
template <class Machine> template <class Machine>
InstX86Shrd<Machine>::InstX86Shrd(Cfg *Func, Variable *Dest, Variable *Source1, InstX86Shrd<Machine>::InstX86Shrd(Cfg *Func, Variable *Dest, Variable *Source1,
Variable *Source2) Operand *Source2)
: InstX86Base<Machine>(Func, InstX86Base<Machine>::Shrd, 3, Dest) { : InstX86Base<Machine>(Func, InstX86Base<Machine>::Shrd, 3, Dest) {
this->addSource(Dest); this->addSource(Dest);
this->addSource(Source1); this->addSource(Source1);
......
...@@ -568,13 +568,13 @@ protected: ...@@ -568,13 +568,13 @@ protected:
void _shl(Variable *Dest, Operand *Src0) { void _shl(Variable *Dest, Operand *Src0) {
Context.insert(Traits::Insts::Shl::create(Func, Dest, Src0)); Context.insert(Traits::Insts::Shl::create(Func, Dest, Src0));
} }
void _shld(Variable *Dest, Variable *Src0, Variable *Src1) { void _shld(Variable *Dest, Variable *Src0, Operand *Src1) {
Context.insert(Traits::Insts::Shld::create(Func, Dest, Src0, Src1)); Context.insert(Traits::Insts::Shld::create(Func, Dest, Src0, Src1));
} }
void _shr(Variable *Dest, Operand *Src0) { void _shr(Variable *Dest, Operand *Src0) {
Context.insert(Traits::Insts::Shr::create(Func, Dest, Src0)); Context.insert(Traits::Insts::Shr::create(Func, Dest, Src0));
} }
void _shrd(Variable *Dest, Variable *Src0, Variable *Src1) { void _shrd(Variable *Dest, Variable *Src0, Operand *Src1) {
Context.insert(Traits::Insts::Shrd::create(Func, Dest, Src0, Src1)); Context.insert(Traits::Insts::Shrd::create(Func, Dest, Src0, Src1));
} }
void _shufps(Variable *Dest, Operand *Src0, Operand *Src1) { void _shufps(Variable *Dest, Operand *Src0, Operand *Src1) {
......
...@@ -1185,7 +1185,46 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1185,7 +1185,46 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
} break; } break;
case InstArithmetic::Shl: { case InstArithmetic::Shl: {
// TODO: Refactor the similarities between Shl, Lshr, and Ashr. // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
// gcc does the following: Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Constant *Zero = Ctx->getConstantZero(IceType_i32);
if (const auto *ConstantShiftAmount =
llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
uint32_t ShiftAmount = ConstantShiftAmount->getValue();
if (ShiftAmount > 32) {
// a=b<<c ==>
// t2 = b.lo
// t2 = shl t2, ShiftAmount-32
// a.hi = t2
// a.lo = 0
_mov(T_2, Src0Lo);
_shl(T_2, Ctx->getConstantInt32(ShiftAmount - 32));
_mov(DestHi, T_2);
_mov(DestLo, Zero);
} else if (ShiftAmount == 32) {
// a=b<<c ==>
// t2 = b.lo
// a.hi = t2
// a.lo = 0
_mov(T_2, Src0Lo);
_mov(DestHi, T_2);
_mov(DestLo, Zero);
} else {
// a=b<<c ==>
// t2 = b.lo
// t3 = b.hi
// t3 = shld t3, t2, ShiftAmount
// t2 = shl t2, ShiftAmount
// a.lo = t2
// a.hi = t3
_mov(T_2, Src0Lo);
_mov(T_3, Src0Hi);
_shld(T_3, T_2, Ctx->getConstantInt32(ShiftAmount));
_shl(T_2, Ctx->getConstantInt32(ShiftAmount));
// Move T_2 first to reduce register pressure.
_mov(DestLo, T_2);
_mov(DestHi, T_3);
}
} else {
// a=b<<c ==> // a=b<<c ==>
// t1:ecx = c.lo & 0xff // t1:ecx = c.lo & 0xff
// t2 = b.lo // t2 = b.lo
...@@ -1200,9 +1239,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1200,9 +1239,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
// L1: // L1:
// a.lo = t2 // a.lo = t2
// a.hi = t3 // a.hi = t3
Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Constant *BitTest = Ctx->getConstantInt32(0x20); Constant *BitTest = Ctx->getConstantInt32(0x20);
Constant *Zero = Ctx->getConstantZero(IceType_i32);
typename Traits::Insts::Label *Label = typename Traits::Insts::Label *Label =
Traits::Insts::Label::create(Func, this); Traits::Insts::Label::create(Func, this);
_mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
...@@ -1220,8 +1257,49 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1220,8 +1257,49 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
Context.insert(Label); Context.insert(Label);
_mov(DestLo, T_2); _mov(DestLo, T_2);
_mov(DestHi, T_3); _mov(DestHi, T_3);
}
} break; } break;
case InstArithmetic::Lshr: { case InstArithmetic::Lshr: {
Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Constant *Zero = Ctx->getConstantZero(IceType_i32);
if (const auto *ConstantShiftAmount =
llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
uint32_t ShiftAmount = ConstantShiftAmount->getValue();
if (ShiftAmount > 32) {
// a=b>>c (unsigned) ==>
// t3 = b.hi
// t3 = shr t3, ShiftAmount-32
// a.lo = t3
// a.hi = 0
_mov(T_3, Src0Hi);
_shr(T_3, Ctx->getConstantInt32(ShiftAmount - 32));
_mov(DestLo, T_3);
_mov(DestHi, Zero);
} else if (ShiftAmount == 32) {
// a=b>>c (unsigned) ==>
// t3 = b.hi
// a.lo = t3
// a.hi = 0
_mov(T_3, Src0Hi);
_mov(DestLo, T_3);
_mov(DestHi, Zero);
} else {
// a=b>>c (unsigned) ==>
// t2 = b.lo
// t3 = b.hi
// t2 = shrd t2, t3, ShiftAmount
// t3 = shr t3, ShiftAmount
// a.lo = t2
// a.hi = t3
_mov(T_2, Src0Lo);
_mov(T_3, Src0Hi);
_shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount));
_shr(T_3, Ctx->getConstantInt32(ShiftAmount));
// Move T_3 first to reduce register pressure.
_mov(DestHi, T_3);
_mov(DestLo, T_2);
}
} else {
// a=b>>c (unsigned) ==> // a=b>>c (unsigned) ==>
// t1:ecx = c.lo & 0xff // t1:ecx = c.lo & 0xff
// t2 = b.lo // t2 = b.lo
...@@ -1236,9 +1314,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1236,9 +1314,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
// L1: // L1:
// a.lo = t2 // a.lo = t2
// a.hi = t3 // a.hi = t3
Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Constant *BitTest = Ctx->getConstantInt32(0x20); Constant *BitTest = Ctx->getConstantInt32(0x20);
Constant *Zero = Ctx->getConstantZero(IceType_i32);
typename Traits::Insts::Label *Label = typename Traits::Insts::Label *Label =
Traits::Insts::Label::create(Func, this); Traits::Insts::Label::create(Func, this);
_mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
...@@ -1256,8 +1332,55 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1256,8 +1332,55 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
Context.insert(Label); Context.insert(Label);
_mov(DestLo, T_2); _mov(DestLo, T_2);
_mov(DestHi, T_3); _mov(DestHi, T_3);
}
} break; } break;
case InstArithmetic::Ashr: { case InstArithmetic::Ashr: {
Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
if (const auto *ConstantShiftAmount =
llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
uint32_t ShiftAmount = ConstantShiftAmount->getValue();
if (ShiftAmount > 32) {
// a=b>>c (signed) ==>
// t2 = b.hi
// t3 = b.hi
// t3 = sar t3, 0x1f
// t2 = shrd t2, t3, ShiftAmount-32
// a.lo = t2
// a.hi = t3
_mov(T_2, Src0Hi);
_mov(T_3, Src0Hi);
_sar(T_3, Ctx->getConstantInt32(0x1f));
_shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount - 32));
_mov(DestLo, T_2);
_mov(DestHi, T_3);
} else if (ShiftAmount == 32) {
// a=b>>c (signed) ==>
// t2 = b.hi
// a.lo = t2
// t3 = b.hi
// t3 = sar t3, 0x1f
// a.hi = t3
_mov(T_2, Src0Hi);
_mov(DestLo, T_2);
_mov(T_3, Src0Hi);
_sar(T_3, Ctx->getConstantInt32(0x1f));
_mov(DestHi, T_3);
} else {
// a=b>>c (signed) ==>
// t2 = b.lo
// t3 = b.hi
// t2 = shrd t2, t3, ShiftAmount
// t3 = sar t3, ShiftAmount
// a.lo = t2
// a.hi = t3
_mov(T_2, Src0Lo);
_mov(T_3, Src0Hi);
_shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount));
_sar(T_3, Ctx->getConstantInt32(ShiftAmount));
_mov(DestLo, T_2);
_mov(DestHi, T_3);
}
} else {
// a=b>>c (signed) ==> // a=b>>c (signed) ==>
// t1:ecx = c.lo & 0xff // t1:ecx = c.lo & 0xff
// t2 = b.lo // t2 = b.lo
...@@ -1272,7 +1395,6 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1272,7 +1395,6 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
// L1: // L1:
// a.lo = t2 // a.lo = t2
// a.hi = t3 // a.hi = t3
Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Constant *BitTest = Ctx->getConstantInt32(0x20); Constant *BitTest = Ctx->getConstantInt32(0x20);
Constant *SignExtend = Ctx->getConstantInt32(0x1f); Constant *SignExtend = Ctx->getConstantInt32(0x1f);
typename Traits::Insts::Label *Label = typename Traits::Insts::Label *Label =
...@@ -1293,6 +1415,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1293,6 +1415,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
Context.insert(Label); Context.insert(Label);
_mov(DestLo, T_2); _mov(DestLo, T_2);
_mov(DestHi, T_3); _mov(DestHi, T_3);
}
} break; } break;
case InstArithmetic::Fadd: case InstArithmetic::Fadd:
case InstArithmetic::Fsub: case InstArithmetic::Fsub:
......
...@@ -112,3 +112,105 @@ entry: ...@@ -112,3 +112,105 @@ entry:
} }
; CHECK-LABEL: ashrImmNeg ; CHECK-LABEL: ashrImmNeg
; CHECK: sar {{.*}},0xff ; CHECK: sar {{.*}},0xff
; 64-bit shl by the constant 1. Expects a shld by 0x1 (high half) followed by
; a shl by 1 (low half), so both halves of the shifted pair are verified.
define i64 @shlImm64One(i64 %val) {
entry:
%result = shl i64 %val, 1
ret i64 %result
}
; CHECK-LABEL: shlImm64One
; CHECK: shld {{.*}},0x1
; CHECK: shl {{.*}},1
; 64-bit shl by the constant 4 (less than 32). Expects a shld by 0x4 on the
; high half followed by a shl by 0x4 on the low half.
define i64 @shlImm64LessThan32(i64 %val) {
entry:
%result = shl i64 %val, 4
ret i64 %result
}
; CHECK-LABEL: shlImm64LessThan32
; CHECK: shld {{.*}},0x4
; CHECK: shl {{.*}},0x4
; 64-bit shl by the constant 32. Expects that no shl instruction is emitted
; for this function.
define i64 @shlImm64Equal32(i64 %val) {
entry:
%result = shl i64 %val, 32
ret i64 %result
}
; CHECK-LABEL: shlImm64Equal32
; CHECK-NOT: shl
; 64-bit shl by the constant 40 (greater than 32). Expects a single shl by 0x8
; (40 - 32) and no shld before it.
define i64 @shlImm64GreaterThan32(i64 %val) {
entry:
%result = shl i64 %val, 40
ret i64 %result
}
; CHECK-LABEL: shlImm64GreaterThan32
; CHECK-NOT: shld
; CHECK: shl {{.*}},0x8
; 64-bit lshr by the constant 1. Expects a shrd by 0x1 (low half) followed by
; a shr by 1 (high half), so both halves of the shifted pair are verified.
define i64 @lshrImm64One(i64 %val) {
entry:
%result = lshr i64 %val, 1
ret i64 %result
}
; CHECK-LABEL: lshrImm64One
; CHECK: shrd {{.*}},0x1
; CHECK: shr {{.*}},1
; 64-bit lshr by the constant 4 (less than 32). Expects a shrd by 0x4 followed
; by a shr by 0x4.
define i64 @lshrImm64LessThan32(i64 %val) {
entry:
%result = lshr i64 %val, 4
ret i64 %result
}
; CHECK-LABEL: lshrImm64LessThan32
; CHECK: shrd {{.*}},0x4
; CHECK: shr {{.*}},0x4
; 64-bit lshr by the constant 32. Expects that no shr instruction is emitted
; for this function.
define i64 @lshrImm64Equal32(i64 %val) {
entry:
%result = lshr i64 %val, 32
ret i64 %result
}
; CHECK-LABEL: lshrImm64Equal32
; CHECK-NOT: shr
; 64-bit lshr by the constant 40 (greater than 32). Expects a shr by 0x8
; (40 - 32) with no shrd before it.
define i64 @lshrImm64GreaterThan32(i64 %val) {
entry:
%result = lshr i64 %val, 40
ret i64 %result
}
; CHECK-LABEL: lshrImm64GreaterThan32
; CHECK-NOT: shrd
; CHECK: shr {{.*}},0x8
; 64-bit ashr by the constant 1. Expects a shrd by 0x1 followed by a sar by 1.
define i64 @ashrImm64One(i64 %val) {
entry:
%result = ashr i64 %val, 1
ret i64 %result
}
; CHECK-LABEL: ashrImm64One
; CHECK: shrd {{.*}},0x1
; CHECK: sar {{.*}},1
; 64-bit ashr by the constant 4 (less than 32). Expects a shrd by 0x4 followed
; by a sar by 0x4.
define i64 @ashrImm64LessThan32(i64 %val) {
entry:
%result = ashr i64 %val, 4
ret i64 %result
}
; CHECK-LABEL: ashrImm64LessThan32
; CHECK: shrd {{.*}},0x4
; CHECK: sar {{.*}},0x4
; 64-bit ashr by the constant 32. Expects a sar by 0x1f and no shrd after it.
define i64 @ashrImm64Equal32(i64 %val) {
entry:
%result = ashr i64 %val, 32
ret i64 %result
}
; CHECK-LABEL: ashrImm64Equal32
; CHECK: sar {{.*}},0x1f
; CHECK-NOT: shrd
; 64-bit ashr by the constant 40 (greater than 32). Expects a sar by 0x1f
; followed by a shrd by 0x8 (40 - 32).
define i64 @ashrImm64GreaterThan32(i64 %val) {
entry:
%result = ashr i64 %val, 40
ret i64 %result
}
; CHECK-LABEL: ashrImm64GreaterThan32
; CHECK: sar {{.*}},0x1f
; CHECK: shrd {{.*}},0x8
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment