Commit 448c16f0 by John Porto

Removes references to ah.

AH is a thorn in the flesh for our X86-64 backend. The assembler was designed to always encode the low 8-bit registers, so %ah would become %spl. While it is true we **could** force %spl to always be encoded as %ah, that would not work if the instruction has a rex prefix. This CL removes references to %ah from TargetX86Base. There used to be 2 uses of ah in the target lowering: 1) To zero-extend %al before an unsigned div: mov <<src0>>, %al mov 0, %ah div <<src1>> This pattern has been changed to xor %eax, %eax mov <<src0>>, %al div <<src1>> 2) To access the 8-bit remainder for 8-bit division: mov %ah, <<dest>> This pattern has been changed to shr $8, %eax mov %al, <<Dest>> BUG= https://code.google.com/p/nativeclient/issues/detail?id=4077 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1260163003.
parent 2fea26ca
...@@ -44,16 +44,10 @@ ...@@ -44,16 +44,10 @@
// all of the registers are considered and have distinct numberings. // all of the registers are considered and have distinct numberings.
// This is in contrast to the above, where the "encode" is based on how // This is in contrast to the above, where the "encode" is based on how
// the register numbers will be encoded in binaries and values can overlap. // the register numbers will be encoded in binaries and values can overlap.
// Note that the isI8 attributed of Reg_ah is not set. In general we
// don't want the register allocator choosing Reg_ah, in particular
// for lowering insertelement to pinsrb where internally we use an
// 8-bit operand but externally pinsrb uses a 32-bit register, in
// which Reg_ah doesn't map to eax.
#define REGX8632_TABLE \ #define REGX8632_TABLE \
/* val, encode, name, name16, name8, scratch, preserved, stackptr, \ /* val, encode, name, name16, name8, scratch, preserved, stackptr, \
frameptr, isI8, isInt, isFP */ \ frameptr, isI8, isInt, isFP */ \
REGX8632_GPR_TABLE \ REGX8632_GPR_TABLE \
X(Reg_ah, 4, "???", "" , "ah", 0, 0, 0, 0, 0, 0, 0) \
REGX8632_XMM_TABLE REGX8632_XMM_TABLE
//#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, //#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,
// frameptr, isI8, isInt, isFP) // frameptr, isI8, isInt, isFP)
...@@ -73,8 +67,7 @@ ...@@ -73,8 +67,7 @@
X(Reg_al, = 0) \ X(Reg_al, = 0) \
X(Reg_cl, = 1) \ X(Reg_cl, = 1) \
X(Reg_dl, = 2) \ X(Reg_dl, = 2) \
X(Reg_bl, = 3) \ X(Reg_bl, = 3)
X(Reg_ah, = 4)
//#define X(val, encode) //#define X(val, encode)
// X86 segment registers. // X86 segment registers.
......
...@@ -67,15 +67,10 @@ ...@@ -67,15 +67,10 @@
// all of the registers are considered and have distinct numberings. // all of the registers are considered and have distinct numberings.
// This is in contrast to the above, where the "encode" is based on how // This is in contrast to the above, where the "encode" is based on how
// the register numbers will be encoded in binaries and values can overlap. // the register numbers will be encoded in binaries and values can overlap.
// We don't want the register allocator choosing Reg_ah, in particular
// for lowering insertelement to pinsrb where internally we use an
// 8-bit operand but externally pinsrb uses a 32-bit register, in
// which Reg_ah doesn't map to eax.
#define REGX8664_TABLE \ #define REGX8664_TABLE \
/* val, encode, name64, name, name16, name8, scratch, preserved, \ /* val, encode, name64, name, name16, name8, scratch, preserved, \
stackptr, frameptr, isInt, isFP */ \ stackptr, frameptr, isInt, isFP */ \
REGX8664_GPR_TABLE \ REGX8664_GPR_TABLE \
X(Reg_ah, = Reg_rax + 4, "?ah", "?ah", "?ah", "ah", 0, 0, 0, 0, 0, 0) \
REGX8664_XMM_TABLE REGX8664_XMM_TABLE
//#define X(val, encode, name, name32, name16, name8, scratch, preserved, //#define X(val, encode, name, name32, name16, name8, scratch, preserved,
// stackptr, frameptr, isI8, isInt, isFP) // stackptr, frameptr, isI8, isInt, isFP)
......
...@@ -78,19 +78,20 @@ public: ...@@ -78,19 +78,20 @@ public:
}; };
static inline GPRRegister getEncodedGPR(int32_t RegNum) { static inline GPRRegister getEncodedGPR(int32_t RegNum) {
assert(Reg_GPR_First <= RegNum && RegNum <= Reg_GPR_Last); assert(Reg_GPR_First <= RegNum);
assert(RegNum <= Reg_GPR_Last);
return GPRRegister(RegNum - Reg_GPR_First); return GPRRegister(RegNum - Reg_GPR_First);
} }
static inline XmmRegister getEncodedXmm(int32_t RegNum) { static inline XmmRegister getEncodedXmm(int32_t RegNum) {
assert(Reg_XMM_First <= RegNum && RegNum <= Reg_XMM_Last); assert(Reg_XMM_First <= RegNum);
assert(RegNum <= Reg_XMM_Last);
return XmmRegister(RegNum - Reg_XMM_First); return XmmRegister(RegNum - Reg_XMM_First);
} }
static inline ByteRegister getEncodedByteReg(int32_t RegNum) { static inline ByteRegister getEncodedByteReg(int32_t RegNum) {
assert(RegNum == Reg_ah || (Reg_GPR_First <= RegNum && RegNum <= Reg_ebx)); assert(Reg_GPR_First <= RegNum);
if (RegNum == Reg_ah) assert(RegNum <= Reg_ebx);
return Encoded_Reg_ah;
return ByteRegister(RegNum - Reg_GPR_First); return ByteRegister(RegNum - Reg_GPR_First);
} }
...@@ -102,7 +103,8 @@ public: ...@@ -102,7 +103,8 @@ public:
} }
static inline X87STRegister getEncodedSTReg(int32_t RegNum) { static inline X87STRegister getEncodedSTReg(int32_t RegNum) {
assert(Encoded_X87ST_First <= RegNum && RegNum <= Encoded_X87ST_Last); assert(Encoded_X87ST_First <= RegNum);
assert(RegNum <= Encoded_X87ST_Last);
return X87STRegister(RegNum); return X87STRegister(RegNum);
} }
}; };
......
...@@ -69,16 +69,20 @@ public: ...@@ -69,16 +69,20 @@ public:
}; };
static inline GPRRegister getEncodedGPR(int32_t RegNum) { static inline GPRRegister getEncodedGPR(int32_t RegNum) {
assert(Reg_GPR_First <= RegNum && RegNum <= Reg_GPR_Last); assert(Reg_GPR_First <= RegNum);
assert(RegNum <= Reg_GPR_Last);
return GPRRegister(RegNum - Reg_GPR_First); return GPRRegister(RegNum - Reg_GPR_First);
} }
static inline XmmRegister getEncodedXmm(int32_t RegNum) { static inline XmmRegister getEncodedXmm(int32_t RegNum) {
assert(Reg_XMM_First <= RegNum && RegNum <= Reg_XMM_Last); assert(Reg_XMM_First <= RegNum);
assert(RegNum <= Reg_XMM_Last);
return XmmRegister(RegNum - Reg_XMM_First); return XmmRegister(RegNum - Reg_XMM_First);
} }
static inline ByteRegister getEncodedByteReg(int32_t RegNum) { static inline ByteRegister getEncodedByteReg(int32_t RegNum) {
assert(Reg_GPR_First <= RegNum);
assert(RegNum <= Reg_GPR_Last);
return ByteRegister(RegNum - Reg_GPR_First); return ByteRegister(RegNum - Reg_GPR_First);
} }
......
//===- subzero/src/IceTargetLoweringX8664.def - x86-64 X-macros -*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines certain patterns for lowering to x86-64 target
// instructions, in the form of x-macros.
//
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICETARGETLOWERINGX8664_DEF
#define SUBZERO_SRC_ICETARGETLOWERINGX8664_DEF
#define FCMPX8664_TABLE \
/* <---- scalar comparison ----> <- vector comparison -> */ \
/* val, dflt, swap, C1, C2, swap, predicate */ \
X(False, 0, 0, Br_None, Br_None, 0, Cmpps_Invalid) \
X(Oeq, 0, 0, Br_ne, Br_p, 0, Cmpps_eq) \
X(Ogt, 1, 0, Br_a, Br_None, 1, Cmpps_lt) \
X(Oge, 1, 0, Br_ae, Br_None, 1, Cmpps_le) \
X(Olt, 1, 1, Br_a, Br_None, 0, Cmpps_lt) \
X(Ole, 1, 1, Br_ae, Br_None, 0, Cmpps_le) \
X(One, 1, 0, Br_ne, Br_None, 0, Cmpps_Invalid) \
X(Ord, 1, 0, Br_np, Br_None, 0, Cmpps_ord) \
X(Ueq, 1, 0, Br_e, Br_None, 0, Cmpps_Invalid) \
X(Ugt, 1, 1, Br_b, Br_None, 0, Cmpps_nle) \
X(Uge, 1, 1, Br_be, Br_None, 0, Cmpps_nlt) \
X(Ult, 1, 0, Br_b, Br_None, 1, Cmpps_nle) \
X(Ule, 1, 0, Br_be, Br_None, 1, Cmpps_nlt) \
X(Une, 1, 0, Br_ne, Br_p, 0, Cmpps_neq) \
X(Uno, 1, 0, Br_p, Br_None, 0, Cmpps_unord) \
X(True, 1, 0, Br_None, Br_None, 0, Cmpps_Invalid) \
//#define X(val, dflt, swapS, C1, C2, swapV, pred)
#define ICMPX8664_TABLE \
/* val, C_32, C1_64, C2_64, C3_64 */ \
X(Eq, Br_e, Br_None, Br_ne, Br_e) \
X(Ne, Br_ne, Br_ne, Br_None, Br_ne) \
X(Ugt, Br_a, Br_a, Br_b, Br_a) \
X(Uge, Br_ae, Br_a, Br_b, Br_ae) \
X(Ult, Br_b, Br_b, Br_a, Br_b) \
X(Ule, Br_be, Br_b, Br_a, Br_be) \
X(Sgt, Br_g, Br_g, Br_l, Br_a) \
X(Sge, Br_ge, Br_g, Br_l, Br_ae) \
X(Slt, Br_l, Br_l, Br_g, Br_b) \
X(Sle, Br_le, Br_l, Br_g, Br_be) \
//#define X(val, C_32, C1_64, C2_64, C3_64)
#endif // SUBZERO_SRC_ICETARGETLOWERINGX8664_DEF
...@@ -1850,12 +1850,21 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1850,12 +1850,21 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
// immediates as the operand. // immediates as the operand.
Src1 = legalize(Src1, Legal_Reg | Legal_Mem); Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
if (isByteSizedArithType(Dest->getType())) { if (isByteSizedArithType(Dest->getType())) {
Variable *T_ah = nullptr; // For 8-bit unsigned division we need to zero-extend al into ah. A mov
Constant *Zero = Ctx->getConstantZero(IceType_i8); // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64
// assembler refuses to encode %ah (encoding %spl with a REX prefix
// instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah
// with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and
// d[%lh], which means the X86 target lowering (and the register
// allocator) would have to be aware of this restriction. For now, we
// simply zero %eax completely, and move the dividend into %al.
Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
Context.insert(InstFakeDef::create(Func, T_eax));
_xor(T_eax, T_eax);
_mov(T, Src0, Traits::RegisterSet::Reg_eax); _mov(T, Src0, Traits::RegisterSet::Reg_eax);
_mov(T_ah, Zero, Traits::RegisterSet::Reg_ah); _div(T, Src1, T);
_div(T, Src1, T_ah);
_mov(Dest, T); _mov(Dest, T);
Context.insert(InstFakeUse::create(Func, T_eax));
} else { } else {
Constant *Zero = Ctx->getConstantZero(IceType_i32); Constant *Zero = Ctx->getConstantZero(IceType_i32);
_mov(T, Src0, Traits::RegisterSet::Reg_eax); _mov(T, Src0, Traits::RegisterSet::Reg_eax);
...@@ -1917,12 +1926,21 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1917,12 +1926,21 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
case InstArithmetic::Urem: case InstArithmetic::Urem:
Src1 = legalize(Src1, Legal_Reg | Legal_Mem); Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
if (isByteSizedArithType(Dest->getType())) { if (isByteSizedArithType(Dest->getType())) {
Variable *T_ah = nullptr; Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
Constant *Zero = Ctx->getConstantZero(IceType_i8); Context.insert(InstFakeDef::create(Func, T_eax));
_xor(T_eax, T_eax);
_mov(T, Src0, Traits::RegisterSet::Reg_eax); _mov(T, Src0, Traits::RegisterSet::Reg_eax);
_mov(T_ah, Zero, Traits::RegisterSet::Reg_ah); Variable *T_al = makeReg(IceType_i8, Traits::RegisterSet::Reg_eax);
_div(T_ah, Src1, T); _div(T_al, Src1, T);
_mov(Dest, T_ah); // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
// mov %ah, %al because it would make x86-64 codegen more complicated. If
// this ever becomes a problem we can introduce a pseudo rem instruction
// that returns the remainder in %al directly (and uses a mov for copying
// %ah to %al.)
static constexpr uint8_t AlSizeInBits = 8;
_shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
_mov(Dest, T_al);
Context.insert(InstFakeUse::create(Func, T_eax));
} else { } else {
Constant *Zero = Ctx->getConstantZero(IceType_i32); Constant *Zero = Ctx->getConstantZero(IceType_i32);
_mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
...@@ -1974,12 +1992,21 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1974,12 +1992,21 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
} }
Src1 = legalize(Src1, Legal_Reg | Legal_Mem); Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
if (isByteSizedArithType(Dest->getType())) { if (isByteSizedArithType(Dest->getType())) {
Variable *T_ah = makeReg(IceType_i8, Traits::RegisterSet::Reg_ah);
_mov(T, Src0, Traits::RegisterSet::Reg_eax); _mov(T, Src0, Traits::RegisterSet::Reg_eax);
// T is %al.
_cbwdq(T, T); _cbwdq(T, T);
Context.insert(InstFakeDef::create(Func, T_ah)); _idiv(T, Src1, T);
_idiv(T_ah, Src1, T); Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
_mov(Dest, T_ah); Context.insert(InstFakeDef::create(Func, T_eax));
// shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
// mov %ah, %al because it would make x86-64 codegen more complicated. If
// this ever becomes a problem we can introduce a pseudo rem instruction
// that returns the remainder in %al directly (and uses a mov for copying
// %ah to %al.)
static constexpr uint8_t AlSizeInBits = 8;
_shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
_mov(Dest, T);
Context.insert(InstFakeUse::create(Func, T_eax));
} else { } else {
T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
_mov(T, Src0, Traits::RegisterSet::Reg_eax); _mov(T, Src0, Traits::RegisterSet::Reg_eax);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment