Commit c59288b3 by Jim Stichnoth

Subzero: Refactor x86 register representation to actively use aliases.

Sets up additional register attributes, plus the notion of register classes, to enable robust usage of the high 8-bit GPRs (ah/bh/ch/dh), for both x86-32 and x86-64. (Note that the x86-64 changes are currently untested.) We add a Register Class field to the Variable class. The default register class is a value corresponding to the variable's type, but the target can extend the set of register class values, and the target lowering can assign different register classes as needed. The register allocator uses the register class instead of the type to determine the set of registers to draw from. For x86-64, the high 8-bit registers are not included in the general register allocation pool, but there are explicit references to ah for lowering the div/rem instructions. The target lowering is modified as needed to make sure types are appropriate and register use in instructions is legalized. Some other fixes and cleanups are included in this CL: * Makefile.standalone changes. Source files are reordered so that the more expensive compiles are done earlier, speeding up parallel builds by decreasing fragmentation. A dependency error is fixed for check-spec. * A bug is fixed in advanced phi lowering. When a temporary is introduced to break a cycle, we were neglecting to update the predecessor count for one of the operands, leading to an assertion failure. (Applying that fix to master resulted in no changes to spec2k code generation.) A consistency check is added to help find future problems like this. Also, refactored iteration over the Phi descriptor array to use range-based for loops and avoid directly indexing the array. * Removed most of the "IceType_" prefixes in x-macro tables for brevity. * Fix a correctness TODO in the register allocator. This had no effect on spec2k code generation in master or in this CL, so we were probably just lucky. * Made some much-needed s/Dest->getType()/Ty/ changes for brevity, in the target lowering sections that needed other changes. 
BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4095 R=jpp@chromium.org Review URL: https://codereview.chromium.org/1427973003 .
parent ea15bbe7
...@@ -196,9 +196,20 @@ LDFLAGS := $(HOST_FLAGS) -L$(LIBCXX_INSTALL_PATH)/lib -Wl,--gc-sections \ ...@@ -196,9 +196,20 @@ LDFLAGS := $(HOST_FLAGS) -L$(LIBCXX_INSTALL_PATH)/lib -Wl,--gc-sections \
# Not specifying -Wl,--gc-sections but instead doing bitcode linking GC w/ LTO. # Not specifying -Wl,--gc-sections but instead doing bitcode linking GC w/ LTO.
SB_LDFLAGS := $(LINKOPTLEVEL) $(LD_EXTRA) SB_LDFLAGS := $(LINKOPTLEVEL) $(LD_EXTRA)
# List the target-specific source files first, which generally take longer to
# compile, in the hope of improving parallel build time.
SRCS = \ SRCS = \
IceAssembler.cpp \
IceAssemblerARM32.cpp \ IceAssemblerARM32.cpp \
IceInstARM32.cpp \
IceInstMIPS32.cpp \
IceInstX8632.cpp \
IceInstX8664.cpp \
IceTargetLowering.cpp \
IceTargetLoweringARM32.cpp \
IceTargetLoweringMIPS32.cpp \
IceTargetLoweringX8632.cpp \
IceTargetLoweringX8664.cpp \
IceAssembler.cpp \
IceBrowserCompileServer.cpp \ IceBrowserCompileServer.cpp \
IceCfg.cpp \ IceCfg.cpp \
IceCfgNode.cpp \ IceCfgNode.cpp \
...@@ -211,10 +222,6 @@ SRCS = \ ...@@ -211,10 +222,6 @@ SRCS = \
IceGlobalContext.cpp \ IceGlobalContext.cpp \
IceGlobalInits.cpp \ IceGlobalInits.cpp \
IceInst.cpp \ IceInst.cpp \
IceInstARM32.cpp \
IceInstMIPS32.cpp \
IceInstX8632.cpp \
IceInstX8664.cpp \
IceIntrinsics.cpp \ IceIntrinsics.cpp \
IceLiveness.cpp \ IceLiveness.cpp \
IceLoopAnalyzer.cpp \ IceLoopAnalyzer.cpp \
...@@ -222,11 +229,6 @@ SRCS = \ ...@@ -222,11 +229,6 @@ SRCS = \
IceRegAlloc.cpp \ IceRegAlloc.cpp \
IceRNG.cpp \ IceRNG.cpp \
IceSwitchLowering.cpp \ IceSwitchLowering.cpp \
IceTargetLowering.cpp \
IceTargetLoweringARM32.cpp \
IceTargetLoweringMIPS32.cpp \
IceTargetLoweringX8632.cpp \
IceTargetLoweringX8664.cpp \
IceThreading.cpp \ IceThreading.cpp \
IceTimerTree.cpp \ IceTimerTree.cpp \
IceTranslator.cpp \ IceTranslator.cpp \
...@@ -397,9 +399,11 @@ endif ...@@ -397,9 +399,11 @@ endif
check-unit: $(OBJDIR)/run_unittests check-unit: $(OBJDIR)/run_unittests
$(OBJDIR)/run_unittests $(OBJDIR)/run_unittests
ALLSPEC := 177.mesa 179.art 183.equake 188.ammp 164.gzip 175.vpr 176.gcc \ # List the spec2k components in roughly reverse order of runtime, to help with
181.mcf 186.crafty 197.parser 253.perlbmk 254.gap 255.vortex \ # parallel execution speed.
256.bzip2 300.twolf 252.eon ALLSPEC := 253.perlbmk 177.mesa 188.ammp 256.bzip2 164.gzip 179.art 183.equake \
175.vpr 176.gcc 181.mcf 186.crafty 197.parser 254.gap 255.vortex \
300.twolf 252.eon
.PHONY: $(ALLSPEC) .PHONY: $(ALLSPEC)
TARGET := x8632 TARGET := x8632
...@@ -414,12 +418,12 @@ ifeq ($(TARGET),arm32) ...@@ -414,12 +418,12 @@ ifeq ($(TARGET),arm32)
SPEC := -O2 --filetype=asm SPEC := -O2 --filetype=asm
endif endif
%.spec2k: % %.spec2k: % $(OBJDIR)/pnacl-sz make_symlink runtime
./pydir/szbuild_spec2k.py -v --force --target=$(TARGETFLAG) $(SPEC) $< ./pydir/szbuild_spec2k.py -v --force --target=$(TARGETFLAG) $(SPEC) $<
( cd ../../../tests/spec2k; \ ( cd ../../../tests/spec2k; \
./run_all.sh RunTimedBenchmarks $(SETUP) train $< ) ./run_all.sh RunTimedBenchmarks $(SETUP) train $< )
check-spec: $(OBJDIR)/pnacl-sz make_symlink $(ALLSPEC:=.spec2k) check-spec: $(ALLSPEC:=.spec2k)
check: check-lit check-unit check-xtest check: check-lit check-unit check-xtest
......
...@@ -457,19 +457,17 @@ void Cfg::sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts, ...@@ -457,19 +457,17 @@ void Cfg::sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts,
return; return;
// Sort by decreasing alignment. This does not really matter at the moment, // Sort by decreasing alignment. This does not really matter at the moment,
// but will allow compacting stack allocation when we fuse to one alloca. // but will allow compacting stack allocation when we fuse to one alloca.
std::sort(Allocas.begin(), Allocas.end(), std::sort(Allocas.begin(), Allocas.end(), [](Inst *I1, Inst *I2) {
[](Inst *I1, Inst *I2) {
auto *A1 = llvm::dyn_cast<InstAlloca>(I1); auto *A1 = llvm::dyn_cast<InstAlloca>(I1);
auto *A2 = llvm::dyn_cast<InstAlloca>(I2); auto *A2 = llvm::dyn_cast<InstAlloca>(I2);
return A1->getAlignInBytes() > A2->getAlignInBytes(); return A1->getAlignInBytes() > A2->getAlignInBytes();
}); });
for (Inst *Instr: Allocas) { for (Inst *Instr : Allocas) {
auto *Alloca = llvm::cast<InstAlloca>(Instr); auto *Alloca = llvm::cast<InstAlloca>(Instr);
// Move the alloca to its sorted position. // Move the alloca to its sorted position.
InstAlloca *NewAlloca = InstAlloca::create(this, InstAlloca *NewAlloca =
Alloca->getSizeInBytes(), InstAlloca::create(this, Alloca->getSizeInBytes(),
Alloca->getAlignInBytes(), Alloca->getAlignInBytes(), Alloca->getDest());
Alloca->getDest());
if (IsKnownFrameOffset) if (IsKnownFrameOffset)
NewAlloca->setKnownFrameOffset(); NewAlloca->setKnownFrameOffset();
Insts.push_front(NewAlloca); Insts.push_front(NewAlloca);
...@@ -506,8 +504,7 @@ void Cfg::processAllocas() { ...@@ -506,8 +504,7 @@ void Cfg::processAllocas() {
// Allocations aligned more than the stack require a frame pointer. // Allocations aligned more than the stack require a frame pointer.
RequiresFramePointer = true; RequiresFramePointer = true;
AlignedAllocas.push_back(Alloca); AlignedAllocas.push_back(Alloca);
} } else
else
FixedAllocas.push_back(Alloca); FixedAllocas.push_back(Alloca);
} }
} }
......
...@@ -304,6 +304,7 @@ class PhiDesc { ...@@ -304,6 +304,7 @@ class PhiDesc {
PhiDesc() = delete; PhiDesc() = delete;
PhiDesc(const PhiDesc &) = delete; PhiDesc(const PhiDesc &) = delete;
PhiDesc &operator=(const PhiDesc &) = delete; PhiDesc &operator=(const PhiDesc &) = delete;
public: public:
PhiDesc(InstPhi *Phi, Variable *Dest) : Phi(Phi), Dest(Dest) {} PhiDesc(InstPhi *Phi, Variable *Dest) : Phi(Phi), Dest(Dest) {}
PhiDesc(PhiDesc &&) = default; PhiDesc(PhiDesc &&) = default;
......
...@@ -55,13 +55,13 @@ ...@@ -55,13 +55,13 @@
X(Reg_bl, 3, "bl", Reg_ebx, 0,1,0,0, 1,0,0,0,1, 0, 0,0,0,1,1, \ X(Reg_bl, 3, "bl", Reg_ebx, 0,1,0,0, 1,0,0,0,1, 0, 0,0,0,1,1, \
REGLIST2(RegX8632, ebx, bx)) \ REGLIST2(RegX8632, ebx, bx)) \
/* High 8-bit registers */ \ /* High 8-bit registers */ \
X(Reg_ah, 4, "ah", Reg_eax, 1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \ X(Reg_ah, 4, "ah", Reg_eax, 1,0,0,0, 1,0,0,0,1, 0, 0,0,0,0,1, \
REGLIST2(RegX8632, eax, ax)) \ REGLIST2(RegX8632, eax, ax)) \
X(Reg_ch, 5, "ch", Reg_ecx, 1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \ X(Reg_ch, 5, "ch", Reg_ecx, 1,0,0,0, 1,0,0,0,1, 0, 0,0,0,0,1, \
REGLIST2(RegX8632, ecx, cx)) \ REGLIST2(RegX8632, ecx, cx)) \
X(Reg_dh, 6, "dh", Reg_edx, 1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \ X(Reg_dh, 6, "dh", Reg_edx, 1,0,0,0, 1,0,0,0,1, 0, 0,0,0,0,1, \
REGLIST2(RegX8632, edx, dx)) \ REGLIST2(RegX8632, edx, dx)) \
X(Reg_bh, 7, "bh", Reg_ebx, 0,1,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \ X(Reg_bh, 7, "bh", Reg_ebx, 0,1,0,0, 1,0,0,0,1, 0, 0,0,0,0,1, \
REGLIST2(RegX8632, ebx, bx)) \ REGLIST2(RegX8632, ebx, bx)) \
/* End of 8-bit register set */ /* End of 8-bit register set */
//#define X(val, encode, name, base, scratch, preserved, stackptr, frameptr, //#define X(val, encode, name, base, scratch, preserved, stackptr, frameptr,
...@@ -213,21 +213,21 @@ ...@@ -213,21 +213,21 @@
#define ICETYPEX8632_TABLE \ #define ICETYPEX8632_TABLE \
/* tag, element type, cvt , sdss, pack, width, fld */ \ /* tag, element type, cvt , sdss, pack, width, fld */ \
X(IceType_void, IceType_void, "?", "", "", "", "") \ X(void, void, "?", "", "", "", "") \
X(IceType_i1, IceType_void, "si", "", "", "b", "") \ X(i1, void, "si", "", "", "b", "") \
X(IceType_i8, IceType_void, "si", "", "", "b", "") \ X(i8, void, "si", "", "", "b", "") \
X(IceType_i16, IceType_void, "si", "", "", "w", "") \ X(i16, void, "si", "", "", "w", "") \
X(IceType_i32, IceType_void, "si", "", "", "l", "") \ X(i32, void, "si", "", "", "l", "") \
X(IceType_i64, IceType_void, "si", "", "", "q", "") \ X(i64, void, "si", "", "", "q", "") \
X(IceType_f32, IceType_void, "ss", "ss", "d", "", "s") \ X(f32, void, "ss", "ss", "d", "", "s") \
X(IceType_f64, IceType_void, "sd", "sd", "q", "", "l") \ X(f64, void, "sd", "sd", "q", "", "l") \
X(IceType_v4i1, IceType_i32, "?", "", "d", "", "") \ X(v4i1, i32, "?", "", "d", "", "") \
X(IceType_v8i1, IceType_i16, "?", "", "w", "", "") \ X(v8i1, i16, "?", "", "w", "", "") \
X(IceType_v16i1, IceType_i8, "?", "", "b", "", "") \ X(v16i1, i8, "?", "", "b", "", "") \
X(IceType_v16i8, IceType_i8, "?", "", "b", "", "") \ X(v16i8, i8, "?", "", "b", "", "") \
X(IceType_v8i16, IceType_i16, "?", "", "w", "", "") \ X(v8i16, i16, "?", "", "w", "", "") \
X(IceType_v4i32, IceType_i32, "dq", "", "d", "", "") \ X(v4i32, i32, "dq", "", "d", "", "") \
X(IceType_v4f32, IceType_f32, "ps", "", "d", "", "") X(v4f32, f32, "ps", "", "d", "", "")
//#define X(tag, elementty, cvt, sdss, pack, width, fld) //#define X(tag, elementty, cvt, sdss, pack, width, fld)
#endif // SUBZERO_SRC_ICEINSTX8632_DEF #endif // SUBZERO_SRC_ICEINSTX8632_DEF
...@@ -292,22 +292,22 @@ ...@@ -292,22 +292,22 @@
//#define X(val, emit) //#define X(val, emit)
#define ICETYPEX8664_TABLE \ #define ICETYPEX8664_TABLE \
/* tag , element type, cvt , sdss, pack, width, fld */ \ /* tag, element type, cvt , sdss, pack, width, fld */ \
X(IceType_void, IceType_void, "?", "", "", "", "") \ X(void, void, "?", "", "", "", "") \
X(IceType_i1, IceType_void, "si", "", "", "b", "") \ X(i1, void, "si", "", "", "b", "") \
X(IceType_i8, IceType_void, "si", "", "", "b", "") \ X(i8, void, "si", "", "", "b", "") \
X(IceType_i16, IceType_void, "si", "", "", "w", "") \ X(i16, void, "si", "", "", "w", "") \
X(IceType_i32, IceType_void, "si", "", "", "l", "") \ X(i32, void, "si", "", "", "l", "") \
X(IceType_i64, IceType_void, "si", "", "", "q", "") \ X(i64, void, "si", "", "", "q", "") \
X(IceType_f32, IceType_void, "ss", "ss", "d", "", "s") \ X(f32, void, "ss", "ss", "d", "", "s") \
X(IceType_f64, IceType_void, "sd", "sd", "q", "", "l") \ X(f64, void, "sd", "sd", "q", "", "l") \
X(IceType_v4i1, IceType_i32, "?", "", "d", "", "") \ X(v4i1, i32, "?", "", "d", "", "") \
X(IceType_v8i1, IceType_i16, "?", "", "w", "", "") \ X(v8i1, i16, "?", "", "w", "", "") \
X(IceType_v16i1, IceType_i8, "?", "", "b", "", "") \ X(v16i1, i8, "?", "", "b", "", "") \
X(IceType_v16i8, IceType_i8, "?", "", "b", "", "") \ X(v16i8, i8, "?", "", "b", "", "") \
X(IceType_v8i16, IceType_i16, "?", "", "w", "", "") \ X(v8i16, i16, "?", "", "w", "", "") \
X(IceType_v4i32, IceType_i32, "dq", "", "d", "", "") \ X(v4i32, i32, "dq", "", "d", "", "") \
X(IceType_v4f32, IceType_f32, "ps", "", "d", "", "") X(v4f32, f32, "ps", "", "d", "", "")
//#define X(tag, elementty, cvt, sdss, pack, width, fld) //#define X(tag, elementty, cvt, sdss, pack, width, fld)
#endif // SUBZERO_SRC_ICEINSTX8664_DEF #endif // SUBZERO_SRC_ICEINSTX8664_DEF
...@@ -1384,38 +1384,35 @@ void InstX86Cbwdq<Machine>::emit(const Cfg *Func) const { ...@@ -1384,38 +1384,35 @@ void InstX86Cbwdq<Machine>::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(this->getSrcSize() == 1); assert(this->getSrcSize() == 1);
Operand *Src0 = this->getSrc(0); Operand *Src0 = this->getSrc(0);
assert(llvm::isa<Variable>(Src0)); int32_t DestReg = this->getDest()->getRegNum();
int32_t SrcReg = llvm::cast<Variable>(Src0)->getRegNum();
(void)DestReg;
(void)SrcReg;
switch (Src0->getType()) { switch (Src0->getType()) {
default: default:
llvm_unreachable("unexpected source type!"); llvm_unreachable("unexpected source type!");
break; break;
case IceType_i8: case IceType_i8:
assert(llvm::cast<Variable>(Src0)->getRegNum() == assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_al);
InstX86Base<Machine>::Traits::RegisterSet::Reg_al); assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ax ||
assert(this->getDest()->getRegNum() == DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ah);
InstX86Base<Machine>::Traits::RegisterSet::Reg_ax);
Str << "\t" Str << "\t"
<< "cbtw"; << "cbtw";
break; break;
case IceType_i16: case IceType_i16:
assert(llvm::cast<Variable>(Src0)->getRegNum() == assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ax);
InstX86Base<Machine>::Traits::RegisterSet::Reg_ax); assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_dx);
assert(this->getDest()->getRegNum() ==
InstX86Base<Machine>::Traits::RegisterSet::Reg_dx);
Str << "\t" Str << "\t"
<< "cwtd"; << "cwtd";
break; break;
case IceType_i32: case IceType_i32:
assert(llvm::cast<Variable>(Src0)->getRegNum() == assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
InstX86Base<Machine>::Traits::RegisterSet::Reg_eax); assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
assert(this->getDest()->getRegNum() ==
InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
Str << "\t" Str << "\t"
<< "cltd"; << "cltd";
break; break;
case IceType_i64: case IceType_i64:
assert(this->getDest()->getRegNum() == assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
Str << "\t" Str << "\t"
<< "cdto"; << "cdto";
break; break;
...@@ -1428,35 +1425,32 @@ void InstX86Cbwdq<Machine>::emitIAS(const Cfg *Func) const { ...@@ -1428,35 +1425,32 @@ void InstX86Cbwdq<Machine>::emitIAS(const Cfg *Func) const {
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>(); Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
assert(this->getSrcSize() == 1); assert(this->getSrcSize() == 1);
Operand *Src0 = this->getSrc(0); Operand *Src0 = this->getSrc(0);
assert(llvm::isa<Variable>(Src0)); int32_t DestReg = this->getDest()->getRegNum();
int32_t SrcReg = llvm::cast<Variable>(Src0)->getRegNum();
(void)DestReg;
(void)SrcReg;
switch (Src0->getType()) { switch (Src0->getType()) {
default: default:
llvm_unreachable("unexpected source type!"); llvm_unreachable("unexpected source type!");
break; break;
case IceType_i8: case IceType_i8:
assert(llvm::cast<Variable>(Src0)->getRegNum() == assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_al);
InstX86Base<Machine>::Traits::RegisterSet::Reg_al); assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ax ||
assert(this->getDest()->getRegNum() == DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ah);
InstX86Base<Machine>::Traits::RegisterSet::Reg_ax);
Asm->cbw(); Asm->cbw();
break; break;
case IceType_i16: case IceType_i16:
assert(llvm::cast<Variable>(Src0)->getRegNum() == assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ax);
InstX86Base<Machine>::Traits::RegisterSet::Reg_ax); assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_dx);
assert(this->getDest()->getRegNum() ==
InstX86Base<Machine>::Traits::RegisterSet::Reg_dx);
Asm->cwd(); Asm->cwd();
break; break;
case IceType_i32: case IceType_i32:
assert(llvm::cast<Variable>(Src0)->getRegNum() == assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
InstX86Base<Machine>::Traits::RegisterSet::Reg_eax); assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
assert(this->getDest()->getRegNum() ==
InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
Asm->cdq(); Asm->cdq();
break; break;
case IceType_i64: case IceType_i64:
assert(this->getDest()->getRegNum() == assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
Asm->cqo(); Asm->cqo();
break; break;
} }
...@@ -2278,32 +2272,29 @@ template <class Machine> void InstX86Mov<Machine>::emit(const Cfg *Func) const { ...@@ -2278,32 +2272,29 @@ template <class Machine> void InstX86Mov<Machine>::emit(const Cfg *Func) const {
} else { } else {
Str << "\tmov" Str << "\tmov"
<< (!isScalarFloatingType(DestTy) << (!isScalarFloatingType(DestTy)
? this->getWidthString(SrcTy) ? this->getWidthString(DestTy)
: InstX86Base<Machine>::Traits::TypeAttributes[DestTy] : InstX86Base<Machine>::Traits::TypeAttributes[DestTy]
.SdSsString) << "\t"; .SdSsString) << "\t";
} }
// For an integer truncation operation, src is wider than dest. Ideally, we // For an integer truncation operation, src is wider than dest. In this case,
// use a mov instruction whose data width matches the narrower dest. This is // we use a mov instruction whose data width matches the narrower dest.
// a problem if e.g. src is a register like esi or si where there is no 8-bit
// version of the register. To be safe, we instead widen the dest to match
// src. This works even for stack-allocated dest variables because
// typeWidthOnStack() pads to a 4-byte boundary even if only a lower portion
// is used.
// TODO: This assert disallows usages such as copying a floating // TODO: This assert disallows usages such as copying a floating
// point value between a vector and a scalar (which movss is used for). Clean // point value between a vector and a scalar (which movss is used for). Clean
// this up. // this up.
assert(Func->getTarget()->typeWidthInBytesOnStack(DestTy) == assert(Func->getTarget()->typeWidthInBytesOnStack(DestTy) ==
Func->getTarget()->typeWidthInBytesOnStack(SrcTy)); Func->getTarget()->typeWidthInBytesOnStack(SrcTy));
Src->emit(Func); const Operand *NewSrc = Src;
Str << ", "; if (auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
int32_t NewRegNum = Variable::NoRegister; int32_t NewRegNum = Variable::NoRegister;
if (this->getDest()->hasReg()) if (SrcVar->hasReg())
NewRegNum = InstX86Base<Machine>::Traits::getGprForType( NewRegNum = InstX86Base<Machine>::Traits::getGprForType(
SrcTy, this->getDest()->getRegNum()); DestTy, SrcVar->getRegNum());
const Variable *NewDest = SrcTy == DestTy if (SrcTy != DestTy)
? this->getDest() NewSrc = SrcVar->asType(DestTy, NewRegNum);
: this->getDest()->asType(SrcTy, NewRegNum); }
NewDest->emit(Func); NewSrc->emit(Func);
Str << ", ";
this->getDest()->emit(Func);
} }
template <class Machine> template <class Machine>
...@@ -2330,13 +2321,8 @@ void InstX86Mov<Machine>::emitIAS(const Cfg *Func) const { ...@@ -2330,13 +2321,8 @@ void InstX86Mov<Machine>::emitIAS(const Cfg *Func) const {
Machine>::Traits::Assembler::GPREmitterAddrOp GPRAddrEmitter = { Machine>::Traits::Assembler::GPREmitterAddrOp GPRAddrEmitter = {
&InstX86Base<Machine>::Traits::Assembler::mov, &InstX86Base<Machine>::Traits::Assembler::mov,
&InstX86Base<Machine>::Traits::Assembler::mov}; &InstX86Base<Machine>::Traits::Assembler::mov};
// For an integer truncation operation, src is wider than dest. Ideally, we // For an integer truncation operation, src is wider than dest. In this case,
// use a mov instruction whose data width matches the narrower dest. This is // we use a mov instruction whose data width matches the narrower dest.
// a problem if e.g. src is a register like esi or si where there is no 8-bit
// version of the register. To be safe, we instead widen the dest to match
// src. This works even for stack-allocated dest variables because
// typeWidthOnStack() pads to a 4-byte boundary even if only a lower portion
// is used.
// TODO: This assert disallows usages such as copying a floating // TODO: This assert disallows usages such as copying a floating
// point value between a vector and a scalar (which movss is used for). Clean // point value between a vector and a scalar (which movss is used for). Clean
// this up. // this up.
...@@ -2366,7 +2352,7 @@ void InstX86Mov<Machine>::emitIAS(const Cfg *Func) const { ...@@ -2366,7 +2352,7 @@ void InstX86Mov<Machine>::emitIAS(const Cfg *Func) const {
return; return;
} }
if (isScalarIntegerType(SrcTy)) { if (isScalarIntegerType(SrcTy)) {
DestTy = SrcTy; SrcTy = DestTy;
} }
emitIASRegOpTyGPR<Machine>(Func, DestTy, Dest, Src, GPRRegEmitter); emitIASRegOpTyGPR<Machine>(Func, DestTy, Dest, Src, GPRRegEmitter);
return; return;
......
...@@ -428,6 +428,23 @@ private: ...@@ -428,6 +428,23 @@ private:
Ostream &operator<<(Ostream &Str, const LiveRange &L); Ostream &operator<<(Ostream &Str, const LiveRange &L);
/// RegClass indicates the physical register class that a Variable may be
/// register-allocated from. By default, a variable's register class is
/// directly associated with its type. However, the target lowering may define
/// additional target-specific register classes by extending the set of enum
/// values.
///
/// The underlying type is uint8_t, so every register class value (including
/// any target-specific extension) must fit in 8 bits.
enum RegClass : uint8_t {
// Define RC_void, RC_i1, RC_i8, etc. Each RC_<tag> is given the same numeric
// value as the corresponding IceType_<tag>.
#define X(tag, sizeLog2, align, elts, elty, str) RC_##tag = IceType_##tag,
ICETYPE_TABLE
#undef X
// RC_Target takes the next value after the last type-derived entry; the
// static_assert following this enum checks that it equals IceType_NUM.
// Target-specific enums start their own values from here.
RC_Target,
// Leave plenty of space for target-specific values.
RC_Max = std::numeric_limits<uint8_t>::max()
};
static_assert(RC_Target == static_cast<RegClass>(IceType_NUM),
"Expected RC_Target and IceType_NUM to be the same");
/// Variable represents an operand that is register-allocated or /// Variable represents an operand that is register-allocated or
/// stack-allocated. If it is register-allocated, it will ultimately have a /// stack-allocated. If it is register-allocated, it will ultimately have a
/// non-negative RegNum field. /// non-negative RegNum field.
...@@ -493,6 +510,9 @@ public: ...@@ -493,6 +510,9 @@ public:
return RegRequirement == RR_MustNotHaveRegister; return RegRequirement == RR_MustNotHaveRegister;
} }
/// Sets the register class of this variable. The argument is a raw uint8_t
/// that is cast to RegClass.
// NOTE(review): presumably uint8_t is accepted (rather than RegClass) so that
// values from target-specific extension enums can be passed directly -
// confirm against callers.
void setRegClass(uint8_t RC) { RegisterClass = static_cast<RegClass>(RC); }
/// Returns the register class currently stored for this variable.
RegClass getRegClass() const { return RegisterClass; }
LiveRange &getLiveRange() { return Live; } LiveRange &getLiveRange() { return Live; }
const LiveRange &getLiveRange() const { return Live; } const LiveRange &getLiveRange() const { return Live; }
void setLiveRange(const LiveRange &Range) { Live = Range; } void setLiveRange(const LiveRange &Range) { Live = Range; }
...@@ -537,7 +557,8 @@ public: ...@@ -537,7 +557,8 @@ public:
protected: protected:
Variable(OperandKind K, Type Ty, SizeT Index) Variable(OperandKind K, Type Ty, SizeT Index)
: Operand(K, Ty), Number(Index) { : Operand(K, Ty), Number(Index),
RegisterClass(static_cast<RegClass>(Ty)) {
Vars = VarsReal; Vars = VarsReal;
Vars[0] = this; Vars[0] = this;
NumVars = 1; NumVars = 1;
...@@ -553,6 +574,7 @@ protected: ...@@ -553,6 +574,7 @@ protected:
/// pointer and other physical registers specifically referenced by name. /// pointer and other physical registers specifically referenced by name.
bool IgnoreLiveness = false; bool IgnoreLiveness = false;
RegRequirement RegRequirement = RR_MayHaveRegister; RegRequirement RegRequirement = RR_MayHaveRegister;
RegClass RegisterClass;
/// RegNum is the allocated register, or NoRegister if it isn't /// RegNum is the allocated register, or NoRegister if it isn't
/// register-allocated. /// register-allocated.
int32_t RegNum = NoRegister; int32_t RegNum = NoRegister;
......
...@@ -833,8 +833,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull, ...@@ -833,8 +833,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
Iter.Cur = Unhandled.back(); Iter.Cur = Unhandled.back();
Unhandled.pop_back(); Unhandled.pop_back();
dumpLiveRangeTrace("\nConsidering ", Iter.Cur); dumpLiveRangeTrace("\nConsidering ", Iter.Cur);
Iter.RegMask = Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
RegMaskFull & Target->getRegisterSetForType(Iter.Cur->getType());
KillsRange.trim(Iter.Cur->getLiveRange().getStart()); KillsRange.trim(Iter.Cur->getLiveRange().getStart());
// Check for pre-colored ranges. If Cur is pre-colored, it definitely gets // Check for pre-colored ranges. If Cur is pre-colored, it definitely gets
...@@ -862,11 +861,10 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull, ...@@ -862,11 +861,10 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
// Disable AllowOverlap if an Active variable, which is not Prefer, shares // Disable AllowOverlap if an Active variable, which is not Prefer, shares
// Prefer's register, and has a definition within Cur's live range. // Prefer's register, and has a definition within Cur's live range.
if (Iter.AllowOverlap) { if (Iter.AllowOverlap) {
const llvm::SmallBitVector &Aliases = *RegAliases[Iter.PreferReg];
for (const Variable *Item : Active) { for (const Variable *Item : Active) {
int32_t RegNum = Item->getRegNumTmp(); int32_t RegNum = Item->getRegNumTmp();
// TODO(stichnot): Consider aliases of RegNum. This is probably a if (Item != Iter.Prefer && Aliases[RegNum] &&
// correctness issue.
if (Item != Iter.Prefer && RegNum == Iter.PreferReg &&
overlapsDefs(Func, Iter.Cur, Item)) { overlapsDefs(Func, Iter.Cur, Item)) {
Iter.AllowOverlap = false; Iter.AllowOverlap = false;
dumpDisableOverlap(Func, Item, "Active"); dumpDisableOverlap(Func, Item, "Active");
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "IceDefs.h" #include "IceDefs.h"
#include "IceInstARM32.def" #include "IceInstARM32.def"
#include "IceOperand.h" // RC_Target
#include "IceTypes.h" #include "IceTypes.h"
namespace Ice { namespace Ice {
...@@ -118,6 +119,9 @@ public: ...@@ -118,6 +119,9 @@ public:
static const char *RegNames[]; static const char *RegNames[];
}; };
// Extend enum RegClass with ARM32-specific register classes (if any).
// No ARM32-specific classes are currently listed, so RCARM32_NUM has the same
// value as RC_Target.
enum RegClassARM32 : uint8_t { RCARM32_NUM = RC_Target };
} // end of namespace Ice } // end of namespace Ice
#endif // SUBZERO_SRC_ICEREGISTERSARM32_H #endif // SUBZERO_SRC_ICEREGISTERSARM32_H
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "IceDefs.h" #include "IceDefs.h"
#include "IceInstMIPS32.def" #include "IceInstMIPS32.def"
#include "IceOperand.h" // RC_Target
#include "IceTypes.h" #include "IceTypes.h"
namespace Ice { namespace Ice {
...@@ -59,6 +60,9 @@ static inline GPRRegister getEncodedGPR(int32_t RegNum) { ...@@ -59,6 +60,9 @@ static inline GPRRegister getEncodedGPR(int32_t RegNum) {
} // end of namespace RegMIPS32 } // end of namespace RegMIPS32
// Extend enum RegClass with MIPS32-specific register classes (if any).
// No MIPS32-specific classes are currently listed, so RCMIPS32_NUM has the
// same value as RC_Target.
enum RegClassMIPS32 : uint8_t { RCMIPS32_NUM = RC_Target };
} // end of namespace Ice } // end of namespace Ice
#endif // SUBZERO_SRC_ICEREGISTERSMIPS32_H #endif // SUBZERO_SRC_ICEREGISTERSMIPS32_H
...@@ -234,7 +234,8 @@ public: ...@@ -234,7 +234,8 @@ public:
virtual llvm::SmallBitVector getRegisterSet(RegSetMask Include, virtual llvm::SmallBitVector getRegisterSet(RegSetMask Include,
RegSetMask Exclude) const = 0; RegSetMask Exclude) const = 0;
virtual const llvm::SmallBitVector &getRegisterSetForType(Type Ty) const = 0; virtual const llvm::SmallBitVector &
getRegistersForVariable(const Variable *Var) const = 0;
virtual const llvm::SmallBitVector &getAliasesForRegister(SizeT) const = 0; virtual const llvm::SmallBitVector &getAliasesForRegister(SizeT) const = 0;
void regAlloc(RegAllocKind Kind); void regAlloc(RegAllocKind Kind);
......
...@@ -74,8 +74,11 @@ public: ...@@ -74,8 +74,11 @@ public:
IceString getRegName(SizeT RegNum, Type Ty) const override; IceString getRegName(SizeT RegNum, Type Ty) const override;
llvm::SmallBitVector getRegisterSet(RegSetMask Include, llvm::SmallBitVector getRegisterSet(RegSetMask Include,
RegSetMask Exclude) const override; RegSetMask Exclude) const override;
const llvm::SmallBitVector &getRegisterSetForType(Type Ty) const override { const llvm::SmallBitVector &
return TypeToRegisterSet[Ty]; getRegistersForVariable(const Variable *Var) const override {
RegClass RC = Var->getRegClass();
assert(RC < RC_Target);
return TypeToRegisterSet[RC];
} }
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override { const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
return RegisterAliases[Reg]; return RegisterAliases[Reg];
...@@ -554,7 +557,7 @@ protected: ...@@ -554,7 +557,7 @@ protected:
bool MaybeLeafFunc = true; bool MaybeLeafFunc = true;
size_t SpillAreaSizeBytes = 0; size_t SpillAreaSizeBytes = 0;
// TODO(jpp): std::array instead of array. // TODO(jpp): std::array instead of array.
static llvm::SmallBitVector TypeToRegisterSet[IceType_NUM]; static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM];
static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM]; static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
static llvm::SmallBitVector ScratchRegs; static llvm::SmallBitVector ScratchRegs;
llvm::SmallBitVector RegsUsed; llvm::SmallBitVector RegsUsed;
......
...@@ -42,8 +42,11 @@ public: ...@@ -42,8 +42,11 @@ public:
IceString getRegName(SizeT RegNum, Type Ty) const override; IceString getRegName(SizeT RegNum, Type Ty) const override;
llvm::SmallBitVector getRegisterSet(RegSetMask Include, llvm::SmallBitVector getRegisterSet(RegSetMask Include,
RegSetMask Exclude) const override; RegSetMask Exclude) const override;
const llvm::SmallBitVector &getRegisterSetForType(Type Ty) const override { const llvm::SmallBitVector &
return TypeToRegisterSet[Ty]; getRegistersForVariable(const Variable *Var) const override {
RegClass RC = Var->getRegClass();
assert(RC < RC_Target);
return TypeToRegisterSet[RC];
} }
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override { const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
return RegisterAliases[Reg]; return RegisterAliases[Reg];
...@@ -231,7 +234,7 @@ protected: ...@@ -231,7 +234,7 @@ protected:
bool UsesFramePointer = false; bool UsesFramePointer = false;
bool NeedsStackAlignment = false; bool NeedsStackAlignment = false;
static llvm::SmallBitVector TypeToRegisterSet[IceType_NUM]; static llvm::SmallBitVector TypeToRegisterSet[RCMIPS32_NUM];
static llvm::SmallBitVector RegisterAliases[RegMIPS32::Reg_NUM]; static llvm::SmallBitVector RegisterAliases[RegMIPS32::Reg_NUM];
static llvm::SmallBitVector ScratchRegs; static llvm::SmallBitVector ScratchRegs;
llvm::SmallBitVector RegsUsed; llvm::SmallBitVector RegsUsed;
......
...@@ -74,7 +74,7 @@ const size_t MachineTraits<TargetX8632>::TableIcmp64Size = ...@@ -74,7 +74,7 @@ const size_t MachineTraits<TargetX8632>::TableIcmp64Size =
const MachineTraits<TargetX8632>::TableTypeX8632AttributesType const MachineTraits<TargetX8632>::TableTypeX8632AttributesType
MachineTraits<TargetX8632>::TableTypeX8632Attributes[] = { MachineTraits<TargetX8632>::TableTypeX8632Attributes[] = {
#define X(tag, elementty, cvt, sdss, pack, width, fld) \ #define X(tag, elementty, cvt, sdss, pack, width, fld) \
{ elementty } \ { IceType_##elementty } \
, ,
ICETYPEX8632_TABLE ICETYPEX8632_TABLE
#undef X #undef X
...@@ -87,7 +87,7 @@ const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16; ...@@ -87,7 +87,7 @@ const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16;
const char *MachineTraits<TargetX8632>::TargetName = "X8632"; const char *MachineTraits<TargetX8632>::TargetName = "X8632";
template <> template <>
std::array<llvm::SmallBitVector, IceType_NUM> std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<TargetX8632>::TypeToRegisterSet = {}; TargetX86Base<TargetX8632>::TypeToRegisterSet = {};
template <> template <>
...@@ -957,7 +957,7 @@ enum _tmp_enum { ...@@ -957,7 +957,7 @@ enum _tmp_enum {
}; };
// Define a set of constants based on high-level table entries. // Define a set of constants based on high-level table entries.
#define X(tag, sizeLog2, align, elts, elty, str) \ #define X(tag, sizeLog2, align, elts, elty, str) \
static const int _table1_##tag = tag; static const int _table1_##tag = IceType_##tag;
ICETYPE_TABLE ICETYPE_TABLE
#undef X #undef X
// Define a set of constants based on low-level table entries, and ensure the // Define a set of constants based on low-level table entries, and ensure the
......
...@@ -22,8 +22,9 @@ ...@@ -22,8 +22,9 @@
#include "IceInstX8632.def" #include "IceInstX8632.def"
#include "IceOperand.h" #include "IceOperand.h"
#include "IceRegistersX8632.h" #include "IceRegistersX8632.h"
#include "IceTargetLoweringX8632.def"
#include "IceTargetLowering.h" #include "IceTargetLowering.h"
#include "IceTargetLoweringX8632.def"
#include "IceTargetLoweringX86RegClass.h"
#include <array> #include <array>
...@@ -398,7 +399,7 @@ template <> struct MachineTraits<TargetX8632> { ...@@ -398,7 +399,7 @@ template <> struct MachineTraits<TargetX8632> {
} }
static void initRegisterSet( static void initRegisterSet(
std::array<llvm::SmallBitVector, IceType_NUM> *TypeToRegisterSet, std::array<llvm::SmallBitVector, RCX86_NUM> *TypeToRegisterSet,
std::array<llvm::SmallBitVector, RegisterSet::Reg_NUM> *RegisterAliases, std::array<llvm::SmallBitVector, RegisterSet::Reg_NUM> *RegisterAliases,
llvm::SmallBitVector *ScratchRegs) { llvm::SmallBitVector *ScratchRegs) {
llvm::SmallBitVector IntegerRegistersI32(RegisterSet::Reg_NUM); llvm::SmallBitVector IntegerRegistersI32(RegisterSet::Reg_NUM);
...@@ -406,6 +407,11 @@ template <> struct MachineTraits<TargetX8632> { ...@@ -406,6 +407,11 @@ template <> struct MachineTraits<TargetX8632> {
llvm::SmallBitVector IntegerRegistersI8(RegisterSet::Reg_NUM); llvm::SmallBitVector IntegerRegistersI8(RegisterSet::Reg_NUM);
llvm::SmallBitVector FloatRegisters(RegisterSet::Reg_NUM); llvm::SmallBitVector FloatRegisters(RegisterSet::Reg_NUM);
llvm::SmallBitVector VectorRegisters(RegisterSet::Reg_NUM); llvm::SmallBitVector VectorRegisters(RegisterSet::Reg_NUM);
llvm::SmallBitVector Trunc64To8Registers(RegisterSet::Reg_NUM);
llvm::SmallBitVector Trunc32To8Registers(RegisterSet::Reg_NUM);
llvm::SmallBitVector Trunc16To8Registers(RegisterSet::Reg_NUM);
llvm::SmallBitVector Trunc8RcvrRegisters(RegisterSet::Reg_NUM);
llvm::SmallBitVector AhRcvrRegisters(RegisterSet::Reg_NUM);
llvm::SmallBitVector InvalidRegisters(RegisterSet::Reg_NUM); llvm::SmallBitVector InvalidRegisters(RegisterSet::Reg_NUM);
ScratchRegs->resize(RegisterSet::Reg_NUM); ScratchRegs->resize(RegisterSet::Reg_NUM);
#define X(val, encode, name, base, scratch, preserved, stackptr, frameptr, \ #define X(val, encode, name, base, scratch, preserved, stackptr, frameptr, \
...@@ -416,6 +422,11 @@ template <> struct MachineTraits<TargetX8632> { ...@@ -416,6 +422,11 @@ template <> struct MachineTraits<TargetX8632> {
(IntegerRegistersI8)[RegisterSet::val] = is8; \ (IntegerRegistersI8)[RegisterSet::val] = is8; \
(FloatRegisters)[RegisterSet::val] = isXmm; \ (FloatRegisters)[RegisterSet::val] = isXmm; \
(VectorRegisters)[RegisterSet::val] = isXmm; \ (VectorRegisters)[RegisterSet::val] = isXmm; \
(Trunc64To8Registers)[RegisterSet::val] = is64To8; \
(Trunc32To8Registers)[RegisterSet::val] = is32To8; \
(Trunc16To8Registers)[RegisterSet::val] = is16To8; \
(Trunc8RcvrRegisters)[RegisterSet::val] = isTrunc8Rcvr; \
(AhRcvrRegisters)[RegisterSet::val] = isAhRcvr; \
(*RegisterAliases)[RegisterSet::val].resize(RegisterSet::Reg_NUM); \ (*RegisterAliases)[RegisterSet::val].resize(RegisterSet::Reg_NUM); \
for (SizeT RegAlias : aliases) { \ for (SizeT RegAlias : aliases) { \
assert(!(*RegisterAliases)[RegisterSet::val][RegAlias] && \ assert(!(*RegisterAliases)[RegisterSet::val][RegAlias] && \
...@@ -427,21 +438,26 @@ template <> struct MachineTraits<TargetX8632> { ...@@ -427,21 +438,26 @@ template <> struct MachineTraits<TargetX8632> {
REGX8632_TABLE; REGX8632_TABLE;
#undef X #undef X
(*TypeToRegisterSet)[IceType_void] = InvalidRegisters; (*TypeToRegisterSet)[RC_void] = InvalidRegisters;
(*TypeToRegisterSet)[IceType_i1] = IntegerRegistersI8; (*TypeToRegisterSet)[RC_i1] = IntegerRegistersI8;
(*TypeToRegisterSet)[IceType_i8] = IntegerRegistersI8; (*TypeToRegisterSet)[RC_i8] = IntegerRegistersI8;
(*TypeToRegisterSet)[IceType_i16] = IntegerRegistersI16; (*TypeToRegisterSet)[RC_i16] = IntegerRegistersI16;
(*TypeToRegisterSet)[IceType_i32] = IntegerRegistersI32; (*TypeToRegisterSet)[RC_i32] = IntegerRegistersI32;
(*TypeToRegisterSet)[IceType_i64] = IntegerRegistersI32; (*TypeToRegisterSet)[RC_i64] = IntegerRegistersI32;
(*TypeToRegisterSet)[IceType_f32] = FloatRegisters; (*TypeToRegisterSet)[RC_f32] = FloatRegisters;
(*TypeToRegisterSet)[IceType_f64] = FloatRegisters; (*TypeToRegisterSet)[RC_f64] = FloatRegisters;
(*TypeToRegisterSet)[IceType_v4i1] = VectorRegisters; (*TypeToRegisterSet)[RC_v4i1] = VectorRegisters;
(*TypeToRegisterSet)[IceType_v8i1] = VectorRegisters; (*TypeToRegisterSet)[RC_v8i1] = VectorRegisters;
(*TypeToRegisterSet)[IceType_v16i1] = VectorRegisters; (*TypeToRegisterSet)[RC_v16i1] = VectorRegisters;
(*TypeToRegisterSet)[IceType_v16i8] = VectorRegisters; (*TypeToRegisterSet)[RC_v16i8] = VectorRegisters;
(*TypeToRegisterSet)[IceType_v8i16] = VectorRegisters; (*TypeToRegisterSet)[RC_v8i16] = VectorRegisters;
(*TypeToRegisterSet)[IceType_v4i32] = VectorRegisters; (*TypeToRegisterSet)[RC_v4i32] = VectorRegisters;
(*TypeToRegisterSet)[IceType_v4f32] = VectorRegisters; (*TypeToRegisterSet)[RC_v4f32] = VectorRegisters;
(*TypeToRegisterSet)[RCX86_Is64To8] = Trunc64To8Registers;
(*TypeToRegisterSet)[RCX86_Is32To8] = Trunc32To8Registers;
(*TypeToRegisterSet)[RCX86_Is16To8] = Trunc16To8Registers;
(*TypeToRegisterSet)[RCX86_IsTrunc8Rcvr] = Trunc8RcvrRegisters;
(*TypeToRegisterSet)[RCX86_IsAhRcvr] = AhRcvrRegisters;
} }
static llvm::SmallBitVector static llvm::SmallBitVector
...@@ -512,7 +528,12 @@ template <> struct MachineTraits<TargetX8632> { ...@@ -512,7 +528,12 @@ template <> struct MachineTraits<TargetX8632> {
Index |= (is8 << (AttrKey++)); \ Index |= (is8 << (AttrKey++)); \
Index |= (is16 << (AttrKey++)); \ Index |= (is16 << (AttrKey++)); \
Index |= (is32 << (AttrKey++)); \ Index |= (is32 << (AttrKey++)); \
Index |= (is64 << (AttrKey++)); \
Index |= (isXmm << (AttrKey++)); \ Index |= (isXmm << (AttrKey++)); \
Index |= (is16To8 << (AttrKey++)); \
Index |= (is32To8 << (AttrKey++)); \
Index |= (is64To8 << (AttrKey++)); \
Index |= (isTrunc8Rcvr << (AttrKey++)); \
/* val is assigned to an equivalence class based on its properties. */ \ /* val is assigned to an equivalence class based on its properties. */ \
EquivalenceClasses[Index].push_back(RegisterSet::val); \ EquivalenceClasses[Index].push_back(RegisterSet::val); \
} }
......
...@@ -74,7 +74,7 @@ const size_t MachineTraits<TargetX8664>::TableIcmp64Size = ...@@ -74,7 +74,7 @@ const size_t MachineTraits<TargetX8664>::TableIcmp64Size =
const MachineTraits<TargetX8664>::TableTypeX8664AttributesType const MachineTraits<TargetX8664>::TableTypeX8664AttributesType
MachineTraits<TargetX8664>::TableTypeX8664Attributes[] = { MachineTraits<TargetX8664>::TableTypeX8664Attributes[] = {
#define X(tag, elementty, cvt, sdss, pack, width, fld) \ #define X(tag, elementty, cvt, sdss, pack, width, fld) \
{ elementty } \ { IceType_##elementty } \
, ,
ICETYPEX8664_TABLE ICETYPEX8664_TABLE
#undef X #undef X
...@@ -87,7 +87,7 @@ const uint32_t MachineTraits<TargetX8664>::X86_STACK_ALIGNMENT_BYTES = 16; ...@@ -87,7 +87,7 @@ const uint32_t MachineTraits<TargetX8664>::X86_STACK_ALIGNMENT_BYTES = 16;
const char *MachineTraits<TargetX8664>::TargetName = "X8664"; const char *MachineTraits<TargetX8664>::TargetName = "X8664";
template <> template <>
std::array<llvm::SmallBitVector, IceType_NUM> std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<TargetX8664>::TypeToRegisterSet = {}; TargetX86Base<TargetX8664>::TypeToRegisterSet = {};
template <> template <>
...@@ -955,7 +955,7 @@ enum _tmp_enum { ...@@ -955,7 +955,7 @@ enum _tmp_enum {
}; };
// Define a set of constants based on high-level table entries. // Define a set of constants based on high-level table entries.
#define X(tag, sizeLog2, align, elts, elty, str) \ #define X(tag, sizeLog2, align, elts, elty, str) \
static const int _table1_##tag = tag; static const int _table1_##tag = IceType_##tag;
ICETYPE_TABLE ICETYPE_TABLE
#undef X #undef X
// Define a set of constants based on low-level table entries, and ensure the // Define a set of constants based on low-level table entries, and ensure the
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "IceRegistersX8664.h" #include "IceRegistersX8664.h"
#include "IceTargetLowering.h" #include "IceTargetLowering.h"
#include "IceTargetLoweringX8664.def" #include "IceTargetLoweringX8664.def"
#include "IceTargetLoweringX86RegClass.h"
#include <array> #include <array>
...@@ -379,7 +380,7 @@ template <> struct MachineTraits<TargetX8664> { ...@@ -379,7 +380,7 @@ template <> struct MachineTraits<TargetX8664> {
static int32_t getGprForType(Type, int32_t RegNum) { return RegNum; } static int32_t getGprForType(Type, int32_t RegNum) { return RegNum; }
static void initRegisterSet( static void initRegisterSet(
std::array<llvm::SmallBitVector, IceType_NUM> *TypeToRegisterSet, std::array<llvm::SmallBitVector, RCX86_NUM> *TypeToRegisterSet,
std::array<llvm::SmallBitVector, RegisterSet::Reg_NUM> *RegisterAliases, std::array<llvm::SmallBitVector, RegisterSet::Reg_NUM> *RegisterAliases,
llvm::SmallBitVector *ScratchRegs) { llvm::SmallBitVector *ScratchRegs) {
llvm::SmallBitVector IntegerRegistersI64(RegisterSet::Reg_NUM); llvm::SmallBitVector IntegerRegistersI64(RegisterSet::Reg_NUM);
...@@ -388,6 +389,11 @@ template <> struct MachineTraits<TargetX8664> { ...@@ -388,6 +389,11 @@ template <> struct MachineTraits<TargetX8664> {
llvm::SmallBitVector IntegerRegistersI8(RegisterSet::Reg_NUM); llvm::SmallBitVector IntegerRegistersI8(RegisterSet::Reg_NUM);
llvm::SmallBitVector FloatRegisters(RegisterSet::Reg_NUM); llvm::SmallBitVector FloatRegisters(RegisterSet::Reg_NUM);
llvm::SmallBitVector VectorRegisters(RegisterSet::Reg_NUM); llvm::SmallBitVector VectorRegisters(RegisterSet::Reg_NUM);
llvm::SmallBitVector Trunc64To8Registers(RegisterSet::Reg_NUM);
llvm::SmallBitVector Trunc32To8Registers(RegisterSet::Reg_NUM);
llvm::SmallBitVector Trunc16To8Registers(RegisterSet::Reg_NUM);
llvm::SmallBitVector Trunc8RcvrRegisters(RegisterSet::Reg_NUM);
llvm::SmallBitVector AhRcvrRegisters(RegisterSet::Reg_NUM);
llvm::SmallBitVector InvalidRegisters(RegisterSet::Reg_NUM); llvm::SmallBitVector InvalidRegisters(RegisterSet::Reg_NUM);
ScratchRegs->resize(RegisterSet::Reg_NUM); ScratchRegs->resize(RegisterSet::Reg_NUM);
...@@ -400,6 +406,11 @@ template <> struct MachineTraits<TargetX8664> { ...@@ -400,6 +406,11 @@ template <> struct MachineTraits<TargetX8664> {
(IntegerRegistersI8)[RegisterSet::val] = is8; \ (IntegerRegistersI8)[RegisterSet::val] = is8; \
(FloatRegisters)[RegisterSet::val] = isXmm; \ (FloatRegisters)[RegisterSet::val] = isXmm; \
(VectorRegisters)[RegisterSet::val] = isXmm; \ (VectorRegisters)[RegisterSet::val] = isXmm; \
(Trunc64To8Registers)[RegisterSet::val] = is64To8; \
(Trunc32To8Registers)[RegisterSet::val] = is32To8; \
(Trunc16To8Registers)[RegisterSet::val] = is16To8; \
(Trunc8RcvrRegisters)[RegisterSet::val] = isTrunc8Rcvr; \
(AhRcvrRegisters)[RegisterSet::val] = isAhRcvr; \
(*RegisterAliases)[RegisterSet::val].resize(RegisterSet::Reg_NUM); \ (*RegisterAliases)[RegisterSet::val].resize(RegisterSet::Reg_NUM); \
for (SizeT RegAlias : aliases) { \ for (SizeT RegAlias : aliases) { \
assert(!(*RegisterAliases)[RegisterSet::val][RegAlias] && \ assert(!(*RegisterAliases)[RegisterSet::val][RegAlias] && \
...@@ -411,21 +422,26 @@ template <> struct MachineTraits<TargetX8664> { ...@@ -411,21 +422,26 @@ template <> struct MachineTraits<TargetX8664> {
REGX8664_TABLE; REGX8664_TABLE;
#undef X #undef X
(*TypeToRegisterSet)[IceType_void] = InvalidRegisters; (*TypeToRegisterSet)[RC_void] = InvalidRegisters;
(*TypeToRegisterSet)[IceType_i1] = IntegerRegistersI8; (*TypeToRegisterSet)[RC_i1] = IntegerRegistersI8;
(*TypeToRegisterSet)[IceType_i8] = IntegerRegistersI8; (*TypeToRegisterSet)[RC_i8] = IntegerRegistersI8;
(*TypeToRegisterSet)[IceType_i16] = IntegerRegistersI16; (*TypeToRegisterSet)[RC_i16] = IntegerRegistersI16;
(*TypeToRegisterSet)[IceType_i32] = IntegerRegistersI32; (*TypeToRegisterSet)[RC_i32] = IntegerRegistersI32;
(*TypeToRegisterSet)[IceType_i64] = IntegerRegistersI64; (*TypeToRegisterSet)[RC_i64] = IntegerRegistersI64;
(*TypeToRegisterSet)[IceType_f32] = FloatRegisters; (*TypeToRegisterSet)[RC_f32] = FloatRegisters;
(*TypeToRegisterSet)[IceType_f64] = FloatRegisters; (*TypeToRegisterSet)[RC_f64] = FloatRegisters;
(*TypeToRegisterSet)[IceType_v4i1] = VectorRegisters; (*TypeToRegisterSet)[RC_v4i1] = VectorRegisters;
(*TypeToRegisterSet)[IceType_v8i1] = VectorRegisters; (*TypeToRegisterSet)[RC_v8i1] = VectorRegisters;
(*TypeToRegisterSet)[IceType_v16i1] = VectorRegisters; (*TypeToRegisterSet)[RC_v16i1] = VectorRegisters;
(*TypeToRegisterSet)[IceType_v16i8] = VectorRegisters; (*TypeToRegisterSet)[RC_v16i8] = VectorRegisters;
(*TypeToRegisterSet)[IceType_v8i16] = VectorRegisters; (*TypeToRegisterSet)[RC_v8i16] = VectorRegisters;
(*TypeToRegisterSet)[IceType_v4i32] = VectorRegisters; (*TypeToRegisterSet)[RC_v4i32] = VectorRegisters;
(*TypeToRegisterSet)[IceType_v4f32] = VectorRegisters; (*TypeToRegisterSet)[RC_v4f32] = VectorRegisters;
(*TypeToRegisterSet)[RCX86_Is64To8] = Trunc64To8Registers;
(*TypeToRegisterSet)[RCX86_Is32To8] = Trunc32To8Registers;
(*TypeToRegisterSet)[RCX86_Is16To8] = Trunc16To8Registers;
(*TypeToRegisterSet)[RCX86_IsTrunc8Rcvr] = Trunc8RcvrRegisters;
(*TypeToRegisterSet)[RCX86_IsAhRcvr] = AhRcvrRegisters;
} }
static llvm::SmallBitVector static llvm::SmallBitVector
...@@ -498,6 +514,10 @@ template <> struct MachineTraits<TargetX8664> { ...@@ -498,6 +514,10 @@ template <> struct MachineTraits<TargetX8664> {
Index |= (is32 << (AttrKey++)); \ Index |= (is32 << (AttrKey++)); \
Index |= (is64 << (AttrKey++)); \ Index |= (is64 << (AttrKey++)); \
Index |= (isXmm << (AttrKey++)); \ Index |= (isXmm << (AttrKey++)); \
Index |= (is16To8 << (AttrKey++)); \
Index |= (is32To8 << (AttrKey++)); \
Index |= (is64To8 << (AttrKey++)); \
Index |= (isTrunc8Rcvr << (AttrKey++)); \
/* val is assigned to an equivalence class based on its properties. */ \ /* val is assigned to an equivalence class based on its properties. */ \
EquivalenceClasses[Index].push_back(RegisterSet::val); \ EquivalenceClasses[Index].push_back(RegisterSet::val); \
} }
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "IceInst.h" #include "IceInst.h"
#include "IceSwitchLowering.h" #include "IceSwitchLowering.h"
#include "IceTargetLowering.h" #include "IceTargetLowering.h"
#include "IceTargetLoweringX86RegClass.h"
#include "IceUtils.h" #include "IceUtils.h"
#include <array> #include <array>
...@@ -73,8 +74,11 @@ public: ...@@ -73,8 +74,11 @@ public:
IceString getRegName(SizeT RegNum, Type Ty) const override; IceString getRegName(SizeT RegNum, Type Ty) const override;
llvm::SmallBitVector getRegisterSet(RegSetMask Include, llvm::SmallBitVector getRegisterSet(RegSetMask Include,
RegSetMask Exclude) const override; RegSetMask Exclude) const override;
const llvm::SmallBitVector &getRegisterSetForType(Type Ty) const override { const llvm::SmallBitVector &
return TypeToRegisterSet[Ty]; getRegistersForVariable(const Variable *Var) const override {
RegClass RC = Var->getRegClass();
assert(static_cast<RegClassX86>(RC) < RCX86_NUM);
return TypeToRegisterSet[RC];
} }
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override { const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
...@@ -263,6 +267,7 @@ protected: ...@@ -263,6 +267,7 @@ protected:
static Type firstTypeThatFitsSize(uint32_t Size, static Type firstTypeThatFitsSize(uint32_t Size,
uint32_t MaxSize = NoSizeLimit); uint32_t MaxSize = NoSizeLimit);
Variable *copyToReg8(Operand *Src, int32_t RegNum = Variable::NoRegister);
Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister); Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister);
/// \name Returns a vector in a register with the given constant entries. /// \name Returns a vector in a register with the given constant entries.
...@@ -674,7 +679,7 @@ protected: ...@@ -674,7 +679,7 @@ protected:
bool NeedsStackAlignment = false; bool NeedsStackAlignment = false;
size_t SpillAreaSizeBytes = 0; size_t SpillAreaSizeBytes = 0;
size_t FixedAllocaSizeBytes = 0; size_t FixedAllocaSizeBytes = 0;
static std::array<llvm::SmallBitVector, IceType_NUM> TypeToRegisterSet; static std::array<llvm::SmallBitVector, RCX86_NUM> TypeToRegisterSet;
static std::array<llvm::SmallBitVector, Traits::RegisterSet::Reg_NUM> static std::array<llvm::SmallBitVector, Traits::RegisterSet::Reg_NUM>
RegisterAliases; RegisterAliases;
static llvm::SmallBitVector ScratchRegs; static llvm::SmallBitVector ScratchRegs;
......
//===- subzero/src/IceTargetLoweringX86RegClass.h - x86 reg class -*- C++ -*-=//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file declares the X86 register class extensions.
///
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICETARGETLOWERINGX86REGCLASS_H
#define SUBZERO_SRC_ICETARGETLOWERINGX86REGCLASS_H
#include "IceOperand.h" // RC_Target
namespace Ice {
namespace X86Internal {
// Extend enum RegClass with x86-specific register classes.
//
// The first enumerator is pinned to RC_Target (provided by IceOperand.h) and
// the remaining ones follow consecutively, so these values continue the
// numbering of the generic RC_* register classes instead of starting at zero.
// The x86 target lowering uses both the generic RC_* values and these values
// as indexes into its TypeToRegisterSet array; each entry here is filled from
// the matching attribute column of the x86 register tables (is64To8, is32To8,
// is16To8, isTrunc8Rcvr, isAhRcvr).
enum RegClassX86 : uint8_t {
RCX86_Is64To8 = RC_Target, // 64-bit GPR trivially truncatable to 8-bit
RCX86_Is32To8, // 32-bit GPR trivially truncatable to 8-bit
RCX86_Is16To8, // 16-bit GPR trivially truncatable to 8-bit
RCX86_IsTrunc8Rcvr, // 8-bit GPR that can receive a trunc operation
RCX86_IsAhRcvr, // 8-bit GPR that can be a mov dest from %ah
// Not a register class: one past the last valid value. It is used as the
// size of the TypeToRegisterSet array and as the upper bound when asserting
// that a variable's register class is valid, so it must stay last.
RCX86_NUM
};
} // end of namespace X86Internal
} // end of namespace Ice
#endif // SUBZERO_SRC_ICETARGETLOWERINGX86REGCLASS_H
...@@ -95,7 +95,7 @@ struct TypeAttributeFields { ...@@ -95,7 +95,7 @@ struct TypeAttributeFields {
const struct TypeAttributeFields TypeAttributes[] = { const struct TypeAttributeFields TypeAttributes[] = {
#define X(tag, sizeLog2, align, elts, elty, str) \ #define X(tag, sizeLog2, align, elts, elty, str) \
{ sizeLog2, align, elts, elty, str } \ { sizeLog2, align, elts, IceType_##elty, str } \
, ,
ICETYPE_TABLE ICETYPE_TABLE
#undef X #undef X
...@@ -120,7 +120,8 @@ const TypePropertyFields TypePropertiesTable[] = { ...@@ -120,7 +120,8 @@ const TypePropertyFields TypePropertiesTable[] = {
CompareResult) \ CompareResult) \
{ \ { \
IsVec, IsInt, IsInt & !IsVec, IsInt & IsVec, IsIntArith, IsFloat, \ IsVec, IsInt, IsInt & !IsVec, IsInt & IsVec, IsIntArith, IsFloat, \
IsFloat & !IsVec, IsFloat & IsVec, IsLoadStore, IsParam, CompareResult \ IsFloat & !IsVec, IsFloat & IsVec, IsLoadStore, IsParam, \
IceType_##CompareResult \
} \ } \
, ,
ICETYPE_PROPS_TABLE ICETYPE_PROPS_TABLE
......
...@@ -29,23 +29,23 @@ ...@@ -29,23 +29,23 @@
//#define X(tag, str, is_elf64, e_machine, e_flags) //#define X(tag, str, is_elf64, e_machine, e_flags)
#define ICETYPE_TABLE \ #define ICETYPE_TABLE \
/* enum value, log_2(size), align, # elts, element type, printable */ \ /* enum value, log_2(size), align, # elts, element type, */ \
/* string (size and alignment in bytes) */ \ /* printable string (size and alignment in bytes) */ \
X(IceType_void, -1, 0, 1, IceType_void, "void") \ X(void, -1, 0, 1, void, "void") \
X(IceType_i1, 0, 1, 1, IceType_i1, "i1") \ X(i1, 0, 1, 1, i1, "i1") \
X(IceType_i8, 0, 1, 1, IceType_i8, "i8") \ X(i8, 0, 1, 1, i8, "i8") \
X(IceType_i16, 1, 1, 1, IceType_i16, "i16") \ X(i16, 1, 1, 1, i16, "i16") \
X(IceType_i32, 2, 1, 1, IceType_i32, "i32") \ X(i32, 2, 1, 1, i32, "i32") \
X(IceType_i64, 3, 1, 1, IceType_i64, "i64") \ X(i64, 3, 1, 1, i64, "i64") \
X(IceType_f32, 2, 4, 1, IceType_f32, "float") \ X(f32, 2, 4, 1, f32, "float") \
X(IceType_f64, 3, 8, 1, IceType_f64, "double") \ X(f64, 3, 8, 1, f64, "double") \
X(IceType_v4i1, 4, 1, 4, IceType_i1, "<4 x i1>") \ X(v4i1, 4, 1, 4, i1, "<4 x i1>") \
X(IceType_v8i1, 4, 1, 8, IceType_i1, "<8 x i1>") \ X(v8i1, 4, 1, 8, i1, "<8 x i1>") \
X(IceType_v16i1, 4, 1, 16, IceType_i1, "<16 x i1>") \ X(v16i1, 4, 1, 16, i1, "<16 x i1>") \
X(IceType_v16i8, 4, 1, 16, IceType_i8, "<16 x i8>") \ X(v16i8, 4, 1, 16, i8, "<16 x i8>") \
X(IceType_v8i16, 4, 2, 8, IceType_i16, "<8 x i16>") \ X(v8i16, 4, 2, 8, i16, "<8 x i16>") \
X(IceType_v4i32, 4, 4, 4, IceType_i32, "<4 x i32>") \ X(v4i32, 4, 4, 4, i32, "<4 x i32>") \
X(IceType_v4f32, 4, 4, 4, IceType_f32, "<4 x float>") \ X(v4f32, 4, 4, 4, f32, "<4 x float>") \
//#define X(tag, sizeLog2, align, elts, elty, str) //#define X(tag, sizeLog2, align, elts, elty, str)
// Dictionary: // Dictionary:
...@@ -59,21 +59,21 @@ ...@@ -59,21 +59,21 @@
// (IceType_void if disallowed) // (IceType_void if disallowed)
#define ICETYPE_PROPS_TABLE \ #define ICETYPE_PROPS_TABLE \
/* Enum Value V I F IA LS P CR */ \ /* Enum Value V I F IA LS P CR */ \
X(IceType_void, 0, 0, 0, 0, 0, 0, IceType_void) \ X(void, 0, 0, 0, 0, 0, 0, void) \
X(IceType_i1, 0, 1, 0, 0, 0, 0, IceType_i1) \ X(i1, 0, 1, 0, 0, 0, 0, i1) \
X(IceType_i8, 0, 1, 0, 1, 1, 0, IceType_i1) \ X(i8, 0, 1, 0, 1, 1, 0, i1) \
X(IceType_i16, 0, 1, 0, 1, 1, 0, IceType_i1) \ X(i16, 0, 1, 0, 1, 1, 0, i1) \
X(IceType_i32, 0, 1, 0, 1, 1, 1, IceType_i1) \ X(i32, 0, 1, 0, 1, 1, 1, i1) \
X(IceType_i64, 0, 1, 0, 1, 1, 1, IceType_i1) \ X(i64, 0, 1, 0, 1, 1, 1, i1) \
X(IceType_f32, 0, 0, 1, 0, 1, 1, IceType_i1) \ X(f32, 0, 0, 1, 0, 1, 1, i1) \
X(IceType_f64, 0, 0, 1, 0, 1, 1, IceType_i1) \ X(f64, 0, 0, 1, 0, 1, 1, i1) \
X(IceType_v4i1, 1, 1, 0, 0, 0, 1, IceType_v4i1) \ X(v4i1, 1, 1, 0, 0, 0, 1, v4i1) \
X(IceType_v8i1, 1, 1, 0, 0, 0, 1, IceType_v8i1) \ X(v8i1, 1, 1, 0, 0, 0, 1, v8i1) \
X(IceType_v16i1, 1, 1, 0, 0, 0, 1, IceType_v16i1) \ X(v16i1, 1, 1, 0, 0, 0, 1, v16i1) \
X(IceType_v16i8, 1, 1, 0, 1, 1, 1, IceType_v16i1) \ X(v16i8, 1, 1, 0, 1, 1, 1, v16i1) \
X(IceType_v8i16, 1, 1, 0, 1, 1, 1, IceType_v8i1) \ X(v8i16, 1, 1, 0, 1, 1, 1, v8i1) \
X(IceType_v4i32, 1, 1, 0, 1, 1, 1, IceType_v4i1) \ X(v4i32, 1, 1, 0, 1, 1, 1, v4i1) \
X(IceType_v4f32, 1, 0, 1, 0, 1, 1, IceType_v4i1) \ X(v4f32, 1, 0, 1, 0, 1, 1, v4i1) \
//#define X(tag, IsVec, IsInt, IsFloat, IsIntArith, IsLoadStore, IsParam, \ //#define X(tag, IsVec, IsInt, IsFloat, IsIntArith, IsLoadStore, IsParam, \
// CompareResult) // CompareResult)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
namespace Ice { namespace Ice {
enum Type { enum Type {
#define X(tag, sizeLog2, align, elts, elty, str) tag, #define X(tag, sizeLog2, align, elts, elty, str) IceType_##tag,
ICETYPE_TABLE ICETYPE_TABLE
#undef X #undef X
IceType_NUM IceType_NUM
......
...@@ -726,7 +726,7 @@ entry: ...@@ -726,7 +726,7 @@ entry:
; CHECK-NEXT: movsx eax,ax ; CHECK-NEXT: movsx eax,ax
; ;
; OPTM1-LABEL: trunc64To16Signed ; OPTM1-LABEL: trunc64To16Signed
; OPTM1: mov eax,DWORD PTR [esp+ ; OPTM1: mov ax,WORD PTR [esp+
; OPTM1: movsx eax, ; OPTM1: movsx eax,
; ARM32-LABEL: trunc64To16Signed ; ARM32-LABEL: trunc64To16Signed
...@@ -808,7 +808,7 @@ entry: ...@@ -808,7 +808,7 @@ entry:
; CHECK-NEXT: movzx eax,ax ; CHECK-NEXT: movzx eax,ax
; ;
; OPTM1-LABEL: trunc64To16Unsigned ; OPTM1-LABEL: trunc64To16Unsigned
; OPTM1: mov eax,DWORD PTR [esp+ ; OPTM1: mov ax,WORD PTR [esp+
; OPTM1: movzx eax, ; OPTM1: movzx eax,
; ARM32-LABEL: trunc64To16Unsigned ; ARM32-LABEL: trunc64To16Unsigned
...@@ -840,12 +840,12 @@ entry: ...@@ -840,12 +840,12 @@ entry:
} }
; CHECK-LABEL: trunc64To1 ; CHECK-LABEL: trunc64To1
; CHECK: mov eax,DWORD PTR [esp+0x4] ; CHECK: mov eax,DWORD PTR [esp+0x4]
; CHECK: and eax,0x1 ; CHECK: and al,0x1
; CHECK-NOT: and eax,0x1 ; CHECK-NOT: and eax,0x1
; ;
; OPTM1-LABEL: trunc64To1 ; OPTM1-LABEL: trunc64To1
; OPTM1: mov eax,DWORD PTR [esp+ ; OPTM1: mov eax,DWORD PTR [esp+
; OPTM1: and eax,0x1 ; OPTM1: and al,0x1
; OPTM1-NOT: and eax,0x1 ; OPTM1-NOT: and eax,0x1
; ARM32-LABEL: trunc64To1 ; ARM32-LABEL: trunc64To1
......
...@@ -31,7 +31,7 @@ eblock: ...@@ -31,7 +31,7 @@ eblock:
; CHECK: sub esp,0x80 ; CHECK: sub esp,0x80
; CHECK: mov DWORD PTR [ebp-0x4],esp ; CHECK: mov DWORD PTR [ebp-0x4],esp
; CHECK: mov eax,DWORD PTR [ebp+0xc] ; CHECK: mov eax,DWORD PTR [ebp+0xc]
; CHECK: mov DWORD PTR [ebp-0x8],eax ; CHECK: mov BYTE PTR [ebp-0x8],al
; CHECK: movzx eax,BYTE PTR [ebp-0x8] ; CHECK: movzx eax,BYTE PTR [ebp-0x8]
; CHECK: mov DWORD PTR [ebp-0xc],eax ; CHECK: mov DWORD PTR [ebp-0xc],eax
; CHECK: sub esp,0x10 ; CHECK: sub esp,0x10
......
...@@ -1085,7 +1085,7 @@ entry: ...@@ -1085,7 +1085,7 @@ entry:
ret i32 %old_ext ret i32 %old_ext
} }
; CHECK-LABEL: test_atomic_cmpxchg_16 ; CHECK-LABEL: test_atomic_cmpxchg_16
; CHECK: mov eax,{{.*}} ; CHECK: mov {{ax|eax}},{{.*}}
; CHECK: lock cmpxchg WORD PTR [e{{[^a].}}],{{[^a]}}x ; CHECK: lock cmpxchg WORD PTR [e{{[^a].}}],{{[^a]}}x
; ARM32-LABEL: test_atomic_cmpxchg_16 ; ARM32-LABEL: test_atomic_cmpxchg_16
; ARM32: dmb ; ARM32: dmb
......
...@@ -26,11 +26,11 @@ entry: ...@@ -26,11 +26,11 @@ entry:
; OPTM1_1-NEXT: movups XMMWORD PTR [esp+0x20],xmm0 ; OPTM1_1-NEXT: movups XMMWORD PTR [esp+0x20],xmm0
; OPTM1_1-NEXT: movups XMMWORD PTR [esp+0x10],xmm1 ; OPTM1_1-NEXT: movups XMMWORD PTR [esp+0x10],xmm1
; OPTM1_1-NEXT: movups xmm0,XMMWORD PTR [esp+0x20] ; OPTM1_1-NEXT: movups xmm0,XMMWORD PTR [esp+0x20]
; OPTM1_1-NEXT: pshufd xmm1,XMMWORD PTR [esp+0x20],0x31 ; OPTM1_1-NEXT: pshufd xmm6,XMMWORD PTR [esp+0x20],0x31
; OPTM1_1-NEXT: pshufd xmm2,XMMWORD PTR [esp+0x10],0x31 ; OPTM1_1-NEXT: pshufd xmm2,XMMWORD PTR [esp+0x10],0x31
; OPTM1_1-NEXT: pmuludq xmm0,XMMWORD PTR [esp+0x10] ; OPTM1_1-NEXT: pmuludq xmm0,XMMWORD PTR [esp+0x10]
; OPTM1_1-NEXT: pmuludq xmm1,xmm2 ; OPTM1_1-NEXT: pmuludq xmm6,xmm2
; OPTM1_1-NEXT: shufps xmm0,xmm1,0x88 ; OPTM1_1-NEXT: shufps xmm0,xmm6,0x88
; OPTM1_1-NEXT: pshufd xmm0,xmm0,0xd8 ; OPTM1_1-NEXT: pshufd xmm0,xmm0,0xd8
; OPTM1_1-NEXT: movups XMMWORD PTR [esp],xmm0 ; OPTM1_1-NEXT: movups XMMWORD PTR [esp],xmm0
; OPTM1_1-NEXT: movups xmm0,XMMWORD PTR [esp] ; OPTM1_1-NEXT: movups xmm0,XMMWORD PTR [esp]
...@@ -40,9 +40,9 @@ entry: ...@@ -40,9 +40,9 @@ entry:
; CHECK_1-LABEL: mul_v4i32 ; CHECK_1-LABEL: mul_v4i32
; CHECK_1: movups xmm7,xmm0 ; CHECK_1: movups xmm7,xmm0
; CHECK_1-NEXT: pshufd xmm0,xmm0,0x31 ; CHECK_1-NEXT: pshufd xmm0,xmm0,0x31
; CHECK_1-NEXT: pshufd xmm4,xmm1,0x31 ; CHECK_1-NEXT: pshufd xmm5,xmm1,0x31
; CHECK_1-NEXT: pmuludq xmm7,xmm1 ; CHECK_1-NEXT: pmuludq xmm7,xmm1
; CHECK_1-NEXT: pmuludq xmm0,xmm4 ; CHECK_1-NEXT: pmuludq xmm0,xmm5
; CHECK_1-NEXT: shufps xmm7,xmm0,0x88 ; CHECK_1-NEXT: shufps xmm7,xmm0,0x88
; CHECK_1-NEXT: pshufd xmm7,xmm7,0xd8 ; CHECK_1-NEXT: pshufd xmm7,xmm7,0xd8
; CHECK_1-NEXT: movups xmm0,xmm7 ; CHECK_1-NEXT: movups xmm0,xmm7
...@@ -53,11 +53,11 @@ entry: ...@@ -53,11 +53,11 @@ entry:
; OPTM1_123-NEXT: movups XMMWORD PTR [esp+0x20],xmm0 ; OPTM1_123-NEXT: movups XMMWORD PTR [esp+0x20],xmm0
; OPTM1_123-NEXT: movups XMMWORD PTR [esp+0x10],xmm1 ; OPTM1_123-NEXT: movups XMMWORD PTR [esp+0x10],xmm1
; OPTM1_123-NEXT: movups xmm0,XMMWORD PTR [esp+0x20] ; OPTM1_123-NEXT: movups xmm0,XMMWORD PTR [esp+0x20]
; OPTM1_123-NEXT: pshufd xmm3,XMMWORD PTR [esp+0x20],0x31 ; OPTM1_123-NEXT: pshufd xmm6,XMMWORD PTR [esp+0x20],0x31
; OPTM1_123-NEXT: pshufd xmm7,XMMWORD PTR [esp+0x10],0x31 ; OPTM1_123-NEXT: pshufd xmm2,XMMWORD PTR [esp+0x10],0x31
; OPTM1_123-NEXT: pmuludq xmm0,XMMWORD PTR [esp+0x10] ; OPTM1_123-NEXT: pmuludq xmm0,XMMWORD PTR [esp+0x10]
; OPTM1_123-NEXT: pmuludq xmm3,xmm7 ; OPTM1_123-NEXT: pmuludq xmm6,xmm2
; OPTM1_123-NEXT: shufps xmm0,xmm3,0x88 ; OPTM1_123-NEXT: shufps xmm0,xmm6,0x88
; OPTM1_123-NEXT: pshufd xmm0,xmm0,0xd8 ; OPTM1_123-NEXT: pshufd xmm0,xmm0,0xd8
; OPTM1_123-NEXT: movups XMMWORD PTR [esp],xmm0 ; OPTM1_123-NEXT: movups XMMWORD PTR [esp],xmm0
; OPTM1_123-NEXT: movups xmm0,XMMWORD PTR [esp] ; OPTM1_123-NEXT: movups xmm0,XMMWORD PTR [esp]
...@@ -65,14 +65,14 @@ entry: ...@@ -65,14 +65,14 @@ entry:
; OPTM1_123-NEXT: ret ; OPTM1_123-NEXT: ret
; CHECK_123-LABEL: mul_v4i32 ; CHECK_123-LABEL: mul_v4i32
; CHECK_123: movups xmm4,xmm0 ; CHECK_123: movups xmm5,xmm0
; CHECK_123-NEXT: pshufd xmm0,xmm0,0x31 ; CHECK_123-NEXT: pshufd xmm0,xmm0,0x31
; CHECK_123-NEXT: pshufd xmm7,xmm1,0x31 ; CHECK_123-NEXT: pshufd xmm7,xmm1,0x31
; CHECK_123-NEXT: pmuludq xmm4,xmm1 ; CHECK_123-NEXT: pmuludq xmm5,xmm1
; CHECK_123-NEXT: pmuludq xmm0,xmm7 ; CHECK_123-NEXT: pmuludq xmm0,xmm7
; CHECK_123-NEXT: shufps xmm4,xmm0,0x88 ; CHECK_123-NEXT: shufps xmm5,xmm0,0x88
; CHECK_123-NEXT: pshufd xmm4,xmm4,0xd8 ; CHECK_123-NEXT: pshufd xmm5,xmm5,0xd8
; CHECK_123-NEXT: movups xmm0,xmm4 ; CHECK_123-NEXT: movups xmm0,xmm5
; CHECK_123-NEXT: ret ; CHECK_123-NEXT: ret
} }
......
...@@ -189,14 +189,14 @@ entry: ...@@ -189,14 +189,14 @@ entry:
ret <4 x i32> %res ret <4 x i32> %res
; REGALLOC-LABEL: func4 ; REGALLOC-LABEL: func4
; REGALLOC: movups xmm5,xmm0 ; REGALLOC: movups xmm3,xmm0
; REGALLOC-NEXT: pshufd xmm0,xmm0,0x31 ; REGALLOC-NEXT: pshufd xmm0,xmm0,0x31
; REGALLOC-NEXT: pshufd xmm4,xmm1,0x31 ; REGALLOC-NEXT: pshufd xmm4,xmm1,0x31
; REGALLOC-NEXT: pmuludq xmm5,xmm1 ; REGALLOC-NEXT: pmuludq xmm3,xmm1
; REGALLOC-NEXT: pmuludq xmm0,xmm4 ; REGALLOC-NEXT: pmuludq xmm0,xmm4
; REGALLOC-NEXT: shufps xmm5,xmm0,0x88 ; REGALLOC-NEXT: shufps xmm3,xmm0,0x88
; REGALLOC-NEXT: pshufd xmm5,xmm5,0xd8 ; REGALLOC-NEXT: pshufd xmm3,xmm3,0xd8
; REGALLOC-NEXT: movups xmm0,xmm5 ; REGALLOC-NEXT: movups xmm0,xmm3
; REGALLOC-NEXT: ret ; REGALLOC-NEXT: ret
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment