Commit e377767c by Matt Wala

Subzero: Fix some issues related to legalization and undef handling.

1. Much of the lowering code for vector operations was not properly checking that the input operand was in a register or memory. This problem could be exhibited by passing undef values as inputs.

=> Change the vector legalization code to legalize input operands to register or memory before producing instructions that use the operands. Also, append a suffix to the variable names in the vector legalization code to clarify the legalization status of the values.

2. Undef values should never be emitted directly. Rather, they should have been appropriately legalized to a zero value.

=> To enforce this, make ConstantUndef::emit() issue an error message. Do this in the x86 backend, as other backends may decide to treat undef values differently.

3. The regalloc_evict_non_overlap test was loading from an undef pointer. Subzero was not handling this correctly (the undef pointer was being emitted without being legalized), but it does not have to handle this case since PNaCl IR disallows undef pointers.

=> Fix the regalloc_evict_non_overlap test to use an inttoptr instead of directly loading from the undef pointer. Also, add an assert in IceTargetLoweringX8632::FormMemoryOperand() to make sure that undef pointers are never encountered.

BUG=none
R=jvoung@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/432613002
parent 5acafbc0
...@@ -217,10 +217,8 @@ public: ...@@ -217,10 +217,8 @@ public:
} }
using Constant::emit; using Constant::emit;
virtual void emit(GlobalContext *Ctx) const { // The target needs to implement this.
Ostream &Str = Ctx->getStrEmit(); virtual void emit(GlobalContext *Ctx) const;
Str << "undef";
}
using Constant::dump; using Constant::dump;
virtual void dump(GlobalContext *Ctx) const { virtual void dump(GlobalContext *Ctx) const {
......
...@@ -1249,7 +1249,7 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1249,7 +1249,7 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_movp(T, Src0); _movp(T, Src0);
_pmull(T, legalizeToVar(Src1)); _pmull(T, LEGAL_HACK(Src1));
_movp(Dest, T); _movp(Dest, T);
} else if (Dest->getType() == IceType_v4i32) { } else if (Dest->getType() == IceType_v4i32) {
// Lowering sequence: // Lowering sequence:
...@@ -2159,17 +2159,17 @@ void TargetX8632::lowerCast(const InstCast *Inst) { ...@@ -2159,17 +2159,17 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
} }
void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
Operand *SourceVectOperand = Inst->getSrc(0); Operand *SourceVectNotLegalized = Inst->getSrc(0);
ConstantInteger *ElementIndex = ConstantInteger *ElementIndex =
llvm::dyn_cast<ConstantInteger>(Inst->getSrc(1)); llvm::dyn_cast<ConstantInteger>(Inst->getSrc(1));
// Only constant indices are allowed in PNaCl IR. // Only constant indices are allowed in PNaCl IR.
assert(ElementIndex); assert(ElementIndex);
unsigned Index = ElementIndex->getValue(); unsigned Index = ElementIndex->getValue();
Type Ty = SourceVectOperand->getType(); Type Ty = SourceVectNotLegalized->getType();
Type ElementTy = typeElementType(Ty); Type ElementTy = typeElementType(Ty);
Type InVectorElementTy = getInVectorElementType(Ty); Type InVectorElementTy = getInVectorElementType(Ty);
Variable *ExtractedElement = makeReg(InVectorElementTy); Variable *ExtractedElementR = makeReg(InVectorElementTy);
// TODO(wala): Determine the best lowering sequences for each type. // TODO(wala): Determine the best lowering sequences for each type.
bool CanUsePextr = bool CanUsePextr =
...@@ -2177,8 +2177,8 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { ...@@ -2177,8 +2177,8 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
if (CanUsePextr && Ty != IceType_v4f32) { if (CanUsePextr && Ty != IceType_v4f32) {
// Use pextrb, pextrw, or pextrd. // Use pextrb, pextrw, or pextrd.
Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
Variable *SourceVectR = legalizeToVar(SourceVectOperand); Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
_pextr(ExtractedElement, SourceVectR, Mask); _pextr(ExtractedElementR, SourceVectR, Mask);
} else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
// Use pshufd and movd/movss. // Use pshufd and movd/movss.
// //
...@@ -2186,58 +2186,60 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { ...@@ -2186,58 +2186,60 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
// require aligned memory operands until support for stack alignment // require aligned memory operands until support for stack alignment
// is implemented. // is implemented.
#define ALIGN_HACK(Vect) legalizeToVar((Vect)) #define ALIGN_HACK(Vect) legalizeToVar((Vect))
Operand *SourceVectRM =
legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
Variable *T = NULL; Variable *T = NULL;
if (Index) { if (Index) {
// The shuffle only needs to occur if the element to be extracted // The shuffle only needs to occur if the element to be extracted
// is not at the lowest index. // is not at the lowest index.
Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
T = makeReg(Ty); T = makeReg(Ty);
_pshufd(T, ALIGN_HACK(SourceVectOperand), Mask); _pshufd(T, ALIGN_HACK(SourceVectRM), Mask);
} else { } else {
T = legalizeToVar(SourceVectOperand); T = ALIGN_HACK(SourceVectRM);
} }
if (InVectorElementTy == IceType_i32) { if (InVectorElementTy == IceType_i32) {
_movd(ExtractedElement, T); _movd(ExtractedElementR, T);
} else { // Ty == Icetype_f32 } else { // Ty == Icetype_f32
// TODO(wala): _movss is only used here because _mov does not // TODO(wala): _movss is only used here because _mov does not
// allow a vector source and a scalar destination. _mov should be // allow a vector source and a scalar destination. _mov should be
// able to be used here. // able to be used here.
// _movss is a binary instruction, so the FakeDef is needed to // _movss is a binary instruction, so the FakeDef is needed to
// keep the live range analysis consistent. // keep the live range analysis consistent.
Context.insert(InstFakeDef::create(Func, ExtractedElement)); Context.insert(InstFakeDef::create(Func, ExtractedElementR));
_movss(ExtractedElement, T); _movss(ExtractedElementR, T);
} }
#undef ALIGN_HACK #undef ALIGN_HACK
} else { } else {
assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
// Spill the value to a stack slot and do the extraction in memory. // Spill the value to a stack slot and do the extraction in memory.
// //
// TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
// support for legalizing to mem is implemented. // support for legalizing to mem is implemented.
Variable *Slot = Func->makeVariable(Ty, Context.getNode()); Variable *Slot = Func->makeVariable(Ty, Context.getNode());
Slot->setWeight(RegWeight::Zero); Slot->setWeight(RegWeight::Zero);
_movp(Slot, legalizeToVar(SourceVectOperand)); _movp(Slot, legalizeToVar(SourceVectNotLegalized));
// Compute the location of the element in memory. // Compute the location of the element in memory.
unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
OperandX8632Mem *Loc = OperandX8632Mem *Loc =
getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
_mov(ExtractedElement, Loc); _mov(ExtractedElementR, Loc);
} }
if (ElementTy == IceType_i1) { if (ElementTy == IceType_i1) {
// Truncate extracted integers to i1s if necessary. // Truncate extracted integers to i1s if necessary.
Variable *T = makeReg(IceType_i1); Variable *T = makeReg(IceType_i1);
InstCast *Cast = InstCast *Cast =
InstCast::create(Func, InstCast::Trunc, T, ExtractedElement); InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
lowerCast(Cast); lowerCast(Cast);
ExtractedElement = T; ExtractedElementR = T;
} }
// Copy the element to the destination. // Copy the element to the destination.
Variable *Dest = Inst->getDest(); Variable *Dest = Inst->getDest();
_mov(Dest, ExtractedElement); _mov(Dest, ExtractedElementR);
} }
void TargetX8632::lowerFcmp(const InstFcmp *Inst) { void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
...@@ -2258,48 +2260,52 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { ...@@ -2258,48 +2260,52 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
Variable *T = NULL; Variable *T = NULL;
// ALIGNHACK: Without support for stack alignment, both operands to if (Condition == InstFcmp::True) {
// cmpps need to be forced into registers. Once support for stack // makeVectorOfOnes() requires an integer vector type.
// alignment is implemented, remove LEGAL_HACK.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
switch (Condition) {
default: {
InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;
assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);
T = makeReg(Src0->getType());
_movp(T, Src0);
_cmpps(T, LEGAL_HACK(Src1), Predicate);
} break;
case InstFcmp::False:
T = makeVectorOfZeros(Src0->getType());
break;
case InstFcmp::One: {
// Check both unequal and ordered.
T = makeReg(Src0->getType());
Variable *T2 = makeReg(Src0->getType());
Src1 = LEGAL_HACK(Src1);
_movp(T, Src0);
_cmpps(T, Src1, InstX8632Cmpps::Cmpps_neq);
_movp(T2, Src0);
_cmpps(T2, Src1, InstX8632Cmpps::Cmpps_ord);
_pand(T, T2);
} break;
case InstFcmp::Ueq: {
// Check both equal or unordered.
T = makeReg(Src0->getType());
Variable *T2 = makeReg(Src0->getType());
Src1 = LEGAL_HACK(Src1);
_movp(T, Src0);
_cmpps(T, Src1, InstX8632Cmpps::Cmpps_eq);
_movp(T2, Src0);
_cmpps(T2, Src1, InstX8632Cmpps::Cmpps_unord);
_por(T, T2);
} break;
case InstFcmp::True:
T = makeVectorOfMinusOnes(IceType_v4i32); T = makeVectorOfMinusOnes(IceType_v4i32);
break; } else if (Condition == InstFcmp::False) {
} T = makeVectorOfZeros(Dest->getType());
} else {
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
// ALIGNHACK: Without support for stack alignment, both operands to
// cmpps need to be forced into registers. Once support for stack
// alignment is implemented, remove LEGAL_HACK.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
switch (Condition) {
default: {
InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;
assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);
T = makeReg(Src0RM->getType());
_movp(T, Src0RM);
_cmpps(T, LEGAL_HACK(Src1RM), Predicate);
} break;
case InstFcmp::One: {
// Check both unequal and ordered.
T = makeReg(Src0RM->getType());
Variable *T2 = makeReg(Src0RM->getType());
Src1RM = LEGAL_HACK(Src1RM);
_movp(T, Src0RM);
_cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq);
_movp(T2, Src0RM);
_cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord);
_pand(T, T2);
} break;
case InstFcmp::Ueq: {
// Check both equal or unordered.
T = makeReg(Src0RM->getType());
Variable *T2 = makeReg(Src0RM->getType());
Src1RM = LEGAL_HACK(Src1RM);
_movp(T, Src0RM);
_cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq);
_movp(T2, Src0RM);
_cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord);
_por(T, T2);
} break;
}
#undef LEGAL_HACK #undef LEGAL_HACK
}
_movp(Dest, T); _movp(Dest, T);
eliminateNextVectorSextInstruction(Dest); eliminateNextVectorSextInstruction(Dest);
...@@ -2384,6 +2390,9 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { ...@@ -2384,6 +2390,9 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
InstIcmp::ICond Condition = Inst->getCondition(); InstIcmp::ICond Condition = Inst->getCondition();
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
// SSE2 only has signed comparison operations. Transform unsigned // SSE2 only has signed comparison operations. Transform unsigned
// inputs in a manner that allows for the use of signed comparison // inputs in a manner that allows for the use of signed comparison
// operations by flipping the high order bits. // operations by flipping the high order bits.
...@@ -2392,12 +2401,12 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { ...@@ -2392,12 +2401,12 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
Variable *T0 = makeReg(Ty); Variable *T0 = makeReg(Ty);
Variable *T1 = makeReg(Ty); Variable *T1 = makeReg(Ty);
Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
_movp(T0, Src0); _movp(T0, Src0RM);
_pxor(T0, HighOrderBits); _pxor(T0, HighOrderBits);
_movp(T1, Src1); _movp(T1, Src1RM);
_pxor(T1, HighOrderBits); _pxor(T1, HighOrderBits);
Src0 = T0; Src0RM = T0;
Src1 = T1; Src1RM = T1;
} }
// TODO: ALIGNHACK: Both operands to compare instructions need to be // TODO: ALIGNHACK: Both operands to compare instructions need to be
...@@ -2410,38 +2419,38 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { ...@@ -2410,38 +2419,38 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
llvm_unreachable("unexpected condition"); llvm_unreachable("unexpected condition");
break; break;
case InstIcmp::Eq: { case InstIcmp::Eq: {
_movp(T, Src0); _movp(T, Src0RM);
_pcmpeq(T, LEGAL_HACK(Src1)); _pcmpeq(T, LEGAL_HACK(Src1RM));
} break; } break;
case InstIcmp::Ne: { case InstIcmp::Ne: {
_movp(T, Src0); _movp(T, Src0RM);
_pcmpeq(T, LEGAL_HACK(Src1)); _pcmpeq(T, LEGAL_HACK(Src1RM));
Variable *MinusOne = makeVectorOfMinusOnes(Ty); Variable *MinusOne = makeVectorOfMinusOnes(Ty);
_pxor(T, MinusOne); _pxor(T, MinusOne);
} break; } break;
case InstIcmp::Ugt: case InstIcmp::Ugt:
case InstIcmp::Sgt: { case InstIcmp::Sgt: {
_movp(T, Src0); _movp(T, Src0RM);
_pcmpgt(T, LEGAL_HACK(Src1)); _pcmpgt(T, LEGAL_HACK(Src1RM));
} break; } break;
case InstIcmp::Uge: case InstIcmp::Uge:
case InstIcmp::Sge: { case InstIcmp::Sge: {
// !(Src1 > Src0) // !(Src1RM > Src0RM)
_movp(T, Src1); _movp(T, Src1RM);
_pcmpgt(T, LEGAL_HACK(Src0)); _pcmpgt(T, LEGAL_HACK(Src0RM));
Variable *MinusOne = makeVectorOfMinusOnes(Ty); Variable *MinusOne = makeVectorOfMinusOnes(Ty);
_pxor(T, MinusOne); _pxor(T, MinusOne);
} break; } break;
case InstIcmp::Ult: case InstIcmp::Ult:
case InstIcmp::Slt: { case InstIcmp::Slt: {
_movp(T, Src1); _movp(T, Src1RM);
_pcmpgt(T, LEGAL_HACK(Src0)); _pcmpgt(T, LEGAL_HACK(Src0RM));
} break; } break;
case InstIcmp::Ule: case InstIcmp::Ule:
case InstIcmp::Sle: { case InstIcmp::Sle: {
// !(Src0 > Src1) // !(Src0RM > Src1RM)
_movp(T, Src0); _movp(T, Src0RM);
_pcmpgt(T, LEGAL_HACK(Src1)); _pcmpgt(T, LEGAL_HACK(Src1RM));
Variable *MinusOne = makeVectorOfMinusOnes(Ty); Variable *MinusOne = makeVectorOfMinusOnes(Ty);
_pxor(T, MinusOne); _pxor(T, MinusOne);
} break; } break;
...@@ -2533,16 +2542,16 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { ...@@ -2533,16 +2542,16 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
} }
void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
Operand *SourceVectOperand = Inst->getSrc(0); Operand *SourceVectNotLegalized = Inst->getSrc(0);
Operand *ElementToInsert = Inst->getSrc(1); Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
ConstantInteger *ElementIndex = ConstantInteger *ElementIndex =
llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2)); llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2));
// Only constant indices are allowed in PNaCl IR. // Only constant indices are allowed in PNaCl IR.
assert(ElementIndex); assert(ElementIndex);
unsigned Index = ElementIndex->getValue(); unsigned Index = ElementIndex->getValue();
assert(Index < typeNumElements(SourceVectOperand->getType())); assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
Type Ty = SourceVectOperand->getType(); Type Ty = SourceVectNotLegalized->getType();
Type ElementTy = typeElementType(Ty); Type ElementTy = typeElementType(Ty);
Type InVectorElementTy = getInVectorElementType(Ty); Type InVectorElementTy = getInVectorElementType(Ty);
...@@ -2551,39 +2560,45 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { ...@@ -2551,39 +2560,45 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
// in the vector. // in the vector.
Variable *Expanded = Variable *Expanded =
Func->makeVariable(InVectorElementTy, Context.getNode()); Func->makeVariable(InVectorElementTy, Context.getNode());
InstCast *Cast = InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
InstCast::create(Func, InstCast::Zext, Expanded, ElementToInsert); ElementToInsertNotLegalized);
lowerCast(Cast); lowerCast(Cast);
ElementToInsert = Expanded; ElementToInsertNotLegalized = Expanded;
} }
if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) { if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
// Use insertps, pinsrb, pinsrw, or pinsrd. // Use insertps, pinsrb, pinsrw, or pinsrd.
Operand *Element = legalize(ElementToInsert, Legal_Mem | Legal_Reg); Operand *ElementRM =
legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
Operand *SourceVectRM =
legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
Variable *T = makeReg(Ty); Variable *T = makeReg(Ty);
_movp(T, SourceVectOperand); _movp(T, SourceVectRM);
if (Ty == IceType_v4f32) if (Ty == IceType_v4f32)
_insertps(T, Element, Ctx->getConstantInt(IceType_i8, Index << 4)); _insertps(T, ElementRM, Ctx->getConstantInt(IceType_i8, Index << 4));
else else
_pinsr(T, Element, Ctx->getConstantInt(IceType_i8, Index)); _pinsr(T, ElementRM, Ctx->getConstantInt(IceType_i8, Index));
_movp(Inst->getDest(), T); _movp(Inst->getDest(), T);
} else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
// Use shufps or movss. // Use shufps or movss.
Variable *Element = NULL; Variable *ElementR = NULL;
Operand *SourceVectRM =
legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
if (InVectorElementTy == IceType_f32) { if (InVectorElementTy == IceType_f32) {
// Element will be in an XMM register since it is floating point. // ElementR will be in an XMM register since it is floating point.
Element = legalizeToVar(ElementToInsert); ElementR = legalizeToVar(ElementToInsertNotLegalized);
} else { } else {
// Copy an integer to an XMM register. // Copy an integer to an XMM register.
Operand *T = legalize(ElementToInsert, Legal_Reg | Legal_Mem); Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
Element = makeReg(Ty); ElementR = makeReg(Ty);
_movd(Element, T); _movd(ElementR, T);
} }
if (Index == 0) { if (Index == 0) {
Variable *T = makeReg(Ty); Variable *T = makeReg(Ty);
_movp(T, SourceVectOperand); _movp(T, SourceVectRM);
_movss(T, Element); _movss(T, ElementR);
_movp(Inst->getDest(), T); _movp(Inst->getDest(), T);
return; return;
} }
...@@ -2597,19 +2612,19 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { ...@@ -2597,19 +2612,19 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
// Element[0] is being inserted into SourceVectOperand. Indices are // Element[0] is being inserted into SourceVectOperand. Indices are
// ordered from left to right. // ordered from left to right.
// //
// insertelement into index 1 (result is stored in Element): // insertelement into index 1 (result is stored in ElementR):
// Element := Element[0, 0] SourceVectOperand[0, 0] // ElementR := ElementR[0, 0] SourceVectRM[0, 0]
// Element := Element[3, 0] SourceVectOperand[2, 3] // ElementR := ElementR[3, 0] SourceVectRM[2, 3]
// //
// insertelement into index 2 (result is stored in T): // insertelement into index 2 (result is stored in T):
// T := SourceVectOperand // T := SourceVectRM
// Element := Element[0, 0] T[0, 3] // ElementR := ElementR[0, 0] T[0, 3]
// T := T[0, 1] Element[0, 3] // T := T[0, 1] ElementR[0, 3]
// //
// insertelement into index 3 (result is stored in T): // insertelement into index 3 (result is stored in T):
// T := SourceVectOperand // T := SourceVectRM
// Element := Element[0, 0] T[0, 2] // ElementR := ElementR[0, 0] T[0, 2]
// T := T[0, 1] Element[3, 0] // T := T[0, 1] ElementR[3, 0]
const unsigned char Mask1[3] = {0, 192, 128}; const unsigned char Mask1[3] = {0, 192, 128};
const unsigned char Mask2[3] = {227, 196, 52}; const unsigned char Mask2[3] = {227, 196, 52};
...@@ -2621,15 +2636,15 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { ...@@ -2621,15 +2636,15 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
// is implemented. // is implemented.
#define ALIGN_HACK(Vect) legalizeToVar((Vect)) #define ALIGN_HACK(Vect) legalizeToVar((Vect))
if (Index == 1) { if (Index == 1) {
SourceVectOperand = ALIGN_HACK(SourceVectOperand); SourceVectRM = ALIGN_HACK(SourceVectRM);
_shufps(Element, SourceVectOperand, Mask1Constant); _shufps(ElementR, SourceVectRM, Mask1Constant);
_shufps(Element, SourceVectOperand, Mask2Constant); _shufps(ElementR, SourceVectRM, Mask2Constant);
_movp(Inst->getDest(), Element); _movp(Inst->getDest(), ElementR);
} else { } else {
Variable *T = makeReg(Ty); Variable *T = makeReg(Ty);
_movp(T, SourceVectOperand); _movp(T, SourceVectRM);
_shufps(Element, T, Mask1Constant); _shufps(ElementR, T, Mask1Constant);
_shufps(T, Element, Mask2Constant); _shufps(T, ElementR, Mask2Constant);
_movp(Inst->getDest(), T); _movp(Inst->getDest(), T);
} }
#undef ALIGN_HACK #undef ALIGN_HACK
...@@ -2638,17 +2653,17 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { ...@@ -2638,17 +2653,17 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
// Spill the value to a stack slot and perform the insertion in // Spill the value to a stack slot and perform the insertion in
// memory. // memory.
// //
// TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
// support for legalizing to mem is implemented. // support for legalizing to mem is implemented.
Variable *Slot = Func->makeVariable(Ty, Context.getNode()); Variable *Slot = Func->makeVariable(Ty, Context.getNode());
Slot->setWeight(RegWeight::Zero); Slot->setWeight(RegWeight::Zero);
_movp(Slot, legalizeToVar(SourceVectOperand)); _movp(Slot, legalizeToVar(SourceVectNotLegalized));
// Compute the location of the position to insert in memory. // Compute the location of the position to insert in memory.
unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
OperandX8632Mem *Loc = OperandX8632Mem *Loc =
getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
_store(legalizeToVar(ElementToInsert), Loc); _store(legalizeToVar(ElementToInsertNotLegalized), Loc);
Variable *T = makeReg(Ty); Variable *T = makeReg(Ty);
_movp(T, Slot); _movp(T, Slot);
...@@ -3573,6 +3588,8 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { ...@@ -3573,6 +3588,8 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
if (isVectorType(Dest->getType())) { if (isVectorType(Dest->getType())) {
Type SrcTy = SrcT->getType(); Type SrcTy = SrcT->getType();
Variable *T = makeReg(SrcTy); Variable *T = makeReg(SrcTy);
Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
// ALIGNHACK: Until stack alignment support is implemented, vector // ALIGNHACK: Until stack alignment support is implemented, vector
// instructions need to have vector operands in registers. Once // instructions need to have vector operands in registers. Once
// there is support for stack alignment, LEGAL_HACK can be removed. // there is support for stack alignment, LEGAL_HACK can be removed.
...@@ -3584,11 +3601,12 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { ...@@ -3584,11 +3601,12 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
// Use blendvps or pblendvb to implement select. // Use blendvps or pblendvb to implement select.
if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
SrcTy == IceType_v4f32) { SrcTy == IceType_v4f32) {
Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);
_movp(xmm0, Condition); _movp(xmm0, ConditionRM);
_psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31));
_movp(T, SrcF); _movp(T, SrcFRM);
_blendvps(T, LEGAL_HACK(SrcT), xmm0); _blendvps(T, LEGAL_HACK(SrcTRM), xmm0);
_movp(Dest, T); _movp(Dest, T);
} else { } else {
assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
...@@ -3596,8 +3614,8 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { ...@@ -3596,8 +3614,8 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
: IceType_v16i8; : IceType_v16i8;
Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);
lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
_movp(T, SrcF); _movp(T, SrcFRM);
_pblendvb(T, LEGAL_HACK(SrcT), xmm0); _pblendvb(T, LEGAL_HACK(SrcTRM), xmm0);
_movp(Dest, T); _movp(Dest, T);
} }
return; return;
...@@ -3617,11 +3635,12 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { ...@@ -3617,11 +3635,12 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
} else if (typeElementType(SrcTy) != IceType_i1) { } else if (typeElementType(SrcTy) != IceType_i1) {
lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
} else { } else {
_movp(T, Condition); Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
_movp(T, ConditionRM);
} }
_movp(T2, T); _movp(T2, T);
_pand(T, LEGAL_HACK(SrcT)); _pand(T, LEGAL_HACK(SrcTRM));
_pandn(T2, LEGAL_HACK(SrcF)); _pandn(T2, LEGAL_HACK(SrcFRM));
_por(T, T2); _por(T, T2);
_movp(Dest, T); _movp(Dest, T);
#undef LEGAL_HACK #undef LEGAL_HACK
...@@ -3947,6 +3966,10 @@ OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) { ...@@ -3947,6 +3966,10 @@ OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {
Variable *Base = llvm::dyn_cast<Variable>(Operand); Variable *Base = llvm::dyn_cast<Variable>(Operand);
Constant *Offset = llvm::dyn_cast<Constant>(Operand); Constant *Offset = llvm::dyn_cast<Constant>(Operand);
assert(Base || Offset); assert(Base || Offset);
if (Offset) {
assert(llvm::isa<ConstantInteger>(Offset) ||
llvm::isa<ConstantRelocatable>(Offset));
}
Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
} }
return llvm::cast<OperandX8632Mem>(legalize(Mem)); return llvm::cast<OperandX8632Mem>(legalize(Mem));
...@@ -4046,6 +4069,10 @@ template <> void ConstantDouble::emit(GlobalContext *Ctx) const { ...@@ -4046,6 +4069,10 @@ template <> void ConstantDouble::emit(GlobalContext *Ctx) const {
Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]"; Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]";
} }
// x86 backend definition of ConstantUndef::emit(). Per the accompanying
// change description, undef values are supposed to be legalized to a zero
// value before emission, so reaching this function means an undef operand
// escaped legalization. The GlobalContext argument is unused.
void ConstantUndef::emit(GlobalContext *) const {
llvm_unreachable("undef value encountered by emitter.");
}
TargetGlobalInitX8632::TargetGlobalInitX8632(GlobalContext *Ctx) TargetGlobalInitX8632::TargetGlobalInitX8632(GlobalContext *Ctx)
: TargetGlobalInitLowering(Ctx) {} : TargetGlobalInitLowering(Ctx) {}
......
...@@ -34,7 +34,8 @@ bb21: ; preds = %bb27, %bb17 ...@@ -34,7 +34,8 @@ bb21: ; preds = %bb27, %bb17
%tmp22 = phi i32 [ undef, %bb17 ], [ %tmp30, %bb27 ] %tmp22 = phi i32 [ undef, %bb17 ], [ %tmp30, %bb27 ]
%tmp23 = add i32 undef, -1 %tmp23 = add i32 undef, -1
%tmp24 = add i32 undef, undef %tmp24 = add i32 undef, undef
%tmp25 = load i32* undef, align 1 %undef.ptr = inttoptr i32 undef to i32*
%tmp25 = load i32* %undef.ptr, align 1
%tmp26 = icmp eq i32 undef, %tmp22 %tmp26 = icmp eq i32 undef, %tmp22
br i1 %tmp26, label %bb34, label %bb32 br i1 %tmp26, label %bb34, label %bb32
......
...@@ -2,10 +2,16 @@ ...@@ -2,10 +2,16 @@
; RUN: %llvm2ice --verbose none %s | FileCheck %s ; RUN: %llvm2ice --verbose none %s | FileCheck %s
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -mattr=sse4.1 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -mattr=sse4.1 -O2 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -O2 --verbose none %s \ ; RUN: %llvm2ice -O2 --verbose none %s \
; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj
; RUN: %llvm2ice -Om1 --verbose none %s \ ; RUN: %llvm2ice -Om1 --verbose none %s \
; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj
; RUN: %llvm2ice -mattr=sse4.1 -O2 --verbose none %s \
; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj
; RUN: %llvm2ice -mattr=sse4.1 -Om1 --verbose none %s \
; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj
; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \ ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
...@@ -16,7 +22,6 @@ entry: ...@@ -16,7 +22,6 @@ entry:
ret i32 undef ret i32 undef
; CHECK-LABEL: undef_i32: ; CHECK-LABEL: undef_i32:
; CHECK: mov eax, 0 ; CHECK: mov eax, 0
; CHECK: ret
} }
define i64 @undef_i64() { define i64 @undef_i64() {
...@@ -34,7 +39,6 @@ entry: ...@@ -34,7 +39,6 @@ entry:
; CHECK-LABEL: undef_float: ; CHECK-LABEL: undef_float:
; CHECK-NOT: sub esp ; CHECK-NOT: sub esp
; CHECK: fld ; CHECK: fld
; CHECK: ret
} }
define <4 x i1> @undef_v4i1() { define <4 x i1> @undef_v4i1() {
...@@ -42,7 +46,6 @@ entry: ...@@ -42,7 +46,6 @@ entry:
ret <4 x i1> undef ret <4 x i1> undef
; CHECK-LABEL: undef_v4i1: ; CHECK-LABEL: undef_v4i1:
; CHECK: pxor ; CHECK: pxor
; CHECK: ret
} }
define <8 x i1> @undef_v8i1() { define <8 x i1> @undef_v8i1() {
...@@ -50,7 +53,6 @@ entry: ...@@ -50,7 +53,6 @@ entry:
ret <8 x i1> undef ret <8 x i1> undef
; CHECK-LABEL: undef_v8i1: ; CHECK-LABEL: undef_v8i1:
; CHECK: pxor ; CHECK: pxor
; CHECK: ret
} }
define <16 x i1> @undef_v16i1() { define <16 x i1> @undef_v16i1() {
...@@ -58,7 +60,6 @@ entry: ...@@ -58,7 +60,6 @@ entry:
ret <16 x i1> undef ret <16 x i1> undef
; CHECK-LABEL: undef_v16i1: ; CHECK-LABEL: undef_v16i1:
; CHECK: pxor ; CHECK: pxor
; CHECK: ret
} }
define <16 x i8> @undef_v16i8() { define <16 x i8> @undef_v16i8() {
...@@ -66,7 +67,6 @@ entry: ...@@ -66,7 +67,6 @@ entry:
ret <16 x i8> undef ret <16 x i8> undef
; CHECK-LABEL: undef_v16i8: ; CHECK-LABEL: undef_v16i8:
; CHECK: pxor ; CHECK: pxor
; CHECK: ret
} }
define <8 x i16> @undef_v8i16() { define <8 x i16> @undef_v8i16() {
...@@ -74,7 +74,6 @@ entry: ...@@ -74,7 +74,6 @@ entry:
ret <8 x i16> undef ret <8 x i16> undef
; CHECK-LABEL: undef_v8i16: ; CHECK-LABEL: undef_v8i16:
; CHECK: pxor ; CHECK: pxor
; CHECK: ret
} }
define <4 x i32> @undef_v4i32() { define <4 x i32> @undef_v4i32() {
...@@ -82,7 +81,6 @@ entry: ...@@ -82,7 +81,6 @@ entry:
ret <4 x i32> undef ret <4 x i32> undef
; CHECK-LABEL: undef_v4i32: ; CHECK-LABEL: undef_v4i32:
; CHECK: pxor ; CHECK: pxor
; CHECK: ret
} }
define <4 x float> @undef_v4f32() { define <4 x float> @undef_v4f32() {
...@@ -90,7 +88,207 @@ entry: ...@@ -90,7 +88,207 @@ entry:
ret <4 x float> undef ret <4 x float> undef
; CHECK-LABEL: undef_v4f32: ; CHECK-LABEL: undef_v4f32:
; CHECK: pxor ; CHECK: pxor
; CHECK: ret }
; Vector add whose first operand is undef. The expected output contains a
; pxor, presumably the zero value that the undef operand is legalized to.
define <4 x i32> @vector_arith(<4 x i32> %arg) {
entry:
%val = add <4 x i32> undef, %arg
ret <4 x i32> %val
; CHECK-LABEL: vector_arith:
; CHECK: pxor
}
; Bitcast of an undef <4 x i32> to <4 x float>. The expected output contains
; a pxor, presumably materializing the legalized (zero) source vector.
define <4 x float> @vector_bitcast() {
entry:
%val = bitcast <4 x i32> undef to <4 x float>
ret <4 x float> %val
; CHECK-LABEL: vector_bitcast:
; CHECK: pxor
}
; Sign-extension of an undef <4 x i1> to <4 x i32>. The expected output
; contains a pxor.
define <4 x i32> @vector_sext() {
entry:
%val = sext <4 x i1> undef to <4 x i32>
ret <4 x i32> %val
; CHECK-LABEL: vector_sext:
; CHECK: pxor
}
; Zero-extension of an undef <4 x i1> to <4 x i32>. The expected output
; contains a pxor.
define <4 x i32> @vector_zext() {
entry:
%val = zext <4 x i1> undef to <4 x i32>
ret <4 x i32> %val
; CHECK-LABEL: vector_zext:
; CHECK: pxor
}
; Truncation of an undef <4 x i32> to <4 x i1>. The expected output contains
; a pxor.
define <4 x i1> @vector_trunc() {
entry:
%val = trunc <4 x i32> undef to <4 x i1>
ret <4 x i1> %val
; CHECK-LABEL: vector_trunc:
; CHECK: pxor
}
; Vector integer equality compare whose first operand is undef. The expected
; output contains a pxor.
define <4 x i1> @vector_icmp(<4 x i32> %arg) {
entry:
%val = icmp eq <4 x i32> undef, %arg
ret <4 x i1> %val
; CHECK-LABEL: vector_icmp:
; CHECK: pxor
}
; Vector floating-point compare (ueq) whose first operand is undef. The
; expected output contains a pxor.
define <4 x i1> @vector_fcmp(<4 x float> %arg) {
entry:
%val = fcmp ueq <4 x float> undef, %arg
ret <4 x i1> %val
; CHECK-LABEL: vector_fcmp:
; CHECK: pxor
}
; Float-to-signed-int conversion of an undef <4 x float>. The expected output
; contains a pxor.
define <4 x i32> @vector_fptosi() {
entry:
%val = fptosi <4 x float> undef to <4 x i32>
ret <4 x i32> %val
; CHECK-LABEL: vector_fptosi:
; CHECK: pxor
}
; Float-to-unsigned-int conversion of an undef <4 x float>. The expected
; output contains a pxor.
define <4 x i32> @vector_fptoui() {
entry:
%val = fptoui <4 x float> undef to <4 x i32>
ret <4 x i32> %val
; CHECK-LABEL: vector_fptoui:
; CHECK: pxor
}
; Signed-int-to-float conversion of an undef <4 x i32>. The expected output
; contains a pxor.
define <4 x float> @vector_sitofp() {
entry:
%val = sitofp <4 x i32> undef to <4 x float>
ret <4 x float> %val
; CHECK-LABEL: vector_sitofp:
; CHECK: pxor
}
; Unsigned-int-to-float conversion of an undef <4 x i32>. The expected output
; contains a pxor.
define <4 x float> @vector_uitofp() {
entry:
%val = uitofp <4 x i32> undef to <4 x float>
ret <4 x float> %val
; CHECK-LABEL: vector_uitofp:
; CHECK: pxor
}
; insertelement where the vector being inserted into is undef (the scalar and
; index are constants). The expected output contains a pxor.
define <4 x float> @vector_insertelement_arg1() {
entry:
%val = insertelement <4 x float> undef, float 1.0, i32 0
ret <4 x float> %val
; CHECK-LABEL: vector_insertelement_arg1:
; CHECK: pxor
}
; insertelement where the scalar being inserted is an undef float. Unlike the
; vector cases, the expected output references a label starting with
; "L$float$" -- presumably a float constant-pool entry holding the legalized
; value; confirm against the emitter.
define <4 x float> @vector_insertelement_arg2(<4 x float> %arg) {
entry:
%val = insertelement <4 x float> %arg, float undef, i32 0
ret <4 x float> %val
; CHECK-LABEL: vector_insertelement_arg2:
; CHECK: [L$float$
}
; extractelement of lane 0 from an undef <4 x float>. The expected output
; contains a pxor.
define float @vector_extractelement_v4f32_index_0() {
entry:
%val = extractelement <4 x float> undef, i32 0
ret float %val
; CHECK-LABEL: vector_extractelement_v4f32_index_0:
; CHECK: pxor
}
; extractelement of lane 1 (a non-zero index) from an undef <4 x float>. The
; expected output contains a pxor.
define float @vector_extractelement_v4f32_index_1() {
entry:
%val = extractelement <4 x float> undef, i32 1
ret float %val
; CHECK-LABEL: vector_extractelement_v4f32_index_1:
; CHECK: pxor
}
; extractelement of lane 7 from an undef <16 x i1>; the i1 result is
; sign-extended to i32 so it can be returned. The expected output contains a
; pxor.
define i32 @vector_extractelement_v16i1_index_7() {
entry:
%val.trunc = extractelement <16 x i1> undef, i32 7
%val = sext i1 %val.trunc to i32
ret i32 %val
; CHECK-LABEL: vector_extractelement_v16i1_index_7:
; CHECK: pxor
}
; Vector select on <4 x i32> values where the condition vector is undef. The
; expected output contains a pxor.
define <4 x i32> @vector_select_v4i32_cond(<4 x i32> %a, <4 x i32> %b) {
entry:
%val = select <4 x i1> undef, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %val
; CHECK-LABEL: vector_select_v4i32_cond:
; CHECK: pxor
}
; Vector select on <4 x i32> values where the true-side operand is undef. The
; expected output contains a pxor.
define <4 x i32> @vector_select_v4i32_arg1(<4 x i1> %cond, <4 x i32> %b) {
entry:
%val = select <4 x i1> %cond, <4 x i32> undef, <4 x i32> %b
ret <4 x i32> %val
; CHECK-LABEL: vector_select_v4i32_arg1:
; CHECK: pxor
}
; Vector select on <4 x i32> values where the false-side operand is undef.
; The expected output contains a pxor.
define <4 x i32> @vector_select_v4i32_arg2(<4 x i1> %cond, <4 x i32> %a) {
entry:
%val = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> undef
ret <4 x i32> %val
; CHECK-LABEL: vector_select_v4i32_arg2:
; CHECK: pxor
}
; Vector select on <4 x i1> values where the condition vector is undef. The
; expected output contains a pxor.
define <4 x i1> @vector_select_v4i1_cond(<4 x i1> %a, <4 x i1> %b) {
entry:
%val = select <4 x i1> undef, <4 x i1> %a, <4 x i1> %b
ret <4 x i1> %val
; CHECK-LABEL: vector_select_v4i1_cond:
; CHECK: pxor
}
; Vector select on <4 x i1> values where the true-side operand is undef. The
; expected output contains a pxor.
define <4 x i1> @vector_select_v4i1_arg1(<4 x i1> %cond, <4 x i1> %b) {
entry:
%val = select <4 x i1> %cond, <4 x i1> undef, <4 x i1> %b
ret <4 x i1> %val
; CHECK-LABEL: vector_select_v4i1_arg1:
; CHECK: pxor
}
; Vector select on <4 x i1> values where the false-side operand is undef. The
; expected output contains a pxor.
define <4 x i1> @vector_select_v4i1_arg2(<4 x i1> %cond, <4 x i1> %a) {
entry:
%val = select <4 x i1> %cond, <4 x i1> %a, <4 x i1> undef
ret <4 x i1> %val
; CHECK-LABEL: vector_select_v4i1_arg2:
; CHECK: pxor
}
; Vector select on <4 x float> values where the condition vector is undef.
; The expected output contains a pxor.
define <4 x float> @vector_select_v4f32_cond(<4 x float> %a, <4 x float> %b) {
entry:
%val = select <4 x i1> undef, <4 x float> %a, <4 x float> %b
ret <4 x float> %val
; CHECK-LABEL: vector_select_v4f32_cond:
; CHECK: pxor
}
; Vector select on <4 x float> values where the true-side operand is undef.
; The expected output contains a pxor.
define <4 x float> @vector_select_v4f32_arg1(<4 x i1> %cond, <4 x float> %b) {
entry:
%val = select <4 x i1> %cond, <4 x float> undef, <4 x float> %b
ret <4 x float> %val
; CHECK-LABEL: vector_select_v4f32_arg1:
; CHECK: pxor
}
; Vector select on <4 x float> values where the false-side operand is undef.
; The expected output contains a pxor.
define <4 x float> @vector_select_v4f32_arg2(<4 x i1> %cond, <4 x float> %a) {
entry:
%val = select <4 x i1> %cond, <4 x float> %a, <4 x float> undef
ret <4 x float> %val
; CHECK-LABEL: vector_select_v4f32_arg2:
; CHECK: pxor
} }
; ERRORS-NOT: ICE translation error ; ERRORS-NOT: ICE translation error
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment