Commit 79568d27 by John Porto

Subzero. ARM32. Fixes Insert/Extract v(8|16)i1 bug.

Subzero emits the following sequence when extracting elements from a vector of i1:

    vmov.8 Rt, Dm[I]

I should be scaled when accessing v4i1 and v8i1, i.e., to extract the n-th boolean in a v8i1, the emitted code should be

    vmov.8 Rt, Dm[I*n]

Insertions are handled by changing the operands' types, so that a v4i1 is handled as a v4i32, and a v8i1 as a v8i16. I.e., to insert the n-th boolean into a v8i1, the emitted code should be

    vmov.16 Dt[I], Rm

instead of

    vmov.8 Dt[I*n], Rm

This clears the upper bits for that element.

BUG=
R=eholk@chromium.org

Review URL: https://codereview.chromium.org/1876083004 .
parent e922c236
...@@ -1108,8 +1108,27 @@ Register getDRegister(const Variable *Src, uint32_t Index) { ...@@ -1108,8 +1108,27 @@ Register getDRegister(const Variable *Src, uint32_t Index) {
} }
} }
constexpr uint32_t getDIndex(uint32_t NumElements, uint32_t Index) { uint32_t adjustDIndex(Type Ty, uint32_t DIndex) {
return (Index < NumElements / 2) ? Index : Index - (NumElements / 2); // If Ty is a vector of i1, we may need to adjust DIndex. This is needed
// because, e.g., the second i1 in a v4i1 is accessed with a
//
// vmov.s8 Qd[4], Rn
switch (Ty) {
case IceType_v4i1:
return DIndex * 4;
case IceType_v8i1:
return DIndex * 2;
case IceType_v16i1:
return DIndex;
default:
return DIndex;
}
}
uint32_t getDIndex(Type Ty, uint32_t NumElements, uint32_t Index) {
const uint32_t DIndex =
(Index < NumElements / 2) ? Index : Index - (NumElements / 2);
return adjustDIndex(Ty, DIndex);
} }
// For floating point values, we can insertelement or extractelement by moving // For floating point values, we can insertelement or extractelement by moving
...@@ -1152,12 +1171,13 @@ void InstARM32Extract::emit(const Cfg *Func) const { ...@@ -1152,12 +1171,13 @@ void InstARM32Extract::emit(const Cfg *Func) const {
getDest()->emit(Func); getDest()->emit(Func);
Str << ", "; Str << ", ";
const size_t VectorSize = typeNumElements(Src->getType()); const Type SrcTy = Src->getType();
const size_t VectorSize = typeNumElements(SrcTy);
const Register SrcReg = getDRegister(Src, Index); const Register SrcReg = getDRegister(Src, Index);
Str << RegARM32::RegTable[SrcReg].Name; Str << RegARM32::RegTable[SrcReg].Name;
Str << "[" << getDIndex(VectorSize, Index) << "]"; Str << "[" << getDIndex(SrcTy, VectorSize, Index) << "]";
} else if (isFloatingType(DestTy)) { } else if (isFloatingType(DestTy)) {
const Register SrcReg = getSRegister(Src, Index); const Register SrcReg = getSRegister(Src, Index);
...@@ -1175,11 +1195,12 @@ void InstARM32Extract::emitIAS(const Cfg *Func) const { ...@@ -1175,11 +1195,12 @@ void InstARM32Extract::emitIAS(const Cfg *Func) const {
const Operand *Dest = getDest(); const Operand *Dest = getDest();
const Type DestTy = Dest->getType(); const Type DestTy = Dest->getType();
const Operand *Src = getSrc(0); const Operand *Src = getSrc(0);
const Type SrcTy = Src->getType();
assert(isVectorType(Src->getType())); assert(isVectorType(Src->getType()));
assert(DestTy == typeElementType(Src->getType())); assert(DestTy == typeElementType(Src->getType()));
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
if (isIntegerType(DestTy)) { if (isIntegerType(DestTy)) {
Asm->vmovrqi(Dest, Src, Index, getPredicate()); Asm->vmovrqi(Dest, Src, adjustDIndex(SrcTy, Index), getPredicate());
assert(!Asm->needsTextFixup()); assert(!Asm->needsTextFixup());
return; return;
} }
...@@ -1188,12 +1209,28 @@ void InstARM32Extract::emitIAS(const Cfg *Func) const { ...@@ -1188,12 +1209,28 @@ void InstARM32Extract::emitIAS(const Cfg *Func) const {
assert(!Asm->needsTextFixup()); assert(!Asm->needsTextFixup());
} }
namespace {
// Returns the type that should be used when emitting an insertion into a
// vector of type Ty. Vectors of i1 are mapped to the integer vector type with
// the same element count (v4i1 -> v4i32, v8i1 -> v8i16, v16i1 -> v16i8); every
// other vector type is returned unchanged. Ty must be a vector type.
Type insertionType(Type Ty) {
  assert(isVectorType(Ty));
  if (Ty == IceType_v4i1)
    return IceType_v4i32;
  if (Ty == IceType_v8i1)
    return IceType_v8i16;
  if (Ty == IceType_v16i1)
    return IceType_v16i8;
  // Not a vector of booleans: the insertion uses the type as-is.
  return Ty;
}
} // end of anonymous namespace
void InstARM32Insert::emit(const Cfg *Func) const { void InstARM32Insert::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
const Variable *Dest = getDest(); const Variable *Dest = getDest();
const Type DestTy = getDest()->getType();
const auto *Src = llvm::cast<Variable>(getSrc(0)); const auto *Src = llvm::cast<Variable>(getSrc(0));
const Type DestTy = insertionType(getDest()->getType());
assert(isVectorType(DestTy));
if (isIntegerType(DestTy)) { if (isIntegerType(DestTy)) {
Str << "\t" Str << "\t"
...@@ -1203,7 +1240,8 @@ void InstARM32Insert::emit(const Cfg *Func) const { ...@@ -1203,7 +1240,8 @@ void InstARM32Insert::emit(const Cfg *Func) const {
const size_t VectorSize = typeNumElements(DestTy); const size_t VectorSize = typeNumElements(DestTy);
const Register DestReg = getDRegister(Dest, Index); const Register DestReg = getDRegister(Dest, Index);
const uint32_t Index = getDIndex(VectorSize, this->Index); const uint32_t Index =
getDIndex(insertionType(DestTy), VectorSize, this->Index);
Str << RegARM32::RegTable[DestReg].Name; Str << RegARM32::RegTable[DestReg].Name;
Str << "[" << Index << "], "; Str << "[" << Index << "], ";
Src->emit(Func); Src->emit(Func);
...@@ -1221,14 +1259,16 @@ void InstARM32Insert::emit(const Cfg *Func) const { ...@@ -1221,14 +1259,16 @@ void InstARM32Insert::emit(const Cfg *Func) const {
void InstARM32Insert::emitIAS(const Cfg *Func) const { void InstARM32Insert::emitIAS(const Cfg *Func) const {
const Variable *Dest = getDest(); const Variable *Dest = getDest();
const Operand *Src = getSrc(0); const auto *Src = llvm::cast<Variable>(getSrc(0));
const Type SrcTy = Src->getType(); const Type DestTy = insertionType(Dest->getType());
assert(isVectorType(Dest->getType())); const Type SrcTy = typeElementType(DestTy);
assert(typeElementType(Dest->getType()) == SrcTy); assert(SrcTy == Src->getType() || Src->getType() == IceType_i1);
assert(isVectorType(DestTy));
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
if (isIntegerType(SrcTy)) { if (isIntegerType(SrcTy)) {
const Operand *Src = getSrc(0); Asm->vmovqir(Dest->asType(Func, DestTy, Dest->getRegNum()),
Asm->vmovqir(Dest, Index, Src, getPredicate()); adjustDIndex(DestTy, Index),
Src->asType(Func, SrcTy, Src->getRegNum()), getPredicate());
assert(!Asm->needsTextFixup()); assert(!Asm->needsTextFixup());
return; return;
} }
......
...@@ -24,7 +24,7 @@ entry: ...@@ -24,7 +24,7 @@ entry:
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
; ASM-NEXT: vmovne.f32 s17, s16 ; ASM-NEXT: vmovne.f32 s17, s16
; ASM-NEXT: vmov.f32 s12, s17 ; ASM-NEXT: vmov.f32 s12, s17
; ASM-NEXT: vmov.s8 r0, d0[1] ; ASM-NEXT: vmov.s8 r0, d0[4]
; ASM-NEXT: vmov.f32 s16, s5 ; ASM-NEXT: vmov.f32 s16, s5
; ASM-NEXT: vmov.f32 s17, s9 ; ASM-NEXT: vmov.f32 s17, s9
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
...@@ -36,7 +36,7 @@ entry: ...@@ -36,7 +36,7 @@ entry:
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
; ASM-NEXT: vmovne.f32 s17, s16 ; ASM-NEXT: vmovne.f32 s17, s16
; ASM-NEXT: vmov.f32 s14, s17 ; ASM-NEXT: vmov.f32 s14, s17
; ASM-NEXT: vmov.s8 r0, d1[1] ; ASM-NEXT: vmov.s8 r0, d1[4]
; ASM-NEXT: vmov.f32 s4, s7 ; ASM-NEXT: vmov.f32 s4, s7
; ASM-NEXT: vmov.f32 s8, s11 ; ASM-NEXT: vmov.f32 s8, s11
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
...@@ -65,7 +65,7 @@ entry: ...@@ -65,7 +65,7 @@ entry:
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1 ; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.32 d6[0], r2 ; ASM-NEXT: vmov.32 d6[0], r2
; ASM-NEXT: vmov.s8 r0, d0[1] ; ASM-NEXT: vmov.s8 r0, d0[4]
; ASM-NEXT: vmov.32 r1, d2[1] ; ASM-NEXT: vmov.32 r1, d2[1]
; ASM-NEXT: vmov.32 r2, d4[1] ; ASM-NEXT: vmov.32 r2, d4[1]
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
...@@ -77,7 +77,7 @@ entry: ...@@ -77,7 +77,7 @@ entry:
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1 ; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.32 d7[0], r2 ; ASM-NEXT: vmov.32 d7[0], r2
; ASM-NEXT: vmov.s8 r0, d1[1] ; ASM-NEXT: vmov.s8 r0, d1[4]
; ASM-NEXT: vmov.32 r1, d3[1] ; ASM-NEXT: vmov.32 r1, d3[1]
; ASM-NEXT: vmov.32 r2, d5[1] ; ASM-NEXT: vmov.32 r2, d5[1]
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
...@@ -103,19 +103,19 @@ entry: ...@@ -103,19 +103,19 @@ entry:
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1 ; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d6[0], r2 ; ASM-NEXT: vmov.16 d6[0], r2
; ASM-NEXT: vmov.s8 r0, d0[1] ; ASM-NEXT: vmov.s8 r0, d0[2]
; ASM-NEXT: vmov.s16 r1, d2[1] ; ASM-NEXT: vmov.s16 r1, d2[1]
; ASM-NEXT: vmov.s16 r2, d4[1] ; ASM-NEXT: vmov.s16 r2, d4[1]
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1 ; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d6[1], r2 ; ASM-NEXT: vmov.16 d6[1], r2
; ASM-NEXT: vmov.s8 r0, d0[2] ; ASM-NEXT: vmov.s8 r0, d0[4]
; ASM-NEXT: vmov.s16 r1, d2[2] ; ASM-NEXT: vmov.s16 r1, d2[2]
; ASM-NEXT: vmov.s16 r2, d4[2] ; ASM-NEXT: vmov.s16 r2, d4[2]
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1 ; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d6[2], r2 ; ASM-NEXT: vmov.16 d6[2], r2
; ASM-NEXT: vmov.s8 r0, d0[3] ; ASM-NEXT: vmov.s8 r0, d0[6]
; ASM-NEXT: vmov.s16 r1, d2[3] ; ASM-NEXT: vmov.s16 r1, d2[3]
; ASM-NEXT: vmov.s16 r2, d4[3] ; ASM-NEXT: vmov.s16 r2, d4[3]
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
...@@ -127,19 +127,19 @@ entry: ...@@ -127,19 +127,19 @@ entry:
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1 ; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d7[0], r2 ; ASM-NEXT: vmov.16 d7[0], r2
; ASM-NEXT: vmov.s8 r0, d1[1] ; ASM-NEXT: vmov.s8 r0, d1[2]
; ASM-NEXT: vmov.s16 r1, d3[1] ; ASM-NEXT: vmov.s16 r1, d3[1]
; ASM-NEXT: vmov.s16 r2, d5[1] ; ASM-NEXT: vmov.s16 r2, d5[1]
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1 ; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d7[1], r2 ; ASM-NEXT: vmov.16 d7[1], r2
; ASM-NEXT: vmov.s8 r0, d1[2] ; ASM-NEXT: vmov.s8 r0, d1[4]
; ASM-NEXT: vmov.s16 r1, d3[2] ; ASM-NEXT: vmov.s16 r1, d3[2]
; ASM-NEXT: vmov.s16 r2, d5[2] ; ASM-NEXT: vmov.s16 r2, d5[2]
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1 ; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d7[2], r2 ; ASM-NEXT: vmov.16 d7[2], r2
; ASM-NEXT: vmov.s8 r0, d1[3] ; ASM-NEXT: vmov.s8 r0, d1[6]
; ASM-NEXT: vmov.s16 r1, d3[3] ; ASM-NEXT: vmov.s16 r1, d3[3]
; ASM-NEXT: vmov.s16 r2, d5[3] ; ASM-NEXT: vmov.s16 r2, d5[3]
; ASM-NEXT: tst r0, #1 ; ASM-NEXT: tst r0, #1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment