Commit 658bae20 by Eric Holk

Subzero: ARM32: lowering of vector insert and extract.

parent 2d6c8267
......@@ -20,7 +20,6 @@
#include "IceCfgNode.h"
#include "IceInst.h"
#include "IceOperand.h"
#include "IceRegistersARM32.h"
#include "IceTargetLoweringARM32.h"
namespace Ice {
......@@ -28,6 +27,8 @@ namespace ARM32 {
namespace {
// Short alias for the ARM32 register enumeration; the register lookup helpers
// in this file return values of this type.
using Register = RegARM32::AllRegisters;
// Maximum number of consecutive registers allowed in vpush/vpop.
static constexpr SizeT VpushVpopMaxConsecRegs = 16;
......@@ -1043,6 +1044,132 @@ InstARM32Mov::InstARM32Mov(Cfg *Func, Variable *Dest, Operand *Src,
}
}
// Returns the D register holding element Index of the 128-bit vector that Src
// is allocated to. A Q register is made up of two D registers: elements in the
// lower half of the vector live in the first one, the remaining elements in
// the second. Use getDIndex() to obtain the element's position within the
// returned D register.
Register getDRegister(const Variable *Src, uint32_t Index) {
  assert(Src->hasReg());
  const auto QReg = static_cast<Register>(Src->getRegNum());
  const RegARM32::RegTableType &QEntry = RegARM32::RegTable[QReg];
  assert(QEntry.IsVec128);

  // This code assumes the alias list starts with Q_n, D_2n, D_2n+1. Checking
  // that the two candidates have consecutive encodings helps to make sure the
  // right pair of D registers was picked.
  //
  // TODO(jpp): find a way to do this that doesn't rely on ordering of the
  // alias list.
  const auto LowD = QEntry.Aliases[1];
  const auto HighD = QEntry.Aliases[2];
  assert(RegARM32::RegTable[LowD].Encoding + 1 ==
         RegARM32::RegTable[HighD].Encoding);

  const uint32_t HalfElements = typeNumElements(Src->getType()) / 2;
  return static_cast<Register>(Index < HalfElements ? LowD : HighD);
}
// Maps an element index within a vector of NumElements elements to the index
// of that element inside the half of the vector that contains it: indices in
// the lower half are returned unchanged, indices in the upper half are rebased
// to start at zero.
constexpr uint32_t getDIndex(uint32_t NumElements, uint32_t Index) {
  return Index - ((Index >= NumElements / 2) ? NumElements / 2 : 0u);
}
// Returns the S register that holds element Index of the v4f32 vector Src is
// allocated to. Float elements can be inserted or extracted with a plain move
// to or from that S register.
Register getSRegister(const Variable *Src, uint32_t Index) {
  assert(Src->hasReg());
  const auto QReg = static_cast<Register>(Src->getRegNum());

  // Only q0 - q7 are accepted here, so that the wanted element can be reached
  // directly as one of the S registers.
  assert(Src->getType() == IceType_v4f32);
  assert(QReg < RegARM32::Reg_q8);
  assert(Index < 4);

  // This assumes the register alias list goes q0, d0, d1, s0, s1, s2, s3, i.e.
  // the S registers start at position 3.
  //
  // TODO(jpp): find a way to do this that doesn't rely on ordering of the
  // alias list.
  const RegARM32::RegTableType &QEntry = RegARM32::RegTable[QReg];
  return static_cast<Register>(QEntry.Aliases[3 + Index]);
}
void InstARM32Extract::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
const Type DestTy = getDest()->getType();
const auto *Src = llvm::cast<Variable>(getSrc(0));
if (isIntegerType(DestTy)) {
Str << "\t"
<< "vmov" << getPredicate();
const uint32_t BitSize = typeWidthInBytes(DestTy) * CHAR_BIT;
if (BitSize < 32) {
Str << ".s" << BitSize;
} else {
Str << "." << BitSize;
}
Str << "\t";
getDest()->emit(Func);
Str << ", ";
const size_t VectorSize = typeNumElements(Src->getType());
const Register SrcReg = getDRegister(Src, Index);
Str << RegARM32::RegTable[SrcReg].Name;
Str << "[" << getDIndex(VectorSize, Index) << "]";
} else if (isFloatingType(DestTy)) {
const Register SrcReg = getSRegister(Src, Index);
Str << "\t"
<< "vmov" << getPredicate() << ".f32"
<< "\t";
getDest()->emit(Func);
Str << ", " << RegARM32::RegTable[SrcReg].Name;
} else {
assert(false && "Invalid extract type");
}
}
void InstARM32Insert::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
const Variable *Dest = getDest();
const Type DestTy = getDest()->getType();
const auto *Src = llvm::cast<Variable>(getSrc(0));
if (isIntegerType(DestTy)) {
Str << "\t"
<< "vmov" << getPredicate();
const size_t BitSize = typeWidthInBytes(typeElementType(DestTy)) * CHAR_BIT;
Str << "." << BitSize << "\t";
const size_t VectorSize = typeNumElements(DestTy);
const Register DestReg = getDRegister(Dest, Index);
const uint32_t Index = getDIndex(VectorSize, this->Index);
Str << RegARM32::RegTable[DestReg].Name;
Str << "[" << Index << "], ";
Src->emit(Func);
} else if (isFloatingType(DestTy)) {
Str << "\t"
<< "vmov" << getPredicate() << ".f32"
<< "\t";
const Register DestReg = getSRegister(Dest, Index);
Str << RegARM32::RegTable[DestReg].Name << ", ";
Src->emit(Func);
} else {
assert(false && "Invalid insert type");
}
}
template <InstARM32::InstKindARM32 K>
void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func);
......
......@@ -23,6 +23,7 @@
#include "IceInst.h"
#include "IceInstARM32.def"
#include "IceOperand.h"
#include "IceRegistersARM32.h"
namespace Ice {
namespace ARM32 {
......@@ -389,6 +390,8 @@ public:
Cmp,
Dmb,
Eor,
Extract,
Insert,
Label,
Ldr,
Ldrex,
......@@ -1349,6 +1352,62 @@ private:
Variable *DestHi = nullptr;
};
/// Generates vmov Rd, Dn[x] instructions, and their related floating point
/// versions.
class InstARM32Extract final : public InstARM32Pred {
InstARM32Extract() = delete;
InstARM32Extract(const InstARM32Extract &) = delete;
InstARM32Extract &operator=(const InstARM32Extract &) = delete;
public:
static InstARM32Extract *create(Cfg *Func, Variable *Dest, Variable *Src0,
uint32_t Index, CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Extract>())
InstARM32Extract(Func, Dest, Src0, Index, Predicate);
}
void emit(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Extract); }
private:
InstARM32Extract(Cfg *Func, Variable *Dest, Variable *Src0, uint32_t Index,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Extract, 1, Dest, Predicate),
Index(Index) {
assert(Index < typeNumElements(Src0->getType()));
addSource(Src0);
}
const uint32_t Index;
};
/// Generates vmov Dn[x], Rd instructions, and their related floating point
/// versions.
class InstARM32Insert final : public InstARM32Pred {
InstARM32Insert() = delete;
InstARM32Insert(const InstARM32Insert &) = delete;
InstARM32Insert &operator=(const InstARM32Insert &) = delete;
public:
static InstARM32Insert *create(Cfg *Func, Variable *Dest, Variable *Src0,
uint32_t Index, CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Insert>())
InstARM32Insert(Func, Dest, Src0, Index, Predicate);
}
void emit(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Insert); }
private:
InstARM32Insert(Cfg *Func, Variable *Dest, Variable *Src0, uint32_t Index,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Insert, 1, Dest, Predicate),
Index(Index) {
assert(Index < typeNumElements(Dest->getType()));
addSource(Src0);
}
const uint32_t Index;
};
class InstARM32Vcmp final : public InstARM32Pred {
InstARM32Vcmp() = delete;
InstARM32Vcmp(const InstARM32Vcmp &) = delete;
......
......@@ -219,8 +219,12 @@ static inline IceString getRegName(int32_t RegNum) {
return RegTable[RegNum].Name;
}
// Extend enum RegClass with ARM32-specific register classes (if any).
enum RegClassARM32 : uint8_t { RCARM32_NUM = RC_Target };
// Extend enum RegClass with ARM32-specific register classes.
// The ARM32-specific values start at RC_Target, i.e. right after the generic
// register classes.
enum RegClassARM32 : uint8_t {
RCARM32_QtoS = RC_Target, // Denotes Q registers that are aliased by S
// registers.
// Not a register class itself: one past the last ARM32-specific class, usable
// as an exclusive upper bound when validating a register class value.
RCARM32_NUM
};
} // end of namespace RegARM32
} // end of namespace ARM32
......
......@@ -296,7 +296,9 @@ void TargetARM32::staticInit(GlobalContext *Ctx) {
llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM);
llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM);
llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
llvm::SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);
llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding;
for (int i = 0; i < RegARM32::Reg_NUM; ++i) {
const auto &Entry = RegARM32::RegTable[i];
IntegerRegisters[i] = Entry.IsInt;
......@@ -305,6 +307,9 @@ void TargetARM32::staticInit(GlobalContext *Ctx) {
Float64Registers[i] = Entry.IsFP64;
VectorRegisters[i] = Entry.IsVec128;
RegisterAliases[i].resize(RegARM32::Reg_NUM);
// TODO(eholk): It would be better to store a QtoS flag in the
// IceRegistersARM32 table than to compare their encodings here.
QtoSRegisters[i] = Entry.IsVec128 && Entry.Encoding < EncodedReg_q8;
for (int j = 0; j < Entry.NumAliases; ++j) {
assert(i == j || !RegisterAliases[i][Entry.Aliases[j]]);
RegisterAliases[i].set(Entry.Aliases[j]);
......@@ -340,6 +345,7 @@ void TargetARM32::staticInit(GlobalContext *Ctx) {
TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
TypeToRegisterSet[RegARM32::RCARM32_QtoS] = QtoSRegisters;
for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
......@@ -3834,7 +3840,28 @@ void TargetARM32::lowerCast(const InstCast *Instr) {
}
// Lowers an extractelement instruction whose index operand is a
// ConstantInteger32. The source vector is copied into a fresh register TSrc0,
// the selected element is extracted into a temporary T via _extractelement,
// and T is then moved into the instruction's Dest.
void TargetARM32::lowerExtractElement(const InstExtractElement *Instr) {
// NOTE(review): the following call looks like the pre-change body that the
// diff view shows without its removal marker -- confirm it is not present in
// the final file.
UnimplementedLoweringError(this, Instr);
Variable *Dest = Instr->getDest();
Type DestTy = Dest->getType();
Variable *Src0 = legalizeToReg(Instr->getSrc(0));
Operand *Src1 = Instr->getSrc(1);
if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
const uint32_t Index = Imm->getValue();
Variable *T = makeReg(DestTy);
Variable *TSrc0 = makeReg(Src0->getType());
if (isFloatingType(DestTy)) {
// We need to make sure the source is in a suitable register: float elements
// are read through an S register, so the copy is restricted to the QtoS
// class (Q registers that are aliased by S registers).
TSrc0->setRegClass(RegARM32::RCARM32_QtoS);
}
_mov(TSrc0, Src0);
_extractelement(T, TSrc0, Index);
_mov(Dest, T);
return;
}
// NOTE(review): when asserts are compiled out, a non-constant index reaches
// the end of the function without anything having been lowered -- confirm
// that this cannot happen or report a hard error instead.
assert(false && "extractelement requires a constant index");
}
namespace {
......@@ -4229,7 +4256,28 @@ void TargetARM32::lowerIcmp(const InstIcmp *Instr) {
}
// Lowers an insertelement instruction whose index operand is a
// ConstantInteger32. The source vector is copied into a temporary T, the
// scalar Src1 is written into element Index of T via _insertelement, and T is
// then moved into the instruction's Dest.
void TargetARM32::lowerInsertElement(const InstInsertElement *Instr) {
// NOTE(review): the following call looks like the pre-change body that the
// diff view shows without its removal marker -- confirm it is not present in
// the final file.
UnimplementedLoweringError(this, Instr);
Variable *Dest = Instr->getDest();
Type DestTy = Dest->getType();
Variable *Src0 = legalizeToReg(Instr->getSrc(0));
Variable *Src1 = legalizeToReg(Instr->getSrc(1));
Operand *Src2 = Instr->getSrc(2);
if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
const uint32_t Index = Imm->getValue();
Variable *T = makeReg(DestTy);
if (isFloatingType(DestTy)) {
// Float elements are written through an S register, so T is restricted to the
// QtoS class (Q registers that are aliased by S registers).
T->setRegClass(RegARM32::RCARM32_QtoS);
}
_mov(T, Src0);
_insertelement(T, Src1, Index);
// NOTE(review): presumably marks the insert above as redefining T, which
// already holds the copy of Src0, rather than defining it afresh -- confirm
// against the helper's definition.
_set_dest_redefined();
_mov(Dest, T);
return;
}
// NOTE(review): when asserts are compiled out, a non-constant index reaches
// the end of the function without anything having been lowered -- confirm
// that this cannot happen or report a hard error instead.
assert(false && "insertelement requires a constant index");
}
namespace {
......
......@@ -85,13 +85,18 @@ public:
// Returns the entry of TypeToRegisterSet selected by Var's register class.
const llvm::SmallBitVector &
getRegistersForVariable(const Variable *Var) const override {
RegClass RC = Var->getRegClass();
// NOTE(review): the next two statements look like the pre-change body that
// the diff view shows without removal markers; as written they would make the
// switch below unreachable -- confirm they are not present in the final file.
assert(RC < RC_Target);
return TypeToRegisterSet[RC];
// Classes other than the ARM32-specific QtoS class are checked to be below
// RC_Target; QtoS is accepted explicitly. Both paths index the same table.
switch (RC) {
default:
assert(RC < RC_Target);
return TypeToRegisterSet[RC];
case RegARM32::RCARM32_QtoS:
return TypeToRegisterSet[RC];
}
}
// Returns the entry of TypeToRegisterSetUnfiltered selected by Var's register
// class.
const llvm::SmallBitVector &
getAllRegistersForVariable(const Variable *Var) const override {
RegClass RC = Var->getRegClass();
// NOTE(review): this assert looks like the pre-change line that the diff view
// shows without its removal marker; it would reject the ARM32-specific classes
// that the next assert allows -- confirm it is not present in the final file.
assert(RC < RC_Target);
// Every class below RCARM32_NUM is accepted, which includes the
// ARM32-specific ones.
assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);
return TypeToRegisterSetUnfiltered[RC];
}
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
......@@ -413,6 +418,20 @@ protected:
}
}
// Generates a vmov instruction to extract the given index from a vector
// register.
//
// Inserts an InstARM32Extract at the current lowering context. Dest receives
// element Index of Src0; Pred is the instruction's condition and defaults to
// CondARM32::AL.
void _extractelement(Variable *Dest, Variable *Src0, uint32_t Index,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Extract>(Dest, Src0, Index, Pred);
}
// Generates a vmov instruction to insert a value into the given index of a
// vector register.
//
// Inserts an InstARM32Insert at the current lowering context. Src0 is written
// into element Index of Dest; Pred is the instruction's condition and defaults
// to CondARM32::AL.
void _insertelement(Variable *Dest, Variable *Src0, uint32_t Index,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Insert>(Dest, Src0, Index, Pred);
}
// --------------------------------------------------------------------------
// Begin bool folding machinery.
//
......
; Show that we know how to translate insertelement and extractelement.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -Om1 \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -Om1 \
; RUN: | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -Om1 \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -Om1 \
; RUN: | FileCheck %s --check-prefix=DIS
; Extract element 1 of a <4 x i32>. The element is in the first half of the
; vector, so the expected access is element 1 of d0.
define internal i32 @extract1_v4i32(<4 x i32> %src) {
; ASM-LABEL: extract1_v4i32:
; DIS-LABEL: 00000000 <extract1_v4i32>:
; IASM-LABEL: extract1_v4i32:
%1 = extractelement <4 x i32> %src, i32 1
; ASM: vmov.32 r0, d0[1]
; DIS: 10: ee300b10
ret i32 %1
}
; Extract element 2 of a <4 x i32>. The element is the first one of the second
; half of the vector, so the expected access is element 0 of d1.
define internal i32 @extract2_v4i32(<4 x i32> %src) {
; ASM-LABEL: extract2_v4i32:
; DIS-LABEL: 00000030 <extract2_v4i32>:
; IASM-LABEL: extract2_v4i32:
%1 = extractelement <4 x i32> %src, i32 2
; ASM: vmov.32 r0, d1[0]
; DIS: 40: ee110b10
ret i32 %1
}
; Extract element 3 of an <8 x i16>, the last element of the first half of the
; vector. The expected instruction uses the signed 16-bit form and reads
; element 3 of d0.
define internal i32 @extract3_v8i16(<8 x i16> %src) {
; ASM-LABEL: extract3_v8i16:
; DIS-LABEL: 00000060 <extract3_v8i16>:
; IASM-LABEL: extract3_v8i16:
%1 = extractelement <8 x i16> %src, i32 3
; ASM: vmov.s16 r0, d0[3]
; DIS: 70: ee300b70
%2 = sext i16 %1 to i32
ret i32 %2
}
; Extract element 4 of an <8 x i16>, the first element of the second half of
; the vector. The expected instruction uses the signed 16-bit form and reads
; element 0 of d1.
define internal i32 @extract4_v8i16(<8 x i16> %src) {
; ASM-LABEL: extract4_v8i16:
; DIS-LABEL: 00000090 <extract4_v8i16>:
; IASM-LABEL: extract4_v8i16:
%1 = extractelement <8 x i16> %src, i32 4
; ASM: vmov.s16 r0, d1[0]
; DIS: a0: ee110b30
%2 = sext i16 %1 to i32
ret i32 %2
}
; Extract element 7 of a <16 x i8>, the last element of the first half of the
; vector. The expected instruction uses the signed 8-bit form and reads element
; 7 of d0.
; NOTE(review): the function name says v4i8 but the operand type is <16 x i8>;
; the name was probably meant to end in v16i8 -- confirm before renaming.
define internal i32 @extract7_v4i8(<16 x i8> %src) {
; ASM-LABEL: extract7_v4i8:
; DIS-LABEL: 000000c0 <extract7_v4i8>:
; IASM-LABEL: extract7_v4i8:
%1 = extractelement <16 x i8> %src, i32 7
; ASM: vmov.s8 r0, d0[7]
; DIS: d0: ee700b70
%2 = sext i8 %1 to i32
ret i32 %2
}
; Extract element 8 of a <16 x i8>, the first element of the second half of
; the vector. The expected instruction uses the signed 8-bit form and reads
; element 0 of d1.
define internal i32 @extract8_v16i8(<16 x i8> %src) {
; ASM-LABEL: extract8_v16i8:
; DIS-LABEL: 000000f0 <extract8_v16i8>:
; IASM-LABEL: extract8_v16i8:
%1 = extractelement <16 x i8> %src, i32 8
; ASM: vmov.s8 r0, d1[0]
; DIS: 100: ee510b10
%2 = sext i8 %1 to i32
ret i32 %2
}
; Extract element 1 of a <4 x float>. Float elements are read with a plain
; single-precision move; the expected source register is s1.
define internal float @extract1_v4float(<4 x float> %src) {
; ASM-LABEL: extract1_v4float:
; DIS-LABEL: 00000120 <extract1_v4float>:
; IASM-LABEL: extract1_v4float:
%1 = extractelement <4 x float> %src, i32 1
; ASM: vmov.f32 s0, s1
; DIS: 130: eeb00a60
ret float %1
}
; Extract element 2 of a <4 x float>. Float elements are read with a plain
; single-precision move; the expected source register is s2.
define internal float @extract2_v4float(<4 x float> %src) {
; ASM-LABEL: extract2_v4float:
; DIS-LABEL: 00000150 <extract2_v4float>:
; IASM-LABEL: extract2_v4float:
%1 = extractelement <4 x float> %src, i32 2
; ASM: vmov.f32 s0, s2
; DIS: 160: eeb00a41
ret float %1
}
; Insert a scalar into element 1 of a <4 x i32>. The element is in the first
; half of the vector, so the expected write targets element 1 of d0.
define internal <4 x i32> @insert1_v4i32(<4 x i32> %src, i32 %s) {
; ASM-LABEL: insert1_v4i32:
; DIS-LABEL: 00000180 <insert1_v4i32>:
; IASM-LABEL: insert1_v4i32:
%1 = insertelement <4 x i32> %src, i32 %s, i32 1
; ASM: vmov.32 d0[1], r0
; DIS: 198: ee200b10
ret <4 x i32> %1
}
; Insert a scalar into element 2 of a <4 x i32>. The element is the first one
; of the second half of the vector, so the expected write targets element 0 of
; d1.
define internal <4 x i32> @insert2_v4i32(<4 x i32> %src, i32 %s) {
; ASM-LABEL: insert2_v4i32:
; DIS-LABEL: 000001b0 <insert2_v4i32>:
; IASM-LABEL: insert2_v4i32:
%1 = insertelement <4 x i32> %src, i32 %s, i32 2
; ASM: vmov.32 d1[0], r0
; DIS: 1c8: ee010b10
ret <4 x i32> %1
}
; Insert a truncated scalar into element 3 of an <8 x i16>, the last element
; of the first half of the vector. The expected write is a 16-bit move into
; element 3 of d0.
define internal <8 x i16> @insert3_v8i16(<8 x i16> %src, i32 %s) {
; ASM-LABEL: insert3_v8i16:
; DIS-LABEL: 000001e0 <insert3_v8i16>:
; IASM-LABEL: insert3_v8i16:
%s2 = trunc i32 %s to i16
%1 = insertelement <8 x i16> %src, i16 %s2, i32 3
; ASM: vmov.16 d0[3], r0
; DIS: 200: ee200b70
ret <8 x i16> %1
}
; Insert a truncated scalar into element 4 of an <8 x i16>, the first element
; of the second half of the vector. The expected write is a 16-bit move into
; element 0 of d1.
define internal <8 x i16> @insert4_v8i16(<8 x i16> %src, i32 %s) {
; ASM-LABEL: insert4_v8i16:
; DIS-LABEL: 00000220 <insert4_v8i16>:
; IASM-LABEL: insert4_v8i16:
%s2 = trunc i32 %s to i16
%1 = insertelement <8 x i16> %src, i16 %s2, i32 4
; ASM: vmov.16 d1[0], r0
; DIS: 240: ee010b30
ret <8 x i16> %1
}
; Insert a truncated scalar into element 7 of a <16 x i8>, the last element of
; the first half of the vector. The expected write is an 8-bit move into
; element 7 of d0.
; NOTE(review): the function name says v4i8 but the vector type is <16 x i8>;
; the name was probably meant to end in v16i8 -- confirm before renaming.
define internal <16 x i8> @insert7_v4i8(<16 x i8> %src, i32 %s) {
; ASM-LABEL: insert7_v4i8:
; DIS-LABEL: 00000260 <insert7_v4i8>:
; IASM-LABEL: insert7_v4i8:
%s2 = trunc i32 %s to i8
%1 = insertelement <16 x i8> %src, i8 %s2, i32 7
; ASM: vmov.8 d0[7], r0
; DIS: 280: ee600b70
ret <16 x i8> %1
}
; Insert a truncated scalar into element 8 of a <16 x i8>, the first element
; of the second half of the vector. The expected write is an 8-bit move into
; element 0 of d1.
define internal <16 x i8> @insert8_v16i8(<16 x i8> %src, i32 %s) {
; ASM-LABEL: insert8_v16i8:
; DIS-LABEL: 000002a0 <insert8_v16i8>:
; IASM-LABEL: insert8_v16i8:
%s2 = trunc i32 %s to i8
%1 = insertelement <16 x i8> %src, i8 %s2, i32 8
; ASM: vmov.8 d1[0], r0
; DIS: 2c0: ee410b10
ret <16 x i8> %1
}
; Insert a float into element 1 of a <4 x float>. Float elements are written
; with a plain single-precision move; the expected destination register is s1
; and the expected source is s4.
define internal <4 x float> @insert1_v4float(<4 x float> %src, float %s) {
; ASM-LABEL: insert1_v4float:
; DIS-LABEL: 000002e0 <insert1_v4float>:
; IASM-LABEL: insert1_v4float:
%1 = insertelement <4 x float> %src, float %s, i32 1
; ASM: vmov.f32 s1, s4
; DIS: 2f8: eef00a42
ret <4 x float> %1
}
; Insert a float into element 2 of a <4 x float>. Float elements are written
; with a plain single-precision move; the expected destination register is s2
; and the expected source is s4.
define internal <4 x float> @insert2_v4float(<4 x float> %src, float %s) {
; ASM-LABEL: insert2_v4float:
; DIS-LABEL: 00000310 <insert2_v4float>:
; IASM-LABEL: insert2_v4float:
%1 = insertelement <4 x float> %src, float %s, i32 2
; ASM: vmov.f32 s2, s4
; DIS: 328: eeb01a42
ret <4 x float> %1
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment