Commit 40c69b43 by Eric Holk

Initial support for vector addition on ARM32.

parent 18273c0f
......@@ -603,16 +603,17 @@ template <> void InstARM32Vadd::emitIAS(const Cfg *Func) const {
switch (Dest->getType()) {
default:
// TODO(kschimpf) Figure out if more cases are needed.
Asm->setNeedsTextFixup();
emitUsingTextFixup(Func);
break;
case IceType_f32:
Asm->vadds(getDest(), getSrc(0), getSrc(1), CondARM32::AL);
assert(!Asm->needsTextFixup());
break;
case IceType_f64:
Asm->vaddd(getDest(), getSrc(0), getSrc(1), CondARM32::AL);
assert(!Asm->needsTextFixup());
break;
}
assert(!Asm->needsTextFixup());
}
template <> void InstARM32Vdiv::emitIAS(const Cfg *Func) const {
......
......@@ -2798,11 +2798,17 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
}
if (isVectorType(DestTy)) {
UnimplementedLoweringError(this, Instr);
return;
switch (Instr->getOp()) {
default:
UnimplementedLoweringError(this, Instr);
return;
// Explicitly whitelist vector instructions we have implemented/enabled.
case InstArithmetic::Fadd:
case InstArithmetic::Add:
break;
}
}
// DestTy is a non-i64 scalar, or a vector type for one of the vector
// operations explicitly whitelisted above.
Variable *T = makeReg(DestTy);
// * Handle div/rem separately. They require a non-legalized Src1 to inspect
......@@ -2900,6 +2906,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
return;
case InstArithmetic::Add: {
if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
assert(!isVectorType(DestTy));
Variable *Src0R = legalizeToReg(Src0);
Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
......@@ -2911,6 +2918,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
if (Srcs.hasConstOperand()) {
if (!Srcs.immediateIsFlexEncodable() &&
Srcs.negatedImmediateIsFlexEncodable()) {
assert(!isVectorType(DestTy));
Variable *Src0R = Srcs.src0R(this);
Operand *Src1F = Srcs.negatedSrc1F(this);
if (!Srcs.swappedOperands()) {
......@@ -2923,8 +2931,13 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
}
}
Variable *Src0R = Srcs.src0R(this);
Operand *Src1RF = Srcs.src1RF(this);
_add(T, Src0R, Src1RF);
if (isVectorType(DestTy)) {
Variable *Src1R = legalizeToReg(Src1);
_vadd(T, Src0R, Src1R);
} else {
Operand *Src1RF = Srcs.src1RF(this);
_add(T, Src0R, Src1RF);
}
_mov(Dest, T);
return;
}
......
; Show that we know how to translate vadd vector instructions.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use q10,q11 \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \
; RUN: -reg-use q10,q11 \
; RUN: | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use q10,q11 \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \
; RUN: -reg-use q10,q11 \
; RUN: | FileCheck %s --check-prefix=DIS
; Adds two <4 x float> operands with a single vector fadd and returns the sum.
; The checks below expect one vadd.f32 whose destination and sources are q10
; and q11 (the registers named by -reg-use in the RUN lines), and they pin the
; encoded word shown in the disassembly.
define internal <4 x float> @testVaddFloat4(<4 x float> %v1, <4 x float> %v2) {
; ASM-LABEL: testVaddFloat4:
; DIS-LABEL: 00000000 <testVaddFloat4>:
; IASM-LABEL: testVaddFloat4:
entry:
%res = fadd <4 x float> %v1, %v2
; ASM: vadd.f32 q10, q10, q11
; DIS: 8: f2444de6
; IASM: vadd.f32
ret <4 x float> %res
}
; Adds two <4 x i32> operands with a single vector add and returns the sum.
; The checks below expect one vadd.i32 on q10 and q11 and pin the encoded
; word shown in the disassembly.
define internal <4 x i32> @testVadd4i32(<4 x i32> %v1, <4 x i32> %v2) {
; ASM-LABEL: testVadd4i32:
; DIS-LABEL: 00000020 <testVadd4i32>:
; IASM-LABEL: testVadd4i32:
entry:
%res = add <4 x i32> %v1, %v2
; ASM: vadd.i32 q10, q10, q11
; DIS: 28: f26448e6
; IASM: vadd.i32
ret <4 x i32> %res
}
; Adds two <8 x i16> operands with a single vector add and returns the sum.
; The checks below expect one vadd.i16 on q10 and q11 and pin the encoded
; word shown in the disassembly.
define internal <8 x i16> @testVadd8i16(<8 x i16> %v1, <8 x i16> %v2) {
; ASM-LABEL: testVadd8i16:
; DIS-LABEL: 00000040 <testVadd8i16>:
; IASM-LABEL: testVadd8i16:
entry:
%res = add <8 x i16> %v1, %v2
; ASM: vadd.i16 q10, q10, q11
; DIS: 48: f25448e6
; IASM: vadd.i16
ret <8 x i16> %res
}
; Adds two <16 x i8> operands with a single vector add and returns the sum.
; The checks below expect one vadd.i8 on q10 and q11 and pin the encoded
; word shown in the disassembly.
define internal <16 x i8> @testVadd16i8(<16 x i8> %v1, <16 x i8> %v2) {
; ASM-LABEL: testVadd16i8:
; DIS-LABEL: 00000060 <testVadd16i8>:
; IASM-LABEL: testVadd16i8:
entry:
%res = add <16 x i8> %v1, %v2
; ASM: vadd.i8 q10, q10, q11
; DIS: 68: f24448e6
; IASM: vadd.i8
ret <16 x i8> %res
}
; Show that we know how to translate vector load instructions.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \
; RUN: | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \
; RUN: | FileCheck %s --check-prefix=DIS
; Loads a <4 x float> through the pointer argument %p (align 4) and returns it.
; The textual-assembly check expects vld1.64 q0, [r0] and the disassembly check
; pins its encoded word. For the integrated-assembler prefix only the function
; label is checked; no instruction text is matched.
define internal <4 x float> @testDerefFloat4(<4 x float> *%p) {
; ASM-LABEL: testDerefFloat4:
; DIS-LABEL: 00000000 <testDerefFloat4>:
; IASM-LABEL: testDerefFloat4:
entry:
%ret = load <4 x float>, <4 x float>* %p, align 4
; ASM: vld1.64 q0, [r0]
; DIS: 0: f4200acf
ret <4 x float> %ret
}
; Loads a <4 x i32> through the pointer argument %p (align 4) and returns it.
; The textual-assembly check expects vld1.64 q0, [r0] and the disassembly check
; pins its encoded word. For the integrated-assembler prefix only the function
; label is checked; no instruction text is matched.
define internal <4 x i32> @testDeref4i32(<4 x i32> *%p) {
; ASM-LABEL: testDeref4i32:
; DIS-LABEL: 00000010 <testDeref4i32>:
; IASM-LABEL: testDeref4i32:
entry:
%ret = load <4 x i32>, <4 x i32>* %p, align 4
; ASM: vld1.64 q0, [r0]
; DIS: 10: f4200acf
ret <4 x i32> %ret
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment