Commit 40c69b43 by Eric Holk

Initial support for vector addition on ARM32.

parent 18273c0f
...@@ -603,16 +603,17 @@ template <> void InstARM32Vadd::emitIAS(const Cfg *Func) const { ...@@ -603,16 +603,17 @@ template <> void InstARM32Vadd::emitIAS(const Cfg *Func) const {
switch (Dest->getType()) { switch (Dest->getType()) {
default: default:
// TODO(kschimpf) Figure if more cases are needed. // TODO(kschimpf) Figure if more cases are needed.
Asm->setNeedsTextFixup(); emitUsingTextFixup(Func);
break; break;
case IceType_f32: case IceType_f32:
Asm->vadds(getDest(), getSrc(0), getSrc(1), CondARM32::AL); Asm->vadds(getDest(), getSrc(0), getSrc(1), CondARM32::AL);
assert(!Asm->needsTextFixup());
break; break;
case IceType_f64: case IceType_f64:
Asm->vaddd(getDest(), getSrc(0), getSrc(1), CondARM32::AL); Asm->vaddd(getDest(), getSrc(0), getSrc(1), CondARM32::AL);
assert(!Asm->needsTextFixup());
break; break;
} }
assert(!Asm->needsTextFixup());
} }
template <> void InstARM32Vdiv::emitIAS(const Cfg *Func) const { template <> void InstARM32Vdiv::emitIAS(const Cfg *Func) const {
......
...@@ -2798,11 +2798,17 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) { ...@@ -2798,11 +2798,17 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
} }
if (isVectorType(DestTy)) { if (isVectorType(DestTy)) {
UnimplementedLoweringError(this, Instr); switch (Instr->getOp()) {
return; default:
UnimplementedLoweringError(this, Instr);
return;
// Explicitly whitelist vector instructions we have implemented/enabled.
case InstArithmetic::Fadd:
case InstArithmetic::Add:
break;
}
} }
// DestTy is a non-i64 scalar.
Variable *T = makeReg(DestTy); Variable *T = makeReg(DestTy);
// * Handle div/rem separately. They require a non-legalized Src1 to inspect // * Handle div/rem separately. They require a non-legalized Src1 to inspect
...@@ -2900,6 +2906,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) { ...@@ -2900,6 +2906,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
return; return;
case InstArithmetic::Add: { case InstArithmetic::Add: {
if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
assert(!isVectorType(DestTy));
Variable *Src0R = legalizeToReg(Src0); Variable *Src0R = legalizeToReg(Src0);
Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
...@@ -2911,6 +2918,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) { ...@@ -2911,6 +2918,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
if (Srcs.hasConstOperand()) { if (Srcs.hasConstOperand()) {
if (!Srcs.immediateIsFlexEncodable() && if (!Srcs.immediateIsFlexEncodable() &&
Srcs.negatedImmediateIsFlexEncodable()) { Srcs.negatedImmediateIsFlexEncodable()) {
assert(!isVectorType(DestTy));
Variable *Src0R = Srcs.src0R(this); Variable *Src0R = Srcs.src0R(this);
Operand *Src1F = Srcs.negatedSrc1F(this); Operand *Src1F = Srcs.negatedSrc1F(this);
if (!Srcs.swappedOperands()) { if (!Srcs.swappedOperands()) {
...@@ -2923,8 +2931,13 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) { ...@@ -2923,8 +2931,13 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
} }
} }
Variable *Src0R = Srcs.src0R(this); Variable *Src0R = Srcs.src0R(this);
Operand *Src1RF = Srcs.src1RF(this); if (isVectorType(DestTy)) {
_add(T, Src0R, Src1RF); Variable *Src1R = legalizeToReg(Src1);
_vadd(T, Src0R, Src1R);
} else {
Operand *Src1RF = Srcs.src1RF(this);
_add(T, Src0R, Src1RF);
}
_mov(Dest, T); _mov(Dest, T);
return; return;
} }
......
; Show that we know how to translate vadd vector instructions.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use q10,q11 \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \
; RUN: -reg-use q10,q11 \
; RUN: | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use q10,q11 \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \
; RUN: -reg-use q10,q11 \
; RUN: | FileCheck %s --check-prefix=DIS
; Floating-point vector add: a single fadd of two <4 x float> arguments.
; The run commands for this test pass -reg-use q10,q11, which is why the
; expected instruction below names exactly those two registers.
define internal <4 x float> @testVaddFloat4(<4 x float> %v1, <4 x float> %v2) {
; ASM-LABEL: testVaddFloat4:
; DIS-LABEL: 00000000 <testVaddFloat4>:
; IASM-LABEL: testVaddFloat4:
entry:
%res = fadd <4 x float> %v1, %v2
; Textual assembly must contain the f32 vector add on q10/q11. The disassembly
; check pins the encoded word; it is shared by the standalone and integrated
; assemble+disassemble runs, since both use the same check prefix.
; NOTE(review): the integrated-assembler text check only looks for the
; mnemonic; presumably the instruction is still emitted textually there --
; confirm against the emitter.
; ASM: vadd.f32 q10, q10, q11
; DIS: 8: f2444de6
; IASM: vadd.f32
ret <4 x float> %res
}
; Integer vector add with 32-bit lanes: a single add of two <4 x i32>
; arguments. Registers in the expected output follow from the
; -reg-use q10,q11 option given in this test's run commands.
define internal <4 x i32> @testVadd4i32(<4 x i32> %v1, <4 x i32> %v2) {
; ASM-LABEL: testVadd4i32:
; DIS-LABEL: 00000020 <testVadd4i32>:
; IASM-LABEL: testVadd4i32:
entry:
%res = add <4 x i32> %v1, %v2
; Expect the i32 form of the vector add in textual output and its encoded
; word in the disassembly; the integrated-assembler text output is only
; checked for the mnemonic.
; ASM: vadd.i32 q10, q10, q11
; DIS: 28: f26448e6
; IASM: vadd.i32
ret <4 x i32> %res
}
; Integer vector add with 16-bit lanes: a single add of two <8 x i16>
; arguments. Registers in the expected output follow from the
; -reg-use q10,q11 option given in this test's run commands.
define internal <8 x i16> @testVadd8i16(<8 x i16> %v1, <8 x i16> %v2) {
; ASM-LABEL: testVadd8i16:
; DIS-LABEL: 00000040 <testVadd8i16>:
; IASM-LABEL: testVadd8i16:
entry:
%res = add <8 x i16> %v1, %v2
; Expect the i16 form of the vector add in textual output and its encoded
; word in the disassembly; the integrated-assembler text output is only
; checked for the mnemonic.
; ASM: vadd.i16 q10, q10, q11
; DIS: 48: f25448e6
; IASM: vadd.i16
ret <8 x i16> %res
}
; Integer vector add with 8-bit lanes: a single add of two <16 x i8>
; arguments. Registers in the expected output follow from the
; -reg-use q10,q11 option given in this test's run commands.
define internal <16 x i8> @testVadd16i8(<16 x i8> %v1, <16 x i8> %v2) {
; ASM-LABEL: testVadd16i8:
; DIS-LABEL: 00000060 <testVadd16i8>:
; IASM-LABEL: testVadd16i8:
entry:
%res = add <16 x i8> %v1, %v2
; Expect the i8 form of the vector add in textual output and its encoded
; word in the disassembly; the integrated-assembler text output is only
; checked for the mnemonic.
; ASM: vadd.i8 q10, q10, q11
; DIS: 68: f24448e6
; IASM: vadd.i8
ret <16 x i8> %res
}
; Show that we know how to translate vector load instructions.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \
; RUN: | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \
; RUN: | FileCheck %s --check-prefix=DIS
; Vector load: dereferences a pointer to <4 x float> and returns the loaded
; value. Unlike the vadd test, the run commands here do not restrict the
; register set, and the expected output uses q0 and r0.
define internal <4 x float> @testDerefFloat4(<4 x float> *%p) {
; ASM-LABEL: testDerefFloat4:
; DIS-LABEL: 00000000 <testDerefFloat4>:
; IASM-LABEL: testDerefFloat4:
entry:
%ret = load <4 x float>, <4 x float>* %p, align 4
; Expect a vld1.64 in textual output and its encoded word in the disassembly.
; NOTE(review): there is no body check for the integrated-assembler text
; output in this function; only its label is verified.
; ASM: vld1.64 q0, [r0]
; DIS: 0: f4200acf
ret <4 x float> %ret
}
; Vector load: dereferences a pointer to <4 x i32> and returns the loaded
; value. The expected instruction and encoded word are the same as for the
; <4 x float> load in this test; only the function offset differs.
define internal <4 x i32> @testDeref4i32(<4 x i32> *%p) {
; ASM-LABEL: testDeref4i32:
; DIS-LABEL: 00000010 <testDeref4i32>:
; IASM-LABEL: testDeref4i32:
entry:
%ret = load <4 x i32>, <4 x i32>* %p, align 4
; Expect a vld1.64 in textual output and its encoded word in the disassembly.
; NOTE(review): there is no body check for the integrated-assembler text
; output in this function; only its label is verified.
; ASM: vld1.64 q0, [r0]
; DIS: 10: f4200acf
ret <4 x i32> %ret
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment