Commit 694cdbd8 by Karl Schimpf

Add the VMLS instruction to the integrated ARM assembler.

parent b40595a1
...@@ -1012,21 +1012,19 @@ void Assembler::vmlad(DRegister dd, DRegister dn, DRegister dm, ...@@ -1012,21 +1012,19 @@ void Assembler::vmlad(DRegister dd, DRegister dn, DRegister dm,
Condition cond) { Condition cond) {
EmitVFPddd(cond, 0, dd, dn, dm); EmitVFPddd(cond, 0, dd, dn, dm);
} }
#endif
// Moved to Arm32::AssemblerARM32::vmlss()
void Assembler::vmlss(SRegister sd, SRegister sn, SRegister sm, void Assembler::vmlss(SRegister sd, SRegister sn, SRegister sm,
Condition cond) { Condition cond) {
EmitVFPsss(cond, B6, sd, sn, sm); EmitVFPsss(cond, B6, sd, sn, sm);
} }
// Moved to Arm32::AssemblerARM32::vmlsd()
void Assembler::vmlsd(DRegister dd, DRegister dn, DRegister dm, void Assembler::vmlsd(DRegister dd, DRegister dn, DRegister dm,
Condition cond) { Condition cond) {
EmitVFPddd(cond, B6, dd, dn, dm); EmitVFPddd(cond, B6, dd, dn, dm);
} }
#if 0
// Moved to Arm32::AssemblerARM32::vdivs() // Moved to Arm32::AssemblerARM32::vdivs()
void Assembler::vdivs(SRegister sd, SRegister sn, SRegister sm, void Assembler::vdivs(SRegister sd, SRegister sn, SRegister sm,
Condition cond) { Condition cond) {
......
...@@ -696,10 +696,10 @@ class Assembler : public ValueObject { ...@@ -696,10 +696,10 @@ class Assembler : public ValueObject {
void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL); void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
// Moved to Arm32::AssemblerARM32::vmlad() // Moved to Arm32::AssemblerARM32::vmlad()
void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL); void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
#endif // Moved to Arm32::AssemblerARM32::vmlss()
void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL); void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
// Moved to Arm32::AssemblerARM32::vmlsd()
void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL); void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
#if 0
// Moved to Arm32::AssemblerARM32::vdivs() // Moved to Arm32::AssemblerARM32::vdivs()
void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL); void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
// Moved to Arm32::AssemblerARM32::vdivd() // Moved to Arm32::AssemblerARM32::vdivd()
......
...@@ -2520,6 +2520,30 @@ void AssemblerARM32::vmlas(const Operand *OpSd, const Operand *OpSn, ...@@ -2520,6 +2520,30 @@ void AssemblerARM32::vmlas(const Operand *OpSd, const Operand *OpSn,
emitVFPsss(Cond, VmlasOpcode, OpSd, OpSn, OpSm, Vmlas); emitVFPsss(Cond, VmlasOpcode, OpSd, OpSn, OpSm, Vmlas);
} }
void AssemblerARM32::vmlsd(const Operand *OpDd, const Operand *OpDn,
const Operand *OpDm, CondARM32::Cond Cond) {
// VMLA, VMLS (floating-point), ARM section A8.8.337, encoding A2:
// vmls<c>.f64 <Dd>, <Dn>, <Dm>
//
// cccc11100d00nnnndddd1011n1M0mmmm where cccc=Cond, Ddddd=Dd, Nnnnn=Dn, and
// Mmmmm=Dm
constexpr const char *Vmlad = "vmlad";
constexpr IValueT VmladOpcode = B6;
emitVFPddd(Cond, VmladOpcode, OpDd, OpDn, OpDm, Vmlad);
}
void AssemblerARM32::vmlss(const Operand *OpSd, const Operand *OpSn,
const Operand *OpSm, CondARM32::Cond Cond) {
// VMLA, VMLS (floating-point), ARM section A8.8.337, encoding A2:
// vmls<c>.f32 <Sd>, <Sn>, <Sm>
//
// cccc11100d00nnnndddd1010n1M0mmmm where cccc=Cond, ddddD=Sd, nnnnN=Sn, and
// mmmmM=Sm
constexpr const char *Vmlas = "vmlas";
constexpr IValueT VmlasOpcode = B6;
emitVFPsss(Cond, VmlasOpcode, OpSd, OpSn, OpSm, Vmlas);
}
void AssemblerARM32::vmrsAPSR_nzcv(CondARM32::Cond Cond) { void AssemblerARM32::vmrsAPSR_nzcv(CondARM32::Cond Cond) {
// MVRS - ARM section A*.8.348, encoding A1: // MVRS - ARM section A*.8.348, encoding A1:
// vmrs<c> APSR_nzcv, FPSCR // vmrs<c> APSR_nzcv, FPSCR
......
...@@ -402,6 +402,12 @@ public: ...@@ -402,6 +402,12 @@ public:
void vmlas(const Operand *OpSd, const Operand *OpSn, const Operand *OpSm, void vmlas(const Operand *OpSd, const Operand *OpSn, const Operand *OpSm,
CondARM32::Cond Cond); CondARM32::Cond Cond);
// Emits the double-precision multiply-subtract: vmls<c>.f64 <Dd>, <Dn>, <Dm>.
void vmlsd(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm,
CondARM32::Cond Cond);
// Emits the single-precision multiply-subtract: vmls<c>.f32 <Sd>, <Sn>, <Sm>.
void vmlss(const Operand *OpSd, const Operand *OpSn, const Operand *OpSm,
CondARM32::Cond Cond);
// Uses APSR_nzcv as register // Uses APSR_nzcv as register
void vmrsAPSR_nzcv(CondARM32::Cond Cond); void vmrsAPSR_nzcv(CondARM32::Cond Cond);
......
...@@ -666,15 +666,36 @@ template <> void InstARM32Vmla::emitIAS(const Cfg *Func) const { ...@@ -666,15 +666,36 @@ template <> void InstARM32Vmla::emitIAS(const Cfg *Func) const {
default: default:
// TODO(kschimpf) Figure out how vector operations apply. // TODO(kschimpf) Figure out how vector operations apply.
emitUsingTextFixup(Func); emitUsingTextFixup(Func);
break; return;
case IceType_f32: case IceType_f32:
Asm->vmlas(getDest(), getSrc(1), getSrc(2), CondARM32::AL); Asm->vmlas(getDest(), getSrc(1), getSrc(2), CondARM32::AL);
assert(!Asm->needsTextFixup()); assert(!Asm->needsTextFixup());
break; return;
case IceType_f64: case IceType_f64:
Asm->vmlad(getDest(), getSrc(1), getSrc(2), CondARM32::AL); Asm->vmlad(getDest(), getSrc(1), getSrc(2), CondARM32::AL);
assert(!Asm->needsTextFixup()); assert(!Asm->needsTextFixup());
break; return;
}
}
// Emits this VMLS instruction through the integrated ARM32 assembler.
// The destination type selects the encoder: f32 uses vmlss and f64 uses vmlsd,
// both unconditionally (CondARM32::AL) and both expected to succeed without a
// text fixup. Every other destination type falls back to emitUsingTextFixup.
template <> void InstARM32Vmls::emitIAS(const Cfg *Func) const {
// Note: Dest == getSrc(0) for four address FP instructions.
assert(getSrcSize() == 3);
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest();
switch (Dest->getType()) {
default:
// TODO(kschimpf) Figure out how vector operations apply.
emitUsingTextFixup(Func);
return;
case IceType_f32:
// getSrc(0) is skipped because it is the same as Dest (see note above).
Asm->vmlss(getDest(), getSrc(1), getSrc(2), CondARM32::AL);
assert(!Asm->needsTextFixup());
return;
case IceType_f64:
// getSrc(0) is skipped because it is the same as Dest (see note above).
Asm->vmlsd(getDest(), getSrc(1), getSrc(2), CondARM32::AL);
assert(!Asm->needsTextFixup());
return;
} }
} }
......
; Show that we can take advantage of the vmls instruction for floating point
; operations during optimization.
; Note that we use -O2 to force the result of the fmul to be
; (immediately) available for the fsub. When using -Om1, the merge of
; fmul and fsub does not happen.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use=s20,s21,s22,d20,d21,d22 \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 -reg-use=s20,s21,s22,d20,d21,d22 \
; RUN: | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use=s20,s21,s22,d20,d21,d22 \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 -reg-use=s20,s21,s22,d20,d21,d22 \
; RUN: | FileCheck %s --check-prefix=DIS
define internal float @mulSubFloat(float %f1, float %f2) {
; ASM-LABEL: mulSubFloat:
; DIS-LABEL: 00000000 <mulSubFloat>:
%v1 = fmul float %f1, 1.5
%v2 = fsub float %f2, %v1
; ASM: vmls.f32 s21, s20, s22
; DIS: 10: ee4aaa4b
; IASM-NOT: vmls.f32
ret float %v2
}
define internal double @mulSubDouble(double %f1, double %f2) {
; ASM-LABEL: mulSubDouble:
; DIS-LABEL: 00000020 <mulSubDouble>:
%v1 = fmul double %f1, 1.5
%v2 = fsub double %f2, %v1
; ASM: vmls.f64 d21, d20, d22
; DIS: 2c: ee445be6
; IASM-NOT: vmls.f64
ret double %v2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment