Commit 18cce427 by Karl Schimpf

Fix vector load/stores in the ARM assembler.

Fixes emit() methods for load/store to specify the element size (affects alignment issues). Also adds assembler methods to generate the corresponding binary forms, and updates emitIAS() to call these assembler methods. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4334 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1663053008 .
parent 6e8d3fae
...@@ -66,9 +66,7 @@ def main(): ...@@ -66,9 +66,7 @@ def main():
flat_attrs += v flat_attrs += v
arch_flags = { 'x8632': [], arch_flags = { 'x8632': [],
'x8664': [], 'x8664': [],
# ARM doesn't have an ELF writer yet, and iasm does not 'arm32': [] }
# support sandboxing yet.
'arm32': ['--filetype=asm'] }
# all_keys is only used in the help text. # all_keys is only used in the help text.
all_keys = '; '.join([' '.join(targets), ' '.join(sandboxing), all_keys = '; '.join([' '.join(targets), ' '.join(sandboxing),
' '.join(opt_levels), ' '.join(flat_attrs)]) ' '.join(opt_levels), ' '.join(flat_attrs)])
......
...@@ -1402,11 +1402,13 @@ class Assembler : public ValueObject { ...@@ -1402,11 +1402,13 @@ class Assembler : public ValueObject {
#if 0 #if 0
// Added the following missing operations: // Added the following missing operations:
// //
// ARM32::AssemblerARM32::uxt() (uxtb and uxth). // ARM32::AssemblerARM32::uxt() (uxtb and uxth)
// ARM32::AssemblerARM32::vpop() // ARM32::AssemblerARM32::vpop()
// ARM32::AssemblerARM32::vpush() // ARM32::AssemblerARM32::vpush()
// ARM32::AssemblerARM:rbit(). // ARM32::AssemblerARM32::rbit()
// ARM32::AssemblerARM::veord() // ARM32::AssemblerARM32::veord()
// ARM32::AssemblerARM32::vld1qr()
// ARM32::AssemblerARM32::vst1qr()
#endif #endif
DISALLOW_ALLOCATION(); DISALLOW_ALLOCATION();
......
...@@ -2544,6 +2544,61 @@ void AssemblerARM32::vldrs(const Operand *OpSd, const Operand *OpAddress, ...@@ -2544,6 +2544,61 @@ void AssemblerARM32::vldrs(const Operand *OpSd, const Operand *OpAddress,
emitInst(Encoding); emitInst(Encoding);
} }
// Assembles and emits one instruction word for a vector memory operation.
//
// The caller supplies the fixed Opcode bits. This routine ORs in the encoding
// of CondARM32::kNone, the Y/XXXX halves of Dd, Rn, Rm, the D-register list
// encoding NumDRegs (bits 8 and up), the element-size code derived from
// ElmtSize (bits 6 and up), and Align (bits 4 and up).
//
// ElmtSize is the element width in bits; 8, 16, 32 and 64 map to size codes
// 0, 1, 2 and 3. Any other value triggers a fatal error whose message is
// prefixed with InstName.
void AssemblerARM32::emitVMem1Op(IValueT Opcode, IValueT Dd, IValueT Rn,
                                 IValueT Rm, DRegListSize NumDRegs,
                                 size_t ElmtSize, IValueT Align,
                                 const char *InstName) {
  // Align occupies a two-bit field; presumably this checks that it fits.
  assert(Utils::IsAbsoluteUint(2, Align));

  // Translate the element width into the size code.
  IValueT SizeCode = 0;
  switch (ElmtSize) {
  case 8:
    SizeCode = 0;
    break;
  case 16:
    SizeCode = 1;
    break;
  case 32:
    SizeCode = 2;
    break;
  case 64:
    SizeCode = 3;
    break;
  default: {
    std::string Message;
    llvm::raw_string_ostream Stream(Message);
    Stream << InstName << ": found invalid vector element size " << ElmtSize;
    llvm::report_fatal_error(Stream.str());
  }
  }

  // Build the instruction word one field at a time.
  IValueT Word = Opcode;
  Word |= encodeCondition(CondARM32::kNone) << kConditionShift;
  Word |= getYInRegYXXXX(Dd) << 22;
  Word |= Rn << kRnShift;
  Word |= getXXXXInRegYXXXX(Dd) << kRdShift;
  Word |= NumDRegs << 8;
  Word |= SizeCode << 6;
  Word |= Align << 4;
  Word |= Rm;
  emitInst(Word);
}
// Emits a VLD1 that loads the Q register OpQd from the address in OpAddress.
//
// ElmtSize is the vector element width in bits and selects the ".<size>"
// suffix of the instruction. OpAddress must encode as a base register with no
// immediate offset (NoImmOffsetAddress); anything else is a fatal error.
void AssemblerARM32::vld1qr(size_t ElmtSize, const Operand *OpQd,
                            const Operand *OpAddress, const TargetInfo &TInfo) {
  // VLD1 (multiple single elements) - ARM section A8.8.320, encoding A1:
  //   vld1.<size> <Qd>, [<Rn>]
  //
  // 111101000D10nnnnddd0ttttssaammmm where tttt=DRegListSize2, Dddd=Qd,
  // nnnn=Rn, aa=0 (use default alignment), mmmm=1111 (Reg_pc), <size>=ElmtSize,
  // and ss is the encoding of ElmtSize.
  constexpr const char *Vld1qr = "vld1qr";
  const IValueT Qd = encodeQRegister(OpQd, "Qd", Vld1qr);
  // The shared emitter takes a D register, so convert the Q register first.
  const IValueT Dd = mapQRegToDReg(Qd);
  IValueT Address;
  if (encodeAddress(OpAddress, Address, TInfo, NoImmOffsetAddress) !=
      EncodedAsImmRegOffset)
    llvm::report_fatal_error(std::string(Vld1qr) +
                             ": malformed memory address");
  // Only the base register field of the encoded address is used.
  const IValueT Rn = mask(Address, kRnShift, 4);
  // Rm=pc matches the "[<Rn>]" syntax above (no post-index register).
  constexpr IValueT Rm = RegARM32::Reg_pc;
  constexpr IValueT Opcode = B26 | B21;
  constexpr IValueT Align = 0; // use default alignment.
  emitVMem1Op(Opcode, Dd, Rn, Rm, DRegListSize2, ElmtSize, Align, Vld1qr);
}
void AssemblerARM32::vmovd(const Operand *OpDd, void AssemblerARM32::vmovd(const Operand *OpDd,
const OperandARM32FlexFpImm *OpFpImm, const OperandARM32FlexFpImm *OpFpImm,
CondARM32::Cond Cond) { CondARM32::Cond Cond) {
...@@ -2858,6 +2913,28 @@ void AssemblerARM32::vstrs(const Operand *OpSd, const Operand *OpAddress, ...@@ -2858,6 +2913,28 @@ void AssemblerARM32::vstrs(const Operand *OpSd, const Operand *OpAddress,
emitInst(Encoding); emitInst(Encoding);
} }
// Emits a VST1 that stores the Q register OpQd to the address in OpAddress.
//
// ElmtSize is the vector element width in bits and selects the ".<size>"
// suffix of the instruction. OpAddress must encode as a base register with no
// immediate offset (NoImmOffsetAddress); anything else is a fatal error.
void AssemblerARM32::vst1qr(size_t ElmtSize, const Operand *OpQd,
                            const Operand *OpAddress, const TargetInfo &TInfo) {
  // VST1 (multiple single elements) - ARM section A8.8.404, encoding A1:
  //   vst1.<size> <Qd>, [<Rn>]
  //
  // 111101000D00nnnnddd0ttttssaammmm where tttt=DRegListSize2, Dddd=Qd,
  // nnnn=Rn, aa=0 (use default alignment), mmmm=1111 (Reg_pc), <size>=ElmtSize,
  // and ss is the encoding of ElmtSize.
  constexpr const char *Vst1qr = "vst1qr";
  const IValueT Qd = encodeQRegister(OpQd, "Qd", Vst1qr);
  // The shared emitter takes a D register, so convert the Q register first.
  const IValueT Dd = mapQRegToDReg(Qd);
  IValueT Address;
  if (encodeAddress(OpAddress, Address, TInfo, NoImmOffsetAddress) !=
      EncodedAsImmRegOffset)
    llvm::report_fatal_error(std::string(Vst1qr) +
                             ": malformed memory address");
  // Only the base register field of the encoded address is used.
  const IValueT Rn = mask(Address, kRnShift, 4);
  // Rm=pc matches the "[<Rn>]" syntax above (no post-index register).
  constexpr IValueT Rm = RegARM32::Reg_pc;
  // Same as the VLD1 pattern but with bit 21 clear (compare the patterns).
  constexpr IValueT Opcode = B26;
  constexpr IValueT Align = 0; // use default alignment.
  emitVMem1Op(Opcode, Dd, Rn, Rm, DRegListSize2, ElmtSize, Align, Vst1qr);
}
void AssemblerARM32::vsubs(const Operand *OpSd, const Operand *OpSn, void AssemblerARM32::vsubs(const Operand *OpSd, const Operand *OpSn,
const Operand *OpSm, CondARM32::Cond Cond) { const Operand *OpSm, CondARM32::Cond Cond) {
// VSUB (floating-point) - ARM section A8.8.415, encoding A2: // VSUB (floating-point) - ARM section A8.8.415, encoding A2:
......
...@@ -73,6 +73,14 @@ public: ...@@ -73,6 +73,14 @@ public:
kRotate24 // ror #24 kRotate24 // ror #24
}; };
// Encoding of the number of D registers in a list of D registers. Each
// enumerator's value is the 4-bit field placed in the instruction word to
// select a list of that many registers.
enum DRegListSize {
  DRegListSize1 = 7,  // 0b0111
  DRegListSize2 = 10, // 0b1010
  DRegListSize3 = 6,  // 0b0110
  // Misspelled alias of DRegListSize3, kept so that any code written against
  // the original spelling continues to compile.
  DRegListSIze3 = DRegListSize3,
  DRegListSize4 = 2 // 0b0010
};
class TargetInfo { class TargetInfo {
TargetInfo(const TargetInfo &) = delete; TargetInfo(const TargetInfo &) = delete;
TargetInfo &operator=(const TargetInfo &) = delete; TargetInfo &operator=(const TargetInfo &) = delete;
...@@ -399,6 +407,16 @@ public: ...@@ -399,6 +407,16 @@ public:
vldrs(OpSd, OpAddress, Cond, TInfo); vldrs(OpSd, OpAddress, Cond, TInfo);
} }
// Emits vld1.<size> <Qd>, [<Rn>], loading Q register OpQd from OpAddress.
// ElmtSize = #bits in vector element.
void vld1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpAddress,
            const TargetInfo &TInfo);

// Convenience overload: builds the TargetInfo from Lowering and forwards to
// the TargetInfo overload above.
void vld1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpAddress,
            const TargetLowering *Lowering) {
  const TargetInfo TInfo(Lowering);
  vld1qr(ElmtSize, OpQd, OpAddress, TInfo);
}
void vmovd(const Operand *OpDn, const OperandARM32FlexFpImm *OpFpImm, void vmovd(const Operand *OpDn, const OperandARM32FlexFpImm *OpFpImm,
CondARM32::Cond Cond); CondARM32::Cond Cond);
...@@ -477,6 +495,16 @@ public: ...@@ -477,6 +495,16 @@ public:
vstrs(OpSd, OpAddress, Cond, TInfo); vstrs(OpSd, OpAddress, Cond, TInfo);
} }
// Emits vst1.<size> <Qd>, [<Rn>], storing Q register OpQd to OpAddress.
// ElmtSize = #bits in vector element.
void vst1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpAddress,
            const TargetInfo &TInfo);

// Convenience overload: builds the TargetInfo from Lowering and forwards to
// the TargetInfo overload above.
void vst1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpAddress,
            const TargetLowering *Lowering) {
  const TargetInfo TInfo(Lowering);
  vst1qr(ElmtSize, OpQd, OpAddress, TInfo);
}
void vsubd(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm, void vsubd(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm,
CondARM32::Cond Cond); CondARM32::Cond Cond);
...@@ -601,6 +629,13 @@ private: ...@@ -601,6 +629,13 @@ private:
// mmmmM=Sm, and xx0xxxxxxdddd000xxx0x0000=Opcode. // mmmmM=Sm, and xx0xxxxxxdddd000xxx0x0000=Opcode.
void emitVFPds(CondARM32::Cond Cond, IValueT Opcode, IValueT Dd, IValueT Sm); void emitVFPds(CondARM32::Cond Cond, IValueT Opcode, IValueT Dd, IValueT Sm);
// Pattern 111100000D00nnnnddddttttssaammmm | Opcode where Ddddd=Dd, nnnn=Rn,
// mmmm=Rm, tttt=NumDRegs, ElmtSize in {8, 16, 32, 64} and defines ss, and
// aa=Align.
void emitVMem1Op(IValueT Opcode, IValueT Dd, IValueT Rn, IValueT Rm,
DRegListSize NumDRegs, size_t ElmtSize, IValueT Align,
const char *InstName);
// Pattern cccc011100x1dddd1111mmmm0001nnn where cccc=Cond, // Pattern cccc011100x1dddd1111mmmm0001nnn where cccc=Cond,
// x=Opcode, dddd=Rd, nnnn=Rn, mmmm=Rm. // x=Opcode, dddd=Rd, nnnn=Rn, mmmm=Rm.
void emitDivOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd, IValueT Rn, void emitDivOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd, IValueT Rn,
......
...@@ -65,6 +65,10 @@ const struct InstARM32CondAttributes_ { ...@@ -65,6 +65,10 @@ const struct InstARM32CondAttributes_ {
#undef X #undef X
}; };
// Returns the width in bits of one element of Ty: the byte width of Ty's
// element type multiplied by CHAR_BIT.
size_t getVecElmtBitsize(Type Ty) {
  const auto ElmtTy = typeElementType(Ty);
  const auto ElmtBytes = typeWidthInBytes(ElmtTy);
  return ElmtBytes * CHAR_BIT;
}
} // end of anonymous namespace } // end of anonymous namespace
const char *InstARM32::getWidthString(Type Ty) { const char *InstARM32::getWidthString(Type Ty) {
...@@ -1563,7 +1567,6 @@ template <> void InstARM32Ldr::emit(const Cfg *Func) const { ...@@ -1563,7 +1567,6 @@ template <> void InstARM32Ldr::emit(const Cfg *Func) const {
const bool IsScalarFloat = isScalarFloatingType(Ty); const bool IsScalarFloat = isScalarFloatingType(Ty);
const char *ActualOpcode = const char *ActualOpcode =
IsVector ? "vld1" : (IsScalarFloat ? "vldr" : "ldr"); IsVector ? "vld1" : (IsScalarFloat ? "vldr" : "ldr");
const char *VectorMarker = IsVector ? ".64" : "";
const char *WidthString = IsVector ? "" : getWidthString(Ty); const char *WidthString = IsVector ? "" : getWidthString(Ty);
Str << "\t" << ActualOpcode; Str << "\t" << ActualOpcode;
const bool IsVInst = IsVector || IsScalarFloat; const bool IsVInst = IsVector || IsScalarFloat;
...@@ -1572,7 +1575,9 @@ template <> void InstARM32Ldr::emit(const Cfg *Func) const { ...@@ -1572,7 +1575,9 @@ template <> void InstARM32Ldr::emit(const Cfg *Func) const {
} else { } else {
Str << WidthString << getPredicate(); Str << WidthString << getPredicate();
} }
Str << VectorMarker << "\t"; if (IsVector)
Str << "." << getVecElmtBitsize(Ty);
Str << "\t";
getDest()->emit(Func); getDest()->emit(Func);
Str << ", "; Str << ", ";
getSrc(0)->emit(Func); getSrc(0)->emit(Func);
...@@ -1580,14 +1585,18 @@ template <> void InstARM32Ldr::emit(const Cfg *Func) const { ...@@ -1580,14 +1585,18 @@ template <> void InstARM32Ldr::emit(const Cfg *Func) const {
template <> void InstARM32Ldr::emitIAS(const Cfg *Func) const { template <> void InstARM32Ldr::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
Variable *Dest = getDest(); Variable *Dest = getDest();
const Type DestTy = Dest->getType(); const Type DestTy = Dest->getType();
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
if (isScalarFloatingType(DestTy)) {
switch (DestTy) { switch (DestTy) {
default: default:
// TODO(kschimpf) Does this happen? llvm::report_fatal_error("Ldr on unknown type: " + typeIceString(DestTy));
Asm->setNeedsTextFixup(); case IceType_i1:
case IceType_i8:
case IceType_i16:
case IceType_i32:
case IceType_i64:
Asm->ldr(Dest, getSrc(0), getPredicate(), Func->getTarget());
break; break;
case IceType_f32: case IceType_f32:
Asm->vldrs(Dest, getSrc(0), getPredicate(), Func->getTarget()); Asm->vldrs(Dest, getSrc(0), getPredicate(), Func->getTarget());
...@@ -1595,14 +1604,13 @@ template <> void InstARM32Ldr::emitIAS(const Cfg *Func) const { ...@@ -1595,14 +1604,13 @@ template <> void InstARM32Ldr::emitIAS(const Cfg *Func) const {
case IceType_f64: case IceType_f64:
Asm->vldrd(Dest, getSrc(0), getPredicate(), Func->getTarget()); Asm->vldrd(Dest, getSrc(0), getPredicate(), Func->getTarget());
break; break;
case IceType_v16i8:
case IceType_v8i16:
case IceType_v4i32:
case IceType_v4f32:
Asm->vld1qr(getVecElmtBitsize(DestTy), Dest, getSrc(0), Func->getTarget());
break;
} }
} else if (isVectorType(DestTy))
// TODO(kschimpf) Handle case.
Asm->setNeedsTextFixup();
else
Asm->ldr(Dest, getSrc(0), getPredicate(), Func->getTarget());
if (Asm->needsTextFixup())
emitUsingTextFixup(Func);
} }
template <> void InstARM32Ldrex::emit(const Cfg *Func) const { template <> void InstARM32Ldrex::emit(const Cfg *Func) const {
...@@ -1898,7 +1906,6 @@ void InstARM32Str::emit(const Cfg *Func) const { ...@@ -1898,7 +1906,6 @@ void InstARM32Str::emit(const Cfg *Func) const {
const bool IsScalarFloat = isScalarFloatingType(Ty); const bool IsScalarFloat = isScalarFloatingType(Ty);
const char *Opcode = const char *Opcode =
IsVectorStore ? "vst1" : (IsScalarFloat ? "vstr" : "str"); IsVectorStore ? "vst1" : (IsScalarFloat ? "vstr" : "str");
const char *VecEltWidthString = IsVectorStore ? ".64" : "";
Str << "\t" << Opcode; Str << "\t" << Opcode;
const bool IsVInst = IsVectorStore || IsScalarFloat; const bool IsVInst = IsVectorStore || IsScalarFloat;
if (IsVInst) { if (IsVInst) {
...@@ -1906,7 +1913,9 @@ void InstARM32Str::emit(const Cfg *Func) const { ...@@ -1906,7 +1913,9 @@ void InstARM32Str::emit(const Cfg *Func) const {
} else { } else {
Str << getWidthString(Ty) << getPredicate(); Str << getWidthString(Ty) << getPredicate();
} }
Str << VecEltWidthString << "\t"; if (IsVectorStore)
Str << "." << getVecElmtBitsize(Ty);
Str << "\t";
getSrc(0)->emit(Func); getSrc(0)->emit(Func);
Str << ", "; Str << ", ";
getSrc(1)->emit(Func); getSrc(1)->emit(Func);
...@@ -1914,28 +1923,33 @@ void InstARM32Str::emit(const Cfg *Func) const { ...@@ -1914,28 +1923,33 @@ void InstARM32Str::emit(const Cfg *Func) const {
void InstARM32Str::emitIAS(const Cfg *Func) const { void InstARM32Str::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 2); assert(getSrcSize() == 2);
Type Ty = getSrc(0)->getType();
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
if (isScalarFloatingType(Ty)) { const Operand *Src0 = getSrc(0);
const Operand *Src1 = getSrc(1);
Type Ty = Src0->getType();
switch (Ty) { switch (Ty) {
default: default:
// TODO(kschimpf) Does this happen? llvm::report_fatal_error("Str on unknown type: " + typeIceString(Ty));
Asm->setNeedsTextFixup(); case IceType_i1:
case IceType_i8:
case IceType_i16:
case IceType_i32:
case IceType_i64:
Asm->str(Src0, Src1, getPredicate(), Func->getTarget());
break; break;
case IceType_f32: case IceType_f32:
Asm->vstrs(getSrc(0), getSrc(1), getPredicate(), Func->getTarget()); Asm->vstrs(Src0, Src1, getPredicate(), Func->getTarget());
break; break;
case IceType_f64: case IceType_f64:
Asm->vstrd(getSrc(0), getSrc(1), getPredicate(), Func->getTarget()); Asm->vstrd(Src0, Src1, getPredicate(), Func->getTarget());
break;
case IceType_v16i8:
case IceType_v8i16:
case IceType_v4i32:
case IceType_v4f32:
Asm->vst1qr(getVecElmtBitsize(Ty), Src0, Src1, Func->getTarget());
break; break;
} }
} else if (isVectorType(Ty))
// TODO(kschimpf) Handle case.
Asm->setNeedsTextFixup();
else
Asm->str(getSrc(0), getSrc(1), getPredicate(), Func->getTarget());
if (Asm->needsTextFixup())
emitUsingTextFixup(Func);
} }
void InstARM32Str::dump(const Cfg *Func) const { void InstARM32Str::dump(const Cfg *Func) const {
......
; Show that we know how to translate vector load instructions. ; Show that we know how to translate vector load instructions.
; Note: Uses -O2 to remove unnecessary loads/stores, resulting in only one VLD1
; instruction per function.
; REQUIRES: allow_dump ; REQUIRES: allow_dump
; Compile using standalone assembler. ; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \ ; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=ASM ; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code. ; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \ ; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \ ; RUN: --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=DIS ; RUN: | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler. ; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \ ; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=IASM ; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code. ; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \ ; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \ ; RUN: --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=DIS ; RUN: | FileCheck %s --check-prefix=DIS
define internal <4 x float> @testDerefFloat4(<4 x float> *%p) { define internal <4 x float> @testDerefFloat4(<4 x float> *%p) {
; ASM-LABEL: testDerefFloat4: ; ASM-LABEL: testDerefFloat4:
; DIS-LABEL: 00000000 <testDerefFloat4>: ; DIS-LABEL: {{.+}} <testDerefFloat4>:
; IASM-LABEL: testDerefFloat4: ; IASM-LABEL: testDerefFloat4:
entry: entry:
%ret = load <4 x float>, <4 x float>* %p, align 4 %ret = load <4 x float>, <4 x float>* %p, align 4
; ASM: vld1.64 q0, [r0] ; ASM: vld1.32 q11, [r5]
; DIS: 0: f4200acf ; DIS: {{.*}}: f4656a8f
; IASM-NOT: vld1.32
ret <4 x float> %ret ret <4 x float> %ret
} }
define internal <4 x i32> @testDeref4i32(<4 x i32> *%p) { define internal <4 x i32> @testDeref4i32(<4 x i32> *%p) {
; ASM-LABEL: testDeref4i32: ; ASM-LABEL: testDeref4i32:
; DIS-LABEL: 00000010 <testDeref4i32>: ; DIS-LABEL: {{.+}} <testDeref4i32>:
; IASM-LABEL: testDeref4i32: ; IASM-LABEL: testDeref4i32:
entry: entry:
%ret = load <4 x i32>, <4 x i32>* %p, align 4 %ret = load <4 x i32>, <4 x i32>* %p, align 4
; ASM: vld1.64 q0, [r0] ; ASM: vld1.32 q11, [r5]
; DIS: 10: f4200acf ; DIS: {{.+}}: f4656a8f
; IASM-NOT: vld1.32
ret <4 x i32> %ret ret <4 x i32> %ret
} }
define internal <8 x i16> @testDeref8i16(<8 x i16> *%p) { define internal <8 x i16> @testDeref8i16(<8 x i16> *%p) {
; ASM-LABEL: testDeref8i16: ; ASM-LABEL: testDeref8i16:
; DIS-LABEL: 00000020 <testDeref8i16>: ; DIS-LABEL: {{.+}} <testDeref8i16>:
; IASM-LABEL: testDeref8i16: ; IASM-LABEL: testDeref8i16:
entry: entry:
%ret = load <8 x i16>, <8 x i16>* %p, align 2 %ret = load <8 x i16>, <8 x i16>* %p, align 2
; ASM: vld1.64 q0, [r0] ; ASM: vld1.16 q11, [r5]
; DIS: 20: f4200acf ; DIS: {{.+}}: f4656a4f
; IASM-NOT: vld1.16
ret <8 x i16> %ret ret <8 x i16> %ret
} }
define internal <16 x i8> @testDeref16i8(<16 x i8> *%p) { define internal <16 x i8> @testDeref16i8(<16 x i8> *%p) {
; ASM-LABEL: testDeref16i8: ; ASM-LABEL: testDeref16i8:
; DIS-LABEL: 00000030 <testDeref16i8>: ; DIS-LABEL: {{.+}} <testDeref16i8>:
; IASM-LABEL: testDeref16i8: ; IASM-LABEL: testDeref16i8:
entry: entry:
%ret = load <16 x i8>, <16 x i8>* %p, align 1 %ret = load <16 x i8>, <16 x i8>* %p, align 1
; ASM: vld1.64 q0, [r0] ; ASM: vld1.8 q11, [r5]
; DIS: 30: f4200acf ; DIS: {{.+}}: f4656a0f
; IASM-NOT: vld1.8
ret <16 x i8> %ret ret <16 x i8> %ret
} }
; Show that we know how to translate vector store instructions.
; Note: Uses -O2 to remove unnecessary loads/stores, resulting in only one VST1
; instruction per function.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=DIS
define internal void @testDerefFloat4(<4 x float>* %p, <4 x float> %v) {
; ASM-LABEL: testDerefFloat4:
; DIS-LABEL: {{.+}} <testDerefFloat4>:
entry:
store <4 x float> %v, <4 x float>* %p, align 4
; ASM: vst1.32 q11, [r5]
; DIS: {{.+}}: f4456a8f
; IASM-NOT: vst1.32
ret void
}
define internal void @testDeref4i32(<4 x i32> *%p, <4 x i32> %v) {
; ASM-LABEL: testDeref4i32:
; DIS-LABEL: {{.+}} <testDeref4i32>:
entry:
store <4 x i32> %v, <4 x i32>* %p, align 4
; ASM: vst1.32 q11, [r5]
; DIS: {{.+}}: f4456a8f
; IASM-NOT: vst1.32
ret void
}
define internal void @testDeref8i16(<8 x i16> *%p, <8 x i16> %v) {
; ASM-LABEL: testDeref8i16:
; DIS-LABEL: {{.+}} <testDeref8i16>:
store <8 x i16> %v, <8 x i16>* %p, align 2
; ASM: vst1.16 q11, [r5]
; DIS: {{.+}}: f4456a4f
; IASM-NOT: vst1.16
ret void
}
define internal void @testDeref16i8(<16 x i8> *%p, <16 x i8> %v) {
; ASM-LABEL: testDeref16i8:
; DIS-LABEL: {{.+}} <testDeref16i8>:
store <16 x i8> %v, <16 x i8>* %p, align 1
; ASM: vst1.8 q11, [r5]
; DIS: {{.+}}: f4456a0f
; IASM-NOT: vst1.8
ret void
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment