Commit 18cce427 by Karl Schimpf

Fix vector load/stores in the ARM assembler.

Fixes emit() methods for load/store to specify the element size (affects alignment issues). Also adds assembler methods to generate the corresponding binary forms, and updates emitIAS() to call these assembler methods. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4334 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1663053008 .
parent 6e8d3fae
......@@ -66,9 +66,7 @@ def main():
flat_attrs += v
arch_flags = { 'x8632': [],
'x8664': [],
# ARM doesn't have an ELF writer yet, and iasm does not
# support sandboxing yet.
'arm32': ['--filetype=asm'] }
'arm32': [] }
# all_keys is only used in the help text.
all_keys = '; '.join([' '.join(targets), ' '.join(sandboxing),
' '.join(opt_levels), ' '.join(flat_attrs)])
......
......@@ -1402,11 +1402,13 @@ class Assembler : public ValueObject {
#if 0
// Added the following missing operations:
//
// ARM32::AssemblerARM32::uxt() (uxtb and uxth).
// ARM32::AssemblerARM32::uxt() (uxtb and uxth)
// ARM32::AssemblerARM32::vpop()
// ARM32::AssemblerARM32::vpush()
// ARM32::AssemblerARM:rbit().
// ARM32::AssemblerARM::veord()
// ARM32::AssemblerARM32:rbit()
// ARM32::AssemblerARM32::veord()
// ARM32::AssemblerARM32::vld1qr()
// ARM32::AssemblerARM32::vst1qr()
#endif
DISALLOW_ALLOCATION();
......
......@@ -2544,6 +2544,61 @@ void AssemblerARM32::vldrs(const Operand *OpSd, const Operand *OpAddress,
emitInst(Encoding);
}
// Emits a one-register-list NEON memory-access instruction (the shared
// encoder for VLD1/VST1, multiple single elements).
//
// Builds Opcode | 1111...D..nnnnddddttttssaammmm, where D:dddd=Dd, nnnn=Rn,
// mmmm=Rm, tttt=NumDRegs, ss=the encoding of ElmtSize, and aa=Align.
// ElmtSize is the element width in bits and must be one of {8, 16, 32, 64};
// any other value reports a fatal error naming InstName.
void AssemblerARM32::emitVMem1Op(IValueT Opcode, IValueT Dd, IValueT Rn,
                                 IValueT Rm, DRegListSize NumDRegs,
                                 size_t ElmtSize, IValueT Align,
                                 const char *InstName) {
  // The alignment field (aa) is only two bits wide.
  assert(Utils::IsAbsoluteUint(2, Align));
  IValueT EncodedElmtSize;
  switch (ElmtSize) {
  default: {
    std::string Buffer;
    llvm::raw_string_ostream StrBuf(Buffer);
    StrBuf << InstName << ": found invalid vector element size " << ElmtSize;
    llvm::report_fatal_error(StrBuf.str());
  }
  case 8:
    EncodedElmtSize = 0;
    break;
  case 16:
    EncodedElmtSize = 1;
    break;
  case 32:
    EncodedElmtSize = 2;
    break;
  case 64:
    EncodedElmtSize = 3;
    // break added for consistency with the other cases, and so a case added
    // after this one can't silently fall through.
    break;
  }
  const IValueT Encoding =
      Opcode | (encodeCondition(CondARM32::kNone) << kConditionShift) |
      (getYInRegYXXXX(Dd) << 22) | (Rn << kRnShift) |
      (getXXXXInRegYXXXX(Dd) << kRdShift) | (NumDRegs << 8) |
      (EncodedElmtSize << 6) | (Align << 4) | Rm;
  emitInst(Encoding);
}
// Emits a VLD1 (multiple single elements) loading one Q register.
// ARM section A8.8.320, encoding A1:
//   vld1.<size> <Qd>, [<Rn>]
//
// 111101000D10nnnnddd0ttttssaammmm where tttt=DRegListSize2 (a Q register is
// a pair of D registers, hence the two-register list), D:ddd0=Qd mapped to
// its base D register, nnnn=Rn, aa=0 (use default alignment), and ss is the
// encoding of ElmtSize (element width in bits).
void AssemblerARM32::vld1qr(size_t ElmtSize, const Operand *OpQd,
                            const Operand *OpAddress, const TargetInfo &TInfo) {
  constexpr const char *Vld1qr = "vld1qr";
  const IValueT Qd = encodeQRegister(OpQd, "Qd", Vld1qr);
  const IValueT Dd = mapQRegToDReg(Qd);
  IValueT Address;
  // Only a plain [<Rn>] (register base, no immediate offset) is encodable.
  if (encodeAddress(OpAddress, Address, TInfo, NoImmOffsetAddress) !=
      EncodedAsImmRegOffset)
    llvm::report_fatal_error(std::string(Vld1qr) +
                             ": malformed memory address");
  const IValueT Rn = mask(Address, kRnShift, 4);
  // Rm=0b1111 (pc) selects the "no writeback" form per the ARM ARM.
  constexpr IValueT Rm = RegARM32::Reg_pc;
  constexpr IValueT Opcode = B26 | B21;
  constexpr IValueT Align = 0; // Use default alignment.
  emitVMem1Op(Opcode, Dd, Rn, Rm, DRegListSize2, ElmtSize, Align, Vld1qr);
}
void AssemblerARM32::vmovd(const Operand *OpDd,
const OperandARM32FlexFpImm *OpFpImm,
CondARM32::Cond Cond) {
......@@ -2858,6 +2913,28 @@ void AssemblerARM32::vstrs(const Operand *OpSd, const Operand *OpAddress,
emitInst(Encoding);
}
// Emits a VST1 (multiple single elements) storing one Q register.
// ARM section A8.8.404, encoding A1:
//   vst1.<size> <Qd>, [<Rn>]
//
// 111101000D00nnnnddd0ttttssaammmm where tttt=DRegListSize2 (a Q register is
// a pair of D registers, hence the two-register list), D:ddd0=Qd mapped to
// its base D register, nnnn=Rn, aa=0 (use default alignment), and ss is the
// encoding of ElmtSize (element width in bits).
void AssemblerARM32::vst1qr(size_t ElmtSize, const Operand *OpQd,
                            const Operand *OpAddress, const TargetInfo &TInfo) {
  constexpr const char *Vst1qr = "vst1qr";
  const IValueT Qd = encodeQRegister(OpQd, "Qd", Vst1qr);
  const IValueT Dd = mapQRegToDReg(Qd);
  IValueT Address;
  // Only a plain [<Rn>] (register base, no immediate offset) is encodable.
  if (encodeAddress(OpAddress, Address, TInfo, NoImmOffsetAddress) !=
      EncodedAsImmRegOffset)
    llvm::report_fatal_error(std::string(Vst1qr) +
                             ": malformed memory address");
  const IValueT Rn = mask(Address, kRnShift, 4);
  // Rm=0b1111 (pc) selects the "no writeback" form per the ARM ARM.
  constexpr IValueT Rm = RegARM32::Reg_pc;
  constexpr IValueT Opcode = B26;
  constexpr IValueT Align = 0; // Use default alignment.
  emitVMem1Op(Opcode, Dd, Rn, Rm, DRegListSize2, ElmtSize, Align, Vst1qr);
}
void AssemblerARM32::vsubs(const Operand *OpSd, const Operand *OpSn,
const Operand *OpSm, CondARM32::Cond Cond) {
// VSUB (floating-point) - ARM section A8.8.415, encoding A2:
......
......@@ -73,6 +73,14 @@ public:
kRotate24 // ror #24
};
// Encoding of the number of D registers in a list of D registers (the "type"
// field of VLD1/VST1 multiple-single-element instructions; see ARM sections
// A8.8.320 and A8.8.404).
enum DRegListSize {
  DRegListSize1 = 7,             // 0b0111 - one D register
  DRegListSize2 = 10,            // 0b1010 - two D registers (one Q register)
  DRegListSize3 = 6,             // 0b0110 - three D registers
  DRegListSIze3 = DRegListSize3, // Deprecated misspelling; kept so existing
                                 // uses of the old name still compile.
  DRegListSize4 = 2              // 0b0010 - four D registers
};
class TargetInfo {
TargetInfo(const TargetInfo &) = delete;
TargetInfo &operator=(const TargetInfo &) = delete;
......@@ -399,6 +407,16 @@ public:
vldrs(OpSd, OpAddress, Cond, TInfo);
}
// ElmtSize = #bits in vector element.
void vld1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpRn,
const TargetInfo &TInfo);
// Convenience overload: wraps the TargetLowering in a TargetInfo and
// forwards to the TargetInfo-based vld1qr above.
void vld1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpRn,
            const TargetLowering *Lowering) {
  const TargetInfo TInfo(Lowering);
  vld1qr(ElmtSize, OpQd, OpRn, TInfo);
}
void vmovd(const Operand *OpDn, const OperandARM32FlexFpImm *OpFpImm,
CondARM32::Cond Cond);
......@@ -477,6 +495,16 @@ public:
vstrs(OpSd, OpAddress, Cond, TInfo);
}
// ElmtSize = #bits in vector element.
void vst1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpAddress,
const TargetInfo &TInfo);
// Convenience overload: wraps the TargetLowering in a TargetInfo and
// forwards to the TargetInfo-based vst1qr above.
void vst1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpRn,
            const TargetLowering *Lowering) {
  const TargetInfo TInfo(Lowering);
  vst1qr(ElmtSize, OpQd, OpRn, TInfo);
}
void vsubd(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm,
CondARM32::Cond Cond);
......@@ -601,6 +629,13 @@ private:
// mmmmM=Sm, and xx0xxxxxxdddd000xxx0x0000=Opcode.
void emitVFPds(CondARM32::Cond Cond, IValueT Opcode, IValueT Dd, IValueT Sm);
// Pattern 111100000D00nnnnddddttttssaammmm | Opcode where Ddddd=Dd, nnnn=Rn,
// mmmmm=Rm, tttt=NumDRegs, ElmtSize in {8, 16, 32, 64} and defines ss, and
// aa=Align.
void emitVMem1Op(IValueT Opcode, IValueT Dd, IValueT Rn, IValueT Rm,
DRegListSize NumDRegs, size_t ElmtSize, IValueT Align,
const char *InstName);
// Pattern cccc011100x1dddd1111mmmm0001nnn where cccc=Cond,
// x=Opcode, dddd=Rd, nnnn=Rn, mmmm=Rm.
void emitDivOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd, IValueT Rn,
......
......@@ -65,6 +65,10 @@ const struct InstARM32CondAttributes_ {
#undef X
};
// Returns the width, in bits, of a single element of vector type Ty.
size_t getVecElmtBitsize(Type Ty) {
  const Type ElmtTy = typeElementType(Ty);
  return CHAR_BIT * typeWidthInBytes(ElmtTy);
}
} // end of anonymous namespace
const char *InstARM32::getWidthString(Type Ty) {
......@@ -1563,7 +1567,6 @@ template <> void InstARM32Ldr::emit(const Cfg *Func) const {
const bool IsScalarFloat = isScalarFloatingType(Ty);
const char *ActualOpcode =
IsVector ? "vld1" : (IsScalarFloat ? "vldr" : "ldr");
const char *VectorMarker = IsVector ? ".64" : "";
const char *WidthString = IsVector ? "" : getWidthString(Ty);
Str << "\t" << ActualOpcode;
const bool IsVInst = IsVector || IsScalarFloat;
......@@ -1572,7 +1575,9 @@ template <> void InstARM32Ldr::emit(const Cfg *Func) const {
} else {
Str << WidthString << getPredicate();
}
Str << VectorMarker << "\t";
if (IsVector)
Str << "." << getVecElmtBitsize(Ty);
Str << "\t";
getDest()->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
......@@ -1580,29 +1585,32 @@ template <> void InstARM32Ldr::emit(const Cfg *Func) const {
// Integrated-assembler emission for Ldr. The scraped diff fused the removed
// and added hunks (duplicate `auto *Asm` declaration, the old if/else chain
// spliced onto the new switch); this is the coherent post-change version:
// dispatch on the destination type, using vld1qr for 128-bit vector loads.
template <> void InstARM32Ldr::emitIAS(const Cfg *Func) const {
  assert(getSrcSize() == 1);
  Variable *Dest = getDest();
  const Type DestTy = Dest->getType();
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  switch (DestTy) {
  default:
    llvm::report_fatal_error("Ldr on unknown type: " + typeIceString(DestTy));
  case IceType_i1:
  case IceType_i8:
  case IceType_i16:
  case IceType_i32:
  case IceType_i64:
    Asm->ldr(Dest, getSrc(0), getPredicate(), Func->getTarget());
    // The assembler may decline to encode this form; fall back to textual
    // emission in that case.
    if (Asm->needsTextFixup())
      emitUsingTextFixup(Func);
    break;
  case IceType_f32:
    Asm->vldrs(Dest, getSrc(0), getPredicate(), Func->getTarget());
    break;
  case IceType_f64:
    Asm->vldrd(Dest, getSrc(0), getPredicate(), Func->getTarget());
    break;
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
  case IceType_v4f32:
    Asm->vld1qr(getVecElmtBitsize(DestTy), Dest, getSrc(0), Func->getTarget());
    break;
  }
}
template <> void InstARM32Ldrex::emit(const Cfg *Func) const {
......@@ -1898,7 +1906,6 @@ void InstARM32Str::emit(const Cfg *Func) const {
const bool IsScalarFloat = isScalarFloatingType(Ty);
const char *Opcode =
IsVectorStore ? "vst1" : (IsScalarFloat ? "vstr" : "str");
const char *VecEltWidthString = IsVectorStore ? ".64" : "";
Str << "\t" << Opcode;
const bool IsVInst = IsVectorStore || IsScalarFloat;
if (IsVInst) {
......@@ -1906,7 +1913,9 @@ void InstARM32Str::emit(const Cfg *Func) const {
} else {
Str << getWidthString(Ty) << getPredicate();
}
Str << VecEltWidthString << "\t";
if (IsVectorStore)
Str << "." << getVecElmtBitsize(Ty);
Str << "\t";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
......@@ -1914,28 +1923,33 @@ void InstARM32Str::emit(const Cfg *Func) const {
// Integrated-assembler emission for Str. The scraped diff fused the removed
// and added hunks (duplicate `Type Ty` declaration, the old if/else chain
// spliced onto the new switch); this is the coherent post-change version:
// dispatch on the source type, using vst1qr for 128-bit vector stores.
void InstARM32Str::emitIAS(const Cfg *Func) const {
  assert(getSrcSize() == 2);
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const Operand *Src0 = getSrc(0); // value to store
  const Operand *Src1 = getSrc(1); // memory address
  Type Ty = Src0->getType();
  switch (Ty) {
  default:
    llvm::report_fatal_error("Str on unknown type: " + typeIceString(Ty));
  case IceType_i1:
  case IceType_i8:
  case IceType_i16:
  case IceType_i32:
  case IceType_i64:
    Asm->str(Src0, Src1, getPredicate(), Func->getTarget());
    break;
  case IceType_f32:
    Asm->vstrs(Src0, Src1, getPredicate(), Func->getTarget());
    break;
  case IceType_f64:
    Asm->vstrd(Src0, Src1, getPredicate(), Func->getTarget());
    break;
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
  case IceType_v4f32:
    Asm->vst1qr(getVecElmtBitsize(Ty), Src0, Src1, Func->getTarget());
    break;
  }
}
void InstARM32Str::dump(const Cfg *Func) const {
......
; Show that we know how to translate vector load instructions.
; Note: Uses -O2 to remove unnecessary loads/stores, resulting in only one VLD1
; instruction per function.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=DIS
define internal <4 x float> @testDerefFloat4(<4 x float> *%p) {
; ASM-LABEL: testDerefFloat4:
; DIS-LABEL: 00000000 <testDerefFloat4>:
; DIS-LABEL: {{.+}} <testDerefFloat4>:
; IASM-LABEL: testDerefFloat4:
entry:
%ret = load <4 x float>, <4 x float>* %p, align 4
; ASM: vld1.64 q0, [r0]
; DIS: 0: f4200acf
; ASM: vld1.32 q11, [r5]
; DIS: {{.*}}: f4656a8f
; IASM-NOT: vld1.32
ret <4 x float> %ret
}
define internal <4 x i32> @testDeref4i32(<4 x i32> *%p) {
; ASM-LABEL: testDeref4i32:
; DIS-LABEL: 00000010 <testDeref4i32>:
; DIS-LABEL: {{.+}} <testDeref4i32>:
; IASM-LABEL: testDeref4i32:
entry:
%ret = load <4 x i32>, <4 x i32>* %p, align 4
; ASM: vld1.64 q0, [r0]
; DIS: 10: f4200acf
; ASM: vld1.32 q11, [r5]
; DIS: {{.+}}: f4656a8f
; IASM-NOT: vld1.32
ret <4 x i32> %ret
}
define internal <8 x i16> @testDeref8i16(<8 x i16> *%p) {
; ASM-LABEL: testDeref8i16:
; DIS-LABEL: 00000020 <testDeref8i16>:
; DIS-LABEL: {{.+}} <testDeref8i16>:
; IASM-LABEL: testDeref8i16:
entry:
%ret = load <8 x i16>, <8 x i16>* %p, align 2
; ASM: vld1.64 q0, [r0]
; DIS: 20: f4200acf
; ASM: vld1.16 q11, [r5]
; DIS: {{.+}}: f4656a4f
; IASM-NOT: vld1.16
ret <8 x i16> %ret
}
define internal <16 x i8> @testDeref16i8(<16 x i8> *%p) {
; ASM-LABEL: testDeref16i8:
; DIS-LABEL: 00000030 <testDeref16i8>:
; DIS-LABEL: {{.+}} <testDeref16i8>:
; IASM-LABEL: testDeref16i8:
entry:
%ret = load <16 x i8>, <16 x i8>* %p, align 1
; ASM: vld1.64 q0, [r0]
; DIS: 30: f4200acf
; ASM: vld1.8 q11, [r5]
; DIS: {{.+}}: f4656a0f
; IASM-NOT: vld1.8
ret <16 x i8> %ret
}
; Show that we know how to translate vector store instructions.
; Note: Uses -O2 to remove unnecessary loads/stores, resulting in only one VST1
; instruction per function.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 \
; RUN: -reg-use=q11,r5 \
; RUN: | FileCheck %s --check-prefix=DIS
define internal void @testDerefFloat4(<4 x float>* %p, <4 x float> %v) {
; ASM-LABEL: testDerefFloat4:
; DIS-LABEL: {{.+}} <testDerefFloat4>:
entry:
store <4 x float> %v, <4 x float>* %p, align 4
; ASM: vst1.32 q11, [r5]
; DIS: {{.+}}: f4456a8f
; IASM-NOT: vst1.32
ret void
}
define internal void @testDeref4i32(<4 x i32> *%p, <4 x i32> %v) {
; ASM-LABEL: testDeref4i32:
; DIS-LABEL: {{.+}} <testDeref4i32>:
entry:
store <4 x i32> %v, <4 x i32>* %p, align 4
; ASM: vst1.32 q11, [r5]
; DIS: {{.+}}: f4456a8f
; IASM-NOT: vst1.32
ret void
}
define internal void @testDeref8i16(<8 x i16> *%p, <8 x i16> %v) {
; ASM-LABEL: testDeref8i16:
; DIS-LABEL: {{.+}} <testDeref8i16>:
store <8 x i16> %v, <8 x i16>* %p, align 2
; ASM: vst1.16 q11, [r5]
; DIS: {{.+}}: f4456a4f
; IASM-NOT: vst1.16
ret void
}
define internal void @testDeref16i8(<16 x i8> *%p, <16 x i8> %v) {
; ASM-LABEL: testDeref16i8:
; DIS-LABEL: {{.+}} <testDeref16i8>:
store <16 x i8> %v, <16 x i8>* %p, align 1
; ASM: vst1.8 q11, [r5]
; DIS: {{.+}}: f4456a0f
; IASM-NOT: vst1.8
ret void
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment