Commit cc69fa29 by Eric Holk

ARM32 vector ops - scalarize icmp, fcmp and cast.

This is part of a sequence of patches to quickly fill out vector support by scalarizing the remaining operations. Later we can work to generate better code. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1683153003 .
parent 8aa39661
...@@ -1755,6 +1755,9 @@ template <> void InstARM32Ldr::emitIAS(const Cfg *Func) const { ...@@ -1755,6 +1755,9 @@ template <> void InstARM32Ldr::emitIAS(const Cfg *Func) const {
case IceType_v8i16: case IceType_v8i16:
case IceType_v4i32: case IceType_v4i32:
case IceType_v4f32: case IceType_v4f32:
case IceType_v16i1:
case IceType_v8i1:
case IceType_v4i1:
Asm->vld1qr(getVecElmtBitsize(DestTy), Dest, getSrc(0), Func->getTarget()); Asm->vld1qr(getVecElmtBitsize(DestTy), Dest, getSrc(0), Func->getTarget());
break; break;
} }
...@@ -2094,6 +2097,9 @@ void InstARM32Str::emitIAS(const Cfg *Func) const { ...@@ -2094,6 +2097,9 @@ void InstARM32Str::emitIAS(const Cfg *Func) const {
case IceType_v8i16: case IceType_v8i16:
case IceType_v4i32: case IceType_v4i32:
case IceType_v4f32: case IceType_v4f32:
case IceType_v16i1:
case IceType_v8i1:
case IceType_v4i1:
Asm->vst1qr(getVecElmtBitsize(Ty), Src0, Src1, Func->getTarget()); Asm->vst1qr(getVecElmtBitsize(Ty), Src0, Src1, Func->getTarget());
break; break;
} }
......
...@@ -705,34 +705,11 @@ bool TargetLowering::shouldOptimizeMemIntrins() { ...@@ -705,34 +705,11 @@ bool TargetLowering::shouldOptimizeMemIntrins() {
void TargetLowering::scalarizeArithmetic(InstArithmetic::OpKind Kind, void TargetLowering::scalarizeArithmetic(InstArithmetic::OpKind Kind,
Variable *Dest, Operand *Src0, Variable *Dest, Operand *Src0,
Operand *Src1) { Operand *Src1) {
assert(isVectorType(Dest->getType())); scalarizeInstruction(
Type Ty = Dest->getType(); Dest, Src0, Src1,
Type ElementTy = typeElementType(Ty); [this, Kind](Variable *Dest, Variable *Src0, Variable *Src1) {
SizeT NumElements = typeNumElements(Ty); return Context.insert<InstArithmetic>(Kind, Dest, Src0, Src1);
});
Operand *T = Ctx->getConstantUndef(Ty);
for (SizeT I = 0; I < NumElements; ++I) {
Constant *Index = Ctx->getConstantInt32(I);
// Extract the next two inputs.
Variable *Op0 = Func->makeVariable(ElementTy);
Context.insert<InstExtractElement>(Op0, Src0, Index);
Variable *Op1 = Func->makeVariable(ElementTy);
Context.insert<InstExtractElement>(Op1, Src1, Index);
// Perform the arithmetic as a scalar operation.
Variable *Res = Func->makeVariable(ElementTy);
auto *Arith = Context.insert<InstArithmetic>(Kind, Res, Op0, Op1);
// We might have created an operation that needed a helper call.
genTargetHelperCallFor(Arith);
// Insert the result into position.
Variable *DestT = Func->makeVariable(Ty);
Context.insert<InstInsertElement>(DestT, T, Res, Index);
T = DestT;
}
Context.insert<InstAssign>(Dest, T);
} }
void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C, void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C,
......
...@@ -467,6 +467,82 @@ protected: ...@@ -467,6 +467,82 @@ protected:
void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest, void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
Operand *Src0, Operand *Src1); Operand *Src0, Operand *Src1);
/// Lowers a two-operand vector instruction into per-lane scalar instructions.
/// This generalizes scalarizeArithmetic to other instruction kinds.
///
/// For each lane of Dest, the corresponding element is extracted from Src0
/// and from Src1, MakeInstruction emits the scalar operation on them, and the
/// scalar result is inserted into a chain of vector temporaries. The last
/// temporary in the chain is assigned to Dest.
///
/// MakeInstruction is a function-like object called as
///   MakeInstruction(Variable *Dest, Variable *Src0, Variable *Src1)
/// whose result is the scalar instruction it created; that result is handed
/// to genTargetHelperCallFor.
template <typename F>
void scalarizeInstruction(Variable *Dest, Operand *Src0, Operand *Src1,
                          F &&MakeInstruction) {
  const Type VecTy = Dest->getType();
  assert(isVectorType(VecTy));
  const Type ResultLaneTy = typeElementType(VecTy);
  const SizeT LaneCount = typeNumElements(VecTy);
  // The sources may have a different element type than Dest (only the lane
  // counts are required to agree), so each gets its own scalar type.
  const Type LhsLaneTy = typeElementType(Src0->getType());
  const Type RhsLaneTy = typeElementType(Src1->getType());
  assert(LaneCount == typeNumElements(Src0->getType()));
  assert(LaneCount == typeNumElements(Src1->getType()));

  // Accum is the vector built up so far; it starts as a fake-defined
  // temporary that the first insert-element reads from.
  Variable *Accum = Func->makeVariable(VecTy);
  Context.insert<InstFakeDef>(Accum);

  for (SizeT Lane = 0; Lane != LaneCount; ++Lane) {
    Constant *LaneIndex = Ctx->getConstantInt32(Lane);

    // Pull this lane out of each source vector.
    Variable *Lhs = Func->makeVariable(LhsLaneTy);
    Context.insert<InstExtractElement>(Lhs, Src0, LaneIndex);
    Variable *Rhs = Func->makeVariable(RhsLaneTy);
    Context.insert<InstExtractElement>(Rhs, Src1, LaneIndex);

    // Emit the scalar form of the operation. The scalar instruction may
    // itself need to be turned into a helper call.
    Variable *LaneResult = Func->makeVariable(ResultLaneTy);
    auto ScalarInstr = MakeInstruction(LaneResult, Lhs, Rhs);
    genTargetHelperCallFor(ScalarInstr);

    // Place the scalar result into its lane of a new temporary.
    Variable *NextAccum = Func->makeVariable(VecTy);
    Context.insert<InstInsertElement>(NextAccum, Accum, LaneResult, LaneIndex);
    Accum = NextAccum;
  }

  Context.insert<InstAssign>(Dest, Accum);
}
/// Lowers a one-operand vector instruction into per-lane scalar instructions.
///
/// For each lane of Dest, the corresponding element is extracted from Src0,
/// MakeInstruction emits the scalar operation on it, and the scalar result is
/// inserted into a chain of vector temporaries. The last temporary in the
/// chain is assigned to Dest.
///
/// MakeInstruction is a function-like object called as
///   MakeInstruction(Variable *Dest, Variable *Src0)
/// whose result is the scalar instruction it created; that result is handed
/// to genTargetHelperCallFor.
template <typename F>
void scalarizeUnaryInstruction(Variable *Dest, Operand *Src0,
                               F &&MakeInstruction) {
  const Type VecTy = Dest->getType();
  assert(isVectorType(VecTy));
  const Type ResultLaneTy = typeElementType(VecTy);
  const SizeT LaneCount = typeNumElements(VecTy);
  // The source may have a different element type than Dest; only the lane
  // counts are required to agree.
  const Type SrcLaneTy = typeElementType(Src0->getType());
  assert(LaneCount == typeNumElements(Src0->getType()));

  // Accum is the vector built up so far; it starts as a fake-defined
  // temporary that the first insert-element reads from.
  Variable *Accum = Func->makeVariable(VecTy);
  Context.insert<InstFakeDef>(Accum);

  for (SizeT Lane = 0; Lane != LaneCount; ++Lane) {
    Constant *LaneIndex = Ctx->getConstantInt32(Lane);

    // Pull this lane out of the source vector.
    Variable *LaneInput = Func->makeVariable(SrcLaneTy);
    Context.insert<InstExtractElement>(LaneInput, Src0, LaneIndex);

    // Emit the scalar form of the operation. The scalar instruction may
    // itself need to be turned into a helper call.
    Variable *LaneResult = Func->makeVariable(ResultLaneTy);
    auto ScalarInstr = MakeInstruction(LaneResult, LaneInput);
    genTargetHelperCallFor(ScalarInstr);

    // Place the scalar result into its lane of a new temporary.
    Variable *NextAccum = Func->makeVariable(VecTy);
    Context.insert<InstInsertElement>(NextAccum, Accum, LaneResult, LaneIndex);
    Accum = NextAccum;
  }

  Context.insert<InstAssign>(Dest, Accum);
}
/// SandboxType enumerates all possible sandboxing strategies that /// SandboxType enumerates all possible sandboxing strategies that
enum SandboxType { enum SandboxType {
ST_None, ST_None,
......
...@@ -577,8 +577,18 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) { ...@@ -577,8 +577,18 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
Variable *Dest = Instr->getDest(); Variable *Dest = Instr->getDest();
Operand *Src0 = Instr->getSrc(0); Operand *Src0 = Instr->getSrc(0);
const Type DestTy = Dest->getType(); const Type DestTy = Dest->getType();
const InstCast::OpKind CastKind = auto *CastInstr = llvm::cast<InstCast>(Instr);
llvm::cast<InstCast>(Instr)->getCastKind(); const InstCast::OpKind CastKind = CastInstr->getCastKind();
if (isVectorType(DestTy)) {
scalarizeUnaryInstruction(
Dest, Src0, [this, CastKind](Variable *Dest, Variable *Src) {
return Context.insert<InstCast>(CastKind, Dest, Src);
});
CastInstr->setDeleted();
return;
}
switch (CastKind) { switch (CastKind) {
default: default:
return; return;
...@@ -723,6 +733,36 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) { ...@@ -723,6 +733,36 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
} }
llvm::report_fatal_error("Control flow should never have reached here."); llvm::report_fatal_error("Control flow should never have reached here.");
} }
case Inst::Icmp: {
Variable *Dest = Instr->getDest();
const Type DestTy = Dest->getType();
if (isVectorType(DestTy)) {
auto *CmpInstr = llvm::cast<InstIcmp>(Instr);
const auto Condition = CmpInstr->getCondition();
scalarizeInstruction(
Dest, CmpInstr->getSrc(0), CmpInstr->getSrc(1),
[this, Condition](Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstIcmp>(Condition, Dest, Src0, Src1);
});
CmpInstr->setDeleted();
}
return;
}
case Inst::Fcmp: {
Variable *Dest = Instr->getDest();
const Type DestTy = Dest->getType();
if (isVectorType(DestTy)) {
auto *CmpInstr = llvm::cast<InstFcmp>(Instr);
const auto Condition = CmpInstr->getCondition();
scalarizeInstruction(
Dest, CmpInstr->getSrc(0), CmpInstr->getSrc(1),
[this, Condition](Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstFcmp>(Condition, Dest, Src0, Src1);
});
CmpInstr->setDeleted();
}
return;
}
} }
} }
...@@ -4194,9 +4234,6 @@ TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0, ...@@ -4194,9 +4234,6 @@ TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
} }
TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) { TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) {
assert(Instr->getSrc(0)->getType() != IceType_i1);
assert(Instr->getSrc(1)->getType() != IceType_i1);
Operand *Src0 = legalizeUndef(Instr->getSrc(0)); Operand *Src0 = legalizeUndef(Instr->getSrc(0));
Operand *Src1 = legalizeUndef(Instr->getSrc(1)); Operand *Src1 = legalizeUndef(Instr->getSrc(1));
...@@ -4233,6 +4270,7 @@ TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) { ...@@ -4233,6 +4270,7 @@ TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) {
switch (Src0->getType()) { switch (Src0->getType()) {
default: default:
llvm::report_fatal_error("Unhandled type in lowerIcmpCond"); llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
case IceType_i1:
case IceType_i8: case IceType_i8:
case IceType_i16: case IceType_i16:
return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1); return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1);
......
; Test that we handle icmp and fcmp on vectors.
; TODO(eholk): This test will need to be updated once comparison is no
; longer scalarized.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -Om1 \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -Om1 | FileCheck %s --check-prefix=DIS
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -Om1 | FileCheck %s --check-prefix=DIS
define internal <4 x i32> @cmpEq4I32(<4 x i32> %a, <4 x i32> %b) {
; ASM-LABEL:cmpEq4I32:
; DIS-LABEL:00000000 <cmpEq4I32>:
entry:
%cmp = icmp eq <4 x i32> %a, %b
; ASM: cmp r1, r2
; ASM: cmp r1, r2
; ASM: cmp r1, r2
; ASM: cmp r1, r2
; DIS: 40: e1510002
%cmp.ret_ext = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %cmp.ret_ext
}
define internal <4 x i32> @cmpEq4f32(<4 x float> %a, <4 x float> %b) {
; ASM-LABEL:cmpEq4f32:
; DIS-LABEL:00000240 <cmpEq4f32>:
entry:
%cmp = fcmp oeq <4 x float> %a, %b
; ASM: vcmp.f32 s0, s1
; ASM: vcmp.f32 s0, s1
; ASM: vcmp.f32 s0, s1
; ASM: vcmp.f32 s0, s1
; DIS: 27c: eeb40a60
%cmp.ret_ext = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %cmp.ret_ext
}
...@@ -31,15 +31,19 @@ entry: ...@@ -31,15 +31,19 @@ entry:
; TODO(eholk): this code could be a lot better. Fix the code generator ; TODO(eholk): this code could be a lot better. Fix the code generator
; and update the test. Same for the rest of the tests. ; and update the test. Same for the rest of the tests.
; ASM: vdiv.f32 s8, s8, s9 ; ASM: vdiv.f32 s12, s12, s13
; ASM: vdiv.f32 s8, s8, s9 ; ASM-NEXT: vmov.f32 s8, s12
; ASM: vdiv.f32 s8, s8, s9 ; ASM: vdiv.f32 s12, s12, s13
; ASM: vdiv.f32 s0, s0, s4 ; ASM-NEXT: vmov.f32 s9, s12
; ASM: vdiv.f32 s12, s12, s13
; DIS: 8: ee844a24 ; ASM-NEXT: vmov.f32 s10, s12
; DIS: 1c: ee844a24 ; ASM: vdiv.f32 s0, s0, s4
; DIS: 2c: ee844a24 ; ASM-NEXT: vmov.f32 s11, s0
; DIS: 3c: ee800a02
; DIS: 8: ee866a26
; DIS: 18: ee866a26
; DIS: 28: ee866a26
; DIS: 38: ee800a02
; IASM-NOT: vdiv ; IASM-NOT: vdiv
...@@ -60,9 +64,9 @@ entry: ...@@ -60,9 +64,9 @@ entry:
; ASM: udiv r0, r0, r1 ; ASM: udiv r0, r0, r1
; DIS: 64: e730f110 ; DIS: 64: e730f110
; DIS: 84: e730f110 ; DIS: 80: e730f110
; DIS: a0: e730f110 ; DIS: 9c: e730f110
; DIS: bc: e730f110 ; DIS: b8: e730f110
; IASM-NOT: udiv ; IASM-NOT: udiv
...@@ -105,27 +109,27 @@ entry: ...@@ -105,27 +109,27 @@ entry:
; DIS: e4: e6ff0070 ; DIS: e4: e6ff0070
; DIS: e8: e6ff1071 ; DIS: e8: e6ff1071
; DIS: ec: e730f110 ; DIS: ec: e730f110
; DIS: 10c: e6ff0070 ; DIS: 108: e6ff0070
; DIS: 110: e6ff1071 ; DIS: 10c: e6ff1071
; DIS: 114: e730f110 ; DIS: 110: e730f110
; DIS: 130: e6ff0070 ; DIS: 12c: e6ff0070
; DIS: 134: e6ff1071 ; DIS: 130: e6ff1071
; DIS: 138: e730f110 ; DIS: 134: e730f110
; DIS: 154: e6ff0070 ; DIS: 150: e6ff0070
; DIS: 158: e6ff1071 ; DIS: 154: e6ff1071
; DIS: 15c: e730f110 ; DIS: 158: e730f110
; DIS: 178: e6ff0070 ; DIS: 174: e6ff0070
; DIS: 17c: e6ff1071 ; DIS: 178: e6ff1071
; DIS: 180: e730f110 ; DIS: 17c: e730f110
; DIS: 19c: e6ff0070 ; DIS: 198: e6ff0070
; DIS: 1a0: e6ff1071 ; DIS: 19c: e6ff1071
; DIS: 1a4: e730f110 ; DIS: 1a0: e730f110
; DIS: 1c0: e6ff0070 ; DIS: 1bc: e6ff0070
; DIS: 1c4: e6ff1071 ; DIS: 1c0: e6ff1071
; DIS: 1c8: e730f110 ; DIS: 1c4: e730f110
; DIS: 1e4: e6ff0070 ; DIS: 1e0: e6ff0070
; DIS: 1e8: e6ff1071 ; DIS: 1e4: e6ff1071
; DIS: 1ec: e730f110 ; DIS: 1e8: e730f110
; IASM-NOT: uxth ; IASM-NOT: uxth
; IASM-NOT: udiv ; IASM-NOT: udiv
...@@ -193,51 +197,51 @@ entry: ...@@ -193,51 +197,51 @@ entry:
; DIS: 214: e6ef0070 ; DIS: 214: e6ef0070
; DIS: 218: e6ef1071 ; DIS: 218: e6ef1071
; DIS: 21c: e730f110 ; DIS: 21c: e730f110
; DIS: 23c: e6ef0070 ; DIS: 238: e6ef0070
; DIS: 240: e6ef1071 ; DIS: 23c: e6ef1071
; DIS: 244: e730f110 ; DIS: 240: e730f110
; DIS: 260: e6ef0070 ; DIS: 25c: e6ef0070
; DIS: 264: e6ef1071 ; DIS: 260: e6ef1071
; DIS: 268: e730f110 ; DIS: 264: e730f110
; DIS: 284: e6ef0070 ; DIS: 280: e6ef0070
; DIS: 288: e6ef1071 ; DIS: 284: e6ef1071
; DIS: 28c: e730f110 ; DIS: 288: e730f110
; DIS: 2a8: e6ef0070 ; DIS: 2a4: e6ef0070
; DIS: 2ac: e6ef1071 ; DIS: 2a8: e6ef1071
; DIS: 2b0: e730f110 ; DIS: 2ac: e730f110
; DIS: 2cc: e6ef0070 ; DIS: 2c8: e6ef0070
; DIS: 2d0: e6ef1071 ; DIS: 2cc: e6ef1071
; DIS: 2d4: e730f110 ; DIS: 2d0: e730f110
; DIS: 2f0: e6ef0070 ; DIS: 2ec: e6ef0070
; DIS: 2f4: e6ef1071 ; DIS: 2f0: e6ef1071
; DIS: 2f8: e730f110 ; DIS: 2f4: e730f110
; DIS: 314: e6ef0070 ; DIS: 310: e6ef0070
; DIS: 318: e6ef1071 ; DIS: 314: e6ef1071
; DIS: 31c: e730f110 ; DIS: 318: e730f110
; DIS: 338: e6ef0070 ; DIS: 334: e6ef0070
; DIS: 33c: e6ef1071 ; DIS: 338: e6ef1071
; DIS: 340: e730f110 ; DIS: 33c: e730f110
; DIS: 35c: e6ef0070 ; DIS: 358: e6ef0070
; DIS: 360: e6ef1071 ; DIS: 35c: e6ef1071
; DIS: 364: e730f110 ; DIS: 360: e730f110
; DIS: 380: e6ef0070 ; DIS: 37c: e6ef0070
; DIS: 384: e6ef1071 ; DIS: 380: e6ef1071
; DIS: 388: e730f110 ; DIS: 384: e730f110
; DIS: 3a4: e6ef0070 ; DIS: 3a0: e6ef0070
; DIS: 3a8: e6ef1071 ; DIS: 3a4: e6ef1071
; DIS: 3ac: e730f110 ; DIS: 3a8: e730f110
; DIS: 3c8: e6ef0070 ; DIS: 3c4: e6ef0070
; DIS: 3cc: e6ef1071 ; DIS: 3c8: e6ef1071
; DIS: 3d0: e730f110 ; DIS: 3cc: e730f110
; DIS: 3ec: e6ef0070 ; DIS: 3e8: e6ef0070
; DIS: 3f0: e6ef1071 ; DIS: 3ec: e6ef1071
; DIS: 3f4: e730f110 ; DIS: 3f0: e730f110
; DIS: 410: e6ef0070 ; DIS: 40c: e6ef0070
; DIS: 414: e6ef1071 ; DIS: 410: e6ef1071
; DIS: 418: e730f110 ; DIS: 414: e730f110
; DIS: 434: e6ef0070 ; DIS: 430: e6ef0070
; DIS: 438: e6ef1071 ; DIS: 434: e6ef1071
; DIS: 43c: e730f110 ; DIS: 438: e730f110
; IASM-NOT: uxtb ; IASM-NOT: uxtb
; IASM-NOT: udiv ; IASM-NOT: udiv
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment