Commit cfc25533 by Eric Holk

ARM32 vector division lowering.

Enables vector division by scalarization. Also removes an assert, as suggested by Karl in a previous CL: https://codereview.chromium.org/1646033002/diff/1/src/IceInstARM32.cpp#newcode717
BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1681003002
parent fbdc7e4c
...@@ -804,7 +804,6 @@ template <> void InstARM32Vmul::emitIAS(const Cfg *Func) const { ...@@ -804,7 +804,6 @@ template <> void InstARM32Vmul::emitIAS(const Cfg *Func) const {
Asm->vmuld(Dest, getSrc(0), getSrc(1), CondARM32::AL); Asm->vmuld(Dest, getSrc(0), getSrc(1), CondARM32::AL);
break; break;
} }
assert(!Asm->needsTextFixup());
} }
InstARM32Call::InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget) InstARM32Call::InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget)
......
...@@ -700,6 +700,39 @@ bool TargetLowering::shouldOptimizeMemIntrins() { ...@@ -700,6 +700,39 @@ bool TargetLowering::shouldOptimizeMemIntrins() {
Ctx->getFlags().getForceMemIntrinOpt(); Ctx->getFlags().getForceMemIntrinOpt();
} }
/// Expands a vector arithmetic operation into a sequence of scalar operations,
/// one per element of the destination vector.
///
/// For each element index, the matching elements of \p Src0 and \p Src1 are
/// extracted, combined with a scalar InstArithmetic of kind \p Kind, and the
/// scalar result is inserted into a fresh vector variable. The finished vector
/// is then assigned to \p Dest.
///
/// \param Kind The arithmetic operation to perform on each element.
/// \param Dest Destination variable; its type must be a vector type.
/// \param Src0 First vector operand.
/// \param Src1 Second vector operand.
void TargetLowering::scalarizeArithmetic(InstArithmetic::OpKind Kind,
                                         Variable *Dest, Operand *Src0,
                                         Operand *Src1) {
  const Type VecTy = Dest->getType();
  assert(isVectorType(VecTy));
  const Type LaneTy = typeElementType(VecTy);
  const SizeT LaneCount = typeNumElements(VecTy);

  // The partially built result. It starts as an undef vector and is replaced
  // by a new variable each time an element is inserted.
  Operand *Accum = Ctx->getConstantUndef(VecTy);
  for (SizeT Lane = 0; Lane != LaneCount; ++Lane) {
    Constant *LaneIndex = Ctx->getConstantInt32(Lane);

    // Pull the current element out of each source vector.
    Variable *Lhs = Func->makeVariable(LaneTy);
    Context.insert<InstExtractElement>(Lhs, Src0, LaneIndex);
    Variable *Rhs = Func->makeVariable(LaneTy);
    Context.insert<InstExtractElement>(Rhs, Src1, LaneIndex);

    // Apply the operation to the two scalars.
    Variable *LaneResult = Func->makeVariable(LaneTy);
    auto *ScalarOp =
        Context.insert<InstArithmetic>(Kind, LaneResult, Lhs, Rhs);
    // The scalar operation may itself have to be turned into a helper call.
    genTargetHelperCallFor(ScalarOp);

    // Write the scalar result into the current element of the result vector.
    Variable *Updated = Func->makeVariable(VecTy);
    Context.insert<InstInsertElement>(Updated, Accum, LaneResult, LaneIndex);
    Accum = Updated;
  }
  Context.insert<InstAssign>(Dest, Accum);
}
void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C, void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C,
const char *Suffix) const { const char *Suffix) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
......
...@@ -464,6 +464,9 @@ protected: ...@@ -464,6 +464,9 @@ protected:
bool shouldOptimizeMemIntrins(); bool shouldOptimizeMemIntrins();
/// Lowers a vector arithmetic operation of kind \p K by scalarization: for
/// every element index, the corresponding elements of \p Src0 and \p Src1 are
/// extracted, combined with a scalar arithmetic instruction, and inserted into
/// the result vector, which is finally assigned to \p Dest. \p Dest must have
/// a vector type.
void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
Operand *Src0, Operand *Src1);
/// SandboxType enumerates all possible sandboxing strategies that /// SandboxType enumerates all possible sandboxing strategies that
enum SandboxType { enum SandboxType {
ST_None, ST_None,
......
...@@ -430,6 +430,18 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) { ...@@ -430,6 +430,18 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
const Type DestTy = Dest->getType(); const Type DestTy = Dest->getType();
const InstArithmetic::OpKind Op = const InstArithmetic::OpKind Op =
llvm::cast<InstArithmetic>(Instr)->getOp(); llvm::cast<InstArithmetic>(Instr)->getOp();
if (isVectorType(DestTy)) {
switch (Op) {
default:
break;
case InstArithmetic::Fdiv:
case InstArithmetic::Udiv:
case InstArithmetic::Sdiv:
scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
Instr->setDeleted();
return;
}
}
switch (DestTy) { switch (DestTy) {
default: default:
return; return;
...@@ -2015,7 +2027,8 @@ void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { ...@@ -2015,7 +2027,8 @@ void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
Variable *SrcLoReg = legalizeToReg(SrcLo); Variable *SrcLoReg = legalizeToReg(SrcLo);
switch (Ty) { switch (Ty) {
default: default:
llvm::report_fatal_error("Unexpected type"); llvm_unreachable(
("Unexpected type in div0Check: " + typeIceString(Ty)).c_str());
case IceType_i8: case IceType_i8:
case IceType_i16: { case IceType_i16: {
Operand *ShAmtImm = shAmtImm(32 - getScalarIntBitWidth(Ty)); Operand *ShAmtImm = shAmtImm(32 - getScalarIntBitWidth(Ty));
...@@ -5508,7 +5521,8 @@ void TargetARM32::prelowerPhis() { ...@@ -5508,7 +5521,8 @@ void TargetARM32::prelowerPhis() {
Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) { Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Variable *Reg = makeReg(Ty, RegNum); Variable *Reg = makeReg(Ty, RegNum);
Context.insert<InstFakeDef>(Reg); Context.insert<InstFakeDef>(Reg);
UnimplementedError(Func->getContext()->getFlags()); assert(isVectorType(Ty));
_veor(Reg, Reg, Reg);
return Reg; return Reg;
} }
......
...@@ -343,9 +343,6 @@ protected: ...@@ -343,9 +343,6 @@ protected:
void eliminateNextVectorSextInstruction(Variable *SignExtendedResult); void eliminateNextVectorSextInstruction(Variable *SignExtendedResult);
/// Lowers a vector arithmetic operation of kind \p K by scalarization: each
/// pair of corresponding elements of \p Src0 and \p Src1 is extracted,
/// combined with a scalar arithmetic instruction, and inserted into the result
/// vector, which is finally assigned to \p Dest. \p Dest must have a vector
/// type.
void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
Operand *Src0, Operand *Src1);
void emitGetIP(CfgNode *Node) { void emitGetIP(CfgNode *Node) {
dispatchToConcrete(&Traits::ConcreteTarget::emitGetIP, std::move(Node)); dispatchToConcrete(&Traits::ConcreteTarget::emitGetIP, std::move(Node));
} }
......
...@@ -6061,41 +6061,6 @@ void TargetX86Base<TraitsType>::lowerSwitch(const InstSwitch *Instr) { ...@@ -6061,41 +6061,6 @@ void TargetX86Base<TraitsType>::lowerSwitch(const InstSwitch *Instr) {
_br(DefaultTarget); _br(DefaultTarget);
} }
/// Expands a vector arithmetic operation into scalar operations, one per
/// element of the destination vector.
///
/// Each iteration extracts one element from \p Src0 and one from \p Src1,
/// applies a scalar InstArithmetic of kind \p Kind to them, and inserts the
/// scalar result into a new vector variable. The completed vector is assigned
/// to \p Dest, whose type must be a vector type.
template <typename TraitsType>
void TargetX86Base<TraitsType>::scalarizeArithmetic(
    InstArithmetic::OpKind Kind, Variable *Dest, Operand *Src0,
    Operand *Src1) {
  const Type VectorTy = Dest->getType();
  assert(isVectorType(VectorTy));
  const Type ScalarTy = typeElementType(VectorTy);
  const SizeT ElementCount = typeNumElements(VectorTy);

  // Result built so far; begins as an undef vector of the destination type.
  Operand *Partial = Ctx->getConstantUndef(VectorTy);
  for (SizeT Position = 0; Position != ElementCount; ++Position) {
    Constant *PositionConst = Ctx->getConstantInt32(Position);

    // Fetch the two scalar inputs for this position.
    Variable *Left = Func->makeVariable(ScalarTy);
    Context.insert<InstExtractElement>(Left, Src0, PositionConst);
    Variable *Right = Func->makeVariable(ScalarTy);
    Context.insert<InstExtractElement>(Right, Src1, PositionConst);

    // Compute the scalar result.
    Variable *Scalar = Func->makeVariable(ScalarTy);
    auto *ScalarInst =
        Context.insert<InstArithmetic>(Kind, Scalar, Left, Right);
    // The new scalar instruction may itself require a helper call.
    genTargetHelperCallFor(ScalarInst);

    // Place the scalar result into the vector being assembled.
    Variable *Next = Func->makeVariable(VectorTy);
    Context.insert<InstInsertElement>(Next, Partial, Scalar, PositionConst);
    Partial = Next;
  }
  Context.insert<InstAssign>(Dest, Partial);
}
/// The following pattern occurs often in lowered C and C++ code: /// The following pattern occurs often in lowered C and C++ code:
/// ///
/// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
......
...@@ -45,7 +45,7 @@ ...@@ -45,7 +45,7 @@
X(f32, 2, 4, 1, f32, "float", "f32") \ X(f32, 2, 4, 1, f32, "float", "f32") \
X(f64, 3, 8, 1, f64, "double", "f64") \ X(f64, 3, 8, 1, f64, "double", "f64") \
X(v4i1, 4, 1, 4, i1, "<4 x i1>", "v4i1") \ X(v4i1, 4, 1, 4, i1, "<4 x i1>", "v4i1") \
X(v8i1, 4, 1, 8, i1, "<8 x i1>", "v8ii") \ X(v8i1, 4, 1, 8, i1, "<8 x i1>", "v8i1") \
X(v16i1, 4, 1, 16, i1, "<16 x i1>", "v16i1") \ X(v16i1, 4, 1, 16, i1, "<16 x i1>", "v16i1") \
X(v16i8, 4, 1, 16, i8, "<16 x i8>", "v16i8") \ X(v16i8, 4, 1, 16, i8, "<16 x i8>", "v16i8") \
X(v8i16, 4, 2, 8, i16, "<8 x i16>", "v8i16") \ X(v8i16, 4, 2, 8, i16, "<8 x i16>", "v8i16") \
......
; Show that we know how to translate vector division instructions.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 -mattr=hwdiv-arm \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 -mattr=hwdiv-arm \
; RUN: | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 -mattr=hwdiv-arm \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 -mattr=hwdiv-arm \
; RUN: | FileCheck %s --check-prefix=DIS
; Divides two <4 x float> vectors with fdiv. The checks below expect four
; scalar vdiv.f32 instructions in the textual assembly, together with their
; encodings at fixed offsets in the disassembly, and expect the
; integrated-assembler output to contain no textual vdiv.
; NOTE(review): the byte offsets are tied to the exact code currently
; generated; they will need updating whenever the lowering changes.
define internal <4 x float> @testVdivFloat4(<4 x float> %v1, <4 x float> %v2) {
; ASM-LABEL: testVdivFloat4:
; DIS-LABEL: 00000000 <testVdivFloat4>:
; IASM-LABEL: testVdivFloat4:
entry:
%res = fdiv <4 x float> %v1, %v2
; TODO(eholk): this code could be a lot better. Fix the code generator
; and update the test. Same for the rest of the tests.
; ASM: vdiv.f32 s8, s8, s9
; ASM: vdiv.f32 s8, s8, s9
; ASM: vdiv.f32 s8, s8, s9
; ASM: vdiv.f32 s0, s0, s4
; DIS: 8: ee844a24
; DIS: 1c: ee844a24
; DIS: 2c: ee844a24
; DIS: 3c: ee800a02
; IASM-NOT: vdiv
ret <4 x float> %res
}
; Divides two <4 x i32> vectors with an unsigned divide (udiv). The checks
; below expect four scalar udiv instructions in the textual assembly, the
; same encoding at four fixed offsets in the disassembly, and no textual
; udiv in the integrated-assembler output.
define internal <4 x i32> @testVdiv4i32(<4 x i32> %v1, <4 x i32> %v2) {
; ASM-LABEL: testVdiv4i32:
; DIS-LABEL: 00000050 <testVdiv4i32>:
; IASM-LABEL: testVdiv4i32:
entry:
%res = udiv <4 x i32> %v1, %v2
; ASM: udiv r0, r0, r1
; ASM: udiv r0, r0, r1
; ASM: udiv r0, r0, r1
; ASM: udiv r0, r0, r1
; DIS: 64: e730f110
; DIS: 84: e730f110
; DIS: a0: e730f110
; DIS: bc: e730f110
; IASM-NOT: udiv
ret <4 x i32> %res
}
; Divides two <8 x i16> vectors with an unsigned divide (udiv). The checks
; below expect eight groups in the textual assembly, each made of two uxth
; instructions (one per operand register) followed by a udiv, plus the
; matching encodings at fixed offsets in the disassembly. The
; integrated-assembler output must contain neither a textual uxth nor a
; textual udiv.
define internal <8 x i16> @testVdiv8i16(<8 x i16> %v1, <8 x i16> %v2) {
; ASM-LABEL: testVdiv8i16:
; DIS-LABEL: 000000d0 <testVdiv8i16>:
; IASM-LABEL: testVdiv8i16:
entry:
%res = udiv <8 x i16> %v1, %v2
; ASM: uxth r0, r0
; ASM: uxth r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxth r0, r0
; ASM: uxth r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxth r0, r0
; ASM: uxth r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxth r0, r0
; ASM: uxth r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxth r0, r0
; ASM: uxth r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxth r0, r0
; ASM: uxth r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxth r0, r0
; ASM: uxth r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxth r0, r0
; ASM: uxth r1, r1
; ASM: udiv r0, r0, r1
; DIS: e4: e6ff0070
; DIS: e8: e6ff1071
; DIS: ec: e730f110
; DIS: 10c: e6ff0070
; DIS: 110: e6ff1071
; DIS: 114: e730f110
; DIS: 130: e6ff0070
; DIS: 134: e6ff1071
; DIS: 138: e730f110
; DIS: 154: e6ff0070
; DIS: 158: e6ff1071
; DIS: 15c: e730f110
; DIS: 178: e6ff0070
; DIS: 17c: e6ff1071
; DIS: 180: e730f110
; DIS: 19c: e6ff0070
; DIS: 1a0: e6ff1071
; DIS: 1a4: e730f110
; DIS: 1c0: e6ff0070
; DIS: 1c4: e6ff1071
; DIS: 1c8: e730f110
; DIS: 1e4: e6ff0070
; DIS: 1e8: e6ff1071
; DIS: 1ec: e730f110
; IASM-NOT: uxth
; IASM-NOT: udiv
ret <8 x i16> %res
}
; Divides two <16 x i8> vectors with an unsigned divide (udiv). The checks
; below expect sixteen groups in the textual assembly, each made of two uxtb
; instructions (one per operand register) followed by a udiv, plus the
; matching encodings at fixed offsets in the disassembly. The
; integrated-assembler output must contain neither a textual uxtb nor a
; textual udiv.
define internal <16 x i8> @testVdiv16i8(<16 x i8> %v1, <16 x i8> %v2) {
; ASM-LABEL: testVdiv16i8:
; DIS-LABEL: 00000200 <testVdiv16i8>:
; IASM-LABEL: testVdiv16i8:
entry:
%res = udiv <16 x i8> %v1, %v2
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; ASM: uxtb r0, r0
; ASM: uxtb r1, r1
; ASM: udiv r0, r0, r1
; DIS: 214: e6ef0070
; DIS: 218: e6ef1071
; DIS: 21c: e730f110
; DIS: 23c: e6ef0070
; DIS: 240: e6ef1071
; DIS: 244: e730f110
; DIS: 260: e6ef0070
; DIS: 264: e6ef1071
; DIS: 268: e730f110
; DIS: 284: e6ef0070
; DIS: 288: e6ef1071
; DIS: 28c: e730f110
; DIS: 2a8: e6ef0070
; DIS: 2ac: e6ef1071
; DIS: 2b0: e730f110
; DIS: 2cc: e6ef0070
; DIS: 2d0: e6ef1071
; DIS: 2d4: e730f110
; DIS: 2f0: e6ef0070
; DIS: 2f4: e6ef1071
; DIS: 2f8: e730f110
; DIS: 314: e6ef0070
; DIS: 318: e6ef1071
; DIS: 31c: e730f110
; DIS: 338: e6ef0070
; DIS: 33c: e6ef1071
; DIS: 340: e730f110
; DIS: 35c: e6ef0070
; DIS: 360: e6ef1071
; DIS: 364: e730f110
; DIS: 380: e6ef0070
; DIS: 384: e6ef1071
; DIS: 388: e730f110
; DIS: 3a4: e6ef0070
; DIS: 3a8: e6ef1071
; DIS: 3ac: e730f110
; DIS: 3c8: e6ef0070
; DIS: 3cc: e6ef1071
; DIS: 3d0: e730f110
; DIS: 3ec: e6ef0070
; DIS: 3f0: e6ef1071
; DIS: 3f4: e730f110
; DIS: 410: e6ef0070
; DIS: 414: e6ef1071
; DIS: 418: e730f110
; DIS: 434: e6ef0070
; DIS: 438: e6ef1071
; DIS: 43c: e730f110
; IASM-NOT: uxtb
; IASM-NOT: udiv
ret <16 x i8> %res
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment