Commit 958ddb75 by Jaydeep Patil Committed by Jim Stichnoth

[SubZero] Vector types support for MIPS

This patch implements vector operations on MIPS32 using the VariableVecOn32 method (along the lines of Variable64On32). Vector operations are scalarized prior to lowering. Each vector variable is split into 4 containers that together hold the vector value. For MIPS32, four GP/FP registers are used to hold a vector variable. Arguments are passed in GP registers irrespective of the type of the vector variable. The lit test vector-mips.ll has been added to test this implementation. R=stichnot@chromium.org Review URL: https://codereview.chromium.org/2380023002 . Patch from Jaydeep Patil <jaydeep.patil@imgtec.com>.
parent 9309756d
...@@ -119,9 +119,14 @@ void Cfg::swapNodes(NodeList &NewNodes) { ...@@ -119,9 +119,14 @@ void Cfg::swapNodes(NodeList &NewNodes) {
template <> Variable *Cfg::makeVariable<Variable>(Type Ty) { template <> Variable *Cfg::makeVariable<Variable>(Type Ty) {
SizeT Index = Variables.size(); SizeT Index = Variables.size();
Variable *Var = Target->shouldSplitToVariable64On32(Ty) Variable *Var;
? Variable64On32::create(this, Ty, Index) if (Target->shouldSplitToVariableVecOn32(Ty)) {
: Variable::create(this, Ty, Index); Var = VariableVecOn32::create(this, Ty, Index);
} else if (Target->shouldSplitToVariable64On32(Ty)) {
Var = Variable64On32::create(this, Ty, Index);
} else {
Var = Variable::create(this, Ty, Index);
}
Variables.push_back(Var); Variables.push_back(Var);
return Var; return Var;
} }
...@@ -244,9 +249,13 @@ void Cfg::translate() { ...@@ -244,9 +249,13 @@ void Cfg::translate() {
} }
// Create the Hi and Lo variables where a split was needed // Create the Hi and Lo variables where a split was needed
for (Variable *Var : Variables) for (Variable *Var : Variables) {
if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) {
Var64On32->initHiLo(this); Var64On32->initHiLo(this);
} else if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Var)) {
VarVecOn32->initVecElement(this);
}
}
// Instrument the Cfg, e.g. with AddressSanitizer // Instrument the Cfg, e.g. with AddressSanitizer
if (!BuildDefs::minimal() && getFlags().getSanitizeAddresses()) { if (!BuildDefs::minimal() && getFlags().getSanitizeAddresses()) {
......
...@@ -955,11 +955,10 @@ public: ...@@ -955,11 +955,10 @@ public:
void dump(const Cfg *Func) const override { void dump(const Cfg *Func) const override {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrDump();
Str << "\t" << Opcode << "\t"; dumpOpcode(Str, Opcode, getSrc(0)->getType());
getSrc(0)->emit(Func); Str << " ";
Str << ", "; dumpSources(Func);
getSrc(1)->emit(Func);
Str << ", " << TrapCode; Str << ", " << TrapCode;
} }
......
...@@ -52,6 +52,7 @@ public: ...@@ -52,6 +52,7 @@ public:
kConst_Max = kConst_Target + MaxTargetKinds, kConst_Max = kConst_Target + MaxTargetKinds,
kVariable, kVariable,
kVariable64On32, kVariable64On32,
kVariableVecOn32,
kVariableBoolean, kVariableBoolean,
kVariable_Target, // leave space for target-specific variable kinds kVariable_Target, // leave space for target-specific variable kinds
kVariable_Max = kVariable_Target + MaxTargetKinds, kVariable_Max = kVariable_Target + MaxTargetKinds,
...@@ -962,6 +963,66 @@ protected: ...@@ -962,6 +963,66 @@ protected:
Variable *HiVar = nullptr; Variable *HiVar = nullptr;
}; };
// VariableVecOn32 represents a 128-bit vector variable on a 32-bit
// architecture. In this case the variable must be split into 4 containers.
class VariableVecOn32 : public Variable {
VariableVecOn32() = delete;
VariableVecOn32(const VariableVecOn32 &) = delete;
VariableVecOn32 &operator=(const VariableVecOn32 &) = delete;
public:
static VariableVecOn32 *create(Cfg *Func, Type Ty, SizeT Index) {
return new (Func->allocate<VariableVecOn32>())
VariableVecOn32(Func, kVariableVecOn32, Ty, Index);
}
void setName(const Cfg *Func, const std::string &NewName) override {
Variable::setName(Func, NewName);
if (!Containers.empty()) {
for (SizeT i = 0; i < ElementsPerContainer; ++i) {
Containers[i]->setName(Func, getName() + "__cont" + std::to_string(i));
}
}
}
void setIsArg(bool Val = true) override {
Variable::setIsArg(Val);
for (Variable *Var : Containers) {
Var->setIsArg(getIsArg());
}
}
const VarList &getContainers() const { return Containers; }
void initVecElement(Cfg *Func) {
for (SizeT i = 0; i < ElementsPerContainer; ++i) {
Variable *Var = Func->makeVariable(IceType_i32);
Var->setIsArg(getIsArg());
if (BuildDefs::dump()) {
Var->setName(Func, getName() + "__cont" + std::to_string(i));
}
Containers.push_back(Var);
}
}
static bool classof(const Operand *Operand) {
OperandKind Kind = Operand->getKind();
return Kind == kVariableVecOn32;
}
// A 128-bit vector value is mapped onto 4 32-bit register values.
static constexpr SizeT ElementsPerContainer = 4;
protected:
VariableVecOn32(const Cfg *Func, OperandKind K, Type Ty, SizeT Index)
: Variable(Func, K, Ty, Index) {
assert(typeWidthInBytes(Ty) ==
ElementsPerContainer * typeWidthInBytes(IceType_i32));
}
VarList Containers;
};
enum MetadataKind { enum MetadataKind {
VMK_Uses, /// Track only uses, not defs VMK_Uses, /// Track only uses, not defs
VMK_SingleDefs, /// Track uses+defs, but only record single def VMK_SingleDefs, /// Track uses+defs, but only record single def
......
...@@ -715,6 +715,10 @@ void TargetLowering::addFakeDefUses(const Inst *Instr) { ...@@ -715,6 +715,10 @@ void TargetLowering::addFakeDefUses(const Inst *Instr) {
if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Var)) { if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Var)) {
Context.insert<InstFakeUse>(Var64->getLo()); Context.insert<InstFakeUse>(Var64->getLo());
Context.insert<InstFakeUse>(Var64->getHi()); Context.insert<InstFakeUse>(Var64->getHi());
} else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Var)) {
for (Variable *Var : VarVec->getContainers()) {
Context.insert<InstFakeUse>(Var);
}
} else { } else {
Context.insert<InstFakeUse>(Var); Context.insert<InstFakeUse>(Var);
} }
...@@ -725,6 +729,10 @@ void TargetLowering::addFakeDefUses(const Inst *Instr) { ...@@ -725,6 +729,10 @@ void TargetLowering::addFakeDefUses(const Inst *Instr) {
if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Dest)) { if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Dest)) {
Context.insert<InstFakeDef>(Var64->getLo()); Context.insert<InstFakeDef>(Var64->getLo());
Context.insert<InstFakeDef>(Var64->getHi()); Context.insert<InstFakeDef>(Var64->getHi());
} else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Dest)) {
for (Variable *Var : VarVec->getContainers()) {
Context.insert<InstFakeDef>(Var);
}
} else { } else {
Context.insert<InstFakeDef>(Dest); Context.insert<InstFakeDef>(Dest);
} }
......
...@@ -256,6 +256,12 @@ public: ...@@ -256,6 +256,12 @@ public:
/// Return whether a 64-bit Variable should be split into a Variable64On32. /// Return whether a 64-bit Variable should be split into a Variable64On32.
virtual bool shouldSplitToVariable64On32(Type Ty) const = 0; virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;
/// Return whether a vector Variable of type Ty should be split into a
/// VariableVecOn32. This default implementation ignores Ty and never splits.
virtual bool shouldSplitToVariableVecOn32(Type Ty) const {
  static_cast<void>(Ty); // Unused by the default implementation.
  return false;
}
bool hasComputedFrame() const { return HasComputedFrame; } bool hasComputedFrame() const { return HasComputedFrame; }
/// Returns true if this function calls a function that has the "returns /// Returns true if this function calls a function that has the "returns
/// twice" attribute. /// twice" attribute.
...@@ -503,6 +509,9 @@ protected: ...@@ -503,6 +509,9 @@ protected:
const SizeT NumElements = typeNumElements(DestTy); const SizeT NumElements = typeNumElements(DestTy);
Variable *T = Func->makeVariable(DestTy); Variable *T = Func->makeVariable(DestTy);
if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(T)) {
VarVecOn32->initVecElement(Func);
}
Context.insert<InstFakeDef>(T); Context.insert<InstFakeDef>(T);
for (SizeT I = 0; I < NumElements; ++I) { for (SizeT I = 0; I < NumElements; ++I) {
......
...@@ -60,7 +60,8 @@ public: ...@@ -60,7 +60,8 @@ public:
void translateOm1() override; void translateOm1() override;
void translateO2() override; void translateO2() override;
bool doBranchOpt(Inst *Instr, const CfgNode *NextNode) override; bool doBranchOpt(Inst *Instr, const CfgNode *NextNode) override;
/// Stores Ret in the ImplicitRet member.
/// NOTE(review): presumably Ret is the variable used for an implicitly
/// returned value — confirm against the lowering code that calls this.
void setImplicitRet(Variable *Ret) { ImplicitRet = Ret; }
/// Returns the current value of the ImplicitRet member.
Variable *getImplicitRet() const { return ImplicitRet; }
SizeT getNumRegisters() const override { return RegMIPS32::Reg_NUM; } SizeT getNumRegisters() const override { return RegMIPS32::Reg_NUM; }
Variable *getPhysicalRegister(RegNumT RegNum, Variable *getPhysicalRegister(RegNumT RegNum,
Type Ty = IceType_void) override; Type Ty = IceType_void) override;
...@@ -111,6 +112,10 @@ public: ...@@ -111,6 +112,10 @@ public:
return Ty == IceType_i64; return Ty == IceType_i64;
} }
/// Returns true exactly when isVectorType(Ty) holds, i.e. every vector-typed
/// variable is to be split into a VariableVecOn32 on this target.
bool shouldSplitToVariableVecOn32(Type Ty) const override {
return isVectorType(Ty);
}
// TODO(ascull): what is the best size of MIPS? // TODO(ascull): what is the best size of MIPS?
SizeT getMinJumpTableSize() const override { return 3; } SizeT getMinJumpTableSize() const override { return 3; }
void emitJumpTable(const Cfg *Func, void emitJumpTable(const Cfg *Func,
...@@ -621,9 +626,11 @@ public: ...@@ -621,9 +626,11 @@ public:
void split64(Variable *Var); void split64(Variable *Var);
Operand *loOperand(Operand *Operand); Operand *loOperand(Operand *Operand);
Operand *hiOperand(Operand *Operand); Operand *hiOperand(Operand *Operand);
Operand *getOperandAtIndex(Operand *Operand, Type BaseType, uint32_t Index);
void finishArgumentLowering(Variable *Arg, Variable *FramePtr, void finishArgumentLowering(Variable *Arg, bool PartialOnStack,
size_t BasicFrameOffset, size_t *InArgsSizeBytes); Variable *FramePtr, size_t BasicFrameOffset,
size_t *InArgsSizeBytes);
Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT()); Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());
...@@ -642,6 +649,7 @@ public: ...@@ -642,6 +649,7 @@ public:
/// appropriate register number. Note that, when Ty == IceType_i64, Reg will /// appropriate register number. Note that, when Ty == IceType_i64, Reg will
/// be an I64 register pair. /// be an I64 register pair.
bool argInReg(Type Ty, uint32_t ArgNo, RegNumT *Reg); bool argInReg(Type Ty, uint32_t ArgNo, RegNumT *Reg);
/// ORs RegisterAliases[Reg] into GPRegsUsed.
/// NOTE(review): presumably this marks Reg and its aliases as already used so
/// that later argument assignment skips them — confirm against argInGPR.
void discardReg(RegNumT Reg) { GPRegsUsed |= RegisterAliases[Reg]; }
private: private:
// argInGPR is used to find if any GPR register is available for argument of // argInGPR is used to find if any GPR register is available for argument of
...@@ -755,6 +763,7 @@ protected: ...@@ -755,6 +763,7 @@ protected:
size_t FixedAllocaSizeBytes = 0; size_t FixedAllocaSizeBytes = 0;
size_t FixedAllocaAlignBytes = 0; size_t FixedAllocaAlignBytes = 0;
size_t PreservedRegsSizeBytes = 0; size_t PreservedRegsSizeBytes = 0;
Variable *ImplicitRet = nullptr; /// Implicit return
private: private:
ENABLE_MAKE_UNIQUE; ENABLE_MAKE_UNIQUE;
......
; This test checks support for vector types on MIPS32.
; RUN: %if --need=target_MIPS32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble --disassemble --target mips32\
; RUN: -i %s --args -O2 --skip-unimplemented \
; RUN: | %if --need=target_MIPS32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix MIPS32 %s
; Returns element 0 of a <4 x i32> argument.
; The expected output copies the element from a0 into v0.
define internal i32 @test_0(<4 x i32> %a) #0 {
entry:
%vecext = extractelement <4 x i32> %a, i32 0
ret i32 %vecext
}
; MIPS32-LABEL: test_0
; MIPS32: move v0,a0
; Returns element 1 of a <4 x i32> argument.
; The expected output copies the element from a1 into v0.
define internal i32 @test_1(<4 x i32> %a) #0 {
entry:
%vecext = extractelement <4 x i32> %a, i32 1
ret i32 %vecext
}
; MIPS32-LABEL: test_1
; MIPS32: move v0,a1
; Returns element 2 of a <4 x i32> argument.
; The expected output copies the element from a2 into v0.
define internal i32 @test_2(<4 x i32> %a) #0 {
entry:
%vecext = extractelement <4 x i32> %a, i32 2
ret i32 %vecext
}
; MIPS32-LABEL: test_2
; MIPS32: move v0,a2
; Returns element 3 of a <4 x i32> argument.
; The expected output copies the element from a3 into v0.
define internal i32 @test_3(<4 x i32> %a) #0 {
entry:
%vecext = extractelement <4 x i32> %a, i32 3
ret i32 %vecext
}
; MIPS32-LABEL: test_3
; MIPS32: move v0,a3
; Returns element 1 of a <4 x float> argument.
; The expected output moves the element from GP register a1 into $f0.
define internal float @test_4(<4 x float> %a) #0 {
entry:
%vecext = extractelement <4 x float> %a, i32 1
ret float %vecext
}
; MIPS32-LABEL: test_4
; MIPS32: mtc1 a1,$f0
; Returns element 2 of a <4 x float> argument.
; The expected output moves the element from GP register a2 into $f0.
define internal float @test_5(<4 x float> %a) #0 {
entry:
%vecext = extractelement <4 x float> %a, i32 2
ret float %vecext
}
; MIPS32-LABEL: test_5
; MIPS32: mtc1 a2,$f0
; Sign-extends element 0 of a <16 x i8> argument to i32.
; The expected output masks the low byte of a0, then sign-extends it with a
; shift-left / arithmetic-shift-right pair of 0x18 before returning it in v0.
define internal i32 @test_6(<16 x i8> %a) #0 {
entry:
%vecext = extractelement <16 x i8> %a, i32 0
%conv = sext i8 %vecext to i32
ret i32 %conv
}
; MIPS32-LABEL: test_6
; MIPS32: andi a0,a0,0xff
; MIPS32: sll a0,a0,0x18
; MIPS32: sra a0,a0,0x18
; MIPS32: move v0,a0
; Sign-extends element 15 of a <16 x i8> argument to i32.
; The expected output shifts the top byte of a3 down by 0x18, then
; sign-extends it with a shift pair of 0x18 before returning it in v0.
define internal i32 @test_7(<16 x i8> %a) #0 {
entry:
%vecext = extractelement <16 x i8> %a, i32 15
%conv = sext i8 %vecext to i32
ret i32 %conv
}
; MIPS32-LABEL: test_7
; MIPS32: srl a3,a3,0x18
; MIPS32: sll a3,a3,0x18
; MIPS32: sra a3,a3,0x18
; MIPS32: move v0,a3
; Sign-extends element 0 of an <8 x i16> argument to i32.
; The expected output masks the low halfword of a0, then sign-extends it with
; a shift pair of 0x10 before returning it in v0.
define internal i32 @test_8(<8 x i16> %a) #0 {
entry:
%vecext = extractelement <8 x i16> %a, i32 0
%conv = sext i16 %vecext to i32
ret i32 %conv
}
; MIPS32-LABEL: test_8
; MIPS32: andi a0,a0,0xffff
; MIPS32: sll a0,a0,0x10
; MIPS32: sra a0,a0,0x10
; MIPS32: move v0,a0
; Sign-extends element 7 of an <8 x i16> argument to i32.
; The expected output shifts the upper halfword of a3 down by 0x10, then
; sign-extends it with a shift pair of 0x10 before returning it in v0.
define internal i32 @test_9(<8 x i16> %a) #0 {
entry:
%vecext = extractelement <8 x i16> %a, i32 7
%conv = sext i16 %vecext to i32
ret i32 %conv
}
; MIPS32-LABEL: test_9
; MIPS32: srl a3,a3,0x10
; MIPS32: sll a3,a3,0x10
; MIPS32: sra a3,a3,0x10
; MIPS32: move v0,a3
; Sign-extends element 0 of a <4 x i1> argument to i32.
; The expected output masks bit 0 of a0, then sign-extends it with a shift
; pair of 0x1f before returning it in v0.
define internal i32 @test_10(<4 x i1> %a) #0 {
entry:
%vecext = extractelement <4 x i1> %a, i32 0
%conv = sext i1 %vecext to i32
ret i32 %conv
}
; MIPS32-LABEL: test_10
; MIPS32: andi a0,a0,0x1
; MIPS32: sll a0,a0,0x1f
; MIPS32: sra a0,a0,0x1f
; MIPS32: move v0,a0
; Sign-extends element 2 of a <4 x i1> argument to i32.
; The expected output masks bit 0 of a2, then sign-extends it with a shift
; pair of 0x1f before returning it in v0.
define internal i32 @test_11(<4 x i1> %a) #0 {
entry:
%vecext = extractelement <4 x i1> %a, i32 2
%conv = sext i1 %vecext to i32
ret i32 %conv
}
; MIPS32-LABEL: test_11
; MIPS32: andi a2,a2,0x1
; MIPS32: sll a2,a2,0x1f
; MIPS32: sra a2,a2,0x1f
; MIPS32: move v0,a2
; Sign-extends element 0 of an <8 x i1> argument to i32.
; The expected output first isolates the low halfword of a0, then masks bit 0
; and sign-extends it with a shift pair of 0x1f before returning it in v0.
define internal i32 @test_12(<8 x i1> %a) #0 {
entry:
%vecext = extractelement <8 x i1> %a, i32 0
%conv = sext i1 %vecext to i32
ret i32 %conv
}
; MIPS32-LABEL: test_12
; MIPS32: andi a0,a0,0xffff
; MIPS32: andi a0,a0,0x1
; MIPS32: sll a0,a0,0x1f
; MIPS32: sra a0,a0,0x1f
; MIPS32: move v0,a0
; Sign-extends element 7 of an <8 x i1> argument to i32.
; The expected output shifts the upper halfword of a3 down by 0x10, then masks
; bit 0 and sign-extends it with a shift pair of 0x1f before returning it in v0.
define internal i32 @test_13(<8 x i1> %a) #0 {
entry:
%vecext = extractelement <8 x i1> %a, i32 7
%conv = sext i1 %vecext to i32
ret i32 %conv
}
; MIPS32-LABEL: test_13
; MIPS32: srl a3,a3,0x10
; MIPS32: andi a3,a3,0x1
; MIPS32: sll a3,a3,0x1f
; MIPS32: sra a3,a3,0x1f
; MIPS32: move v0,a3
; Sign-extends element 0 of a <16 x i1> argument to i32.
; The expected output first isolates the low byte of a0, then masks bit 0 and
; sign-extends it with a shift pair of 0x1f before returning it in v0.
define internal i32 @test_14(<16 x i1> %a) #0 {
entry:
%vecext = extractelement <16 x i1> %a, i32 0
%conv = sext i1 %vecext to i32
ret i32 %conv
}
; MIPS32-LABEL: test_14
; MIPS32: andi a0,a0,0xff
; MIPS32: andi a0,a0,0x1
; MIPS32: sll a0,a0,0x1f
; MIPS32: sra a0,a0,0x1f
; MIPS32: move v0,a0
; Sign-extends element 15 of a <16 x i1> argument to i32.
; The expected output shifts the top byte of a3 down by 0x18, then masks bit 0
; and sign-extends it with a shift pair of 0x1f before returning it in v0.
define internal i32 @test_15(<16 x i1> %a) #0 {
entry:
%vecext = extractelement <16 x i1> %a, i32 15
%conv = sext i1 %vecext to i32
ret i32 %conv
}
; MIPS32-LABEL: test_15
; MIPS32: srl a3,a3,0x18
; MIPS32: andi a3,a3,0x1
; MIPS32: sll a3,a3,0x1f
; MIPS32: sra a3,a3,0x1f
; MIPS32: move v0,a3
; Adds element 0 of a <4 x i32> argument to a leading i32 argument.
; The expected output has the scalar in a0 and vector element 0 in a2, so the
; vector does not start in a1 when it follows an i32 argument.
define internal i32 @test_16(i32 %i, <4 x i32> %a) #0 {
entry:
%vecext = extractelement <4 x i32> %a, i32 0
%add = add nsw i32 %vecext, %i
ret i32 %add
}
; MIPS32-LABEL: test_16
; MIPS32: addu a2,a2,a0
; MIPS32: move v0,a2
; Adds element 3 of a <4 x i32> argument to a leading i32 argument.
; The expected output loads element 3 from an sp-relative stack slot rather
; than from an argument register, then adds the scalar held in a0.
define internal i32 @test_17(i32 %i, <4 x i32> %a) #0 {
entry:
%vecext = extractelement <4 x i32> %a, i32 3
%add = add nsw i32 %vecext, %i
ret i32 %add
}
; MIPS32-LABEL: test_17
; MIPS32: lw v0,{{.*}}(sp)
; MIPS32: addu v0,v0,a0
; Adds element 0 of a <4 x float> argument to a leading float argument.
; The expected output has the scalar in $f12 and vector element 0 in GP
; register a2, which is moved into $f0 before the add.
define internal float @test_18(float %f, <4 x float> %a) #0 {
entry:
%vecext = extractelement <4 x float> %a, i32 0
%add = fadd float %vecext, %f
ret float %add
}
; MIPS32-LABEL: test_18
; MIPS32: mtc1 a2,$f0
; MIPS32: add.s $f0,$f0,$f12
; Adds element 3 of a <4 x float> argument to a leading float argument.
; The expected output loads element 3 from an sp-relative stack slot, moves it
; into $f0, and adds the scalar held in $f12.
define internal float @test_19(float %f, <4 x float> %a) #0 {
entry:
%vecext = extractelement <4 x float> %a, i32 3
%add = fadd float %vecext, %f
ret float %add
}
; MIPS32-LABEL: test_19
; MIPS32: lw v0,{{.*}}(sp)
; MIPS32: mtc1 v0,$f0
; MIPS32: add.s $f0,$f0,$f12
; Loads a <4 x float> from memory and adds it to a <4 x float> argument.
; The expected output contains four scalar add.s instructions, one per
; element, which is consistent with the vector fadd being scalarized.
define internal <4 x float> @test_20(i32 %addr_i, <4 x float> %addend) {
entry:
%addr = inttoptr i32 %addr_i to <4 x float>*
%loaded = load <4 x float>, <4 x float>* %addr, align 4
%result = fadd <4 x float> %addend, %loaded
ret <4 x float> %result
}
; MIPS32-LABEL: test_20
; MIPS32: add.s
; MIPS32: add.s
; MIPS32: add.s
; MIPS32: add.s
; Loads a <4 x i32> from memory and adds it to a <4 x i32> argument.
; The expected output contains four occurrences of "add", one per element.
; NOTE(review): the pattern is a bare substring, so it would also match other
; mnemonics beginning with "add" (e.g. addu/addiu) — confirm this is intended.
define internal <4 x i32> @test_21(i32 %addr_i, <4 x i32> %addend) {
entry:
%addr = inttoptr i32 %addr_i to <4 x i32>*
%loaded = load <4 x i32>, <4 x i32>* %addr, align 4
%result = add <4 x i32> %addend, %loaded
ret <4 x i32> %result
}
; MIPS32-LABEL: test_21
; MIPS32: add
; MIPS32: add
; MIPS32: add
; MIPS32: add
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment