Commit 2f5534f1 by John Porto

Subzero. ARM32 Fcmp lowering.

parent be87b2ec
......@@ -382,6 +382,16 @@ InstARM32Vcvt::InstARM32Vcvt(Cfg *Func, Variable *Dest, Variable *Src,
addSource(Src);
}
// Constructs a predicated vcmp instruction over Src0 and Src1. The instruction
// is created without a Dest (nullptr is passed to the base class); the count
// of 2 handed to InstARM32Pred matches the two sources registered below.
InstARM32Vcmp::InstARM32Vcmp(Cfg *Func, Variable *Src0, Variable *Src1,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Vcmp, 2, nullptr, Predicate) {
// Source order matters: emit() prints getSrc(0) first, then getSrc(1).
addSource(Src0);
addSource(Src1);
}
// Constructs a predicated vmrs instruction. It is created with no Dest
// (nullptr) and no source operands (a count of 0 is passed to the base class).
InstARM32Vmrs::InstARM32Vmrs(Cfg *Func, CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Vmrs, 0, nullptr, Predicate) {}
// ======================== Dump routines ======================== //
// Two-addr ops
......@@ -507,8 +517,7 @@ void InstARM32Vmov::emitMultiDestSingleSource(const Cfg *Func) const {
assert(!llvm::isa<OperandARM32Mem>(Src0));
Str << "\t"
<< "vmov"
<< "\t";
<< "vmov" << getPredicate() << "\t";
Dest0->emit(Func);
Str << ", ";
Dest1->emit(Func);
......@@ -529,8 +538,7 @@ void InstARM32Vmov::emitSingleDestMultiSource(const Cfg *Func) const {
assert(!llvm::isa<OperandARM32Mem>(Src1));
Str << "\t"
<< "vmov"
<< "\t";
<< "vmov" << getPredicate() << "\t";
Dest0->emit(Func);
Str << ", ";
Src0->emit(Func);
......@@ -549,6 +557,14 @@ bool isVariableWithoutRegister(const Operand *Op) {
// Returns true when Op should be treated as a memory operand: either
// isVariableWithoutRegister() accepts it, or it is an OperandARM32Mem.
bool isMemoryAccess(Operand *Op) {
  if (isVariableWithoutRegister(Op))
    return true;
  return llvm::isa<OperandARM32Mem>(Op);
}
// Returns true when exactly one side of a vmov (Dest or Src) has a scalar
// integer type. Both sides being scalar integers is rejected by the assert.
bool isMoveBetweenCoreAndVFPRegisters(Variable *Dest, Operand *Src) {
  const bool DestIsInteger = isScalarIntegerType(Dest->getType());
  const bool SrcIsInteger = isScalarIntegerType(Src->getType());
  assert(!(DestIsInteger && SrcIsInteger) &&
         "At most one of vmov's operands can be a core register.");
  return DestIsInteger || SrcIsInteger;
}
} // end of anonymous namespace
void InstARM32Vmov::emitSingleDestSingleSource(const Cfg *Func) const {
......@@ -559,7 +575,14 @@ void InstARM32Vmov::emitSingleDestSingleSource(const Cfg *Func) const {
if (Dest->hasReg()) {
Operand *Src0 = getSrc(0);
const char *ActualOpcode = isMemoryAccess(Src0) ? "vldr" : "vmov";
Str << "\t" << ActualOpcode << "\t";
// when vmov{c}'ing, we need to emit a width string. Otherwise, the
// assembler might be tempted to assume we want a vector vmov{c}, and that
// is disallowed because ARM.
const char *WidthString =
(isMemoryAccess(Src0) || isMoveBetweenCoreAndVFPRegisters(Dest, Src0))
? ""
: getVecWidthString(Src0->getType());
Str << "\t" << ActualOpcode << getPredicate() << WidthString << "\t";
Dest->emit(Func);
Str << ", ";
Src0->emit(Func);
......@@ -567,8 +590,7 @@ void InstARM32Vmov::emitSingleDestSingleSource(const Cfg *Func) const {
Variable *Src0 = llvm::cast<Variable>(getSrc(0));
assert(Src0->hasReg());
Str << "\t"
"vstr"
"\t";
"vstr" << getPredicate() << "\t";
Src0->emit(Func);
Str << ", ";
Dest->emit(Func);
......@@ -578,7 +600,6 @@ void InstARM32Vmov::emitSingleDestSingleSource(const Cfg *Func) const {
void InstARM32Vmov::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
assert(CondARM32::AL == getPredicate());
assert(isMultiDest() + isMultiSource() <= 1 && "Invalid vmov type.");
if (isMultiDest()) {
emitMultiDestSingleSource(Func);
......@@ -1045,6 +1066,59 @@ void InstARM32Vcvt::dump(const Cfg *Func) const {
dumpSources(Func);
}
void InstARM32Vcmp::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2);
Str << "\t"
"vcmp" << getPredicate() << getVecWidthString(getSrc(0)->getType())
<< "\t";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
}
// emitIAS counterpart of emit() for vcmp (presumably the integrated-assembler
// path -- confirm). Not implemented: after checking the operand count it
// unconditionally reaches llvm_unreachable.
void InstARM32Vcmp::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 2);
// Func is unused until this is implemented; silence the unused-parameter
// warning.
(void)Func;
llvm_unreachable("Not yet implemented");
}
void InstARM32Vcmp::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
Str << "vcmp" << getPredicate() << getVecWidthString(getSrc(0)->getType());
dumpSources(Func);
}
void InstARM32Vmrs::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 0);
Str << "\t"
"vmrs" << getPredicate() << "\t"
"APSR_nzcv"
", "
"FPSCR";
}
// emitIAS counterpart of emit() for vmrs (presumably the integrated-assembler
// path -- confirm). Not implemented: after checking that there are no source
// operands it unconditionally reaches llvm_unreachable.
void InstARM32Vmrs::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 0);
// Func is unused until this is implemented; silence the unused-parameter
// warning.
(void)Func;
llvm_unreachable("Not yet implemented");
}
void InstARM32Vmrs::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
Str << "APSR{n,z,v,c} = vmrs" << getPredicate() << "\t"
"FPSCR{n,z,c,v}";
}
void OperandARM32Mem::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
......
......@@ -321,10 +321,12 @@ public:
Umull,
Uxt,
Vadd,
Vcmp,
Vcvt,
Vdiv,
Vldr,
Vmov,
Vmrs,
Vmul,
Vsqrt,
Vsub
......@@ -1204,6 +1206,46 @@ private:
Variable *Dest1 = nullptr;
};
/// Predicated vcmp instruction over two Variable sources. It has no Dest (the
/// constructor passes nullptr to the base class); instances are obtained
/// through create() only.
class InstARM32Vcmp final : public InstARM32Pred {
// Not default-constructible, copyable, or assignable.
InstARM32Vcmp() = delete;
InstARM32Vcmp(const InstARM32Vcmp &) = delete;
InstARM32Vcmp &operator=(const InstARM32Vcmp &) = delete;
public:
/// Constructs a new vcmp of Src0 against Src1 with the given Predicate, using
/// placement new on storage obtained from Func->allocate<InstARM32Vcmp>().
static InstARM32Vcmp *create(Cfg *Func, Variable *Src0, Variable *Src1,
CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Vcmp>())
InstARM32Vcmp(Func, Src0, Src1, Predicate);
}
/// Writes the textual assembly form.
void emit(const Cfg *Func) const override;
/// Not yet implemented (reaches llvm_unreachable).
void emitIAS(const Cfg *Func) const override;
/// Writes the debug dump form.
void dump(const Cfg *Func) const override;
/// Kind test: true when Inst is tagged with the Vcmp kind.
static bool classof(const Inst *Inst) { return isClassof(Inst, Vcmp); }
private:
// Private so that create() is the only way to build an instance.
InstARM32Vcmp(Cfg *Func, Variable *Src0, Variable *Src1,
CondARM32::Cond Predicate);
};
/// Copies the condition flags of the FP Status and Control Register (FPSCR)
/// to the core flags; it is emitted as "vmrs APSR_nzcv, FPSCR". The
/// instruction has no Dest and no source operands, and instances are obtained
/// through create() only.
class InstARM32Vmrs final : public InstARM32Pred {
// Not default-constructible, copyable, or assignable.
InstARM32Vmrs() = delete;
InstARM32Vmrs(const InstARM32Vmrs &) = delete;
InstARM32Vmrs &operator=(const InstARM32Vmrs &) = delete;
public:
/// Constructs a new vmrs with the given Predicate, using placement new on
/// storage obtained from Func->allocate<InstARM32Vmrs>().
static InstARM32Vmrs *create(Cfg *Func, CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Vmrs>()) InstARM32Vmrs(Func, Predicate);
}
/// Writes the textual assembly form.
void emit(const Cfg *Func) const override;
/// Not yet implemented (reaches llvm_unreachable).
void emitIAS(const Cfg *Func) const override;
/// Writes the debug dump form.
void dump(const Cfg *Func) const override;
/// Kind test: true when Inst is tagged with the Vmrs kind.
static bool classof(const Inst *Inst) { return isClassof(Inst, Vmrs); }
private:
// Private so that create() is the only way to build an instance.
InstARM32Vmrs(Cfg *Func, CondARM32::Cond Predicate);
};
// Declare partial template specializations of emit() methods that already have
// default implementations. Without this, there is the possibility of ODR
// violations and link errors.
......
......@@ -87,40 +87,39 @@ CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
// instructions/operands that use the same enum key value. The tables are kept
// separate to maintain a proper separation between abstraction layers. There
// is a risk that the tables could get out of sync if enum values are reordered
// or if entries are added or deleted. The following anonymous namespaces use
// static_asserts to ensure everything is kept in sync.
// Validate the enum values in ICMPARM32_TABLE.
namespace {
// Define a temporary set of enum values based on low-level table entries.
// Each ICMPARM32_TABLE row contributes one enumerator, numbered by its
// position in the table.
enum _icmp_ll_enum {
#define X(val, signed, swapped64, C_32, C1_64, C2_64) _icmp_ll_##val,
  ICMPARM32_TABLE
#undef X
      _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static constexpr int _icmp_hl_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// For every low-level table entry, ensure its position matches the value of
// the high-level enumerator with the same name. The failing key is appended
// to the message.
#define X(val, signed, swapped64, C_32, C1_64, C2_64)                          \
  static_assert(                                                               \
      _icmp_ll_##val == _icmp_hl_##val,                                        \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #val);
ICMPARM32_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table entries in
// case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _icmp_hl_##tag == _icmp_ll_##tag,                                        \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #tag);
ICEINSTICMP_TABLE
#undef X
} // end of anonymous namespace
// Stack alignment
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;
......@@ -2229,9 +2228,76 @@ void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) {
UnimplementedError(Func->getContext()->getFlags());
}
namespace {
// Validates FCMPARM32_TABLE's declaration w.r.t. InstFcmp::FCond ordering
// (and naming), then materializes the table used by lowerFcmp.

// Low-level view: one enumerator per FCMPARM32_TABLE row, numbered by the
// row's position in the table.
enum {
#define X(val, CC0, CC1) _fcmp_ll_##val,
  FCMPARM32_TABLE
#undef X
      _fcmp_ll_NUM
};

// High-level view: one enumerator per ICEINSTFCMP_TABLE entry, carrying the
// value of the matching InstFcmp enumerator.
enum {
#define X(tag, str) _fcmp_hl_##tag = InstFcmp::tag,
  ICEINSTFCMP_TABLE
#undef X
      _fcmp_hl_NUM
};

// Both tables must end at the same count...
static_assert(_fcmp_hl_NUM == _fcmp_ll_NUM,
              "Inconsistency between high-level and low-level fcmp tags.");
// ...and every high-level tag must sit at the same position as the low-level
// row with the same name.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _fcmp_hl_##tag == _fcmp_ll_##tag,                                        \
      "Inconsistency between high-level and low-level fcmp tag " #tag);
ICEINSTFCMP_TABLE
#undef X

// Condition-code pairs indexed by fcmp condition. The table is only ever
// read, so it is declared const.
const struct {
  CondARM32::Cond CC0;
  CondARM32::Cond CC1;
} TableFcmp[] = {
#define X(val, CC0, CC1)                                                       \
  { CondARM32::CC0, CondARM32::CC1 }                                           \
  ,
    FCMPARM32_TABLE
#undef X
};
} // end of anonymous namespace
// Lowers a scalar fcmp to the sequence
//   vcmp src0, src1
//   mov  t, #0
//   vmrs
//   mov<CC0> t, #1   (only if CC0 != kNone)
//   mov<CC1> t, #1   (only if CC1 != kNone)
//   mov  dest, t
// where CC0/CC1 come from TableFcmp, indexed by the fcmp condition. Vector
// compares are not handled and are reported through UnimplementedError.
void TargetARM32::lowerFcmp(const InstFcmp *Inst) {
  Variable *Dest = Inst->getDest();
  if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }

  Variable *Src0R = legalizeToReg(Inst->getSrc(0));
  Variable *Src1R = legalizeToReg(Inst->getSrc(1));
  Variable *T = makeReg(IceType_i32);
  _vcmp(Src0R, Src1R);
  // Start from "false"; the conditional moves below overwrite it with 1.
  _mov(T, Ctx->getConstantZero(IceType_i32));
  _vmrs();
  Operand *One = Ctx->getConstantInt32(1);
  InstFcmp::FCond Condition = Inst->getCondition();
  assert(Condition < llvm::array_lengthof(TableFcmp));
  CondARM32::Cond CC0 = TableFcmp[Condition].CC0;
  CondARM32::Cond CC1 = TableFcmp[Condition].CC1;
  if (CC0 != CondARM32::kNone) {
    _mov(T, One, CC0);
    // If this mov is not a maybe mov, but an actual mov (i.e., CC0 == AL), we
    // don't want to set_dest_nonkillable so that liveness + dead-code
    // elimination will get rid of the previous assignment (i.e., T = 0) above.
    if (CC0 != CondARM32::AL)
      _set_dest_nonkillable();
  }
  if (CC1 != CondARM32::kNone) {
    // A second condition only makes sense after a first, conditional one.
    assert(CC0 != CondARM32::kNone);
    assert(CC1 != CondARM32::AL);
    _mov_nonkillable(T, One, CC1);
  }
  _mov(Dest, T);
}
void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
......@@ -2695,16 +2761,12 @@ void TargetARM32::lowerSelect(const InstSelect *Inst) {
UnimplementedError(Func->getContext()->getFlags());
return;
}
if (isFloatingType(DestTy)) {
UnimplementedError(Func->getContext()->getFlags());
return;
}
// TODO(jvoung): handle folding opportunities.
// cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t
Variable *CmpOpnd0 = legalizeToReg(Condition);
Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
_cmp(CmpOpnd0, CmpOpnd1);
CondARM32::Cond Cond = CondARM32::NE;
static constexpr CondARM32::Cond Cond = CondARM32::NE;
if (DestTy == IceType_i64) {
SrcT = legalizeUndef(SrcT);
SrcF = legalizeUndef(SrcF);
......@@ -2726,6 +2788,20 @@ void TargetARM32::lowerSelect(const InstSelect *Inst) {
_mov(DestHi, THi);
return;
}
if (isFloatingType(DestTy)) {
Variable *T = makeReg(DestTy);
SrcF = legalizeToReg(SrcF);
assert(DestTy == SrcF->getType());
_vmov(T, SrcF);
SrcT = legalizeToReg(SrcT);
assert(DestTy == SrcT->getType());
_vmov(T, SrcT, Cond);
_set_dest_nonkillable();
_vmov(Dest, T);
return;
}
Variable *T = nullptr;
SrcF = legalize(SrcF, Legal_Reg | Legal_Flex);
_mov(T, SrcF);
......
......@@ -15,19 +15,47 @@
#ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_DEF
#define SUBZERO_SRC_ICETARGETLOWERINGARM32_DEF
// Patterns for lowering fcmp. These are expected to be used in the following
// manner:
//
// vcmp src0, src1
// mov reg, #0
// vmrs APSR_nzcv, FPSCR /* copy the FP compare flags to the core flags */
// movCC0 reg, #1 /* only if CC0 != kNone */
// movCC1 reg, #1 /* only if CC1 != kNone */
//
// TODO(jpp): vector lowerings.
// Columns:
//   val - fcmp condition tag. Rows must appear in the same order as the
//         high-level fcmp condition enum; the lowering code checks this with
//         static_asserts.
//   CC0 - condition of the first conditional move of #1 (kNone: no move; AL:
//         unconditional move).
//   CC1 - condition of an optional second conditional move of #1 (kNone: no
//         second move). The lowering asserts that CC1 is only set when CC0 is
//         set, and that CC1 is never AL.
#define FCMPARM32_TABLE \
/* val, CC0, CC1 */ \
X(False, kNone, kNone) \
X(Oeq, EQ, kNone) \
X(Ogt, GT, kNone) \
X(Oge, GE, kNone) \
X(Olt, MI, kNone) \
X(Ole, LS, kNone) \
X(One, MI, GT) \
X(Ord, VC, kNone) \
X(Ueq, EQ, VS) \
X(Ugt, HI, kNone) \
X(Uge, PL, kNone) \
X(Ult, LT, kNone) \
X(Ule, LE, kNone) \
X(Une, NE, kNone) \
X(Uno, VS, kNone) \
X(True, AL, kNone) \
//#define X(val, CC0, CC1)
// Patterns for lowering icmp.
// Rows must appear in the same order as ICEINSTICMP_TABLE; the lowering code
// checks this with static_asserts.
#define ICMPARM32_TABLE                                                        \
  /* val, is_signed, swapped64, C_32, C1_64, C2_64 */                          \
  X(Eq, false, false, EQ, EQ, NE)                                              \
  X(Ne, false, false, NE, NE, EQ)                                              \
  X(Ugt, false, false, HI, HI, LS)                                             \
  X(Uge, false, false, CS, CS, CC)                                             \
  X(Ult, false, false, CC, CC, CS)                                             \
  X(Ule, false, false, LS, LS, HI)                                             \
  X(Sgt, true, true, GT, LT, GE)                                               \
  X(Sge, true, false, GE, GE, LT)                                              \
  X(Slt, true, false, LT, LT, GE)                                              \
  X(Sle, true, true, LE, GE, LT)                                               \
//#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)
#endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_DEF
......@@ -401,6 +401,13 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vldr::create(Func, Dest, Src, Pred));
}
// Inserts a vcmp of Src0 against Src1 at the current lowering context. Pred
// defaults to AL (unconditional).
void _vcmp(Variable *Src0, Variable *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vcmp::create(Func, Src0, Src1, Pred));
}
// Inserts a vmrs (operand-less; see InstARM32Vmrs) at the current lowering
// context. Pred defaults to AL (unconditional).
void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vmrs::create(Func, Pred));
}
// There are a whole bunch of vmov variants, to transfer within S/D/Q
// registers, between core integer registers and S/D, and from small
// immediates into S/D. For integer -> S/D/Q there is a variant which takes
......@@ -411,8 +418,8 @@ protected:
// register is modified. This cannot model that.
//
// This represents the simple single source, single dest variants only.
void _vmov(Variable *Dest, Operand *Src0) {
constexpr CondARM32::Cond Pred = CondARM32::AL;
void _vmov(Variable *Dest, Operand *Src0,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vmov::create(Func, Dest, Src0, Pred));
}
// This represents the single source, multi dest variant.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment