Commit 385351ba by John Porto

Fixes ARM32 VFP calling convention.

Packs VFP arguments as tightly as the ABI requires, and adds tests for float and double arguments. Vector argument tests will come soon. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1348393002 .
parent 52863b13
......@@ -538,26 +538,31 @@ void InstARM32Vmov::emitSingleDestMultiSource(const Cfg *Func) const {
Src1->emit(Func);
}
namespace {
/// Returns true iff Op is a Variable for which hasReg() is false. Any operand
/// that is not a Variable yields false.
bool isVariableWithoutRegister(const Operand *Op) {
  const auto *Var = llvm::dyn_cast<const Variable>(Op);
  return Var != nullptr && !Var->hasReg();
}
bool isMemoryAccess(Operand *Op) {
return isVariableWithoutRegister(Op) || llvm::isa<OperandARM32Mem>(Op);
}
} // end of anonymous namespace
void InstARM32Vmov::emitSingleDestSingleSource(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
Variable *Dest = getDest();
if (Dest->hasReg()) {
IceString ActualOpcode = "vmov";
Operand *Src0 = getSrc(0);
if (const auto *Src0V = llvm::dyn_cast<Variable>(Src0)) {
if (!Src0V->hasReg()) {
ActualOpcode = IceString("vldr");
}
} else {
if (llvm::isa<OperandARM32Mem>(Src0))
ActualOpcode = IceString("vldr");
}
const char *ActualOpcode = isMemoryAccess(Src0) ? "vldr" : "vmov";
Str << "\t" << ActualOpcode << "\t";
getDest()->emit(Func);
Dest->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
Src0->emit(Func);
} else {
Variable *Src0 = llvm::cast<Variable>(getSrc(0));
assert(Src0->hasReg());
......@@ -897,8 +902,8 @@ void InstARM32Str::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2);
Type Ty = getSrc(0)->getType();
Str << "\t"
<< "str" << getWidthString(Ty) << getPredicate() << "\t";
const char *Opcode = isScalarFloatingType(Ty) ? "vstr" : "str";
Str << "\t" << Opcode << getWidthString(Ty) << getPredicate() << "\t";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
......
......@@ -12,7 +12,6 @@
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//
#include "IceTargetLoweringARM32.h"
#include "IceCfg.h"
......@@ -465,39 +464,50 @@ bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
}
/// Attempts to assign a VFP register to a floating-point or vector argument of
/// type Ty. On success the register number is written to *Reg and true is
/// returned; a false return means no register was assigned by this call.
///
/// NOTE(review): this span is a rendered diff without +/- markers, so lines
/// from the old NumFPRegUnits-based bookkeeping and from the new
/// VFPRegsFree-based bookkeeping appear interleaved. Read it as two versions
/// of the function shown together, not as one coherent body -- TODO confirm
/// against the repository before relying on any single line.
bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
if (NumFPRegUnits >= ARM32_MAX_FP_REG_UNITS)
if (!VFPRegsFree.any()) {
return false;
}
if (isVectorType(Ty)) {
NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4);
// Q registers are declared in reverse order, so RegARM32::Reg_q0 >
// RegARM32::Reg_q1. Therefore, we need to subtract NumFPRegUnits from
// Reg_q0. Same thing goes for D registers.
// RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0.
// Same thing goes for D registers.
static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1,
"ARM32 Q registers are possibly declared incorrectly.");
*Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4);
NumFPRegUnits += 4;
// If this bumps us past the boundary, don't allocate to a register and
// leave any previously speculatively consumed registers as consumed.
if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
return false;
// Candidate start index: a bit set in both VFPRegsFree and ValidV128Regs. A
// negative result is treated below as "no candidate".
int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first();
if (QRegStart >= 0) {
// Clear the bits in [QRegStart, QRegStart + 4) so they are no longer free.
VFPRegsFree.reset(QRegStart, QRegStart + 4);
*Reg = RegARM32::Reg_q0 - (QRegStart / 4);
return true;
}
} else if (Ty == IceType_f64) {
static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1,
"ARM32 D registers are possibly declared incorrectly.");
NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2);
*Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2);
NumFPRegUnits += 2;
// If this bumps us past the boundary, don't allocate to a register and
// leave any previously speculatively consumed registers as consumed.
if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
return false;
// Candidate start index: a bit set in both VFPRegsFree and ValidF64Regs. A
// negative result is treated below as "no candidate".
int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first();
if (DRegStart >= 0) {
// Clear the bits in [DRegStart, DRegStart + 2) so they are no longer free.
VFPRegsFree.reset(DRegStart, DRegStart + 2);
*Reg = RegARM32::Reg_d0 - (DRegStart / 2);
return true;
}
} else {
static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1,
"ARM32 S registers are possibly declared incorrectly.");
assert(Ty == IceType_f32);
*Reg = RegARM32::Reg_s0 + NumFPRegUnits;
++NumFPRegUnits;
// The assert relies on the VFPRegsFree.any() check at the top of the function.
int32_t SReg = VFPRegsFree.find_first();
assert(SReg >= 0);
VFPRegsFree.reset(SReg);
*Reg = RegARM32::Reg_s0 + SReg;
return true;
}
return true;
// Parameter allocation failed. From now on, every fp register must be placed
// on the stack. We clear VFPRegsFree in case there are any "holes" from S and
// D registers.
VFPRegsFree.clear();
return false;
}
void TargetARM32::lowerArguments() {
......@@ -2235,6 +2245,8 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
UnimplementedError(Func->getContext()->getFlags());
break;
case IceType_v4i32:
// avoid cryptic liveness errors
Context.insert(InstFakeDef::create(Func, Dest));
UnimplementedError(Func->getContext()->getFlags());
break;
case IceType_v4f32:
......@@ -2768,9 +2780,10 @@ void TargetARM32::lowerStore(const InstStore *Inst) {
Variable *ValueLo = legalizeToReg(loOperand(Value));
_str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
_str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
} else if (isVectorType(Ty)) {
UnimplementedError(Func->getContext()->getFlags());
} else {
if (isVectorType(Ty)) {
UnimplementedError(Func->getContext()->getFlags());
}
Variable *ValueR = legalizeToReg(Value);
_str(ValueR, NewAddr);
}
......@@ -2832,7 +2845,10 @@ Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
Type Ty = Src->getType();
Variable *Reg = makeReg(Ty, RegNum);
if (isVectorType(Ty) || isFloatingType(Ty)) {
if (isVectorType(Ty)) {
// TODO(jpp): Src must be a register, or an address with base register.
_vmov(Reg, Src);
} else if (isFloatingType(Ty)) {
_vmov(Reg, Src);
} else {
// Mov's Src operand can really only be the flexible second operand type or
......
......@@ -21,6 +21,8 @@
#include "IceRegistersARM32.h"
#include "IceTargetLowering.h"
#include "llvm/ADT/SmallBitVector.h"
namespace Ice {
// Class encapsulating ARM cpu features / instruction set.
......@@ -461,19 +463,34 @@ protected:
/// Helper class that understands the Calling Convention and register
/// assignments. The first few integer type parameters can use r0-r3,
/// regardless of their position relative to the floating-point/vector
/// arguments in the argument list. Floating-point and vector arguments can
/// use q0-q3 (aka d0-d7, s0-s15). Technically, arguments that can start with
/// registers but extend beyond the available registers can be split between
/// the registers and the stack. However, this is typically for passing GPR
/// structs by value, and PNaCl transforms expand this out.
/// arguments in the argument list. Floating-point and vector arguments
/// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
/// see the ARM Architecture Procedure Calling Standards (AAPCS).
///
/// Technically, arguments that can start with registers but extend beyond the
/// available registers can be split between the registers and the stack.
/// However, this is typically for passing GPR structs by value, and PNaCl
/// transforms expand this out.
///
/// Also, at the point before the call, the stack must be aligned.
/// At (public) function entry, the stack must be 8-byte aligned.
class CallingConv {
CallingConv(const CallingConv &) = delete;
CallingConv &operator=(const CallingConv &) = delete;
public:
CallingConv() {}
/// Creates the three bit vectors with ARM32_MAX_FP_REG_UNITS bits each:
/// VFPRegsFree starts with every bit set, ValidF64Regs gets every even index
/// set, and ValidV128Regs gets every index that is a multiple of four set.
CallingConv()
    : VFPRegsFree(ARM32_MAX_FP_REG_UNITS, true),
      ValidF64Regs(ARM32_MAX_FP_REG_UNITS),
      ValidV128Regs(ARM32_MAX_FP_REG_UNITS) {
  // Only even indices matter: every multiple of four is also even, so stepping
  // by two visits each index that either mask needs.
  for (uint32_t Unit = 0; Unit < ARM32_MAX_FP_REG_UNITS; Unit += 2) {
    ValidF64Regs[Unit] = true;
    if ((Unit % 4) == 0)
      ValidV128Regs[Unit] = true;
  }
}
~CallingConv() = default;
bool I64InRegs(std::pair<int32_t, int32_t> *Regs);
......@@ -481,12 +498,14 @@ protected:
bool FPInReg(Type Ty, int32_t *Reg);
static constexpr uint32_t ARM32_MAX_GPR_ARG = 4;
// Units of S registers still available to S/D/Q arguments.
// TODO(jpp): comment.
static constexpr uint32_t ARM32_MAX_FP_REG_UNITS = 16;
private:
uint32_t NumGPRRegsUsed = 0;
uint32_t NumFPRegUnits = 0;
llvm::SmallBitVector VFPRegsFree;
llvm::SmallBitVector ValidF64Regs;
llvm::SmallBitVector ValidV128Regs;
};
private:
......
......@@ -2,6 +2,7 @@
; https://code.google.com/p/nativeclient/issues/detail?id=4304
RUN: %p2i --expect-fail --tbc -i %p/Input/phi-invalid.tbc --insts 2>&1 \
RUN: --filetype=obj --args -o /dev/null \
RUN: | FileCheck --check-prefix=BADPHI %s
; BADPHI: Phi error:
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment