Commit 385351ba by John Porto

Fixes ARM32 VFP calling convention.

Packs VFP arguments as tightly as the ABI requires, and adds tests for float and double arguments. Vector argument tests will come soon. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1348393002 .
parent 52863b13
...@@ -538,26 +538,31 @@ void InstARM32Vmov::emitSingleDestMultiSource(const Cfg *Func) const { ...@@ -538,26 +538,31 @@ void InstARM32Vmov::emitSingleDestMultiSource(const Cfg *Func) const {
Src1->emit(Func); Src1->emit(Func);
} }
namespace {
bool isVariableWithoutRegister(const Operand *Op) {
if (const auto *OpV = llvm::dyn_cast<const Variable>(Op)) {
return !OpV->hasReg();
}
return false;
}
bool isMemoryAccess(Operand *Op) {
return isVariableWithoutRegister(Op) || llvm::isa<OperandARM32Mem>(Op);
}
} // end of anonymous namespace
void InstARM32Vmov::emitSingleDestSingleSource(const Cfg *Func) const { void InstARM32Vmov::emitSingleDestSingleSource(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
Variable *Dest = getDest(); Variable *Dest = getDest();
if (Dest->hasReg()) { if (Dest->hasReg()) {
IceString ActualOpcode = "vmov";
Operand *Src0 = getSrc(0); Operand *Src0 = getSrc(0);
if (const auto *Src0V = llvm::dyn_cast<Variable>(Src0)) { const char *ActualOpcode = isMemoryAccess(Src0) ? "vldr" : "vmov";
if (!Src0V->hasReg()) {
ActualOpcode = IceString("vldr");
}
} else {
if (llvm::isa<OperandARM32Mem>(Src0))
ActualOpcode = IceString("vldr");
}
Str << "\t" << ActualOpcode << "\t"; Str << "\t" << ActualOpcode << "\t";
getDest()->emit(Func); Dest->emit(Func);
Str << ", "; Str << ", ";
getSrc(0)->emit(Func); Src0->emit(Func);
} else { } else {
Variable *Src0 = llvm::cast<Variable>(getSrc(0)); Variable *Src0 = llvm::cast<Variable>(getSrc(0));
assert(Src0->hasReg()); assert(Src0->hasReg());
...@@ -897,8 +902,8 @@ void InstARM32Str::emit(const Cfg *Func) const { ...@@ -897,8 +902,8 @@ void InstARM32Str::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2); assert(getSrcSize() == 2);
Type Ty = getSrc(0)->getType(); Type Ty = getSrc(0)->getType();
Str << "\t" const char *Opcode = isScalarFloatingType(Ty) ? "vstr" : "str";
<< "str" << getWidthString(Ty) << getPredicate() << "\t"; Str << "\t" << Opcode << getWidthString(Ty) << getPredicate() << "\t";
getSrc(0)->emit(Func); getSrc(0)->emit(Func);
Str << ", "; Str << ", ";
getSrc(1)->emit(Func); getSrc(1)->emit(Func);
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
/// entirely of the lowering sequence for each high-level instruction. /// entirely of the lowering sequence for each high-level instruction.
/// ///
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "IceTargetLoweringARM32.h" #include "IceTargetLoweringARM32.h"
#include "IceCfg.h" #include "IceCfg.h"
...@@ -465,39 +464,50 @@ bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { ...@@ -465,39 +464,50 @@ bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
} }
bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
if (NumFPRegUnits >= ARM32_MAX_FP_REG_UNITS) if (!VFPRegsFree.any()) {
return false; return false;
}
if (isVectorType(Ty)) { if (isVectorType(Ty)) {
NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4);
// Q registers are declared in reverse order, so RegARM32::Reg_q0 > // Q registers are declared in reverse order, so RegARM32::Reg_q0 >
// RegARM32::Reg_q1. Therefore, we need to subtract NumFPRegUnits from // RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0.
// Reg_q0. Same thing goes for D registers. // Same thing goes for D registers.
static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1, static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1,
"ARM32 Q registers are possibly declared incorrectly."); "ARM32 Q registers are possibly declared incorrectly.");
*Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4);
NumFPRegUnits += 4; int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first();
// If this bumps us past the boundary, don't allocate to a register and if (QRegStart >= 0) {
// leave any previously speculatively consumed registers as consumed. VFPRegsFree.reset(QRegStart, QRegStart + 4);
if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) *Reg = RegARM32::Reg_q0 - (QRegStart / 4);
return false; return true;
}
} else if (Ty == IceType_f64) { } else if (Ty == IceType_f64) {
static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1, static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1,
"ARM32 D registers are possibly declared incorrectly."); "ARM32 D registers are possibly declared incorrectly.");
NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2);
*Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2); int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first();
NumFPRegUnits += 2; if (DRegStart >= 0) {
// If this bumps us past the boundary, don't allocate to a register and VFPRegsFree.reset(DRegStart, DRegStart + 2);
// leave any previously speculatively consumed registers as consumed. *Reg = RegARM32::Reg_d0 - (DRegStart / 2);
if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) return true;
return false; }
} else { } else {
static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1, static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1,
"ARM32 S registers are possibly declared incorrectly."); "ARM32 S registers are possibly declared incorrectly.");
assert(Ty == IceType_f32); assert(Ty == IceType_f32);
*Reg = RegARM32::Reg_s0 + NumFPRegUnits; int32_t SReg = VFPRegsFree.find_first();
++NumFPRegUnits; assert(SReg >= 0);
} VFPRegsFree.reset(SReg);
*Reg = RegARM32::Reg_s0 + SReg;
return true; return true;
}
// Parameter allocation failed. From now on, every fp register must be placed
// on the stack. We clear VFPRegsFree in case there are any "holes" from S and
// D registers.
VFPRegsFree.clear();
return false;
} }
void TargetARM32::lowerArguments() { void TargetARM32::lowerArguments() {
...@@ -2235,6 +2245,8 @@ void TargetARM32::lowerCast(const InstCast *Inst) { ...@@ -2235,6 +2245,8 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
break; break;
case IceType_v4i32: case IceType_v4i32:
// avoid cryptic liveness errors
Context.insert(InstFakeDef::create(Func, Dest));
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
break; break;
case IceType_v4f32: case IceType_v4f32:
...@@ -2768,9 +2780,10 @@ void TargetARM32::lowerStore(const InstStore *Inst) { ...@@ -2768,9 +2780,10 @@ void TargetARM32::lowerStore(const InstStore *Inst) {
Variable *ValueLo = legalizeToReg(loOperand(Value)); Variable *ValueLo = legalizeToReg(loOperand(Value));
_str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr))); _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
_str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr))); _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
} else if (isVectorType(Ty)) {
UnimplementedError(Func->getContext()->getFlags());
} else { } else {
if (isVectorType(Ty)) {
UnimplementedError(Func->getContext()->getFlags());
}
Variable *ValueR = legalizeToReg(Value); Variable *ValueR = legalizeToReg(Value);
_str(ValueR, NewAddr); _str(ValueR, NewAddr);
} }
...@@ -2832,7 +2845,10 @@ Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) { ...@@ -2832,7 +2845,10 @@ Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) { Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
Type Ty = Src->getType(); Type Ty = Src->getType();
Variable *Reg = makeReg(Ty, RegNum); Variable *Reg = makeReg(Ty, RegNum);
if (isVectorType(Ty) || isFloatingType(Ty)) { if (isVectorType(Ty)) {
// TODO(jpp): Src must be a register, or an address with base register.
_vmov(Reg, Src);
} else if (isFloatingType(Ty)) {
_vmov(Reg, Src); _vmov(Reg, Src);
} else { } else {
// Mov's Src operand can really only be the flexible second operand type or // Mov's Src operand can really only be the flexible second operand type or
......
...@@ -21,6 +21,8 @@ ...@@ -21,6 +21,8 @@
#include "IceRegistersARM32.h" #include "IceRegistersARM32.h"
#include "IceTargetLowering.h" #include "IceTargetLowering.h"
#include "llvm/ADT/SmallBitVector.h"
namespace Ice { namespace Ice {
// Class encapsulating ARM cpu features / instruction set. // Class encapsulating ARM cpu features / instruction set.
...@@ -461,19 +463,34 @@ protected: ...@@ -461,19 +463,34 @@ protected:
/// Helper class that understands the Calling Convention and register /// Helper class that understands the Calling Convention and register
/// assignments. The first few integer type parameters can use r0-r3, /// assignments. The first few integer type parameters can use r0-r3,
/// regardless of their position relative to the floating-point/vector /// regardless of their position relative to the floating-point/vector
/// arguments in the argument list. Floating-point and vector arguments can /// arguments in the argument list. Floating-point and vector arguments
/// use q0-q3 (aka d0-d7, s0-s15). Technically, arguments that can start with /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
/// registers but extend beyond the available registers can be split between /// see the ARM Architecture Procedure Calling Standards (AAPCS).
/// the registers and the stack. However, this is typically for passing GPR ///
/// structs by value, and PNaCl transforms expand this out. /// Technically, arguments that can start with registers but extend beyond the
/// available registers can be split between the registers and the stack.
/// However, this is typically for passing GPR structs by value, and PNaCl
/// transforms expand this out.
/// ///
/// Also, at the point before the call, the stack must be aligned. /// At (public) function entry, the stack must be 8-byte aligned.
class CallingConv { class CallingConv {
CallingConv(const CallingConv &) = delete; CallingConv(const CallingConv &) = delete;
CallingConv &operator=(const CallingConv &) = delete; CallingConv &operator=(const CallingConv &) = delete;
public: public:
CallingConv() {} CallingConv()
: VFPRegsFree(ARM32_MAX_FP_REG_UNITS, true),
ValidF64Regs(ARM32_MAX_FP_REG_UNITS),
ValidV128Regs(ARM32_MAX_FP_REG_UNITS) {
for (uint32_t i = 0; i < ARM32_MAX_FP_REG_UNITS; ++i) {
if ((i % 2) == 0) {
ValidF64Regs[i] = true;
}
if ((i % 4) == 0) {
ValidV128Regs[i] = true;
}
}
}
~CallingConv() = default; ~CallingConv() = default;
bool I64InRegs(std::pair<int32_t, int32_t> *Regs); bool I64InRegs(std::pair<int32_t, int32_t> *Regs);
...@@ -481,12 +498,14 @@ protected: ...@@ -481,12 +498,14 @@ protected:
bool FPInReg(Type Ty, int32_t *Reg); bool FPInReg(Type Ty, int32_t *Reg);
static constexpr uint32_t ARM32_MAX_GPR_ARG = 4; static constexpr uint32_t ARM32_MAX_GPR_ARG = 4;
// Units of S registers still available to S/D/Q arguments. // TODO(jpp): comment.
static constexpr uint32_t ARM32_MAX_FP_REG_UNITS = 16; static constexpr uint32_t ARM32_MAX_FP_REG_UNITS = 16;
private: private:
uint32_t NumGPRRegsUsed = 0; uint32_t NumGPRRegsUsed = 0;
uint32_t NumFPRegUnits = 0; llvm::SmallBitVector VFPRegsFree;
llvm::SmallBitVector ValidF64Regs;
llvm::SmallBitVector ValidV128Regs;
}; };
private: private:
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
; https://code.google.com/p/nativeclient/issues/detail?id=4304 ; https://code.google.com/p/nativeclient/issues/detail?id=4304
RUN: %p2i --expect-fail --tbc -i %p/Input/phi-invalid.tbc --insts 2>&1 \ RUN: %p2i --expect-fail --tbc -i %p/Input/phi-invalid.tbc --insts 2>&1 \
RUN: --filetype=obj --args -o /dev/null \
RUN: | FileCheck --check-prefix=BADPHI %s RUN: | FileCheck --check-prefix=BADPHI %s
; BADPHI: Phi error: ; BADPHI: Phi error:
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment