Reserve space for scalar FP returns in the stack frame

Rather than bumping the stack pointer around the scalar return sequence in _fld, ensure the prolog allocates enough space. BUG= R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1442753008 .

Reserve space for scalar FP returns in the stack frame
0d9cf487 · David Sehr · 2f3b8ec8 · 0d9cf487 · 0d9cf487 · 0d9cf487
Commit 0d9cf487 authored Nov 16, 2015 by David Sehr
5 changed files
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -66,6 +66,7 @@ public:
  void setFunctionName(const IceString &Name) { FunctionName = Name; }
  IceString getFunctionName() const { return FunctionName; }
  void setReturnType(Type Ty) { ReturnType = Ty; }
+  Type getReturnType() const { return ReturnType; }
  /// @}
  /// \name Manage the "internal" attribute of the function.

--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -2547,20 +2547,17 @@ template <class Machine> void InstX86Fld<Machine>::emit(const Cfg *Func) const {
  Ostream &Str = Func->getContext()->getStrEmit();
  assert(this->getSrcSize() == 1);
  Type Ty = this->getSrc(0)->getType();
-  SizeT Width = typeWidthInBytes(Ty);
  const auto *Var = llvm::dyn_cast<Variable>(this->getSrc(0));
  if (Var && Var->hasReg()) {
    // This is a physical xmm register, so we need to spill it to a temporary
-    // stack slot.
+    // stack slot.  Function prolog emission guarantees that there is sufficient
-    Str << "\tsubl\t$" << Width << ", %esp"
+    // space to do this.
-        << "\n";
    Str << "\tmov"
        << InstX86Base<Machine>::Traits::TypeAttributes[Ty].SdSsString << "\t";
    Var->emit(Func);
    Str << ", (%esp)\n";
    Str << "\tfld" << this->getFldString(Ty) << "\t"
-        << "(%esp)\n";
+        << "(%esp)";
-    Str << "\taddl\t$" << Width << ", %esp";
    return;
  }
  Str << "\tfld" << this->getFldString(Ty) << "\t";
@@ -2578,11 +2575,8 @@ void InstX86Fld<Machine>::emitIAS(const Cfg *Func) const {
  if (const auto *Var = llvm::dyn_cast<Variable>(Src)) {
    if (Var->hasReg()) {
      // This is a physical xmm register, so we need to spill it to a temporary
-      // stack slot.
+      // stack slot.  Function prolog emission guarantees that there is
-      Immediate Width(typeWidthInBytes(Ty));
+      // sufficient space to do this.
-      Asm->sub(IceType_i32,
-               InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp,
-               Width);
      typename InstX86Base<Machine>::Traits::Address StackSlot =
          typename InstX86Base<Machine>::Traits::Address(
              InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, 0,
@@ -2590,9 +2584,6 @@ void InstX86Fld<Machine>::emitIAS(const Cfg *Func) const {
      Asm->movss(Ty, StackSlot,
                 InstX86Base<Machine>::Traits::getEncodedXmm(Var->getRegNum()));
      Asm->fld(Ty, StackSlot);
-      Asm->add(IceType_i32,
-               InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp,
-               Width);
    } else {
      typename InstX86Base<Machine>::Traits::Address StackAddr(
          Target->stackVarToAsmOperand(Var));
@@ -2646,7 +2637,6 @@ void InstX86Fstp<Machine>::emit(const Cfg *Func) const {
      << "\t"
      << "(%esp), ";
  this->getDest()->emit(Func);
-  Str << "\n";
 }
 template <class Machine>

--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -497,6 +497,18 @@ void TargetX8632::addProlog(CfgNode *Node) {
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;
+  // Functions returning scalar floating point types may need to convert values
+  // from an in-register xmm value to the top of the x87 floating point stack.
+  // This is done by a movs[sd] and an fld[sd].  Ensure there is enough scratch
+  // space on the stack for this.
+  const Type ReturnType = Func->getReturnType();
+  if (isScalarFloatingType(ReturnType)) {
+    // Avoid misaligned double-precision load/store.
+    NeedsStackAlignment = true;
+    SpillAreaSizeBytes =
+        std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes);
+  }
  // Align esp if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset =

--- a/tests_lit/llvm2ice_tests/commutativity.ll
+++ b/tests_lit/llvm2ice_tests/commutativity.ll
@@ -61,6 +61,7 @@ entry:
  ret float %result
 }
 ; CHECK-LABEL: floatAddLeft
+; CHECK-NEXT: sub esp,0xc
 ; CHECK-NEXT: movss xmm0,DWORD PTR
 ; CHECK-NEXT: movss xmm1,DWORD PTR
 ; CHECK-NEXT: addss xmm1,xmm0
@@ -73,6 +74,7 @@ entry:
  ret float %result
 }
 ; CHECK-LABEL: floatAddRight
+; CHECK-NEXT: sub esp,0xc
 ; CHECK-NEXT: movss xmm0,DWORD PTR
 ; CHECK-NEXT: movss xmm1,DWORD PTR
 ; CHECK-NEXT: addss xmm0,xmm1
@@ -85,6 +87,7 @@ entry:
  ret float %result
 }
 ; CHECK-LABEL: floatMultiplyLeft
+; CHECK-NEXT: sub esp,0xc
 ; CHECK-NEXT: movss xmm0,DWORD PTR
 ; CHECK-NEXT: movss xmm1,DWORD PTR
 ; CHECK-NEXT: mulss xmm1,xmm0
@@ -97,6 +100,7 @@ entry:
  ret float %result
 }
 ; CHECK-LABEL: floatMultiplyRight
+; CHECK-NEXT: sub esp,0xc
 ; CHECK-NEXT: movss xmm0,DWORD PTR
 ; CHECK-NEXT: movss xmm1,DWORD PTR
 ; CHECK-NEXT: mulss xmm0,xmm1

--- a/tests_lit/llvm2ice_tests/elf_container.ll
+++ b/tests_lit/llvm2ice_tests/elf_container.ll
@@ -386,11 +386,11 @@ define void @_start(i32) {
 ; CHECK: Relocations [
 ; CHECK:   Section ({{[0-9]+}}) .rel.text {
-; CHECK:     0x4 R_386_32 .L$float$80000000 0x0
+; CHECK:     0x7 R_386_32 .L$float$80000000 0x0
-; CHECK:     0xC R_386_32 .L$float$3f9d70a0 0x0
+; CHECK:     0xF R_386_32 .L$float$3f9d70a0 0x0
-; CHECK:     0x24 R_386_32 .L$double$ffffffffffffffff 0x0
+; CHECK:     0x27 R_386_32 .L$double$ffffffffffffffff 0x0
-; CHECK:     0x2C R_386_32 .L$double$fff7ffffffffffff 0x0
+; CHECK:     0x2F R_386_32 .L$double$fff7ffffffffffff 0x0
-; CHECK:     0x34 R_386_32 .L$double$fff8000000000003 0x0
+; CHECK:     0x37 R_386_32 .L$double$fff8000000000003 0x0
 ; CHECK:     0x{{.*}} R_386_PC32 memcpy
 ; CHECK:     0x{{.*}} R_386_PC32 memset
 ; CHECK:     0x{{.*}} R_386_PC32 external_foo