Subzero: Clean up the runtime implementation.

The runtime helpers are given more consistent names: __Sz_<bitcode>_<type1>_<type2>. Missing helpers (various vector bitcasts) are implemented.

We'd prefer to avoid calling external library functions, e.g. in compiler-rt, but even llc uses these helpers for some bitcode on x86-32, so the alternative is to copy their implementation into Subzero's runtime.

BUG= none
R=mtrofin@chromium.org
Review URL: https://codereview.chromium.org/961413002

Subzero: Clean up the runtime implementation.
c4508791 · Jim Stichnoth · 739327ab · c4508791 · c4508791 · c4508791
Commit c4508791 authored Mar 01, 2015 by Jim Stichnoth
8 changed files
--- a/pydir/build-runtime.py
+++ b/pydir/build-runtime.py
@@ -16,6 +16,8 @@ def Translate(ll_files, extra_args, obj, verbose):
    shellcmd(['cat'] + ll_files + ['|',
              'pnacl-llc',
              '-externalize',
+              '-function-sections',
+              '-O2',
              '-filetype=obj',
              '-bitcode-format=llvm',
              '-o', obj

--- a/runtime/szrt.c
+++ b/runtime/szrt.c
@@ -15,7 +15,6 @@
 //===----------------------------------------------------------------------===//

 #include <stdint.h>
-#include <stdlib.h>

 // TODO(stichnot): The various NaN cross tests try to map Subzero's
 // undefined behavior to the same as llc's undefined behavior, as
@@ -24,44 +23,57 @@
 // for different targets.  It would be better to find a more
 // appropriate set of llc options when building the Subzero runtime.
 //
-// We test for NaN using "value==value" instead of using isnan(value)
+// We test for NaN using "Value==Value" instead of using isnan(Value)
 // to avoid an external dependency on fpclassify().

-uint32_t cvtftoui32(float value) {
-  if (value == value) // NaNaN
-    return (uint32_t)value;
+uint32_t __Sz_fptoui_f32_i32(float Value) {
+  if (Value == Value) // NaNaN
+    return (uint32_t)Value;
  return 0x80000000;
 }

-uint32_t cvtdtoui32(double value) {
-  if (value == value) // NaNaN
-    return (uint32_t)value;
+uint32_t __Sz_fptoui_f64_i32(double Value) {
+  if (Value == Value) // NaNaN
+    return (uint32_t)Value;
  return 0x80000000;
 }

-int64_t cvtftosi64(float value) { return (int64_t)value; }
+uint64_t __Sz_fptoui_f32_i64(float Value) { return (uint64_t)Value; }

-int64_t cvtdtosi64(double value) { return (int64_t)value; }
+uint64_t __Sz_fptoui_f64_i64(double Value) { return (uint64_t)Value; }

-uint64_t cvtftoui64(float value) { return (uint64_t)value; }
+int64_t __Sz_fptosi_f32_i64(float Value) { return (int64_t)Value; }

-uint64_t cvtdtoui64(double value) { return (uint64_t)value; }
+int64_t __Sz_fptosi_f64_i64(double Value) { return (int64_t)Value; }

-float cvtui32tof(uint32_t value) { return (float)value; }
+float __Sz_uitofp_i32_f32(uint32_t Value) { return (float)Value; }

-float cvtsi64tof(int64_t value) { return (float)value; }
+float __Sz_uitofp_i64_f32(uint64_t Value) { return (float)Value; }

-float cvtui64tof(uint64_t value) { return (float)value; }
+double __Sz_uitofp_i32_f64(uint32_t Value) { return (double)Value; }

-double cvtui32tod(uint32_t value) { return (double)value; }
+double __Sz_uitofp_i64_f64(uint64_t Value) { return (double)Value; }

-double cvtsi64tod(int64_t value) { return (double)value; }
+float __Sz_sitofp_i64_f32(int64_t Value) { return (float)Value; }

-double cvtui64tod(uint64_t value) { return (double)value; }
+double __Sz_sitofp_i64_f64(int64_t Value) { return (double)Value; }

-/* TODO(stichnot):
-   Sz_bitcast_v8i1_to_i8
-   Sz_bitcast_v16i1_to_i16
-   Sz_bitcast_i8_to_v8i1
-   Sz_bitcast_i16_to_v16i1
-*/
+// Other helper calls emitted by Subzero but not implemented here:
+// Compiler-rt:
+//   __udivdi3     - udiv i64
+//   __divdi3      - sdiv i64
+//   __umoddi3     - urem i64
+//   __moddi3      - srem i64
+//   __popcountsi2 - call @llvm.ctpop.i32
+//   __popcountdi2 - call @llvm.ctpop.i64
+// libm:
+//   fmodf - frem f32
+//   fmod  - frem f64
+// libc:
+//   setjmp  - call @llvm.nacl.setjmp
+//   longjmp - call @llvm.nacl.longjmp
+//   memcpy  - call @llvm.memcpy.p0i8.p0i8.i32
+//   memmove - call @llvm.memmove.p0i8.p0i8.i32
+//   memset  - call @llvm.memset.p0i8.i32
+// unsandboxed_irt:
+//   __nacl_read_tp
--- a/runtime/szrt_ll.ll
+++ b/runtime/szrt_ll.ll
@@ -14,14 +14,38 @@
 ;;
 ;;===----------------------------------------------------------------------===;;

-define <4 x float> @Sz_uitofp_v4i32(<4 x i32> %a) {
+define <4 x float> @__Sz_uitofp_4xi32_4xf32(<4 x i32> %a) {
 entry:
  %0 = uitofp <4 x i32> %a to <4 x float>
  ret <4 x float> %0
 }

-define <4 x i32> @Sz_fptoui_v4f32(<4 x float> %a) {
+define <4 x i32> @__Sz_fptoui_4xi32_f32(<4 x float> %a) {
 entry:
  %0 = fptoui <4 x float> %a to <4 x i32>
  ret <4 x i32> %0
 }
+
+define i8 @__Sz_bitcast_8xi1_i8(<8 x i1> %a) {
+entry:
+  %0 = bitcast <8 x i1> %a to i8
+  ret i8 %0
+}
+
+define i16 @__Sz_bitcast_16xi1_i16(<16 x i1> %a) {
+entry:
+  %0 = bitcast <16 x i1> %a to i16
+  ret i16 %0
+}
+
+define <8 x i1> @__Sz_bitcast_i8_8xi1(i8 %a) {
+entry:
+  %0 = bitcast i8 %a to <8 x i1>
+  ret <8 x i1> %0
+}
+
+define <16 x i1> @__Sz_bitcast_i16_16xi1(i16 %a) {
+entry:
+  %0 = bitcast i16 %a to <16 x i1>
+  ret <16 x i1> %0
+}
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -252,6 +252,40 @@ protected:
  int32_t StackAdjustment;
  LoweringContext Context;

+  // Runtime helper function names
+  const static constexpr char *H_bitcast_16xi1_i16 = "__Sz_bitcast_16xi1_i16";
+  const static constexpr char *H_bitcast_8xi1_i8 = "__Sz_bitcast_8xi1_i8";
+  const static constexpr char *H_bitcast_i16_16xi1 = "__Sz_bitcast_i16_16xi1";
+  const static constexpr char *H_bitcast_i8_8xi1 = "__Sz_bitcast_i8_8xi1";
+  const static constexpr char *H_call_ctpop_i32 = "__popcountsi2";
+  const static constexpr char *H_call_ctpop_i64 = "__popcountdi2";
+  const static constexpr char *H_call_longjmp = "longjmp";
+  const static constexpr char *H_call_memcpy = "memcpy";
+  const static constexpr char *H_call_memmove = "memmove";
+  const static constexpr char *H_call_memset = "memset";
+  const static constexpr char *H_call_read_tp = "__nacl_read_tp";
+  const static constexpr char *H_call_setjmp = "setjmp";
+  const static constexpr char *H_fptosi_f32_i64 = "__Sz_fptosi_f32_i64";
+  const static constexpr char *H_fptosi_f64_i64 = "__Sz_fptosi_f64_i64";
+  const static constexpr char *H_fptoui_4xi32_f32 = "__Sz_fptoui_4xi32_f32";
+  const static constexpr char *H_fptoui_f32_i32 = "__Sz_fptoui_f32_i32";
+  const static constexpr char *H_fptoui_f32_i64 = "__Sz_fptoui_f32_i64";
+  const static constexpr char *H_fptoui_f64_i32 = "__Sz_fptoui_f64_i32";
+  const static constexpr char *H_fptoui_f64_i64 = "__Sz_fptoui_f64_i64";
+  const static constexpr char *H_frem_f32 = "fmodf";
+  const static constexpr char *H_frem_f64 = "fmod";
+  const static constexpr char *H_sdiv_i64 = "__divdi3";
+  const static constexpr char *H_sitofp_i64_f32 = "__Sz_sitofp_i64_f32";
+  const static constexpr char *H_sitofp_i64_f64 = "__Sz_sitofp_i64_f64";
+  const static constexpr char *H_srem_i64 = "__moddi3";
+  const static constexpr char *H_udiv_i64 = "__udivdi3";
+  const static constexpr char *H_uitofp_4xi32_4xf32 = "__Sz_uitofp_4xi32_4xf32";
+  const static constexpr char *H_uitofp_i32_f32 = "__Sz_uitofp_i32_f32";
+  const static constexpr char *H_uitofp_i32_f64 = "__Sz_uitofp_i32_f64";
+  const static constexpr char *H_uitofp_i64_f32 = "__Sz_uitofp_i64_f32";
+  const static constexpr char *H_uitofp_i64_f64 = "__Sz_uitofp_i64_f64";
+  const static constexpr char *H_urem_i64 = "__umoddi3";
+
 private:
  int32_t SnapshotStackAdjustment;
 };

--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
--- a/tests_lit/llvm2ice_tests/fp.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/fp.pnacl.ll
@@ -207,7 +207,7 @@ entry:
  ret i64 %conv
 }
 ; CHECK-LABEL: doubleToSigned64
-; CHECK: call {{.*}} R_{{.*}} cvtdtosi64
+; CHECK: call {{.*}} R_{{.*}} __Sz_fptosi_f64_i64

 define internal i64 @floatToSigned64(float %a) {
 entry:
@@ -215,7 +215,7 @@ entry:
  ret i64 %conv
 }
 ; CHECK-LABEL: floatToSigned64
-; CHECK: call {{.*}} R_{{.*}} cvtftosi64
+; CHECK: call {{.*}} R_{{.*}} __Sz_fptosi_f32_i64

 define internal i64 @doubleToUnsigned64(double %a) {
 entry:
@@ -223,7 +223,7 @@ entry:
  ret i64 %conv
 }
 ; CHECK-LABEL: doubleToUnsigned64
-; CHECK: call {{.*}} R_{{.*}} cvtdtoui64
+; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_f64_i64

 define internal i64 @floatToUnsigned64(float %a) {
 entry:
@@ -231,7 +231,7 @@ entry:
  ret i64 %conv
 }
 ; CHECK-LABEL: floatToUnsigned64
-; CHECK: call {{.*}} R_{{.*}} cvtftoui64
+; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_f32_i64

 define internal i32 @doubleToSigned32(double %a) {
 entry:
@@ -263,7 +263,7 @@ entry:
  ret i32 %conv
 }
 ; CHECK-LABEL: doubleToUnsigned32
-; CHECK: call {{.*}} R_{{.*}} cvtdtoui32
+; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_f64_i32

 define internal i32 @floatToUnsigned32(float %a) {
 entry:
@@ -271,7 +271,7 @@ entry:
  ret i32 %conv
 }
 ; CHECK-LABEL: floatToUnsigned32
-; CHECK: call {{.*}} R_{{.*}} cvtftoui32
+; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_f32_i32

 define internal i32 @doubleToSigned16(double %a) {
 entry:
@@ -379,7 +379,7 @@ entry:
  ret double %conv
 }
 ; CHECK-LABEL: signed64ToDouble
-; CHECK: call {{.*}} R_{{.*}} cvtsi64tod
+; CHECK: call {{.*}} R_{{.*}} __Sz_sitofp_i64_f64
 ; CHECK: fstp QWORD

 define internal float @signed64ToFloat(i64 %a) {
@@ -388,7 +388,7 @@ entry:
  ret float %conv
 }
 ; CHECK-LABEL: signed64ToFloat
-; CHECK: call {{.*}} R_{{.*}} cvtsi64tof
+; CHECK: call {{.*}} R_{{.*}} __Sz_sitofp_i64_f32
 ; CHECK: fstp DWORD

 define internal double @unsigned64ToDouble(i64 %a) {
@@ -397,7 +397,7 @@ entry:
  ret double %conv
 }
 ; CHECK-LABEL: unsigned64ToDouble
-; CHECK: call {{.*}} R_{{.*}} cvtui64tod
+; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_i64_f64
 ; CHECK: fstp

 define internal float @unsigned64ToFloat(i64 %a) {
@@ -406,7 +406,7 @@ entry:
  ret float %conv
 }
 ; CHECK-LABEL: unsigned64ToFloat
-; CHECK: call {{.*}} R_{{.*}} cvtui64tof
+; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_i64_f32
 ; CHECK: fstp

 define internal double @unsigned64ToDoubleConst() {
@@ -417,7 +417,7 @@ entry:
 ; CHECK-LABEL: unsigned64ToDouble
 ; CHECK: mov DWORD PTR [esp+0x4],0xb3a
 ; CHECK: mov DWORD PTR [esp],0x73ce2ff2
-; CHECK: call {{.*}} R_{{.*}} cvtui64tod
+; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_i64_f64
 ; CHECK: fstp

 define internal double @signed32ToDouble(i32 %a) {
@@ -453,7 +453,7 @@ entry:
  ret double %conv
 }
 ; CHECK-LABEL: unsigned32ToDouble
-; CHECK: call {{.*}} R_{{.*}} cvtui32tod
+; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_i32_f64
 ; CHECK: fstp QWORD

 define internal float @unsigned32ToFloat(i32 %a) {
@@ -462,7 +462,7 @@ entry:
  ret float %conv
 }
 ; CHECK-LABEL: unsigned32ToFloat
-; CHECK: call {{.*}} R_{{.*}} cvtui32tof
+; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_i32_f32
 ; CHECK: fstp DWORD

 define internal double @signed16ToDouble(i32 %a) {

--- a/tests_lit/llvm2ice_tests/vector-bitcast.ll
+++ b/tests_lit/llvm2ice_tests/vector-bitcast.ll
@@ -156,7 +156,7 @@ entry:
  ret i8 %res

 ; CHECK-LABEL: test_bitcast_v8i1_to_i8
-; CHECK: call {{.*}} R_{{.*}} Sz_bitcast_v8i1_to_i8
+; CHECK: call {{.*}} R_{{.*}} __Sz_bitcast_8xi1_i8

 ; OPTM1-LABEL: test_bitcast_v8i1_to_i8
 ; OPMT1: call -4
@@ -168,7 +168,7 @@ entry:
  ret i16 %res

 ; CHECK-LABEL: test_bitcast_v16i1_to_i16
-; CHECK: call {{.*}} R_{{.*}} Sz_bitcast_v16i1_to_i16
+; CHECK: call {{.*}} R_{{.*}} __Sz_bitcast_16xi1_i16

 ; OPTM1-LABEL: test_bitcast_v16i1_to_i16
 ; OPMT1: call -4
@@ -181,10 +181,10 @@ entry:
  ret <8 x i1> %res

 ; CHECK-LABEL: test_bitcast_i8_to_v8i1
-; CHECK: call {{.*}} R_{{.*}} Sz_bitcast_i8_to_v8i1
+; CHECK: call {{.*}} R_{{.*}} __Sz_bitcast_i8_8xi1

 ; OPTM1-LABEL: test_bitcast_i8_to_v8i1
-; OPTM1: call {{.*}} R_{{.*}} Sz_bitcast_i8_to_v8i1
+; OPTM1: call {{.*}} R_{{.*}} __Sz_bitcast_i8_8xi1
 }

 define <16 x i1> @test_bitcast_i16_to_v16i1(i32 %arg) {
@@ -194,8 +194,8 @@ entry:
  ret <16 x i1> %res

 ; CHECK-LABEL: test_bitcast_i16_to_v16i1
-; CHECK: call {{.*}} R_{{.*}} Sz_bitcast_i16_to_v16i1
+; CHECK: call {{.*}} R_{{.*}} __Sz_bitcast_i16_16xi1

 ; OPTM1-LABEL: test_bitcast_i16_to_v16i1
-; OPTM1: call {{.*}} R_{{.*}} Sz_bitcast_i16_to_v16i1
+; OPTM1: call {{.*}} R_{{.*}} __Sz_bitcast_i16_16xi1
 }
--- a/tests_lit/llvm2ice_tests/vector-cast.ll
+++ b/tests_lit/llvm2ice_tests/vector-cast.ll
@@ -133,7 +133,7 @@ entry:
  ret <4 x i32> %res

 ; CHECK-LABEL: test_fptoui_v4f32_to_v4i32
-; CHECK: call {{.*}} R_{{.*}} Sz_fptoui_v4f32
+; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_4xi32_f32
 }

 ; [su]itofp operations
@@ -153,5 +153,5 @@ entry:
  ret <4 x float> %res

 ; CHECK-LABEL: test_uitofp_v4i32_to_v4f32
-; CHECK: call {{.*}} R_{{.*}} Sz_uitofp_v4i32
+; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_4xi32_4xf32
 }