Commit e4da26f6 by Jan Voung

Lower bitmanip intrinsics, assuming absence of BMI/SSE4.2 for now.

We'll need the fallbacks in any case. However, once we've decided on how to specify the CPU features of the user machine we can use the nicer LZCNT/TZCNT/POPCNT as well. Adds cmov, bsf, and bsr instructions. Calls a popcount helper function for machines without SSE4.2. Not handling bswap yet (which can also take i16 params). BUG= https://code.google.com/p/nativeclient/issues/detail?id=3882 R=stichnot@chromium.org, wala@chromium.org Review URL: https://codereview.chromium.org/390443005
parent ad8f7265
......@@ -39,6 +39,13 @@ for optlevel in ${OPTLEVELS} ; do
./crosstest.py -O${optlevel} --prefix=Subzero_ --target=x8632 \
--dir="${OUTDIR}" \
--llvm-bin-path="${LLVM_BIN_PATH}" \
--test=test_bitmanip.cpp --test=test_bitmanip_intrin.ll \
--driver=test_bitmanip_main.cpp \
--output=test_bitmanip_O${optlevel}
./crosstest.py -O${optlevel} --prefix=Subzero_ --target=x8632 \
--dir="${OUTDIR}" \
--llvm-bin-path="${LLVM_BIN_PATH}" \
--test=test_cast.cpp --test=test_cast_to_u1.ll \
--driver=test_cast_main.cpp \
--output=test_cast_O${optlevel}
......@@ -81,6 +88,7 @@ for optlevel in ${OPTLEVELS} ; do
"${OUTDIR}"/simple_loop_O${optlevel}
"${OUTDIR}"/mem_intrin_O${optlevel}
"${OUTDIR}"/test_arith_O${optlevel}
"${OUTDIR}"/test_bitmanip_O${optlevel}
"${OUTDIR}"/test_cast_O${optlevel}
"${OUTDIR}"/test_fcmp_O${optlevel}
"${OUTDIR}"/test_global_O${optlevel}
......
//===- subzero/crosstest/test_bitmanip.cpp - Implementation for tests. ----===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This aims to test that all the bit manipulation intrinsics work, via
// cross-testing. This calls wrappers (my_{ctlz,cttz,ctpop}) around the
// intrinsics (llvm.{ctlz,cttz,ctpop}.*).
//===----------------------------------------------------------------------===//
#include <stdint.h>
#include <cstdlib>
#include "test_bitmanip.h"
// X-macro, expanded once for every (inst, type) pair supplied by
// FOR_ALL_BMI_OP_TYPES. Each expansion defines three test entry points:
//   test_<inst>        - plain call through the my_<inst> wrapper.
//   test_alloca_<inst> - same call, routed through a small stack buffer so
//                        lowering in the presence of frame traffic is
//                        covered; returns the sum of 8 identical results.
//   test_const_<inst>  - calls my_<inst> with the compile-time constant
//                        0x12340 so constant-operand lowering is exercised.
#define X(inst, type) \
type test_##inst(type a) { return my_##inst(a); } \
type test_alloca_##inst(type a) { \
const size_t buf_size = 8; \
type buf[buf_size]; \
for (size_t i = 0; i < buf_size; ++i) { \
buf[i] = my_##inst(a); \
} \
type sum = 0; \
for (size_t i = 0; i < buf_size; ++i) { \
sum += buf[i]; \
} \
return sum; \
} \
type test_const_##inst(type ignored) { \
return my_##inst(static_cast<type>(0x12340)); \
}
// Instantiate the three tests for each of {ctlz, cttz, ctpop} x
// {uint32_t, uint64_t}.
FOR_ALL_BMI_OP_TYPES(X)
#undef X
//===- subzero/crosstest/test_bitmanip.def - macros for tests -*- C++ -*---===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines macros for testing bit manipulation intrinsics.
//
//===----------------------------------------------------------------------===//
#ifndef TEST_BIT_MANIP_DEF
#define TEST_BIT_MANIP_DEF

// Stringizing helper: STR(ctlz) expands to "ctlz".
#define STR(s) #s

// Bit manipulation operations under test. The caller-supplied X macro is
// invoked as X(inst), as sketched in the reminder comment below.
#define BMI_OPS \
/* inst */ \
X(ctlz) \
X(cttz) \
X(ctpop)
// #define X(inst)

// Operand types under test. The caller-supplied X macro is invoked as
// X(type).
#define BMI_TYPES \
/* type */ \
X(uint32_t) \
X(uint64_t)
// #define X(type)

// Applies F(inst, type) for a fixed operation across all supported types.
#define FOR_ALL_BMI_TYPES_INST(F, inst) \
F(inst, uint32_t) \
F(inst, uint64_t)

// Applies X(inst, type) for every (operation, type) combination.
#define FOR_ALL_BMI_OP_TYPES(X) \
FOR_ALL_BMI_TYPES_INST(X, ctlz) \
FOR_ALL_BMI_TYPES_INST(X, cttz) \
FOR_ALL_BMI_TYPES_INST(X, ctpop)
//#define X(inst, type)

#endif // TEST_BIT_MANIP_DEF
//===- subzero/crosstest/test_bitmanip.h - Test prototypes ---*- C++ -*----===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the function prototypes for cross testing bit
// manipulation intrinsics.
//
//===----------------------------------------------------------------------===//
#include "test_bitmanip.def"

// Declare, for every (inst, type) combination:
//   test_<inst>, test_alloca_<inst>, test_const_<inst> - defined in
//     test_bitmanip.cpp (and again inside the Subzero_ namespace for the
//     Subzero-translated object file).
//   my_<inst> - the wrapper around the llvm.<inst>.* intrinsic, defined in
//     test_bitmanip_intrin.ll (name-mangled, see that file).
#define X(inst, type) \
type test_##inst(type a); \
type test_alloca_##inst(type a); \
type test_const_##inst(type ignored); \
type my_##inst(type a);
FOR_ALL_BMI_OP_TYPES(X)
#undef X
; Wrappers around the bit manipulation intrinsics, which use name mangling
; for encoding the type in the name instead of plain "C" suffixes.
; E.g., my_ctpop(unsigned long long) vs __builtin_popcountll(...)
; Also, normalize the intrinsic to take a single parameter when there
; can be two, as is the case for ctlz and cttz.
target triple = "i686-pc-linux-gnu"

declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)
declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)

; The mangled suffixes follow the Itanium C++ ABI: "j" = unsigned int,
; "y" = unsigned long long, so these match the prototypes declared by the
; X-macro in test_bitmanip.h. The "i1 0" argument asks for a well-defined
; result when the input is zero (is_zero_undef = false).

; my_ctlz(unsigned int) - count leading zeros of a 32-bit value.
define i32 @_Z7my_ctlzj(i32 %a) {
%x = call i32 @llvm.ctlz.i32(i32 %a, i1 0)
ret i32 %x
}

; my_ctlz(unsigned long long) - count leading zeros of a 64-bit value.
define i64 @_Z7my_ctlzy(i64 %a) {
%x = call i64 @llvm.ctlz.i64(i64 %a, i1 0)
ret i64 %x
}

; my_cttz(unsigned int) - count trailing zeros of a 32-bit value.
define i32 @_Z7my_cttzj(i32 %a) {
%x = call i32 @llvm.cttz.i32(i32 %a, i1 0)
ret i32 %x
}

; my_cttz(unsigned long long) - count trailing zeros of a 64-bit value.
define i64 @_Z7my_cttzy(i64 %a) {
%x = call i64 @llvm.cttz.i64(i64 %a, i1 0)
ret i64 %x
}

; my_ctpop(unsigned int) - population count of a 32-bit value.
define i32 @_Z8my_ctpopj(i32 %a) {
%x = call i32 @llvm.ctpop.i32(i32 %a)
ret i32 %x
}

; my_ctpop(unsigned long long) - population count of a 64-bit value.
define i64 @_Z8my_ctpopy(i64 %a) {
%x = call i64 @llvm.ctpop.i64(i64 %a)
ret i64 %x
}
//===- subzero/crosstest/test_bitmanip_main.cpp - Driver for tests. -------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Driver for cross testing bit manipulation intrinsics.
//
//===----------------------------------------------------------------------===//
/* crosstest.py --test=test_bitmanip.cpp --test=test_bitmanip_intrin.ll \
--driver=test_bitmanip_main.cpp --prefix=Subzero_ --output=test_bitmanip */
#include <stdint.h>
#include <climits>
#include <iostream>
// Include test_bitmanip.h twice - once normally, and once within the
// Subzero_ namespace, corresponding to the llc and Subzero translated
// object files, respectively.
#include "test_bitmanip.h"
namespace Subzero_ {
#include "test_bitmanip.h"
}
// Test inputs: bit patterns around the interesting boundaries of each width
// (byte, 16-bit, 32-bit, 64-bit), plus patterns annotated in floating-point
// terms (Inf, NaN, subnormal limits). The float annotations describe what
// the bits mean when reinterpreted as f32/f64; for bit manipulation they
// simply serve as additional edge-case patterns. Declared volatile so the
// compiled tests cannot constant-fold the inputs away.
volatile uint64_t Values[] = {
0, 1, 0x7e,
0x7f, 0x80, 0x81,
0xfe, 0xff, 0x7ffe,
0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff,
0x007fffff /*Max subnormal + */,
0x00800000 /*Min+ */, 0x7f7fffff /*Max+ */,
0x7f800000 /*+Inf*/, 0xff800000 /*-Inf*/,
0x7fa00000 /*SNaN*/, 0x7fc00000 /*QNaN*/,
0x7ffffffe, 0x7fffffff, 0x80000000,
0x80000001, 0xfffffffe, 0xffffffff,
0x100000000ll, 0x100000001ll,
0x000fffffffffffffll /*Max subnormal + */,
0x0010000000000000ll /*Min+ */,
0x7fefffffffffffffll /*Max+ */,
0x7ff0000000000000ll /*+Inf*/,
0xfff0000000000000ll /*-Inf*/,
0x7ff0000000000001ll /*SNaN*/,
0x7ff8000000000000ll /*QNaN*/,
0x7ffffffffffffffell, 0x7fffffffffffffffll, 0x8000000000000000ll,
0x8000000000000001ll, 0xfffffffffffffffell, 0xffffffffffffffffll };
// Number of entries in Values.
const static size_t NumValues = sizeof(Values) / sizeof(*Values);
// Cross-tests every bit manipulation wrapper for the given operand Type:
// each llc-translated test_* function is run against its Subzero-translated
// Subzero_::test_* counterpart over every entry in Values (truncated to
// Type), and the results are compared. Counters are accumulated into the
// caller-owned TotalTests/Passes/Failures; mismatches are printed to stdout.
template <typename Type>
void testBitManip(size_t &TotalTests, size_t &Passes, size_t &Failures) {
typedef Type (*FuncType)(Type);
// Table pairing each llc-compiled function with its Subzero twin. Built
// via the BMI_OPS X-macro: three rows (plain, _alloca, _const) per op.
static struct {
const char *Name;
FuncType FuncLlc;
FuncType FuncSz;
} Funcs[] = {
#define X(inst) \
{ \
STR(inst), test_##inst, Subzero_::test_##inst \
}, \
{ \
STR(inst) "_alloca", test_alloca_##inst, Subzero_::test_alloca_##inst \
}, \
{ \
STR(inst) "_const", test_const_##inst, Subzero_::test_const_##inst \
},
BMI_OPS
#undef X
};
const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
for (size_t f = 0; f < NumFuncs; ++f) {
for (size_t i = 0; i < NumValues; ++i) {
Type Value = static_cast<Type>(Values[i]);
++TotalTests;
Type ResultSz = Funcs[f].FuncSz(Value);
Type ResultLlc = Funcs[f].FuncLlc(Value);
if (ResultSz == ResultLlc) {
++Passes;
} else {
++Failures;
// Report the mismatch with the bit width appended to the name
// (e.g. "test_ctlz32") and both results widened to uint64_t.
std::cout << "test_" << Funcs[f].Name
<< (CHAR_BIT * sizeof(Type)) << "("
<< static_cast<uint64_t>(Value)
<< "): sz=" << static_cast<uint64_t>(ResultSz)
<< " llc=" << static_cast<uint64_t>(ResultLlc)
<< "\n";
}
}
}
}
// Driver entry point: runs the 32-bit and 64-bit cross-test suites,
// prints a summary line, and returns the failure count as the exit status
// (0 means all tests passed).
int main(int argc, char **argv) {
  size_t Total = 0;
  size_t Passed = 0;
  size_t Failed = 0;
  testBitManip<uint32_t>(Total, Passed, Failed);
  testBitManip<uint64_t>(Total, Passed, Failed);
  std::cout << "TotalTests=" << Total << " Passes=" << Passed
            << " Failures=" << Failed << "\n";
  return Failed;
}
......@@ -94,6 +94,11 @@ InstX8632Mul::InstX8632Mul(Cfg *Func, Variable *Dest, Variable *Source1,
addSource(Source2);
}
InstX8632Neg::InstX8632Neg(Cfg *Func, Operand *SrcDest)
: InstX8632(Func, InstX8632::Neg, 1, llvm::dyn_cast<Variable>(SrcDest)) {
addSource(SrcDest);
}
InstX8632Shld::InstX8632Shld(Cfg *Func, Variable *Dest, Variable *Source1,
Variable *Source2)
: InstX8632(Func, InstX8632::Shld, 3, Dest) {
......@@ -121,7 +126,7 @@ IceString InstX8632Label::getName(const Cfg *Func) const {
}
InstX8632Br::InstX8632Br(Cfg *Func, CfgNode *TargetTrue, CfgNode *TargetFalse,
InstX8632Label *Label, InstX8632Br::BrCond Condition)
InstX8632Label *Label, InstX8632::BrCond Condition)
: InstX8632(Func, InstX8632::Br, 0, NULL), Condition(Condition),
TargetTrue(TargetTrue), TargetFalse(TargetFalse), Label(Label) {}
......@@ -139,6 +144,15 @@ InstX8632Cdq::InstX8632Cdq(Cfg *Func, Variable *Dest, Operand *Source)
addSource(Source);
}
InstX8632Cmov::InstX8632Cmov(Cfg *Func, Variable *Dest, Operand *Source,
InstX8632::BrCond Condition)
: InstX8632(Func, InstX8632::Cmov, 2, Dest), Condition(Condition) {
// The final result is either the original Dest, or Source, so mark
// both as sources.
addSource(Dest);
addSource(Source);
}
InstX8632Cmpxchg::InstX8632Cmpxchg(Cfg *Func, Operand *DestOrAddr,
Variable *Eax, Variable *Desired,
bool Locked)
......@@ -297,11 +311,6 @@ bool InstX8632Movq::isRedundantAssign() const {
return false;
}
InstX8632Sqrtss::InstX8632Sqrtss(Cfg *Func, Variable *Dest, Operand *Source)
: InstX8632(Func, InstX8632::Sqrtss, 1, Dest) {
addSource(Source);
}
InstX8632Ret::InstX8632Ret(Cfg *Func, Variable *Source)
: InstX8632(Func, InstX8632::Ret, Source ? 1 : 0, NULL) {
if (Source)
......@@ -429,7 +438,9 @@ void emitTwoAddress(const char *Opcode, const Inst *Inst, const Cfg *Func,
Str << "\n";
}
template <> const char *InstX8632Neg::Opcode = "neg";
template <> const char *InstX8632Bsf::Opcode = "bsf";
template <> const char *InstX8632Bsr::Opcode = "bsr";
template <> const char *InstX8632Sqrtss::Opcode = "sqrtss";
template <> const char *InstX8632Add::Opcode = "add";
template <> const char *InstX8632Addps::Opcode = "addps";
template <> const char *InstX8632Adc::Opcode = "adc";
......@@ -453,6 +464,18 @@ template <> const char *InstX8632Shl::Opcode = "shl";
template <> const char *InstX8632Shr::Opcode = "shr";
template <> const char *InstX8632Sar::Opcode = "sar";
// Emits scalar square root: "sqrtss" for f32 or "sqrtsd" for f64. The
// ss/sd suffix is chosen from the operand type's SdSsString attribute
// rather than the fixed Opcode string, so one template handles both types.
template <> void InstX8632Sqrtss::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
Type Ty = getSrc(0)->getType();
assert(Ty == IceType_f32 || Ty == IceType_f64);
Str << "\tsqrt" << TypeX8632Attributes[Ty].SdSsString << "\t";
getDest()->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
Str << "\n";
}
template <> void InstX8632Addss::emit(const Cfg *Func) const {
char buf[30];
snprintf(buf, llvm::array_lengthof(buf), "add%s",
......@@ -523,6 +546,21 @@ void InstX8632Mul::dump(const Cfg *Func) const {
dumpSources(Func);
}
void InstX8632Neg::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
Str << "\tneg\t";
getSrc(0)->emit(Func);
Str << "\n";
}
void InstX8632Neg::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = neg." << getDest()->getType() << " ";
dumpSources(Func);
}
void InstX8632Shld::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 3);
......@@ -586,6 +624,27 @@ void InstX8632Cdq::dump(const Cfg *Func) const {
dumpSources(Func);
}
// Emits "cmov<cond> dest, src". The destination must already live in a
// register (asserted below) because cmov cannot write to memory. Only
// getSrc(1) is printed as the source operand: getSrc(0) is Dest itself,
// registered as a source because the final value may be either operand.
void InstX8632Cmov::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
Str << "\t";
assert(Condition != Br_None);
assert(getDest()->hasReg());
Str << "cmov" << InstX8632BrAttributes[Condition].DisplayString << "\t";
getDest()->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
Str << "\n";
}

// Dumps the cmov in Subzero's textual IR form, e.g. "cmovne.i32 dst, ...".
void InstX8632Cmov::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
Str << "cmov" << InstX8632BrAttributes[Condition].DisplayString << ".";
Str << getDest()->getType() << " ";
dumpDest(Func);
Str << ", ";
dumpSources(Func);
}
void InstX8632Cmpxchg::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 3);
......@@ -1007,25 +1066,6 @@ void InstX8632Ret::dump(const Cfg *Func) const {
dumpSources(Func);
}
void InstX8632Sqrtss::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
Type Ty = getSrc(0)->getType();
assert(Ty == IceType_f32 || Ty == IceType_f64);
Str << "\tsqrt" << TypeX8632Attributes[Ty].SdSsString << "\t";
getDest()->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
Str << "\n";
}
void InstX8632Sqrtss::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = sqrt." << getDest()->getType() << " ";
dumpSources(Func);
}
void InstX8632Xadd::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
if (Locked) {
......
......@@ -139,8 +139,11 @@ public:
Addss,
And,
Br,
Bsf,
Bsr,
Call,
Cdq,
Cmov,
Cmpxchg,
Cmpxchg8b,
Cvt,
......@@ -188,6 +191,14 @@ public:
Xchg,
Xor
};
enum BrCond {
#define X(tag, dump, emit) tag,
ICEINSTX8632BR_TABLE
#undef X
Br_None
};
static const char *getWidthString(Type Ty);
virtual void emit(const Cfg *Func) const = 0;
virtual void dump(const Cfg *Func) const;
......@@ -262,13 +273,6 @@ private:
// Conditional and unconditional branch instruction.
class InstX8632Br : public InstX8632 {
public:
enum BrCond {
#define X(tag, dump, emit) tag,
ICEINSTX8632BR_TABLE
#undef X
Br_None
};
// Create a conditional branch to a node.
static InstX8632Br *create(Cfg *Func, CfgNode *TargetTrue,
CfgNode *TargetFalse, BrCond Condition) {
......@@ -334,16 +338,16 @@ private:
template <InstX8632::InstKindX8632 K>
class InstX8632Unaryop : public InstX8632 {
public:
// Create an unary-op instruction like neg.
// The source and dest are the same variable.
static InstX8632Unaryop *create(Cfg *Func, Operand *SrcDest) {
static InstX8632Unaryop *create(Cfg *Func, Variable *Dest, Operand *Src) {
return new (Func->allocate<InstX8632Unaryop>())
InstX8632Unaryop(Func, SrcDest);
InstX8632Unaryop(Func, Dest, Src);
}
virtual void emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
Str << "\t" << Opcode << "\t";
getDest()->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
Str << "\n";
}
......@@ -356,9 +360,9 @@ public:
static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
private:
InstX8632Unaryop(Cfg *Func, Operand *SrcDest)
: InstX8632(Func, K, 1, llvm::dyn_cast<Variable>(SrcDest)) {
addSource(SrcDest);
InstX8632Unaryop(Cfg *Func, Variable *Dest, Operand *Src)
: InstX8632(Func, K, 1, Dest) {
addSource(Src);
}
InstX8632Unaryop(const InstX8632Unaryop &) LLVM_DELETED_FUNCTION;
InstX8632Unaryop &operator=(const InstX8632Unaryop &) LLVM_DELETED_FUNCTION;
......@@ -438,7 +442,9 @@ private:
static const char *Opcode;
};
typedef InstX8632Unaryop<InstX8632::Neg> InstX8632Neg;
typedef InstX8632Unaryop<InstX8632::Bsf> InstX8632Bsf;
typedef InstX8632Unaryop<InstX8632::Bsr> InstX8632Bsr;
typedef InstX8632Unaryop<InstX8632::Sqrtss> InstX8632Sqrtss;
typedef InstX8632Binop<InstX8632::Add> InstX8632Add;
typedef InstX8632Binop<InstX8632::Addps> InstX8632Addps;
typedef InstX8632Binop<InstX8632::Adc> InstX8632Adc;
......@@ -503,6 +509,23 @@ private:
virtual ~InstX8632Mul() {}
};
// Neg instruction - Two's complement negation.
class InstX8632Neg : public InstX8632 {
public:
static InstX8632Neg *create(Cfg *Func, Operand *SrcDest) {
return new (Func->allocate<InstX8632Neg>()) InstX8632Neg(Func, SrcDest);
}
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Neg); }
private:
InstX8632Neg(Cfg *Func, Operand *SrcDest);
InstX8632Neg(const InstX8632Neg &) LLVM_DELETED_FUNCTION;
InstX8632Neg &operator=(const InstX8632Neg &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632Neg() {}
};
// Shld instruction - shift across a pair of operands. TODO: Verify
// that the validator accepts the shld instruction.
class InstX8632Shld : public InstX8632 {
......@@ -563,6 +586,27 @@ private:
virtual ~InstX8632Cdq() {}
};
// Conditional move instruction.
class InstX8632Cmov : public InstX8632 {
public:
static InstX8632Cmov *create(Cfg *Func, Variable *Dest, Operand *Source,
BrCond Cond) {
return new (Func->allocate<InstX8632Cmov>())
InstX8632Cmov(Func, Dest, Source, Cond);
}
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Cmov); }
private:
InstX8632Cmov(Cfg *Func, Variable *Dest, Operand *Source, BrCond Cond);
InstX8632Cmov(const InstX8632Cmov &) LLVM_DELETED_FUNCTION;
InstX8632Cmov &operator=(const InstX8632Cmov &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632Cmov() {}
BrCond Condition;
};
// Cmpxchg instruction - cmpxchg <dest>, <desired> will compare if <dest>
// equals eax. If so, the ZF is set and <desired> is stored in <dest>.
// If not, ZF is cleared and <dest> is copied to eax (or subregister).
......@@ -948,24 +992,6 @@ private:
virtual ~InstX8632Ret() {}
};
// Sqrtss - Scalar sqrt of a float or double.
class InstX8632Sqrtss : public InstX8632 {
public:
static InstX8632Sqrtss *create(Cfg *Func, Variable *Dest, Operand *Source) {
return new (Func->allocate<InstX8632Sqrtss>())
InstX8632Sqrtss(Func, Dest, Source);
}
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Sqrtss); }
private:
InstX8632Sqrtss(Cfg *Func, Variable *Dest, Operand *Source);
InstX8632Sqrtss(const InstX8632Sqrtss &) LLVM_DELETED_FUNCTION;
InstX8632Sqrtss &operator=(const InstX8632Sqrtss &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632Sqrtss() {}
};
// Exchanging Add instruction. Exchanges the first operand (destination
// operand) with the second operand (source operand), then loads the sum
// of the two values into the destination operand. The destination may be
......
......@@ -39,7 +39,7 @@ namespace {
const struct TableFcmp_ {
uint32_t Default;
bool SwapOperands;
InstX8632Br::BrCond C1, C2;
InstX8632::BrCond C1, C2;
} TableFcmp[] = {
#define X(val, dflt, swap, C1, C2) \
{ dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \
......@@ -54,7 +54,7 @@ const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
// x86 conditional branch instruction.
const struct TableIcmp32_ {
InstX8632Br::BrCond Mapping;
InstX8632::BrCond Mapping;
} TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64) \
{ InstX8632Br::C_32 } \
......@@ -69,7 +69,7 @@ const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
// conditional branches are needed. For the other conditions, three separate
// conditional branches are needed.
const struct TableIcmp64_ {
InstX8632Br::BrCond C1, C2, C3;
InstX8632::BrCond C1, C2, C3;
} TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64) \
{ InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 } \
......@@ -79,7 +79,7 @@ const struct TableIcmp64_ {
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
size_t Index = static_cast<size_t>(Cond);
assert(Index < TableIcmp32Size);
return TableIcmp32[Index].Mapping;
......@@ -2109,12 +2109,61 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return;
}
case Intrinsics::Bswap:
case Intrinsics::Ctlz:
case Intrinsics::Ctpop:
case Intrinsics::Cttz:
// TODO(jvoung): fill it in.
Func->setError("Unhandled intrinsic");
return;
case Intrinsics::Ctpop: {
Variable *Dest = Instr->getDest();
Operand *Val = Instr->getArg(0);
InstCall *Call = makeHelperCall(Val->getType() == IceType_i64 ?
"__popcountdi2" : "__popcountsi2", Dest, 1);
Call->addArg(Val);
lowerCall(Call);
// The popcount helpers always return 32-bit values, while the intrinsic's
// signature matches the native POPCNT instruction and fills a 64-bit reg
// (in 64-bit mode). Thus, clear the upper bits of the dest just in case
// the user doesn't do that in the IR. If the user does that in the IR,
// then this zero'ing instruction is dead and gets optimized out.
if (Val->getType() == IceType_i64) {
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Constant *Zero = Ctx->getConstantZero(IceType_i32);
_mov(DestHi, Zero);
}
return;
}
case Intrinsics::Ctlz: {
// The "is zero undef" parameter is ignored and we always return
// a well-defined value.
Operand *Val = legalize(Instr->getArg(0));
Operand *FirstVal;
Operand *SecondVal = NULL;
if (Val->getType() == IceType_i64) {
FirstVal = loOperand(Val);
SecondVal = hiOperand(Val);
} else {
FirstVal = Val;
}
const bool IsCttz = false;
lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
SecondVal);
return;
}
case Intrinsics::Cttz: {
// The "is zero undef" parameter is ignored and we always return
// a well-defined value.
Operand *Val = legalize(Instr->getArg(0));
Operand *FirstVal;
Operand *SecondVal = NULL;
if (Val->getType() == IceType_i64) {
FirstVal = hiOperand(Val);
SecondVal = loOperand(Val);
} else {
FirstVal = Val;
}
const bool IsCttz = true;
lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
SecondVal);
return;
}
case Intrinsics::Longjmp: {
InstCall *Call = makeHelperCall("longjmp", NULL, 2);
Call->addArg(Instr->getArg(0));
......@@ -2408,6 +2457,81 @@ void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
_mov(Dest, T_eax);
}
// Lowers count {trailing, leading} zeros intrinsic.
//
// We could do constant folding here, but that should have
// been done by the front-end/middle-end optimizations.
//
// Parameters:
//   Cttz      - true lowers cttz (via bsf), false lowers ctlz (via bsr).
//   Ty        - the intrinsic's operand type: IceType_i32 or IceType_i64.
//   Dest      - the intrinsic's result variable.
//   FirstVal  - for i32, the whole operand. For i64, the half whose scan
//               result is speculatively assumed to be the answer: the lo
//               half for ctlz (speculating the hi half is all zero) and
//               the hi half for cttz -- see the Ctlz/Cttz call sites.
//   SecondVal - for i64, the other half; NULL for i32.
void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
Operand *FirstVal, Operand *SecondVal) {
// TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
// Then the instructions will handle the Val == 0 case much more simply
// and won't require conversion from bit position to number of zeros.
//
// Otherwise:
// bsr IF_NOT_ZERO, Val
// mov T_DEST, 63
// cmovne T_DEST, IF_NOT_ZERO
// xor T_DEST, 31
// mov DEST, T_DEST
//
// NOTE: T_DEST must be a register because cmov requires its dest to be a
// register. Also, bsf and bsr require their dest to be a register.
//
// The xor DEST, 31 converts a bit position to # of leading zeroes.
// E.g., for 000... 00001100, bsr will say that the most significant bit
// set is at position 3, while the number of leading zeros is 28. Xor is
// like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case).
//
// Similar for 64-bit, but start w/ speculating that the upper 32 bits
// are all zero, and compute the result for that case (checking the lower
// 32 bits). Then actually compute the result for the upper bits and
// cmov in the result from the lower computation if the earlier speculation
// was correct.
//
// Cttz, is similar, but uses bsf instead, and doesn't require the xor
// bit position conversion, and the speculation is reversed.
assert(Ty == IceType_i32 || Ty == IceType_i64);
// T = position of the first (bsf) or last (bsr) set bit of FirstVal;
// the scan leaves T undefined when FirstVal == 0, hence the cmov below.
Variable *T = makeReg(IceType_i32);
if (Cttz) {
_bsf(T, FirstVal);
} else {
_bsr(T, FirstVal);
}
Variable *T_Dest = makeReg(IceType_i32);
Constant *ThirtyTwo = Ctx->getConstantInt(IceType_i32, 32);
Constant *ThirtyOne = Ctx->getConstantInt(IceType_i32, 31);
// Preload the all-zeros answer (32 for cttz; 63 for ctlz, which the xor
// below converts to 32), then overwrite it with T when FirstVal != 0.
if (Cttz) {
_mov(T_Dest, ThirtyTwo);
} else {
Constant *SixtyThree = Ctx->getConstantInt(IceType_i32, 63);
_mov(T_Dest, SixtyThree);
}
_cmov(T_Dest, T, InstX8632::Br_ne);
if (!Cttz) {
// Convert bit position to leading-zero count: N -> 31 - N, 63 -> 32.
_xor(T_Dest, ThirtyOne);
}
if (Ty == IceType_i32) {
_mov(Dest, T_Dest);
return;
}
// 64-bit: T_Dest now holds the count within FirstVal's half; add 32 to
// account for the other (speculated all-zero) half.
_add(T_Dest, ThirtyTwo);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
// Will be using "test" on this, so we need a registerized variable.
Variable *SecondVar = legalizeToVar(SecondVal);
// Compute the answer assuming SecondVal is nonzero, then keep the earlier
// speculative result only if SecondVal really was zero.
Variable *T_Dest2 = makeReg(IceType_i32);
if (Cttz) {
_bsf(T_Dest2, SecondVar);
} else {
_bsr(T_Dest2, SecondVar);
_xor(T_Dest2, ThirtyOne);
}
_test(SecondVar, SecondVar);
_cmov(T_Dest2, T_Dest, InstX8632::Br_e);
_mov(DestLo, T_Dest2);
// The count fits in 32 bits, so the high half of the result is always 0.
_mov(DestHi, Ctx->getConstantZero(IceType_i32));
}
namespace {
bool isAdd(const Inst *Inst) {
......
......@@ -99,6 +99,8 @@ protected:
Operand *Desired);
void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
Operand *Val);
void lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, Operand *FirstVal,
Operand *SecondVal);
typedef void (TargetX8632::*LowerBinOp)(Variable *, Operand *);
void expandAtomicRMWAsCmpxchg(LowerBinOp op_lo, LowerBinOp op_hi,
......@@ -164,7 +166,7 @@ protected:
void _and(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632And::create(Func, Dest, Src0));
}
void _br(InstX8632Br::BrCond Condition, CfgNode *TargetTrue,
void _br(InstX8632::BrCond Condition, CfgNode *TargetTrue,
CfgNode *TargetFalse) {
Context.insert(
InstX8632Br::create(Func, TargetTrue, TargetFalse, Condition));
......@@ -172,15 +174,24 @@ protected:
void _br(CfgNode *Target) {
Context.insert(InstX8632Br::create(Func, Target));
}
void _br(InstX8632Br::BrCond Condition, CfgNode *Target) {
void _br(InstX8632::BrCond Condition, CfgNode *Target) {
Context.insert(InstX8632Br::create(Func, Target, Condition));
}
void _br(InstX8632Br::BrCond Condition, InstX8632Label *Label) {
void _br(InstX8632::BrCond Condition, InstX8632Label *Label) {
Context.insert(InstX8632Br::create(Func, Label, Condition));
}
void _bsf(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Bsf::create(Func, Dest, Src0));
}
void _bsr(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Bsr::create(Func, Dest, Src0));
}
void _cdq(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Cdq::create(Func, Dest, Src0));
}
void _cmov(Variable *Dest, Operand *Src0, InstX8632::BrCond Condition) {
Context.insert(InstX8632Cmov::create(Func, Dest, Src0, Condition));
}
void _cmp(Operand *Src0, Operand *Src1) {
Context.insert(InstX8632Icmp::create(Func, Src0, Src1));
}
......
; This tests the NaCl intrinsics not related to atomic operations.
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s --check-prefix=CHECKO2REM
; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
; Do another run w/ O2 and a different check-prefix (otherwise O2 and Om1
; share the same "CHECK" prefix). This separate run helps check that
; some code is optimized out.
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s --check-prefix=CHECKO2REM
; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
; RUN: | FileCheck --check-prefix=DUMP %s
......@@ -18,6 +22,12 @@ declare i32 @llvm.nacl.setjmp(i8*)
declare float @llvm.sqrt.f32(float)
declare double @llvm.sqrt.f64(double)
declare void @llvm.trap()
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)
declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)
define i32 @test_nacl_read_tp() {
entry:
......@@ -232,5 +242,128 @@ NonZero:
; CHECK-LABEL: test_trap
; CHECK: ud2
define i32 @test_ctlz_32(i32 %x) {
entry:
%r = call i32 @llvm.ctlz.i32(i32 %x, i1 0)
ret i32 %r
}
; CHECK-LABEL: test_ctlz_32
; TODO(jvoung): If we detect that LZCNT is supported, then use that
; and avoid the need to do the cmovne and xor stuff to guarantee that
; the result is well-defined w/ input == 0.
; CHECK: bsr [[REG_TMP:e.*]], {{.*}}
; CHECK: mov [[REG_RES:e.*]], 63
; CHECK: cmovne [[REG_RES]], [[REG_TMP]]
; CHECK: xor [[REG_RES]], 31
define i32 @test_ctlz_32_const() {
entry:
%r = call i32 @llvm.ctlz.i32(i32 123456, i1 0)
ret i32 %r
}
; Could potentially constant fold this, but the front-end should have done that.
; CHECK-LABEL: test_ctlz_32_const
; CHECK: bsr
define i32 @test_ctlz_32_ignored(i32 %x) {
entry:
%ignored = call i32 @llvm.ctlz.i32(i32 %x, i1 0)
ret i32 1
}
; CHECKO2REM-LABEL: test_ctlz_32_ignored
; CHECKO2REM-NOT: bsr
define i64 @test_ctlz_64(i64 %x) {
entry:
%r = call i64 @llvm.ctlz.i64(i64 %x, i1 0)
ret i64 %r
}
; CHECKO2REM-LABEL: test_ctlz_64
; CHECK-LABEL: test_ctlz_64
; CHECK: bsr [[REG_TMP1:e.*]], {{.*}}
; CHECK: mov [[REG_RES1:e.*]], 63
; CHECK: cmovne [[REG_RES1]], [[REG_TMP1]]
; CHECK: xor [[REG_RES1]], 31
; CHECK: add [[REG_RES1]], 32
; CHECK: bsr [[REG_RES2:e.*]], {{.*}}
; CHECK: xor [[REG_RES2]], 31
; CHECK: test [[REG_UPPER:.*]], [[REG_UPPER]]
; CHECK: cmove [[REG_RES2]], [[REG_RES1]]
; CHECK: mov {{.*}}, 0
define i32 @test_ctlz_64_const(i64 %x) {
entry:
%r = call i64 @llvm.ctlz.i64(i64 123456789012, i1 0)
%r2 = trunc i64 %r to i32
ret i32 %r2
}
; CHECK-LABEL: test_ctlz_64_const
; CHECK: bsr
; CHECK: bsr
define i32 @test_ctlz_64_ignored(i64 %x) {
entry:
%ignored = call i64 @llvm.ctlz.i64(i64 1234567890, i1 0)
ret i32 2
}
; CHECKO2REM-LABEL: test_ctlz_64_ignored
; CHECKO2REM-NOT: bsr
define i32 @test_cttz_32(i32 %x) {
entry:
%r = call i32 @llvm.cttz.i32(i32 %x, i1 0)
ret i32 %r
}
; CHECK-LABEL: test_cttz_32
; CHECK: bsf [[REG_IF_NOTZERO:e.*]], {{.*}}
; CHECK: mov [[REG_IF_ZERO:e.*]], 32
; CHECK: cmovne [[REG_IF_ZERO]], [[REG_IF_NOTZERO]]
define i64 @test_cttz_64(i64 %x) {
entry:
%r = call i64 @llvm.cttz.i64(i64 %x, i1 0)
ret i64 %r
}
; CHECK-LABEL: test_cttz_64
; CHECK: bsf [[REG_IF_NOTZERO:e.*]], {{.*}}
; CHECK: mov [[REG_RES1:e.*]], 32
; CHECK: cmovne [[REG_RES1]], [[REG_IF_NOTZERO]]
; CHECK: add [[REG_RES1]], 32
; CHECK: bsf [[REG_RES2:e.*]], [[REG_LOWER:.*]]
; CHECK: test [[REG_LOWER]], [[REG_LOWER]]
; CHECK: cmove [[REG_RES2]], [[REG_RES1]]
; CHECK: mov {{.*}}, 0
define i32 @test_popcount_32(i32 %x) {
entry:
%r = call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %r
}
; CHECK-LABEL: test_popcount_32
; CHECK: call __popcountsi2
define i64 @test_popcount_64(i64 %x) {
entry:
%r = call i64 @llvm.ctpop.i64(i64 %x)
ret i64 %r
}
; CHECK-LABEL: test_popcount_64
; CHECK: call __popcountdi2
; __popcountdi2 only returns a 32-bit result, so clear the upper bits of
; the return value just in case.
; CHECK: mov {{.*}}, 0
define i32 @test_popcount_64_ret_i32(i64 %x) {
entry:
%r_i64 = call i64 @llvm.ctpop.i64(i64 %x)
%r = trunc i64 %r_i64 to i32
ret i32 %r
}
; If there is a trunc, then the mov {{.*}}, 0 is dead and gets optimized out.
; CHECKO2REM-LABEL: test_popcount_64_ret_i32
; CHECKO2REM: call __popcountdi2
; CHECKO2REM-NOT: mov {{.*}}, 0
; ERRORS-NOT: ICE translation error
; DUMP-NOT: SZ
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment