Commit f645d853 by Jan Voung

ARM32: Lower more integer intrinsics and test.

Lower stacksave/restore. Lower ctlz, cttz, bswap, and popcount. Popcount is just done with a helper call. Cttz can use the clz instruction after reversing the bits. We can only crosstest stacksave/restore for now, which happens to be written in C (for the C99 VLAs). I can't seem to compile the CXX crosstests with the arm-cross-g++ (missing headers), so I will check that later after resolving the cross-compilation issue. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=jpp@chromium.org Review URL: https://codereview.chromium.org/1222943003 .
parent 3469b024
......@@ -327,7 +327,8 @@ check-xtest: $(OBJDIR)/pnacl-sz make_symlink runtime
--toolchain-root $(TOOLCHAIN_ROOT) \
-i x8632,native,sse2 -i x8632,native,sse4.1,test_vector_ops \
-i x8632,sandbox,sse4.1,Om1 \
-i arm32,native,neon,Om1,simple_loop
-i arm32,native,neon,Om1,simple_loop \
-i arm32,native,neon,Om1,test_stacksave
PNACL_BIN_PATH=$(PNACL_BIN_PATH) \
$(LLVM_SRC_PATH)/utils/lit/lit.py -sv crosstest/Output
endif
......
......@@ -80,18 +80,15 @@ CondARM32::Cond InstARM32::getOppositeCondition(CondARM32::Cond Cond) {
}
void InstARM32Pred::emitUnaryopGPR(const char *Opcode,
const InstARM32Pred *Inst, const Cfg *Func) {
const InstARM32Pred *Inst, const Cfg *Func,
bool NeedsWidthSuffix) {
Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 1);
Type SrcTy = Inst->getSrc(0)->getType();
Type DestTy = Inst->getDest()->getType();
Str << "\t" << Opcode;
// Sxt and Uxt need source type width letter to define the operation.
// The other unary operations have the same source and dest type and
// as a result need only one letter.
if (SrcTy != DestTy)
if (NeedsWidthSuffix)
Str << getWidthString(SrcTy);
Str << "\t";
Str << Inst->getPredicate() << "\t";
Inst->getDest()->emit(Func);
Str << ", ";
Inst->getSrc(0)->emit(Func);
......@@ -358,7 +355,10 @@ InstARM32Umull::InstARM32Umull(Cfg *Func, Variable *DestLo, Variable *DestHi,
template <> const char *InstARM32Movt::Opcode = "movt";
// Unary ops
template <> const char *InstARM32Movw::Opcode = "movw";
template <> const char *InstARM32Clz::Opcode = "clz";
template <> const char *InstARM32Mvn::Opcode = "mvn";
template <> const char *InstARM32Rbit::Opcode = "rbit";
template <> const char *InstARM32Rev::Opcode = "rev";
template <> const char *InstARM32Sxt::Opcode = "sxt"; // still requires b/h
template <> const char *InstARM32Uxt::Opcode = "uxt"; // still requires b/h
// Mov-like ops
......
......@@ -262,6 +262,7 @@ public:
Br,
Call,
Cmp,
Clz,
Eor,
Label,
Ldr,
......@@ -277,7 +278,9 @@ public:
Orr,
Pop,
Push,
Rbit,
Ret,
Rev,
Rsb,
Sbc,
Sdiv,
......@@ -324,7 +327,7 @@ public:
/// Shared emit routines for common forms of instructions.
static void emitUnaryopGPR(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func);
const Cfg *Func, bool NeedsWidthSuffix);
static void emitTwoAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func);
static void emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst,
......@@ -345,7 +348,7 @@ inline StreamType &operator<<(StreamType &Stream, CondARM32::Cond Predicate) {
}
/// Instructions of the form x := op(y).
template <InstARM32::InstKindARM32 K>
template <InstARM32::InstKindARM32 K, bool NeedsWidthSuffix>
class InstARM32UnaryopGPR : public InstARM32Pred {
InstARM32UnaryopGPR() = delete;
InstARM32UnaryopGPR(const InstARM32UnaryopGPR &) = delete;
......@@ -360,7 +363,7 @@ public:
void emit(const Cfg *Func) const override {
if (!BuildDefs::dump())
return;
emitUnaryopGPR(Opcode, this, Func);
emitUnaryopGPR(Opcode, this, Func, NeedsWidthSuffix);
}
void emitIAS(const Cfg *Func) const override {
(void)Func;
......@@ -641,13 +644,16 @@ typedef InstARM32Movlike<InstARM32::Mov> InstARM32Mov;
/// MovT leaves the bottom bits alone so dest is also a source.
/// This helps indicate that a previous MovW setting dest is not dead code.
typedef InstARM32TwoAddrGPR<InstARM32::Movt> InstARM32Movt;
typedef InstARM32UnaryopGPR<InstARM32::Movw> InstARM32Movw;
typedef InstARM32UnaryopGPR<InstARM32::Mvn> InstARM32Mvn;
typedef InstARM32UnaryopGPR<InstARM32::Movw, false> InstARM32Movw;
typedef InstARM32UnaryopGPR<InstARM32::Clz, false> InstARM32Clz;
typedef InstARM32UnaryopGPR<InstARM32::Mvn, false> InstARM32Mvn;
typedef InstARM32UnaryopGPR<InstARM32::Rbit, false> InstARM32Rbit;
typedef InstARM32UnaryopGPR<InstARM32::Rev, false> InstARM32Rev;
// Technically, the uxt{b,h} and sxt{b,h} instructions have a rotation
// operand as well (rotate source by 8, 16, 24 bits prior to extending),
// but we aren't using that for now, so just model as a Unaryop.
typedef InstARM32UnaryopGPR<InstARM32::Sxt> InstARM32Sxt;
typedef InstARM32UnaryopGPR<InstARM32::Uxt> InstARM32Uxt;
typedef InstARM32UnaryopGPR<InstARM32::Sxt, true> InstARM32Sxt;
typedef InstARM32UnaryopGPR<InstARM32::Uxt, true> InstARM32Uxt;
typedef InstARM32FourAddrGPR<InstARM32::Mla> InstARM32Mla;
typedef InstARM32FourAddrGPR<InstARM32::Mls> InstARM32Mls;
typedef InstARM32CmpLike<InstARM32::Cmp> InstARM32Cmp;
......
......@@ -321,6 +321,9 @@ protected:
Context.insert(InstBundleLock::create(Func, BundleOption));
}
void _bundle_unlock() { Context.insert(InstBundleUnlock::create(Func)); }
/// Flags the destination of the most recently inserted instruction as
/// non-killable.
void _set_dest_nonkillable() {
  auto *LastInst = Context.getLastInserted();
  LastInst->setDestNonKillable();
}
Cfg *Func;
GlobalContext *Ctx;
......
......@@ -1575,7 +1575,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
// Copy arguments that are passed on the stack to the appropriate
// stack locations.
Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
for (auto &StackArg : StackArgs) {
ConstantInteger32 *Loc =
llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
......@@ -1662,7 +1662,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
if (ParameterAreaSizeBytes) {
Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
Legal_Reg | Legal_Flex);
Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
_add(SP, SP, AddAmount);
}
......@@ -2032,19 +2032,91 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return;
}
case Intrinsics::Bswap: {
UnimplementedError(Func->getContext()->getFlags());
Variable *Dest = Instr->getDest();
Operand *Val = Instr->getArg(0);
Type Ty = Val->getType();
if (Ty == IceType_i64) {
Variable *Val_Lo = legalizeToVar(loOperand(Val));
Variable *Val_Hi = legalizeToVar(hiOperand(Val));
Variable *T_Lo = makeReg(IceType_i32);
Variable *T_Hi = makeReg(IceType_i32);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
_rev(T_Lo, Val_Lo);
_rev(T_Hi, Val_Hi);
_mov(DestLo, T_Hi);
_mov(DestHi, T_Lo);
} else {
assert(Ty == IceType_i32 || Ty == IceType_i16);
Variable *ValR = legalizeToVar(Val);
Variable *T = makeReg(Ty);
_rev(T, ValR);
if (Val->getType() == IceType_i16) {
Operand *Sixteen =
legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex);
_lsr(T, T, Sixteen);
}
_mov(Dest, T);
}
return;
}
case Intrinsics::Ctpop: {
UnimplementedError(Func->getContext()->getFlags());
Variable *Dest = Instr->getDest();
Operand *Val = Instr->getArg(0);
InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
? H_call_ctpop_i32
: H_call_ctpop_i64,
Dest, 1);
Call->addArg(Val);
lowerCall(Call);
// The popcount helpers always return 32-bit values, while the intrinsic's
// signature matches some 64-bit platform's native instructions and
// expect to fill a 64-bit reg. Thus, clear the upper bits of the dest
// just in case the user doesn't do that in the IR or doesn't toss the bits
// via truncate.
if (Val->getType() == IceType_i64) {
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Constant *Zero = Ctx->getConstantZero(IceType_i32);
_mov(DestHi, Zero);
}
return;
}
case Intrinsics::Ctlz: {
UnimplementedError(Func->getContext()->getFlags());
// The "is zero undef" parameter is ignored and we always return
// a well-defined value.
Operand *Val = Instr->getArg(0);
Variable *ValLoR;
Variable *ValHiR = nullptr;
if (Val->getType() == IceType_i64) {
ValLoR = legalizeToVar(loOperand(Val));
ValHiR = legalizeToVar(hiOperand(Val));
} else {
ValLoR = legalizeToVar(Val);
}
lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
return;
}
case Intrinsics::Cttz: {
UnimplementedError(Func->getContext()->getFlags());
// Essentially like Clz, but reverse the bits first.
Operand *Val = Instr->getArg(0);
Variable *ValLoR;
Variable *ValHiR = nullptr;
if (Val->getType() == IceType_i64) {
ValLoR = legalizeToVar(loOperand(Val));
ValHiR = legalizeToVar(hiOperand(Val));
Variable *TLo = makeReg(IceType_i32);
Variable *THi = makeReg(IceType_i32);
_rbit(TLo, ValLoR);
_rbit(THi, ValHiR);
ValLoR = THi;
ValHiR = TLo;
} else {
ValLoR = legalizeToVar(Val);
Variable *T = makeReg(IceType_i32);
_rbit(T, ValLoR);
ValLoR = T;
}
lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
return;
}
case Intrinsics::Fabs: {
......@@ -2077,13 +2149,15 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return;
}
case Intrinsics::Memset: {
// The value operand needs to be extended to a stack slot size
// because the PNaCl ABI requires arguments to be at least 32 bits
// wide.
// The value operand needs to be extended to a stack slot size because the
// PNaCl ABI requires arguments to be at least 32 bits wide.
Operand *ValOp = Instr->getArg(1);
assert(ValOp->getType() == IceType_i8);
Variable *ValExt = Func->makeVariable(stackSlotType());
lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
// Technically, ARM has their own __aeabi_memset, but we can use plain
// memset too. The value and size argument need to be flipped if we ever
// decide to use __aeabi_memset.
InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
Call->addArg(Instr->getArg(0));
Call->addArg(ValExt);
......@@ -2111,15 +2185,19 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return;
}
case Intrinsics::Stacksave: {
UnimplementedError(Func->getContext()->getFlags());
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
Variable *Dest = Instr->getDest();
_mov(Dest, SP);
return;
}
case Intrinsics::Stackrestore: {
UnimplementedError(Func->getContext()->getFlags());
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex);
_mov_nonkillable(SP, Val);
return;
}
case Intrinsics::Trap:
UnimplementedError(Func->getContext()->getFlags());
_trap();
return;
case Intrinsics::UnknownIntrinsic:
Func->setError("Should not be lowering UnknownIntrinsic");
......@@ -2128,6 +2206,34 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return;
}
/// Lowers a count-leading-zeros style computation into Dest.
///
/// Dest must be i32 or i64. For an i32 Dest the result is simply clz(ValLoR).
/// For an i64 Dest the low word of Dest receives clz(ValHiR) when ValHiR is
/// nonzero and 32 + clz(ValLoR) otherwise, and the high word of Dest is set
/// to zero. ValHiR is only read in the i64 case.
void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
  const Type DestTy = Dest->getType();
  assert(DestTy == IceType_i32 || DestTy == IceType_i64);
  Variable *LoCount = makeReg(IceType_i32);
  _clz(LoCount, ValLoR);

  // 32-bit case: the single clz is the whole answer.
  if (DestTy != IceType_i64) {
    _mov(Dest, LoCount);
    return;
  }

  auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
  auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
  Operand *Zero =
      legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
  Operand *ThirtyTwo =
      legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
  _cmp(ValHiR, Zero);
  // Start from the "ValHiR is zero" answer (32 + clz of the other word) and
  // conditionally overwrite it with clz(ValHiR) when ValHiR is nonzero.
  Variable *Count = makeReg(IceType_i32);
  _add(Count, LoCount, ThirtyTwo);
  _clz(Count, ValHiR, CondARM32::NE);
  // Because the clz above is predicated, it may leave Count untouched, so
  // Count is effectively a source of that instruction as well. Marking the
  // dest non-killable prolongs Count's liveness as if it had been used as a
  // source. This must directly follow the predicated clz.
  _set_dest_nonkillable();
  _mov(DestLo, Count);
  _mov(DestHi, Ctx->getConstantZero(IceType_i32));
}
void TargetARM32::lowerLoad(const InstLoad *Load) {
// A Load instruction can be treated the same as an Assign
// instruction, after the source operand is transformed into an
......@@ -2186,7 +2292,7 @@ void TargetARM32::lowerRet(const InstRet *Inst) {
// eliminated. TODO: Are there more places where the fake use
// should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
// have a ret instruction.
Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
Context.insert(InstFakeUse::create(Func, SP));
}
......
......@@ -172,6 +172,8 @@ protected:
ExtInstr ExtFunc, DivInstr DivFunc,
const char *DivHelperName, bool IsRemainder);
void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);
// The following are helpers that insert lowered ARM32 instructions
// with minimal syntactic overhead, so that the lowering code can
// look as close to assembly as practical.
......@@ -224,6 +226,10 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Cmp::create(Func, Src0, Src1, Pred));
}
/// Inserts a (possibly predicated) clz instruction: Dest := clz(Src0).
void _clz(Variable *Dest, Variable *Src0,
          CondARM32::Cond Pred = CondARM32::AL) {
  auto *Clz = InstARM32Clz::create(Func, Dest, Src0, Pred);
  Context.insert(Clz);
}
void _eor(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Eor::create(Func, Dest, Src0, Src1, Pred));
......@@ -301,6 +307,14 @@ protected:
for (Variable *Dest : Dests)
Context.insert(InstFakeDef::create(Func, Dest));
}
/// Inserts a (possibly predicated) rbit instruction: Dest := rbit(Src0).
void _rbit(Variable *Dest, Variable *Src0,
           CondARM32::Cond Pred = CondARM32::AL) {
  auto *Rbit = InstARM32Rbit::create(Func, Dest, Src0, Pred);
  Context.insert(Rbit);
}
/// Inserts a (possibly predicated) rev instruction: Dest := rev(Src0).
void _rev(Variable *Dest, Variable *Src0,
          CondARM32::Cond Pred = CondARM32::AL) {
  auto *Rev = InstARM32Rev::create(Func, Dest, Src0, Pred);
  Context.insert(Rev);
}
void _ret(Variable *LR, Variable *Src0 = nullptr) {
Context.insert(InstARM32Ret::create(Func, LR, Src0));
}
......
......@@ -96,6 +96,7 @@ public:
using Machine::_bundle_lock;
using Machine::_bundle_unlock;
using Machine::_set_dest_nonkillable;
using Machine::getContext;
using Machine::getStackAdjustment;
using Machine::regAlloc;
......@@ -587,9 +588,6 @@ protected:
void _xor_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
Context.insert(Traits::Insts::XorRMW::create(Func, DestSrc0, Src1));
}
void _set_dest_nonkillable() {
Context.getLastInserted()->setDestNonKillable();
}
bool optimizeScalarMul(Variable *Dest, Operand *Src0, int32_t Src1);
void findRMW();
......
......@@ -3521,9 +3521,8 @@ void TargetX86Base<Machine>::lowerIntrinsicCall(
return;
}
case Intrinsics::Memset: {
// The value operand needs to be extended to a stack slot size
// because the PNaCl ABI requires arguments to be at least 32 bits
// wide.
// The value operand needs to be extended to a stack slot size because the
// PNaCl ABI requires arguments to be at least 32 bits wide.
Operand *ValOp = Instr->getArg(1);
assert(ValOp->getType() == IceType_i8);
Variable *ValExt = Func->template makeVariable(stackSlotType());
......@@ -5257,8 +5256,7 @@ Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate,
_lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset,
nullptr, 0));
// make sure liveness analysis won't kill this variable, otherwise a
// liveness
// assertion will be triggered.
// liveness assertion will be triggered.
_set_dest_nonkillable();
if (Immediate->getType() != IceType_i32) {
Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
......
; This tests the NaCl intrinsics not related to atomic operations.
; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 -sandbox \
; RUN: | FileCheck %s
; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 -sandbox \
; RUN: | FileCheck %s
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -O2 -sandbox \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -Om1 -sandbox \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; Do another run w/ O2 and a different check-prefix (otherwise O2 and Om1
; share the same "CHECK" prefix). This separate run helps check that
; some code is optimized out.
; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 -sandbox \
; RUN: | FileCheck --check-prefix=CHECKO2REM %s
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -O2 -sandbox \
; RUN: | %if --need=target_X8632 \
; RUN: --command FileCheck --check-prefix=CHECKO2REM %s
; Do O2 runs without -sandbox to make sure llvm.nacl.read.tp gets
; lowered to __nacl_read_tp instead of gs:0x0.
; We also know that because it's O2, it'll have the O2REM optimizations.
; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \
; RUN: | FileCheck --check-prefix=CHECKO2UNSANDBOXEDREM %s
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -O2 \
; RUN: | %if --need=target_X8632 \
; RUN: --command FileCheck --check-prefix=CHECKO2UNSANDBOXEDREM %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
; RUN: -i %s --args -O2 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
declare i8* @llvm.nacl.read.tp()
declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
......@@ -106,6 +119,8 @@ entry:
; CHECK: call {{.*}} R_{{.*}} memcpy
; CHECKO2REM-LABEL: test_memcpy
; CHECKO2UNSANDBOXEDREM-LABEL: test_memcpy
; ARM32-LABEL: test_memcpy
; ARM32: bl {{.*}} memcpy
; TODO(jvoung) -- if we want to be clever, we can do this and the memmove,
; memset without a function call.
......@@ -114,11 +129,13 @@ entry:
%dst = inttoptr i32 %iptr_dst to i8*
%src = inttoptr i32 %iptr_src to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src,
i32 8, i32 1, i1 false)
i32 32, i32 1, i1 false)
ret void
}
; CHECK-LABEL: test_memcpy_const_len_align
; CHECK: call {{.*}} R_{{.*}} memcpy
; ARM32-LABEL: test_memcpy_const_len_align
; ARM32: bl {{.*}} memcpy
define void @test_memmove(i32 %iptr_dst, i32 %iptr_src, i32 %len) {
entry:
......@@ -130,17 +147,21 @@ entry:
}
; CHECK-LABEL: test_memmove
; CHECK: call {{.*}} R_{{.*}} memmove
; ARM32-LABEL: test_memmove
; ARM32: bl {{.*}} memmove
define void @test_memmove_const_len_align(i32 %iptr_dst, i32 %iptr_src) {
entry:
%dst = inttoptr i32 %iptr_dst to i8*
%src = inttoptr i32 %iptr_src to i8*
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src,
i32 8, i32 1, i1 false)
i32 32, i32 1, i1 false)
ret void
}
; CHECK-LABEL: test_memmove_const_len_align
; CHECK: call {{.*}} R_{{.*}} memmove
; ARM32-LABEL: test_memmove_const_len_align
; ARM32: bl {{.*}} memmove
define void @test_memset(i32 %iptr_dst, i32 %wide_val, i32 %len) {
entry:
......@@ -153,18 +174,24 @@ entry:
; CHECK-LABEL: test_memset
; CHECK: movzx
; CHECK: call {{.*}} R_{{.*}} memset
; ARM32-LABEL: test_memset
; ARM32: uxtb
; ARM32: bl {{.*}} memset
define void @test_memset_const_len_align(i32 %iptr_dst, i32 %wide_val) {
entry:
%val = trunc i32 %wide_val to i8
%dst = inttoptr i32 %iptr_dst to i8*
call void @llvm.memset.p0i8.i32(i8* %dst, i8 %val,
i32 8, i32 1, i1 false)
i32 32, i32 1, i1 false)
ret void
}
; CHECK-LABEL: test_memset_const_len_align
; CHECK: movzx
; CHECK: call {{.*}} R_{{.*}} memset
; ARM32-LABEL: test_memset_const_len_align
; ARM32: uxtb
; ARM32: bl {{.*}} memset
define void @test_memset_const_val(i32 %iptr_dst, i32 %len) {
entry:
......@@ -176,7 +203,9 @@ entry:
; Make sure the argument is legalized (can't movzx reg, 0).
; CHECK: movzx {{.*}},{{[^0]}}
; CHECK: call {{.*}} R_{{.*}} memset
; ARM32-LABEL: test_memset_const_val
; ARM32: uxtb
; ARM32: bl {{.*}} memset
define i32 @test_setjmplongjmp(i32 %iptr_env) {
entry:
......@@ -198,6 +227,9 @@ NonZero:
; CHECKO2REM-LABEL: test_setjmplongjmp
; CHECKO2REM: call {{.*}} R_{{.*}} setjmp
; CHECKO2REM: call {{.*}} R_{{.*}} longjmp
; ARM32-LABEL: test_setjmplongjmp
; ARM32: bl {{.*}} setjmp
; ARM32: bl {{.*}} longjmp
define i32 @test_setjmp_unused(i32 %iptr_env, i32 %i_other) {
entry:
......@@ -344,6 +376,8 @@ NonZero:
}
; CHECK-LABEL: test_trap
; CHECK: ud2
; ARM32-LABEL: test_trap
; ARM32: .word 0xe7fedef0
define i32 @test_bswap_16(i32 %x) {
entry:
......@@ -356,6 +390,9 @@ entry:
; Make sure this is the right operand size so that the most significant bit
; to least significant bit rotation happens at the right boundary.
; CHECK: rol {{[abcd]x|si|di|bp|word ptr}},0x8
; ARM32-LABEL: test_bswap_16
; ARM32: rev
; ARM32: lsr {{.*}} #16
define i32 @test_bswap_32(i32 %x) {
entry:
......@@ -364,6 +401,8 @@ entry:
}
; CHECK-LABEL: test_bswap_32
; CHECK: bswap e{{.*}}
; ARM32-LABEL: test_bswap_32
; ARM32: rev
define i64 @test_bswap_64(i64 %x) {
entry:
......@@ -373,6 +412,9 @@ entry:
; CHECK-LABEL: test_bswap_64
; CHECK: bswap e{{.*}}
; CHECK: bswap e{{.*}}
; ARM32-LABEL: test_bswap_64
; ARM32: rev
; ARM32: rev
define i32 @test_ctlz_32(i32 %x) {
entry:
......@@ -387,6 +429,8 @@ entry:
; CHECK: mov [[REG_RES:e.*]],0x3f
; CHECK: cmovne [[REG_RES]],[[REG_TMP]]
; CHECK: xor [[REG_RES]],0x1f
; ARM32-LABEL: test_ctlz_32
; ARM32: clz
define i32 @test_ctlz_32_const() {
entry:
......@@ -398,6 +442,8 @@ entry:
; or memory.
; CHECK-LABEL: test_ctlz_32_const
; CHECK: bsr e{{.*}},{{.*}}e{{.*}}
; ARM32-LABEL: test_ctlz_32_const
; ARM32: clz
define i32 @test_ctlz_32_ignored(i32 %x) {
entry:
......@@ -424,6 +470,12 @@ entry:
; CHECK: test [[REG_UPPER:.*]],[[REG_UPPER]]
; CHECK: cmove [[REG_RES2]],[[REG_RES1]]
; CHECK: mov {{.*}},0x0
; ARM32-LABEL: test_ctlz_64
; ARM32: clz
; ARM32: cmp {{.*}}, #0
; ARM32: add {{.*}}, #32
; ARM32: clzne
; ARM32: mov {{.*}}, #0
define i32 @test_ctlz_64_const(i64 %x) {
entry:
......@@ -434,7 +486,9 @@ entry:
; CHECK-LABEL: test_ctlz_64_const
; CHECK: bsr e{{.*}},{{.*}}e{{.*}}
; CHECK: bsr e{{.*}},{{.*}}e{{.*}}
; ARM32-LABEL: test_ctlz_64_const
; ARM32: clz
; ARM32: clzne
define i32 @test_ctlz_64_ignored(i64 %x) {
entry:
......@@ -453,6 +507,9 @@ entry:
; CHECK: bsf [[REG_IF_NOTZERO:e.*]],{{.*}}
; CHECK: mov [[REG_IF_ZERO:e.*]],0x20
; CHECK: cmovne [[REG_IF_ZERO]],[[REG_IF_NOTZERO]]
; ARM32-LABEL: test_cttz_32
; ARM32: rbit
; ARM32: clz
define i64 @test_cttz_64(i64 %x) {
entry:
......@@ -468,6 +525,14 @@ entry:
; CHECK: test [[REG_LOWER]],[[REG_LOWER]]
; CHECK: cmove [[REG_RES2]],[[REG_RES1]]
; CHECK: mov {{.*}},0x0
; ARM32-LABEL: test_cttz_64
; ARM32: rbit
; ARM32: rbit
; ARM32: clz
; ARM32: cmp {{.*}}, #0
; ARM32: add {{.*}}, #32
; ARM32: clzne
; ARM32: mov {{.*}}, #0
define i32 @test_popcount_32(i32 %x) {
entry:
......@@ -476,6 +541,8 @@ entry:
}
; CHECK-LABEL: test_popcount_32
; CHECK: call {{.*}} R_{{.*}} __popcountsi2
; ARM32-LABEL: test_popcount_32
; ARM32: bl {{.*}} __popcountsi2
define i64 @test_popcount_64(i64 %x) {
entry:
......@@ -487,7 +554,9 @@ entry:
; __popcountdi2 only returns a 32-bit result, so clear the upper bits of
; the return value just in case.
; CHECK: mov {{.*}},0x0
; ARM32-LABEL: test_popcount_64
; ARM32: bl {{.*}} __popcountdi2
; ARM32: mov {{.*}}, #0
define i32 @test_popcount_64_ret_i32(i64 %x) {
entry:
......@@ -509,6 +578,9 @@ entry:
; CHECK-LABEL: test_stacksave_noalloca
; CHECK: mov {{.*}},esp
; CHECK: mov esp,{{.*}}
; ARM32-LABEL: test_stacksave_noalloca
; ARM32: mov {{.*}}, sp
; ARM32: mov sp, {{.*}}
declare i32 @foo(i32 %x)
......@@ -544,3 +616,8 @@ entry:
; CHECK: mov {{.*}},esp
; CHECK: mov {{.*}},esp
; CHECK: mov esp,{{.*}}
; ARM32-LABEL: test_stacksave_multiple
; ARM32: mov {{.*}}, sp
; ARM32: mov {{.*}}, sp
; ARM32: mov {{.*}}, sp
; ARM32: mov sp, {{.*}}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment