Commit cfa628b5 by Andrew Scull

Inline memmove for small constant sizes and refactor memcpy and memset.

The memory intrinsics are only optimized at -O1 and higher unless the -fmem-intrin-opt flag is set to force the optimization to take place. This change also introduces the xchg instruction for two register operands. This is no longer used in the memory intrinsic lowering (or by anything else) but the implementation is left for future use. BUG= R=jvoung@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/1278173009.
parent 209318af
......@@ -227,6 +227,9 @@ UNITTEST_SRCS = \
# The X86 assembler tests take too long to compile. Given how infrequently the
# assembler will change, we disable them.
ifdef CHECK_X86_ASM
ifndef DEBUG
$(error Run check-unit with DEBUG=1 lest your machine perish)
endif
UNITTEST_SRCS += AssemblerX8632/LowLevel.cpp \
AssemblerX8632/DataMov.cpp \
AssemblerX8632/Locked.cpp \
......@@ -350,11 +353,14 @@ check-xtest: $(OBJDIR)/pnacl-sz make_symlink runtime
# Do all native/sse2 tests, but only test_vector_ops for native/sse4.1.
# For (slow) sandboxed tests, limit to Om1/sse4.1.
# TODO(jpp): implement x8664 sandbox, then enable xtests.
# TODO(jpp): reenable the x86-64 tests.
./pydir/crosstest_generator.py -v --lit \
--toolchain-root $(TOOLCHAIN_ROOT) \
-i x8632,native,sse2 -i x8632,native,sse4.1,test_vector_ops \
-i x8632,native,sse2 \
-i x8632,native,sse4.1,test_vector_ops \
-i x8632,sandbox,sse4.1,Om1 \
-i x8664,native,sse2 -i x8664,native,sse4.1,test_vector_ops \
-e x8664,native,sse2 \
-e x8664,native,sse4.1,test_vector_ops \
-e x8664,native,sse2,test_global \
-i arm32,native,neon,simple_loop \
-i arm32,native,neon,mem_intrin \
......
......@@ -5,6 +5,7 @@ test: simple_loop.c
[mem_intrin]
driver: mem_intrin_main.cpp
test: mem_intrin.cpp
flags: --sz=-fmem-intrin-opt
[test_arith]
driver: test_arith_main.cpp
......
......@@ -40,30 +40,6 @@ fletcher_checksum(uint8_t *buf, SizeT length) {
return (sum_of_sums << 8) | sum;
}
#define NWORDS 32
#define BYTE_LENGTH (NWORDS * sizeof(elem_t))
/* Copies a compile-time-constant number of bytes with memcpy and returns a
 * Fletcher checksum of the destination, so llc- and Subzero-compiled builds
 * can be compared for identical behavior. */
int memcpy_test_fixed_len(uint8_t init) {
  elem_t src[NWORDS];
  elem_t dst[NWORDS];
  reset_buf((uint8_t *)src, init, BYTE_LENGTH);
  memcpy((void *)dst, (void *)src, BYTE_LENGTH);
  return fletcher_checksum((uint8_t *)dst, BYTE_LENGTH);
}
/* Shifts the buffer contents up by 4 elements with an overlapping memmove and
 * checksums part of the result for llc-vs-Subzero comparison.
 * NOTE(review): the units are mixed — the moved region is
 * BYTE_LENGTH - 4*sizeof(elem_t) bytes starting at element offset 4, but the
 * checksum covers BYTE_LENGTH - 4 bytes starting at BYTE offset 4. Harmless
 * for the comparison (both compilers compute the same thing over initialized
 * memory), but it looks unintended — confirm against the test's intent. */
int memmove_test_fixed_len(uint8_t init) {
  elem_t buf[NWORDS];
  reset_buf((uint8_t *)buf, init, BYTE_LENGTH);
  memmove((void *)(buf + 4), (void *)buf, BYTE_LENGTH - (4 * sizeof(elem_t)));
  return fletcher_checksum((uint8_t *)buf + 4, BYTE_LENGTH - 4);
}
/* Fills a compile-time-constant-sized buffer via memset and returns its
 * Fletcher checksum for cross-backend comparison. */
int memset_test_fixed_len(uint8_t init) {
  elem_t filled[NWORDS];
  memset((void *)filled, init, BYTE_LENGTH);
  return fletcher_checksum((uint8_t *)filled, BYTE_LENGTH);
}
int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length) {
reset_buf(buf, init, length);
memcpy((void *)buf2, (void *)buf, length);
......@@ -94,3 +70,33 @@ int memset_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length) {
memset((void *)buf2, init + 4, length);
return fletcher_checksum(buf, length) + fletcher_checksum(buf2, length);
}
/* For each size listed in MEMINTRIN_SIZE_TABLE, instantiate fixed-length
 * memcpy, memmove and memset test functions named
 * <intrin>_test_fixed_len_<NBYTES>. Each returns a Fletcher checksum of the
 * affected memory so the llc- and Subzero-compiled versions can be compared.
 * The memmove variant exercises both overlap directions (dest above source
 * and dest below source) using a 16-byte shift within an oversized buffer. */
#define X(NBYTES) \
int memcpy_test_fixed_len_##NBYTES(uint8_t init) { \
uint8_t buf[NBYTES]; \
uint8_t buf2[NBYTES]; \
reset_buf(buf, init, NBYTES); \
memcpy((void *)buf2, (void *)buf, NBYTES); \
return fletcher_checksum(buf2, NBYTES); \
} \
\
int memmove_test_fixed_len_##NBYTES(uint8_t init) { \
uint8_t buf[NBYTES + 16]; \
uint8_t buf2[NBYTES + 16]; \
reset_buf(buf, init, NBYTES + 16); \
reset_buf(buf2, init, NBYTES + 16); \
/* Move up */ \
memmove((void *)(buf + 16), (void *)buf, NBYTES); \
/* Move down */ \
memmove((void *)buf2, (void *)(buf2 + 16), NBYTES); \
return fletcher_checksum(buf, NBYTES + 16) + \
fletcher_checksum(buf2, NBYTES + 16); \
} \
\
int memset_test_fixed_len_##NBYTES(uint8_t init) { \
uint8_t buf[NBYTES]; \
memset((void *)buf, init, NBYTES); \
return fletcher_checksum(buf, NBYTES); \
}
MEMINTRIN_SIZE_TABLE
#undef X
/* Sizes (in bytes) at which the fixed-length memory-intrinsic tests are
 * instantiated: every value from 0 through 256. This exhaustively covers all
 * small constant sizes a backend might unroll inline (including the awkward
 * non-power-of-two remainders) plus sizes beyond the unroll limits, which
 * should fall back to library calls. Users #define X(NBYTES) before expanding
 * this table and #undef it afterwards. */
#define MEMINTRIN_SIZE_TABLE \
X(0) \
X(1) \
X(2) \
X(3) \
X(4) \
X(5) \
X(6) \
X(7) \
X(8) \
X(9) \
X(10) \
X(11) \
X(12) \
X(13) \
X(14) \
X(15) \
X(16) \
X(17) \
X(18) \
X(19) \
X(20) \
X(21) \
X(22) \
X(23) \
X(24) \
X(25) \
X(26) \
X(27) \
X(28) \
X(29) \
X(30) \
X(31) \
X(32) \
X(33) \
X(34) \
X(35) \
X(36) \
X(37) \
X(38) \
X(39) \
X(40) \
X(41) \
X(42) \
X(43) \
X(44) \
X(45) \
X(46) \
X(47) \
X(48) \
X(49) \
X(50) \
X(51) \
X(52) \
X(53) \
X(54) \
X(55) \
X(56) \
X(57) \
X(58) \
X(59) \
X(60) \
X(61) \
X(62) \
X(63) \
X(64) \
X(65) \
X(66) \
X(67) \
X(68) \
X(69) \
X(70) \
X(71) \
X(72) \
X(73) \
X(74) \
X(75) \
X(76) \
X(77) \
X(78) \
X(79) \
X(80) \
X(81) \
X(82) \
X(83) \
X(84) \
X(85) \
X(86) \
X(87) \
X(88) \
X(89) \
X(90) \
X(91) \
X(92) \
X(93) \
X(94) \
X(95) \
X(96) \
X(97) \
X(98) \
X(99) \
X(100) \
X(101) \
X(102) \
X(103) \
X(104) \
X(105) \
X(106) \
X(107) \
X(108) \
X(109) \
X(110) \
X(111) \
X(112) \
X(113) \
X(114) \
X(115) \
X(116) \
X(117) \
X(118) \
X(119) \
X(120) \
X(121) \
X(122) \
X(123) \
X(124) \
X(125) \
X(126) \
X(127) \
X(128) \
X(129) \
X(130) \
X(131) \
X(132) \
X(133) \
X(134) \
X(135) \
X(136) \
X(137) \
X(138) \
X(139) \
X(140) \
X(141) \
X(142) \
X(143) \
X(144) \
X(145) \
X(146) \
X(147) \
X(148) \
X(149) \
X(150) \
X(151) \
X(152) \
X(153) \
X(154) \
X(155) \
X(156) \
X(157) \
X(158) \
X(159) \
X(160) \
X(161) \
X(162) \
X(163) \
X(164) \
X(165) \
X(166) \
X(167) \
X(168) \
X(169) \
X(170) \
X(171) \
X(172) \
X(173) \
X(174) \
X(175) \
X(176) \
X(177) \
X(178) \
X(179) \
X(180) \
X(181) \
X(182) \
X(183) \
X(184) \
X(185) \
X(186) \
X(187) \
X(188) \
X(189) \
X(190) \
X(191) \
X(192) \
X(193) \
X(194) \
X(195) \
X(196) \
X(197) \
X(198) \
X(199) \
X(200) \
X(201) \
X(202) \
X(203) \
X(204) \
X(205) \
X(206) \
X(207) \
X(208) \
X(209) \
X(210) \
X(211) \
X(212) \
X(213) \
X(214) \
X(215) \
X(216) \
X(217) \
X(218) \
X(219) \
X(220) \
X(221) \
X(222) \
X(223) \
X(224) \
X(225) \
X(226) \
X(227) \
X(228) \
X(229) \
X(230) \
X(231) \
X(232) \
X(233) \
X(234) \
X(235) \
X(236) \
X(237) \
X(238) \
X(239) \
X(240) \
X(241) \
X(242) \
X(243) \
X(244) \
X(245) \
X(246) \
X(247) \
X(248) \
X(249) \
X(250) \
X(251) \
X(252) \
X(253) \
X(254) \
X(255) \
X(256)
......@@ -6,10 +6,15 @@
*/
#include "xdefs.h"
#include "mem_intrin.def"
int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length);
int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length);
int memset_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length);
int memcpy_test_fixed_len(uint8_t init);
int memmove_test_fixed_len(uint8_t init);
int memset_test_fixed_len(uint8_t init);
#define X(NBYTES) \
int memcpy_test_fixed_len_##NBYTES(uint8_t init); \
int memmove_test_fixed_len_##NBYTES(uint8_t init); \
int memset_test_fixed_len_##NBYTES(uint8_t init);
MEMINTRIN_SIZE_TABLE
#undef X
......@@ -14,27 +14,6 @@ namespace Subzero_ {
/* Stringization helpers: STR quotes a token directly, XSTR macro-expands its
 * argument first. */
#define XSTR(s) STR(s)
#define STR(s) #s
/* Runs each fixed-length memory-intrinsic test for init values 0..99,
 * comparing the llc-compiled result (global namespace) against the
 * Subzero-compiled result (Subzero_ namespace) and tallying pass/fail
 * counts into the caller's counters. */
void testFixedLen(SizeT &TotalTests, SizeT &Passes, SizeT &Failures) {
#define do_test_fixed(test_func) \
for (uint8_t init_val = 0; init_val < 100; ++init_val) { \
++TotalTests; \
int llc_result = test_func(init_val); \
int sz_result = Subzero_::test_func(init_val); \
if (llc_result == sz_result) { \
++Passes; \
} else { \
++Failures; \
printf("Failure (%s): init_val=%d, llc=%d, sz=%d\n", STR(test_func), \
init_val, llc_result, sz_result); \
} \
}
do_test_fixed(memcpy_test_fixed_len);
do_test_fixed(memmove_test_fixed_len);
do_test_fixed(memset_test_fixed_len) /* no ';' needed: macro ends in '}' */
#undef do_test_fixed
}
void testVariableLen(SizeT &TotalTests, SizeT &Passes, SizeT &Failures) {
uint8_t buf[256];
uint8_t buf2[256];
......@@ -60,6 +39,30 @@ void testVariableLen(SizeT &TotalTests, SizeT &Passes, SizeT &Failures) {
#undef do_test_variable
}
/* Runs every fixed-length memory-intrinsic test function generated by
 * MEMINTRIN_SIZE_TABLE (one memcpy/memmove/memset triple per size) for init
 * values 0..99, comparing the llc-compiled result (global namespace) against
 * the Subzero-compiled result (Subzero_ namespace) and tallying pass/fail
 * counts into the caller's counters. */
void testFixedLen(SizeT &TotalTests, SizeT &Passes, SizeT &Failures) {
/* Expands to a loop that pastes the size suffix onto test_func's name, calls
 * both compiled flavors, and compares their checksums. */
#define do_test_fixed(test_func, NBYTES) \
for (uint8_t init_val = 0; init_val < 100; ++init_val) { \
++TotalTests; \
int llc_result = test_func##_##NBYTES(init_val); \
int sz_result = Subzero_::test_func##_##NBYTES(init_val); \
if (llc_result == sz_result) { \
++Passes; \
} else { \
++Failures; \
printf("Failure (%s): init_val=%d, len=%d, llc=%d, sz=%d\n", \
STR(test_func), init_val, NBYTES, llc_result, sz_result); \
} \
}
/* One table row = one size; run all three intrinsics at that size. */
#define X(NBYTES) \
do_test_fixed(memcpy_test_fixed_len, NBYTES); \
do_test_fixed(memmove_test_fixed_len, NBYTES); \
do_test_fixed(memset_test_fixed_len, NBYTES);
MEMINTRIN_SIZE_TABLE
#undef X
#undef do_test_fixed
}
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
......
......@@ -93,6 +93,8 @@ def main():
argparser.add_argument('--filetype', default='obj', dest='filetype',
choices=['obj', 'asm', 'iasm'],
help='Output file type. Default %(default)s.')
argparser.add_argument('--sz', dest='sz_args', action='append', default=[],
help='Extra arguments to pass to pnacl-sz.')
args = argparser.parse_args()
nacl_root = FindBaseNaCl()
......@@ -133,6 +135,7 @@ def main():
obj_sz = os.path.join(args.dir, base_sz + '.sz.o')
obj_llc = os.path.join(args.dir, base_sz + '.llc.o')
shellcmd(['{path}/pnacl-sz'.format(path=os.path.dirname(mypath)),
] + args.sz_args + [
'-O' + args.optlevel,
'-mattr=' + args.attr,
'--target=' + args.target,
......
......@@ -845,6 +845,8 @@ public:
void cmpxchg8b(const typename Traits::Address &address, bool Locked);
void xadd(Type Ty, const typename Traits::Address &address,
typename Traits::GPRRegister reg, bool Locked);
void xchg(Type Ty, typename Traits::GPRRegister reg0,
typename Traits::GPRRegister reg1);
void xchg(Type Ty, const typename Traits::Address &address,
typename Traits::GPRRegister reg);
......
......@@ -3150,6 +3150,29 @@ void AssemblerX86Base<Machine>::xadd(Type Ty,
}
/// Emits an xchg between two GPRs of the given type.
///
/// Uses the compact one-byte 0x90+reg encoding when one operand is the
/// accumulator, otherwise the two-byte 0x86/0x87 ModRM form.
///
/// BUG FIX: the 0x90+reg short form only exists for 16/32/64-bit operands.
/// The previous code used it for byte-sized types too whenever one register
/// was the accumulator, which actually encodes a full-width xchg
/// (e.g. "xchg eax, ebx" instead of "xchg al, bl"), clobbering the upper
/// bits of both registers. Byte-sized exchanges now always take the 0x86
/// ModRM form.
template <class Machine>
void AssemblerX86Base<Machine>::xchg(Type Ty, typename Traits::GPRRegister reg0,
                                     typename Traits::GPRRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
  if (Ty == IceType_i16)
    emitOperandSizeOverride();
  // Use short form if either register is EAX -- valid only for non-byte
  // operand sizes.
  if (!isByteSizedArithType(Ty)) {
    if (reg0 == Traits::Encoded_Reg_Accumulator) {
      emitRexB(Ty, reg1);
      emitUint8(0x90 + gprEncoding(reg1));
      return;
    }
    if (reg1 == Traits::Encoded_Reg_Accumulator) {
      emitRexB(Ty, reg0);
      emitUint8(0x90 + gprEncoding(reg0));
      return;
    }
  }
  emitRexRB(Ty, reg0, reg1);
  // 0x86 is the byte-operand opcode, 0x87 the word/dword/qword one.
  emitUint8(isByteSizedArithType(Ty) ? 0x86 : 0x87);
  emitRegisterOperand(gprEncoding(reg0), gprEncoding(reg1));
}
template <class Machine>
void AssemblerX86Base<Machine>::xchg(Type Ty,
const typename Traits::Address &addr,
typename Traits::GPRRegister reg) {
......
......@@ -83,6 +83,10 @@ cl::opt<bool> EnableBlockProfile(
cl::init(false));
cl::opt<bool>
ForceMemIntrinOpt("fmem-intrin-opt",
cl::desc("Force optimization of memory intrinsics."));
cl::opt<bool>
FunctionSections("ffunction-sections",
cl::desc("Emit functions into separate sections"));
......@@ -356,6 +360,7 @@ void ClFlags::resetClFlags(ClFlags &OutFlags) {
OutFlags.DisableTranslation = false;
OutFlags.DumpStats = false;
OutFlags.EnableBlockProfile = false;
OutFlags.ForceMemIntrinOpt = false;
OutFlags.FunctionSections = false;
OutFlags.GenerateUnitTestMessages = false;
OutFlags.PhiEdgeSplit = false;
......@@ -416,6 +421,7 @@ void ClFlags::getParsedClFlags(ClFlags &OutFlags) {
OutFlags.setDisableTranslation(::DisableTranslation);
OutFlags.setDumpStats(::DumpStats);
OutFlags.setEnableBlockProfile(::EnableBlockProfile);
OutFlags.setForceMemIntrinOpt(::ForceMemIntrinOpt);
OutFlags.setFunctionSections(::FunctionSections);
OutFlags.setNumTranslationThreads(::NumThreads);
OutFlags.setOptLevel(::OLevel);
......
......@@ -72,6 +72,9 @@ public:
bool getEnableBlockProfile() const { return EnableBlockProfile; }
void setEnableBlockProfile(bool NewValue) { EnableBlockProfile = NewValue; }
bool getForceMemIntrinOpt() const { return ForceMemIntrinOpt; }
void setForceMemIntrinOpt(bool NewValue) { ForceMemIntrinOpt = NewValue; }
bool getFunctionSections() const { return FunctionSections; }
void setFunctionSections(bool NewValue) { FunctionSections = NewValue; }
......@@ -241,6 +244,7 @@ private:
bool DisableTranslation;
bool DumpStats;
bool EnableBlockProfile;
bool ForceMemIntrinOpt;
bool FunctionSections;
bool GenerateUnitTestMessages;
bool PhiEdgeSplit;
......
......@@ -3202,19 +3202,29 @@ void InstX86Xchg<Machine>::emitIAS(const Cfg *Func) const {
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
Type Ty = this->getSrc(0)->getType();
const auto Mem =
const auto *VarReg1 = llvm::cast<Variable>(this->getSrc(1));
assert(VarReg1->hasReg());
const typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister Reg1 =
InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR(
VarReg1->getRegNum());
if (const auto *VarReg0 = llvm::dyn_cast<Variable>(this->getSrc(0))) {
assert(VarReg0->hasReg());
const typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister Reg0 =
InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR(
VarReg0->getRegNum());
Asm->xchg(Ty, Reg0, Reg1);
return;
}
const auto *Mem =
llvm::cast<typename InstX86Base<Machine>::Traits::X86OperandMem>(
this->getSrc(0));
assert(Mem->getSegmentRegister() ==
InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
const typename InstX86Base<Machine>::Traits::Address Addr =
Mem->toAsmAddress(Asm);
const auto VarReg = llvm::cast<Variable>(this->getSrc(1));
assert(VarReg->hasReg());
const typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister Reg =
InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR(
VarReg->getRegNum());
Asm->xchg(Ty, Addr, Reg);
Asm->xchg(Ty, Addr, Reg1);
}
template <class Machine>
......
......@@ -440,6 +440,11 @@ InstCall *TargetLowering::makeHelperCall(const IceString &Name, Variable *Dest,
return Call;
}
// Returns true when memory intrinsics (memcpy/memmove/memset) should be
// lowered inline: either the optimization level is -O1 or above, or the
// -fmem-intrin-opt flag forces it regardless of opt level.
bool TargetLowering::shouldOptimizeMemIntrins() {
  const auto &Flags = Ctx->getFlags();
  if (Flags.getForceMemIntrinOpt())
    return true;
  return Flags.getOptLevel() >= Opt_1;
}
void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C) const {
if (!BuildDefs::dump())
return;
......
......@@ -351,6 +351,8 @@ protected:
Context.getLastInserted()->setDestNonKillable();
}
bool shouldOptimizeMemIntrins();
Cfg *Func;
GlobalContext *Ctx;
bool HasComputedFrame = false;
......
......@@ -441,6 +441,13 @@ template <> struct MachineTraits<TargetX8632> {
/// The number of different NOP instructions
static const uint32_t X86_NUM_NOP_VARIANTS = 5;
/// \name Limits for unrolling memory intrinsics.
/// @{
static constexpr uint32_t MEMCPY_UNROLL_LIMIT = 8;
static constexpr uint32_t MEMMOVE_UNROLL_LIMIT = 8;
static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16;
/// @}
/// Value is in bytes. Return Value adjusted to the next highest multiple
/// of the stack alignment.
static uint32_t applyStackAlignment(uint32_t Value) {
......
......@@ -456,6 +456,13 @@ template <> struct MachineTraits<TargetX8664> {
/// The number of different NOP instructions
static const uint32_t X86_NUM_NOP_VARIANTS = 5;
/// \name Limits for unrolling memory intrinsics.
/// @{
static constexpr uint32_t MEMCPY_UNROLL_LIMIT = 8;
static constexpr uint32_t MEMMOVE_UNROLL_LIMIT = 8;
static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16;
/// @}
/// Value is in bytes. Return Value adjusted to the next highest multiple
/// of the stack alignment.
static uint32_t applyStackAlignment(uint32_t Value) {
......
......@@ -192,9 +192,17 @@ protected:
Operand *Val);
void lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, Operand *FirstVal,
Operand *SecondVal);
/// Replace a call to memcpy with inline instructions.
/// Load from memory for a given type.
void typedLoad(Type Ty, Variable *Dest, Variable *Base, Constant *Offset);
/// Store to memory for a given type.
void typedStore(Type Ty, Variable *Value, Variable *Base, Constant *Offset);
/// Copy memory of given type from Src to Dest using OffsetAmt on both.
void copyMemory(Type Ty, Variable *Dest, Variable *Src, int32_t OffsetAmt);
/// Replace some calls to memcpy with inline instructions.
void lowerMemcpy(Operand *Dest, Operand *Src, Operand *Count);
/// Replace a call to memset with inline instructions.
/// Replace some calls to memmove with inline instructions.
void lowerMemmove(Operand *Dest, Operand *Src, Operand *Count);
/// Replace some calls to memset with inline instructions.
void lowerMemset(Operand *Dest, Operand *Val, Operand *Count);
/// Lower an indirect jump adding sandboxing when needed.
......@@ -251,6 +259,19 @@ protected:
Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister);
static Type stackSlotType();
static constexpr uint32_t NoSizeLimit = 0;
static const Type TypeForSize[];
/// Returns the largest type which is equal to or larger than Size bytes. The
/// type is suitable for copying memory i.e. a load and store will be a
/// single instruction (for example x86 will get f64 not i64).
static Type largestTypeInSize(uint32_t Size, uint32_t MaxSize = NoSizeLimit);
/// Returns the smallest type which is equal to or larger than Size bytes. If
/// one doesn't exist then the largest type smaller than Size bytes is
/// returned. The type is suitable for memory copies as described at
/// largestTypeInSize.
static Type firstTypeThatFitsSize(uint32_t Size,
uint32_t MaxSize = NoSizeLimit);
Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister);
/// \name Returns a vector in a register with the given constant entries.
......
......@@ -82,6 +82,57 @@ TEST_F(AssemblerX8632Test, Xchg) {
#undef TestImpl
#undef TestImplSize
#undef TestImplAddrReg
#define TestImplRegReg(Reg0, Value0, Reg1, Value1, Size) \
do { \
static constexpr char TestString[] = \
"(" #Reg0 "," #Value0 ", " #Reg1 ", " #Value1 ", " #Size ")"; \
const uint32_t V0 = (Value0)&Mask##Size; \
const uint32_t V1 = (Value1)&Mask##Size; \
\
__ mov(IceType_i##Size, GPRRegister::Encoded_Reg_##Reg0, \
Immediate(Value0)); \
__ mov(IceType_i##Size, GPRRegister::Encoded_Reg_##Reg1, \
Immediate(Value1)); \
__ xchg(IceType_i##Size, GPRRegister::Encoded_Reg_##Reg0, \
GPRRegister::Encoded_Reg_##Reg1); \
__ And(IceType_i32, GPRRegister::Encoded_Reg_##Reg0, \
Immediate(Mask##Size)); \
__ And(IceType_i32, GPRRegister::Encoded_Reg_##Reg1, \
Immediate(Mask##Size)); \
\
AssembledTest test = assemble(); \
test.run(); \
\
ASSERT_EQ(V0, test.Reg1()) << TestString; \
ASSERT_EQ(V1, test.Reg0()) << TestString; \
reset(); \
} while (0)
#define TestImplSize(Reg0, Reg1, Size) \
do { \
TestImplRegReg(Reg0, 0xa2b34567, Reg1, 0x0507ddee, Size); \
} while (0)
#define TestImpl(Reg0, Reg1) \
do { \
if (GPRRegister::Encoded_Reg_##Reg0 < 4 && \
GPRRegister::Encoded_Reg_##Reg1 < 4) { \
TestImplSize(Reg0, Reg1, 8); \
} \
TestImplSize(Reg0, Reg1, 16); \
TestImplSize(Reg0, Reg1, 32); \
} while (0)
TestImpl(eax, ebx);
TestImpl(edx, eax);
TestImpl(ecx, edx);
TestImpl(esi, eax);
TestImpl(edx, edi);
#undef TestImpl
#undef TestImplSize
#undef TestImplRegReg
}
TEST_F(AssemblerX8632Test, Xadd) {
......
......@@ -85,6 +85,54 @@ TEST_F(AssemblerX8664Test, Xchg) {
#undef TestImpl
#undef TestImplSize
#undef TestImplAddrReg
#define TestImplRegReg(Reg0, Value0, Reg1, Value1, Size) \
do { \
static constexpr char TestString[] = \
"(" #Reg0 "," #Value0 ", " #Reg1 ", " #Value1 ", " #Size ")"; \
const uint32_t V0 = (Value0)&Mask##Size; \
const uint32_t V1 = (Value1)&Mask##Size; \
\
__ mov(IceType_i##Size, Encoded_GPR_##Reg0(), Immediate(Value0)); \
__ mov(IceType_i##Size, Encoded_GPR_##Reg1(), Immediate(Value1)); \
__ xchg(IceType_i##Size, Encoded_GPR_##Reg0(), Encoded_GPR_##Reg1()); \
__ And(IceType_i32, Encoded_GPR_##Reg0(), Immediate(Mask##Size)); \
__ And(IceType_i32, Encoded_GPR_##Reg1(), Immediate(Mask##Size)); \
\
AssembledTest test = assemble(); \
test.run(); \
\
ASSERT_EQ(V0, test.Reg1()) << TestString; \
ASSERT_EQ(V1, test.Reg0()) << TestString; \
reset(); \
} while (0)
#define TestImplSize(Reg0, Reg1, Size) \
do { \
TestImplRegReg(Reg0, 0xa2b34567, Reg1, 0x0507ddee, Size); \
} while (0)
#define TestImpl(Reg0, Reg1) \
do { \
TestImplSize(Reg0, Reg1, 8); \
TestImplSize(Reg0, Reg1, 16); \
TestImplSize(Reg0, Reg1, 32); \
} while (0)
// r1 == rax so has a short encoding
TestImpl(r6, r1);
TestImpl(r1, r8);
TestImpl(r2, r10);
TestImpl(r3, r11);
TestImpl(r4, r12);
TestImpl(r5, r13);
TestImpl(r6, r14);
TestImpl(r7, r15);
#undef TestImpl
#undef TestImplSize
#undef TestImplRegReg
}
TEST_F(AssemblerX8664Test, Xadd) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment