Commit ae15f0fd by John Porto

Subzero. X86. Lowers shufflevector using xmm instructions.

parent 4aae81af
......@@ -56,4 +56,4 @@ test: test_sync_atomic.cpp
[test_vector_ops]
driver: test_vector_ops_main.cpp
test: test_vector_ops.ll
test: test_vector_ops.cpp test_vector_ops_ll.ll
//===- subzero/crosstest/test_vector_ops.cpp - Vector tests -----*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the vector shuffle routines.
//
//===----------------------------------------------------------------------===//
#include "test_vector_ops.h"
#include <algorithm>
#include <type_traits>
namespace {
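// SHUFFLETESTS_TABLE declares several shufflevector test cases. Each test case
// has 16 indexes because 16 is the max number of elements in a vector type in
// PNaCl bitcode. For vector types with fewer than 16 elements, the additional
// indexes are ignored. This strategy allows a single test table definition.
// Index values run from 0 to 31: a shufflevector index selects a lane from the
// concatenation of the two source vectors, which has 32 lanes for a 16-element
// vector type.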
#define SHUFFLETESTS_TABLE \
/* Indexes... */ \
/* Simple tests splatting elements. */ \
X(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) \
X(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \
X(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2) \
X(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3) \
X(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4) \
X(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5) \
X(6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6) \
X(7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7) \
X(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8) \
X(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9) \
X(10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10) \
X(11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11) \
X(12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) \
X(13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13) \
X(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14) \
X(15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15) \
X(16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16) \
X(17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17) \
X(18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18) \
X(19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19) \
X(20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20) \
X(21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21) \
X(22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22) \
X(23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23) \
X(24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24) \
X(25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25) \
X(26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26) \
X(27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27) \
X(28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28) \
X(29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29) \
X(30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30) \
X(31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31) \
/* Rotating vectors. */ \
X(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) \
X(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17) \
X(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18) \
X(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19) \
X(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20) \
X(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21) \
X(7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22) \
X(8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23) \
X(9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24) \
X(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25) \
X(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26) \
X(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) \
X(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28) \
X(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29) \
X(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30) \
X(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31) \
X(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0) \
X(18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1) \
X(19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2) \
X(20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3) \
X(21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4) \
X(22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5) \
X(23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6) \
X(24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7) \
X(25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8) \
X(26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9) \
X(27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) \
X(28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) \
X(29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12) \
X(30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13) \
X(31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14) \
/* Swapping elements. */ \
X(1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(11, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(13, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(17, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(18, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(19, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(21, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(23, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(24, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(25, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(26, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(27, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(28, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(29, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(30, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(31, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 3, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 4, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 5, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 6, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 7, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 10, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 11, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 12, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 13, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 14, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 15, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 17, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 18, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 19, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 20, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 21, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 22, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 23, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 24, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 25, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 26, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 27, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 28, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 29, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 30, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 31, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 4, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 5, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 6, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 7, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 8, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 9, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 10, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 11, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 12, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 13, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 14, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 15, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 16, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 17, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 18, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 19, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 20, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 21, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 22, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 23, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 24, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 25, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 26, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 27, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 28, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 29, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 30, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 31, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 5, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 6, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 7, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 8, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 9, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 10, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 11, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 12, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 13, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 14, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 15, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 16, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 17, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 18, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 20, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 21, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 22, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 23, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 24, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 25, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 26, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 27, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 28, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 29, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 30, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 31, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 5, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 6, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 7, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 8, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 9, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 10, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 11, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 12, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 13, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 14, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 15, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 16, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 17, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 18, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 19, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 21, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 22, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 22, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 23, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 24, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 25, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 26, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 27, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 28, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 29, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 30, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 31, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 2, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 6, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 7, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 8, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 9, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 11, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 12, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 13, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 14, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 15, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 17, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 18, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 19, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 20, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 21, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 22, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 23, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 24, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 25, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 26, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 27, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 28, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 29, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 30, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 31, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 1, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 2, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 3, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 8, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 9, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 11, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 12, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 13, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 14, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 15, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 17, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 18, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 19, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 20, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 21, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 22, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 23, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 24, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 25, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 26, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 27, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 28, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 29, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 30, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 31, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 0, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 1, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 2, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 3, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 4, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 5, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 6, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 8, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 9, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 10, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 11, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 12, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 13, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 14, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 15, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 16, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 18, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 19, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 20, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 21, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 22, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 23, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 24, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 25, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 26, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 27, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 28, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 29, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 30, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 31, 0, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 1, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 2, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 3, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 4, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 5, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 6, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 7, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 9, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 10, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 11, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 12, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 13, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 14, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 15, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 16, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 17, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 18, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 19, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 20, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 21, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 22, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 23, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 24, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 25, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 26, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 27, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 28, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 29, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 30, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 2, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 3, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 4, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 5, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 6, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 7, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 13, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 14, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 15, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 17, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 18, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 19, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 20, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 21, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 22, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 23, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 24, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 25, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 26, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 27, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 28, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 29, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 30, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 31, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 2, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 3, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 6, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 7, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 13, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 17, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 18, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 19, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 20, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 21, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 22, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 23, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 24, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 25, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 26, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 27, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 28, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 29, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 30, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 31, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 3, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 4, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 5, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 6, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 7, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 8, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 14, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 18, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 19, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 21, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 22, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 23, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 24, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 25, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 26, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 27, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 28, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 29, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 30, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 31, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 1, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 2, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 3, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 4, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 5, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 6, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 7, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 8, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 9, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 17, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 18, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 19, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 20, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 21, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 22, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 23, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 24, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 25, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 26, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 27, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 28, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 29, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 30, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 31, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 2, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 3, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 4, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 5, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 6, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 7, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 8, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 11, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 17, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 19, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 21, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 22, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 23, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 24, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 25, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 26, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 27, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 28, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 29, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 30, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 31, 14, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 2, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 3, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 4, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 5, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 6, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 7, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 8, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 9, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 10, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 11, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 12, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 13, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 17, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 18, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 19, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 20, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 21, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 22, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 23, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 24, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 25, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 26, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 27, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 28, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 29, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 30, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 31, 15) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 2) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 3) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 4) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 5) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 6) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 7) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 8) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 9) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 10) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 11) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 12) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 18) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 19) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 20) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 21) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 22) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 23) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 24) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 25) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 26) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 27) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 28) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 29) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 30) \
X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 31) \
/* Testing the optimized shufflevectors for x86. */ \
/* (Src0, Src0, Src0, Src0) */ \
X(3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0) \
X(2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3) \
/* (Src0, Src0, Src0, Src1) */ \
X(0, 1, 3, 7, 0, 1, 3, 7, 0, 1, 3, 7, 0, 1, 3, 7) \
X(2, 2, 2, 5, 2, 2, 2, 5, 2, 2, 2, 5, 2, 2, 2, 5) \
/* (Src0, Src0, Src1, Src0) */ \
X(1, 2, 6, 0, 1, 2, 6, 0, 1, 2, 6, 0, 1, 2, 6, 0) \
X(3, 2, 3, 5, 3, 2, 3, 5, 3, 2, 3, 5, 3, 2, 3, 5) \
/* (Src0, Src0, Src1, Src1) */ \
X(2, 3, 5, 7, 2, 3, 5, 7, 2, 3, 5, 7, 2, 3, 5, 7) \
X(3, 3, 7, 6, 3, 3, 7, 6, 3, 3, 7, 6, 3, 3, 7, 6) \
/* (Src0, Src1, Src0, Src0) */ \
X(3, 7, 0, 0, 3, 7, 0, 0, 3, 7, 0, 0, 3, 7, 0, 0) \
X(1, 7, 3, 3, 1, 7, 3, 3, 1, 7, 3, 3, 1, 7, 3, 3) \
/* (Src0, Src1, Src0, Src1) */ \
X(0, 4, 1, 5, 0, 4, 1, 5, 0, 4, 1, 5, 0, 4, 1, 5) \
X(0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7) \
X(3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4) \
X(1, 7, 3, 6, 1, 7, 3, 6, 1, 7, 3, 6, 1, 7, 3, 6) \
X(0, 6, 3, 7, 0, 6, 3, 7, 0, 6, 3, 7, 0, 6, 3, 7) \
/* (Src0, Src1, Src1, Src0) */ \
X(0, 7, 7, 0, 0, 7, 7, 0, 0, 7, 7, 0, 0, 7, 7, 0) \
X(3, 4, 4, 3, 3, 4, 4, 3, 3, 4, 4, 3, 3, 4, 4, 3) \
X(1, 5, 3, 6, 1, 5, 3, 6, 1, 5, 3, 6, 1, 5, 3, 6) \
X(0, 6, 2, 6, 0, 6, 2, 6, 0, 6, 2, 6, 0, 6, 2, 6) \
/* (Src0, Src1, Src1, Src1) */ \
X(0, 7, 7, 7, 0, 7, 7, 7, 0, 7, 7, 7, 0, 7, 7, 7) \
X(3, 6, 7, 4, 3, 6, 7, 4, 3, 6, 7, 4, 3, 6, 7, 4) \
/* (Src1, Src0, Src0, Src0) */ \
X(4, 3, 3, 0, 4, 3, 3, 0, 4, 3, 3, 0, 4, 3, 3, 0) \
X(6, 0, 0, 3, 6, 0, 0, 3, 6, 0, 0, 3, 6, 0, 0, 3) \
/* (Src1, Src0, Src0, Src1) */ \
X(4, 3, 2, 6, 4, 3, 2, 6, 4, 3, 2, 6, 4, 3, 2, 6) \
X(5, 2, 1, 4, 5, 2, 1, 4, 5, 2, 1, 4, 5, 2, 1, 4) \
X(6, 0, 0, 4, 6, 0, 0, 4, 6, 0, 0, 4, 6, 0, 0, 4) \
X(5, 2, 2, 6, 5, 2, 2, 6, 5, 2, 2, 6, 5, 2, 2, 6) \
/* (Src1, Src0, Src1, Src0) */ \
X(4, 0, 5, 1, 4, 0, 5, 1, 4, 0, 5, 1, 4, 0, 5, 1) \
X(7, 2, 7, 2, 7, 2, 7, 2, 7, 2, 7, 2, 7, 2, 7, 2) \
X(4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3) \
X(7, 1, 5, 0, 7, 1, 5, 0, 7, 1, 5, 0, 7, 1, 5, 0) \
X(4, 3, 6, 2, 4, 3, 6, 2, 4, 3, 6, 2, 4, 3, 6, 2) \
/* (Src1, Src0, Src1, Src1) */ \
X(6, 0, 3, 2, 6, 0, 3, 2, 6, 0, 3, 2, 6, 0, 3, 2) \
X(4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7) \
/* (Src1, Src1, Src0, Src0) */ \
X(6, 5, 2, 3, 6, 5, 2, 3, 6, 5, 2, 3, 6, 5, 2, 3) \
X(7, 7, 0, 1, 7, 7, 0, 1, 7, 7, 0, 1, 7, 7, 0, 1) \
/* (Src1, Src1, Src0, Src1) */ \
X(7, 6, 0, 5, 7, 6, 0, 5, 7, 6, 0, 5, 7, 6, 0, 5) \
X(4, 5, 3, 7, 4, 5, 3, 7, 4, 5, 3, 7, 4, 5, 3, 7) \
/* (Src1, Src1, Src1, Src0) */ \
X(6, 6, 4, 0, 6, 6, 4, 0, 6, 6, 4, 0, 6, 6, 4, 0) \
X(7, 4, 6, 1, 7, 4, 6, 1, 7, 4, 6, 1, 7, 4, 6, 1) \
/* (Src1, Src1, Src1, Src1) */ \
X(7, 6, 4, 4, 7, 6, 4, 4, 7, 6, 4, 4, 7, 6, 4, 4) \
X(5, 7, 7, 6, 5, 7, 7, 6, 5, 7, 7, 6, 5, 7, 7, 6)
/* End of x86-optimized shufflevectors. */
//#define X(...)
// ShuffleVectorTest declares the template functions that are used to shuffle
// the test vectors. It has specific template methods depending on how many
// elements VecTy has.
/// ShuffleVectorTest<VecTy> bundles the shuffle routines generated from
/// SHUFFLETESTS_TABLE for one vector type.
///
/// Every table row supplies 16 indexes. Three shufflevector<> overloads are
/// declared, selected with std::enable_if on Vectors<Ty>::NumElements (4, 8, or
/// 16); each overload consumes as many leading indexes as the vector has
/// elements and ignores the rest. Every index that is used is reduced modulo
/// (2 * NumElements) before it is handed to __builtin_shufflevector.
template <typename VecTy> class ShuffleVectorTest {
  //----------------------------------------------------------------------------
  //
  // V4??? Shuffles.
  //
  //----------------------------------------------------------------------------
  // Uses Idx0..Idx3; the remaining twelve (unnamed) indexes are ignored.
  template <typename Ty, uint8_t Idx0, uint8_t Idx1, uint8_t Idx2, uint8_t Idx3,
            uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
            uint8_t, uint8_t, uint8_t, uint8_t, uint8_t>
  static typename std::enable_if<Vectors<Ty>::NumElements == 4, Ty>::type
  shufflevector(Ty V1, Ty V2) {
    const uint8_t NumElements = 4;
    return __builtin_shufflevector(
        V1, V2, Idx0 % (NumElements * 2), Idx1 % (NumElements * 2),
        Idx2 % (NumElements * 2), Idx3 % (NumElements * 2));
  }

  //----------------------------------------------------------------------------
  //
  // V8??? Shuffles.
  //
  //----------------------------------------------------------------------------
  // Uses Idx0..Idx7; the remaining eight (unnamed) indexes are ignored.
  template <typename Ty, uint8_t Idx0, uint8_t Idx1, uint8_t Idx2, uint8_t Idx3,
            uint8_t Idx4, uint8_t Idx5, uint8_t Idx6, uint8_t Idx7, uint8_t,
            uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t>
  static typename std::enable_if<Vectors<Ty>::NumElements == 8, Ty>::type
  shufflevector(Ty V1, Ty V2) {
    const uint8_t NumElements = 8;
    return __builtin_shufflevector(
        V1, V2, Idx0 % (NumElements * 2), Idx1 % (NumElements * 2),
        Idx2 % (NumElements * 2), Idx3 % (NumElements * 2),
        Idx4 % (NumElements * 2), Idx5 % (NumElements * 2),
        Idx6 % (NumElements * 2), Idx7 % (NumElements * 2));
  }

  //----------------------------------------------------------------------------
  //
  // V16??? Shuffles.
  //
  //----------------------------------------------------------------------------
  // Uses all sixteen indexes. Idx0 is reduced modulo (2 * NumElements) like
  // every other index so the three overloads treat their inputs uniformly.
  template <typename Ty, uint8_t Idx0, uint8_t Idx1, uint8_t Idx2, uint8_t Idx3,
            uint8_t Idx4, uint8_t Idx5, uint8_t Idx6, uint8_t Idx7,
            uint8_t Idx8, uint8_t Idx9, uint8_t Idx10, uint8_t Idx11,
            uint8_t Idx12, uint8_t Idx13, uint8_t Idx14, uint8_t Idx15>
  static typename std::enable_if<Vectors<Ty>::NumElements == 16, Ty>::type
  shufflevector(Ty V1, Ty V2) {
    const uint8_t NumElements = 16;
    return __builtin_shufflevector(
        V1, V2, Idx0 % (NumElements * 2), Idx1 % (NumElements * 2),
        Idx2 % (NumElements * 2), Idx3 % (NumElements * 2),
        Idx4 % (NumElements * 2), Idx5 % (NumElements * 2),
        Idx6 % (NumElements * 2), Idx7 % (NumElements * 2),
        Idx8 % (NumElements * 2), Idx9 % (NumElements * 2),
        Idx10 % (NumElements * 2), Idx11 % (NumElements * 2),
        Idx12 % (NumElements * 2), Idx13 % (NumElements * 2),
        Idx14 % (NumElements * 2), Idx15 % (NumElements * 2));
  }

public:
  /// Signature shared by every generated shuffle routine.
  typedef VecTy (*TestFn)(VecTy V1, VecTy V2);
  /// One entry per SHUFFLETESTS_TABLE row (defined out of class).
  static TestFn Tests[];
  /// Number of entries in Tests (defined out of class).
  static const uint32_t NumTests;
};
// Definition of ShuffleVectorTest<Ty>::Tests. X is temporarily defined so that
// each X(...) row of SHUFFLETESTS_TABLE expands to the address of the
// shufflevector<Ty, ...> instantiation for that row's indexes, in table order.
template <typename Ty>
typename ShuffleVectorTest<Ty>::TestFn ShuffleVectorTest<Ty>::Tests[] = {
#define X(...) &ShuffleVectorTest<Ty>::shufflevector<Ty, __VA_ARGS__>,
SHUFFLETESTS_TABLE
#undef X
};
// Definition of ShuffleVectorTest<Ty>::NumTests. Each X(...) row of
// SHUFFLETESTS_TABLE expands to "+1", so the initializer sums to the number of
// rows in the table.
template <typename Ty>
const uint32_t ShuffleVectorTest<Ty>::NumTests = 0
#define X(...) +1
SHUFFLETESTS_TABLE
#undef X
;
} // end of anonymous namespace
// C-linkage entry points generated per vector type.
extern "C" {
// For every row of VECTOR_TYPE_TABLE this defines:
//   shufflevector_<Ty>(V1, V2, Which) - calls entry Which of
//     ShuffleVectorTest<TY(Ty)>::Tests on (V1, V2);
//   shufflevector_count_<Ty>()        - returns ShuffleVectorTest<TY(Ty)>::NumTests.
// NOTE(review): Which indexes Tests[] without a range check; callers are
// presumably expected to keep it below shufflevector_count_<Ty>() - confirm.
#define X(Ty, ElmtTy, CastTy) \
TY(Ty) shufflevector_##Ty(TY(Ty) V1, TY(Ty) V2, uint32_t Which) { \
return (*ShuffleVectorTest<TY(Ty)>::Tests[Which])(V1, V2); \
} \
uint32_t shufflevector_count_##Ty() { \
return ShuffleVectorTest<TY(Ty)>::NumTests; \
}
VECTOR_TYPE_TABLE
#undef X
// Same pair of functions for every row of I1_VECTOR_TYPE_TABLE.
#define X(I1Ty, Ty, numelements) \
TY(I1Ty) shufflevector_##I1Ty(TY(I1Ty) V1, TY(I1Ty) V2, uint32_t Which) { \
return (*ShuffleVectorTest<TY(I1Ty)>::Tests[Which])(V1, V2); \
} \
uint32_t shufflevector_count_##I1Ty() { \
return ShuffleVectorTest<TY(I1Ty)>::NumTests; \
}
I1_VECTOR_TYPE_TABLE
#undef X
} // end of extern "C"
......@@ -18,8 +18,8 @@
#include "vectors.h"
// The VectorOps<> class acts like Vectors<> but also has insertelement,
// Subzero_insertelement, extractelement, and Subzero_extractelement
// fields.
// Subzero_insertelement, extractelement, Subzero_extractelement,
// shufflevector, Subzero_shufflevector, and shufflevector_count fields.
template <typename T> struct VectorOps;
#define FIELD(TYNAME, FIELDNAME) VectorOps<TYNAME>::FIELDNAME
......@@ -28,15 +28,21 @@ template <typename T> struct VectorOps;
#define DECLARE_VECTOR_OPS(NAME) \
template <> struct VectorOps<NAME> : public Vectors<NAME> { \
static Ty (*insertelement)(Ty, CastTy, int32_t); \
static Ty (*shufflevector)(Ty, Ty, uint32_t); \
static CastTy (*extractelement)(Ty, int32_t); \
static Ty (*Subzero_insertelement)(Ty, CastTy, int32_t); \
static Ty (*Subzero_shufflevector)(Ty, Ty, uint32_t); \
static CastTy (*Subzero_extractelement)(Ty, int32_t); \
static uint32_t (*shufflevector_count)(); \
}; \
extern "C" { \
TY(NAME) insertelement_##NAME(TY(NAME), CASTTY(NAME), int32_t); \
TY(NAME) Subzero_insertelement_##NAME(TY(NAME), CASTTY(NAME), int32_t); \
CASTTY(NAME) extractelement_##NAME(TY(NAME), int32_t); \
CASTTY(NAME) Subzero_extractelement_##NAME(TY(NAME), int32_t); \
TY(NAME) shufflevector_##NAME(TY(NAME), TY(NAME), uint32_t); \
TY(NAME) Subzero_shufflevector_##NAME(TY(NAME), TY(NAME), uint32_t); \
uint32_t shufflevector_count_##NAME(); \
} \
TY(NAME) (*FIELD(NAME, insertelement))(TY(NAME), CASTTY(NAME), int32_t) = \
&insertelement_##NAME; \
......@@ -45,7 +51,12 @@ template <typename T> struct VectorOps;
CASTTY(NAME) (*FIELD(NAME, extractelement))(TY(NAME), int32_t) = \
&extractelement_##NAME; \
CASTTY(NAME) (*FIELD(NAME, Subzero_extractelement))(TY(NAME), int32_t) = \
&Subzero_extractelement_##NAME;
&Subzero_extractelement_##NAME; \
TY(NAME) (*FIELD(NAME, shufflevector))(TY(NAME), TY(NAME), uint32_t) = \
&shufflevector_##NAME; \
TY(NAME) (*FIELD(NAME, Subzero_shufflevector))( \
TY(NAME), TY(NAME), uint32_t) = &Subzero_shufflevector_##NAME; \
uint32_t (*FIELD(NAME, shufflevector_count))() = &shufflevector_count_##NAME;
#define X(ty, eltty, castty) DECLARE_VECTOR_OPS(ty)
VECTOR_TYPE_TABLE
......
......@@ -130,6 +130,41 @@ void testExtractElement(size_t &TotalTests, size_t &Passes, size_t &Failures) {
free(TestVectors);
}
/// Cross-checks shufflevector for vector type T.
///
/// For every ordered pair of test vectors and every shuffle routine reported by
/// VectorOps<T>::shufflevector_count(), the result of
/// VectorOps<T>::shufflevector is compared bytewise against the result of
/// VectorOps<T>::Subzero_shufflevector.
///
/// \param TotalTests incremented once per comparison.
/// \param Passes     incremented when both results are bytewise identical.
/// \param Failures   incremented otherwise; a diagnostic with both inputs, the
///                   shuffle number, and both results is written to std::cout.
template <typename T>
void testShuffleVector(size_t &TotalTests, size_t &Passes, size_t &Failures) {
  typedef typename VectorOps<T>::Ty Ty;

  size_t NumTestVectors;
  Ty *TestVectors = getTestVectors<T>(NumTestVectors);
  // The number of shuffle routines does not change while the test runs.
  const uint32_t NumShuffles = VectorOps<T>::shufflevector_count();

  for (size_t VI = 0; VI < NumTestVectors; ++VI) {
    Ty Vect0 = TestVectors[VI];
    for (size_t VJ = 0; VJ < NumTestVectors; ++VJ) {
      Ty Vect1 = TestVectors[VJ];
      for (uint32_t Which = 0; Which < NumShuffles; ++Which) {
        Ty ResultLlc = VectorOps<T>::shufflevector(Vect0, Vect1, Which);
        Ty ResultSz = VectorOps<T>::Subzero_shufflevector(Vect0, Vect1, Which);
        ++TotalTests;
        if (!memcmp(&ResultLlc, &ResultSz, sizeof(ResultLlc))) {
          ++Passes;
        } else {
          ++Failures;
          // Report the shuffle number (Which), not the index of the second
          // test vector, so the failing table row can be identified.
          std::cout << "shufflevector<" << VectorOps<T>::TypeName << ">(Vect0=";
          std::cout << vectAsString<T>(Vect0)
                    << ", Vect1=" << vectAsString<T>(Vect1)
                    << ", Which=" << Which << ")\n";
          std::cout << "llc=" << vectAsString<T>(ResultLlc) << "\n";
          std::cout << "sz =" << vectAsString<T>(ResultSz) << "\n";
        }
      }
    }
  }

  free(TestVectors);
}
int main(int argc, char *argv[]) {
size_t TotalTests = 0;
size_t Passes = 0;
......@@ -157,6 +192,17 @@ int main(int argc, char *argv[]) {
testExtractElement<v4ui32>(TotalTests, Passes, Failures);
testExtractElement<v4f32>(TotalTests, Passes, Failures);
testShuffleVector<v4i1>(TotalTests, Passes, Failures);
testShuffleVector<v8i1>(TotalTests, Passes, Failures);
testShuffleVector<v16i1>(TotalTests, Passes, Failures);
testShuffleVector<v16si8>(TotalTests, Passes, Failures);
testShuffleVector<v16ui8>(TotalTests, Passes, Failures);
testShuffleVector<v8si16>(TotalTests, Passes, Failures);
testShuffleVector<v8ui16>(TotalTests, Passes, Failures);
testShuffleVector<v4si32>(TotalTests, Passes, Failures);
testShuffleVector<v4ui32>(TotalTests, Passes, Failures);
testShuffleVector<v4f32>(TotalTests, Passes, Failures);
std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
<< " Failures=" << Failures << "\n";
......
......@@ -432,6 +432,8 @@ public:
void pshufd(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
void pshufd(Type Ty, XmmRegister dst, const Address &src,
const Immediate &mask);
/// punpckldq emitters: one overload takes an XMM register source, the other an
/// Address source. The leading Type parameter is unnamed.
void punpckldq(Type, XmmRegister Dst, XmmRegister Src);
void punpckldq(Type, XmmRegister Dst, const Address &Src);
void shufps(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
void shufps(Type Ty, XmmRegister dst, const Address &src,
const Immediate &mask);
......
......@@ -1565,6 +1565,29 @@ void AssemblerX86Base<TraitsType>::pshufd(Type /* Ty */, XmmRegister dst,
}
/// Encodes punpckldq with a register source operand.
///
/// Emits, in order: the 0x66 prefix, whatever emitRexRB produces for
/// (Dst, Src), the opcode bytes 0x0F 0x62, and finally the register operand
/// via emitXmmRegisterOperand. The Type argument is unnamed and unused.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::punpckldq(Type, XmmRegister Dst,
XmmRegister Src) {
// Reserve room in the buffer before any bytes are written.
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
emitRexRB(RexTypeIrrelevant, Dst, Src);
emitUint8(0x0F);
emitUint8(0x62);
emitXmmRegisterOperand(Dst, Src);
}
/// Encodes punpckldq with an Address source operand.
///
/// Emits, in order: the 0x66 prefix, the address-size override prefix (via
/// emitAddrSizeOverridePrefix), whatever emitRex produces for (Src, Dst), the
/// opcode bytes 0x0F 0x62, and finally the operand via emitOperand. The Type
/// argument is unnamed and unused.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::punpckldq(Type, XmmRegister Dst,
const Address &Src) {
// Reserve room in the buffer before any bytes are written.
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
emitAddrSizeOverridePrefix();
emitRex(RexTypeIrrelevant, Src, Dst);
emitUint8(0x0F);
emitUint8(0x62);
emitOperand(gprEncoding(Dst), Src);
}
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::shufps(Type /* Ty */, XmmRegister dst,
XmmRegister src,
const Immediate &imm) {
......
......@@ -50,8 +50,8 @@ const TargetX8632Traits::InstCmppsAttributesType
const TargetX8632Traits::TypeAttributesType
TargetX8632Traits::TypeAttributes[] = {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
{ cvt, sdss, pdps, spsd, pack, width, fld } \
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
{ cvt, sdss, pdps, spsd, pack, unpack, width, fld } \
,
ICETYPEX8632_TABLE
#undef X
......
......@@ -212,22 +212,22 @@
//#define X(val, emit)
#define ICETYPEX8632_TABLE \
/* tag, element type, cvt , sdss, pdps, spsd, pack, width, fld */ \
X(void, void, "?", "", "", "", "", "", "") \
X(i1, void, "si", "", "", "", "", "b", "") \
X(i8, void, "si", "", "", "", "", "b", "") \
X(i16, void, "si", "", "", "", "", "w", "") \
X(i32, void, "si", "", "", "", "", "l", "") \
X(i64, void, "si", "", "", "", "", "q", "") \
X(f32, void, "ss", "ss", "ps", "ss", "d", "", "s") \
X(f64, void, "sd", "sd", "pd", "sd", "q", "", "l") \
X(v4i1, i32, "?", "", "", "", "d", "", "") \
X(v8i1, i16, "?", "", "", "", "w", "", "") \
X(v16i1, i8, "?", "", "", "", "b", "", "") \
X(v16i8, i8, "?", "", "", "", "b", "", "") \
X(v8i16, i16, "?", "", "", "", "w", "", "") \
X(v4i32, i32, "dq", "", "", "", "d", "", "") \
X(v4f32, f32, "ps", "", "ps", "ps", "d", "", "")
//#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld)
/* tag, element type, cvt , sdss, pdps, spsd, pack, unpack, width, fld */ \
X(void, void, "?", "", "", "", "", "", "", "") \
X(i1, void, "si", "", "", "", "", "", "b", "") \
X(i8, void, "si", "", "", "", "", "", "b", "") \
X(i16, void, "si", "", "", "", "", "", "w", "") \
X(i32, void, "si", "", "", "", "", "", "l", "") \
X(i64, void, "si", "", "", "", "", "", "q", "") \
X(f32, void, "ss", "ss", "ps", "ss", "d", "", "", "s") \
X(f64, void, "sd", "sd", "pd", "sd", "q", "", "", "l") \
X(v4i1, i32, "?", "", "", "", "d", "dq", "", "") \
X(v8i1, i16, "?", "", "", "", "w", "wd", "", "") \
X(v16i1, i8, "?", "", "", "", "b", "bw", "", "") \
X(v16i8, i8, "?", "", "", "", "b", "bw", "", "") \
X(v8i16, i16, "?", "", "", "", "w", "wd", "", "") \
X(v4i32, i32, "dq", "", "", "", "d", "dq", "", "") \
X(v4f32, f32, "ps", "", "ps", "ps", "d", "dq", "", "")
//#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld)
#endif // SUBZERO_SRC_ICEINSTX8632_DEF
......@@ -51,8 +51,8 @@ const TargetX8664Traits::InstCmppsAttributesType
const TargetX8664Traits::TypeAttributesType
TargetX8664Traits::TypeAttributes[] = {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
{ cvt, sdss, pdps, spsd, pack, width, fld } \
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
{ cvt, sdss, pdps, spsd, pack, unpack, width, fld } \
,
ICETYPEX8664_TABLE
#undef X
......
......@@ -293,22 +293,22 @@
//#define X(val, emit)
#define ICETYPEX8664_TABLE \
/* tag, element type, cvt , sdss, pdps, spsd, pack, width, fld */ \
X(void, void, "?", "", "", "", "", "", "") \
X(i1, void, "si", "", "", "", "", "b", "") \
X(i8, void, "si", "", "", "", "", "b", "") \
X(i16, void, "si", "", "", "", "", "w", "") \
X(i32, void, "si", "", "", "", "", "l", "") \
X(i64, void, "si", "", "", "", "", "q", "") \
X(f32, void, "ss", "ss", "ps", "ss", "d", "", "s") \
X(f64, void, "sd", "sd", "pd", "sd", "q", "", "l") \
X(v4i1, i32, "?", "", "", "", "d", "", "") \
X(v8i1, i16, "?", "", "", "", "w", "", "") \
X(v16i1, i8, "?", "", "", "", "b", "", "") \
X(v16i8, i8, "?", "", "", "", "b", "", "") \
X(v8i16, i16, "?", "", "", "", "w", "", "") \
X(v4i32, i32, "dq", "", "", "", "d", "", "") \
X(v4f32, f32, "ps", "", "ps", "ps", "d", "", "")
//#define X(tag, elementty, cvt, sdss, pdps, pack, width, fld)
/* tag, element type, cvt , sdss, pdps, spsd, pack, unpack, width, fld */ \
X(void, void, "?", "", "", "", "", "", "", "") \
X(i1, void, "si", "", "", "", "", "", "b", "") \
X(i8, void, "si", "", "", "", "", "", "b", "") \
X(i16, void, "si", "", "", "", "", "", "w", "") \
X(i32, void, "si", "", "", "", "", "", "l", "") \
X(i64, void, "si", "", "", "", "", "", "q", "") \
X(f32, void, "ss", "ss", "ps", "ss", "d", "", "", "s") \
X(f64, void, "sd", "sd", "pd", "sd", "q", "", "", "l") \
X(v4i1, i32, "?", "", "", "", "d", "dq", "", "") \
X(v8i1, i16, "?", "", "", "", "w", "wd", "", "") \
X(v16i1, i8, "?", "", "", "", "b", "bw", "", "") \
X(v16i8, i8, "?", "", "", "", "b", "bw", "", "") \
X(v8i16, i16, "?", "", "", "", "w", "wd", "", "") \
X(v4i32, i32, "dq", "", "", "", "d", "dq", "", "") \
X(v4f32, f32, "ps", "", "ps", "ps", "d", "dq", "", "")
//#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld)
#endif // SUBZERO_SRC_ICEINSTX8664_DEF
......@@ -143,6 +143,7 @@ template <typename TraitsType> struct InstImpl {
Pop,
Por,
Pshufd,
Punpckl,
Psll,
Psra,
Psrl,
......@@ -183,7 +184,7 @@ template <typename TraitsType> struct InstImpl {
IacaEnd
};
enum SseSuffix { None, Packed, Scalar, Integral };
enum SseSuffix { None, Packed, Unpack, Scalar, Integral };
static const char *getWidthString(Type Ty);
static const char *getFldString(Type Ty);
......@@ -841,6 +842,9 @@ template <typename TraitsType> struct InstImpl {
case InstX86Base::SseSuffix::Packed:
SuffixString = Traits::TypeAttributes[DestTy].PdPsString;
break;
case InstX86Base::SseSuffix::Unpack:
SuffixString = Traits::TypeAttributes[DestTy].UnpackString;
break;
case InstX86Base::SseSuffix::Scalar:
SuffixString = Traits::TypeAttributes[DestTy].SdSsString;
break;
......@@ -2839,6 +2843,23 @@ template <typename TraitsType> struct InstImpl {
private:
InstX86IacaEnd(Cfg *Func);
};
/// X86 "punpckl" instruction: an XMM binary operation whose emitted suffix is
/// selected with SseSuffix::Unpack. Instances are placed in storage obtained
/// from the Cfg and can only be built through create().
class InstX86Punpckl
    : public InstX86BaseBinopXmm<InstX86Base::Punpckl, false,
                                 InstX86Base::SseSuffix::Unpack> {
  /// Forwards all arguments to the InstX86BaseBinopXmm constructor.
  InstX86Punpckl(Cfg *Func, Variable *Dest, Operand *Source)
      : InstX86BaseBinopXmm<InstX86Base::Punpckl, false,
                            InstX86Base::SseSuffix::Unpack>(Func, Dest,
                                                            Source) {}

public:
  /// Constructs a new instruction in memory returned by
  /// Func->allocate<InstX86Punpckl>().
  static InstX86Punpckl *create(Cfg *Func, Variable *Dest, Operand *Source) {
    auto *Storage = Func->allocate<InstX86Punpckl>();
    return new (Storage) InstX86Punpckl(Func, Dest, Source);
  }
};
}; // struct InstImpl
/// struct Insts is a template that can be used to instantiate all the X86
......@@ -2960,6 +2981,8 @@ template <typename TraitsType> struct Insts {
using IacaStart = typename InstImpl<TraitsType>::InstX86IacaStart;
using IacaEnd = typename InstImpl<TraitsType>::InstX86IacaEnd;
using Punpckl = typename InstImpl<TraitsType>::InstX86Punpckl;
};
/// X86 Instructions have static data (particularly, opcodes and instruction
......@@ -3189,6 +3212,9 @@ template <typename TraitsType> struct Insts {
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Pshufd::Base::Opcode = "pshufd"; \
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Punpckl::Base::Opcode = "punpckl"; \
/* Inplace GPR ops */ \
template <> \
template <> \
......@@ -3550,6 +3576,12 @@ template <typename TraitsType> struct Insts {
&InstImpl<TraitsType>::Assembler::psrl, \
&InstImpl<TraitsType>::Assembler::psrl, \
&InstImpl<TraitsType>::Assembler::psrl}; \
template <> \
template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
InstImpl<TraitsType>::InstX86Punpckl::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::punpckldq, \
&InstImpl<TraitsType>::Assembler::punpckldq}; \
} \
}
......
......@@ -101,7 +101,7 @@ const size_t TargetX8632Traits::TableIcmp64Size =
const TargetX8632Traits::TableTypeX8632AttributesType
TargetX8632Traits::TableTypeX8632Attributes[] = {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
{ IceType_##elementty } \
,
ICETYPEX8632_TABLE
......@@ -459,7 +459,8 @@ ICEINSTICMP_TABLE
namespace dummy3 {
// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) _tmp_##tag,
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
_tmp_##tag,
ICETYPEX8632_TABLE
#undef X
_num
......@@ -471,7 +472,7 @@ ICETYPE_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
static const int _table2_##tag = _tmp_##tag; \
static_assert(_table1_##tag == _table2_##tag, \
"Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
......
......@@ -976,13 +976,14 @@ public:
} InstCmppsAttributes[];
static const struct TypeAttributesType {
const char *CvtString; // i (integer), s (single FP), d (double FP)
const char *SdSsString; // ss, sd, or <blank>
const char *PdPsString; // ps, pd, or <blank>
const char *SpsdString; // ss, sd, ps, pd, or <blank>
const char *PackString; // b, w, d, or <blank>
const char *WidthString; // b, w, l, q, or <blank>
const char *FldString; // s, l, or <blank>
const char *CvtString; // i (integer), s (single FP), d (double FP)
const char *SdSsString; // ss, sd, or <blank>
const char *PdPsString; // ps, pd, or <blank>
const char *SpsdString; // ss, sd, ps, pd, or <blank>
const char *PackString; // b, w, d, or <blank>
const char *UnpackString; // bw, wd, dq, or <blank>
const char *WidthString; // b, w, l, q, or <blank>
const char *FldString; // s, l, or <blank>
} TypeAttributes[];
static const char *InstSegmentRegNames[];
......
......@@ -94,7 +94,7 @@ const size_t TargetX8664Traits::TableIcmp64Size =
const TargetX8664Traits::TableTypeX8664AttributesType
TargetX8664Traits::TableTypeX8664Attributes[] = {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
{ IceType_##elementty } \
,
ICETYPEX8664_TABLE
......@@ -787,7 +787,8 @@ ICEINSTICMP_TABLE
namespace dummy3 {
// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) _tmp_##tag,
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
_tmp_##tag,
ICETYPEX8664_TABLE
#undef X
_num
......@@ -799,7 +800,7 @@ ICETYPE_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
static const int _table2_##tag = _tmp_##tag; \
static_assert(_table1_##tag == _table2_##tag, \
"Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
......
......@@ -1021,13 +1021,14 @@ public:
} InstCmppsAttributes[];
static const struct TypeAttributesType {
const char *CvtString; // i (integer), s (single FP), d (double FP)
const char *SdSsString; // ss, sd, or <blank>
const char *PdPsString; // ps, pd, or <blank>
const char *SpSdString; // ss, sd, ps, pd, or <blank>
const char *PackString; // b, w, d, or <blank>
const char *WidthString; // b, w, l, q, or <blank>
const char *FldString; // s, l, or <blank>
const char *CvtString; // i (integer), s (single FP), d (double FP)
const char *SdSsString; // ss, sd, or <blank>
const char *PdPsString; // ps, pd, or <blank>
const char *SpSdString; // ss, sd, ps, pd, or <blank>
const char *PackString; // b, w, d, or <blank>
const char *UnpackString; // bw, wd, dq, or <blank>
const char *WidthString; // b, w, l, q, or <blank>
const char *FldString; // s, l, or <blank>
} TypeAttributes[];
};
......
......@@ -801,6 +801,10 @@ protected:
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Por>(Dest, Src0);
}
/// Inserts a Punpckl instruction (Dest, Src0) into the current lowering
/// context. An AutoMemorySandboxer is constructed over Dest and Src0 first and
/// remains alive until the function returns.
void _punpckl(Variable *Dest, Operand *Src0) {
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Punpckl>(Dest, Src0);
}
void _pshufd(Variable *Dest, Operand *Src0, Operand *Src1) {
AutoMemorySandboxer<> _(this, &Dest, &Src0, &Src1);
Context.insert<typename Traits::Insts::Pshufd>(Dest, Src0, Src1);
......@@ -1082,6 +1086,23 @@ private:
BoolFolding<Traits> FoldingInfo;
/// Helpers for lowering ShuffleVector
/// @{
/// Returns a new register holding the four elements of Src selected by
/// Index0..Index3 (all indexes must refer to the same source vector).
Variable *lowerShuffleVector_AllFromSameSrc(Variable *Src, SizeT Index0,
SizeT Index1, SizeT Index2,
SizeT Index3);
/// Placeholder index for a slot whose contents do not matter. The
/// TwoFromSameSrc asserts accept it for Index1/Index3 without checking which
/// source it belongs to.
static constexpr SizeT IGNORE_INDEX = 0x80000000u;
/// Returns a new register built from two elements of Src0 (Index0, Index1)
/// and two elements of Src1 (Index2, Index3).
Variable *lowerShuffleVector_TwoFromSameSrc(Variable *Src0, SizeT Index0,
SizeT Index1, Variable *Src1,
SizeT Index2, SizeT Index3);
/// Indexes passed by lowerShuffleVector to address the two meaningful elements
/// of a vector produced by lowerShuffleVector_UnifyFromDifferentSrcs.
/// NOTE(review): values 0 and 2 presumably match the lanes where shufps places
/// the first selected element of each operand - confirm against the ISA manual.
static constexpr SizeT UNIFIED_INDEX_0 = 0;
static constexpr SizeT UNIFIED_INDEX_1 = 2;
/// Combines one element of Src0 (Index0) and one element of Src1 (Index1) into
/// a single new register; the other two slots are filled using IGNORE_INDEX.
Variable *lowerShuffleVector_UnifyFromDifferentSrcs(Variable *Src0,
SizeT Index0,
Variable *Src1,
SizeT Index1);
/// @}
static FixupKind PcRelFixup;
static FixupKind AbsFixup;
};
......
......@@ -5610,25 +5610,295 @@ void TargetX86Base<TraitsType>::lowerRet(const InstRet *Instr) {
keepEspLiveAtExit();
}
/// Packs four element selectors into one 8-bit shuffle immediate.
///
/// Only the low two bits of each index are kept. Index0 occupies bits 1:0,
/// Index1 bits 3:2, Index2 bits 5:4 and Index3 bits 7:6 of the result.
inline uint32_t makePshufdMask(SizeT Index0, SizeT Index1, SizeT Index2,
                               SizeT Index3) {
  const SizeT Selectors[] = {Index0, Index1, Index2, Index3};
  SizeT Mask = 0;
  SizeT Shift = 0;
  for (const SizeT Selector : Selectors) {
    // Each selector contributes one 2-bit field, lowest field first.
    Mask |= (Selector & 0x3) << Shift;
    Shift += 2;
  }
  assert(Mask < 256);
  return Mask;
}
/// Lowers a shuffle whose four result elements all come from the same source.
///
/// Asserts that bit 2 of every index agrees with bit 2 of Index0, then emits a
/// single pshufd from Src (legalized to register or memory) into a fresh
/// register of Src's type, using the immediate built by makePshufdMask.
///
/// \returns the fresh register holding the shuffled vector.
template <typename TraitsType>
Variable *TargetX86Base<TraitsType>::lowerShuffleVector_AllFromSameSrc(
Variable *Src, SizeT Index0, SizeT Index1, SizeT Index2, SizeT Index3) {
// Bit 2 of an index is the bit compared by the asserts below to check that
// all four indexes agree on their source.
constexpr SizeT SrcBit = 1 << 2;
assert((Index0 & SrcBit) == (Index1 & SrcBit));
assert((Index0 & SrcBit) == (Index2 & SrcBit));
assert((Index0 & SrcBit) == (Index3 & SrcBit));
// Silences the unused-variable warning when asserts are compiled out.
(void)SrcBit;
const Type SrcTy = Src->getType();
auto *T = makeReg(SrcTy);
auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem);
auto *Mask =
Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3));
_pshufd(T, SrcRM, Mask);
return T;
}
/// Lowers a shuffle that takes Index0/Index1 from Src0 and Index2/Index3 from
/// Src1.
///
/// Asserts that Index0 and Index1 agree on bit 2 (unless Index1 is
/// IGNORE_INDEX), and likewise for Index2 and Index3, and that both sources
/// have the same type. Emits a movp of Src0 (legalized to a register) into a
/// fresh register followed by a shufps with Src1 (register or memory) and the
/// immediate built by makePshufdMask.
///
/// \returns the fresh register holding the shuffled vector.
template <typename TraitsType>
Variable *TargetX86Base<TraitsType>::lowerShuffleVector_TwoFromSameSrc(
Variable *Src0, SizeT Index0, SizeT Index1, Variable *Src1, SizeT Index2,
SizeT Index3) {
constexpr SizeT SrcBit = 1 << 2;
assert((Index0 & SrcBit) == (Index1 & SrcBit) || (Index1 == IGNORE_INDEX));
assert((Index2 & SrcBit) == (Index3 & SrcBit) || (Index3 == IGNORE_INDEX));
// Silences the unused-variable warning when asserts are compiled out.
(void)SrcBit;
const Type SrcTy = Src0->getType();
assert(Src1->getType() == SrcTy);
auto *T = makeReg(SrcTy);
auto *Src0R = legalizeToReg(Src0);
auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
auto *Mask =
Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3));
// T starts as a copy of Src0; shufps then combines it with Src1.
_movp(T, Src0R);
_shufps(T, Src1RM, Mask);
return T;
}
/// Gathers one element of Src0 (Index0) and one element of Src1 (Index1) into
/// a single register by delegating to lowerShuffleVector_TwoFromSameSrc with
/// IGNORE_INDEX in the second slot of each source.
///
/// \returns the register produced by lowerShuffleVector_TwoFromSameSrc.
template <typename TraitsType>
Variable *TargetX86Base<TraitsType>::lowerShuffleVector_UnifyFromDifferentSrcs(
Variable *Src0, SizeT Index0, Variable *Src1, SizeT Index1) {
return lowerShuffleVector_TwoFromSameSrc(Src0, Index0, IGNORE_INDEX, Src1,
Index1, IGNORE_INDEX);
}
/// Summarizes bit 2 of four shuffle indexes in a 4-bit value.
///
/// Bit K of the result is set exactly when bit 2 of IndexK is set, so the
/// result can be matched against the CASE_SRCS_IN(S0, S1, S2, S3) labels.
inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2,
                               SizeT Index3) {
  // Move bit 2 of each index down to bit 0 and drop everything else.
  const SizeT Sel0 = (Index0 >> 2) & 1u;
  const SizeT Sel1 = (Index1 >> 2) & 1u;
  const SizeT Sel2 = (Index2 >> 2) & 1u;
  const SizeT Sel3 = (Index3 >> 2) & 1u;
  return (Sel0 << 0) | (Sel1 << 1) | (Sel2 << 2) | (Sel3 << 3);
}
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerShuffleVector(
const InstShuffleVector *Instr) {
auto *Dest = Instr->getDest();
const Type DestTy = Dest->getType();
auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0));
auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1));
const SizeT NumElements = typeNumElements(DestTy);
auto *T = makeReg(DestTy);
switch (DestTy) {
default:
break;
// TODO(jpp): figure out how to properly lower this without scalarization.
// TODO(jpp): figure out how to properly lower the remaining cases without
// scalarization.
case IceType_v4i1:
case IceType_v4i32:
case IceType_v4f32: {
static constexpr SizeT ExpectedNumElements = 4;
assert(ExpectedNumElements == Instr->getNumIndexes());
const SizeT Index0 = Instr->getIndex(0)->getValue();
const SizeT Index1 = Instr->getIndex(1)->getValue();
const SizeT Index2 = Instr->getIndex(2)->getValue();
const SizeT Index3 = Instr->getIndex(3)->getValue();
Variable *T = nullptr;
switch (makeSrcSwitchMask(Index0, Index1, Index2, Index3)) {
#define CASE_SRCS_IN(S0, S1, S2, S3) \
case (((S0) << 0) | ((S1) << 1) | ((S2) << 2) | ((S3) << 3))
CASE_SRCS_IN(0, 0, 0, 0) : {
T = lowerShuffleVector_AllFromSameSrc(Src0, Index0, Index1, Index2,
Index3);
}
break;
CASE_SRCS_IN(0, 0, 0, 1) : {
assert(false && "Following code is untested but likely correct; test "
"and remove assert.");
auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2,
Src1, Index3);
T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified,
UNIFIED_INDEX_0, UNIFIED_INDEX_1);
}
break;
CASE_SRCS_IN(0, 0, 1, 0) : {
auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2,
Src0, Index3);
T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified,
UNIFIED_INDEX_0, UNIFIED_INDEX_1);
}
break;
CASE_SRCS_IN(0, 0, 1, 1) : {
assert(false && "Following code is untested but likely correct; test "
"and remove assert.");
T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Src1,
Index2, Index3);
}
break;
CASE_SRCS_IN(0, 1, 0, 0) : {
auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0,
Src1, Index1);
T = lowerShuffleVector_TwoFromSameSrc(
Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3);
}
break;
CASE_SRCS_IN(0, 1, 0, 1) : {
if (Index0 == 0 && (Index1 - ExpectedNumElements) == 0 && Index2 == 1 &&
(Index3 - ExpectedNumElements) == 1) {
assert(false && "Following code is untested but likely correct; test "
"and remove assert.");
auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
auto *Src0R = legalizeToReg(Src0);
T = makeReg(DestTy);
_movp(T, Src0R);
_punpckl(T, Src1RM);
} else if (Index0 == Index2 && Index1 == Index3) {
assert(false && "Following code is untested but likely correct; test "
"and remove assert.");
auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
Src0, Index0, Src1, Index1);
T = lowerShuffleVector_AllFromSameSrc(
Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0,
UNIFIED_INDEX_1);
} else {
assert(false && "Following code is untested but likely correct; test "
"and remove assert.");
auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
Src0, Index0, Src1, Index1);
auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
Src0, Index2, Src1, Index3);
T = lowerShuffleVector_TwoFromSameSrc(
Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
UNIFIED_INDEX_0, UNIFIED_INDEX_1);
}
}
break;
CASE_SRCS_IN(0, 1, 1, 0) : {
if (Index0 == Index3 && Index1 == Index2) {
auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
Src0, Index0, Src1, Index1);
T = lowerShuffleVector_AllFromSameSrc(
Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1,
UNIFIED_INDEX_0);
} else {
auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
Src0, Index0, Src1, Index1);
auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
Src1, Index2, Src0, Index3);
T = lowerShuffleVector_TwoFromSameSrc(
Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
UNIFIED_INDEX_0, UNIFIED_INDEX_1);
}
}
break;
CASE_SRCS_IN(0, 1, 1, 1) : {
assert(false && "Following code is untested but likely correct; test "
"and remove assert.");
auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0,
Src1, Index1);
T = lowerShuffleVector_TwoFromSameSrc(
Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3);
}
break;
CASE_SRCS_IN(1, 0, 0, 0) : {
auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0,
Src0, Index1);
T = lowerShuffleVector_TwoFromSameSrc(
Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3);
}
break;
CASE_SRCS_IN(1, 0, 0, 1) : {
if (Index0 == Index3 && Index1 == Index2) {
assert(false && "Following code is untested but likely correct; test "
"and remove assert.");
auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
Src1, Index0, Src0, Index1);
T = lowerShuffleVector_AllFromSameSrc(
Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1,
UNIFIED_INDEX_0);
} else {
assert(false && "Following code is untested but likely correct; test "
"and remove assert.");
auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
Src1, Index0, Src0, Index1);
auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
Src0, Index2, Src1, Index3);
T = lowerShuffleVector_TwoFromSameSrc(
Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
UNIFIED_INDEX_0, UNIFIED_INDEX_1);
}
}
break;
CASE_SRCS_IN(1, 0, 1, 0) : {
if ((Index0 - ExpectedNumElements) == 0 && Index1 == 0 &&
(Index2 - ExpectedNumElements) == 1 && Index3 == 1) {
auto *Src1RM = legalize(Src0, Legal_Reg | Legal_Mem);
auto *Src0R = legalizeToReg(Src1);
T = makeReg(DestTy);
_movp(T, Src0R);
_punpckl(T, Src1RM);
} else if (Index0 == Index2 && Index1 == Index3) {
auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
Src1, Index0, Src0, Index1);
T = lowerShuffleVector_AllFromSameSrc(
Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0,
UNIFIED_INDEX_1);
} else {
auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
Src1, Index0, Src0, Index1);
auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
Src1, Index2, Src0, Index3);
T = lowerShuffleVector_TwoFromSameSrc(
Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
UNIFIED_INDEX_0, UNIFIED_INDEX_1);
}
}
break;
CASE_SRCS_IN(1, 0, 1, 1) : {
assert(false && "Following code is untested but likely correct; test "
"and remove assert.");
auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0,
Src0, Index1);
T = lowerShuffleVector_TwoFromSameSrc(
Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3);
}
break;
CASE_SRCS_IN(1, 1, 0, 0) : {
T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Src0,
Index2, Index3);
}
break;
CASE_SRCS_IN(1, 1, 0, 1) : {
assert(false && "Following code is untested but likely correct; test "
"and remove assert.");
auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2,
Src1, Index3);
T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified,
UNIFIED_INDEX_0, UNIFIED_INDEX_1);
}
break;
CASE_SRCS_IN(1, 1, 1, 0) : {
auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2,
Src0, Index3);
T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified,
UNIFIED_INDEX_0, UNIFIED_INDEX_1);
}
break;
CASE_SRCS_IN(1, 1, 1, 1) : {
assert(false && "Following code is untested but likely correct; test "
"and remove assert.");
T = lowerShuffleVector_AllFromSameSrc(Src1, Index0, Index1, Index2,
Index3);
}
break;
#undef CASE_SRCS_IN
}
assert(T != nullptr);
assert(T->getType() == DestTy);
_movp(Dest, T);
return;
} break;
}
// Unoptimized shuffle. Perform a series of inserts and extracts.
Context.insert<InstFakeDef>(T);
auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0));
auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1));
const SizeT NumElements = typeNumElements(DestTy);
const Type ElementType = typeElementType(DestTy);
for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
auto *Index = Instr->getIndex(I);
......
; Tests some optimized shufflevector lowerings. This list is by no means
; exhaustive; it is only a **basic** smoke test. The vector_ops crosstest has a
; broader range of test cases.
; RUN: %p2i -i %s --target=x8632 --filetype=obj --disassemble -a -O2 \
; RUN: --allow-externally-defined-symbols | FileCheck %s --check-prefix=X86
declare void @useV4I32(<4 x i32> %t);
define internal void @shuffleV4I32(<4 x i32> %a, <4 x i32> %b) {
; X86-LABEL: shuffleV4I32
%a_0 = extractelement <4 x i32> %a, i32 0
%a_1 = extractelement <4 x i32> %a, i32 1
%a_2 = extractelement <4 x i32> %a, i32 2
%a_3 = extractelement <4 x i32> %a, i32 3
%b_0 = extractelement <4 x i32> %b, i32 0
%b_1 = extractelement <4 x i32> %b, i32 1
%b_2 = extractelement <4 x i32> %b, i32 2
%b_3 = extractelement <4 x i32> %b, i32 3
%t0_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0
%t0_1 = insertelement <4 x i32> %t0_0, i32 %b_0, i32 1
%t0_2 = insertelement <4 x i32> %t0_1, i32 %a_1, i32 2
%t0 = insertelement <4 x i32> %t0_2, i32 %b_1, i32 3
; X86: punpckldq {{.*}}
call void @useV4I32(<4 x i32> %t0)
; X86: call
%t1_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0
%t1_1 = insertelement <4 x i32> %t1_0, i32 %b_1, i32 1
%t1_2 = insertelement <4 x i32> %t1_1, i32 %b_1, i32 2
%t1 = insertelement <4 x i32> %t1_2, i32 %a_0, i32 3
; X86: shufps [[T:xmm[0-9]+]],{{.*}},0x10
; X86: pshufd {{.*}},[[T]],0x28
call void @useV4I32(<4 x i32> %t1)
; X86: call
%t2_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0
%t2_1 = insertelement <4 x i32> %t2_0, i32 %b_3, i32 1
%t2_2 = insertelement <4 x i32> %t2_1, i32 %a_2, i32 2
%t2 = insertelement <4 x i32> %t2_2, i32 %b_2, i32 3
; X86: shufps {{.*}},0x30
; X86: shufps {{.*}},0x22
; X86: shufps {{.*}},0x88
call void @useV4I32(<4 x i32> %t2)
; X86: call
ret void
}
......@@ -995,33 +995,79 @@ TEST_F(AssemblerX8632Test, Shufp) {
reset(); \
} while (0)
#define TestImplSingleXmmXmmUntyped(Dst, Src, Inst) \
#define TestImpl(Dst, Src) \
do { \
static constexpr char TestString[] = \
"(" #Dst ", " #Src ", " #Inst ", Untyped)"; \
TestImplSingleXmmXmm(Dst, Src, pshufd); \
TestImplSingleXmmAddr(Dst, pshufd); \
TestImplSingleXmmXmm(Dst, Src, shufps); \
TestImplSingleXmmAddr(Dst, shufps); \
} while (0)
TestImpl(xmm0, xmm1);
TestImpl(xmm1, xmm2);
TestImpl(xmm2, xmm3);
TestImpl(xmm3, xmm4);
TestImpl(xmm4, xmm5);
TestImpl(xmm5, xmm6);
TestImpl(xmm6, xmm7);
TestImpl(xmm7, xmm0);
#undef TestImpl
#undef TestImplSingleXmmAddr
#undef TestImplSingleXmmXmm
}
TEST_F(AssemblerX8632Test, Punpckldq) {
const Dqword V0(uint64_t(0x1111111122222222ull),
uint64_t(0x5555555577777777ull));
const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull),
uint64_t(0xCCCCCCCCDDDDDDDDull));
const Dqword Expected(uint64_t(0xBBBBBBBB22222222ull),
uint64_t(0xAAAAAAAA11111111ull));
#define TestImplXmmXmm(Dst, Src, Inst) \
do { \
static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
__ Inst(XmmRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src, \
Immediate(Inst##Imm)); \
__ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, \
XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0); \
test.setDqwordTo(T1, V1); \
test.run(); \
\
ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
#define TestImplXmmAddr(Dst, Inst) \
do { \
static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0); \
test.setDqwordTo(T1, V1); \
test.run(); \
\
ASSERT_EQ(Inst##UntypedExpected, test.Dst<Dqword>()) << TestString; \
ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
#define TestImpl(Dst, Src) \
do { \
TestImplSingleXmmXmm(Dst, Src, pshufd); \
TestImplSingleXmmAddr(Dst, pshufd); \
TestImplSingleXmmXmm(Dst, Src, shufps); \
TestImplSingleXmmAddr(Dst, shufps); \
TestImplXmmXmm(Dst, Src, punpckldq); \
TestImplXmmAddr(Dst, punpckldq); \
} while (0)
TestImpl(xmm0, xmm1);
......@@ -1034,9 +1080,8 @@ TEST_F(AssemblerX8632Test, Shufp) {
TestImpl(xmm7, xmm0);
#undef TestImpl
#undef TestImplSingleXmmXmmUntyped
#undef TestImplSingleXmmAddr
#undef TestImplSingleXmmXmm
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
TEST_F(AssemblerX8632Test, Cvt) {
......
......@@ -1083,6 +1083,81 @@ TEST_F(AssemblerX8664Test, Shufp) {
#undef TestImplSingleXmmXmm
}
// Exercises punpckldq in its xmm,xmm and xmm,mem forms, using each of
// xmm0..xmm15 once as the destination register.
TEST_F(AssemblerX8664Test, Punpckldq) {
  // Destination and source inputs. The expected value is built solely from
  // the 32-bit halves of each input's first constructor argument, alternating
  // destination half then source half.
  const Dqword DstInput(uint64_t(0x1111111122222222ull),
                        uint64_t(0x5555555577777777ull));
  const Dqword SrcInput(uint64_t(0xAAAAAAAABBBBBBBBull),
                        uint64_t(0xCCCCCCCCDDDDDDDDull));
  const Dqword Interleaved(uint64_t(0xBBBBBBBB22222222ull),
                           uint64_t(0xAAAAAAAA11111111ull));

// Common tail of both operand forms: assemble what has been emitted, seed the
// two memory slots, execute, and compare register Dst against Interleaved.
// Relies on DstSlot, SrcSlot and Where being declared by the invoking macro.
#define RunAndVerify(Dst) \
  do { \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstSlot, DstInput); \
    test.setDqwordTo(SrcSlot, SrcInput); \
    test.run(); \
    ASSERT_EQ(Interleaved, test.Dst<Dqword>()) << Where; \
    reset(); \
  } while (0)

// Register/register form: both operands are loaded from memory with movups
// before Inst is applied to the register pair.
#define CheckXmmXmm(Dst, Src, Inst) \
  do { \
    static constexpr char Where[] = "(" #Dst ", " #Src ", " #Inst ")"; \
    const uint32_t DstSlot = allocateDqword(); \
    const uint32_t SrcSlot = allocateDqword(); \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(DstSlot)); \
    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(SrcSlot)); \
    __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, \
            XmmRegister::Encoded_Reg_##Src); \
    RunAndVerify(Dst); \
  } while (0)

// Register/memory form: only the destination is loaded into a register; the
// source operand is read by Inst directly from the second slot's address.
#define CheckXmmAddr(Dst, Inst) \
  do { \
    static constexpr char Where[] = "(" #Dst ", Addr, " #Inst ")"; \
    const uint32_t DstSlot = allocateDqword(); \
    const uint32_t SrcSlot = allocateDqword(); \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(DstSlot)); \
    __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, \
            dwordAddress(SrcSlot)); \
    RunAndVerify(Dst); \
  } while (0)

// Runs both operand forms for one (destination, source) register pair.
#define CheckPair(Dst, Src) \
  do { \
    CheckXmmXmm(Dst, Src, punpckldq); \
    CheckXmmAddr(Dst, punpckldq); \
  } while (0)

  // Each register is paired with its successor; xmm15 wraps around to xmm0.
  CheckPair(xmm0, xmm1);
  CheckPair(xmm1, xmm2);
  CheckPair(xmm2, xmm3);
  CheckPair(xmm3, xmm4);
  CheckPair(xmm4, xmm5);
  CheckPair(xmm5, xmm6);
  CheckPair(xmm6, xmm7);
  CheckPair(xmm7, xmm8);
  CheckPair(xmm8, xmm9);
  CheckPair(xmm9, xmm10);
  CheckPair(xmm10, xmm11);
  CheckPair(xmm11, xmm12);
  CheckPair(xmm12, xmm13);
  CheckPair(xmm13, xmm14);
  CheckPair(xmm14, xmm15);
  CheckPair(xmm15, xmm0);

#undef CheckPair
#undef CheckXmmAddr
#undef CheckXmmXmm
#undef RunAndVerify
}
TEST_F(AssemblerX8664Test, Cvt) {
const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
const Dqword dq2ps32SrcValue(-5, 3, 100, 200);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment