Commit 133b87d6 by Nicolas Capens Committed by Nicolas Capens

Implement byte swizzle operations

Add Swizzle() intrinsics for Byte16, Byte8, and Byte4, and add Byte4 constructors and assignment operators. Also move LLVM-specific implementations to the generic Reactor.cpp source file. On x86 these all translate to a pshufb instruction, which is very efficient. Bug: b/148295813 Change-Id: Icf88fe1621623f8104c4a642d560643a01b9ef55 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/40549 Presubmit-Ready: Nicolas Capens <nicolascapens@google.com> Tested-by: Nicolas Capens <nicolascapens@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com> Reviewed-by: Antonio Maiorano <amaiorano@google.com>
parent 413953a5
......@@ -141,6 +141,8 @@ Function<Int(Float)> function;
Note that this is a bitwise cast. Unlike C++'s ```reinterpret_cast<>```, it does not allow casting between different sized types. Think of it as storing the value in memory and then loading from that same address into the casted type.
An important exception is that 16-, 8-, and 4-byte vectors can be cast to other vectors of one of these sizes. Casting to a longer vector leaves the upper contents undefined.
### Pointers
Pointers also use a template class:
......
......@@ -2906,35 +2906,6 @@ RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
#endif
}
// Shuffles the bytes of a UShort8 value.
// Each selectN is used directly as a byte index into the 16-byte view of x:
// output element N is formed from bytes selectN and selectN + 1.
RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
{
	RR_DEBUG_INFO_UPDATE_LOC();

	const char selects[8] = { select0, select1, select2, select3, select4, select5, select6, select7 };

	// Expand every selector into a pair of consecutive byte indices.
	int pshufb[16];
	for(int lane = 0; lane < 8; lane++)
	{
		pshufb[2 * lane + 0] = selects[lane] + 0;
		pshufb[2 * lane + 1] = selects[lane] + 1;
	}

	// Shuffle on the byte view, then reinterpret the result as 16-bit lanes.
	Value *bytes = Nucleus::createBitCast(x.value, Byte16::getType());
	Value *shuffled = Nucleus::createShuffleVector(bytes, bytes, pshufb);
	return RValue<UShort8>(Nucleus::createBitCast(shuffled, UShort8::getType()));
}
RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
{
RR_DEBUG_INFO_UPDATE_LOC();
......
......@@ -1250,12 +1250,62 @@ Byte4::Byte4(RValue<Byte8> cast)
storeValue(Nucleus::createBitCast(cast.value, getType()));
}
// Narrows a UShort4 to a Byte4 by keeping bytes 0, 2, 4 and 6 of its byte view,
// i.e. one byte out of every 16-bit element.
Byte4::Byte4(RValue<UShort4> cast)
{
	// TODO(b/148379603): Optimize narrowing swizzle.
	RValue<Byte8> bytes = As<Byte8>(cast);
	RValue<Byte8> packed = Swizzle(bytes, 0x0246'0246);
	*this = As<Byte4>(packed);
}
// Narrows a Short4 to a Byte4 by keeping bytes 0, 2, 4 and 6 of its byte view,
// i.e. one byte out of every 16-bit element.
Byte4::Byte4(RValue<Short4> cast)
{
	// TODO(b/148379603): Optimize narrowing swizzle.
	RValue<Byte8> bytes = As<Byte8>(cast);
	RValue<Byte8> packed = Swizzle(bytes, 0x0246'0246);
	*this = As<Byte4>(packed);
}
// Narrows a UInt4 to a Byte4 by keeping bytes 0, 4, 8 and 12 of its byte view,
// i.e. one byte out of every 32-bit element.
Byte4::Byte4(RValue<UInt4> cast)
{
	// TODO(b/148379603): Optimize narrowing swizzle.
	RValue<Byte16> bytes = As<Byte16>(cast);
	RValue<Byte16> packed = Swizzle(bytes, 0x048C'048C'048C'048C);
	*this = As<Byte4>(packed);
}
// Narrows an Int4 to a Byte4 by keeping bytes 0, 4, 8 and 12 of its byte view,
// i.e. one byte out of every 32-bit element.
Byte4::Byte4(RValue<Int4> cast)
{
	// TODO(b/148379603): Optimize narrowing swizzle.
	RValue<Byte16> bytes = As<Byte16>(cast);
	RValue<Byte16> packed = Swizzle(bytes, 0x048C'048C'048C'048C);
	*this = As<Byte4>(packed);
}
// Initializes this variable by storing the given r-value into it.
Byte4::Byte4(RValue<Byte4> rhs)
{
	this->storeValue(rhs.value);
}
// Copy constructor: loads the current value of rhs and stores it into this variable.
Byte4::Byte4(const Byte4 &rhs)
{
	storeValue(rhs.loadValue());
}
// Initializes this variable from a reference: loads the referenced value and stores it.
Byte4::Byte4(const Reference<Byte4> &rhs)
{
	storeValue(rhs.loadValue());
}
// Assigns an r-value to this variable and returns the assigned value,
// so that assignments can be chained.
RValue<Byte4> Byte4::operator=(RValue<Byte4> rhs)
{
	this->storeValue(rhs.value);

	return rhs;
}
// Assigns the current value of another Byte4 variable to this one.
// The value is loaded once and that same value is both stored and returned.
RValue<Byte4> Byte4::operator=(const Byte4 &rhs)
{
	Value *loaded = rhs.loadValue();
	storeValue(loaded);

	return RValue<Byte4>(loaded);
}
Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
{
int64_t constantVector[8] = { x0, x1, x2, x3, x4, x5, x6, x7 };
......@@ -1417,8 +1467,35 @@ RValue<Byte8> operator~(RValue<Byte8> val)
return RValue<Byte8>(Nucleus::createNot(val.value));
}
// Rearranges the bytes of a Byte8 value.
// 'select' holds eight 4-bit selectors, most significant nibble first: nibble N
// (masked to 0..7) is the source byte index for output byte N.
RValue<Byte8> Swizzle(RValue<Byte8> x, uint32_t select)
{
	// Real type is v16i8
	// TODO(b/148379603): Optimize narrowing swizzle.
	int shuffle[16];
	for(int lane = 0; lane < 8; lane++)
	{
		const int source = static_cast<int>((select >> (28 - 4 * lane)) & 0x07);

		// The upper eight lanes of the underlying 16-byte vector repeat the lower eight.
		shuffle[lane] = source;
		shuffle[lane + 8] = source;
	}

	return As<Byte8>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
}
// Duplicates each byte of x into a pair of adjacent bytes and reinterprets
// the result as a Short4.
RValue<Short4> Unpack(RValue<Byte4> x)
{
	// TODO(b/148379603): Optimize narrowing swizzle.
	// Real type is v16i8: the index pattern is 0, 0, 1, 1, ..., 7, 7.
	int shuffle[16];
	for(int lane = 0; lane < 16; lane++)
	{
		shuffle[lane] = lane / 2;
	}

	return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
}
......@@ -1430,12 +1507,14 @@ RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
// Interleaves the bytes of x and y (x0, y0, x1, y1, ...) and reinterprets
// the result as a Short4.
RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
{
	// TODO(b/148379603): Optimize narrowing swizzle.
	// Real type is v16i8: indices 0..15 address x, indices 16..31 address y.
	int shuffle[16];
	for(int pair = 0; pair < 8; pair++)
	{
		shuffle[2 * pair + 0] = pair;
		shuffle[2 * pair + 1] = 16 + pair;
	}

	return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}
RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
{
// TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8
auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return As<Short4>(Swizzle(As<Int4>(lowHigh), 0x2323));
......@@ -1606,12 +1685,14 @@ RValue<SByte8> operator~(RValue<SByte8> val)
// Interleaves the bytes of x and y (x0, y0, x1, y1, ...) and reinterprets
// the result as a Short4.
RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
{
	// TODO(b/148379603): Optimize narrowing swizzle.
	// Real type is v16i8: indices 0..15 address x, indices 16..31 address y.
	int shuffle[16];
	for(int pair = 0; pair < 8; pair++)
	{
		shuffle[2 * pair + 0] = pair;
		shuffle[2 * pair + 1] = 16 + pair;
	}

	return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}
RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
{
// TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8
auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return As<Short4>(Swizzle(As<Int4>(lowHigh), 0x2323));
......@@ -1657,6 +1738,30 @@ RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
return RValue<Byte16>(value);
}
// Rearranges the bytes of a Byte16 value.
// 'select' holds sixteen 4-bit selectors, most significant nibble first:
// nibble N is the source byte index (0..15) for output byte N.
RValue<Byte16> Swizzle(RValue<Byte16> x, uint64_t select)
{
	int shuffle[16];
	for(int lane = 0; lane < 16; lane++)
	{
		// Lane 0 reads bits 63..60, lane 15 reads bits 3..0.
		shuffle[lane] = static_cast<int>((select >> (60 - 4 * lane)) & 0x0F);
	}

	return As<Byte16>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
}
Short2::Short2(RValue<Short4> cast)
{
storeValue(Nucleus::createBitCast(cast.value, getType()));
......@@ -1890,6 +1995,7 @@ RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
{
// TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 }; // Real type is v8i16
auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return As<Int2>(Swizzle(As<Int4>(lowHigh), 0x2323));
......@@ -1898,6 +2004,7 @@ RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
RValue<Short4> Swizzle(RValue<Short4> x, uint16_t select)
{
// Real type is v8i16
// TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[8] = {
(select >> 12) & 0x03,
(select >> 8) & 0x03,
......@@ -2214,6 +2321,22 @@ RValue<UShort8> operator~(RValue<UShort8> val)
return RValue<UShort8>(Nucleus::createNot(val.value));
}
// Rearranges the 16-bit elements of a UShort8 value.
// 'select' holds eight 4-bit selectors, most significant nibble first: nibble N
// (masked to 0..7) is the source element index for output element N.
RValue<UShort8> Swizzle(RValue<UShort8> x, uint32_t select)
{
	// The value has eight 16-bit lanes, so the table needs exactly one index per
	// lane. This matches the 8-entry tables used by the other v8i16 shuffles.
	int swizzle[8];
	for(int lane = 0; lane < 8; lane++)
	{
		// Lane 0 reads bits 31..28, lane 7 reads bits 3..0.
		swizzle[lane] = static_cast<int>((select >> (28 - 4 * lane)) & 0x07);
	}

	return RValue<UShort8>(Nucleus::createShuffleVector(x.value, x.value, swizzle));
}
Int::Int(Argument<Int> argument)
{
storeValue(argument.value);
......@@ -3073,12 +3196,14 @@ RValue<Int2> operator~(RValue<Int2> val)
// Interleaves the 32-bit elements of x and y (x0, y0, x1, y1) and reinterprets
// the result as a Short4.
RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
{
	// TODO(b/148379603): Optimize narrowing swizzle.
	// Real type is v4i32: indices 0..3 address x, indices 4..7 address y.
	int shuffle[4];
	for(int pair = 0; pair < 2; pair++)
	{
		shuffle[2 * pair + 0] = pair;
		shuffle[2 * pair + 1] = 4 + pair;
	}

	return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}
RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
{
// TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[4] = { 0, 4, 1, 5 }; // Real type is v4i32
auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return As<Short4>(Swizzle(lowHigh, 0x2323));
......
......@@ -517,15 +517,19 @@ class Byte4 : public LValue<Byte4>
{
public:
explicit Byte4(RValue<Byte8> cast);
explicit Byte4(RValue<UShort4> cast);
explicit Byte4(RValue<Short4> cast);
explicit Byte4(RValue<UInt4> cast);
explicit Byte4(RValue<Int4> cast);
Byte4() = default;
// Byte4(int x, int y, int z, int w);
// Byte4(RValue<Byte4> rhs);
// Byte4(const Byte4 &rhs);
Byte4(RValue<Byte4> rhs);
Byte4(const Byte4 &rhs);
Byte4(const Reference<Byte4> &rhs);
// RValue<Byte4> operator=(RValue<Byte4> rhs);
// RValue<Byte4> operator=(const Byte4 &rhs);
RValue<Byte4> operator=(RValue<Byte4> rhs);
RValue<Byte4> operator=(const Byte4 &rhs);
// RValue<Byte4> operator=(const Reference<Byte4> &rhs);
static Type *getType();
......@@ -656,6 +660,7 @@ RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y);
RValue<Int> SignMask(RValue<Byte8> x);
// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y);
RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y);
RValue<Byte8> Swizzle(RValue<Byte8> x, uint32_t select);
class SByte8 : public LValue<SByte8>
{
......@@ -713,7 +718,6 @@ class Byte16 : public LValue<Byte16>
{
public:
Byte16() = default;
// Byte16(int x, int y, int z, int w);
Byte16(RValue<Byte16> rhs);
Byte16(const Byte16 &rhs);
Byte16(const Reference<Byte16> &rhs);
......@@ -752,6 +756,7 @@ public:
// const Byte16 &operator++(Byte16 &val); // Pre-increment
// RValue<Byte16> operator--(Byte16 &val, int); // Post-decrement
// const Byte16 &operator--(Byte16 &val); // Pre-decrement
RValue<Byte16> Swizzle(RValue<Byte16> x, uint64_t select);
class SByte16 : public LValue<SByte16>
{
......@@ -1065,7 +1070,7 @@ RValue<UShort8> operator~(RValue<UShort8> val);
// RValue<Bool> operator!=(RValue<UShort8> lhs, RValue<UShort8> rhs);
// RValue<Bool> operator==(RValue<UShort8> lhs, RValue<UShort8> rhs);
RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7);
RValue<UShort8> Swizzle(RValue<UShort8> x, uint32_t select);
RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y);
class Int : public LValue<Int>
......
......@@ -345,133 +345,199 @@ TEST(ReactorUnitTests, Concatenate)
}
}
TEST(ReactorUnitTests, Swizzle)
TEST(ReactorUnitTests, Cast)
{
auto swizzleCode = [](int i) -> uint16_t {
auto x = (i >> 0) & 0x03;
auto y = (i >> 2) & 0x03;
auto z = (i >> 4) & 0x03;
auto w = (i >> 6) & 0x03;
return (x << 12) | (y << 8) | (z << 4) | (w << 0);
};
FunctionT<void(void *)> function;
{
Pointer<Byte> out = function.Arg<0>();
Int4 c = Int4(0x01020304, 0x05060708, 0x09101112, 0x13141516);
*Pointer<Short4>(out + 16 * 0) = Short4(c);
*Pointer<Byte4>(out + 16 * 1 + 0) = Byte4(c);
*Pointer<Byte4>(out + 16 * 1 + 4) = Byte4(As<Byte8>(c));
*Pointer<Byte4>(out + 16 * 1 + 8) = Byte4(As<Short4>(c));
}
auto routine = function("one");
if(routine)
{
FunctionT<int(void *)> function;
{
Pointer<Byte> out = function.Arg<0>();
int out[2][4];
for(int i = 0; i < 256; i++)
{
*Pointer<Float4>(out + 16 * i) = Swizzle(Float4(1.0f, 2.0f, 3.0f, 4.0f), swizzleCode(i));
}
memset(&out, 0, sizeof(out));
for(int i = 0; i < 256; i++)
{
*Pointer<Float4>(out + 16 * (256 + i)) = ShuffleLowHigh(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f), swizzleCode(i));
}
routine(&out);
*Pointer<Float4>(out + 16 * (512 + 0)) = UnpackLow(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f));
*Pointer<Float4>(out + 16 * (512 + 1)) = UnpackHigh(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f));
*Pointer<Int2>(out + 16 * (512 + 2)) = UnpackLow(Short4(1, 2, 3, 4), Short4(5, 6, 7, 8));
*Pointer<Int2>(out + 16 * (512 + 3)) = UnpackHigh(Short4(1, 2, 3, 4), Short4(5, 6, 7, 8));
*Pointer<Short4>(out + 16 * (512 + 4)) = UnpackLow(Byte8(1, 2, 3, 4, 5, 6, 7, 8), Byte8(9, 10, 11, 12, 13, 14, 15, 16));
*Pointer<Short4>(out + 16 * (512 + 5)) = UnpackHigh(Byte8(1, 2, 3, 4, 5, 6, 7, 8), Byte8(9, 10, 11, 12, 13, 14, 15, 16));
EXPECT_EQ(out[0][0], 0x07080304);
EXPECT_EQ(out[0][1], 0x15161112);
for(int i = 0; i < 256; i++)
{
*Pointer<Short4>(out + 16 * (512 + 6) + (8 * i)) =
Swizzle(Short4(1, 2, 3, 4), swizzleCode(i));
}
EXPECT_EQ(out[1][0], 0x16120804);
EXPECT_EQ(out[1][1], 0x01020304);
EXPECT_EQ(out[1][2], 0x06080204);
}
}
for(int i = 0; i < 256; i++)
{
*Pointer<Int4>(out + 16 * (512 + 6 + i) + (8 * 256)) =
Swizzle(Int4(1, 2, 3, 4), swizzleCode(i));
}
// Converts a packed 8-bit swizzle index into a 16-bit swizzle code.
// 'i' holds four 2-bit component selectors, with x in bits 1..0 and w in bits 7..6.
// In the result each selector occupies its own nibble, x in the most
// significant nibble and w in the least significant one.
static uint16_t swizzleCode4(int i)
{
	int code = 0;
	for(int component = 0; component < 4; component++)
	{
		const int selector = (i >> (2 * component)) & 0x03;
		code |= selector << (12 - 4 * component);
	}

	return static_cast<uint16_t>(code);
}
Return(0);
TEST(ReactorUnitTests, Swizzle4)
{
FunctionT<void(void *)> function;
{
Pointer<Byte> out = function.Arg<0>();
for(int i = 0; i < 256; i++)
{
*Pointer<Float4>(out + 16 * i) = Swizzle(Float4(1.0f, 2.0f, 3.0f, 4.0f), swizzleCode4(i));
}
auto routine = function("one");
for(int i = 0; i < 256; i++)
{
*Pointer<Float4>(out + 16 * (256 + i)) = ShuffleLowHigh(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f), swizzleCode4(i));
}
if(routine)
*Pointer<Float4>(out + 16 * (512 + 0)) = UnpackLow(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f));
*Pointer<Float4>(out + 16 * (512 + 1)) = UnpackHigh(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f));
*Pointer<Int2>(out + 16 * (512 + 2)) = UnpackLow(Short4(1, 2, 3, 4), Short4(5, 6, 7, 8));
*Pointer<Int2>(out + 16 * (512 + 3)) = UnpackHigh(Short4(1, 2, 3, 4), Short4(5, 6, 7, 8));
*Pointer<Short4>(out + 16 * (512 + 4)) = UnpackLow(Byte8(1, 2, 3, 4, 5, 6, 7, 8), Byte8(9, 10, 11, 12, 13, 14, 15, 16));
*Pointer<Short4>(out + 16 * (512 + 5)) = UnpackHigh(Byte8(1, 2, 3, 4, 5, 6, 7, 8), Byte8(9, 10, 11, 12, 13, 14, 15, 16));
for(int i = 0; i < 256; i++)
{
struct
{
float f[256 + 256 + 2][4];
int i[388][4];
} out;
*Pointer<Short4>(out + 16 * (512 + 6) + (8 * i)) =
Swizzle(Short4(1, 2, 3, 4), swizzleCode4(i));
}
memset(&out, 0, sizeof(out));
for(int i = 0; i < 256; i++)
{
*Pointer<Int4>(out + 16 * (512 + 6 + i) + (8 * 256)) =
Swizzle(Int4(1, 2, 3, 4), swizzleCode4(i));
}
}
routine(&out);
auto routine = function("one");
for(int i = 0; i < 256; i++)
{
EXPECT_EQ(out.f[i][0], float((i >> 0) & 0x03) + 1.0f);
EXPECT_EQ(out.f[i][1], float((i >> 2) & 0x03) + 1.0f);
EXPECT_EQ(out.f[i][2], float((i >> 4) & 0x03) + 1.0f);
EXPECT_EQ(out.f[i][3], float((i >> 6) & 0x03) + 1.0f);
}
if(routine)
{
struct
{
float f[256 + 256 + 2][4];
int i[388][4];
} out;
for(int i = 0; i < 256; i++)
{
EXPECT_EQ(out.f[256 + i][0], float((i >> 0) & 0x03) + 1.0f);
EXPECT_EQ(out.f[256 + i][1], float((i >> 2) & 0x03) + 1.0f);
EXPECT_EQ(out.f[256 + i][2], float((i >> 4) & 0x03) + 5.0f);
EXPECT_EQ(out.f[256 + i][3], float((i >> 6) & 0x03) + 5.0f);
}
memset(&out, 0, sizeof(out));
EXPECT_EQ(out.f[512 + 0][0], 1.0f);
EXPECT_EQ(out.f[512 + 0][1], 5.0f);
EXPECT_EQ(out.f[512 + 0][2], 2.0f);
EXPECT_EQ(out.f[512 + 0][3], 6.0f);
EXPECT_EQ(out.f[512 + 1][0], 3.0f);
EXPECT_EQ(out.f[512 + 1][1], 7.0f);
EXPECT_EQ(out.f[512 + 1][2], 4.0f);
EXPECT_EQ(out.f[512 + 1][3], 8.0f);
EXPECT_EQ(out.i[0][0], 0x00050001);
EXPECT_EQ(out.i[0][1], 0x00060002);
EXPECT_EQ(out.i[0][2], 0x00000000);
EXPECT_EQ(out.i[0][3], 0x00000000);
EXPECT_EQ(out.i[1][0], 0x00070003);
EXPECT_EQ(out.i[1][1], 0x00080004);
EXPECT_EQ(out.i[1][2], 0x00000000);
EXPECT_EQ(out.i[1][3], 0x00000000);
EXPECT_EQ(out.i[2][0], 0x0A020901);
EXPECT_EQ(out.i[2][1], 0x0C040B03);
EXPECT_EQ(out.i[2][2], 0x00000000);
EXPECT_EQ(out.i[2][3], 0x00000000);
EXPECT_EQ(out.i[3][0], 0x0E060D05);
EXPECT_EQ(out.i[3][1], 0x10080F07);
EXPECT_EQ(out.i[3][2], 0x00000000);
EXPECT_EQ(out.i[3][3], 0x00000000);
for(int i = 0; i < 256; i++)
{
EXPECT_EQ(out.i[4 + i / 2][0 + (i % 2) * 2] & 0xFFFF,
((i >> 0) & 0x03) + 1);
EXPECT_EQ(out.i[4 + i / 2][0 + (i % 2) * 2] >> 16,
((i >> 2) & 0x03) + 1);
EXPECT_EQ(out.i[4 + i / 2][1 + (i % 2) * 2] & 0xFFFF,
((i >> 4) & 0x03) + 1);
EXPECT_EQ(out.i[4 + i / 2][1 + (i % 2) * 2] >> 16,
((i >> 6) & 0x03) + 1);
}
routine(&out);
for(int i = 0; i < 256; i++)
{
EXPECT_EQ(out.i[132 + i][0], ((i >> 0) & 0x03) + 1);
EXPECT_EQ(out.i[132 + i][1], ((i >> 2) & 0x03) + 1);
EXPECT_EQ(out.i[132 + i][2], ((i >> 4) & 0x03) + 1);
EXPECT_EQ(out.i[132 + i][3], ((i >> 6) & 0x03) + 1);
}
for(int i = 0; i < 256; i++)
{
EXPECT_EQ(out.f[i][0], float((i >> 0) & 0x03) + 1.0f);
EXPECT_EQ(out.f[i][1], float((i >> 2) & 0x03) + 1.0f);
EXPECT_EQ(out.f[i][2], float((i >> 4) & 0x03) + 1.0f);
EXPECT_EQ(out.f[i][3], float((i >> 6) & 0x03) + 1.0f);
}
for(int i = 0; i < 256; i++)
{
EXPECT_EQ(out.f[256 + i][0], float((i >> 0) & 0x03) + 1.0f);
EXPECT_EQ(out.f[256 + i][1], float((i >> 2) & 0x03) + 1.0f);
EXPECT_EQ(out.f[256 + i][2], float((i >> 4) & 0x03) + 5.0f);
EXPECT_EQ(out.f[256 + i][3], float((i >> 6) & 0x03) + 5.0f);
}
EXPECT_EQ(out.f[512 + 0][0], 1.0f);
EXPECT_EQ(out.f[512 + 0][1], 5.0f);
EXPECT_EQ(out.f[512 + 0][2], 2.0f);
EXPECT_EQ(out.f[512 + 0][3], 6.0f);
EXPECT_EQ(out.f[512 + 1][0], 3.0f);
EXPECT_EQ(out.f[512 + 1][1], 7.0f);
EXPECT_EQ(out.f[512 + 1][2], 4.0f);
EXPECT_EQ(out.f[512 + 1][3], 8.0f);
EXPECT_EQ(out.i[0][0], 0x00050001);
EXPECT_EQ(out.i[0][1], 0x00060002);
EXPECT_EQ(out.i[0][2], 0x00000000);
EXPECT_EQ(out.i[0][3], 0x00000000);
EXPECT_EQ(out.i[1][0], 0x00070003);
EXPECT_EQ(out.i[1][1], 0x00080004);
EXPECT_EQ(out.i[1][2], 0x00000000);
EXPECT_EQ(out.i[1][3], 0x00000000);
EXPECT_EQ(out.i[2][0], 0x0A020901);
EXPECT_EQ(out.i[2][1], 0x0C040B03);
EXPECT_EQ(out.i[2][2], 0x00000000);
EXPECT_EQ(out.i[2][3], 0x00000000);
EXPECT_EQ(out.i[3][0], 0x0E060D05);
EXPECT_EQ(out.i[3][1], 0x10080F07);
EXPECT_EQ(out.i[3][2], 0x00000000);
EXPECT_EQ(out.i[3][3], 0x00000000);
for(int i = 0; i < 256; i++)
{
EXPECT_EQ(out.i[4 + i / 2][0 + (i % 2) * 2] & 0xFFFF,
((i >> 0) & 0x03) + 1);
EXPECT_EQ(out.i[4 + i / 2][0 + (i % 2) * 2] >> 16,
((i >> 2) & 0x03) + 1);
EXPECT_EQ(out.i[4 + i / 2][1 + (i % 2) * 2] & 0xFFFF,
((i >> 4) & 0x03) + 1);
EXPECT_EQ(out.i[4 + i / 2][1 + (i % 2) * 2] >> 16,
((i >> 6) & 0x03) + 1);
}
for(int i = 0; i < 256; i++)
{
EXPECT_EQ(out.i[132 + i][0], ((i >> 0) & 0x03) + 1);
EXPECT_EQ(out.i[132 + i][1], ((i >> 2) & 0x03) + 1);
EXPECT_EQ(out.i[132 + i][2], ((i >> 4) & 0x03) + 1);
EXPECT_EQ(out.i[132 + i][3], ((i >> 6) & 0x03) + 1);
}
}
}
// Exercises the packed-selector Swizzle() overloads for Byte16, Byte8 and UShort8.
// Every selector lists the source lanes in descending order, so each result is
// the input with its lanes reversed.
TEST(ReactorUnitTests, Swizzle)
{
FunctionT<void(void *)> function;
{
Pointer<Byte> out = function.Arg<0>();
Int4 c = Int4(0x01020304, 0x05060708, 0x09101112, 0x13141516);
// Each result is written to its own 16-byte row of the output buffer.
*Pointer<Byte16>(out + 16 * 0) = Swizzle(As<Byte16>(c), 0xFEDCBA9876543210ull);
*Pointer<Byte8>(out + 16 * 1) = Swizzle(As<Byte8>(c), 0x76543210u);
*Pointer<UShort8>(out + 16 * 2) = Swizzle(As<UShort8>(c), 0x76543210u);
}
auto routine = function("one");
if(routine)
{
int out[3][4];
memset(&out, 0, sizeof(out));
routine(&out);
// NOTE(review): the expected 32-bit words below appear to assume a
// little-endian byte layout - confirm for other targets.
// Row 0: all 16 bytes of c in reverse order.
EXPECT_EQ(out[0][0], 0x16151413);
EXPECT_EQ(out[0][1], 0x12111009);
EXPECT_EQ(out[0][2], 0x08070605);
EXPECT_EQ(out[0][3], 0x04030201);
// Row 1: the first 8 bytes of c in reverse order (only 8 bytes are written).
EXPECT_EQ(out[1][0], 0x08070605);
EXPECT_EQ(out[1][1], 0x04030201);
// Row 2: the eight 16-bit elements of c in reverse order.
EXPECT_EQ(out[2][0], 0x15161314);
EXPECT_EQ(out[2][1], 0x11120910);
EXPECT_EQ(out[2][2], 0x07080506);
EXPECT_EQ(out[2][3], 0x03040102);
}
}
......
......@@ -2682,24 +2682,12 @@ RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
}
}
// Placeholder for the eight-selector UShort8 swizzle: it is not implemented here.
// Reports the missing implementation through UNIMPLEMENTED() and returns UShort8(0).
RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
{
UNIMPLEMENTED("RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)");
return UShort8(0);
}
// Placeholder for MulHigh on UShort8: it is not implemented here.
// Reports the missing implementation through UNIMPLEMENTED() and returns UShort8(0).
RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
{
UNIMPLEMENTED("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
return UShort8(0);
}
// FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
// RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
// {
// ASSERT(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
// }
Type *UShort8::getType()
{
return T(Ice::IceType_v8i16);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment