Commit 133b87d6 by Nicolas Capens Committed by Nicolas Capens

Implement byte swizzle operations

Add Swizzle() intrinsics for Byte16, Byte8, and Byte4, and add Byte4 constructors and assignment operators. Also move LLVM-specific implementations to the generic Reactor.cpp source file. On x86 these all translate to a pshufb instruction, which is very efficient. Bug: b/148295813 Change-Id: Icf88fe1621623f8104c4a642d560643a01b9ef55 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/40549 Presubmit-Ready: Nicolas Capens <nicolascapens@google.com> Tested-by: Nicolas Capens <nicolascapens@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com> Reviewed-by: Antonio Maiorano <amaiorano@google.com>
parent 413953a5
...@@ -141,6 +141,8 @@ Function<Int(Float)> function; ...@@ -141,6 +141,8 @@ Function<Int(Float)> function;
Note that this is a bitwise cast. Unlike C++'s ```reinterpret_cast<>```, it does not allow casting between different sized types. Think of it as storing the value in memory and then loading from that same address into the casted type. Note that this is a bitwise cast. Unlike C++'s ```reinterpret_cast<>```, it does not allow casting between different sized types. Think of it as storing the value in memory and then loading from that same address into the casted type.
An important exception is that 16-, 8-, and 4-byte vectors can be cast to other vectors of one of these sizes. Casting to a longer vector leaves the upper contents undefined.
### Pointers ### Pointers
Pointers also use a template class: Pointers also use a template class:
......
...@@ -2906,35 +2906,6 @@ RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs) ...@@ -2906,35 +2906,6 @@ RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
#endif #endif
} }
// Shuffles x at byte granularity: x is reinterpreted as a Byte16, permuted, and
// reinterpreted back as a UShort8. Every selectN supplies two consecutive byte
// indices to the mask, selectN itself followed by selectN + 1.
// NOTE(review): this suggests each selectN is a byte offset rather than a
// 16-bit lane number - confirm against callers.
RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
{
	RR_DEBUG_INFO_UPDATE_LOC();

	const char selectors[8] = { select0, select1, select2, select3, select4, select5, select6, select7 };

	// Expand every selector into its pair of byte indices.
	int pshufb[16];
	for(int pair = 0; pair < 8; pair++)
	{
		pshufb[2 * pair + 0] = selectors[pair] + 0;
		pshufb[2 * pair + 1] = selectors[pair] + 1;
	}

	Value *asBytes = Nucleus::createBitCast(x.value, Byte16::getType());
	Value *permuted = Nucleus::createShuffleVector(asBytes, asBytes, pshufb);
	Value *asShorts = Nucleus::createBitCast(permuted, UShort8::getType());

	return RValue<UShort8>(asShorts);
}
RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y) RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
{ {
RR_DEBUG_INFO_UPDATE_LOC(); RR_DEBUG_INFO_UPDATE_LOC();
......
...@@ -1250,12 +1250,62 @@ Byte4::Byte4(RValue<Byte8> cast) ...@@ -1250,12 +1250,62 @@ Byte4::Byte4(RValue<Byte8> cast)
storeValue(Nucleus::createBitCast(cast.value, getType())); storeValue(Nucleus::createBitCast(cast.value, getType()));
} }
// Narrowing conversion from UShort4: the source is viewed as bytes and the
// swizzle mask gathers bytes 0, 2, 4 and 6 into the first four byte lanes.
Byte4::Byte4(RValue<UShort4> cast)
{
	// TODO(b/148379603): Optimize narrowing swizzle.
	auto sourceBytes = As<Byte8>(cast);
	auto gathered = Swizzle(sourceBytes, 0x0246'0246);

	*this = As<Byte4>(gathered);
}
// Narrowing conversion from Short4: the source is viewed as bytes and the
// swizzle mask gathers bytes 0, 2, 4 and 6 into the first four byte lanes.
Byte4::Byte4(RValue<Short4> cast)
{
	// TODO(b/148379603): Optimize narrowing swizzle.
	auto sourceBytes = As<Byte8>(cast);
	auto gathered = Swizzle(sourceBytes, 0x0246'0246);

	*this = As<Byte4>(gathered);
}
// Narrowing conversion from UInt4: the source is viewed as 16 bytes and the
// swizzle mask gathers bytes 0, 4, 8 and 12 into the first four byte lanes.
Byte4::Byte4(RValue<UInt4> cast)
{
	// TODO(b/148379603): Optimize narrowing swizzle.
	auto sourceBytes = As<Byte16>(cast);
	auto gathered = Swizzle(sourceBytes, 0x048C'048C'048C'048C);

	*this = As<Byte4>(gathered);
}
// Narrowing conversion from Int4: the source is viewed as 16 bytes and the
// swizzle mask gathers bytes 0, 4, 8 and 12 into the first four byte lanes.
Byte4::Byte4(RValue<Int4> cast)
{
	// TODO(b/148379603): Optimize narrowing swizzle.
	auto sourceBytes = As<Byte16>(cast);
	auto gathered = Swizzle(sourceBytes, 0x048C'048C'048C'048C);

	*this = As<Byte4>(gathered);
}
// Constructs a Byte4 from an rvalue by storing the rvalue's underlying value.
Byte4::Byte4(RValue<Byte4> rhs)
{
storeValue(rhs.value);
}
// Copy constructor: loads the current value of rhs and stores it in this object.
Byte4::Byte4(const Byte4 &rhs)
{
	storeValue(rhs.loadValue());
}
Byte4::Byte4(const Reference<Byte4> &rhs) Byte4::Byte4(const Reference<Byte4> &rhs)
{ {
Value *value = rhs.loadValue(); Value *value = rhs.loadValue();
storeValue(value); storeValue(value);
} }
// Assigns from an rvalue: stores its underlying value in this object and
// returns rhs unchanged.
RValue<Byte4> Byte4::operator=(RValue<Byte4> rhs)
{
storeValue(rhs.value);
return rhs;
}
// Assigns from another Byte4 object: the value loaded from rhs is stored here
// and the same loaded value is returned wrapped as an rvalue.
RValue<Byte4> Byte4::operator=(const Byte4 &rhs)
{
	Value *loaded = rhs.loadValue();
	storeValue(loaded);

	return RValue<Byte4>(loaded);
}
Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
{ {
int64_t constantVector[8] = { x0, x1, x2, x3, x4, x5, x6, x7 }; int64_t constantVector[8] = { x0, x1, x2, x3, x4, x5, x6, x7 };
...@@ -1417,8 +1467,35 @@ RValue<Byte8> operator~(RValue<Byte8> val) ...@@ -1417,8 +1467,35 @@ RValue<Byte8> operator~(RValue<Byte8> val)
return RValue<Byte8>(Nucleus::createNot(val.value)); return RValue<Byte8>(Nucleus::createNot(val.value));
} }
// Permutes the bytes of x. select packs eight 4-bit fields: the most
// significant field names the source byte for lane 0, the least significant
// one the source byte for lane 7. Only the low three bits of a field are used.
RValue<Byte8> Swizzle(RValue<Byte8> x, uint32_t select)
{
	// Real type is v16i8, so the eight-entry pattern is written twice to fill
	// all sixteen mask slots.
	// TODO(b/148379603): Optimize narrowing swizzle.
	int shuffle[16];
	for(int slot = 0; slot < 16; slot++)
	{
		const int shift = 28 - 4 * (slot % 8);
		shuffle[slot] = static_cast<int>((select >> shift) & 0x07);
	}

	return As<Byte8>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
}
RValue<Short4> Unpack(RValue<Byte4> x) RValue<Short4> Unpack(RValue<Byte4> x)
{ {
// TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 }; // Real type is v16i8 int shuffle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 }; // Real type is v16i8
return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle)); return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
} }
...@@ -1430,12 +1507,14 @@ RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y) ...@@ -1430,12 +1507,14 @@ RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y) RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
{ {
// TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8 int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8
return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
} }
RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y) RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
{ {
// TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8 int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8
auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return As<Short4>(Swizzle(As<Int4>(lowHigh), 0x2323)); return As<Short4>(Swizzle(As<Int4>(lowHigh), 0x2323));
...@@ -1606,12 +1685,14 @@ RValue<SByte8> operator~(RValue<SByte8> val) ...@@ -1606,12 +1685,14 @@ RValue<SByte8> operator~(RValue<SByte8> val)
RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y) RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
{ {
// TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8 int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8
return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
} }
RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y) RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
{ {
// TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8 int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8
auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return As<Short4>(Swizzle(As<Int4>(lowHigh), 0x2323)); return As<Short4>(Swizzle(As<Int4>(lowHigh), 0x2323));
...@@ -1657,6 +1738,30 @@ RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs) ...@@ -1657,6 +1738,30 @@ RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
return RValue<Byte16>(value); return RValue<Byte16>(value);
} }
// Permutes the bytes of x. select packs sixteen 4-bit fields: the most
// significant field names the source byte for lane 0, the least significant
// one the source byte for lane 15.
RValue<Byte16> Swizzle(RValue<Byte16> x, uint64_t select)
{
	int shuffle[16];
	for(int lane = 0; lane < 16; lane++)
	{
		// Walk the nibbles from the top of select downwards.
		const int shift = 60 - 4 * lane;
		shuffle[lane] = static_cast<int>((select >> shift) & 0x0F);
	}

	return As<Byte16>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
}
Short2::Short2(RValue<Short4> cast) Short2::Short2(RValue<Short4> cast)
{ {
storeValue(Nucleus::createBitCast(cast.value, getType())); storeValue(Nucleus::createBitCast(cast.value, getType()));
...@@ -1890,6 +1995,7 @@ RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y) ...@@ -1890,6 +1995,7 @@ RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y) RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
{ {
// TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 }; // Real type is v8i16 int shuffle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 }; // Real type is v8i16
auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return As<Int2>(Swizzle(As<Int4>(lowHigh), 0x2323)); return As<Int2>(Swizzle(As<Int4>(lowHigh), 0x2323));
...@@ -1898,6 +2004,7 @@ RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y) ...@@ -1898,6 +2004,7 @@ RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
RValue<Short4> Swizzle(RValue<Short4> x, uint16_t select) RValue<Short4> Swizzle(RValue<Short4> x, uint16_t select)
{ {
// Real type is v8i16 // Real type is v8i16
// TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[8] = { int shuffle[8] = {
(select >> 12) & 0x03, (select >> 12) & 0x03,
(select >> 8) & 0x03, (select >> 8) & 0x03,
...@@ -2214,6 +2321,22 @@ RValue<UShort8> operator~(RValue<UShort8> val) ...@@ -2214,6 +2321,22 @@ RValue<UShort8> operator~(RValue<UShort8> val)
return RValue<UShort8>(Nucleus::createNot(val.value)); return RValue<UShort8>(Nucleus::createNot(val.value));
} }
// Permutes the lanes of x. select packs eight 4-bit fields: the most
// significant field names the source lane for lane 0, the least significant
// one the source lane for lane 7. Only the low three bits of a field are used.
RValue<UShort8> Swizzle(RValue<UShort8> x, uint32_t select)
{
	// The mask array has sixteen slots; only the first eight are derived from
	// select and the remaining ones stay zero.
	// NOTE(review): presumably only eight entries are consumed for this vector
	// type, which would make the array larger than needed - confirm.
	int swizzle[16] = {};
	for(int lane = 0; lane < 8; lane++)
	{
		const int shift = 28 - 4 * lane;
		swizzle[lane] = static_cast<int>((select >> shift) & 0x07);
	}

	return RValue<UShort8>(Nucleus::createShuffleVector(x.value, x.value, swizzle));
}
Int::Int(Argument<Int> argument) Int::Int(Argument<Int> argument)
{ {
storeValue(argument.value); storeValue(argument.value);
...@@ -3073,12 +3196,14 @@ RValue<Int2> operator~(RValue<Int2> val) ...@@ -3073,12 +3196,14 @@ RValue<Int2> operator~(RValue<Int2> val)
RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y) RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
{ {
// TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[4] = { 0, 4, 1, 5 }; // Real type is v4i32 int shuffle[4] = { 0, 4, 1, 5 }; // Real type is v4i32
return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
} }
RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y) RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
{ {
// TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[4] = { 0, 4, 1, 5 }; // Real type is v4i32 int shuffle[4] = { 0, 4, 1, 5 }; // Real type is v4i32
auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return As<Short4>(Swizzle(lowHigh, 0x2323)); return As<Short4>(Swizzle(lowHigh, 0x2323));
......
...@@ -517,15 +517,19 @@ class Byte4 : public LValue<Byte4> ...@@ -517,15 +517,19 @@ class Byte4 : public LValue<Byte4>
{ {
public: public:
explicit Byte4(RValue<Byte8> cast); explicit Byte4(RValue<Byte8> cast);
explicit Byte4(RValue<UShort4> cast);
explicit Byte4(RValue<Short4> cast);
explicit Byte4(RValue<UInt4> cast);
explicit Byte4(RValue<Int4> cast);
Byte4() = default; Byte4() = default;
// Byte4(int x, int y, int z, int w); // Byte4(int x, int y, int z, int w);
// Byte4(RValue<Byte4> rhs); Byte4(RValue<Byte4> rhs);
// Byte4(const Byte4 &rhs); Byte4(const Byte4 &rhs);
Byte4(const Reference<Byte4> &rhs); Byte4(const Reference<Byte4> &rhs);
// RValue<Byte4> operator=(RValue<Byte4> rhs); RValue<Byte4> operator=(RValue<Byte4> rhs);
// RValue<Byte4> operator=(const Byte4 &rhs); RValue<Byte4> operator=(const Byte4 &rhs);
// RValue<Byte4> operator=(const Reference<Byte4> &rhs); // RValue<Byte4> operator=(const Reference<Byte4> &rhs);
static Type *getType(); static Type *getType();
...@@ -656,6 +660,7 @@ RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y); ...@@ -656,6 +660,7 @@ RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y);
RValue<Int> SignMask(RValue<Byte8> x); RValue<Int> SignMask(RValue<Byte8> x);
// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y); // RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y);
RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y); RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y);
// Byte permutation of x. select packs eight 4-bit source-byte indices, most
// significant nibble first (lane 0); only the low three bits of each are used.
RValue<Byte8> Swizzle(RValue<Byte8> x, uint32_t select);
class SByte8 : public LValue<SByte8> class SByte8 : public LValue<SByte8>
{ {
...@@ -713,7 +718,6 @@ class Byte16 : public LValue<Byte16> ...@@ -713,7 +718,6 @@ class Byte16 : public LValue<Byte16>
{ {
public: public:
Byte16() = default; Byte16() = default;
// Byte16(int x, int y, int z, int w);
Byte16(RValue<Byte16> rhs); Byte16(RValue<Byte16> rhs);
Byte16(const Byte16 &rhs); Byte16(const Byte16 &rhs);
Byte16(const Reference<Byte16> &rhs); Byte16(const Reference<Byte16> &rhs);
...@@ -752,6 +756,7 @@ public: ...@@ -752,6 +756,7 @@ public:
// const Byte16 &operator++(Byte16 &val); // Pre-increment // const Byte16 &operator++(Byte16 &val); // Pre-increment
// RValue<Byte16> operator--(Byte16 &val, int); // Post-decrement // RValue<Byte16> operator--(Byte16 &val, int); // Post-decrement
// const Byte16 &operator--(Byte16 &val); // Pre-decrement // const Byte16 &operator--(Byte16 &val); // Pre-decrement
// Byte permutation of x. select packs sixteen 4-bit source-byte indices, most
// significant nibble first (lane 0).
RValue<Byte16> Swizzle(RValue<Byte16> x, uint64_t select);
class SByte16 : public LValue<SByte16> class SByte16 : public LValue<SByte16>
{ {
...@@ -1065,7 +1070,7 @@ RValue<UShort8> operator~(RValue<UShort8> val); ...@@ -1065,7 +1070,7 @@ RValue<UShort8> operator~(RValue<UShort8> val);
// RValue<Bool> operator!=(RValue<UShort8> lhs, RValue<UShort8> rhs); // RValue<Bool> operator!=(RValue<UShort8> lhs, RValue<UShort8> rhs);
// RValue<Bool> operator==(RValue<UShort8> lhs, RValue<UShort8> rhs); // RValue<Bool> operator==(RValue<UShort8> lhs, RValue<UShort8> rhs);
RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7); RValue<UShort8> Swizzle(RValue<UShort8> x, uint32_t select);
RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y); RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y);
class Int : public LValue<Int> class Int : public LValue<Int>
......
...@@ -345,133 +345,199 @@ TEST(ReactorUnitTests, Concatenate) ...@@ -345,133 +345,199 @@ TEST(ReactorUnitTests, Concatenate)
} }
} }
TEST(ReactorUnitTests, Swizzle) TEST(ReactorUnitTests, Cast)
{ {
auto swizzleCode = [](int i) -> uint16_t { FunctionT<void(void *)> function;
auto x = (i >> 0) & 0x03; {
auto y = (i >> 2) & 0x03; Pointer<Byte> out = function.Arg<0>();
auto z = (i >> 4) & 0x03;
auto w = (i >> 6) & 0x03;
return (x << 12) | (y << 8) | (z << 4) | (w << 0);
};
Int4 c = Int4(0x01020304, 0x05060708, 0x09101112, 0x13141516);
*Pointer<Short4>(out + 16 * 0) = Short4(c);
*Pointer<Byte4>(out + 16 * 1 + 0) = Byte4(c);
*Pointer<Byte4>(out + 16 * 1 + 4) = Byte4(As<Byte8>(c));
*Pointer<Byte4>(out + 16 * 1 + 8) = Byte4(As<Short4>(c));
}
auto routine = function("one");
if(routine)
{ {
FunctionT<int(void *)> function; int out[2][4];
{
Pointer<Byte> out = function.Arg<0>();
for(int i = 0; i < 256; i++) memset(&out, 0, sizeof(out));
{
*Pointer<Float4>(out + 16 * i) = Swizzle(Float4(1.0f, 2.0f, 3.0f, 4.0f), swizzleCode(i));
}
for(int i = 0; i < 256; i++) routine(&out);
{
*Pointer<Float4>(out + 16 * (256 + i)) = ShuffleLowHigh(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f), swizzleCode(i));
}
*Pointer<Float4>(out + 16 * (512 + 0)) = UnpackLow(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f)); EXPECT_EQ(out[0][0], 0x07080304);
*Pointer<Float4>(out + 16 * (512 + 1)) = UnpackHigh(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f)); EXPECT_EQ(out[0][1], 0x15161112);
*Pointer<Int2>(out + 16 * (512 + 2)) = UnpackLow(Short4(1, 2, 3, 4), Short4(5, 6, 7, 8));
*Pointer<Int2>(out + 16 * (512 + 3)) = UnpackHigh(Short4(1, 2, 3, 4), Short4(5, 6, 7, 8));
*Pointer<Short4>(out + 16 * (512 + 4)) = UnpackLow(Byte8(1, 2, 3, 4, 5, 6, 7, 8), Byte8(9, 10, 11, 12, 13, 14, 15, 16));
*Pointer<Short4>(out + 16 * (512 + 5)) = UnpackHigh(Byte8(1, 2, 3, 4, 5, 6, 7, 8), Byte8(9, 10, 11, 12, 13, 14, 15, 16));
for(int i = 0; i < 256; i++) EXPECT_EQ(out[1][0], 0x16120804);
{ EXPECT_EQ(out[1][1], 0x01020304);
*Pointer<Short4>(out + 16 * (512 + 6) + (8 * i)) = EXPECT_EQ(out[1][2], 0x06080204);
Swizzle(Short4(1, 2, 3, 4), swizzleCode(i)); }
} }
for(int i = 0; i < 256; i++) static uint16_t swizzleCode4(int i)
{ {
*Pointer<Int4>(out + 16 * (512 + 6 + i) + (8 * 256)) = auto x = (i >> 0) & 0x03;
Swizzle(Int4(1, 2, 3, 4), swizzleCode(i)); auto y = (i >> 2) & 0x03;
} auto z = (i >> 4) & 0x03;
auto w = (i >> 6) & 0x03;
return static_cast<uint16_t>((x << 12) | (y << 8) | (z << 4) | (w << 0));
}
Return(0); TEST(ReactorUnitTests, Swizzle4)
{
FunctionT<void(void *)> function;
{
Pointer<Byte> out = function.Arg<0>();
for(int i = 0; i < 256; i++)
{
*Pointer<Float4>(out + 16 * i) = Swizzle(Float4(1.0f, 2.0f, 3.0f, 4.0f), swizzleCode4(i));
} }
auto routine = function("one"); for(int i = 0; i < 256; i++)
{
*Pointer<Float4>(out + 16 * (256 + i)) = ShuffleLowHigh(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f), swizzleCode4(i));
}
if(routine) *Pointer<Float4>(out + 16 * (512 + 0)) = UnpackLow(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f));
*Pointer<Float4>(out + 16 * (512 + 1)) = UnpackHigh(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f));
*Pointer<Int2>(out + 16 * (512 + 2)) = UnpackLow(Short4(1, 2, 3, 4), Short4(5, 6, 7, 8));
*Pointer<Int2>(out + 16 * (512 + 3)) = UnpackHigh(Short4(1, 2, 3, 4), Short4(5, 6, 7, 8));
*Pointer<Short4>(out + 16 * (512 + 4)) = UnpackLow(Byte8(1, 2, 3, 4, 5, 6, 7, 8), Byte8(9, 10, 11, 12, 13, 14, 15, 16));
*Pointer<Short4>(out + 16 * (512 + 5)) = UnpackHigh(Byte8(1, 2, 3, 4, 5, 6, 7, 8), Byte8(9, 10, 11, 12, 13, 14, 15, 16));
for(int i = 0; i < 256; i++)
{ {
struct *Pointer<Short4>(out + 16 * (512 + 6) + (8 * i)) =
{ Swizzle(Short4(1, 2, 3, 4), swizzleCode4(i));
float f[256 + 256 + 2][4]; }
int i[388][4];
} out;
memset(&out, 0, sizeof(out)); for(int i = 0; i < 256; i++)
{
*Pointer<Int4>(out + 16 * (512 + 6 + i) + (8 * 256)) =
Swizzle(Int4(1, 2, 3, 4), swizzleCode4(i));
}
}
routine(&out); auto routine = function("one");
for(int i = 0; i < 256; i++) if(routine)
{ {
EXPECT_EQ(out.f[i][0], float((i >> 0) & 0x03) + 1.0f); struct
EXPECT_EQ(out.f[i][1], float((i >> 2) & 0x03) + 1.0f); {
EXPECT_EQ(out.f[i][2], float((i >> 4) & 0x03) + 1.0f); float f[256 + 256 + 2][4];
EXPECT_EQ(out.f[i][3], float((i >> 6) & 0x03) + 1.0f); int i[388][4];
} } out;
for(int i = 0; i < 256; i++) memset(&out, 0, sizeof(out));
{
EXPECT_EQ(out.f[256 + i][0], float((i >> 0) & 0x03) + 1.0f);
EXPECT_EQ(out.f[256 + i][1], float((i >> 2) & 0x03) + 1.0f);
EXPECT_EQ(out.f[256 + i][2], float((i >> 4) & 0x03) + 5.0f);
EXPECT_EQ(out.f[256 + i][3], float((i >> 6) & 0x03) + 5.0f);
}
EXPECT_EQ(out.f[512 + 0][0], 1.0f); routine(&out);
EXPECT_EQ(out.f[512 + 0][1], 5.0f);
EXPECT_EQ(out.f[512 + 0][2], 2.0f);
EXPECT_EQ(out.f[512 + 0][3], 6.0f);
EXPECT_EQ(out.f[512 + 1][0], 3.0f);
EXPECT_EQ(out.f[512 + 1][1], 7.0f);
EXPECT_EQ(out.f[512 + 1][2], 4.0f);
EXPECT_EQ(out.f[512 + 1][3], 8.0f);
EXPECT_EQ(out.i[0][0], 0x00050001);
EXPECT_EQ(out.i[0][1], 0x00060002);
EXPECT_EQ(out.i[0][2], 0x00000000);
EXPECT_EQ(out.i[0][3], 0x00000000);
EXPECT_EQ(out.i[1][0], 0x00070003);
EXPECT_EQ(out.i[1][1], 0x00080004);
EXPECT_EQ(out.i[1][2], 0x00000000);
EXPECT_EQ(out.i[1][3], 0x00000000);
EXPECT_EQ(out.i[2][0], 0x0A020901);
EXPECT_EQ(out.i[2][1], 0x0C040B03);
EXPECT_EQ(out.i[2][2], 0x00000000);
EXPECT_EQ(out.i[2][3], 0x00000000);
EXPECT_EQ(out.i[3][0], 0x0E060D05);
EXPECT_EQ(out.i[3][1], 0x10080F07);
EXPECT_EQ(out.i[3][2], 0x00000000);
EXPECT_EQ(out.i[3][3], 0x00000000);
for(int i = 0; i < 256; i++)
{
EXPECT_EQ(out.i[4 + i / 2][0 + (i % 2) * 2] & 0xFFFF,
((i >> 0) & 0x03) + 1);
EXPECT_EQ(out.i[4 + i / 2][0 + (i % 2) * 2] >> 16,
((i >> 2) & 0x03) + 1);
EXPECT_EQ(out.i[4 + i / 2][1 + (i % 2) * 2] & 0xFFFF,
((i >> 4) & 0x03) + 1);
EXPECT_EQ(out.i[4 + i / 2][1 + (i % 2) * 2] >> 16,
((i >> 6) & 0x03) + 1);
}
for(int i = 0; i < 256; i++) for(int i = 0; i < 256; i++)
{ {
EXPECT_EQ(out.i[132 + i][0], ((i >> 0) & 0x03) + 1); EXPECT_EQ(out.f[i][0], float((i >> 0) & 0x03) + 1.0f);
EXPECT_EQ(out.i[132 + i][1], ((i >> 2) & 0x03) + 1); EXPECT_EQ(out.f[i][1], float((i >> 2) & 0x03) + 1.0f);
EXPECT_EQ(out.i[132 + i][2], ((i >> 4) & 0x03) + 1); EXPECT_EQ(out.f[i][2], float((i >> 4) & 0x03) + 1.0f);
EXPECT_EQ(out.i[132 + i][3], ((i >> 6) & 0x03) + 1); EXPECT_EQ(out.f[i][3], float((i >> 6) & 0x03) + 1.0f);
} }
for(int i = 0; i < 256; i++)
{
EXPECT_EQ(out.f[256 + i][0], float((i >> 0) & 0x03) + 1.0f);
EXPECT_EQ(out.f[256 + i][1], float((i >> 2) & 0x03) + 1.0f);
EXPECT_EQ(out.f[256 + i][2], float((i >> 4) & 0x03) + 5.0f);
EXPECT_EQ(out.f[256 + i][3], float((i >> 6) & 0x03) + 5.0f);
}
EXPECT_EQ(out.f[512 + 0][0], 1.0f);
EXPECT_EQ(out.f[512 + 0][1], 5.0f);
EXPECT_EQ(out.f[512 + 0][2], 2.0f);
EXPECT_EQ(out.f[512 + 0][3], 6.0f);
EXPECT_EQ(out.f[512 + 1][0], 3.0f);
EXPECT_EQ(out.f[512 + 1][1], 7.0f);
EXPECT_EQ(out.f[512 + 1][2], 4.0f);
EXPECT_EQ(out.f[512 + 1][3], 8.0f);
EXPECT_EQ(out.i[0][0], 0x00050001);
EXPECT_EQ(out.i[0][1], 0x00060002);
EXPECT_EQ(out.i[0][2], 0x00000000);
EXPECT_EQ(out.i[0][3], 0x00000000);
EXPECT_EQ(out.i[1][0], 0x00070003);
EXPECT_EQ(out.i[1][1], 0x00080004);
EXPECT_EQ(out.i[1][2], 0x00000000);
EXPECT_EQ(out.i[1][3], 0x00000000);
EXPECT_EQ(out.i[2][0], 0x0A020901);
EXPECT_EQ(out.i[2][1], 0x0C040B03);
EXPECT_EQ(out.i[2][2], 0x00000000);
EXPECT_EQ(out.i[2][3], 0x00000000);
EXPECT_EQ(out.i[3][0], 0x0E060D05);
EXPECT_EQ(out.i[3][1], 0x10080F07);
EXPECT_EQ(out.i[3][2], 0x00000000);
EXPECT_EQ(out.i[3][3], 0x00000000);
for(int i = 0; i < 256; i++)
{
EXPECT_EQ(out.i[4 + i / 2][0 + (i % 2) * 2] & 0xFFFF,
((i >> 0) & 0x03) + 1);
EXPECT_EQ(out.i[4 + i / 2][0 + (i % 2) * 2] >> 16,
((i >> 2) & 0x03) + 1);
EXPECT_EQ(out.i[4 + i / 2][1 + (i % 2) * 2] & 0xFFFF,
((i >> 4) & 0x03) + 1);
EXPECT_EQ(out.i[4 + i / 2][1 + (i % 2) * 2] >> 16,
((i >> 6) & 0x03) + 1);
} }
for(int i = 0; i < 256; i++)
{
EXPECT_EQ(out.i[132 + i][0], ((i >> 0) & 0x03) + 1);
EXPECT_EQ(out.i[132 + i][1], ((i >> 2) & 0x03) + 1);
EXPECT_EQ(out.i[132 + i][2], ((i >> 4) & 0x03) + 1);
EXPECT_EQ(out.i[132 + i][3], ((i >> 6) & 0x03) + 1);
}
}
}
TEST(ReactorUnitTests, Swizzle)
{
FunctionT<void(void *)> function;
{
Pointer<Byte> out = function.Arg<0>();
Int4 c = Int4(0x01020304, 0x05060708, 0x09101112, 0x13141516);
*Pointer<Byte16>(out + 16 * 0) = Swizzle(As<Byte16>(c), 0xFEDCBA9876543210ull);
*Pointer<Byte8>(out + 16 * 1) = Swizzle(As<Byte8>(c), 0x76543210u);
*Pointer<UShort8>(out + 16 * 2) = Swizzle(As<UShort8>(c), 0x76543210u);
}
auto routine = function("one");
if(routine)
{
int out[3][4];
memset(&out, 0, sizeof(out));
routine(&out);
EXPECT_EQ(out[0][0], 0x16151413);
EXPECT_EQ(out[0][1], 0x12111009);
EXPECT_EQ(out[0][2], 0x08070605);
EXPECT_EQ(out[0][3], 0x04030201);
EXPECT_EQ(out[1][0], 0x08070605);
EXPECT_EQ(out[1][1], 0x04030201);
EXPECT_EQ(out[2][0], 0x15161314);
EXPECT_EQ(out[2][1], 0x11120910);
EXPECT_EQ(out[2][2], 0x07080506);
EXPECT_EQ(out[2][3], 0x03040102);
} }
} }
......
...@@ -2682,24 +2682,12 @@ RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs) ...@@ -2682,24 +2682,12 @@ RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
} }
} }
// Stub: invokes UNIMPLEMENTED with this function's signature and then returns
// UShort8(0) instead of a shuffled vector.
RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
{
UNIMPLEMENTED("RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)");
return UShort8(0);
}
RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y) RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
{ {
UNIMPLEMENTED("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)"); UNIMPLEMENTED("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
return UShort8(0); return UShort8(0);
} }
// FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
// RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
// {
// ASSERT(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
// }
Type *UShort8::getType() Type *UShort8::getType()
{ {
return T(Ice::IceType_v8i16); return T(Ice::IceType_v8i16);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment