Commit aa8f699a by Chris Forbes Committed by Nicolas Capens

Subzero MulHigh implementation for Int4/UInt4

Also add implementations of multiplication and right shift for Long type.

Bug b/126873455

Change-Id: I9952c2b9a3feca6a7741cd02e2295340935e4447
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/25988
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
parent 914a46d4
...@@ -4560,6 +4560,16 @@ namespace rr ...@@ -4560,6 +4560,16 @@ namespace rr
return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value)); return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
} }
// Multiplication of two Long values.
// Emits a Nucleus::createMul node over the operands' underlying values and
// wraps the resulting value as an RValue<Long>.
RValue<Long> operator*(RValue<Long> lhs, RValue<Long> rhs)
{
	auto product = Nucleus::createMul(lhs.value, rhs.value);

	return RValue<Long>(product);
}
// Right shift of a Long value by a Long shift amount.
// The shift is built with Nucleus::createAShr, so this is the arithmetic
// (AShr) flavour of right shift, applied to the operands' underlying values.
RValue<Long> operator>>(RValue<Long> lhs, RValue<Long> rhs)
{
	auto shifted = Nucleus::createAShr(lhs.value, rhs.value);

	return RValue<Long>(shifted);
}
RValue<Long> operator+=(Long &lhs, RValue<Long> rhs) RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
{ {
return lhs = lhs + rhs; return lhs = lhs + rhs;
......
...@@ -1099,14 +1099,14 @@ namespace rr ...@@ -1099,14 +1099,14 @@ namespace rr
RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs); RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs);
RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs); RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs);
// RValue<Long> operator*(RValue<Long> lhs, RValue<Long> rhs); RValue<Long> operator*(RValue<Long> lhs, RValue<Long> rhs);
// RValue<Long> operator/(RValue<Long> lhs, RValue<Long> rhs); // RValue<Long> operator/(RValue<Long> lhs, RValue<Long> rhs);
// RValue<Long> operator%(RValue<Long> lhs, RValue<Long> rhs); // RValue<Long> operator%(RValue<Long> lhs, RValue<Long> rhs);
// RValue<Long> operator&(RValue<Long> lhs, RValue<Long> rhs); // RValue<Long> operator&(RValue<Long> lhs, RValue<Long> rhs);
// RValue<Long> operator|(RValue<Long> lhs, RValue<Long> rhs); // RValue<Long> operator|(RValue<Long> lhs, RValue<Long> rhs);
// RValue<Long> operator^(RValue<Long> lhs, RValue<Long> rhs); // RValue<Long> operator^(RValue<Long> lhs, RValue<Long> rhs);
// RValue<Long> operator<<(RValue<Long> lhs, RValue<Long> rhs); // RValue<Long> operator<<(RValue<Long> lhs, RValue<Long> rhs);
// RValue<Long> operator>>(RValue<Long> lhs, RValue<Long> rhs); RValue<Long> operator>>(RValue<Long> lhs, RValue<Long> rhs);
RValue<Long> operator+=(Long &lhs, RValue<Long> rhs); RValue<Long> operator+=(Long &lhs, RValue<Long> rhs);
RValue<Long> operator-=(Long &lhs, RValue<Long> rhs); RValue<Long> operator-=(Long &lhs, RValue<Long> rhs);
// RValue<Long> operator*=(Long &lhs, RValue<Long> rhs); // RValue<Long> operator*=(Long &lhs, RValue<Long> rhs);
...@@ -1872,7 +1872,6 @@ namespace rr ...@@ -1872,7 +1872,6 @@ namespace rr
UInt4(int x, int yzw); UInt4(int x, int yzw);
UInt4(int x, int y, int zw); UInt4(int x, int y, int zw);
UInt4(int x, int y, int z, int w); UInt4(int x, int y, int z, int w);
UInt4(unsigned int x, unsigned int y, unsigned int z, unsigned int w);
UInt4(RValue<UInt4> rhs); UInt4(RValue<UInt4> rhs);
UInt4(const UInt4 &rhs); UInt4(const UInt4 &rhs);
UInt4(const Reference<UInt4> &rhs); UInt4(const Reference<UInt4> &rhs);
......
...@@ -925,14 +925,29 @@ TEST(ReactorUnitTests, MulHigh) ...@@ -925,14 +925,29 @@ TEST(ReactorUnitTests, MulHigh)
{ {
Pointer<Byte> out = function.Arg<0>(); Pointer<Byte> out = function.Arg<0>();
*Pointer<Short4>(out + 8 * 0) = *Pointer<Short4>(out + 16 * 0) =
MulHigh(Short4(0x1aa, 0x2dd, 0x3ee, 0xF422), MulHigh(Short4(0x01AA, 0x02DD, 0x03EE, 0xF422),
Short4(0x1bb, 0x2cc, 0x3ff, 0xF411)); Short4(0x01BB, 0x02CC, 0x03FF, 0xF411));
*Pointer<UShort4>(out + 8 * 1) = *Pointer<UShort4>(out + 16 * 1) =
MulHigh(UShort4(0x1aa, 0x2dd, 0x3ee, 0xF422), MulHigh(UShort4(0x01AA, 0x02DD, 0x03EE, 0xF422),
UShort4(0x1bb, 0x2cc, 0x3ff, 0xF411)); UShort4(0x01BB, 0x02CC, 0x03FF, 0xF411));
*Pointer<Int4>(out + 16 * 2) =
MulHigh(Int4(0x000001AA, 0x000002DD, 0xC8000000, 0xF8000000),
Int4(0x000001BB, 0x84000000, 0x000003EE, 0xD7000000));
*Pointer<UInt4>(out + 16 * 3) =
MulHigh(UInt4(0x000001AAu, 0x000002DDu, 0xC8000000u, 0xD8000000u),
UInt4(0x000001BBu, 0x84000000u, 0x000003EEu, 0xD7000000u));
*Pointer<Int4>(out + 16 * 4) =
MulHigh(Int4(0x7FFFFFFF, 0x7FFFFFFF, 0x80008000, 0xFFFFFFFF),
Int4(0x7FFFFFFF, 0x80000000, 0x80008000, 0xFFFFFFFF));
*Pointer<UInt4>(out + 16 * 5) =
MulHigh(UInt4(0x7FFFFFFFu, 0x7FFFFFFFu, 0x80008000u, 0xFFFFFFFFu),
UInt4(0x7FFFFFFFu, 0x80000000u, 0x80008000u, 0xFFFFFFFFu));
// (U)Short8 variants currently unimplemented.
// (U)Short8 variants are mentioned but unimplemented
Return(0); Return(0);
} }
...@@ -940,7 +955,7 @@ TEST(ReactorUnitTests, MulHigh) ...@@ -940,7 +955,7 @@ TEST(ReactorUnitTests, MulHigh)
if(routine) if(routine)
{ {
unsigned int out[2][2]; unsigned int out[6][4];
memset(&out, 0, sizeof(out)); memset(&out, 0, sizeof(out));
...@@ -948,10 +963,30 @@ TEST(ReactorUnitTests, MulHigh) ...@@ -948,10 +963,30 @@ TEST(ReactorUnitTests, MulHigh)
callable(&out); callable(&out);
EXPECT_EQ(out[0][0], 0x00080002u); EXPECT_EQ(out[0][0], 0x00080002u);
EXPECT_EQ(out[0][1], 0x008D000fu); EXPECT_EQ(out[0][1], 0x008D000Fu);
EXPECT_EQ(out[1][0], 0x00080002u); EXPECT_EQ(out[1][0], 0x00080002u);
EXPECT_EQ(out[1][1], 0xe8C0000Fu); EXPECT_EQ(out[1][1], 0xE8C0000Fu);
EXPECT_EQ(out[2][0], 0x00000000u);
EXPECT_EQ(out[2][1], 0xFFFFFE9Cu);
EXPECT_EQ(out[2][2], 0xFFFFFF23u);
EXPECT_EQ(out[2][3], 0x01480000u);
EXPECT_EQ(out[3][0], 0x00000000u);
EXPECT_EQ(out[3][1], 0x00000179u);
EXPECT_EQ(out[3][2], 0x00000311u);
EXPECT_EQ(out[3][3], 0xB5680000u);
EXPECT_EQ(out[4][0], 0x3FFFFFFFu);
EXPECT_EQ(out[4][1], 0xC0000000u);
EXPECT_EQ(out[4][2], 0x3FFF8000u);
EXPECT_EQ(out[4][3], 0x00000000u);
EXPECT_EQ(out[5][0], 0x3FFFFFFFu);
EXPECT_EQ(out[5][1], 0x3FFFFFFFu);
EXPECT_EQ(out[5][2], 0x40008000u);
EXPECT_EQ(out[5][3], 0xFFFFFFFEu);
} }
} }
......
...@@ -4087,6 +4087,52 @@ namespace rr ...@@ -4087,6 +4087,52 @@ namespace rr
} }
} }
// Per-lane signed "multiply high" for Int4.
// Each of the four lanes of x and y is extracted, widened to Long, multiplied,
// shifted right by 32 and narrowed back to Int before being inserted into the
// corresponding lane of the result.
RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
{
	// TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.

	// Scalarized implementation: one widened multiply per lane.
	Int4 result;

	for(int lane = 0; lane < 4; lane++)
	{
		auto xLane = Extract(x, lane);
		auto yLane = Extract(y, lane);
		auto product = Long(xLane) * Long(yLane);

		// Keep only what remains of the widened product after shifting by 32.
		result = Insert(result, Int(product >> Long(Int(32))), lane);
	}

	return result;
}
// Per-lane unsigned "multiply high" for UInt4.
// The active path is scalarized: every lane is extracted (via an Int4
// reinterpretation of the operand), converted to UInt and then to Long,
// multiplied, shifted right by 32 and narrowed to Int. The assembled Int4 is
// reinterpreted as UInt4 on return.
RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
{
	// TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.

	if(false)   // Partial product based implementation.
	{
		// Disabled alternative that stays in vector form: split both operands
		// into 16-bit halves and combine the partial products.
		auto xHi = x >> 16;
		auto yHi = y >> 16;
		auto xLo = x & UInt4(0x0000FFFF);
		auto yLo = y & UInt4(0x0000FFFF);

		// The two cross terms, each split again into upper and lower halves.
		auto crossLoHi = xLo * yHi;
		auto crossHiLo = xHi * yLo;
		auto crossLoHiUpper = crossLoHi >> 16;
		auto crossHiLoUpper = crossHiLo >> 16;
		auto crossLoHiLower = crossLoHi & UInt4(0x0000FFFF);
		auto crossHiLoLower = crossHiLo & UInt4(0x0000FFFF);

		// Carry out of the low 32 bits of the full product.
		auto lowUpper = (xLo * yLo) >> 16;
		auto carry = (crossLoHiLower + crossHiLoLower + lowUpper) >> 16;

		return (xHi * yHi) + (crossLoHiUpper + crossHiLoUpper) + carry;
	}

	// Scalarized implementation.
	Int4 result;

	for(int lane = 0; lane < 4; lane++)
	{
		auto xLane = Extract(As<Int4>(x), lane);
		auto yLane = Extract(As<Int4>(y), lane);

		// Go through UInt before widening so the lane is treated as unsigned.
		auto product = Long(UInt(xLane)) * Long(UInt(yLane));

		result = Insert(result, Int(product >> Long(Int(32))), lane);
	}

	return As<UInt4>(result);
}
RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y) RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
{ {
assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr)); assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr));
...@@ -4777,6 +4823,16 @@ namespace rr ...@@ -4777,6 +4823,16 @@ namespace rr
return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value)); return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
} }
// Product of two Long operands.
// Forwards the underlying values to Nucleus::createMul and returns the
// created value wrapped in an RValue<Long>.
RValue<Long> operator*(RValue<Long> lhs, RValue<Long> rhs)
{
	auto mulResult = Nucleus::createMul(lhs.value, rhs.value);

	return RValue<Long>(mulResult);
}
// Shifts a Long operand right by a Long amount.
// Forwards the underlying values to Nucleus::createAShr (the arithmetic
// shift-right builder) and returns the created value as an RValue<Long>.
RValue<Long> operator>>(RValue<Long> lhs, RValue<Long> rhs)
{
	auto shiftResult = Nucleus::createAShr(lhs.value, rhs.value);

	return RValue<Long>(shiftResult);
}
RValue<Long> operator+=(Long &lhs, RValue<Long> rhs) RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
{ {
return lhs = lhs + rhs; return lhs = lhs + rhs;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment