Commit c4f2c297 by Alexis Hetu Committed by Alexis Hétu

New integer related core functions

- Removed float <-> int bit conversion functions, as these will not be needed if everything is stored as float.
- Added ineg for the minus (-) sign in front of a value.
- Added f2i/i2f/f2u/u2f for float <-> int conversions.
- Added b2i/i2b/b2u/u2b for bool <-> int conversions.
- Added iadd, isub, imul, imad, [iu]div, [iu]mod, [iu]min, [iu]max for these basic operations as integer operations.
- Added left and right shifts.
- Added ucmp to compare unsigned values.
- Modified or/xor/and to support vectors instead of only scalars.
- Added vector equality comparison functions.
Change-Id: I0f138e3707242ec0fffc1c12b95064ddc98f0087
Reviewed-on: https://swiftshader-review.googlesource.com/3888
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
parent c4b57f53
......@@ -263,9 +263,9 @@ namespace sw
case Shader::OPCODE_ALL: all(d.x, s0); break;
case Shader::OPCODE_ANY: any(d.x, s0); break;
case Shader::OPCODE_NOT: not(d, s0); break;
case Shader::OPCODE_OR: or(d.x, s0.x, s1.x); break;
case Shader::OPCODE_XOR: xor(d.x, s0.x, s1.x); break;
case Shader::OPCODE_AND: and(d.x, s0.x, s1.x); break;
case Shader::OPCODE_OR: or(d, s0, s1); break;
case Shader::OPCODE_XOR: xor(d, s0, s1); break;
case Shader::OPCODE_AND: and(d, s0, s1); break;
case Shader::OPCODE_END: break;
default:
ASSERT(false);
......
......@@ -484,26 +484,6 @@ namespace sw
return logarithm((Float4(1.0f) + x) / (Float4(1.0f) - x), pp) * Float4(0.5f);
}
// Returns the four lanes of x reinterpreted (via As<>) as signed integers.
Int4 floatBitsToInt(RValue<Float4> x)
{
	Int4 bits = As<Int4>(x);

	return bits;
}
// Returns the four lanes of x reinterpreted (via As<>) as unsigned integers.
UInt4 floatBitsToUInt(RValue<Float4> x)
{
	UInt4 bits = As<UInt4>(x);

	return bits;
}
// Returns the four signed integer lanes of x reinterpreted (via As<>) as floats.
Float4 intBitsToFloat(RValue<Int4> x)
{
	Float4 bits = As<Float4>(x);

	return bits;
}
// Returns the four unsigned integer lanes of x reinterpreted (via As<>) as floats.
Float4 uintBitsToFloat(RValue<UInt4> x)
{
	Float4 bits = As<Float4>(x);

	return bits;
}
Float4 dot2(const Vector4f &v0, const Vector4f &v1)
{
return v0.x * v1.x + v0.y * v1.y;
......@@ -613,6 +593,22 @@ namespace sw
}
}
// Floating-point negation of every component of src, written to dst.
void ShaderCore::neg(Vector4f &dst, const Vector4f &src)
{
	RValue<Float4> negX = -src.x;
	RValue<Float4> negY = -src.y;
	RValue<Float4> negZ = -src.z;
	RValue<Float4> negW = -src.w;

	dst.x = negX;
	dst.y = negY;
	dst.z = negZ;
	dst.w = negW;
}
// Integer negation: each component of src is viewed as Int4, negated, and the
// resulting integer bit pattern is stored back in the Float4 component of dst.
void ShaderCore::ineg(Vector4f &dst, const Vector4f &src)
{
	RValue<Int4> negX = -As<Int4>(src.x);
	RValue<Int4> negY = -As<Int4>(src.y);
	RValue<Int4> negZ = -As<Int4>(src.z);
	RValue<Int4> negW = -As<Int4>(src.w);

	dst.x = As<Float4>(negX);
	dst.y = As<Float4>(negY);
	dst.z = As<Float4>(negZ);
	dst.w = As<Float4>(negW);
}
void ShaderCore::f2b(Vector4f &dst, const Vector4f &src)
{
dst.x = As<Float4>(CmpNEQ(src.x, Float4(0.0f)));
......@@ -629,6 +625,70 @@ namespace sw
dst.w = As<Float4>(As<Int4>(src.w) & As<Int4>(Float4(1.0f)));
}
// Float to signed int: each component is converted through the Int4 conversion
// constructor and the integer bits are stored in the Float4 component of dst.
void ShaderCore::f2i(Vector4f &dst, const Vector4f &src)
{
	Int4 intX(src.x);
	Int4 intY(src.y);
	Int4 intZ(src.z);
	Int4 intW(src.w);

	dst.x = As<Float4>(intX);
	dst.y = As<Float4>(intY);
	dst.z = As<Float4>(intZ);
	dst.w = As<Float4>(intW);
}
// Signed int to float: each component's bits are viewed as Int4 and then
// converted to a floating-point value through the Float4 conversion constructor.
void ShaderCore::i2f(Vector4f &dst, const Vector4f &src)
{
	RValue<Int4> intX = As<Int4>(src.x);
	RValue<Int4> intY = As<Int4>(src.y);
	RValue<Int4> intZ = As<Int4>(src.z);
	RValue<Int4> intW = As<Int4>(src.w);

	dst.x = Float4(intX);
	dst.y = Float4(intY);
	dst.z = Float4(intZ);
	dst.w = Float4(intW);
}
// Float to unsigned int: each component is converted through the UInt4
// conversion constructor and the integer bits are stored in dst.
void ShaderCore::f2u(Vector4f &dst, const Vector4f &src)
{
	UInt4 uintX(src.x);
	UInt4 uintY(src.y);
	UInt4 uintZ(src.z);
	UInt4 uintW(src.w);

	dst.x = As<Float4>(uintX);
	dst.y = As<Float4>(uintY);
	dst.z = As<Float4>(uintZ);
	dst.w = As<Float4>(uintW);
}
// Unsigned int to float: each component's bits are viewed as UInt4 and then
// converted to a floating-point value through the Float4 conversion constructor.
void ShaderCore::u2f(Vector4f &dst, const Vector4f &src)
{
	RValue<UInt4> uintX = As<UInt4>(src.x);
	RValue<UInt4> uintY = As<UInt4>(src.y);
	RValue<UInt4> uintZ = As<UInt4>(src.z);
	RValue<UInt4> uintW = As<UInt4>(src.w);

	dst.x = Float4(uintX);
	dst.y = Float4(uintY);
	dst.z = Float4(uintZ);
	dst.w = Float4(uintW);
}
// Signed int to bool: each lane becomes the CmpNEQ mask of "integer value != 0".
void ShaderCore::i2b(Vector4f &dst, const Vector4f &src)
{
	Int4 zero(0);

	RValue<Int4> nonZeroX = CmpNEQ(As<Int4>(src.x), zero);
	RValue<Int4> nonZeroY = CmpNEQ(As<Int4>(src.y), zero);
	RValue<Int4> nonZeroZ = CmpNEQ(As<Int4>(src.z), zero);
	RValue<Int4> nonZeroW = CmpNEQ(As<Int4>(src.w), zero);

	dst.x = As<Float4>(nonZeroX);
	dst.y = As<Float4>(nonZeroY);
	dst.z = As<Float4>(nonZeroZ);
	dst.w = As<Float4>(nonZeroW);
}
// Bool to signed int: keeps only the lowest bit of each lane, so the result
// is the integer 0 or 1 depending on that bit of the source.
void ShaderCore::b2i(Vector4f &dst, const Vector4f &src)
{
	Int4 one(1);

	RValue<Int4> bitX = As<Int4>(src.x) & one;
	RValue<Int4> bitY = As<Int4>(src.y) & one;
	RValue<Int4> bitZ = As<Int4>(src.z) & one;
	RValue<Int4> bitW = As<Int4>(src.w) & one;

	dst.x = As<Float4>(bitX);
	dst.y = As<Float4>(bitY);
	dst.z = As<Float4>(bitZ);
	dst.w = As<Float4>(bitW);
}
// Unsigned int to bool: each lane becomes the CmpNEQ mask of "unsigned value != 0".
void ShaderCore::u2b(Vector4f &dst, const Vector4f &src)
{
	UInt4 zero(0);

	RValue<UInt4> nonZeroX = CmpNEQ(As<UInt4>(src.x), zero);
	RValue<UInt4> nonZeroY = CmpNEQ(As<UInt4>(src.y), zero);
	RValue<UInt4> nonZeroZ = CmpNEQ(As<UInt4>(src.z), zero);
	RValue<UInt4> nonZeroW = CmpNEQ(As<UInt4>(src.w), zero);

	dst.x = As<Float4>(nonZeroX);
	dst.y = As<Float4>(nonZeroY);
	dst.z = As<Float4>(nonZeroZ);
	dst.w = As<Float4>(nonZeroW);
}
// Bool to unsigned int: keeps only the lowest bit of each lane, so the result
// is the unsigned integer 0 or 1 depending on that bit of the source.
void ShaderCore::b2u(Vector4f &dst, const Vector4f &src)
{
	UInt4 one(1);

	RValue<UInt4> bitX = As<UInt4>(src.x) & one;
	RValue<UInt4> bitY = As<UInt4>(src.y) & one;
	RValue<UInt4> bitZ = As<UInt4>(src.z) & one;
	RValue<UInt4> bitW = As<UInt4>(src.w) & one;

	dst.x = As<Float4>(bitX);
	dst.y = As<Float4>(bitY);
	dst.z = As<Float4>(bitZ);
	dst.w = As<Float4>(bitW);
}
void ShaderCore::add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
dst.x = src0.x + src1.x;
......@@ -637,6 +697,14 @@ namespace sw
dst.w = src0.w + src1.w;
}
// Integer addition: both operands are viewed as Int4, added per component,
// and the integer bit pattern of the sum is stored in dst.
void ShaderCore::iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	RValue<Int4> sumX = As<Int4>(src0.x) + As<Int4>(src1.x);
	RValue<Int4> sumY = As<Int4>(src0.y) + As<Int4>(src1.y);
	RValue<Int4> sumZ = As<Int4>(src0.z) + As<Int4>(src1.z);
	RValue<Int4> sumW = As<Int4>(src0.w) + As<Int4>(src1.w);

	dst.x = As<Float4>(sumX);
	dst.y = As<Float4>(sumY);
	dst.z = As<Float4>(sumZ);
	dst.w = As<Float4>(sumW);
}
void ShaderCore::sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
dst.x = src0.x - src1.x;
......@@ -645,6 +713,14 @@ namespace sw
dst.w = src0.w - src1.w;
}
// Integer subtraction (src0 - src1): both operands are viewed as Int4 and the
// integer bit pattern of the difference is stored in dst.
void ShaderCore::isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	RValue<Int4> diffX = As<Int4>(src0.x) - As<Int4>(src1.x);
	RValue<Int4> diffY = As<Int4>(src0.y) - As<Int4>(src1.y);
	RValue<Int4> diffZ = As<Int4>(src0.z) - As<Int4>(src1.z);
	RValue<Int4> diffW = As<Int4>(src0.w) - As<Int4>(src1.w);

	dst.x = As<Float4>(diffX);
	dst.y = As<Float4>(diffY);
	dst.z = As<Float4>(diffZ);
	dst.w = As<Float4>(diffW);
}
void ShaderCore::mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
{
dst.x = src0.x * src1.x + src2.x;
......@@ -653,6 +729,14 @@ namespace sw
dst.w = src0.w * src1.w + src2.w;
}
// Integer multiply-add (src0 * src1 + src2): all operands are viewed as Int4
// and the integer bit pattern of the result is stored in dst.
void ShaderCore::imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
{
	RValue<Int4> madX = As<Int4>(src0.x) * As<Int4>(src1.x) + As<Int4>(src2.x);
	RValue<Int4> madY = As<Int4>(src0.y) * As<Int4>(src1.y) + As<Int4>(src2.y);
	RValue<Int4> madZ = As<Int4>(src0.z) * As<Int4>(src1.z) + As<Int4>(src2.z);
	RValue<Int4> madW = As<Int4>(src0.w) * As<Int4>(src1.w) + As<Int4>(src2.w);

	dst.x = As<Float4>(madX);
	dst.y = As<Float4>(madY);
	dst.z = As<Float4>(madZ);
	dst.w = As<Float4>(madW);
}
void ShaderCore::mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
dst.x = src0.x * src1.x;
......@@ -661,6 +745,14 @@ namespace sw
dst.w = src0.w * src1.w;
}
// Integer multiplication: both operands are viewed as Int4, multiplied per
// component, and the integer bit pattern of the product is stored in dst.
void ShaderCore::imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	RValue<Int4> prodX = As<Int4>(src0.x) * As<Int4>(src1.x);
	RValue<Int4> prodY = As<Int4>(src0.y) * As<Int4>(src1.y);
	RValue<Int4> prodZ = As<Int4>(src0.z) * As<Int4>(src1.z);
	RValue<Int4> prodW = As<Int4>(src0.w) * As<Int4>(src1.w);

	dst.x = As<Float4>(prodX);
	dst.y = As<Float4>(prodY);
	dst.z = As<Float4>(prodZ);
	dst.w = As<Float4>(prodW);
}
void ShaderCore::rcpx(Vector4f &dst, const Vector4f &src, bool pp)
{
Float4 rcp = reciprocal(src.x, pp, true);
......@@ -679,6 +771,32 @@ namespace sw
dst.w = src0.w / src1.w;
}
// Signed integer division (src0 / src1), per component, on the integer bit
// patterns held in the Float4 lanes.
//
// A lane whose divisor is zero is divided by INT_MAX instead, so the generated
// code never performs an integer division by zero.
//
// The sanitized divisor lives in a local rather than in dst: writing it into
// dst first would overwrite the dividend whenever the caller passes the same
// vector as both dst and src0.
//
// NOTE(review): INT_MIN / -1 is not guarded here; whether that can fault
// depends on how the backend lowers the division - confirm.
void ShaderCore::idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	Float4 intMax(As<Float4>(Int4(INT_MAX)));
	Float4 divisor;

	cmp0i(divisor, src1.x, intMax, src1.x);
	dst.x = As<Float4>(As<Int4>(src0.x) / As<Int4>(divisor));

	cmp0i(divisor, src1.y, intMax, src1.y);
	dst.y = As<Float4>(As<Int4>(src0.y) / As<Int4>(divisor));

	cmp0i(divisor, src1.z, intMax, src1.z);
	dst.z = As<Float4>(As<Int4>(src0.z) / As<Int4>(divisor));

	cmp0i(divisor, src1.w, intMax, src1.w);
	dst.w = As<Float4>(As<Int4>(src0.w) / As<Int4>(divisor));
}
// Unsigned integer division (src0 / src1), per component, on the integer bit
// patterns held in the Float4 lanes.
//
// A lane whose divisor is zero is divided by UINT_MAX instead, so the
// generated code never performs an integer division by zero.
//
// The sanitized divisor lives in a local rather than in dst: writing it into
// dst first would overwrite the dividend whenever the caller passes the same
// vector as both dst and src0.
void ShaderCore::udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	Float4 uintMax(As<Float4>(UInt4(UINT_MAX)));
	Float4 divisor;

	cmp0i(divisor, src1.x, uintMax, src1.x);
	dst.x = As<Float4>(As<UInt4>(src0.x) / As<UInt4>(divisor));

	cmp0i(divisor, src1.y, uintMax, src1.y);
	dst.y = As<Float4>(As<UInt4>(src0.y) / As<UInt4>(divisor));

	cmp0i(divisor, src1.z, uintMax, src1.z);
	dst.z = As<Float4>(As<UInt4>(src0.z) / As<UInt4>(divisor));

	cmp0i(divisor, src1.w, uintMax, src1.w);
	dst.w = As<Float4>(As<UInt4>(src0.w) / As<UInt4>(divisor));
}
void ShaderCore::mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
dst.x = modulo(src0.x, src1.x);
......@@ -687,6 +805,53 @@ namespace sw
dst.w = modulo(src0.w, src1.w);
}
// Signed integer remainder (src0 % src1), per component, on the integer bit
// patterns held in the Float4 lanes.
//
// A lane whose divisor is zero uses INT_MAX as the divisor instead, mirroring
// idiv(). Substituting the dividend (as was done previously) still divides by
// zero when the dividend is zero as well (0 % 0).
//
// The sanitized divisor lives in a local rather than in dst: writing it into
// dst first would overwrite the dividend whenever the caller passes the same
// vector as both dst and src0.
//
// NOTE(review): INT_MIN % -1 is not guarded here; whether that can fault
// depends on how the backend lowers the remainder - confirm.
void ShaderCore::imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	Float4 intMax(As<Float4>(Int4(INT_MAX)));
	Float4 divisor;

	cmp0i(divisor, src1.x, intMax, src1.x);
	dst.x = As<Float4>(As<Int4>(src0.x) % As<Int4>(divisor));

	cmp0i(divisor, src1.y, intMax, src1.y);
	dst.y = As<Float4>(As<Int4>(src0.y) % As<Int4>(divisor));

	cmp0i(divisor, src1.z, intMax, src1.z);
	dst.z = As<Float4>(As<Int4>(src0.z) % As<Int4>(divisor));

	cmp0i(divisor, src1.w, intMax, src1.w);
	dst.w = As<Float4>(As<Int4>(src0.w) % As<Int4>(divisor));
}
// Unsigned integer remainder (src0 % src1), per component, on the integer bit
// patterns held in the Float4 lanes.
//
// A lane whose divisor is zero uses UINT_MAX as the divisor instead, mirroring
// udiv(). Substituting the dividend (as was done previously) still divides by
// zero when the dividend is zero as well (0 % 0).
//
// The sanitized divisor lives in a local rather than in dst: writing it into
// dst first would overwrite the dividend whenever the caller passes the same
// vector as both dst and src0.
void ShaderCore::umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	Float4 uintMax(As<Float4>(UInt4(UINT_MAX)));
	Float4 divisor;

	cmp0i(divisor, src1.x, uintMax, src1.x);
	dst.x = As<Float4>(As<UInt4>(src0.x) % As<UInt4>(divisor));

	cmp0i(divisor, src1.y, uintMax, src1.y);
	dst.y = As<Float4>(As<UInt4>(src0.y) % As<UInt4>(divisor));

	cmp0i(divisor, src1.z, uintMax, src1.z);
	dst.z = As<Float4>(As<UInt4>(src0.z) % As<UInt4>(divisor));

	cmp0i(divisor, src1.w, uintMax, src1.w);
	dst.w = As<Float4>(As<UInt4>(src0.w) % As<UInt4>(divisor));
}
// Left shift: src0 viewed as Int4 is shifted left by the per-lane counts in
// src1 (also viewed as Int4); the integer bits of the result go to dst.
void ShaderCore::shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	RValue<Int4> shiftedX = As<Int4>(src0.x) << As<Int4>(src1.x);
	RValue<Int4> shiftedY = As<Int4>(src0.y) << As<Int4>(src1.y);
	RValue<Int4> shiftedZ = As<Int4>(src0.z) << As<Int4>(src1.z);
	RValue<Int4> shiftedW = As<Int4>(src0.w) << As<Int4>(src1.w);

	dst.x = As<Float4>(shiftedX);
	dst.y = As<Float4>(shiftedY);
	dst.z = As<Float4>(shiftedZ);
	dst.w = As<Float4>(shiftedW);
}
// Signed right shift: src0 viewed as Int4 is shifted right by the per-lane
// counts in src1 (also viewed as Int4); the integer bits of the result go to dst.
void ShaderCore::ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	RValue<Int4> shiftedX = As<Int4>(src0.x) >> As<Int4>(src1.x);
	RValue<Int4> shiftedY = As<Int4>(src0.y) >> As<Int4>(src1.y);
	RValue<Int4> shiftedZ = As<Int4>(src0.z) >> As<Int4>(src1.z);
	RValue<Int4> shiftedW = As<Int4>(src0.w) >> As<Int4>(src1.w);

	dst.x = As<Float4>(shiftedX);
	dst.y = As<Float4>(shiftedY);
	dst.z = As<Float4>(shiftedZ);
	dst.w = As<Float4>(shiftedW);
}
// Unsigned right shift: src0 viewed as UInt4 is shifted right by the per-lane
// counts in src1 (also viewed as UInt4); the integer bits of the result go to dst.
void ShaderCore::ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	RValue<UInt4> shiftedX = As<UInt4>(src0.x) >> As<UInt4>(src1.x);
	RValue<UInt4> shiftedY = As<UInt4>(src0.y) >> As<UInt4>(src1.y);
	RValue<UInt4> shiftedZ = As<UInt4>(src0.z) >> As<UInt4>(src1.z);
	RValue<UInt4> shiftedW = As<UInt4>(src0.w) >> As<UInt4>(src1.w);

	dst.x = As<Float4>(shiftedX);
	dst.y = As<Float4>(shiftedY);
	dst.z = As<Float4>(shiftedZ);
	dst.w = As<Float4>(shiftedW);
}
void ShaderCore::rsqx(Vector4f &dst, const Vector4f &src, bool pp)
{
Float4 rsq = reciprocalSquareRoot(src.x, true, pp);
......@@ -818,6 +983,22 @@ namespace sw
dst.w = Min(src0.w, src1.w);
}
// Signed integer minimum: per component, Min() of both operands viewed as
// Int4; the integer bits of the result go to dst.
void ShaderCore::imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	RValue<Int4> lowX = Min(As<Int4>(src0.x), As<Int4>(src1.x));
	RValue<Int4> lowY = Min(As<Int4>(src0.y), As<Int4>(src1.y));
	RValue<Int4> lowZ = Min(As<Int4>(src0.z), As<Int4>(src1.z));
	RValue<Int4> lowW = Min(As<Int4>(src0.w), As<Int4>(src1.w));

	dst.x = As<Float4>(lowX);
	dst.y = As<Float4>(lowY);
	dst.z = As<Float4>(lowZ);
	dst.w = As<Float4>(lowW);
}
// Unsigned integer minimum: per component, Min() of both operands viewed as
// UInt4; the integer bits of the result go to dst.
void ShaderCore::umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	RValue<UInt4> lowX = Min(As<UInt4>(src0.x), As<UInt4>(src1.x));
	RValue<UInt4> lowY = Min(As<UInt4>(src0.y), As<UInt4>(src1.y));
	RValue<UInt4> lowZ = Min(As<UInt4>(src0.z), As<UInt4>(src1.z));
	RValue<UInt4> lowW = Min(As<UInt4>(src0.w), As<UInt4>(src1.w));

	dst.x = As<Float4>(lowX);
	dst.y = As<Float4>(lowY);
	dst.z = As<Float4>(lowZ);
	dst.w = As<Float4>(lowW);
}
void ShaderCore::max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
dst.x = Max(src0.x, src1.x);
......@@ -826,6 +1007,22 @@ namespace sw
dst.w = Max(src0.w, src1.w);
}
// Signed integer maximum: per component, Max() of both operands viewed as
// Int4; the integer bits of the result go to dst.
void ShaderCore::imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	RValue<Int4> highX = Max(As<Int4>(src0.x), As<Int4>(src1.x));
	RValue<Int4> highY = Max(As<Int4>(src0.y), As<Int4>(src1.y));
	RValue<Int4> highZ = Max(As<Int4>(src0.z), As<Int4>(src1.z));
	RValue<Int4> highW = Max(As<Int4>(src0.w), As<Int4>(src1.w));

	dst.x = As<Float4>(highX);
	dst.y = As<Float4>(highY);
	dst.z = As<Float4>(highZ);
	dst.w = As<Float4>(highW);
}
// Unsigned integer maximum: per component, Max() of both operands viewed as
// UInt4; the integer bits of the result go to dst.
//
// The lanes must be compared as UInt4, like umin() does. Comparing them as
// Int4 would treat every value with the top bit set (>= 0x80000000) as
// negative and therefore rank it below all smaller unsigned values.
void ShaderCore::umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	dst.x = As<Float4>(Max(As<UInt4>(src0.x), As<UInt4>(src1.x)));
	dst.y = As<Float4>(Max(As<UInt4>(src0.y), As<UInt4>(src1.y)));
	dst.z = As<Float4>(Max(As<UInt4>(src0.z), As<UInt4>(src1.z)));
	dst.w = As<Float4>(Max(As<UInt4>(src0.w), As<UInt4>(src1.w)));
}
void ShaderCore::slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
dst.x = As<Float4>(As<Int4>(CmpLT(src0.x, src1.x)) & As<Int4>(Float4(1.0f)));
......@@ -937,38 +1134,6 @@ namespace sw
Float4 tw = Min(Max((x.w - edge0.w) / (edge1.w - edge0.w), Float4(0.0f)), Float4(1.0f)); dst.w = tw * tw * (Float4(3.0f) - Float4(2.0f) * tw);
}
// Applies sw::floatBitsToInt() to each of the four components of src.
void ShaderCore::floatBitsToInt(Vector4i &dst, const Vector4f &src)
{
	Int4 bitsX = sw::floatBitsToInt(src.x);
	Int4 bitsY = sw::floatBitsToInt(src.y);
	Int4 bitsZ = sw::floatBitsToInt(src.z);
	Int4 bitsW = sw::floatBitsToInt(src.w);

	dst.x = bitsX;
	dst.y = bitsY;
	dst.z = bitsZ;
	dst.w = bitsW;
}
// Applies sw::floatBitsToUInt() to each of the four components of src.
void ShaderCore::floatBitsToUInt(Vector4u &dst, const Vector4f &src)
{
	UInt4 bitsX = sw::floatBitsToUInt(src.x);
	UInt4 bitsY = sw::floatBitsToUInt(src.y);
	UInt4 bitsZ = sw::floatBitsToUInt(src.z);
	UInt4 bitsW = sw::floatBitsToUInt(src.w);

	dst.x = bitsX;
	dst.y = bitsY;
	dst.z = bitsZ;
	dst.w = bitsW;
}
// Applies sw::intBitsToFloat() to each of the four components of src.
void ShaderCore::intBitsToFloat(Vector4f &dst, const Vector4i &src)
{
	Float4 bitsX = sw::intBitsToFloat(src.x);
	Float4 bitsY = sw::intBitsToFloat(src.y);
	Float4 bitsZ = sw::intBitsToFloat(src.z);
	Float4 bitsW = sw::intBitsToFloat(src.w);

	dst.x = bitsX;
	dst.y = bitsY;
	dst.z = bitsZ;
	dst.w = bitsW;
}
// Applies sw::uintBitsToFloat() to each of the four components of src.
void ShaderCore::uintBitsToFloat(Vector4f &dst, const Vector4u &src)
{
	Float4 bitsX = sw::uintBitsToFloat(src.x);
	Float4 bitsY = sw::uintBitsToFloat(src.y);
	Float4 bitsZ = sw::uintBitsToFloat(src.z);
	Float4 bitsW = sw::uintBitsToFloat(src.w);

	dst.x = bitsX;
	dst.y = bitsY;
	dst.z = bitsZ;
	dst.w = bitsW;
}
void ShaderCore::frc(Vector4f &dst, const Vector4f &src)
{
dst.x = Frac(src.x);
......@@ -1393,17 +1558,17 @@ namespace sw
// Picks, per lane, the component of src0 selected by the index in src1.
//
// src1 holds the index as an integer bit pattern (it is compared as Int4):
// 1 selects y, 2 selects z, 3 selects w, and any other value selects x.
//
// The earlier float-valued comparisons (index == 1.0f etc.) have been dropped:
// their result was overwritten by the first integer select below, so they only
// added dead work and contradicted the integer index convention.
void ShaderCore::extract(Float4 &dst, const Vector4f &src0, const Float4 &src1)
{
	select(dst, CmpEQ(As<Int4>(src1), Int4(1)), src0.y, src0.x);
	select(dst, CmpEQ(As<Int4>(src1), Int4(2)), src0.z, dst);
	select(dst, CmpEQ(As<Int4>(src1), Int4(3)), src0.w, dst);
}
// Copies src to dst, replacing the component selected by index with element.
//
// index holds the component number as an integer bit pattern (it is compared
// as Int4): 0 replaces x, 1 replaces y, 2 replaces z, 3 replaces w. Lanes
// whose index matches none of these receive src unchanged.
//
// The earlier float-valued comparisons (index == 0.0f etc.) have been dropped:
// each dst component was written a second time by the integer selects, and
// when dst and src are the same vector the first pass could modify src
// before the second pass read it.
void ShaderCore::insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index)
{
	select(dst.x, CmpEQ(As<Int4>(index), Int4(0)), element, src.x);
	select(dst.y, CmpEQ(As<Int4>(index), Int4(1)), element, src.y);
	select(dst.z, CmpEQ(As<Int4>(index), Int4(2)), element, src.z);
	select(dst.w, CmpEQ(As<Int4>(index), Int4(3)), element, src.w);
}
void ShaderCore::sgn(Float4 &dst, const Float4 &src)
......@@ -1419,6 +1584,12 @@ namespace sw
select(dst, pos, src1, src2);
}
// Integer "compare with zero" select: builds a mask of the lanes of src0 whose
// integer bit pattern equals 0 and hands it to select() together with src1 and
// src2 (presumably src1 is taken where the mask is set, src2 elsewhere - see
// select()).
void ShaderCore::cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2)
{
	Int4 isZero = CmpEQ(Int4(0), As<Int4>(src0));

	select(dst, isZero, src1, src2);
}
void ShaderCore::select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2)
{
// FIXME: LLVM vector select
......@@ -1515,6 +1686,51 @@ namespace sw
}
}
// Unsigned integer comparison of src0 against src1, per component.
//
// Both operands are viewed as UInt4 and compared with the predicate chosen by
// control; the resulting comparison mask is stored as bits in dst. An
// unrecognized control value asserts and leaves dst untouched.
void ShaderCore::ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control)
{
	// View every operand lane as unsigned once, up front.
	RValue<UInt4> lhsX = As<UInt4>(src0.x);
	RValue<UInt4> lhsY = As<UInt4>(src0.y);
	RValue<UInt4> lhsZ = As<UInt4>(src0.z);
	RValue<UInt4> lhsW = As<UInt4>(src0.w);

	RValue<UInt4> rhsX = As<UInt4>(src1.x);
	RValue<UInt4> rhsY = As<UInt4>(src1.y);
	RValue<UInt4> rhsZ = As<UInt4>(src1.z);
	RValue<UInt4> rhsW = As<UInt4>(src1.w);

	switch(control)
	{
	case Shader::CONTROL_GT:   // src0 > src1, expressed as "not less-or-equal"
		dst.x = As<Float4>(CmpNLE(lhsX, rhsX));
		dst.y = As<Float4>(CmpNLE(lhsY, rhsY));
		dst.z = As<Float4>(CmpNLE(lhsZ, rhsZ));
		dst.w = As<Float4>(CmpNLE(lhsW, rhsW));
		break;
	case Shader::CONTROL_EQ:
		dst.x = As<Float4>(CmpEQ(lhsX, rhsX));
		dst.y = As<Float4>(CmpEQ(lhsY, rhsY));
		dst.z = As<Float4>(CmpEQ(lhsZ, rhsZ));
		dst.w = As<Float4>(CmpEQ(lhsW, rhsW));
		break;
	case Shader::CONTROL_GE:   // src0 >= src1, expressed as "not less-than"
		dst.x = As<Float4>(CmpNLT(lhsX, rhsX));
		dst.y = As<Float4>(CmpNLT(lhsY, rhsY));
		dst.z = As<Float4>(CmpNLT(lhsZ, rhsZ));
		dst.w = As<Float4>(CmpNLT(lhsW, rhsW));
		break;
	case Shader::CONTROL_LT:
		dst.x = As<Float4>(CmpLT(lhsX, rhsX));
		dst.y = As<Float4>(CmpLT(lhsY, rhsY));
		dst.z = As<Float4>(CmpLT(lhsZ, rhsZ));
		dst.w = As<Float4>(CmpLT(lhsW, rhsW));
		break;
	case Shader::CONTROL_NE:
		dst.x = As<Float4>(CmpNEQ(lhsX, rhsX));
		dst.y = As<Float4>(CmpNEQ(lhsY, rhsY));
		dst.z = As<Float4>(CmpNEQ(lhsZ, rhsZ));
		dst.w = As<Float4>(CmpNEQ(lhsW, rhsW));
		break;
	case Shader::CONTROL_LE:
		dst.x = As<Float4>(CmpLE(lhsX, rhsX));
		dst.y = As<Float4>(CmpLE(lhsY, rhsY));
		dst.z = As<Float4>(CmpLE(lhsZ, rhsZ));
		dst.w = As<Float4>(CmpLE(lhsW, rhsW));
		break;
	default:
		ASSERT(false);
	}
}
void ShaderCore::all(Float4 &dst, const Vector4f &src)
{
dst = As<Float4>(As<Int4>(src.x) & As<Int4>(src.y) & As<Int4>(src.z) & As<Int4>(src.w));
......@@ -1533,18 +1749,49 @@ namespace sw
dst.w = As<Float4>(As<Int4>(src.w) ^ Int4(0xFFFFFFFF));
}
// Bitwise OR of src0 and src1, per component, on the lanes' bit patterns.
//
// Only the Vector4f signature is kept; the leftover scalar (Float4) signature
// line that preceded it made the definition ill-formed.
//
// NOTE(review): `or` is an alternative operator token in standard C++, so this
// function name only builds on toolchains that do not treat it as a keyword.
void ShaderCore::or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	dst.x = As<Float4>(As<Int4>(src0.x) | As<Int4>(src1.x));
	dst.y = As<Float4>(As<Int4>(src0.y) | As<Int4>(src1.y));
	dst.z = As<Float4>(As<Int4>(src0.z) | As<Int4>(src1.z));
	dst.w = As<Float4>(As<Int4>(src0.w) | As<Int4>(src1.w));
}
// Bitwise XOR of src0 and src1, per component, on the lanes' bit patterns.
//
// NOTE(review): `xor` is an alternative operator token in standard C++, so
// this function name only builds on toolchains that do not treat it as a keyword.
void ShaderCore::xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	RValue<Int4> bitsX = As<Int4>(src0.x) ^ As<Int4>(src1.x);
	RValue<Int4> bitsY = As<Int4>(src0.y) ^ As<Int4>(src1.y);
	RValue<Int4> bitsZ = As<Int4>(src0.z) ^ As<Int4>(src1.z);
	RValue<Int4> bitsW = As<Int4>(src0.w) ^ As<Int4>(src1.w);

	dst.x = As<Float4>(bitsX);
	dst.y = As<Float4>(bitsY);
	dst.z = As<Float4>(bitsZ);
	dst.w = As<Float4>(bitsW);
}
// Bitwise AND of src0 and src1, per component, on the lanes' bit patterns.
//
// The leftover scalar statement `dst = As<Float4>(As<Int4>(src0) | ...)` has
// been removed: it applied OR rather than AND, and it operated on whole
// Vector4f objects as if they were single Float4 values.
//
// NOTE(review): `and` is an alternative operator token in standard C++, so
// this function name only builds on toolchains that do not treat it as a keyword.
void ShaderCore::and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
	dst.x = As<Float4>(As<Int4>(src0.x) & As<Int4>(src1.x));
	dst.y = As<Float4>(As<Int4>(src0.y) & As<Int4>(src1.y));
	dst.z = As<Float4>(As<Int4>(src0.z) & As<Int4>(src1.z));
	dst.w = As<Float4>(As<Int4>(src0.w) & As<Int4>(src1.w));
}
void ShaderCore::xor(Float4 &dst, const Float4 &src0, const Float4 &src1)
void ShaderCore::equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
dst = As<Float4>(As<Int4>(src0) ^ As<Int4>(src1));
dst.x = As<Float4>(CmpEQ(As<UInt4>(src0.x), As<UInt4>(src1.x)) &
CmpEQ(As<UInt4>(src0.y), As<UInt4>(src1.y)) &
CmpEQ(As<UInt4>(src0.z), As<UInt4>(src1.z)) &
CmpEQ(As<UInt4>(src0.w), As<UInt4>(src1.w)));
dst.y = dst.x;
dst.z = dst.x;
dst.w = dst.x;
}
void ShaderCore::and(Float4 &dst, const Float4 &src0, const Float4 &src1)
void ShaderCore::notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
{
dst = As<Float4>(As<Int4>(src0) & As<Int4>(src1));
dst.x = As<Float4>(CmpNEQ(As<UInt4>(src0.x), As<UInt4>(src1.x)) |
CmpNEQ(As<UInt4>(src0.y), As<UInt4>(src1.y)) |
CmpNEQ(As<UInt4>(src0.z), As<UInt4>(src1.z)) |
CmpNEQ(As<UInt4>(src0.w), As<UInt4>(src1.w)));
dst.y = dst.x;
dst.z = dst.x;
dst.w = dst.x;
}
}
......@@ -105,10 +105,6 @@ namespace sw
Float4 arccosh(RValue<Float4> x, bool pp = false); // Limited to x >= 1
Float4 arcsinh(RValue<Float4> x, bool pp = false);
Float4 arctanh(RValue<Float4> x, bool pp = false); // Limited to ]-1, 1[ range
Int4 floatBitsToInt(RValue<Float4> x);
UInt4 floatBitsToUInt(RValue<Float4> x);
Float4 intBitsToFloat(RValue<Int4> x);
Float4 uintBitsToFloat(RValue<UInt4> x);
Float4 dot2(const Vector4f &v0, const Vector4f &v1);
Float4 dot3(const Vector4f &v0, const Vector4f &v1);
......@@ -243,15 +239,36 @@ namespace sw
public:
void mov(Vector4f &dst, const Vector4f &src, bool floorToInteger = false);
void neg(Vector4f &dst, const Vector4f &src);
void ineg(Vector4f &dst, const Vector4f &src);
void f2b(Vector4f &dst, const Vector4f &src);
void b2f(Vector4f &dst, const Vector4f &src);
void f2i(Vector4f &dst, const Vector4f &src);
void i2f(Vector4f &dst, const Vector4f &src);
void f2u(Vector4f &dst, const Vector4f &src);
void u2f(Vector4f &dst, const Vector4f &src);
void i2b(Vector4f &dst, const Vector4f &src);
void b2i(Vector4f &dst, const Vector4f &src);
void u2b(Vector4f &dst, const Vector4f &src);
void b2u(Vector4f &dst, const Vector4f &src);
void add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void rcpx(Vector4f &dst, const Vector4f &src, bool pp = false);
void div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void rsqx(Vector4f &dst, const Vector4f &src, bool pp = false);
void sqrt(Vector4f &dst, const Vector4f &src, bool pp = false);
void rsq(Vector4f &dst, const Vector4f &src, bool pp = false);
......@@ -268,7 +285,11 @@ namespace sw
void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void step(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void exp2x(Vector4f &dst, const Vector4f &src, bool pp = false);
......@@ -281,10 +302,6 @@ namespace sw
void att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void smooth(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void floatBitsToInt(Vector4i &dst, const Vector4f &src);
void floatBitsToUInt(Vector4u &dst, const Vector4f &src);
void intBitsToFloat(Vector4f &dst, const Vector4i &src);
void uintBitsToFloat(Vector4f &dst, const Vector4u &src);
void frc(Vector4f &dst, const Vector4f &src);
void trunc(Vector4f &dst, const Vector4f &src);
void floor(Vector4f &dst, const Vector4f &src);
......@@ -330,19 +347,23 @@ namespace sw
void cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
void icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
void ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
void select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void extract(Float4 &dst, const Vector4f &src0, const Float4 &src1);
void insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index);
void all(Float4 &dst, const Vector4f &src);
void any(Float4 &dst, const Vector4f &src);
void not(Vector4f &dst, const Vector4f &src);
void or(Float4 &dst, const Float4 &src0, const Float4 &src1);
void xor(Float4 &dst, const Float4 &src0, const Float4 &src1);
void and(Float4 &dst, const Float4 &src0, const Float4 &src1);
void or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
private:
void sgn(Float4 &dst, const Float4 &src);
void cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
void cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
void select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2);
};
}
......
......@@ -246,9 +246,9 @@ namespace sw
case Shader::OPCODE_ALL: all(d.x, s0); break;
case Shader::OPCODE_ANY: any(d.x, s0); break;
case Shader::OPCODE_NOT: not(d, s0); break;
case Shader::OPCODE_OR: or(d.x, s0.x, s1.x); break;
case Shader::OPCODE_XOR: xor(d.x, s0.x, s1.x); break;
case Shader::OPCODE_AND: and(d.x, s0.x, s1.x); break;
case Shader::OPCODE_OR: or(d, s0, s1); break;
case Shader::OPCODE_XOR: xor(d, s0, s1); break;
case Shader::OPCODE_AND: and(d, s0, s1); break;
case Shader::OPCODE_TEXLDL: TEXLDL(r, d, s0, src1); break;
case Shader::OPCODE_TEX: TEX(r, d, s0, src1); break;
case Shader::OPCODE_END: break;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment