Commit b40a2568 by Nicolas Capens

Optimize Int2 construction.

Change-Id: Ibab854164a45c998976e65b8bfec80a8a688461b
Reviewed-on: https://swiftshader-review.googlesource.com/4511
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
parent 62abb558
......@@ -4623,13 +4623,23 @@ namespace sw
// Constructs an Int2 from two Int rvalues, `lo` and `hi`, and stores the
// combined value into this variable via storeValue().
//
// NOTE(review): the hunk header for this listing reports 13 old lines versus
// 23 new lines, so it appears to show removed and added diff lines together
// without +/- markers. The five statements directly below look like the
// pre-change body and the if/else after them like the post-change body;
// confirm against the repository before treating this text as one function.
Int2::Int2(RValue<Int> lo, RValue<Int> hi)
{
// Generic path: bitcast each Int to a one-element vector of Int, combine the
// two vectors with a shuffle using the constant indices {0, 1}, then bitcast
// the result to the Int2 type and store it.
Constant *shuffle[2];
shuffle[0] = Nucleus::createConstantInt(0);
shuffle[1] = Nucleus::createConstantInt(1);
Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, VectorType::get(Int::getType(), 1)), Nucleus::createBitCast(hi.value, VectorType::get(Int::getType(), 1)), Nucleus::createConstantVector(shuffle, 2));
storeValue(Nucleus::createBitCast(packed, Int2::getType()));
// Path taken when CPUID::supportsMMX2() reports true: each input is converted
// through RValue<UInt> and Long1, reinterpreted as Int2, and the two are
// combined with UnpackLow. The instruction sequence the author expects is
// listed in the comments inside the branch.
if(CPUID::supportsMMX2())
{
// movd mm0, lo
// movd mm1, hi
// punpckldq mm0, mm1
storeValue(As<Int2>(UnpackLow(As<Int2>(Long1(RValue<UInt>(lo))), As<Int2>(Long1(RValue<UInt>(hi))))).value);
}
else
{
// Fallback when CPUID::supportsMMX2() is false: the same bitcast + shuffle
// sequence as the statements at the top of this listing.
Constant *shuffle[2];
shuffle[0] = Nucleus::createConstantInt(0);
shuffle[1] = Nucleus::createConstantInt(1);
Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, VectorType::get(Int::getType(), 1)), Nucleus::createBitCast(hi.value, VectorType::get(Int::getType(), 1)), Nucleus::createConstantVector(shuffle, 2));
storeValue(Nucleus::createBitCast(packed, Int2::getType()));
}
}
RValue<Int2> Int2::operator=(RValue<Int2> rhs) const
......
......@@ -1013,14 +1013,14 @@ namespace sw
Short4 c01;
Short4 c23;
Pointer<Byte> buffer;
Pointer<Byte> buffer2;
switch(state.targetFormat[index])
{
case FORMAT_R5G6B5:
buffer = cBuffer + 2 * x;
c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0));
buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
buffer2 = buffer + *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
pixel.x = c01 & Short4(0xF800u);
pixel.y = (c01 & Short4(0x07E0u)) << 5;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment