Commit b40a2568 by Nicolas Capens

Optimize Int2 construction.

Change-Id: Ibab854164a45c998976e65b8bfec80a8a688461b
Reviewed-on: https://swiftshader-review.googlesource.com/4511
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
parent 62abb558
...@@ -4623,6 +4623,15 @@ namespace sw ...@@ -4623,6 +4623,15 @@ namespace sw
Int2::Int2(RValue<Int> lo, RValue<Int> hi) Int2::Int2(RValue<Int> lo, RValue<Int> hi)
{ {
if(CPUID::supportsMMX2())
{
// movd mm0, lo
// movd mm1, hi
// punpckldq mm0, mm1
storeValue(As<Int2>(UnpackLow(As<Int2>(Long1(RValue<UInt>(lo))), As<Int2>(Long1(RValue<UInt>(hi))))).value);
}
else
{
Constant *shuffle[2]; Constant *shuffle[2];
shuffle[0] = Nucleus::createConstantInt(0); shuffle[0] = Nucleus::createConstantInt(0);
shuffle[1] = Nucleus::createConstantInt(1); shuffle[1] = Nucleus::createConstantInt(1);
...@@ -4631,6 +4640,7 @@ namespace sw ...@@ -4631,6 +4640,7 @@ namespace sw
storeValue(Nucleus::createBitCast(packed, Int2::getType())); storeValue(Nucleus::createBitCast(packed, Int2::getType()));
} }
}
RValue<Int2> Int2::operator=(RValue<Int2> rhs) const RValue<Int2> Int2::operator=(RValue<Int2> rhs) const
{ {
......
...@@ -1013,14 +1013,14 @@ namespace sw ...@@ -1013,14 +1013,14 @@ namespace sw
Short4 c01; Short4 c01;
Short4 c23; Short4 c23;
Pointer<Byte> buffer; Pointer<Byte> buffer;
Pointer<Byte> buffer2;
switch(state.targetFormat[index]) switch(state.targetFormat[index])
{ {
case FORMAT_R5G6B5: case FORMAT_R5G6B5:
buffer = cBuffer + 2 * x; buffer = cBuffer + 2 * x;
c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0)); buffer2 = buffer + *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index])); c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
pixel.x = c01 & Short4(0xF800u); pixel.x = c01 & Short4(0xF800u);
pixel.y = (c01 & Short4(0x07E0u)) << 5; pixel.y = (c01 & Short4(0x07E0u)) << 5;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment