Commit b40a2568 by Nicolas Capens

Optimize Int2 construction.

Change-Id: Ibab854164a45c998976e65b8bfec80a8a688461b
Reviewed-on: https://swiftshader-review.googlesource.com/4511
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
parent 62abb558
...@@ -4623,13 +4623,23 @@ namespace sw ...@@ -4623,13 +4623,23 @@ namespace sw
Int2::Int2(RValue<Int> lo, RValue<Int> hi) Int2::Int2(RValue<Int> lo, RValue<Int> hi)
{ {
Constant *shuffle[2]; if(CPUID::supportsMMX2())
shuffle[0] = Nucleus::createConstantInt(0); {
shuffle[1] = Nucleus::createConstantInt(1); // movd mm0, lo
// movd mm1, hi
Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, VectorType::get(Int::getType(), 1)), Nucleus::createBitCast(hi.value, VectorType::get(Int::getType(), 1)), Nucleus::createConstantVector(shuffle, 2)); // punpckldq mm0, mm1
storeValue(As<Int2>(UnpackLow(As<Int2>(Long1(RValue<UInt>(lo))), As<Int2>(Long1(RValue<UInt>(hi))))).value);
storeValue(Nucleus::createBitCast(packed, Int2::getType())); }
else
{
Constant *shuffle[2];
shuffle[0] = Nucleus::createConstantInt(0);
shuffle[1] = Nucleus::createConstantInt(1);
Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, VectorType::get(Int::getType(), 1)), Nucleus::createBitCast(hi.value, VectorType::get(Int::getType(), 1)), Nucleus::createConstantVector(shuffle, 2));
storeValue(Nucleus::createBitCast(packed, Int2::getType()));
}
} }
RValue<Int2> Int2::operator=(RValue<Int2> rhs) const RValue<Int2> Int2::operator=(RValue<Int2> rhs) const
......
...@@ -1013,14 +1013,14 @@ namespace sw ...@@ -1013,14 +1013,14 @@ namespace sw
Short4 c01; Short4 c01;
Short4 c23; Short4 c23;
Pointer<Byte> buffer; Pointer<Byte> buffer;
Pointer<Byte> buffer2;
switch(state.targetFormat[index]) switch(state.targetFormat[index])
{ {
case FORMAT_R5G6B5: case FORMAT_R5G6B5:
buffer = cBuffer + 2 * x; buffer = cBuffer + 2 * x;
c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0)); buffer2 = buffer + *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index])); c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
pixel.x = c01 & Short4(0xF800u); pixel.x = c01 & Short4(0xF800u);
pixel.y = (c01 & Short4(0x07E0u)) << 5; pixel.y = (c01 & Short4(0x07E0u)) << 5;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment