Commit f8beb4be by Nicolas Capens

Optimize replication.

Since floating-point scalars are stored in vector registers, Subzero allows us to just bitcast between them, eliminating a load and insert before shuffling.

Change-Id: Ibccf242fd4cfc28604f35f420a04fd4ee6eabe52
Reviewed-on: https://swiftshader-review.googlesource.com/8575
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
parent 9b0e6557
...@@ -2928,9 +2928,9 @@ namespace sw ...@@ -2928,9 +2928,9 @@ namespace sw
Short4::Short4(RValue<Int4> cast) Short4::Short4(RValue<Int4> cast)
{ {
int pshufb[16] = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13}; int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType()); Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
Value *packed = Nucleus::createShuffleVector(byte16, byte16, pshufb); Value *packed = Nucleus::createShuffleVector(short8, short8, select);
Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value; Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;
Value *short4 = Nucleus::createBitCast(int2, Short4::getType()); Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
...@@ -4691,7 +4691,7 @@ namespace sw ...@@ -4691,7 +4691,7 @@ namespace sw
RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y) RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
{ {
int shuffle[16] = {0, 4, 1, 5}; // Real type is v4i32 int shuffle[4] = {0, 4, 1, 5}; // Real type is v4i32
auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return As<Short4>(Swizzle(lowHigh, 0xEE)); return As<Short4>(Swizzle(lowHigh, 0xEE));
} }
...@@ -5008,11 +5008,10 @@ namespace sw ...@@ -5008,11 +5008,10 @@ namespace sw
Int4::Int4(RValue<Int> rhs) Int4::Int4(RValue<Int> rhs)
{ {
Value *vector = loadValue(); Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
int swizzle[4] = {0, 0, 0, 0}; int swizzle[4] = {0, 0, 0, 0};
Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle); Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
storeValue(replicate); storeValue(replicate);
} }
...@@ -5908,11 +5907,10 @@ namespace sw ...@@ -5908,11 +5907,10 @@ namespace sw
Float4::Float4(RValue<Float> rhs) : FloatXYZW(this) Float4::Float4(RValue<Float> rhs) : FloatXYZW(this)
{ {
Value *vector = loadValue(); Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
int swizzle[4] = {0, 0, 0, 0}; int swizzle[4] = {0, 0, 0, 0};
Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle); Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
storeValue(replicate); storeValue(replicate);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment