Commit 9b0e6557 by Nicolas Capens

Use unsigned index array accesses for texture sampling.

Array accesses with unsigned indices can be faster on x86-64 because we can take advantage of implicit zero-extension of 32-bit integers to 64-bit during pointer arithmetic.

Change-Id: I17d531d9ad05c2d2994f007d5444b2a514a591b8
Reviewed-on: https://swiftshader-review.googlesource.com/8571
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
parent 0c2b0584
# This file is used to manage the SwiftShader's dependencies in the Chromium src # This file is used to manage SwiftShader's dependencies in the Chromium src
# repo. It is used by gclient to determine what version of each dependency to # repo. It is used by gclient to determine what version of each dependency to
# check out, and where. # check out, and where.
...@@ -7,7 +7,7 @@ use_relative_paths = True ...@@ -7,7 +7,7 @@ use_relative_paths = True
vars = { vars = {
'chromium_git': 'https://chromium.googlesource.com/', 'chromium_git': 'https://chromium.googlesource.com/',
# Current revision of subzero. # Current revision of subzero.
'subzero_revision': 'fc8f6bfae75430b00d8d6fbf78e62da4c3abed9d', 'subzero_revision': 'c48bb8b02c98ae49438e43aa1143a958784822a5',
} }
deps = { deps = {
......
...@@ -3,5 +3,7 @@ ...@@ -3,5 +3,7 @@
# This list is used by Chromium to make sure that one of the owners in this # This list is used by Chromium to make sure that one of the owners in this
# list has approved a SwiftShader related change before landing it # list has approved a SwiftShader related change before landing it
sugoi@chromium.org
capn@chromium.org capn@chromium.org
capn@google.com
sugoi@chromium.org
sugoi@google.com
\ No newline at end of file
...@@ -1674,7 +1674,7 @@ namespace sw ...@@ -1674,7 +1674,7 @@ namespace sw
return As<Short4>(UShort4(tmp)); return As<Short4>(UShort4(tmp));
} }
void SamplerCore::computeIndices(Int index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function) void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function)
{ {
bool texelFetch = (function == Fetch); bool texelFetch = (function == Fetch);
bool hasOffset = (function.option == Offset); bool hasOffset = (function.option == Offset);
...@@ -1731,8 +1731,8 @@ namespace sw ...@@ -1731,8 +1731,8 @@ namespace sw
{ {
size *= Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth))); size *= Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)));
} }
Int min = Int(0); UInt min = 0;
Int max = size - Int(1); UInt max = size - 1;
for(int i = 0; i < 4; i++) for(int i = 0; i < 4; i++)
{ {
...@@ -1743,7 +1743,7 @@ namespace sw ...@@ -1743,7 +1743,7 @@ namespace sw
void SamplerCore::sampleTexel(Vector4s &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function) void SamplerCore::sampleTexel(Vector4s &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
{ {
Int index[4]; UInt index[4];
computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function); computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);
...@@ -1754,10 +1754,10 @@ namespace sw ...@@ -1754,10 +1754,10 @@ namespace sw
if(has16bitTextureFormat()) if(has16bitTextureFormat())
{ {
c.x = Insert(c.x, *Pointer<Short>(buffer[f0] + 2 * index[0]), 0); c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
c.x = Insert(c.x, *Pointer<Short>(buffer[f1] + 2 * index[1]), 1); c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
c.x = Insert(c.x, *Pointer<Short>(buffer[f2] + 2 * index[2]), 2); c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
c.x = Insert(c.x, *Pointer<Short>(buffer[f3] + 2 * index[3]), 3); c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
switch(state.textureFormat) switch(state.textureFormat)
{ {
...@@ -1776,10 +1776,10 @@ namespace sw ...@@ -1776,10 +1776,10 @@ namespace sw
{ {
case 4: case 4:
{ {
Byte4 c0 = *Pointer<Byte4>(buffer[f0] + 4 * index[0]); Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]];
Byte4 c1 = *Pointer<Byte4>(buffer[f1] + 4 * index[1]); Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]];
Byte4 c2 = *Pointer<Byte4>(buffer[f2] + 4 * index[2]); Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]];
Byte4 c3 = *Pointer<Byte4>(buffer[f3] + 4 * index[3]); Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]];
c.x = Unpack(c0, c1); c.x = Unpack(c0, c1);
c.y = Unpack(c2, c3); c.y = Unpack(c2, c3);
...@@ -1819,10 +1819,10 @@ namespace sw ...@@ -1819,10 +1819,10 @@ namespace sw
break; break;
case 3: case 3:
{ {
Byte4 c0 = *Pointer<Byte4>(buffer[f0] + 4 * index[0]); Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]];
Byte4 c1 = *Pointer<Byte4>(buffer[f1] + 4 * index[1]); Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]];
Byte4 c2 = *Pointer<Byte4>(buffer[f2] + 4 * index[2]); Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]];
Byte4 c3 = *Pointer<Byte4>(buffer[f3] + 4 * index[3]); Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]];
c.x = Unpack(c0, c1); c.x = Unpack(c0, c1);
c.y = Unpack(c2, c3); c.y = Unpack(c2, c3);
...@@ -1857,10 +1857,10 @@ namespace sw ...@@ -1857,10 +1857,10 @@ namespace sw
} }
break; break;
case 2: case 2:
c.x = Insert(c.x, *Pointer<Short>(buffer[f0] + 2 * index[0]), 0); c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
c.x = Insert(c.x, *Pointer<Short>(buffer[f1] + 2 * index[1]), 1); c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
c.x = Insert(c.x, *Pointer<Short>(buffer[f2] + 2 * index[2]), 2); c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
c.x = Insert(c.x, *Pointer<Short>(buffer[f3] + 2 * index[3]), 3); c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
switch(state.textureFormat) switch(state.textureFormat)
{ {
...@@ -1896,10 +1896,10 @@ namespace sw ...@@ -1896,10 +1896,10 @@ namespace sw
switch(textureComponentCount()) switch(textureComponentCount())
{ {
case 4: case 4:
c.x = *Pointer<Short4>(buffer[f0] + 8 * index[0]); c.x = Pointer<Short4>(buffer[f0])[index[0]];
c.y = *Pointer<Short4>(buffer[f1] + 8 * index[1]); c.y = Pointer<Short4>(buffer[f1])[index[1]];
c.z = *Pointer<Short4>(buffer[f2] + 8 * index[2]); c.z = Pointer<Short4>(buffer[f2])[index[2]];
c.w = *Pointer<Short4>(buffer[f3] + 8 * index[3]); c.w = Pointer<Short4>(buffer[f3])[index[3]];
transpose4x4(c.x, c.y, c.z, c.w); transpose4x4(c.x, c.y, c.z, c.w);
break; break;
case 2: case 2:
...@@ -1912,10 +1912,10 @@ namespace sw ...@@ -1912,10 +1912,10 @@ namespace sw
c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z)); c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z));
break; break;
case 1: case 1:
c.x = Insert(c.x, *Pointer<Short>(buffer[f0] + 2 * index[0]), 0); c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
c.x = Insert(c.x, *Pointer<Short>(buffer[f1] + 2 * index[1]), 1); c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
c.x = Insert(c.x, *Pointer<Short>(buffer[f2] + 2 * index[2]), 2); c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
c.x = Insert(c.x, *Pointer<Short>(buffer[f3] + 2 * index[3]), 3); c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
break; break;
default: default:
ASSERT(false); ASSERT(false);
...@@ -1974,25 +1974,25 @@ namespace sw ...@@ -1974,25 +1974,25 @@ namespace sw
const float G0 = (studioSwing * -16 * Yy - 128 * Gu - 128 * Gv) / 255; const float G0 = (studioSwing * -16 * Yy - 128 * Gu - 128 * Gv) / 255;
const float B0 = (studioSwing * -16 * Yy - 128 * Bu) / 255; const float B0 = (studioSwing * -16 * Yy - 128 * Bu) / 255;
Int c0 = Int(*Pointer<Byte>(buffer[0] + index[0])); Int c0 = Int(buffer[0][index[0]]);
Int c1 = Int(*Pointer<Byte>(buffer[0] + index[1])); Int c1 = Int(buffer[0][index[1]]);
Int c2 = Int(*Pointer<Byte>(buffer[0] + index[2])); Int c2 = Int(buffer[0][index[2]]);
Int c3 = Int(*Pointer<Byte>(buffer[0] + index[3])); Int c3 = Int(buffer[0][index[3]]);
c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0))); UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0)));
computeIndices(index, uuuu, vvvv, wwww, offset, mipmap + sizeof(Mipmap), function); computeIndices(index, uuuu, vvvv, wwww, offset, mipmap + sizeof(Mipmap), function);
c0 = Int(*Pointer<Byte>(buffer[1] + index[0])); c0 = Int(buffer[1][index[0]]);
c1 = Int(*Pointer<Byte>(buffer[1] + index[1])); c1 = Int(buffer[1][index[1]]);
c2 = Int(*Pointer<Byte>(buffer[1] + index[2])); c2 = Int(buffer[1][index[2]]);
c3 = Int(*Pointer<Byte>(buffer[1] + index[3])); c3 = Int(buffer[1][index[3]]);
c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
UShort4 V = As<UShort4>(Unpack(As<Byte4>(c0))); UShort4 V = As<UShort4>(Unpack(As<Byte4>(c0)));
c0 = Int(*Pointer<Byte>(buffer[2] + index[0])); c0 = Int(buffer[2][index[0]]);
c1 = Int(*Pointer<Byte>(buffer[2] + index[1])); c1 = Int(buffer[2][index[1]]);
c2 = Int(*Pointer<Byte>(buffer[2] + index[2])); c2 = Int(buffer[2][index[2]]);
c3 = Int(*Pointer<Byte>(buffer[2] + index[3])); c3 = Int(buffer[2][index[3]]);
c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
UShort4 U = As<UShort4>(Unpack(As<Byte4>(c0))); UShort4 U = As<UShort4>(Unpack(As<Byte4>(c0)));
...@@ -2020,7 +2020,7 @@ namespace sw ...@@ -2020,7 +2020,7 @@ namespace sw
void SamplerCore::sampleTexel(Vector4f &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function) void SamplerCore::sampleTexel(Vector4f &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
{ {
Int index[4]; UInt index[4];
computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function); computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);
......
...@@ -74,7 +74,7 @@ namespace sw ...@@ -74,7 +74,7 @@ namespace sw
void computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function); void computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
void cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &lodX, Float4 &lodY, Float4 &lodZ, Float4 &x, Float4 &y, Float4 &z); void cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &lodX, Float4 &lodY, Float4 &lodZ, Float4 &x, Float4 &y, Float4 &z);
Short4 applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode); Short4 applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode);
void computeIndices(Int index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function); void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function);
void sampleTexel(Vector4s &c, Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function); void sampleTexel(Vector4s &c, Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
void sampleTexel(Vector4f &c, Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function); void sampleTexel(Vector4f &c, Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
void selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD); void selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD);
......
pnacl-subzero @ c48bb8b0
Subproject commit 8bd18e1be3eb25d60a4696bb948ab41f6ce6afd6 Subproject commit c48bb8b02c98ae49438e43aa1143a958784822a5
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment