Commit bb575d48 by Nicolas Capens Committed by Nicolas Capens

Address cubemap faces as consecutive layers

Vulkan cubemaps use six consecutive layers for the faces, so we can reuse the same addressing logic as for 2D array textures. Hence the 3D lookup vector becomes a 2D coordinate plus a layer coordinate after projection. The only difference is that we don't have to clamp to the range of layers. This simplifies the sampled image descriptor since we only have to store a single pointer per mipmap level. We also avoid the per-lane lookup (gather) operation. YCbCr sampling was adjusted to not use the same array of buffer pointers. Also eliminate the unused lodOrBias parameter from computeLod*(). It's added afterwards. Bug: b/134164485 Bug: b/129523279 Change-Id: I5c349ff458aabb1d77e32104429b635d96237292 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/31088 Tested-by: Nicolas Capens <nicolascapens@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com> Reviewed-by: Alexis Hétu <sugoi@google.com>
parent 95b1db96
......@@ -1855,7 +1855,7 @@ namespace sw
// Low Border, Low Pixel, High Border, High Pixel
Int LB(-1), LP(0), HB(dim), HP(dim-1);
for(int i = 0; i < 6; ++i)
for(int face = 0; face < 6; face++)
{
computeCubeCorner(layers, LB, LP, LB, LP, pitchB, state);
computeCubeCorner(layers, LB, LP, HB, HP, pitchB, state);
......
......@@ -29,7 +29,7 @@ namespace sw
{
struct Mipmap
{
const void *buffer[6];
const void *buffer;
short4 uHalf;
short4 vHalf;
......@@ -102,7 +102,8 @@ namespace sw
ADDRESSING_MIRRORONCE,
ADDRESSING_BORDER, // Single color
ADDRESSING_SEAMLESS, // Border of pixels
ADDRESSING_LAYER,
ADDRESSING_CUBEFACE, // Cube face layer
ADDRESSING_LAYER, // Array layer
ADDRESSING_TEXELFETCH,
ADDRESSING_LAST = ADDRESSING_TEXELFETCH
......
......@@ -71,7 +71,6 @@ namespace sw
Float4 wwww = w;
Float4 qqqq = q;
Int face[4];
Float lod;
Float anisotropy;
Float4 uDelta;
......@@ -80,7 +79,8 @@ namespace sw
if(state.textureType == TEXTURE_CUBE)
{
cubeFace(face, uuuu, vvvv, u, v, w, M);
Int4 face = cubeFace(uuuu, vvvv, u, v, w, M);
wwww = As<Float4>(face);
}
if(function == Implicit || function == Bias || function == Grad || function == Query)
......@@ -89,16 +89,16 @@ namespace sw
{
if(state.textureType != TEXTURE_CUBE)
{
computeLod(texture, sampler, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, lodOrBias.x, dsx, dsy, function);
computeLod(texture, sampler, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, dsx, dsy, function);
}
else
{
computeLodCube(texture, sampler, lod, u, v, w, lodOrBias.x, dsx, dsy, M, function);
computeLodCube(texture, sampler, lod, u, v, w, dsx, dsy, M, function);
}
}
else
{
computeLod3D(texture, sampler, lod, uuuu, vvvv, wwww, lodOrBias.x, dsx, dsy, function);
computeLod3D(texture, sampler, lod, uuuu, vvvv, wwww, dsx, dsy, function);
}
if(function == Bias)
......@@ -155,7 +155,7 @@ namespace sw
if(use32BitFiltering)
{
c = sampleFloatFilter(texture, uuuu, vvvv, wwww, qqqq, offset, lod, anisotropy, uDelta, vDelta, face, function);
c = sampleFloatFilter(texture, uuuu, vvvv, wwww, qqqq, offset, lod, anisotropy, uDelta, vDelta, function);
if (!hasFloatTexture() && !hasUnnormalizedIntegerTexture() && !state.compareEnable)
{
......@@ -197,7 +197,7 @@ namespace sw
}
else // 16-bit filtering.
{
Vector4s cs = sampleFilter(texture, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
Vector4s cs = sampleFilter(texture, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, function);
switch (state.textureFormat)
{
......@@ -315,9 +315,9 @@ namespace sw
return uvw;
}
Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function)
{
Vector4s c = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
Vector4s c = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, false, function);
if(function == Fetch)
{
......@@ -326,7 +326,7 @@ namespace sw
if(state.mipmapFilter == MIPMAP_LINEAR)
{
Vector4s cc = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
Vector4s cc = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, true, function);
lod *= Float(1 << 16);
......@@ -360,13 +360,13 @@ namespace sw
return c;
}
Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function)
{
Vector4s c;
if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
{
c = sampleQuad(texture, u, v, w, offset, lod, face, secondLOD, function);
c = sampleQuad(texture, u, v, w, offset, lod, secondLOD, function);
}
else
{
......@@ -397,7 +397,7 @@ namespace sw
Do
{
c = sampleQuad(texture, u0, v0, w, offset, lod, face, secondLOD, function);
c = sampleQuad(texture, u0, v0, w, offset, lod, secondLOD, function);
u0 += du;
v0 += dv;
......@@ -420,11 +420,11 @@ namespace sw
return c;
}
Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
{
if(state.textureType != TEXTURE_3D)
{
return sampleQuad2D(texture, u, v, w, offset, lod, face, secondLOD, function);
return sampleQuad2D(texture, u, v, w, offset, lod, secondLOD, function);
}
else
{
......@@ -432,7 +432,7 @@ namespace sw
}
}
Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
{
Vector4s c;
......@@ -440,9 +440,8 @@ namespace sw
bool gather = (state.textureFilter == FILTER_GATHER);
Pointer<Byte> mipmap;
Pointer<Byte> buffer[4];
selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
Pointer<Byte> buffer;
selectMipmap(texture, mipmap, buffer, lod, secondLOD);
bool texelFetch = (function == Fetch);
......@@ -644,10 +643,8 @@ namespace sw
int componentCount = textureComponentCount();
Pointer<Byte> mipmap;
Pointer<Byte> buffer[4];
Int face[4];
selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
Pointer<Byte> buffer;
selectMipmap(texture, mipmap, buffer, lod, secondLOD);
bool texelFetch = (function == Fetch);
......@@ -762,9 +759,9 @@ namespace sw
return c_;
}
Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function)
{
Vector4f c = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
Vector4f c = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, false, function);
if(function == Fetch)
{
......@@ -773,7 +770,7 @@ namespace sw
if(state.mipmapFilter == MIPMAP_LINEAR)
{
Vector4f cc = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
Vector4f cc = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, true, function);
Float4 lod4 = Float4(Frac(lod));
......@@ -786,13 +783,13 @@ namespace sw
return c;
}
Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function)
{
Vector4f c;
if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
{
c = sampleFloat(texture, u, v, w, q, offset, lod, face, secondLOD, function);
c = sampleFloat(texture, u, v, w, q, offset, lod, secondLOD, function);
}
else
{
......@@ -821,7 +818,7 @@ namespace sw
Do
{
c = sampleFloat(texture, u0, v0, w, q, offset, lod, face, secondLOD, function);
c = sampleFloat(texture, u0, v0, w, q, offset, lod, secondLOD, function);
u0 += du;
v0 += dv;
......@@ -844,11 +841,11 @@ namespace sw
return c;
}
Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
{
if(state.textureType != TEXTURE_3D)
{
return sampleFloat2D(texture, u, v, w, q, offset, lod, face, secondLOD, function);
return sampleFloat2D(texture, u, v, w, q, offset, lod, secondLOD, function);
}
else
{
......@@ -856,7 +853,7 @@ namespace sw
}
}
Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
{
Vector4f c;
......@@ -864,9 +861,8 @@ namespace sw
bool gather = (state.textureFilter == FILTER_GATHER);
Pointer<Byte> mipmap;
Pointer<Byte> buffer[4];
selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
Pointer<Byte> buffer;
selectMipmap(texture, mipmap, buffer, lod, secondLOD);
Int4 x0, x1, y0, y1, z0;
Float4 fu, fv, fw;
......@@ -877,10 +873,9 @@ namespace sw
Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
y0 *= pitchP;
if(hasThirdCoordinate())
if(state.addressingModeW != ADDRESSING_UNUSED)
{
Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
z0 *= sliceP;
z0 *= *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
}
if(state.textureFilter == FILTER_POINT || (function == Fetch))
......@@ -942,10 +937,8 @@ namespace sw
int componentCount = textureComponentCount();
Pointer<Byte> mipmap;
Pointer<Byte> buffer[4];
Int face[4];
selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
Pointer<Byte> buffer;
selectMipmap(texture, mipmap, buffer, lod, secondLOD);
Int4 x0, x1, y0, y1, z0, z1;
Float4 fu, fv, fw;
......@@ -1038,7 +1031,7 @@ namespace sw
return lod;
}
void SamplerCore::computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, const Float &lodOrBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
void SamplerCore::computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
{
Float4 duvdxy;
......@@ -1084,7 +1077,7 @@ namespace sw
lod = log2sqrt(lod); // log2(sqrt(lod))
}
void SamplerCore::computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodOrBias, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function)
void SamplerCore::computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function)
{
Float4 dudxy, dvdxy, dsdxy;
......@@ -1125,7 +1118,7 @@ namespace sw
lod = log2(lod);
}
void SamplerCore::computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, const Float &lodOrBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
void SamplerCore::computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
{
Float4 dudxy, dvdxy, dsdxy;
......@@ -1159,8 +1152,11 @@ namespace sw
lod = log2sqrt(lod); // log2(sqrt(lod))
}
void SamplerCore::cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M)
Int4 SamplerCore::cubeFace(Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M)
{
// TODO: Comply with Vulkan recommendation:
// Vulkan 1.1: "The rules should have as the first rule that rz wins over ry and rx, and the second rule that ry wins over rx."
Int4 xn = CmpLT(x, Float4(0.0f)); // x < 0
Int4 yn = CmpLT(y, Float4(0.0f)); // y < 0
Int4 zn = CmpLT(z, Float4(0.0f)); // z < 0
......@@ -1189,13 +1185,15 @@ namespace sw
Int4 n = ((xn & xMajor) | (yn & yMajor) | (zn & zMajor)) & Int4(0x80000000);
Int negative = SignMask(n);
face[0] = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4);
face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4);
face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4);
face[1] = (face[0] >> 4) & 0x7;
face[2] = (face[0] >> 8) & 0x7;
face[3] = (face[0] >> 12) & 0x7;
face[0] &= 0x7;
Int faces = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4);
faces |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4);
faces |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4);
Int4 face;
face.x = faces & 0x7;
face.y = (faces >> 4) & 0x7;
face.z = (faces >> 8) & 0x7;
face.w = (faces >> 12) & 0x7;
M = Max(Max(absX, absY), Max(absZ, Float4(std::numeric_limits<float>::min())));
......@@ -1208,6 +1206,8 @@ namespace sw
M = reciprocal(M) * Float4(0.5f);
U = U * M + Float4(0.5f);
V = V * M + Float4(0.5f);
return face;
}
Short4 SamplerCore::applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode)
......@@ -1315,7 +1315,7 @@ namespace sw
{
UInt4 indices = uuuu + vvvv;
if(hasThirdCoordinate())
if(state.addressingModeW != ADDRESSING_UNUSED)
{
indices += As<UInt4>(wwww);
}
......@@ -1333,21 +1333,16 @@ namespace sw
}
}
Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer[4])
Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer)
{
Vector4s c;
int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
if(has16bitTextureFormat())
{
c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
switch(state.textureFormat)
{
......@@ -1378,10 +1373,10 @@ namespace sw
{
case 4:
{
Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]];
Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]];
Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]];
Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]];
Byte4 c0 = Pointer<Byte4>(buffer)[index[0]];
Byte4 c1 = Pointer<Byte4>(buffer)[index[1]];
Byte4 c2 = Pointer<Byte4>(buffer)[index[2]];
Byte4 c3 = Pointer<Byte4>(buffer)[index[3]];
c.x = Unpack(c0, c1);
c.y = Unpack(c2, c3);
......@@ -1445,10 +1440,10 @@ namespace sw
}
break;
case 2:
c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
switch(state.textureFormat)
{
......@@ -1474,10 +1469,10 @@ namespace sw
break;
case 1:
{
Int c0 = Int(*Pointer<Byte>(buffer[f0] + index[0]));
Int c1 = Int(*Pointer<Byte>(buffer[f1] + index[1]));
Int c2 = Int(*Pointer<Byte>(buffer[f2] + index[2]));
Int c3 = Int(*Pointer<Byte>(buffer[f3] + index[3]));
Int c0 = Int(*Pointer<Byte>(buffer + index[0]));
Int c1 = Int(*Pointer<Byte>(buffer + index[1]));
Int c2 = Int(*Pointer<Byte>(buffer + index[2]));
Int c3 = Int(*Pointer<Byte>(buffer + index[3]));
c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
switch(state.textureFormat)
......@@ -1514,33 +1509,33 @@ namespace sw
switch(textureComponentCount())
{
case 4:
c.x = Pointer<Short4>(buffer[f0])[index[0]];
c.y = Pointer<Short4>(buffer[f1])[index[1]];
c.z = Pointer<Short4>(buffer[f2])[index[2]];
c.w = Pointer<Short4>(buffer[f3])[index[3]];
c.x = Pointer<Short4>(buffer)[index[0]];
c.y = Pointer<Short4>(buffer)[index[1]];
c.z = Pointer<Short4>(buffer)[index[2]];
c.w = Pointer<Short4>(buffer)[index[3]];
transpose4x4(c.x, c.y, c.z, c.w);
break;
case 3:
c.x = Pointer<Short4>(buffer[f0])[index[0]];
c.y = Pointer<Short4>(buffer[f1])[index[1]];
c.z = Pointer<Short4>(buffer[f2])[index[2]];
c.w = Pointer<Short4>(buffer[f3])[index[3]];
c.x = Pointer<Short4>(buffer)[index[0]];
c.y = Pointer<Short4>(buffer)[index[1]];
c.z = Pointer<Short4>(buffer)[index[2]];
c.w = Pointer<Short4>(buffer)[index[3]];
transpose4x3(c.x, c.y, c.z, c.w);
break;
case 2:
c.x = *Pointer<Short4>(buffer[f0] + 4 * index[0]);
c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer[f1] + 4 * index[1])));
c.z = *Pointer<Short4>(buffer[f2] + 4 * index[2]);
c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer[f3] + 4 * index[3])));
c.x = *Pointer<Short4>(buffer + 4 * index[0]);
c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer + 4 * index[1])));
c.z = *Pointer<Short4>(buffer + 4 * index[2]);
c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer + 4 * index[3])));
c.y = c.x;
c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z));
c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z));
break;
case 1:
c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
break;
default:
ASSERT(false);
......@@ -1549,10 +1544,10 @@ namespace sw
else if(state.textureFormat == VK_FORMAT_A2B10G10R10_UNORM_PACK32)
{
Int4 cc;
cc = Insert(cc, Pointer<Int>(buffer[f0])[index[0]], 0);
cc = Insert(cc, Pointer<Int>(buffer[f1])[index[1]], 1);
cc = Insert(cc, Pointer<Int>(buffer[f2])[index[2]], 2);
cc = Insert(cc, Pointer<Int>(buffer[f3])[index[3]], 3);
cc = Insert(cc, Pointer<Int>(buffer)[index[0]], 0);
cc = Insert(cc, Pointer<Int>(buffer)[index[1]], 1);
cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2);
cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3);
// shift each 10 bit field left 6, and replicate 6 high bits into bottom 6
c.x = Short4(((cc << 6) & Int4(0xFFC0)) | ((cc >> 4) & Int4(0x3F)));
......@@ -1568,10 +1563,10 @@ namespace sw
else if(state.textureFormat == VK_FORMAT_A2B10G10R10_UINT_PACK32)
{
Int4 cc;
cc = Insert(cc, Pointer<Int>(buffer[f0])[index[0]], 0);
cc = Insert(cc, Pointer<Int>(buffer[f1])[index[1]], 1);
cc = Insert(cc, Pointer<Int>(buffer[f2])[index[2]], 2);
cc = Insert(cc, Pointer<Int>(buffer[f3])[index[3]], 3);
cc = Insert(cc, Pointer<Int>(buffer)[index[0]], 0);
cc = Insert(cc, Pointer<Int>(buffer)[index[1]], 1);
cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2);
cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3);
c.x = Short4(((cc) & Int4(0x3FF)));
c.y = Short4(((cc >> 10) & Int4(0x3FF)));
......@@ -1594,7 +1589,7 @@ namespace sw
return c;
}
Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer, SamplerFunction function)
{
Vector4s c;
......@@ -1603,11 +1598,16 @@ namespace sw
if(isYcbcrFormat())
{
// Pointers to the planes of YCbCr images are stored in consecutive mipmap levels.
Pointer<Byte> bufferY = buffer; // *Pointer<Pointer<Byte>>(mipmap + 0 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
Pointer<Byte> bufferU = *Pointer<Pointer<Byte>>(mipmap + 1 * sizeof(Mipmap) + OFFSET(Mipmap, buffer)); // U/V for 2-plane interleaved formats.
Pointer<Byte> bufferV = *Pointer<Pointer<Byte>>(mipmap + 2 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
// Luminance
Int c0 = Int(buffer[0][index[0]]);
Int c1 = Int(buffer[0][index[1]]);
Int c2 = Int(buffer[0][index[2]]);
Int c3 = Int(buffer[0][index[3]]);
Int c0 = Int(bufferY[index[0]]);
Int c1 = Int(bufferY[index[1]]);
Int c2 = Int(bufferY[index[2]]);
Int c3 = Int(bufferY[index[3]]);
c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0)));
......@@ -1620,27 +1620,27 @@ namespace sw
if(state.textureFormat == VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM)
{
c0 = Int(buffer[1][index[0]]);
c1 = Int(buffer[1][index[1]]);
c2 = Int(buffer[1][index[2]]);
c3 = Int(buffer[1][index[3]]);
c0 = Int(bufferU[index[0]]);
c1 = Int(bufferU[index[1]]);
c2 = Int(bufferU[index[2]]);
c3 = Int(bufferU[index[3]]);
c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
U = As<UShort4>(Unpack(As<Byte4>(c0)));
c0 = Int(buffer[2][index[0]]);
c1 = Int(buffer[2][index[1]]);
c2 = Int(buffer[2][index[2]]);
c3 = Int(buffer[2][index[3]]);
c0 = Int(bufferV[index[0]]);
c1 = Int(bufferV[index[1]]);
c2 = Int(bufferV[index[2]]);
c3 = Int(bufferV[index[3]]);
c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
V = As<UShort4>(Unpack(As<Byte4>(c0)));
}
else if(state.textureFormat == VK_FORMAT_G8_B8R8_2PLANE_420_UNORM)
{
Short4 UV;
UV = Insert(UV, Pointer<Short>(buffer[1])[index[0]], 0); // TODO: Insert(UShort4, UShort)
UV = Insert(UV, Pointer<Short>(buffer[1])[index[1]], 1);
UV = Insert(UV, Pointer<Short>(buffer[1])[index[2]], 2);
UV = Insert(UV, Pointer<Short>(buffer[1])[index[3]], 3);
UV = Insert(UV, Pointer<Short>(bufferU)[index[0]], 0); // TODO: Insert(UShort4, UShort)
UV = Insert(UV, Pointer<Short>(bufferU)[index[1]], 1);
UV = Insert(UV, Pointer<Short>(bufferU)[index[2]], 2);
UV = Insert(UV, Pointer<Short>(bufferU)[index[3]], 3);
U = (UV & Short4(0x00FFu)) | (UV << 8);
V = (UV & Short4(0xFF00u)) | As<Short4>(As<UShort4>(UV) >> 8);
}
......@@ -1732,7 +1732,7 @@ namespace sw
return c;
}
Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer, SamplerFunction function)
{
Int4 valid;
......@@ -1754,18 +1754,13 @@ namespace sw
if(hasFloatTexture() || has32bitIntegerTextureComponents())
{
int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
switch (state.textureFormat)
{
case VK_FORMAT_R16_SFLOAT:
t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 2));
t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 2));
t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 2));
t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 2));
t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 2));
t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 2));
t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 2));
t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 2));
c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0);
c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0);
......@@ -1773,10 +1768,10 @@ namespace sw
c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0);
break;
case VK_FORMAT_R16G16_SFLOAT:
t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 4));
t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 4));
t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 4));
t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 4));
t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 4));
t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 4));
t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 4));
t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 4));
// FIXME: shuffles
c.x = As<Float4>(halfToFloatBits(t0));
......@@ -1786,10 +1781,10 @@ namespace sw
transpose4x4(c.x, c.y, c.z, c.w);
break;
case VK_FORMAT_R16G16B16A16_SFLOAT:
t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 8));
t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 8));
t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 8));
t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 8));
t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 8));
t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 8));
t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 8));
t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 8));
c.x = As<Float4>(halfToFloatBits(t0));
c.y = As<Float4>(halfToFloatBits(t1));
......@@ -1802,19 +1797,19 @@ namespace sw
case VK_FORMAT_R32_UINT:
case VK_FORMAT_D32_SFLOAT:
// FIXME: Optimal shuffling?
c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
c.x.x = *Pointer<Float>(buffer + index[0] * 4);
c.x.y = *Pointer<Float>(buffer + index[1] * 4);
c.x.z = *Pointer<Float>(buffer + index[2] * 4);
c.x.w = *Pointer<Float>(buffer + index[3] * 4);
break;
case VK_FORMAT_R32G32_SFLOAT:
case VK_FORMAT_R32G32_SINT:
case VK_FORMAT_R32G32_UINT:
// FIXME: Optimal shuffling?
c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
c.x.xy = *Pointer<Float4>(buffer + index[0] * 8);
c.x.zw = *Pointer<Float4>(buffer + index[1] * 8 - 8);
c.z.xy = *Pointer<Float4>(buffer + index[2] * 8);
c.z.zw = *Pointer<Float4>(buffer + index[3] * 8 - 8);
c.y = c.x;
c.x = Float4(c.x.xz, c.z.xz);
c.y = Float4(c.y.yw, c.z.yw);
......@@ -1822,28 +1817,28 @@ namespace sw
case VK_FORMAT_R32G32B32_SFLOAT:
case VK_FORMAT_R32G32B32_SINT:
case VK_FORMAT_R32G32B32_UINT:
c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
c.x = *Pointer<Float4>(buffer + index[0] * 16, 16);
c.y = *Pointer<Float4>(buffer + index[1] * 16, 16);
c.z = *Pointer<Float4>(buffer + index[2] * 16, 16);
c.w = *Pointer<Float4>(buffer + index[3] * 16, 16);
transpose4x3(c.x, c.y, c.z, c.w);
break;
case VK_FORMAT_R32G32B32A32_SFLOAT:
case VK_FORMAT_R32G32B32A32_SINT:
case VK_FORMAT_R32G32B32A32_UINT:
c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
c.x = *Pointer<Float4>(buffer + index[0] * 16, 16);
c.y = *Pointer<Float4>(buffer + index[1] * 16, 16);
c.z = *Pointer<Float4>(buffer + index[2] * 16, 16);
c.w = *Pointer<Float4>(buffer + index[3] * 16, 16);
transpose4x4(c.x, c.y, c.z, c.w);
break;
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
{
Float4 t; // TODO: add Insert(UInt4, RValue<UInt>)
t.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
t.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
t.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
t.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
t.x = *Pointer<Float>(buffer + index[0] * 4);
t.y = *Pointer<Float>(buffer + index[1] * 4);
t.z = *Pointer<Float>(buffer + index[2] * 4);
t.w = *Pointer<Float>(buffer + index[3] * 4);
t0 = As<UInt4>(t);
c.w = Float4(UInt4(1) << ((t0 >> 27) & UInt4(0x1F))) * Float4(1.0f / (1 << 24));
c.x = Float4((t0) & UInt4(0x1FF)) * c.w;
......@@ -1854,10 +1849,10 @@ namespace sw
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
{
Float4 t; // TODO: add Insert(UInt4, RValue<UInt>)
t.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
t.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
t.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
t.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
t.x = *Pointer<Float>(buffer + index[0] * 4);
t.y = *Pointer<Float>(buffer + index[1] * 4);
t.z = *Pointer<Float>(buffer + index[2] * 4);
t.w = *Pointer<Float>(buffer + index[3] * 4);
t0 = As<UInt4>(t);
c.x = As<Float4>(halfToFloatBits((t0 << 4) & UInt4(0x7FF0)));
c.y = As<Float4>(halfToFloatBits((t0 >> 7) & UInt4(0x7FF0)));
......@@ -1988,11 +1983,13 @@ namespace sw
return out;
}
void SamplerCore::selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD)
void SamplerCore::selectMipmap(const Pointer<Byte> &texture, Pointer<Byte> &mipmap, Pointer<Byte> &buffer, const Float &lod, bool secondLOD)
{
Pointer<Byte> mipmap0 = texture + OFFSET(Texture, mipmap[0]);
if(state.mipmapFilter == MIPMAP_NONE)
{
mipmap = texture + OFFSET(Texture,mipmap[0]);
mipmap = mipmap0;
}
else
{
......@@ -2008,26 +2005,10 @@ namespace sw
ilod = Int(lod);
}
mipmap = texture + OFFSET(Texture,mipmap) + ilod * sizeof(Mipmap) + secondLOD * sizeof(Mipmap);
mipmap = mipmap0 + ilod * sizeof(Mipmap) + secondLOD * sizeof(Mipmap);
}
if(state.textureType != TEXTURE_CUBE)
{
buffer[0] = *Pointer<Pointer<Byte>>(mipmap + OFFSET(Mipmap,buffer[0]));
if(isYcbcrFormat())
{
buffer[1] = *Pointer<Pointer<Byte>>(mipmap + sizeof(Mipmap) * 1 + OFFSET(Mipmap,buffer[0]));
buffer[2] = *Pointer<Pointer<Byte>>(mipmap + sizeof(Mipmap) * 2 + OFFSET(Mipmap,buffer[0]));
}
}
else
{
for(int i = 0; i < 4; i++)
{
buffer[i] = *Pointer<Pointer<Byte>>(mipmap + OFFSET(Mipmap,buffer) + face[i] * sizeof(void*));
}
}
buffer = *Pointer<Pointer<Byte>>(mipmap + OFFSET(Mipmap, buffer));
}
Int4 SamplerCore::computeFilterOffset(Float &lod)
......@@ -2122,6 +2103,10 @@ namespace sw
{
xyz0 = Min(Max(RoundInt(uvw), Int4(0)), maxXYZ);
}
else if(addressingMode == ADDRESSING_CUBEFACE)
{
xyz0 = As<Int4>(uvw);
}
else
{
const int halfBits = 0x3EFFFFFF; // Value just under 0.5f
......
......@@ -66,30 +66,30 @@ namespace sw
private:
Short4 offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod);
Vector4s sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function);
Vector4s sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function);
Vector4s sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
Vector4s sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
Vector4s sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function);
Vector4s sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function);
Vector4s sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
Vector4s sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
Vector4s sample3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
Vector4f sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function);
Vector4f sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function);
Vector4f sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
Vector4f sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
Vector4f sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function);
Vector4f sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function);
Vector4f sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
Vector4f sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
Vector4f sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
Float log2sqrt(Float lod);
Float log2(Float lod);
void computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, const Float &lodOrBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
void computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodOrBias, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function);
void computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodOrBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
void cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M);
void computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
void computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function);
void computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
Int4 cubeFace(Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M);
Short4 applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode);
void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function);
void computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, Int4 valid, const Pointer<Byte> &mipmap, SamplerFunction function);
Vector4s sampleTexel(Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
Vector4s sampleTexel(UInt index[4], Pointer<Byte> buffer[4]);
Vector4f sampleTexel(Int4 &u, Int4 &v, Int4 &s, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
Vector4s sampleTexel(Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer, SamplerFunction function);
Vector4s sampleTexel(UInt index[4], Pointer<Byte> buffer);
Vector4f sampleTexel(Int4 &u, Int4 &v, Int4 &s, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer, SamplerFunction function);
Vector4f replaceBorderTexel(const Vector4f &c, Int4 valid);
void selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD);
void selectMipmap(const Pointer<Byte> &texture, Pointer<Byte> &mipmap, Pointer<Byte> &buffer, const Float &lod, bool secondLOD);
Short4 address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte>& mipmap);
void address(Float4 &uw, Int4& xyz0, Int4& xyz1, Float4& f, Pointer<Byte>& mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function);
Int4 computeFilterOffset(Float &lod);
......
......@@ -272,12 +272,7 @@ sw::AddressingMode SpirvShader::convertAddressingMode(int coordinateIndex, VkSam
}
// Fall through to CUBE case:
case VK_IMAGE_VIEW_TYPE_CUBE:
if(coordinateIndex >= 2)
{
// Cube faces are addressed as 2D images.
return ADDRESSING_UNUSED;
}
else
if(coordinateIndex <= 1) // Cube faces themselves are addressed as 2D images.
{
// Vulkan 1.1 spec:
// "Cube images ignore the wrap modes specified in the sampler. Instead, if VK_FILTER_NEAREST is used within a mip level then
......@@ -286,6 +281,15 @@ sw::AddressingMode SpirvShader::convertAddressingMode(int coordinateIndex, VkSam
// This corresponds with our 'SEAMLESS' addressing mode.
return ADDRESSING_SEAMLESS;
}
else if(coordinateIndex == 2)
{
// The cube face is an index into array layers.
return ADDRESSING_CUBEFACE;
}
else
{
return ADDRESSING_UNUSED;
}
break;
case VK_IMAGE_VIEW_TYPE_1D: // Treated as 2D texture with second coordinate 0.
......
......@@ -1475,7 +1475,7 @@ namespace rr
RValue<Vector4> operator=(RValue<typename Scalar<Vector4>::Type> rhs);
private:
Float4 *parent;
Vector4 *parent;
};
template<class Vector4, int T>
......
......@@ -322,7 +322,7 @@ void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptor
imageSampler[i].texture.depth = sw::replicate(1);
sw::Mipmap &mipmap = imageSampler[i].texture.mipmap[0];
mipmap.buffer[0] = bufferView->getPointer();
mipmap.buffer = bufferView->getPointer();
mipmap.width[0] = mipmap.width[1] = mipmap.width[2] = mipmap.width[3] = numElements;
mipmap.height[0] = mipmap.height[1] = mipmap.height[2] = mipmap.height[3] = 1;
mipmap.depth[0] = mipmap.depth[1] = mipmap.depth[2] = mipmap.depth[3] = 1;
......@@ -373,11 +373,11 @@ void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptor
const int level = 0;
VkOffset3D offset = {0, 0, 0};
texture->mipmap[0].buffer[0] = imageView->getOffsetPointer(offset, VK_IMAGE_ASPECT_PLANE_0_BIT, level, 0, ImageView::SAMPLING);
texture->mipmap[1].buffer[0] = imageView->getOffsetPointer(offset, VK_IMAGE_ASPECT_PLANE_1_BIT, level, 0, ImageView::SAMPLING);
texture->mipmap[0].buffer = imageView->getOffsetPointer(offset, VK_IMAGE_ASPECT_PLANE_0_BIT, level, 0, ImageView::SAMPLING);
texture->mipmap[1].buffer = imageView->getOffsetPointer(offset, VK_IMAGE_ASPECT_PLANE_1_BIT, level, 0, ImageView::SAMPLING);
if(format.getAspects() & VK_IMAGE_ASPECT_PLANE_2_BIT)
{
texture->mipmap[2].buffer[0] = imageView->getOffsetPointer(offset, VK_IMAGE_ASPECT_PLANE_2_BIT, level, 0, ImageView::SAMPLING);
texture->mipmap[2].buffer = imageView->getOffsetPointer(offset, VK_IMAGE_ASPECT_PLANE_2_BIT, level, 0, ImageView::SAMPLING);
}
VkExtent3D extent = imageView->getMipLevelExtent(0);
......@@ -407,20 +407,15 @@ void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptor
if(imageView->getType() == VK_IMAGE_VIEW_TYPE_CUBE)
{
for(int face = 0; face < 6; face++)
{
// Obtain the pointer to the corner of the level including the border, for seamless sampling.
// This is taken into account in the sampling routine, which can't handle negative texel coordinates.
VkOffset3D offset = {-1, -1, 0};
// TODO(b/129523279): Implement as 6 consecutive layers instead of separate pointers.
mipmap.buffer[face] = imageView->getOffsetPointer(offset, aspect, level, face, ImageView::SAMPLING);
}
// Obtain the pointer to the corner of the level including the border, for seamless sampling.
// This is taken into account in the sampling routine, which can't handle negative texel coordinates.
VkOffset3D offset = {-1, -1, 0};
mipmap.buffer = imageView->getOffsetPointer(offset, aspect, level, 0, ImageView::SAMPLING);
}
else
{
VkOffset3D offset = {0, 0, 0};
mipmap.buffer[0] = imageView->getOffsetPointer(offset, aspect, level, 0, ImageView::SAMPLING);
mipmap.buffer = imageView->getOffsetPointer(offset, aspect, level, 0, ImageView::SAMPLING);
}
VkExtent3D extent = imageView->getMipLevelExtent(level);
......@@ -438,7 +433,7 @@ void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptor
}
}
else if (entry.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
entry.descriptorType == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)
entry.descriptorType == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)
{
auto descriptor = reinterpret_cast<StorageImageDescriptor *>(memToWrite);
for(uint32_t i = 0; i < entry.descriptorCount; i++)
......@@ -485,9 +480,9 @@ void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptor
}
}
else if (entry.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
entry.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
entry.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
entry.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
entry.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
entry.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
entry.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
{
auto descriptor = reinterpret_cast<BufferDescriptor *>(memToWrite);
for (uint32_t i = 0; i < entry.descriptorCount; i++)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment