Commit 8ac0bd6c by Nicolas Capens Committed by Nicolas Capens

Fix image sampling with divergent LOD

Currently our SamplerCore code performs sampling for four SIMD lanes simultaneously. With implicit LOD calculation for fragment shaders, all four pixels in a quad share the same LOD and thus sample from the same mipmap level. But for the vertex shader the LOD is always explicitly provided, and can vary significantly between completely unrelated vertices. Previously we only used the LOD of the first one in each group of four. As a workaround, process explicit-lod sampling instructions in a lane-by-lane manner.

Bug: b/133868964
Tests: dEQP-VK.glsl.texture_functions.*
Change-Id: If4e0d3c04d29529300111d73801124080cb4b544
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/32488
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
parent 7e95471a
......@@ -65,7 +65,7 @@ namespace sw
TEXTURE_2D,
TEXTURE_3D,
TEXTURE_CUBE,
TEXTURE_1D_ARRAY, // Treated as 2D texture with second coordinate 0.
TEXTURE_1D_ARRAY, // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
TEXTURE_2D_ARRAY,
TEXTURE_CUBE_ARRAY,
......
......@@ -85,24 +85,6 @@ namespace sw
state.shaderID = context->vertexShader->getSerialID();
switch(context->topology)
{
case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
state.verticesPerPrimitive = 1;
break;
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
state.verticesPerPrimitive = 2;
break;
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
state.verticesPerPrimitive = 3;
break;
default:
UNIMPLEMENTED("topology %d", int(context->topology));
}
for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
{
state.input[i].type = context->input[i].type;
......
......@@ -50,9 +50,6 @@ namespace sw
uint64_t shaderID;
bool textureSampling : 1; // TODO: Eliminate by querying shader.
unsigned char verticesPerPrimitive : 2; // 1 (points), 2 (lines) or 3 (triangles)
struct Input
{
operator bool() const // Returns true if stream contains data
......
......@@ -53,7 +53,7 @@ namespace sw
{
}
Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &lodOrBias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, SamplerFunction function)
{
Vector4f c;
......@@ -103,17 +103,17 @@ namespace sw
if(function == Bias)
{
lod += lodOrBias.x;
lod += lodOrBias;
}
}
else if(function == Lod)
{
lod = lodOrBias.x;
lod = lodOrBias;
}
else if(function == Fetch)
{
// TODO: Eliminate int-float-int conversion.
lod = Float(As<Int>(Float(lodOrBias.x)));
lod = Float(As<Int>(lodOrBias));
}
else if(function == Base || function == Gather)
{
......@@ -1031,7 +1031,7 @@ namespace sw
return lod;
}
void SamplerCore::computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
void SamplerCore::computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, Float4 &dsx, Float4 &dsy, SamplerFunction function)
{
Float4 duvdxy;
......@@ -1041,8 +1041,8 @@ namespace sw
}
else
{
Float4 dudxy = Float4(dsx.x.xx, dsy.x.xx);
Float4 dvdxy = Float4(dsx.y.xx, dsy.y.xx);
Float4 dudxy = Float4(dsx.xx, dsy.xx);
Float4 dvdxy = Float4(dsx.yy, dsy.yy);
duvdxy = Float4(dudxy.xz, dvdxy.xz);
}
......@@ -1077,7 +1077,7 @@ namespace sw
lod = log2sqrt(lod); // log2(sqrt(lod))
}
void SamplerCore::computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function)
void SamplerCore::computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Float4 &dsx, Float4 &dsy, Float4 &M, SamplerFunction function)
{
Float4 dudxy, dvdxy, dsdxy;
......@@ -1093,9 +1093,9 @@ namespace sw
}
else
{
dudxy = Float4(dsx.x.xx, dsy.x.xx);
dvdxy = Float4(dsx.y.xx, dsy.y.xx);
dsdxy = Float4(dsx.z.xx, dsy.z.xx);
dudxy = Float4(dsx.xx, dsy.xx);
dvdxy = Float4(dsx.yy, dsy.yy);
dsdxy = Float4(dsx.zz, dsy.zz);
dudxy = Abs(dudxy * Float4(M.x));
dvdxy = Abs(dvdxy * Float4(M.x));
......@@ -1118,7 +1118,7 @@ namespace sw
lod = log2(lod);
}
void SamplerCore::computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
void SamplerCore::computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, Float4 &dsx, Float4 &dsy, SamplerFunction function)
{
Float4 dudxy, dvdxy, dsdxy;
......@@ -1130,9 +1130,9 @@ namespace sw
}
else
{
dudxy = Float4(dsx.x.xx, dsy.x.xx);
dvdxy = Float4(dsx.y.xx, dsy.y.xx);
dsdxy = Float4(dsx.z.xx, dsy.z.xx);
dudxy = Float4(dsx.xx, dsy.xx);
dvdxy = Float4(dsx.yy, dsy.yy);
dsdxy = Float4(dsx.zz, dsy.zz);
}
// Scale by texture dimensions.
......@@ -2344,7 +2344,7 @@ namespace sw
{
return (state.textureType == TEXTURE_3D) ||
(state.textureType == TEXTURE_2D_ARRAY) ||
(state.textureType == TEXTURE_1D_ARRAY); // Treated as 2D texture with second coordinate 0.
(state.textureType == TEXTURE_1D_ARRAY); // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
}
bool SamplerCore::has16bitTextureFormat() const
......
......@@ -62,7 +62,7 @@ namespace sw
public:
SamplerCore(Pointer<Byte> &constants, const Sampler &state);
Vector4f sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &lodOrBias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function);
Vector4f sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, SamplerFunction function);
private:
Short4 offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod);
......@@ -78,9 +78,9 @@ namespace sw
Vector4f sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
Float log2sqrt(Float lod);
Float log2(Float lod);
void computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
void computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function);
void computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
void computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, Float4 &dsx, Float4 &dsy, SamplerFunction function);
void computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Float4 &dsx, Float4 &dsy, Float4 &M, SamplerFunction function);
void computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Float4 &dsx, Float4 &dsy, SamplerFunction function);
Int4 cubeFace(Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M);
Short4 applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode);
void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function);
......
......@@ -145,7 +145,7 @@ SpirvShader::ImageSampler *SpirvShader::emitSamplerFunction(ImageInstruction ins
i++;
}
// TODO(b/129523279): Currently 1D textures are treated as 2D by setting the second coordinate to 0.
// TODO(b/134669567): Currently 1D textures are treated as 2D by setting the second coordinate to 0.
// Implement optimized 1D sampling.
if(samplerState.textureType == TEXTURE_1D)
{
......@@ -184,13 +184,52 @@ SpirvShader::ImageSampler *SpirvShader::emitSamplerFunction(ImageInstruction ins
}
SamplerCore s(constants, samplerState);
Vector4f sample = s.sampleTexture(texture, sampler, uvw[0], uvw[1], uvw[2], q, lodOrBias, dsx, dsy, offset, samplerFunction);
Pointer<SIMD::Float> rgba = out;
rgba[0] = sample.x;
rgba[1] = sample.y;
rgba[2] = sample.z;
rgba[3] = sample.w;
// For explicit-lod instructions the LOD can be different per SIMD lane. SamplerCore currently assumes
// a single LOD per four elements, so we sample the image again for each LOD separately.
if(samplerFunction.method == Lod || samplerFunction.method == Grad) // TODO(b/133868964): Also handle divergent Bias and Fetch with Lod.
{
auto lod = Pointer<Float>(&lodOrBias);
For(Int i = 0, i < SIMD::Width, i++)
{
SIMD::Float dPdx;
SIMD::Float dPdy;
dPdx.x = Pointer<Float>(&dsx.x)[i];
dPdx.y = Pointer<Float>(&dsx.y)[i];
dPdx.z = Pointer<Float>(&dsx.z)[i];
dPdy.x = Pointer<Float>(&dsy.x)[i];
dPdy.y = Pointer<Float>(&dsy.y)[i];
dPdy.z = Pointer<Float>(&dsy.z)[i];
// 1D textures are treated as 2D texture with second coordinate 0, so we also need to zero out the second grad component. TODO(b/134669567)
if(samplerState.textureType == TEXTURE_1D || samplerState.textureType == TEXTURE_1D_ARRAY)
{
dPdx.y = Float(0.0f);
dPdy.y = Float(0.0f);
}
Vector4f sample = s.sampleTexture(texture, sampler, uvw[0], uvw[1], uvw[2], q, lod[i], dPdx, dPdy, offset, samplerFunction);
Pointer<Float> rgba = out;
rgba[0 * SIMD::Width + i] = Pointer<Float>(&sample.x)[i];
rgba[1 * SIMD::Width + i] = Pointer<Float>(&sample.y)[i];
rgba[2 * SIMD::Width + i] = Pointer<Float>(&sample.z)[i];
rgba[3 * SIMD::Width + i] = Pointer<Float>(&sample.w)[i];
}
}
else
{
Vector4f sample = s.sampleTexture(texture, sampler, uvw[0], uvw[1], uvw[2], q, lodOrBias.x, (dsx.x), (dsy.x), offset, samplerFunction);
Pointer<SIMD::Float> rgba = out;
rgba[0] = sample.x;
rgba[1] = sample.y;
rgba[2] = sample.z;
rgba[3] = sample.w;
}
}
return (ImageSampler*)function("sampler")->getEntry();
......@@ -292,7 +331,7 @@ sw::AddressingMode SpirvShader::convertAddressingMode(int coordinateIndex, VkSam
}
break;
case VK_IMAGE_VIEW_TYPE_1D: // Treated as 2D texture with second coordinate 0.
case VK_IMAGE_VIEW_TYPE_1D: // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
if(coordinateIndex == 1)
{
return ADDRESSING_WRAP;
......@@ -310,7 +349,7 @@ sw::AddressingMode SpirvShader::convertAddressingMode(int coordinateIndex, VkSam
}
break;
case VK_IMAGE_VIEW_TYPE_1D_ARRAY: // Treated as 2D texture with second coordinate 0.
case VK_IMAGE_VIEW_TYPE_1D_ARRAY: // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
if(coordinateIndex == 1)
{
return ADDRESSING_WRAP;
......
......@@ -41,8 +41,6 @@ namespace sw
void VertexRoutine::generate()
{
const bool textureSampling = state.textureSampling;
Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache);
Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex);
Pointer<Byte> tagCache = cache + OFFSET(VertexCache,tag);
......@@ -55,7 +53,7 @@ namespace sw
{
UInt index = *Pointer<UInt>(batch);
UInt tagIndex = index & 0x0000003C;
UInt indexQ = !textureSampling ? UInt(index & 0xFFFFFFFC) : index; // FIXME: TEXLDL hack to have independent LODs, hurts performance.
UInt indexQ = index & 0xFFFFFFFC;
If(*Pointer<UInt>(tagCache + tagIndex) != indexQ)
{
......@@ -139,14 +137,12 @@ namespace sw
Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index)
{
const bool textureSampling = state.textureSampling;
Vector4f v;
Pointer<Byte> source0 = buffer + index * stride;
Pointer<Byte> source1 = source0 + (!textureSampling ? stride : 0);
Pointer<Byte> source2 = source1 + (!textureSampling ? stride : 0);
Pointer<Byte> source3 = source2 + (!textureSampling ? stride : 0);
Pointer<Byte> source1 = source0 + stride;
Pointer<Byte> source2 = source1 + stride;
Pointer<Byte> source3 = source2 + stride;
bool isNativeFloatAttrib = (stream.attribType == SpirvShader::ATTRIBTYPE_FLOAT) || stream.normalized;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment