Commit 8ac0bd6c by Nicolas Capens Committed by Nicolas Capens

Fix image sampling with divergent LOD

Currently our SamplerCore code performs sampling for four SIMD lanes simultaneously. With implicit LOD calculation for fragment shaders, all four pixels in a quad share the same LOD and thus sample from the same mipmap level. But for the vertex shader the LOD is always explicitly provided, and can vary significantly between completely unrelated vertices. Previously we only used the LOD of the first one in each group of four.

As a workaround, process explicit-lod sampling instructions in a lane-by-lane manner.

Bug: b/133868964
Tests: dEQP-VK.glsl.texture_functions.*
Change-Id: If4e0d3c04d29529300111d73801124080cb4b544
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/32488
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
parent 7e95471a
...@@ -65,7 +65,7 @@ namespace sw ...@@ -65,7 +65,7 @@ namespace sw
TEXTURE_2D, TEXTURE_2D,
TEXTURE_3D, TEXTURE_3D,
TEXTURE_CUBE, TEXTURE_CUBE,
TEXTURE_1D_ARRAY, // Treated as 2D texture with second coordinate 0. TEXTURE_1D_ARRAY, // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
TEXTURE_2D_ARRAY, TEXTURE_2D_ARRAY,
TEXTURE_CUBE_ARRAY, TEXTURE_CUBE_ARRAY,
......
...@@ -85,24 +85,6 @@ namespace sw ...@@ -85,24 +85,6 @@ namespace sw
state.shaderID = context->vertexShader->getSerialID(); state.shaderID = context->vertexShader->getSerialID();
switch(context->topology)
{
case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
state.verticesPerPrimitive = 1;
break;
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
state.verticesPerPrimitive = 2;
break;
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
state.verticesPerPrimitive = 3;
break;
default:
UNIMPLEMENTED("topology %d", int(context->topology));
}
for(int i = 0; i < MAX_VERTEX_INPUTS; i++) for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
{ {
state.input[i].type = context->input[i].type; state.input[i].type = context->input[i].type;
......
...@@ -50,9 +50,6 @@ namespace sw ...@@ -50,9 +50,6 @@ namespace sw
uint64_t shaderID; uint64_t shaderID;
bool textureSampling : 1; // TODO: Eliminate by querying shader.
unsigned char verticesPerPrimitive : 2; // 1 (points), 2 (lines) or 3 (triangles)
struct Input struct Input
{ {
operator bool() const // Returns true if stream contains data operator bool() const // Returns true if stream contains data
......
...@@ -53,7 +53,7 @@ namespace sw ...@@ -53,7 +53,7 @@ namespace sw
{ {
} }
Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &lodOrBias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, SamplerFunction function)
{ {
Vector4f c; Vector4f c;
...@@ -103,17 +103,17 @@ namespace sw ...@@ -103,17 +103,17 @@ namespace sw
if(function == Bias) if(function == Bias)
{ {
lod += lodOrBias.x; lod += lodOrBias;
} }
} }
else if(function == Lod) else if(function == Lod)
{ {
lod = lodOrBias.x; lod = lodOrBias;
} }
else if(function == Fetch) else if(function == Fetch)
{ {
// TODO: Eliminate int-float-int conversion. // TODO: Eliminate int-float-int conversion.
lod = Float(As<Int>(Float(lodOrBias.x))); lod = Float(As<Int>(lodOrBias));
} }
else if(function == Base || function == Gather) else if(function == Base || function == Gather)
{ {
...@@ -1031,7 +1031,7 @@ namespace sw ...@@ -1031,7 +1031,7 @@ namespace sw
return lod; return lod;
} }
void SamplerCore::computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, Vector4f &dsx, Vector4f &dsy, SamplerFunction function) void SamplerCore::computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, Float4 &dsx, Float4 &dsy, SamplerFunction function)
{ {
Float4 duvdxy; Float4 duvdxy;
...@@ -1041,8 +1041,8 @@ namespace sw ...@@ -1041,8 +1041,8 @@ namespace sw
} }
else else
{ {
Float4 dudxy = Float4(dsx.x.xx, dsy.x.xx); Float4 dudxy = Float4(dsx.xx, dsy.xx);
Float4 dvdxy = Float4(dsx.y.xx, dsy.y.xx); Float4 dvdxy = Float4(dsx.yy, dsy.yy);
duvdxy = Float4(dudxy.xz, dvdxy.xz); duvdxy = Float4(dudxy.xz, dvdxy.xz);
} }
...@@ -1077,7 +1077,7 @@ namespace sw ...@@ -1077,7 +1077,7 @@ namespace sw
lod = log2sqrt(lod); // log2(sqrt(lod)) lod = log2sqrt(lod); // log2(sqrt(lod))
} }
void SamplerCore::computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function) void SamplerCore::computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Float4 &dsx, Float4 &dsy, Float4 &M, SamplerFunction function)
{ {
Float4 dudxy, dvdxy, dsdxy; Float4 dudxy, dvdxy, dsdxy;
...@@ -1093,9 +1093,9 @@ namespace sw ...@@ -1093,9 +1093,9 @@ namespace sw
} }
else else
{ {
dudxy = Float4(dsx.x.xx, dsy.x.xx); dudxy = Float4(dsx.xx, dsy.xx);
dvdxy = Float4(dsx.y.xx, dsy.y.xx); dvdxy = Float4(dsx.yy, dsy.yy);
dsdxy = Float4(dsx.z.xx, dsy.z.xx); dsdxy = Float4(dsx.zz, dsy.zz);
dudxy = Abs(dudxy * Float4(M.x)); dudxy = Abs(dudxy * Float4(M.x));
dvdxy = Abs(dvdxy * Float4(M.x)); dvdxy = Abs(dvdxy * Float4(M.x));
...@@ -1118,7 +1118,7 @@ namespace sw ...@@ -1118,7 +1118,7 @@ namespace sw
lod = log2(lod); lod = log2(lod);
} }
void SamplerCore::computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, Vector4f &dsx, Vector4f &dsy, SamplerFunction function) void SamplerCore::computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, Float4 &dsx, Float4 &dsy, SamplerFunction function)
{ {
Float4 dudxy, dvdxy, dsdxy; Float4 dudxy, dvdxy, dsdxy;
...@@ -1130,9 +1130,9 @@ namespace sw ...@@ -1130,9 +1130,9 @@ namespace sw
} }
else else
{ {
dudxy = Float4(dsx.x.xx, dsy.x.xx); dudxy = Float4(dsx.xx, dsy.xx);
dvdxy = Float4(dsx.y.xx, dsy.y.xx); dvdxy = Float4(dsx.yy, dsy.yy);
dsdxy = Float4(dsx.z.xx, dsy.z.xx); dsdxy = Float4(dsx.zz, dsy.zz);
} }
// Scale by texture dimensions. // Scale by texture dimensions.
...@@ -2344,7 +2344,7 @@ namespace sw ...@@ -2344,7 +2344,7 @@ namespace sw
{ {
return (state.textureType == TEXTURE_3D) || return (state.textureType == TEXTURE_3D) ||
(state.textureType == TEXTURE_2D_ARRAY) || (state.textureType == TEXTURE_2D_ARRAY) ||
(state.textureType == TEXTURE_1D_ARRAY); // Treated as 2D texture with second coordinate 0. (state.textureType == TEXTURE_1D_ARRAY); // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
} }
bool SamplerCore::has16bitTextureFormat() const bool SamplerCore::has16bitTextureFormat() const
......
...@@ -62,7 +62,7 @@ namespace sw ...@@ -62,7 +62,7 @@ namespace sw
public: public:
SamplerCore(Pointer<Byte> &constants, const Sampler &state); SamplerCore(Pointer<Byte> &constants, const Sampler &state);
Vector4f sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &lodOrBias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function); Vector4f sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, SamplerFunction function);
private: private:
Short4 offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod); Short4 offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod);
...@@ -78,9 +78,9 @@ namespace sw ...@@ -78,9 +78,9 @@ namespace sw
Vector4f sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function); Vector4f sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
Float log2sqrt(Float lod); Float log2sqrt(Float lod);
Float log2(Float lod); Float log2(Float lod);
void computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, Vector4f &dsx, Vector4f &dsy, SamplerFunction function); void computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, Float4 &dsx, Float4 &dsy, SamplerFunction function);
void computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function); void computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Float4 &dsx, Float4 &dsy, Float4 &M, SamplerFunction function);
void computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Vector4f &dsx, Vector4f &dsy, SamplerFunction function); void computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Float4 &dsx, Float4 &dsy, SamplerFunction function);
Int4 cubeFace(Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M); Int4 cubeFace(Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M);
Short4 applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode); Short4 applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode);
void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function); void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function);
......
...@@ -145,7 +145,7 @@ SpirvShader::ImageSampler *SpirvShader::emitSamplerFunction(ImageInstruction ins ...@@ -145,7 +145,7 @@ SpirvShader::ImageSampler *SpirvShader::emitSamplerFunction(ImageInstruction ins
i++; i++;
} }
// TODO(b/129523279): Currently 1D textures are treated as 2D by setting the second coordinate to 0. // TODO(b/134669567): Currently 1D textures are treated as 2D by setting the second coordinate to 0.
// Implement optimized 1D sampling. // Implement optimized 1D sampling.
if(samplerState.textureType == TEXTURE_1D) if(samplerState.textureType == TEXTURE_1D)
{ {
...@@ -184,13 +184,52 @@ SpirvShader::ImageSampler *SpirvShader::emitSamplerFunction(ImageInstruction ins ...@@ -184,13 +184,52 @@ SpirvShader::ImageSampler *SpirvShader::emitSamplerFunction(ImageInstruction ins
} }
SamplerCore s(constants, samplerState); SamplerCore s(constants, samplerState);
Vector4f sample = s.sampleTexture(texture, sampler, uvw[0], uvw[1], uvw[2], q, lodOrBias, dsx, dsy, offset, samplerFunction);
Pointer<SIMD::Float> rgba = out; // For explicit-lod instructions the LOD can be different per SIMD lane. SamplerCore currently assumes
rgba[0] = sample.x; // a single LOD per four elements, so we sample the image again for each LOD separately.
rgba[1] = sample.y; if(samplerFunction.method == Lod || samplerFunction.method == Grad) // TODO(b/133868964): Also handle divergent Bias and Fetch with Lod.
rgba[2] = sample.z; {
rgba[3] = sample.w; auto lod = Pointer<Float>(&lodOrBias);
For(Int i = 0, i < SIMD::Width, i++)
{
SIMD::Float dPdx;
SIMD::Float dPdy;
dPdx.x = Pointer<Float>(&dsx.x)[i];
dPdx.y = Pointer<Float>(&dsx.y)[i];
dPdx.z = Pointer<Float>(&dsx.z)[i];
dPdy.x = Pointer<Float>(&dsy.x)[i];
dPdy.y = Pointer<Float>(&dsy.y)[i];
dPdy.z = Pointer<Float>(&dsy.z)[i];
// 1D textures are treated as 2D texture with second coordinate 0, so we also need to zero out the second grad component. TODO(b/134669567)
if(samplerState.textureType == TEXTURE_1D || samplerState.textureType == TEXTURE_1D_ARRAY)
{
dPdx.y = Float(0.0f);
dPdy.y = Float(0.0f);
}
Vector4f sample = s.sampleTexture(texture, sampler, uvw[0], uvw[1], uvw[2], q, lod[i], dPdx, dPdy, offset, samplerFunction);
Pointer<Float> rgba = out;
rgba[0 * SIMD::Width + i] = Pointer<Float>(&sample.x)[i];
rgba[1 * SIMD::Width + i] = Pointer<Float>(&sample.y)[i];
rgba[2 * SIMD::Width + i] = Pointer<Float>(&sample.z)[i];
rgba[3 * SIMD::Width + i] = Pointer<Float>(&sample.w)[i];
}
}
else
{
Vector4f sample = s.sampleTexture(texture, sampler, uvw[0], uvw[1], uvw[2], q, lodOrBias.x, (dsx.x), (dsy.x), offset, samplerFunction);
Pointer<SIMD::Float> rgba = out;
rgba[0] = sample.x;
rgba[1] = sample.y;
rgba[2] = sample.z;
rgba[3] = sample.w;
}
} }
return (ImageSampler*)function("sampler")->getEntry(); return (ImageSampler*)function("sampler")->getEntry();
...@@ -292,7 +331,7 @@ sw::AddressingMode SpirvShader::convertAddressingMode(int coordinateIndex, VkSam ...@@ -292,7 +331,7 @@ sw::AddressingMode SpirvShader::convertAddressingMode(int coordinateIndex, VkSam
} }
break; break;
case VK_IMAGE_VIEW_TYPE_1D: // Treated as 2D texture with second coordinate 0. case VK_IMAGE_VIEW_TYPE_1D: // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
if(coordinateIndex == 1) if(coordinateIndex == 1)
{ {
return ADDRESSING_WRAP; return ADDRESSING_WRAP;
...@@ -310,7 +349,7 @@ sw::AddressingMode SpirvShader::convertAddressingMode(int coordinateIndex, VkSam ...@@ -310,7 +349,7 @@ sw::AddressingMode SpirvShader::convertAddressingMode(int coordinateIndex, VkSam
} }
break; break;
case VK_IMAGE_VIEW_TYPE_1D_ARRAY: // Treated as 2D texture with second coordinate 0. case VK_IMAGE_VIEW_TYPE_1D_ARRAY: // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
if(coordinateIndex == 1) if(coordinateIndex == 1)
{ {
return ADDRESSING_WRAP; return ADDRESSING_WRAP;
......
...@@ -41,8 +41,6 @@ namespace sw ...@@ -41,8 +41,6 @@ namespace sw
void VertexRoutine::generate() void VertexRoutine::generate()
{ {
const bool textureSampling = state.textureSampling;
Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache); Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache);
Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex); Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex);
Pointer<Byte> tagCache = cache + OFFSET(VertexCache,tag); Pointer<Byte> tagCache = cache + OFFSET(VertexCache,tag);
...@@ -55,7 +53,7 @@ namespace sw ...@@ -55,7 +53,7 @@ namespace sw
{ {
UInt index = *Pointer<UInt>(batch); UInt index = *Pointer<UInt>(batch);
UInt tagIndex = index & 0x0000003C; UInt tagIndex = index & 0x0000003C;
UInt indexQ = !textureSampling ? UInt(index & 0xFFFFFFFC) : index; // FIXME: TEXLDL hack to have independent LODs, hurts performance. UInt indexQ = index & 0xFFFFFFFC;
If(*Pointer<UInt>(tagCache + tagIndex) != indexQ) If(*Pointer<UInt>(tagCache + tagIndex) != indexQ)
{ {
...@@ -139,14 +137,12 @@ namespace sw ...@@ -139,14 +137,12 @@ namespace sw
Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index) Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index)
{ {
const bool textureSampling = state.textureSampling;
Vector4f v; Vector4f v;
Pointer<Byte> source0 = buffer + index * stride; Pointer<Byte> source0 = buffer + index * stride;
Pointer<Byte> source1 = source0 + (!textureSampling ? stride : 0); Pointer<Byte> source1 = source0 + stride;
Pointer<Byte> source2 = source1 + (!textureSampling ? stride : 0); Pointer<Byte> source2 = source1 + stride;
Pointer<Byte> source3 = source2 + (!textureSampling ? stride : 0); Pointer<Byte> source3 = source2 + stride;
bool isNativeFloatAttrib = (stream.attribType == SpirvShader::ATTRIBTYPE_FLOAT) || stream.normalized; bool isNativeFloatAttrib = (stream.attribType == SpirvShader::ATTRIBTYPE_FLOAT) || stream.normalized;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment