Commit 8d75c100 by Nicolas Capens Committed by Nicolas Capens

Implement scatter/gather for 1- and 2-byte texels

Fixes out-of-bounds reads/writes and also avoids unaligned accesses. This is a generic reference implementation. Future optimizations could use unaligned 4-byte accesses if they're known to be safe (e.g. due to padding) and efficient. We can also eliminate the per-lane `If(Extract(mask, i) != 0)` checks if we know we're in a basic block post-dominated by the entry block and the number of active invocations is a multiple of the SIMD width. Bug: b/160531165 Change-Id: I892cfd3c7da8d8891cabe80695e5f35c57da73b4 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/46168 Presubmit-Ready: Nicolas Capens <nicolascapens@google.com> Kokoro-Result: kokoro <noreply+kokoro@google.com> Tested-by: Nicolas Capens <nicolascapens@google.com> Reviewed-by: Antonio Maiorano <amaiorano@google.com>
parent fd8f0e2f
...@@ -640,15 +640,45 @@ SpirvShader::EmitResult SpirvShader::EmitImageRead(InsnIterator insn, EmitState ...@@ -640,15 +640,45 @@ SpirvShader::EmitResult SpirvShader::EmitImageRead(InsnIterator insn, EmitState
auto texelSize = vk::Format(vkFormat).bytes(); auto texelSize = vk::Format(vkFormat).bytes();
auto texelPtr = GetTexelAddress(state, imageBase, imageSizeInBytes, coordinate, imageType, binding, texelSize, sampleId, useStencilAspect, robustness); auto texelPtr = GetTexelAddress(state, imageBase, imageSizeInBytes, coordinate, imageType, binding, texelSize, sampleId, useStencilAspect, robustness);
// Gather packed texel data. Texels larger than 4 bytes occupy multiple SIMD::Int elements.
// TODO(b/160531165): Provide gather abstractions for various element sizes.
SIMD::Int packed[4]; SIMD::Int packed[4];
// Round up texel size: for formats smaller than 32 bits per texel, we will emit a bunch if(texelSize == 4 || texelSize == 8 || texelSize == 16)
// of (overlapping) 32b loads here, and each lane will pick out what it needs from the low bits.
// TODO: specialize for small formats?
for(auto i = 0; i < (texelSize + 3) / 4; i++)
{ {
packed[i] = texelPtr.Load<SIMD::Int>(robustness, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4)); for(auto i = 0; i < texelSize / 4; i++)
texelPtr += sizeof(float); {
packed[i] = texelPtr.Load<SIMD::Int>(robustness, state->activeLaneMask());
texelPtr += sizeof(float);
}
}
else if(texelSize == 2)
{
SIMD::Int offsets = texelPtr.offsets();
SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(2, robustness);
for(int i = 0; i < SIMD::Width; i++)
{
If(Extract(mask, i) != 0)
{
packed[0] = Insert(packed[0], Int(*Pointer<Short>(texelPtr.base + Extract(offsets, i))), i);
}
}
} }
else if(texelSize == 1)
{
SIMD::Int offsets = texelPtr.offsets();
SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(1, robustness);
for(int i = 0; i < SIMD::Width; i++)
{
If(Extract(mask, i) != 0)
{
packed[0] = Insert(packed[0], Int(*Pointer<Byte>(texelPtr.base + Extract(offsets, i))), i);
}
}
}
else
UNREACHABLE("texelSize: %d", int(texelSize));
// Format support requirements here come from two sources: // Format support requirements here come from two sources:
// - Minimum required set of formats for loads from storage images // - Minimum required set of formats for loads from storage images
...@@ -918,7 +948,6 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState ...@@ -918,7 +948,6 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState
auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes)); auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
SIMD::Int packed[4]; SIMD::Int packed[4];
auto numPackedElements = 0u;
int texelSize = 0; int texelSize = 0;
auto format = static_cast<spv::ImageFormat>(imageType.definition.word(8)); auto format = static_cast<spv::ImageFormat>(imageType.definition.word(8));
switch(format) switch(format)
...@@ -931,14 +960,12 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState ...@@ -931,14 +960,12 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState
packed[1] = texel.Int(1); packed[1] = texel.Int(1);
packed[2] = texel.Int(2); packed[2] = texel.Int(2);
packed[3] = texel.Int(3); packed[3] = texel.Int(3);
numPackedElements = 4;
break; break;
case spv::ImageFormatR32f: case spv::ImageFormatR32f:
case spv::ImageFormatR32i: case spv::ImageFormatR32i:
case spv::ImageFormatR32ui: case spv::ImageFormatR32ui:
texelSize = 4; texelSize = 4;
packed[0] = texel.Int(0); packed[0] = texel.Int(0);
numPackedElements = 1;
break; break;
case spv::ImageFormatRgba8: case spv::ImageFormatRgba8:
texelSize = 4; texelSize = 4;
...@@ -946,7 +973,6 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState ...@@ -946,7 +973,6 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState
((SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) | ((SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
((SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) | ((SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
((SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24); ((SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
numPackedElements = 1;
break; break;
case spv::ImageFormatRgba8Snorm: case spv::ImageFormatRgba8Snorm:
texelSize = 4; texelSize = 4;
...@@ -961,7 +987,6 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState ...@@ -961,7 +987,6 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState
((SIMD::Int(Round(Min(Max(texel.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & ((SIMD::Int(Round(Min(Max(texel.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
SIMD::Int(0xFF)) SIMD::Int(0xFF))
<< 24); << 24);
numPackedElements = 1;
break; break;
case spv::ImageFormatRgba8i: case spv::ImageFormatRgba8i:
case spv::ImageFormatRgba8ui: case spv::ImageFormatRgba8ui:
...@@ -970,20 +995,17 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState ...@@ -970,20 +995,17 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState
(SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xff)) << 8) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xff)) << 8) |
(SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xff)) << 16) | (SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xff)) << 16) |
(SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xff)) << 24); (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xff)) << 24);
numPackedElements = 1;
break; break;
case spv::ImageFormatRgba16f: case spv::ImageFormatRgba16f:
texelSize = 8; texelSize = 8;
packed[0] = floatToHalfBits(texel.UInt(0), false) | floatToHalfBits(texel.UInt(1), true); packed[0] = floatToHalfBits(texel.UInt(0), false) | floatToHalfBits(texel.UInt(1), true);
packed[1] = floatToHalfBits(texel.UInt(2), false) | floatToHalfBits(texel.UInt(3), true); packed[1] = floatToHalfBits(texel.UInt(2), false) | floatToHalfBits(texel.UInt(3), true);
numPackedElements = 2;
break; break;
case spv::ImageFormatRgba16i: case spv::ImageFormatRgba16i:
case spv::ImageFormatRgba16ui: case spv::ImageFormatRgba16ui:
texelSize = 8; texelSize = 8;
packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xFFFF)) << 16); packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xFFFF)) << 16);
packed[1] = SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xFFFF)) << 16); packed[1] = SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xFFFF)) << 16);
numPackedElements = 2;
break; break;
case spv::ImageFormatRg32f: case spv::ImageFormatRg32f:
case spv::ImageFormatRg32i: case spv::ImageFormatRg32i:
...@@ -991,18 +1013,15 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState ...@@ -991,18 +1013,15 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState
texelSize = 8; texelSize = 8;
packed[0] = texel.Int(0); packed[0] = texel.Int(0);
packed[1] = texel.Int(1); packed[1] = texel.Int(1);
numPackedElements = 2;
break; break;
case spv::ImageFormatRg16f: case spv::ImageFormatRg16f:
texelSize = 4; texelSize = 4;
packed[0] = floatToHalfBits(texel.UInt(0), false) | floatToHalfBits(texel.UInt(1), true); packed[0] = floatToHalfBits(texel.UInt(0), false) | floatToHalfBits(texel.UInt(1), true);
numPackedElements = 1;
break; break;
case spv::ImageFormatRg16i: case spv::ImageFormatRg16i:
case spv::ImageFormatRg16ui: case spv::ImageFormatRg16ui:
texelSize = 4; texelSize = 4;
packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xFFFF)) << 16); packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xFFFF)) << 16);
numPackedElements = 1;
break; break;
case spv::ImageFormatR11fG11fB10f: case spv::ImageFormatR11fG11fB10f:
...@@ -1041,11 +1060,44 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState ...@@ -1041,11 +1060,44 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState
auto texelPtr = GetTexelAddress(state, imageBase, imageSizeInBytes, coordinate, imageType, binding, texelSize, 0, false, robustness); auto texelPtr = GetTexelAddress(state, imageBase, imageSizeInBytes, coordinate, imageType, binding, texelSize, 0, false, robustness);
for(auto i = 0u; i < numPackedElements; i++) // Scatter packed texel data.
// TODO(b/160531165): Provide scatter abstractions for various element sizes.
if(texelSize == 4 || texelSize == 8 || texelSize == 16)
{ {
texelPtr.Store(packed[i], robustness, state->activeLaneMask()); for(auto i = 0; i < texelSize / 4; i++)
texelPtr += sizeof(float); {
texelPtr.Store(packed[i], robustness, state->activeLaneMask());
texelPtr += sizeof(float);
}
}
else if(texelSize == 2)
{
SIMD::Int offsets = texelPtr.offsets();
SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(2, robustness);
for(int i = 0; i < SIMD::Width; i++)
{
If(Extract(mask, i) != 0)
{
*Pointer<Short>(texelPtr.base + Extract(offsets, i)) = Short(Extract(packed[0], i));
}
}
} }
else if(texelSize == 1)
{
SIMD::Int offsets = texelPtr.offsets();
SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(1, robustness);
for(int i = 0; i < SIMD::Width; i++)
{
If(Extract(mask, i) != 0)
{
*Pointer<Byte>(texelPtr.base + Extract(offsets, i)) = Byte(Extract(packed[0], i));
}
}
}
else
UNREACHABLE("texelSize: %d", int(texelSize));
return EmitResult::Continue; return EmitResult::Continue;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment