Commit 9e4bc1ba by Ben Clayton

SpirvShader: Refactor loads and stores. Consider limits.

This change moves all calls to rr::Load() and rr::Store() to two new functions: SIMD::Load() and SIMD::Store(). This attempts to consolidate the SIMD memory ops into reusable functions, while also adding bounds checking on the accesses. The additional branches hurt the JIT codegen performance. This will be resolved with a future change. Tests: dEQP-VK.robustness.* Bug: b/131224163 Change-Id: I3a392a1f4f5366fa5134c081e0a2479575f92d80 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/29334 Reviewed-by: Chris Forbes <chrisforbes@google.com> Tested-by: Ben Clayton <bclayton@google.com> Presubmit-Ready: Ben Clayton <bclayton@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
parent 4d1f8d05
...@@ -217,10 +217,90 @@ namespace ...@@ -217,10 +217,90 @@ namespace
s * ( a * fkgj - b * ekgi + c * ejfi), s * ( a * fkgj - b * ekgi + c * ejfi),
}}; }};
} }
}
// Transforms p into a pointer to lane-interleaved storage: the original
// byte offset is scaled by SIMD::Width, and lane i is then displaced by
// i * sizeof(float), so each lane addresses its own float-sized slot.
// Generalized over SIMD::Width instead of hard-coding four lanes.
sw::SIMD::Pointer interleaveByLane(sw::SIMD::Pointer p)
{
	p *= sw::SIMD::Width;
	for (int lane = 0; lane < sw::SIMD::Width; lane++)
	{
		p.staticOffsets[lane] += lane * static_cast<int>(sizeof(float));
	}
	return p;
}
} // anonymous namespace
namespace sw namespace sw
{ {
namespace SIMD
{
// Emits Reactor code for a masked gather of a SIMD value of type T through
// ptr. A lane is only read if its mask bit is set AND its whole element
// lies within ptr.limit; otherwise that lane of the result is left as the
// default-initialized / zero value. Note that If/Else here are Reactor
// constructs: they emit run-time branches into the generated routine.
template<typename T>
T Load(Pointer ptr, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
{
	using EL = typename Element<T>::type; // Scalar per-lane element type.
	T out;
	auto offsets = ptr.offsets();
	// Disable OOB reads: lane i may only load if
	// [offsets[i], offsets[i] + sizeof(float)) lies below ptr.limit.
	mask &= CmpLT(offsets + SIMD::Int(sizeof(float) - 1), SIMD::Int(ptr.limit));
	auto anyLanesDisabled = AnyFalse(mask);
	If(ptr.hasEqualOffsets() && !anyLanesDisabled)
	{
		// All lanes address the same element and all lanes are enabled:
		// load one scalar, replicate it across the vector.
		auto offset = Extract(offsets, 0);
		out = T(Load(rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order));
	}
	Else If(ptr.hasSequentialOffsets() && !anyLanesDisabled)
	{
		// Contiguous lanes, all enabled: load all elements in a single
		// SIMD instruction starting at lane 0's offset.
		auto offset = Extract(offsets, 0);
		out = Load(rr::Pointer<T>(&ptr.base[offset]), sizeof(float), atomic, order);
	}
	Else
	{
		// Divergent offsets or masked lanes - load each element individually.
		out = T(0); // Disabled lanes read as zero.
		for (int i = 0; i < SIMD::Width; i++)
		{
			If(Extract(mask, i) != 0)
			{
				auto offset = Extract(offsets, i);
				auto el = rr::Load(rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order);
				out = Insert(out, el, i);
			}
		}
	}
	return out;
}
// Emits Reactor code for a masked scatter of val through ptr. A lane is
// only written if its mask bit is set AND its whole element lies within
// ptr.limit. Unlike Load(), there is no replicate fast path: multiple
// enabled lanes writing the same address would conflict, so equal offsets
// fall through to the per-lane path.
template<typename T>
void Store(Pointer ptr, T val, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
{
	using EL = typename Element<T>::type; // Scalar per-lane element type.
	auto offsets = ptr.offsets();
	// Disable OOB writes: lane i may only store if
	// [offsets[i], offsets[i] + sizeof(float)) lies below ptr.limit.
	mask &= CmpLT(offsets + SIMD::Int(sizeof(float) - 1), SIMD::Int(ptr.limit));
	auto anyLanesDisabled = AnyFalse(mask);
	If(ptr.hasSequentialOffsets() && !anyLanesDisabled)
	{
		// Contiguous lanes, all enabled: store all elements in a single
		// SIMD instruction starting at lane 0's offset.
		auto offset = Extract(offsets, 0);
		Store(val, rr::Pointer<T>(&ptr.base[offset]), sizeof(float), atomic, order);
	}
	Else
	{
		// Divergent offsets or masked lanes - store each element individually.
		for (int i = 0; i < SIMD::Width; i++)
		{
			If(Extract(mask, i) != 0)
			{
				auto offset = Extract(offsets, i);
				rr::Store(Extract(val, i), rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order);
			}
		}
	}
}
} // namespace SIMD
volatile int SpirvShader::serialCounter = 1; // Start at 1, 0 is invalid shader. volatile int SpirvShader::serialCounter = 1; // Start at 1, 0 is invalid shader.
SpirvShader::SpirvShader(InsnStore const &insns) SpirvShader::SpirvShader(InsnStore const &insns)
...@@ -1180,7 +1260,6 @@ namespace sw ...@@ -1180,7 +1260,6 @@ namespace sw
ASSERT(d.Binding >= 0); ASSERT(d.Binding >= 0);
auto set = routine->getPointer(id); auto set = routine->getPointer(id);
ASSERT(set.uniform);
auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet); auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
int bindingOffset = static_cast<int>(setLayout->getBindingOffset(d.Binding, arrayIndex)); int bindingOffset = static_cast<int>(setLayout->getBindingOffset(d.Binding, arrayIndex));
...@@ -1189,6 +1268,7 @@ namespace sw ...@@ -1189,6 +1268,7 @@ namespace sw
Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(bufferInfo + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer* Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(bufferInfo + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void* Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
Int offset = *Pointer<Int>(bufferInfo + OFFSET(VkDescriptorBufferInfo, offset)); Int offset = *Pointer<Int>(bufferInfo + OFFSET(VkDescriptorBufferInfo, offset));
Int size = *Pointer<Int>(buffer + vk::Buffer::DataSize); // void*
if (setLayout->isBindingDynamic(d.Binding)) if (setLayout->isBindingDynamic(d.Binding))
{ {
uint32_t dynamicBindingIndex = uint32_t dynamicBindingIndex =
...@@ -1197,12 +1277,12 @@ namespace sw ...@@ -1197,12 +1277,12 @@ namespace sw
arrayIndex; arrayIndex;
offset += routine->descriptorDynamicOffsets[dynamicBindingIndex]; offset += routine->descriptorDynamicOffsets[dynamicBindingIndex];
} }
return SIMD::Pointer(data + offset); return SIMD::Pointer(data + offset, size - offset);
} }
default: default:
UNREACHABLE("Invalid pointer kind %d", int(object.kind)); UNREACHABLE("Invalid pointer kind %d", int(object.kind));
return SIMD::Pointer(Pointer<Byte>()); return SIMD::Pointer(Pointer<Byte>(), 0);
} }
} }
...@@ -1298,7 +1378,7 @@ namespace sw ...@@ -1298,7 +1378,7 @@ namespace sw
} }
else else
{ {
ptr.addOffset(SIMD::Int(d.ArrayStride) * routine->getIntermediate(indexIds[i]).Int(0)); ptr += SIMD::Int(d.ArrayStride) * routine->getIntermediate(indexIds[i]).Int(0);
} }
typeId = type.element; typeId = type.element;
break; break;
...@@ -1316,7 +1396,7 @@ namespace sw ...@@ -1316,7 +1396,7 @@ namespace sw
} }
else else
{ {
ptr.addOffset(SIMD::Int(columnStride) * routine->getIntermediate(indexIds[i]).Int(0)); ptr += SIMD::Int(columnStride) * routine->getIntermediate(indexIds[i]).Int(0);
} }
typeId = type.element; typeId = type.element;
break; break;
...@@ -1331,7 +1411,7 @@ namespace sw ...@@ -1331,7 +1411,7 @@ namespace sw
} }
else else
{ {
ptr.addOffset(SIMD::Int(elemStride) * routine->getIntermediate(indexIds[i]).Int(0)); ptr += SIMD::Int(elemStride) * routine->getIntermediate(indexIds[i]).Int(0);
} }
typeId = type.element; typeId = type.element;
break; break;
...@@ -1341,10 +1421,7 @@ namespace sw ...@@ -1341,10 +1421,7 @@ namespace sw
} }
} }
if (constantOffset != 0) ptr += constantOffset;
{
ptr.addOffset(constantOffset);
}
return ptr; return ptr;
} }
...@@ -1405,11 +1482,11 @@ namespace sw ...@@ -1405,11 +1482,11 @@ namespace sw
auto & obj = getObject(indexIds[i]); auto & obj = getObject(indexIds[i]);
if (obj.kind == Object::Kind::Constant) if (obj.kind == Object::Kind::Constant)
{ {
constantOffset += stride * GetConstantInt(indexIds[i]); ptr += stride * GetConstantInt(indexIds[i]);
} }
else else
{ {
ptr.addOffset(SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0)); ptr += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
} }
} }
typeId = type.element; typeId = type.element;
...@@ -1423,7 +1500,7 @@ namespace sw ...@@ -1423,7 +1500,7 @@ namespace sw
if (constantOffset != 0) if (constantOffset != 0)
{ {
ptr.addOffset(constantOffset); ptr += constantOffset;
} }
return ptr; return ptr;
} }
...@@ -2261,8 +2338,11 @@ namespace sw ...@@ -2261,8 +2338,11 @@ namespace sw
case spv::StorageClassPrivate: case spv::StorageClassPrivate:
case spv::StorageClassFunction: case spv::StorageClassFunction:
{ {
ASSERT(objectTy.opcode() == spv::OpTypePointer);
auto base = &routine->getVariable(resultId)[0]; auto base = &routine->getVariable(resultId)[0];
routine->createPointer(resultId, SIMD::Pointer(base)); auto elementTy = getType(objectTy.element);
auto size = elementTy.sizeInComponents * sizeof(float) * SIMD::Width;
routine->createPointer(resultId, SIMD::Pointer(base, size));
break; break;
} }
case spv::StorageClassInput: case spv::StorageClassInput:
...@@ -2277,8 +2357,11 @@ namespace sw ...@@ -2277,8 +2357,11 @@ namespace sw
dst[offset++] = routine->inputs[scalarSlot]; dst[offset++] = routine->inputs[scalarSlot];
}); });
} }
ASSERT(objectTy.opcode() == spv::OpTypePointer);
auto base = &routine->getVariable(resultId)[0]; auto base = &routine->getVariable(resultId)[0];
routine->createPointer(resultId, SIMD::Pointer(base)); auto elementTy = getType(objectTy.element);
auto size = elementTy.sizeInComponents * sizeof(float) * SIMD::Width;
routine->createPointer(resultId, SIMD::Pointer(base, size));
break; break;
} }
case spv::StorageClassUniformConstant: case spv::StorageClassUniformConstant:
...@@ -2292,7 +2375,8 @@ namespace sw ...@@ -2292,7 +2375,8 @@ namespace sw
size_t bindingOffset = setLayout->getBindingOffset(d.Binding, arrayIndex); size_t bindingOffset = setLayout->getBindingOffset(d.Binding, arrayIndex);
Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet* Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // vk::SampledImageDescriptor* Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // vk::SampledImageDescriptor*
routine->createPointer(resultId, SIMD::Pointer(binding)); auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
routine->createPointer(resultId, SIMD::Pointer(binding, size));
break; break;
} }
case spv::StorageClassUniform: case spv::StorageClassUniform:
...@@ -2300,13 +2384,13 @@ namespace sw ...@@ -2300,13 +2384,13 @@ namespace sw
{ {
const auto &d = descriptorDecorations.at(resultId); const auto &d = descriptorDecorations.at(resultId);
ASSERT(d.DescriptorSet >= 0 && d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS); ASSERT(d.DescriptorSet >= 0 && d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS);
auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
routine->createPointer(resultId, SIMD::Pointer(routine->descriptorSets[d.DescriptorSet])); routine->createPointer(resultId, SIMD::Pointer(routine->descriptorSets[d.DescriptorSet], size));
break; break;
} }
case spv::StorageClassPushConstant: case spv::StorageClassPushConstant:
{ {
routine->createPointer(resultId, SIMD::Pointer(routine->pushConstants)); routine->createPointer(resultId, SIMD::Pointer(routine->pushConstants, vk::MAX_PUSH_CONSTANT_SIZE));
break; break;
} }
default: default:
...@@ -2358,55 +2442,15 @@ namespace sw ...@@ -2358,55 +2442,15 @@ namespace sw
auto ptr = GetPointerToData(pointerId, 0, routine); auto ptr = GetPointerToData(pointerId, 0, routine);
bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass); bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
auto load = std::unique_ptr<SIMD::Float[]>(new SIMD::Float[resultTy.sizeInComponents]); auto &dst = routine->createIntermediate(resultId, resultTy.sizeInComponents);
If(!ptr.uniform || anyInactiveLanes)
{
// Divergent offsets or masked lanes.
VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
{
// i wish i had a Float,Float,Float,Float constructor here..
for (int j = 0; j < SIMD::Width; j++)
{
If(Extract(state->activeLaneMask(), j) != 0)
{
Int offset = Int(o) + Extract(ptr.offset, j);
if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
load[i] = Insert(load[i], Load(Pointer<Float>(&ptr.base[offset]), sizeof(float), atomic, memoryOrder), j);
}
}
});
}
Else
{
// No divergent offsets or masked lanes.
if (interleavedByLane)
{
// Lane-interleaved data.
VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
{
Pointer<SIMD::Float> src = &ptr.base[offset * SIMD::Width];
load[i] = Load(src, sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
});
}
else
{
// Non-interleaved data.
VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset) VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
{ {
Pointer<Float> src = &ptr.base[offset]; auto p = ptr + offset;
load[i] = RValue<SIMD::Float>(Load(src, sizeof(float), atomic, memoryOrder)); // TODO: optimize alignment if (interleavedByLane) { p = interleaveByLane(p); }
dst.move(i, SIMD::Load<SIMD::Float>(p, state->activeLaneMask(), atomic, memoryOrder));
}); });
}
}
auto &dst = routine->createIntermediate(resultId, resultTy.sizeInComponents);
for (auto i = 0u; i < resultTy.sizeInComponents; i++)
{
dst.move(i, load[i]);
}
return EmitResult::Continue; return EmitResult::Continue;
} }
...@@ -2438,84 +2482,30 @@ namespace sw ...@@ -2438,84 +2482,30 @@ namespace sw
} }
auto ptr = GetPointerToData(pointerId, 0, routine); auto ptr = GetPointerToData(pointerId, 0, routine);
bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass); bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
if (object.kind == Object::Kind::Constant) if (object.kind == Object::Kind::Constant)
{ {
// Constant source data. // Constant source data.
auto src = reinterpret_cast<float *>(object.constantValue.get()); auto src = reinterpret_cast<float *>(object.constantValue.get());
If(!ptr.uniform || anyInactiveLanes)
{
// Divergent offsets or masked lanes.
VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
{
for (int j = 0; j < SIMD::Width; j++)
{
If(Extract(state->activeLaneMask(), j) != 0)
{
Int offset = Int(o) + Extract(ptr.offset, j);
if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
Store(Float(src[i]), Pointer<Float>(&ptr.base[offset]), sizeof(float), atomic, memoryOrder);
}
}
});
}
Else
{
// Constant source data.
// No divergent offsets or masked lanes.
VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset) VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
{ {
Pointer<SIMD::Float> dst = &ptr.base[offset * SIMD::Width]; auto p = ptr + offset;
Store(SIMD::Float(src[i]), dst, sizeof(float), atomic, memoryOrder); // TODO: optimize alignment if (interleavedByLane) { p = interleaveByLane(p); }
SIMD::Store(p, SIMD::Float(src[i]), state->activeLaneMask(), atomic, memoryOrder);
}); });
} }
}
else else
{ {
// Intermediate source data. // Intermediate source data.
auto &src = routine->getIntermediate(objectId); auto &src = routine->getIntermediate(objectId);
If(!ptr.uniform || anyInactiveLanes)
{
// Divergent offsets or masked lanes.
VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
{
for (int j = 0; j < SIMD::Width; j++)
{
If(Extract(state->activeLaneMask(), j) != 0)
{
Int offset = Int(o) + Extract(ptr.offset, j);
if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
Store(Extract(src.Float(i), j), Pointer<Float>(&ptr.base[offset]), sizeof(float), atomic, memoryOrder);
}
}
});
}
Else
{
// No divergent offsets or masked lanes.
if (interleavedByLane)
{
// Lane-interleaved data.
VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
{
Pointer<SIMD::Float> dst = &ptr.base[offset * SIMD::Width];
Store(src.Float(i), dst, sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
});
}
else
{
// Intermediate source data. Non-interleaved data.
VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset) VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
{ {
Pointer<SIMD::Float> dst = &ptr.base[offset]; auto p = ptr + offset;
Store(SIMD::Float(src.Float(i)), dst, sizeof(float), atomic, memoryOrder); // TODO: optimize alignment if (interleavedByLane) { p = interleaveByLane(p); }
SIMD::Store(p, src.Float(i), state->activeLaneMask(), atomic, memoryOrder);
}); });
} }
}
}
return EmitResult::Continue; return EmitResult::Continue;
} }
...@@ -3561,16 +3551,9 @@ namespace sw ...@@ -3561,16 +3551,9 @@ namespace sw
dst.move(i, frac); dst.move(i, frac);
// TODO: Refactor and consolidate with EmitStore. auto p = ptr + (i * sizeof(float));
for (int j = 0; j < SIMD::Width; j++) if (interleavedByLane) { p = interleaveByLane(p); }
{ SIMD::Store(p, whole, state->activeLaneMask());
If(Extract(state->activeLaneMask(), j) != 0)
{
Int offset = Int(i * sizeof(float)) + Extract(ptr.offset, j);
if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
Store(Extract(whole, j), Pointer<Float>(&ptr.base[offset]), sizeof(float), false, std::memory_order_relaxed);
}
}
} }
break; break;
} }
...@@ -3705,16 +3688,9 @@ namespace sw ...@@ -3705,16 +3688,9 @@ namespace sw
dst.move(i, significand); dst.move(i, significand);
// TODO: Refactor and consolidate with EmitStore. auto p = ptr + (i * sizeof(float));
for (int j = 0; j < SIMD::Width; j++) if (interleavedByLane) { p = interleaveByLane(p); }
{ SIMD::Store(p, exponent, state->activeLaneMask());
If(Extract(state->activeLaneMask(), j) != 0)
{
Int offset = Int(i * sizeof(float)) + Extract(ptr.offset, j);
if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(uint32_t)); }
Store(Extract(exponent, j), Pointer<Int>(&ptr.base[offset]), sizeof(uint32_t), false, std::memory_order_relaxed);
}
}
} }
break; break;
} }
...@@ -4454,30 +4430,29 @@ namespace sw ...@@ -4454,30 +4430,29 @@ namespace sw
return EmitResult::Continue; return EmitResult::Continue;
} }
SIMD::Int SpirvShader::GetTexelOffset(GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize) const SIMD::Pointer SpirvShader::GetTexelAddress(SIMD::Pointer ptr, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize) const
{ {
// returns a (lane-divergent) byte offset to a texel within a storage image.
bool isArrayed = imageType.definition.word(5) != 0; bool isArrayed = imageType.definition.word(5) != 0;
int dims = getType(coordinate.type).sizeInComponents - (isArrayed ? 1 : 0); int dims = getType(coordinate.type).sizeInComponents - (isArrayed ? 1 : 0);
SIMD::Int texelOffset = coordinate.Int(0) * SIMD::Int(texelSize); ptr += coordinate.Int(0) * SIMD::Int(texelSize);
if (dims > 1) if (dims > 1)
{ {
texelOffset += coordinate.Int(1) * SIMD::Int( ptr += coordinate.Int(1) * SIMD::Int(
*Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, rowPitchBytes))); *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, rowPitchBytes)));
} }
if (dims > 2) if (dims > 2)
{ {
texelOffset += coordinate.Int(2) * SIMD::Int( ptr += coordinate.Int(2) * SIMD::Int(
*Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, slicePitchBytes))); *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, slicePitchBytes)));
} }
if (isArrayed) if (isArrayed)
{ {
texelOffset += coordinate.Int(dims) * SIMD::Int( ptr += coordinate.Int(dims) * SIMD::Int(
*Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, slicePitchBytes))); *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, slicePitchBytes)));
} }
return texelOffset; return ptr;
} }
SpirvShader::EmitResult SpirvShader::EmitImageRead(InsnIterator insn, EmitState *state) const SpirvShader::EmitResult SpirvShader::EmitImageRead(InsnIterator insn, EmitState *state) const
...@@ -4496,9 +4471,9 @@ namespace sw ...@@ -4496,9 +4471,9 @@ namespace sw
auto coordinate = GenericValue(this, state->routine, insn.word(4)); auto coordinate = GenericValue(this, state->routine, insn.word(4));
auto pointer = state->routine->getPointer(imageId); auto pointer = state->routine->getPointer(imageId);
ASSERT(pointer.uniform);
Pointer<Byte> binding = pointer.base; Pointer<Byte> binding = pointer.base;
Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr)); Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents); auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
...@@ -4546,19 +4521,13 @@ namespace sw ...@@ -4546,19 +4521,13 @@ namespace sw
UNIMPLEMENTED("spv::ImageFormat %u", format); UNIMPLEMENTED("spv::ImageFormat %u", format);
} }
SIMD::Int texelOffset = GetTexelOffset(coordinate, imageType, binding, texelSize); auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
auto texelPtr = GetTexelAddress(basePtr, coordinate, imageType, binding, texelSize);
for (auto i = 0u; i < numPackedElements; i++) for (auto i = 0u; i < numPackedElements; i++)
{ {
for (int j = 0; j < 4; j++) packed[i] = SIMD::Load<SIMD::Int>(texelPtr, state->activeLaneMask());
{ texelPtr += sizeof(float);
If(Extract(state->activeLaneMask(), j) != 0)
{
Int offset = Int(sizeof(float) * i) + Extract(texelOffset, j);
packed[i] = Insert(packed[i], Load(RValue<Pointer<Int>>(&imageBase[offset]), sizeof(uint32_t), false,
std::memory_order_relaxed), j);
}
}
} }
switch(format) switch(format)
...@@ -4651,6 +4620,7 @@ namespace sw ...@@ -4651,6 +4620,7 @@ namespace sw
Pointer<Byte> binding = state->routine->getPointer(imageId).base; Pointer<Byte> binding = state->routine->getPointer(imageId).base;
Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr)); Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
SIMD::Int packed[4]; SIMD::Int packed[4];
auto numPackedElements = 0u; auto numPackedElements = 0u;
...@@ -4721,19 +4691,13 @@ namespace sw ...@@ -4721,19 +4691,13 @@ namespace sw
UNIMPLEMENTED("spv::ImageFormat %u", format); UNIMPLEMENTED("spv::ImageFormat %u", format);
} }
SIMD::Int texelOffset = GetTexelOffset(coordinate, imageType, binding, texelSize); auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
auto texelPtr = GetTexelAddress(basePtr, coordinate, imageType, binding, texelSize);
for (auto i = 0u; i < numPackedElements; i++) for (auto i = 0u; i < numPackedElements; i++)
{ {
for (int j = 0; j < 4; j++) SIMD::Store(texelPtr, packed[i], state->activeLaneMask());
{ texelPtr += sizeof(float);
If(Extract(state->activeLaneMask(), j) != 0)
{
Int offset = Int(sizeof(float) * i) + Extract(texelOffset, j);
Store(Extract(packed[i], j), RValue<Pointer<Int>>(&imageBase[offset]), sizeof(uint32_t), false,
std::memory_order_relaxed);
}
}
} }
return EmitResult::Continue; return EmitResult::Continue;
...@@ -4757,10 +4721,12 @@ namespace sw ...@@ -4757,10 +4721,12 @@ namespace sw
Pointer<Byte> binding = state->routine->getPointer(imageId).base; Pointer<Byte> binding = state->routine->getPointer(imageId).base;
Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr)); Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
SIMD::Int texelOffset = GetTexelOffset(coordinate, imageType, binding, sizeof(uint32_t)); auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
auto ptr = GetTexelAddress(basePtr, coordinate, imageType, binding, sizeof(uint32_t));
state->routine->createPointer(resultId, SIMD::Pointer(imageBase, texelOffset)); state->routine->createPointer(resultId, ptr);
return EmitResult::Continue; return EmitResult::Continue;
} }
...@@ -4776,13 +4742,14 @@ namespace sw ...@@ -4776,13 +4742,14 @@ namespace sw
auto value = (insn.wordCount() == 7) ? GenericValue(this, state->routine, insn.word(6)).UInt(0) : RValue<SIMD::UInt>(1); auto value = (insn.wordCount() == 7) ? GenericValue(this, state->routine, insn.word(6)).UInt(0) : RValue<SIMD::UInt>(1);
auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents); auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
auto ptr = state->routine->getPointer(insn.word(3)); auto ptr = state->routine->getPointer(insn.word(3));
auto ptrOffsets = ptr.offsets();
SIMD::UInt x; SIMD::UInt x;
for (int j = 0; j < SIMD::Width; j++) for (int j = 0; j < SIMD::Width; j++)
{ {
If(Extract(state->activeLaneMask(), j) != 0) If(Extract(state->activeLaneMask(), j) != 0)
{ {
auto offset = Extract(ptr.offset, j); auto offset = Extract(ptrOffsets, j);
auto laneValue = Extract(value, j); auto laneValue = Extract(value, j);
UInt v; UInt v;
switch (insn.opcode()) switch (insn.opcode())
...@@ -4846,13 +4813,14 @@ namespace sw ...@@ -4846,13 +4813,14 @@ namespace sw
auto comparator = GenericValue(this, state->routine, insn.word(8)); auto comparator = GenericValue(this, state->routine, insn.word(8));
auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents); auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
auto ptr = state->routine->getPointer(insn.word(3)); auto ptr = state->routine->getPointer(insn.word(3));
auto ptrOffsets = ptr.offsets();
SIMD::UInt x; SIMD::UInt x;
for (int j = 0; j < SIMD::Width; j++) for (int j = 0; j < SIMD::Width; j++)
{ {
If(Extract(state->activeLaneMask(), j) != 0) If(Extract(state->activeLaneMask(), j) != 0)
{ {
auto offset = Extract(ptr.offset, j); auto offset = Extract(ptrOffsets, j);
auto laneValue = Extract(value.UInt(0), j); auto laneValue = Extract(value.UInt(0), j);
auto laneComparator = Extract(comparator.UInt(0), j); auto laneComparator = Extract(comparator.UInt(0), j);
UInt v = CompareExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, laneComparator, memoryOrderEqual, memoryOrderUnequal); UInt v = CompareExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, laneComparator, memoryOrderEqual, memoryOrderUnequal);
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "Vulkan/VkDebug.hpp" #include "Vulkan/VkDebug.hpp"
#include "Vulkan/VkConfig.h" #include "Vulkan/VkConfig.h"
#include "Vulkan/VkDescriptorSet.hpp" #include "Vulkan/VkDescriptorSet.hpp"
#include "Common/Types.hpp"
#include "Device/Config.hpp" #include "Device/Config.hpp"
#include <spirv/unified1/spirv.hpp> #include <spirv/unified1/spirv.hpp>
...@@ -65,21 +66,122 @@ namespace sw ...@@ -65,21 +66,122 @@ namespace sw
struct Pointer struct Pointer
{ {
Pointer(rr::Pointer<Byte> base) : base(base), offset(0), uniform(true) {} Pointer(rr::Pointer<Byte> base, rr::Int limit)
Pointer(rr::Pointer<Byte> base, SIMD::Int offset) : base(base), offset(offset), uniform(false) {} : base(base), limit(limit), dynamicOffsets(0), staticOffsets{}, hasDynamicOffsets(false) {}
Pointer(rr::Pointer<Byte> base, rr::Int limit, SIMD::Int offset)
: base(base), limit(limit), dynamicOffsets(offset), staticOffsets{}, hasDynamicOffsets(false) {}
inline void addOffset(Int delta) { offset += delta; uniform = false; } inline Pointer& operator += (Int i)
{
dynamicOffsets += i;
hasDynamicOffsets = true;
return *this;
}
// Scales all lane offsets (static and dynamic) by the run-time value i.
// The combined offsets are folded into dynamicOffsets, so the pointer
// becomes dynamically-offset from then on.
inline Pointer& operator *= (Int i)
{
	dynamicOffsets = offsets() * i;
	staticOffsets = {};
	hasDynamicOffsets = true;
	return *this;
}

// Non-mutating forms of the SIMD::Int offset arithmetic above.
inline Pointer operator + (SIMD::Int i) { Pointer p = *this; p += i; return p; }
inline Pointer operator * (SIMD::Int i) { Pointer p = *this; p *= i; return p; }
// Adds the compile-time constant i to every lane's static offset.
inline Pointer& operator += (int i)
{
	for (int el = 0; el < SIMD::Width; el++) { staticOffsets[el] += i; }
	return *this;
}

// Scales every lane's offset by the compile-time constant i.
inline Pointer& operator *= (int i)
{
	for (int el = 0; el < SIMD::Width; el++) { staticOffsets[el] *= i; }
	if (hasDynamicOffsets)
	{
		// The run-time part of the offsets must be scaled too.
		dynamicOffsets *= SIMD::Int(i);
	}
	return *this;
}

// Non-mutating forms of the int offset arithmetic above.
inline Pointer operator + (int i) { Pointer p = *this; p += i; return p; }
inline Pointer operator * (int i) { Pointer p = *this; p *= i; return p; }
// Returns the combined per-lane offsets from base: the dynamic (run-time)
// offsets plus the static (compile-time) offsets. Offsets appear to be in
// bytes (see the sizeof(float) increments applied by interleaveByLane).
inline SIMD::Int offsets() const
{
	static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
	return dynamicOffsets + SIMD::Int(staticOffsets[0], staticOffsets[1], staticOffsets[2], staticOffsets[3]);
}
// Returns true if all offsets are sequential (N+0, N+1, N+2, N+3)
inline rr::Bool hasSequentialOffsets() const
{
if (hasDynamicOffsets)
{
auto o = offsets();
static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
return rr::SignMask(~CmpEQ(o.yzww, o + SIMD::Int(1, 2, 3, 0))) == 0;
}
else
{
for (int i = 1; i < SIMD::Width; i++)
{
if (staticOffsets[i-1] + 1 != staticOffsets[i]) { return false; }
}
return true;
}
}
// Returns true if all offsets are equal (N, N, N, N)
inline rr::Bool hasEqualOffsets() const
{
if (hasDynamicOffsets)
{
auto o = offsets();
static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
return rr::SignMask(~CmpEQ(o, o.yzwx)) == 0;
}
else
{
for (int i = 1; i < SIMD::Width; i++)
{
if (staticOffsets[i-1] != staticOffsets[i]) { return false; }
}
return true;
}
}
// Base address for the pointer, common across all lanes. // Base address for the pointer, common across all lanes.
rr::Pointer<rr::Byte> base; rr::Pointer<rr::Byte> base;
// Per lane offsets from base in bytes. // Upper (non-inclusive) limit for offsets from base.
// If uniform is true, all offsets are considered zero. rr::Int limit;
Int offset;
// Per lane offsets from base.
SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero.
std::array<int32_t, SIMD::Width> staticOffsets;
// True if all offsets are zero. // True if all dynamicOffsets are zero.
bool uniform; bool hasDynamicOffsets;
}; };
// Maps a SIMD vector type to its scalar per-lane element type.
// Used by SIMD::Load / SIMD::Store for divergent per-lane accesses.
template <typename T> struct Element {};
template <> struct Element<Float> { using type = rr::Float; };
template <> struct Element<Int> { using type = rr::Int; };
template <> struct Element<UInt> { using type = rr::UInt; };

// Emits a masked store of val through ptr. Lanes whose mask bit is zero,
// or whose offset fails the bounds check against ptr.limit, are not written.
template<typename T>
void Store(Pointer ptr, T val, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);

// Convenience overload so rvalue expressions can be stored directly.
template<typename T>
void Store(Pointer ptr, RValue<T> val, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed)
{
	Store(ptr, T(val), mask, atomic, order);
}

// Emits a masked load through ptr. Lanes whose mask bit is zero, or whose
// offset fails the bounds check against ptr.limit, are not read.
template<typename T>
T Load(Pointer ptr, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);
} }
// Incrementally constructed complex bundle of rvalues // Incrementally constructed complex bundle of rvalues
...@@ -736,7 +838,7 @@ namespace sw ...@@ -736,7 +838,7 @@ namespace sw
EmitResult EmitAtomicOp(InsnIterator insn, EmitState *state) const; EmitResult EmitAtomicOp(InsnIterator insn, EmitState *state) const;
EmitResult EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const; EmitResult EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const;
SIMD::Int GetTexelOffset(GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize) const; SIMD::Pointer GetTexelAddress(SIMD::Pointer base, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize) const;
// OpcodeName() returns the name of the opcode op. // OpcodeName() returns the name of the opcode op.
// If NDEBUG is defined, then OpcodeName() will only return the numerical code. // If NDEBUG is defined, then OpcodeName() will only return the numerical code.
......
...@@ -22,6 +22,7 @@ namespace vk ...@@ -22,6 +22,7 @@ namespace vk
{ {
const int Buffer::DataOffset = static_cast<int>(offsetof(Buffer, memory)); const int Buffer::DataOffset = static_cast<int>(offsetof(Buffer, memory));
const int Buffer::DataSize = static_cast<int>(offsetof(Buffer, size));
Buffer::Buffer(const VkBufferCreateInfo* pCreateInfo, void* mem) : Buffer::Buffer(const VkBufferCreateInfo* pCreateInfo, void* mem) :
flags(pCreateInfo->flags), size(pCreateInfo->size), usage(pCreateInfo->usage), flags(pCreateInfo->flags), size(pCreateInfo->size), usage(pCreateInfo->usage),
......
...@@ -43,6 +43,7 @@ public: ...@@ -43,6 +43,7 @@ public:
// DataOffset is the offset in bytes from the Buffer to the pointer to the // DataOffset is the offset in bytes from the Buffer to the pointer to the
// buffer's data memory. // buffer's data memory.
static const int DataOffset; static const int DataOffset;
static const int DataSize;
private: private:
void* memory = nullptr; void* memory = nullptr;
......
...@@ -28,7 +28,7 @@ BufferView::BufferView(const VkBufferViewCreateInfo* pCreateInfo, void* mem) : ...@@ -28,7 +28,7 @@ BufferView::BufferView(const VkBufferViewCreateInfo* pCreateInfo, void* mem) :
} }
else else
{ {
range = pCreateInfo->range - offset; range = pCreateInfo->range;
} }
} }
......
...@@ -34,6 +34,7 @@ public: ...@@ -34,6 +34,7 @@ public:
void *getPointer() const; void *getPointer() const;
uint32_t getElementCount() const { return range / Format(format).bytes(); } uint32_t getElementCount() const { return range / Format(format).bytes(); }
uint32_t getRangeInBytes() const { return range; }
private: private:
VkBuffer buffer; VkBuffer buffer;
......
...@@ -429,6 +429,7 @@ void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptor ...@@ -429,6 +429,7 @@ void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptor
? imageView->layerPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT) ? imageView->layerPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT)
: imageView->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0); : imageView->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
descriptor[i].arrayLayers = imageView->getSubresourceRange().layerCount; descriptor[i].arrayLayers = imageView->getSubresourceRange().layerCount;
descriptor[i].sizeInBytes = imageView->getImageSizeInBytes();
} }
} }
else if (entry.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) else if (entry.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER)
...@@ -443,6 +444,7 @@ void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptor ...@@ -443,6 +444,7 @@ void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptor
descriptor[i].rowPitchBytes = 0; descriptor[i].rowPitchBytes = 0;
descriptor[i].slicePitchBytes = 0; descriptor[i].slicePitchBytes = 0;
descriptor[i].arrayLayers = 1; descriptor[i].arrayLayers = 1;
descriptor[i].sizeInBytes = bufferView->getRangeInBytes();
} }
} }
else else
......
...@@ -43,6 +43,7 @@ struct StorageImageDescriptor ...@@ -43,6 +43,7 @@ struct StorageImageDescriptor
int rowPitchBytes; int rowPitchBytes;
int slicePitchBytes; int slicePitchBytes;
int arrayLayers; int arrayLayers;
int sizeInBytes;
}; };
class DescriptorSetLayout : public Object<DescriptorSetLayout, VkDescriptorSetLayout> class DescriptorSetLayout : public Object<DescriptorSetLayout, VkDescriptorSetLayout>
......
...@@ -50,6 +50,7 @@ public: ...@@ -50,6 +50,7 @@ public:
const VkComponentMapping &getComponentMapping() const { return components; } const VkComponentMapping &getComponentMapping() const { return components; }
const VkImageSubresourceRange &getSubresourceRange() const { return subresourceRange; } const VkImageSubresourceRange &getSubresourceRange() const { return subresourceRange; }
const size_t getImageSizeInBytes() const { return image->getMemoryRequirements().size; }
private: private:
bool imageTypesMatch(VkImageType imageType) const; bool imageTypesMatch(VkImageType imageType) const;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment