Commit a30de542 by Chris Forbes

Add support for push constants

- Proper support for calculating offsets in explicit-layout storage classes (push constant, uniform, and storage buffer) according to the Offset, ArrayStride and MatrixStride decorations.
- Plumb a block of push constant data throughout the pipeline.
- Implement push constant update commands.

Bug: b/128690261
Bug: b/128872954
Test: dEQP-VK.*push_constant*
Test: dEQP-VK.glsl.*
Test: dEQP-VK.spirv_assembly.*
Change-Id: I7d5a66ac4aafd6b637b4693eb6ce96a327b4904e
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/27528
Tested-by: Chris Forbes <chrisforbes@google.com>
Presubmit-Ready: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
parent 2dc03039
......@@ -15,6 +15,7 @@
#ifndef sw_Context_hpp
#define sw_Context_hpp
#include "Vulkan/VkConfig.h"
#include "Sampler.hpp"
#include "Stream.hpp"
#include "Point.hpp"
......@@ -107,6 +108,11 @@ namespace sw
TRANSPARENCY_LAST = TRANSPARENCY_ALPHA_TO_COVERAGE
};
// Raw byte storage for a block of push constant data.
// It is a plain aggregate, so a whole block can be copied with ordinary
// assignment (for example `data->pushConstants = context->pushConstants`).
struct PushConstantStorage
{
// Push constant bytes; the capacity is vk::MAX_PUSH_CONSTANT_SIZE.
unsigned char data[vk::MAX_PUSH_CONSTANT_SIZE];
};
class Context
{
public:
......@@ -220,6 +226,8 @@ namespace sw
unsigned int sampleMask;
unsigned int multiSampleMask;
int sampleCount;
PushConstantStorage pushConstants;
};
}
......
......@@ -444,6 +444,11 @@ namespace sw
data->scissorY1 = scissor.offset.y + scissor.extent.height;
}
// Push constants
{
data->pushConstants = context->pushConstants;
}
draw->primitive = 0;
draw->count = count;
......
......@@ -187,6 +187,8 @@ namespace sw
float4 a2c1;
float4 a2c2;
float4 a2c3;
PushConstantStorage pushConstants;
};
class Renderer : public VertexProcessor, public PixelProcessor, public SetupProcessor
......
......@@ -52,6 +52,8 @@ namespace sw
routine.descriptorSets[i] = descriptorSetsIn[i];
}
routine.pushConstants = Pointer<Byte>(data + OFFSET(Data, pushConstants));
auto &modes = shader->getModes();
int localSize[3] = {modes.WorkgroupSizeX, modes.WorkgroupSizeY, modes.WorkgroupSizeZ};
......@@ -167,7 +169,7 @@ namespace sw
}
void ComputeProgram::run(
Routine *routine, void** descriptorSets,
Routine *routine, void** descriptorSets, PushConstantStorage const &pushConstants,
uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
{
auto runWorkgroup = (void(*)(void*))(routine->getEntry());
......@@ -178,6 +180,7 @@ namespace sw
data.numWorkgroups[Y] = groupCountY;
data.numWorkgroups[Z] = groupCountZ;
data.numWorkgroups[3] = 0;
data.pushConstants = pushConstants;
// TODO(bclayton): Split work across threads.
for (uint32_t groupZ = 0; groupZ < groupCountZ; groupZ++)
......
......@@ -18,6 +18,7 @@
#include "SpirvShader.hpp"
#include "Reactor/Reactor.hpp"
#include "Device/Context.hpp"
#include <functional>
......@@ -47,7 +48,7 @@ namespace sw
// run executes the compute shader routine for all workgroups.
// TODO(bclayton): This probably does not belong here. Consider moving.
static void run(
Routine *routine, void** descriptorSets,
Routine *routine, void** descriptorSets, PushConstantStorage const &pushConstants,
uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
protected:
......@@ -62,6 +63,7 @@ namespace sw
void** descriptorSets;
uint4 numWorkgroups;
uint4 workgroupID;
PushConstantStorage pushConstants;
};
SpirvRoutine routine;
......
......@@ -31,6 +31,8 @@ namespace sw
{
enableIndex = 0;
routine.pushConstants = data + OFFSET(DrawData, pushConstants);
spirvShader->emit(&routine);
spirvShader->emitEpilog(&routine);
......
......@@ -181,6 +181,7 @@ namespace sw
break;
case spv::StorageClassUniform:
case spv::StorageClassStorageBuffer:
case spv::StorageClassPushConstant:
object.kind = Object::Kind::PhysicalPointer;
break;
......@@ -192,7 +193,6 @@ namespace sw
case spv::StorageClassWorkgroup:
case spv::StorageClassCrossWorkgroup:
case spv::StorageClassGeneric:
case spv::StorageClassPushConstant:
case spv::StorageClassAtomicCounter:
case spv::StorageClassImage:
UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
......@@ -653,6 +653,7 @@ namespace sw
{
case spv::StorageClassUniform:
case spv::StorageClassStorageBuffer:
case spv::StorageClassPushConstant:
return false;
default:
return true;
......@@ -742,10 +743,88 @@ namespace sw
VisitInterfaceInner<F>(def.word(1), d, f);
}
// Computes the offset addressed by an access chain into explicit-layout
// memory, expressed in sizeof(float) units.
//
//   id         - base object of the chain.
//   numIndexes - number of entries in indexIds.
//   indexIds   - ids of the index operands, applied in order starting from
//                the element type of the base object's type.
//   routine    - provides the intermediates read for non-constant indices
//                (and for the base, when it is itself a Value).
//
// Each level of the chain contributes according to the type being indexed:
//   struct          -> the member's Offset decoration
//   array / matrix  -> index * ArrayStride / MatrixStride
//   vector          -> the index itself
// Contributions from constant indices are summed into a host-side integer;
// all others are accumulated into a SIMD::Int. The result is their sum.
SIMD::Int SpirvShader::WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
{
	int constantOffset = 0;
	SIMD::Int dynamicOffset = SIMD::Int(0);

	auto &base = getObject(id);
	Type::ID currentTypeId = getType(base.type).element;

	// A base of kind Value is an intermediate (i.e. the result of an earlier
	// OpAccessChain), so continue from the offset it already holds.
	if (base.kind == Object::Kind::Value)
	{
		dynamicOffset += routine->getIntermediate(id).Int(0);
	}

	for (uint32_t level = 0; level != numIndexes; ++level)
	{
		const uint32_t indexId = indexIds[level];
		auto &currentType = getType(currentTypeId);

		switch (currentType.definition.opcode())
		{
		case spv::OpTypeStruct:
		{
			// The member index is read as a constant; the member's Offset
			// decoration (in bytes) gives its position.
			int memberIndex = GetConstantInt(indexId);
			Decorations d{};
			ApplyDecorationsForIdMember(&d, currentTypeId, memberIndex);
			ASSERT(d.HasOffset);
			constantOffset += d.Offset / sizeof(float);
			currentTypeId = currentType.definition.word(2u + memberIndex);
			break;
		}

		case spv::OpTypeArray:
		case spv::OpTypeRuntimeArray:
		{
			// TODO: b/127950082: Check bounds.
			Decorations d{};
			ApplyDecorationsForId(&d, currentTypeId);
			ASSERT(d.HasArrayStride);
			auto &indexObject = getObject(indexId);
			if (indexObject.kind == Object::Kind::Constant)
			{
				constantOffset += d.ArrayStride / sizeof(float) * GetConstantInt(indexId);
			}
			else
			{
				dynamicOffset += SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexId).Int(0);
			}
			currentTypeId = currentType.element;
			break;
		}

		case spv::OpTypeMatrix:
		{
			// TODO: b/127950082: Check bounds.
			// NOTE(review): SPIR-V attaches MatrixStride to the struct member
			// that holds the matrix rather than to the matrix type itself --
			// confirm that looking it up on the matrix type id finds it.
			Decorations d{};
			ApplyDecorationsForId(&d, currentTypeId);
			ASSERT(d.HasMatrixStride);
			auto &indexObject = getObject(indexId);
			if (indexObject.kind == Object::Kind::Constant)
			{
				constantOffset += d.MatrixStride / sizeof(float) * GetConstantInt(indexId);
			}
			else
			{
				dynamicOffset += SIMD::Int(d.MatrixStride / sizeof(float)) * routine->getIntermediate(indexId).Int(0);
			}
			currentTypeId = currentType.element;
			break;
		}

		case spv::OpTypeVector:
		{
			// The index is added unscaled.
			auto &indexObject = getObject(indexId);
			if (indexObject.kind == Object::Kind::Constant)
			{
				constantOffset += GetConstantInt(indexId);
			}
			else
			{
				dynamicOffset += routine->getIntermediate(indexId).Int(0);
			}
			currentTypeId = currentType.element;
			break;
		}

		default:
			UNIMPLEMENTED("Unexpected type '%s' in WalkExplicitLayoutAccessChain", OpcodeName(currentType.definition.opcode()).c_str());
		}
	}

	return dynamicOffset + SIMD::Int(constantOffset);
}
SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
{
// TODO: think about explicit layout (UBO/SSBO) storage classes
// TODO: avoid doing per-lane work in some cases if we can?
// Produce a *component* offset into location-oriented memory
int constantOffset = 0;
SIMD::Int dynamicOffset = SIMD::Int(0);
......@@ -1275,6 +1354,11 @@ namespace sw
routine->physicalPointers[resultId] = address;
break;
}
case spv::StorageClassPushConstant:
{
routine->physicalPointers[resultId] = routine->pushConstants;
break;
}
default:
break;
}
......@@ -1372,7 +1456,17 @@ namespace sw
ASSERT(getObject(baseId).pointerBase == getObject(objectId).pointerBase);
auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
dst.emplace(0, WalkAccessChain(baseId, insn.wordCount() - 4, insn.wordPointer(4), routine));
if (type.storageClass == spv::StorageClassPushConstant ||
type.storageClass == spv::StorageClassUniform ||
type.storageClass == spv::StorageClassStorageBuffer)
{
dst.emplace(0, WalkExplicitLayoutAccessChain(baseId, insn.wordCount() - 4, insn.wordPointer(4), routine));
}
else
{
dst.emplace(0, WalkAccessChain(baseId, insn.wordCount() - 4, insn.wordPointer(4), routine));
}
}
void SpirvShader::EmitStore(InsnIterator insn, SpirvRoutine *routine) const
......
......@@ -471,6 +471,7 @@ namespace sw
void ProcessInterfaceVariable(Object &object);
SIMD::Int WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const;
SIMD::Int WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const;
uint32_t WalkLiteralAccessChain(Type::ID id, uint32_t numIndexes, uint32_t const *indexes) const;
......@@ -524,6 +525,7 @@ namespace sw
SIMD::Int activeLaneMask = SIMD::Int(0xFFFFFFFF);
std::array<Pointer<Byte>, vk::MAX_BOUND_DESCRIPTOR_SETS> descriptorSets;
Pointer<Byte> pushConstants;
void createLvalue(SpirvShader::Object::ID id, uint32_t size)
{
......
......@@ -43,6 +43,8 @@ namespace sw
routine.getValue(it->second.Id)[it->second.FirstComponent] =
As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, instanceID)))));
}
routine.pushConstants = data + OFFSET(DrawData, pushConstants);
}
VertexProgram::~VertexProgram()
......
......@@ -139,7 +139,8 @@ protected:
executionState.pipelines[VK_PIPELINE_BIND_POINT_COMPUTE]);
pipeline->run(groupCountX, groupCountY, groupCountZ,
MAX_BOUND_DESCRIPTOR_SETS,
executionState.boundDescriptorSets[VK_PIPELINE_BIND_POINT_COMPUTE]);
executionState.boundDescriptorSets[VK_PIPELINE_BIND_POINT_COMPUTE],
executionState.pushConstants);
}
private:
......@@ -241,6 +242,8 @@ struct Draw : public CommandBuffer::Command
}
}
context.pushConstants = executionState.pushConstants;
executionState.renderer->setContext(context);
executionState.renderer->setScissor(pipeline->getScissor());
executionState.renderer->setViewport(pipeline->getViewport());
......@@ -288,6 +291,8 @@ struct DrawIndexed : public CommandBuffer::Command
}
}
context.pushConstants = executionState.pushConstants;
context.indexBuffer = Cast(executionState.indexBufferBinding.buffer)->getOffsetPointer(
executionState.indexBufferBinding.offset + firstIndex * (executionState.indexType == VK_INDEX_TYPE_UINT16 ? 2 : 4));
......@@ -571,6 +576,28 @@ private:
const VkDescriptorSet descriptorSet;
};
// Command that updates a byte range of the push constant block.
//
// The bytes to write are copied out of the caller's buffer when the command
// is constructed, so pValues does not need to stay valid afterwards. play()
// then copies them into executionState.pushConstants at the stored offset.
struct SetPushConstants : public CommandBuffer::Command
{
	// offset  - byte offset into the push constant block to start writing at.
	// size    - number of bytes to write.
	// pValues - source of the bytes; at least `size` bytes are read from it.
	SetPushConstants(uint32_t offset, uint32_t size, void const *pValues)
		: offset(offset), size(size)
	{
		ASSERT(offset < MAX_PUSH_CONSTANT_SIZE);
		// Sum in 64 bits: in uint32_t, offset + size can wrap around and
		// let an oversized range slip past this check.
		ASSERT(static_cast<uint64_t>(offset) + size <= MAX_PUSH_CONSTANT_SIZE);

		// The snapshot is stored from the start of `data`; `offset` is only
		// applied when the command is played.
		memcpy(data, pValues, size);
	}

	// Writes the captured bytes into the execution state's push constants.
	void play(CommandBuffer::ExecutionState& executionState)
	{
		memcpy(&executionState.pushConstants.data[offset], data, size);
	}

private:
	uint32_t offset;   // Destination byte offset within the push constant block.
	uint32_t size;     // Number of valid bytes at the start of `data`.
	unsigned char data[MAX_PUSH_CONSTANT_SIZE];   // Snapshot of the caller's values.
};
CommandBuffer::CommandBuffer(VkCommandBufferLevel pLevel) : level(pLevel)
{
// FIXME (b/119409619): replace this vector by an allocator so we can control all memory allocations
......@@ -740,7 +767,7 @@ void CommandBuffer::copyQueryPoolResults(VkQueryPool queryPool, uint32_t firstQu
void CommandBuffer::pushConstants(VkPipelineLayout layout, VkShaderStageFlags stageFlags,
uint32_t offset, uint32_t size, const void* pValues)
{
UNIMPLEMENTED("pushConstants");
addCommand<SetPushConstants>(offset, size, pValues);
}
void CommandBuffer::setViewport(uint32_t firstViewport, uint32_t viewportCount, const VkViewport* pViewports)
......
......@@ -17,6 +17,7 @@
#include "VkConfig.h"
#include "VkObject.hpp"
#include "Device/Context.hpp"
#include <memory>
#include <vector>
......@@ -126,6 +127,7 @@ public:
Framebuffer* renderPassFramebuffer = nullptr;
Pipeline* pipelines[VK_PIPELINE_BIND_POINT_RANGE_SIZE] = {};
VkDescriptorSet boundDescriptorSets[VK_PIPELINE_BIND_POINT_RANGE_SIZE][MAX_BOUND_DESCRIPTOR_SETS] = { { VK_NULL_HANDLE } };
sw::PushConstantStorage pushConstants;
struct VertexInputBinding
{
......
......@@ -57,6 +57,7 @@ enum
{
MAX_BOUND_DESCRIPTOR_SETS = 4,
MAX_VERTEX_INPUT_BINDINGS = 16,
MAX_PUSH_CONSTANT_SIZE = 128,
};
enum
......
......@@ -147,7 +147,7 @@ const VkPhysicalDeviceLimits& PhysicalDevice::getLimits() const
65536, // maxTexelBufferElements
16384, // maxUniformBufferRange
(1ul << 27), // maxStorageBufferRange
128, // maxPushConstantsSize
vk::MAX_PUSH_CONSTANT_SIZE, // maxPushConstantsSize
4096, // maxMemoryAllocationCount
4000, // maxSamplerAllocationCount
131072, // bufferImageGranularity
......
......@@ -546,11 +546,11 @@ void ComputePipeline::compileShaders(const VkAllocationCallbacks* pAllocator, co
}
void ComputePipeline::run(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ,
size_t numDescriptorSets, VkDescriptorSet *descriptorSets)
size_t numDescriptorSets, VkDescriptorSet *descriptorSets, sw::PushConstantStorage const &pushConstants)
{
ASSERT_OR_RETURN(routine != nullptr);
sw::ComputeProgram::run(
routine, reinterpret_cast<void**>(descriptorSets),
routine, reinterpret_cast<void**>(descriptorSets), pushConstants,
groupCountX, groupCountY, groupCountZ);
}
......
......@@ -104,7 +104,7 @@ public:
void compileShaders(const VkAllocationCallbacks* pAllocator, const VkComputePipelineCreateInfo* pCreateInfo);
void run(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ,
size_t numDescriptorSets, VkDescriptorSet *descriptorSets);
size_t numDescriptorSets, VkDescriptorSet *descriptorSets, sw::PushConstantStorage const &pushConstants);
protected:
sw::SpirvShader *shader = nullptr;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment