Commit a30de542 by Chris Forbes

Add support for push constants

- Proper support for calculating offsets in explicit-layout storage classes (push constant, uniform, and storage buffer) according to the Offset, ArrayStride and MatrixStride decorations.
- Plumb a block of push constant data throughout the pipeline
- Implement push constant update commands

Bug: b/128690261
Bug: b/128872954
Test: dEQP-VK.*push_constant*
Test: dEQP-VK.glsl.*
Test: dEQP-VK.spirv_assembly.*
Change-Id: I7d5a66ac4aafd6b637b4693eb6ce96a327b4904e
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/27528
Tested-by: Chris Forbes <chrisforbes@google.com>
Presubmit-Ready: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
parent 2dc03039
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#ifndef sw_Context_hpp #ifndef sw_Context_hpp
#define sw_Context_hpp #define sw_Context_hpp
#include "Vulkan/VkConfig.h"
#include "Sampler.hpp" #include "Sampler.hpp"
#include "Stream.hpp" #include "Stream.hpp"
#include "Point.hpp" #include "Point.hpp"
...@@ -107,6 +108,11 @@ namespace sw ...@@ -107,6 +108,11 @@ namespace sw
TRANSPARENCY_LAST = TRANSPARENCY_ALPHA_TO_COVERAGE TRANSPARENCY_LAST = TRANSPARENCY_ALPHA_TO_COVERAGE
}; };
// Raw byte storage for a block of push constant data.
// The block is always vk::MAX_PUSH_CONSTANT_SIZE bytes; the struct carries no
// record of how many of those bytes have actually been written.
struct PushConstantStorage
{
// Untyped push constant bytes, addressed by byte offset.
unsigned char data[vk::MAX_PUSH_CONSTANT_SIZE];
};
class Context class Context
{ {
public: public:
...@@ -220,6 +226,8 @@ namespace sw ...@@ -220,6 +226,8 @@ namespace sw
unsigned int sampleMask; unsigned int sampleMask;
unsigned int multiSampleMask; unsigned int multiSampleMask;
int sampleCount; int sampleCount;
PushConstantStorage pushConstants;
}; };
} }
......
...@@ -444,6 +444,11 @@ namespace sw ...@@ -444,6 +444,11 @@ namespace sw
data->scissorY1 = scissor.offset.y + scissor.extent.height; data->scissorY1 = scissor.offset.y + scissor.extent.height;
} }
// Push constants
{
data->pushConstants = context->pushConstants;
}
draw->primitive = 0; draw->primitive = 0;
draw->count = count; draw->count = count;
......
...@@ -187,6 +187,8 @@ namespace sw ...@@ -187,6 +187,8 @@ namespace sw
float4 a2c1; float4 a2c1;
float4 a2c2; float4 a2c2;
float4 a2c3; float4 a2c3;
PushConstantStorage pushConstants;
}; };
class Renderer : public VertexProcessor, public PixelProcessor, public SetupProcessor class Renderer : public VertexProcessor, public PixelProcessor, public SetupProcessor
......
...@@ -52,6 +52,8 @@ namespace sw ...@@ -52,6 +52,8 @@ namespace sw
routine.descriptorSets[i] = descriptorSetsIn[i]; routine.descriptorSets[i] = descriptorSetsIn[i];
} }
routine.pushConstants = Pointer<Byte>(data + OFFSET(Data, pushConstants));
auto &modes = shader->getModes(); auto &modes = shader->getModes();
int localSize[3] = {modes.WorkgroupSizeX, modes.WorkgroupSizeY, modes.WorkgroupSizeZ}; int localSize[3] = {modes.WorkgroupSizeX, modes.WorkgroupSizeY, modes.WorkgroupSizeZ};
...@@ -167,7 +169,7 @@ namespace sw ...@@ -167,7 +169,7 @@ namespace sw
} }
void ComputeProgram::run( void ComputeProgram::run(
Routine *routine, void** descriptorSets, Routine *routine, void** descriptorSets, PushConstantStorage const &pushConstants,
uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
{ {
auto runWorkgroup = (void(*)(void*))(routine->getEntry()); auto runWorkgroup = (void(*)(void*))(routine->getEntry());
...@@ -178,6 +180,7 @@ namespace sw ...@@ -178,6 +180,7 @@ namespace sw
data.numWorkgroups[Y] = groupCountY; data.numWorkgroups[Y] = groupCountY;
data.numWorkgroups[Z] = groupCountZ; data.numWorkgroups[Z] = groupCountZ;
data.numWorkgroups[3] = 0; data.numWorkgroups[3] = 0;
data.pushConstants = pushConstants;
// TODO(bclayton): Split work across threads. // TODO(bclayton): Split work across threads.
for (uint32_t groupZ = 0; groupZ < groupCountZ; groupZ++) for (uint32_t groupZ = 0; groupZ < groupCountZ; groupZ++)
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "SpirvShader.hpp" #include "SpirvShader.hpp"
#include "Reactor/Reactor.hpp" #include "Reactor/Reactor.hpp"
#include "Device/Context.hpp"
#include <functional> #include <functional>
...@@ -47,7 +48,7 @@ namespace sw ...@@ -47,7 +48,7 @@ namespace sw
// run executes the compute shader routine for all workgroups. // run executes the compute shader routine for all workgroups.
// TODO(bclayton): This probably does not belong here. Consider moving. // TODO(bclayton): This probably does not belong here. Consider moving.
static void run( static void run(
Routine *routine, void** descriptorSets, Routine *routine, void** descriptorSets, PushConstantStorage const &pushConstants,
uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ); uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
protected: protected:
...@@ -62,6 +63,7 @@ namespace sw ...@@ -62,6 +63,7 @@ namespace sw
void** descriptorSets; void** descriptorSets;
uint4 numWorkgroups; uint4 numWorkgroups;
uint4 workgroupID; uint4 workgroupID;
PushConstantStorage pushConstants;
}; };
SpirvRoutine routine; SpirvRoutine routine;
......
...@@ -31,6 +31,8 @@ namespace sw ...@@ -31,6 +31,8 @@ namespace sw
{ {
enableIndex = 0; enableIndex = 0;
routine.pushConstants = data + OFFSET(DrawData, pushConstants);
spirvShader->emit(&routine); spirvShader->emit(&routine);
spirvShader->emitEpilog(&routine); spirvShader->emitEpilog(&routine);
......
...@@ -181,6 +181,7 @@ namespace sw ...@@ -181,6 +181,7 @@ namespace sw
break; break;
case spv::StorageClassUniform: case spv::StorageClassUniform:
case spv::StorageClassStorageBuffer: case spv::StorageClassStorageBuffer:
case spv::StorageClassPushConstant:
object.kind = Object::Kind::PhysicalPointer; object.kind = Object::Kind::PhysicalPointer;
break; break;
...@@ -192,7 +193,6 @@ namespace sw ...@@ -192,7 +193,6 @@ namespace sw
case spv::StorageClassWorkgroup: case spv::StorageClassWorkgroup:
case spv::StorageClassCrossWorkgroup: case spv::StorageClassCrossWorkgroup:
case spv::StorageClassGeneric: case spv::StorageClassGeneric:
case spv::StorageClassPushConstant:
case spv::StorageClassAtomicCounter: case spv::StorageClassAtomicCounter:
case spv::StorageClassImage: case spv::StorageClassImage:
UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass); UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
...@@ -653,6 +653,7 @@ namespace sw ...@@ -653,6 +653,7 @@ namespace sw
{ {
case spv::StorageClassUniform: case spv::StorageClassUniform:
case spv::StorageClassStorageBuffer: case spv::StorageClassStorageBuffer:
case spv::StorageClassPushConstant:
return false; return false;
default: default:
return true; return true;
...@@ -742,10 +743,88 @@ namespace sw ...@@ -742,10 +743,88 @@ namespace sw
VisitInterfaceInner<F>(def.word(1), d, f); VisitInterfaceInner<F>(def.word(1), d, f);
} }
SIMD::Int SpirvShader::WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
{
	// Resolves an access chain over an explicitly laid out type into an offset
	// into external memory, expressed in sizeof(float) units.
	//
	// id         - object the chain starts from.
	// numIndexes - number of entries in indexIds.
	// indexIds   - ids of the index operands, outermost first.
	// routine    - supplies the intermediates for non-constant indexes.
	//
	// Contributions from constant indexes are summed on the host in staticPart;
	// contributions from non-constant indexes are accumulated in perLanePart.
	int staticPart = 0;
	SIMD::Int perLanePart = SIMD::Int(0);

	auto &base = getObject(id);
	Type::ID walkedTypeId = getType(base.type).element;

	// A base that is itself an intermediate value (i.e. the result of a previous
	// OpAccessChain) already carries an offset in component 0; build on top of it.
	if (base.kind == Object::Kind::Value)
	{
		perLanePart += routine->getIntermediate(id).Int(0);
	}

	// Adds (index * strideInFloats) to the accumulator matching the index's kind.
	auto addStridedIndex = [&](uint32_t indexId, size_t strideInFloats)
	{
		if (getObject(indexId).kind == Object::Kind::Constant)
		{
			staticPart += strideInFloats * GetConstantInt(indexId);
		}
		else
		{
			perLanePart += SIMD::Int(strideInFloats) * routine->getIntermediate(indexId).Int(0);
		}
	};

	for (uint32_t n = 0; n < numIndexes; n++)
	{
		const uint32_t indexId = indexIds[n];
		auto &walkedType = getType(walkedTypeId);

		switch (walkedType.definition.opcode())
		{
		case spv::OpTypeStruct:
		{
			// Struct members are selected by a constant index and located by
			// the member's Offset decoration (in bytes).
			int member = GetConstantInt(indexId);
			Decorations memberDecorations{};
			ApplyDecorationsForIdMember(&memberDecorations, walkedTypeId, member);
			ASSERT(memberDecorations.HasOffset);
			staticPart += memberDecorations.Offset / sizeof(float);
			walkedTypeId = walkedType.definition.word(2u + member);
			break;
		}

		case spv::OpTypeArray:
		case spv::OpTypeRuntimeArray:
		{
			// TODO: b/127950082: Check bounds.
			Decorations arrayDecorations{};
			ApplyDecorationsForId(&arrayDecorations, walkedTypeId);
			ASSERT(arrayDecorations.HasArrayStride);
			addStridedIndex(indexId, arrayDecorations.ArrayStride / sizeof(float));
			walkedTypeId = walkedType.element;
			break;
		}

		case spv::OpTypeMatrix:
		{
			// TODO: b/127950082: Check bounds.
			// NOTE(review): SPIR-V attaches MatrixStride (and RowMajor/ColMajor) to
			// struct members rather than to the matrix type itself; confirm that
			// looking the decoration up by the matrix type id finds it, and that
			// row-major matrices do not need a different stride here.
			Decorations matrixDecorations{};
			ApplyDecorationsForId(&matrixDecorations, walkedTypeId);
			ASSERT(matrixDecorations.HasMatrixStride);
			addStridedIndex(indexId, matrixDecorations.MatrixStride / sizeof(float));
			walkedTypeId = walkedType.element;
			break;
		}

		case spv::OpTypeVector:
		{
			// Vector components are one unit apart, so the index is used unscaled.
			if (getObject(indexId).kind == Object::Kind::Constant)
			{
				staticPart += GetConstantInt(indexId);
			}
			else
			{
				perLanePart += routine->getIntermediate(indexId).Int(0);
			}
			walkedTypeId = walkedType.element;
			break;
		}

		default:
			UNIMPLEMENTED("Unexpected type '%s' in WalkExplicitLayoutAccessChain", OpcodeName(walkedType.definition.opcode()).c_str());
		}
	}

	return perLanePart + SIMD::Int(staticPart);
}
SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
{ {
// TODO: think about explicit layout (UBO/SSBO) storage classes
// TODO: avoid doing per-lane work in some cases if we can? // TODO: avoid doing per-lane work in some cases if we can?
// Produce a *component* offset into location-oriented memory
int constantOffset = 0; int constantOffset = 0;
SIMD::Int dynamicOffset = SIMD::Int(0); SIMD::Int dynamicOffset = SIMD::Int(0);
...@@ -1275,6 +1354,11 @@ namespace sw ...@@ -1275,6 +1354,11 @@ namespace sw
routine->physicalPointers[resultId] = address; routine->physicalPointers[resultId] = address;
break; break;
} }
case spv::StorageClassPushConstant:
{
routine->physicalPointers[resultId] = routine->pushConstants;
break;
}
default: default:
break; break;
} }
...@@ -1372,7 +1456,17 @@ namespace sw ...@@ -1372,7 +1456,17 @@ namespace sw
ASSERT(getObject(baseId).pointerBase == getObject(objectId).pointerBase); ASSERT(getObject(baseId).pointerBase == getObject(objectId).pointerBase);
auto &dst = routine->createIntermediate(objectId, type.sizeInComponents); auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
dst.emplace(0, WalkAccessChain(baseId, insn.wordCount() - 4, insn.wordPointer(4), routine));
if (type.storageClass == spv::StorageClassPushConstant ||
type.storageClass == spv::StorageClassUniform ||
type.storageClass == spv::StorageClassStorageBuffer)
{
dst.emplace(0, WalkExplicitLayoutAccessChain(baseId, insn.wordCount() - 4, insn.wordPointer(4), routine));
}
else
{
dst.emplace(0, WalkAccessChain(baseId, insn.wordCount() - 4, insn.wordPointer(4), routine));
}
} }
void SpirvShader::EmitStore(InsnIterator insn, SpirvRoutine *routine) const void SpirvShader::EmitStore(InsnIterator insn, SpirvRoutine *routine) const
......
...@@ -471,6 +471,7 @@ namespace sw ...@@ -471,6 +471,7 @@ namespace sw
void ProcessInterfaceVariable(Object &object); void ProcessInterfaceVariable(Object &object);
SIMD::Int WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const;
SIMD::Int WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const; SIMD::Int WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const;
uint32_t WalkLiteralAccessChain(Type::ID id, uint32_t numIndexes, uint32_t const *indexes) const; uint32_t WalkLiteralAccessChain(Type::ID id, uint32_t numIndexes, uint32_t const *indexes) const;
...@@ -524,6 +525,7 @@ namespace sw ...@@ -524,6 +525,7 @@ namespace sw
SIMD::Int activeLaneMask = SIMD::Int(0xFFFFFFFF); SIMD::Int activeLaneMask = SIMD::Int(0xFFFFFFFF);
std::array<Pointer<Byte>, vk::MAX_BOUND_DESCRIPTOR_SETS> descriptorSets; std::array<Pointer<Byte>, vk::MAX_BOUND_DESCRIPTOR_SETS> descriptorSets;
Pointer<Byte> pushConstants;
void createLvalue(SpirvShader::Object::ID id, uint32_t size) void createLvalue(SpirvShader::Object::ID id, uint32_t size)
{ {
......
...@@ -43,6 +43,8 @@ namespace sw ...@@ -43,6 +43,8 @@ namespace sw
routine.getValue(it->second.Id)[it->second.FirstComponent] = routine.getValue(it->second.Id)[it->second.FirstComponent] =
As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, instanceID))))); As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, instanceID)))));
} }
routine.pushConstants = data + OFFSET(DrawData, pushConstants);
} }
VertexProgram::~VertexProgram() VertexProgram::~VertexProgram()
......
...@@ -139,7 +139,8 @@ protected: ...@@ -139,7 +139,8 @@ protected:
executionState.pipelines[VK_PIPELINE_BIND_POINT_COMPUTE]); executionState.pipelines[VK_PIPELINE_BIND_POINT_COMPUTE]);
pipeline->run(groupCountX, groupCountY, groupCountZ, pipeline->run(groupCountX, groupCountY, groupCountZ,
MAX_BOUND_DESCRIPTOR_SETS, MAX_BOUND_DESCRIPTOR_SETS,
executionState.boundDescriptorSets[VK_PIPELINE_BIND_POINT_COMPUTE]); executionState.boundDescriptorSets[VK_PIPELINE_BIND_POINT_COMPUTE],
executionState.pushConstants);
} }
private: private:
...@@ -241,6 +242,8 @@ struct Draw : public CommandBuffer::Command ...@@ -241,6 +242,8 @@ struct Draw : public CommandBuffer::Command
} }
} }
context.pushConstants = executionState.pushConstants;
executionState.renderer->setContext(context); executionState.renderer->setContext(context);
executionState.renderer->setScissor(pipeline->getScissor()); executionState.renderer->setScissor(pipeline->getScissor());
executionState.renderer->setViewport(pipeline->getViewport()); executionState.renderer->setViewport(pipeline->getViewport());
...@@ -288,6 +291,8 @@ struct DrawIndexed : public CommandBuffer::Command ...@@ -288,6 +291,8 @@ struct DrawIndexed : public CommandBuffer::Command
} }
} }
context.pushConstants = executionState.pushConstants;
context.indexBuffer = Cast(executionState.indexBufferBinding.buffer)->getOffsetPointer( context.indexBuffer = Cast(executionState.indexBufferBinding.buffer)->getOffsetPointer(
executionState.indexBufferBinding.offset + firstIndex * (executionState.indexType == VK_INDEX_TYPE_UINT16 ? 2 : 4)); executionState.indexBufferBinding.offset + firstIndex * (executionState.indexType == VK_INDEX_TYPE_UINT16 ? 2 : 4));
...@@ -571,6 +576,28 @@ private: ...@@ -571,6 +576,28 @@ private:
const VkDescriptorSet descriptorSet; const VkDescriptorSet descriptorSet;
}; };
// Recorded command that overwrites a byte range of the execution state's
// push constant block when the command buffer is played back.
struct SetPushConstants : public CommandBuffer::Command
{
	// offset  - first byte of the push constant block to overwrite.
	// size    - number of bytes to overwrite.
	// pValues - source bytes; `size` bytes are copied into the command here, and
	//           the pointer itself is not retained.
	SetPushConstants(uint32_t offset, uint32_t size, void const *pValues)
		: offset(offset), size(size)
	{
		ASSERT(offset < MAX_PUSH_CONSTANT_SIZE);
		// Phrased as a subtraction on purpose: `offset + size` is computed in
		// uint32_t and can wrap around for a very large size, which would let an
		// out-of-range update slip past the check.
		ASSERT(size <= MAX_PUSH_CONSTANT_SIZE - offset);

		memcpy(data, pValues, size);
	}

	// Applies the recorded bytes to executionState.pushConstants at `offset`.
	void play(CommandBuffer::ExecutionState& executionState)
	{
		memcpy(&executionState.pushConstants.data[offset], data, size);
	}

private:
	uint32_t offset;   // Destination byte offset within the push constant block.
	uint32_t size;     // Number of valid bytes at the start of `data`.
	unsigned char data[MAX_PUSH_CONSTANT_SIZE];   // Private copy of the new values.
};
CommandBuffer::CommandBuffer(VkCommandBufferLevel pLevel) : level(pLevel) CommandBuffer::CommandBuffer(VkCommandBufferLevel pLevel) : level(pLevel)
{ {
// FIXME (b/119409619): replace this vector by an allocator so we can control all memory allocations // FIXME (b/119409619): replace this vector by an allocator so we can control all memory allocations
...@@ -740,7 +767,7 @@ void CommandBuffer::copyQueryPoolResults(VkQueryPool queryPool, uint32_t firstQu ...@@ -740,7 +767,7 @@ void CommandBuffer::copyQueryPoolResults(VkQueryPool queryPool, uint32_t firstQu
void CommandBuffer::pushConstants(VkPipelineLayout layout, VkShaderStageFlags stageFlags, void CommandBuffer::pushConstants(VkPipelineLayout layout, VkShaderStageFlags stageFlags,
uint32_t offset, uint32_t size, const void* pValues) uint32_t offset, uint32_t size, const void* pValues)
{ {
UNIMPLEMENTED("pushConstants"); addCommand<SetPushConstants>(offset, size, pValues);
} }
void CommandBuffer::setViewport(uint32_t firstViewport, uint32_t viewportCount, const VkViewport* pViewports) void CommandBuffer::setViewport(uint32_t firstViewport, uint32_t viewportCount, const VkViewport* pViewports)
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "VkConfig.h" #include "VkConfig.h"
#include "VkObject.hpp" #include "VkObject.hpp"
#include "Device/Context.hpp"
#include <memory> #include <memory>
#include <vector> #include <vector>
...@@ -126,6 +127,7 @@ public: ...@@ -126,6 +127,7 @@ public:
Framebuffer* renderPassFramebuffer = nullptr; Framebuffer* renderPassFramebuffer = nullptr;
Pipeline* pipelines[VK_PIPELINE_BIND_POINT_RANGE_SIZE] = {}; Pipeline* pipelines[VK_PIPELINE_BIND_POINT_RANGE_SIZE] = {};
VkDescriptorSet boundDescriptorSets[VK_PIPELINE_BIND_POINT_RANGE_SIZE][MAX_BOUND_DESCRIPTOR_SETS] = { { VK_NULL_HANDLE } }; VkDescriptorSet boundDescriptorSets[VK_PIPELINE_BIND_POINT_RANGE_SIZE][MAX_BOUND_DESCRIPTOR_SETS] = { { VK_NULL_HANDLE } };
sw::PushConstantStorage pushConstants;
struct VertexInputBinding struct VertexInputBinding
{ {
......
...@@ -57,6 +57,7 @@ enum ...@@ -57,6 +57,7 @@ enum
{ {
MAX_BOUND_DESCRIPTOR_SETS = 4, MAX_BOUND_DESCRIPTOR_SETS = 4,
MAX_VERTEX_INPUT_BINDINGS = 16, MAX_VERTEX_INPUT_BINDINGS = 16,
MAX_PUSH_CONSTANT_SIZE = 128,
}; };
enum enum
......
...@@ -147,7 +147,7 @@ const VkPhysicalDeviceLimits& PhysicalDevice::getLimits() const ...@@ -147,7 +147,7 @@ const VkPhysicalDeviceLimits& PhysicalDevice::getLimits() const
65536, // maxTexelBufferElements 65536, // maxTexelBufferElements
16384, // maxUniformBufferRange 16384, // maxUniformBufferRange
(1ul << 27), // maxStorageBufferRange (1ul << 27), // maxStorageBufferRange
128, // maxPushConstantsSize vk::MAX_PUSH_CONSTANT_SIZE, // maxPushConstantsSize
4096, // maxMemoryAllocationCount 4096, // maxMemoryAllocationCount
4000, // maxSamplerAllocationCount 4000, // maxSamplerAllocationCount
131072, // bufferImageGranularity 131072, // bufferImageGranularity
......
...@@ -546,11 +546,11 @@ void ComputePipeline::compileShaders(const VkAllocationCallbacks* pAllocator, co ...@@ -546,11 +546,11 @@ void ComputePipeline::compileShaders(const VkAllocationCallbacks* pAllocator, co
} }
void ComputePipeline::run(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ, void ComputePipeline::run(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ,
size_t numDescriptorSets, VkDescriptorSet *descriptorSets) size_t numDescriptorSets, VkDescriptorSet *descriptorSets, sw::PushConstantStorage const &pushConstants)
{ {
ASSERT_OR_RETURN(routine != nullptr); ASSERT_OR_RETURN(routine != nullptr);
sw::ComputeProgram::run( sw::ComputeProgram::run(
routine, reinterpret_cast<void**>(descriptorSets), routine, reinterpret_cast<void**>(descriptorSets), pushConstants,
groupCountX, groupCountY, groupCountZ); groupCountX, groupCountY, groupCountZ);
} }
......
...@@ -104,7 +104,7 @@ public: ...@@ -104,7 +104,7 @@ public:
void compileShaders(const VkAllocationCallbacks* pAllocator, const VkComputePipelineCreateInfo* pCreateInfo); void compileShaders(const VkAllocationCallbacks* pAllocator, const VkComputePipelineCreateInfo* pCreateInfo);
void run(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ, void run(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ,
size_t numDescriptorSets, VkDescriptorSet *descriptorSets); size_t numDescriptorSets, VkDescriptorSet *descriptorSets, sw::PushConstantStorage const &pushConstants);
protected: protected:
sw::SpirvShader *shader = nullptr; sw::SpirvShader *shader = nullptr;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment