Commit f2be26a1 by Ben Clayton

Vulkan: Very rough first implementation of compute shaders

This is by no means finished:
* ComputeProgram holds the codegen and the invoke logic. ComputeProgram::run() should probably be moved somewhere else (maybe Renderer?).
* ComputeProgram::run() is currently fully single threaded.
* The compute routines are currently not cached.

With that said, this is enough to start passing a whole bunch of dEQP compute tests.

Test: *.compute.*
Bug: b/126871859
Change-Id: Ic6a76826b2fec0d34d54e0bed564b360ea0610c0
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/26551
Presubmit-Ready: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
parent 3d599beb
......@@ -300,6 +300,7 @@ if %errorlevel% neq 0 goto :VCEnd</Command>
<ClInclude Include="$(SolutionDir)src\Device\Vector.hpp" />
<ClInclude Include="$(SolutionDir)src\Device\Vertex.hpp" />
<ClInclude Include="$(SolutionDir)src\Device\VertexProcessor.hpp" />
<ClCompile Include="$(SolutionDir)src\Pipeline\ComputeProgram.cpp" />
<ClCompile Include="$(SolutionDir)src\Pipeline\Constants.cpp" />
<ClCompile Include="$(SolutionDir)src\Pipeline\PixelProgram.cpp" />
<ClCompile Include="$(SolutionDir)src\Pipeline\PixelRoutine.cpp" />
......@@ -309,6 +310,7 @@ if %errorlevel% neq 0 goto :VCEnd</Command>
<ClCompile Include="$(SolutionDir)src\Pipeline\SpirvShader.cpp" />
<ClCompile Include="$(SolutionDir)src\Pipeline\VertexProgram.cpp" />
<ClCompile Include="$(SolutionDir)src\Pipeline\VertexRoutine.cpp" />
<ClInclude Include="$(SolutionDir)src\Pipeline\ComputeProgram.hpp" />
<ClInclude Include="$(SolutionDir)src\Pipeline\Constants.hpp" />
<ClInclude Include="$(SolutionDir)src\Pipeline\PixelProgram.hpp" />
<ClInclude Include="$(SolutionDir)src\Pipeline\PixelRoutine.hpp" />
......
......@@ -157,6 +157,9 @@
<ClCompile Include="$(SolutionDir)src\Device\VertexProcessor.cpp">
<Filter>src\Device</Filter>
</ClCompile>
<ClCompile Include="$(SolutionDir)src\Pipeline\ComputeProgram.cpp">
<Filter>src\Pipeline</Filter>
</ClCompile>
<ClCompile Include="$(SolutionDir)src\Pipeline\Constants.cpp">
<Filter>src\Pipeline</Filter>
</ClCompile>
......@@ -390,6 +393,9 @@
<ClInclude Include="$(SolutionDir)src\Device\VertexProcessor.hpp">
<Filter>src\Device</Filter>
</ClInclude>
<ClInclude Include="$(SolutionDir)src\Pipeline\ComputeProgram.hpp">
<Filter>src\Pipeline</Filter>
</ClInclude>
<ClInclude Include="$(SolutionDir)src\Pipeline\Constants.hpp">
<Filter>src\Pipeline</Filter>
</ClInclude>
......
// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ComputeProgram.hpp"
#include "Vulkan/VkDebug.hpp"
#include "Vulkan/VkPipelineLayout.hpp"
namespace sw
{
// Sets up a compute program builder for the given shader and pipeline layout.
// The constructor body is empty; the routine contents are produced by
// generate().
ComputeProgram::ComputeProgram(SpirvShader const *shader, vk::PipelineLayout const *pipelineLayout)
: data(Arg<0>()), // first (and only) argument of the generated function
routine(pipelineLayout),
shader(shader),
pipelineLayout(pipelineLayout)
{
}
// No explicit cleanup is performed; only the member destructors run.
ComputeProgram::~ComputeProgram() = default;
// Builds the routine body: the shader's prolog, followed by the
// per-workgroup code produced by emit(), followed by the shader's epilog.
void ComputeProgram::generate()
{
shader->emitProlog(&routine);
emit();
shader->emitEpilog(&routine);
}
// Emits the code that executes one whole workgroup.
//
// The generated code reads its inputs from the Data struct pointed to by
// argument 0 (`data`), publishes the compute builtins the shader uses, and
// then emits the shader body once per group of SIMD::Width invocations.
void ComputeProgram::emit()
{
// Copy the descriptor set pointers from Data::descriptorSets into the
// routine, one per descriptor set reported by the pipeline layout.
Pointer<Pointer<Byte>> descriptorSetsIn = *Pointer<Pointer<Pointer<Byte>>>(data + OFFSET(Data, descriptorSets));
size_t numDescriptorSets = routine.pipelineLayout->getNumDescriptorSets();
for(unsigned int i = 0; i < numDescriptorSets; i++)
{
routine.descriptorSets[i] = descriptorSetsIn[i];
}
auto &modes = shader->getModes();
// numWorkgroups and workgroupID come from the Data struct at run time;
// the workgroup size comes from the shader's modes. The fourth component
// of workgroupSize is 0 and is not consumed by the code below unless a
// builtin reports more than three components.
Int4 numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
Int4 workgroupID = *Pointer<Int4>(data + OFFSET(Data, workgroupID));
Int4 workgroupSize = Int4(modes.LocalSizeX, modes.LocalSizeY, modes.LocalSizeZ, 0);
// Builtins that are identical for every invocation of the workgroup.
// setInputBuiltin() only invokes the callback when the shader declares the
// builtin. Each scalar component is extracted and expanded to an Int4
// (presumably replicated across all lanes — TODO confirm Int4(Int)
// semantics), then stored bit-for-bit as a Float4.
setInputBuiltin(spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4>& value)
{
for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
{
value[builtin.FirstComponent + component] =
As<Float4>(Int4(Extract(numWorkgroups, component)));
}
});
setInputBuiltin(spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4>& value)
{
for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
{
value[builtin.FirstComponent + component] =
As<Float4>(Int4(Extract(workgroupSize, component)));
}
});
// Total number of invocations required to execute this workgroup.
const int numInvocations = modes.LocalSizeX * modes.LocalSizeY * modes.LocalSizeZ;
// Indices into localInvocationID[]: one Int4 per axis, each holding that
// axis' coordinate for the lanes being processed.
enum { XXXX, YYYY, ZZZZ };
// Generated loop over the workgroup, advancing SIMD::Width invocations per
// iteration.
For(Int invocationIndex = 0, invocationIndex < numInvocations, invocationIndex += SIMD::Width)
{
// Linear invocation index of each lane: base index + lane number.
// NOTE(review): the (0, 1, 2, 3) offsets assume four lanes — confirm this
// matches SIMD::Width.
Int4 localInvocationIndex = Int4(invocationIndex) + Int4(0, 1, 2, 3);
Int4 localInvocationID[3];
{
// Decompose the linear index into (x, y, z), where
// index = z * (LocalSizeX * LocalSizeY) + y * LocalSizeX + x.
Int4 idx = localInvocationIndex;
localInvocationID[ZZZZ] = idx / Int4(modes.LocalSizeX * modes.LocalSizeY);
idx -= localInvocationID[ZZZZ] * Int4(modes.LocalSizeX * modes.LocalSizeY); // modulo
localInvocationID[YYYY] = idx / Int4(modes.LocalSizeX);
idx -= localInvocationID[YYYY] * Int4(modes.LocalSizeX); // modulo
localInvocationID[XXXX] = idx;
}
// Builtins that differ per invocation are written on every iteration.
setInputBuiltin(spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4>& value)
{
ASSERT(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<Float4>(localInvocationIndex);
});
setInputBuiltin(spv::BuiltInLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4>& value)
{
for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
{
value[builtin.FirstComponent + component] = As<Float4>(localInvocationID[component]);
}
});
setInputBuiltin(spv::BuiltInGlobalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4>& value)
{
for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
{
// globalInvocationID = workgroupID * workgroupSize + localInvocationID
Int4 globalInvocationID =
Int4(Extract(workgroupID, component)) *
Int4(Extract(workgroupSize, component)) +
localInvocationID[component];
value[builtin.FirstComponent + component] = As<Float4>(globalInvocationID);
// RR_WATCH(component, globalInvocationID);
}
});
// TODO(bclayton): Disable lanes where (invocationIDs >= numInvocations)
// Until that is done, no lane mask is applied here: if numInvocations is
// not a multiple of SIMD::Width, the final iteration also covers
// invocation indices >= numInvocations.
// Int4 enabledLanes = invocationIDs < Int4(numInvocations);
// Process numLanes of the workgroup.
shader->emit(&routine);
}
}
// Invokes `cb` with the mapping and value storage of input builtin `id`,
// provided the shader declares that builtin. Does nothing otherwise.
void ComputeProgram::setInputBuiltin(spv::BuiltIn id, std::function<void(const SpirvShader::BuiltinMapping& builtin, Array<Float4>& value)> cb)
{
	const auto found = shader->inputBuiltins.find(id);
	if (found == shader->inputBuiltins.end())
	{
		// The shader does not use this builtin; nothing to populate.
		return;
	}

	const auto &mapping = found->second;
	auto &storage = routine.getValue(mapping.Id);
	cb(mapping, storage);
}
// Executes the compute routine once for every workgroup of the dispatch grid.
//
// routine           - compiled routine; its entry point is called with a
//                     pointer to a Data struct describing one workgroup.
// numDescriptorSets - number of entries in descriptorSets (currently unused).
// descriptorSets    - descriptor set pointers handed to the routine via
//                     Data::descriptorSets.
// groupCountX/Y/Z   - grid dimensions in workgroups. If any of them is zero
//                     the routine is never invoked.
//
// Workgroups are run sequentially on the calling thread, with X varying
// fastest, then Y, then Z.
void ComputeProgram::run(
	Routine *routine,
	uint32_t numDescriptorSets, void** descriptorSets,
	uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
{
	// The generated code takes the set count from the pipeline layout, so the
	// count supplied by the caller is not consumed here.
	(void)numDescriptorSets;

	auto runWorkgroup = (void(*)(void*))(routine->getEntry());

	Data data;
	data.descriptorSets = descriptorSets;
	data.numWorkgroups[0] = groupCountX;
	data.numWorkgroups[1] = groupCountY;
	data.numWorkgroups[2] = groupCountZ;
	data.numWorkgroups[3] = 0;

	// The routine loads workgroupID as a full 4-component vector, so the
	// fourth lane must be initialized even though only x, y and z carry
	// meaning. The other three lanes are assigned in the loops below before
	// every invocation.
	data.workgroupID[3] = 0;

	// TODO(bclayton): Split work across threads.
	for (uint32_t groupZ = 0; groupZ < groupCountZ; groupZ++)
	{
		data.workgroupID[2] = groupZ;
		for (uint32_t groupY = 0; groupY < groupCountY; groupY++)
		{
			data.workgroupID[1] = groupY;
			for (uint32_t groupX = 0; groupX < groupCountX; groupX++)
			{
				data.workgroupID[0] = groupX;
				runWorkgroup(&data);
			}
		}
	}
}
}
// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef sw_ComputeProgram_hpp
#define sw_ComputeProgram_hpp
#include "SpirvShader.hpp"
#include "Reactor/Reactor.hpp"
#include <functional>
namespace vk
{
class PipelineLayout;
} // namespace vk
namespace sw
{
using namespace rr;
class DescriptorSetsLayout;
// ComputeProgram builds a SPIR-V compute shader.
// It is a Reactor function that takes a single byte pointer (to a Data
// struct) and returns nothing; one call of the generated routine executes
// one workgroup.
class ComputeProgram : public Function<Void(Pointer<Byte>)>
{
public:
// spirvShader: the shader to generate code for.
// pipelineLayout: used to construct the SpirvRoutine; the generated code
// queries it for the number of descriptor sets.
ComputeProgram(SpirvShader const *spirvShader, vk::PipelineLayout const *pipelineLayout);
virtual ~ComputeProgram();
// generate builds the shader program.
void generate();
// run executes the compute shader routine for all workgroups.
// routine is the compiled routine to call, descriptorSets is the array of
// descriptor set pointers passed to it, and groupCountX/Y/Z give the number
// of workgroups along each axis. numDescriptorSets is currently not used
// by the implementation.
// TODO(bclayton): This probably does not belong here. Consider moving.
static void run(
Routine *routine,
uint32_t numDescriptorSets, void** descriptorSets,
uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
protected:
// emit generates the code that executes a single workgroup.
void emit();
// setInputBuiltin calls cb with the mapping and value storage of the input
// builtin id, if the shader declares that builtin.
void setInputBuiltin(spv::BuiltIn id, std::function<void(const SpirvShader::BuiltinMapping& builtin, Array<Float4>& value)> cb);
Pointer<Byte> data; // argument 0
// Layout of the memory that argument 0 points to. run() fills one of these
// in and passes its address to the routine.
struct Data
{
void** descriptorSets; // descriptor set pointers, indexed by set number
uint4 numWorkgroups; // x, y, z = dispatch size in workgroups; w = 0
uint4 workgroupID; // x, y, z = ID of the workgroup being executed
};
SpirvRoutine routine;
SpirvShader const * const shader;
vk::PipelineLayout const * const pipelineLayout;
};
} // namespace sw
#endif // sw_ComputeProgram_hpp
......@@ -19,6 +19,7 @@
#include "VkImage.hpp"
#include "VkImageView.hpp"
#include "VkPipeline.hpp"
#include "VkPipelineLayout.hpp"
#include "VkRenderPass.hpp"
#include "Device/Renderer.hpp"
......@@ -123,6 +124,30 @@ private:
VkPipeline pipeline;
};
// Command that runs the compute pipeline bound at play time over a grid of
// groupCountX x groupCountY x groupCountZ workgroups.
class Dispatch : public CommandBuffer::Command
{
public:
	Dispatch(uint32_t pGroupCountX, uint32_t pGroupCountY, uint32_t pGroupCountZ)
		: groupCountX(pGroupCountX)
		, groupCountY(pGroupCountY)
		, groupCountZ(pGroupCountZ)
	{
	}

protected:
	// Looks up the pipeline and descriptor sets bound to the compute bind
	// point in the execution state and runs the pipeline with them.
	void play(CommandBuffer::ExecutionState& executionState) override
	{
		auto *computePipeline = static_cast<ComputePipeline*>(
			executionState.pipelines[VK_PIPELINE_BIND_POINT_COMPUTE]);
		VkDescriptorSet *computeDescriptorSets =
			executionState.boundDescriptorSets[VK_PIPELINE_BIND_POINT_COMPUTE];

		computePipeline->run(groupCountX, groupCountY, groupCountZ,
			MAX_BOUND_DESCRIPTOR_SETS,
			computeDescriptorSets);
	}

private:
	// Number of workgroups to dispatch along each axis.
	uint32_t groupCountX;
	uint32_t groupCountY;
	uint32_t groupCountZ;
};
struct VertexBufferBind : public CommandBuffer::Command
{
VertexBufferBind(uint32_t pBinding, const VkBuffer pBuffer, const VkDeviceSize pOffset) :
......@@ -666,12 +691,15 @@ void CommandBuffer::pipelineBarrier(VkPipelineStageFlags srcStageMask, VkPipelin
void CommandBuffer::bindPipeline(VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline)
{
if(pipelineBindPoint != VK_PIPELINE_BIND_POINT_GRAPHICS)
switch(pipelineBindPoint)
{
UNIMPLEMENTED();
case VK_PIPELINE_BIND_POINT_COMPUTE:
case VK_PIPELINE_BIND_POINT_GRAPHICS:
addCommand<PipelineBind>(pipelineBindPoint, pipeline);
break;
default:
UNIMPLEMENTED();
}
addCommand<PipelineBind>(pipelineBindPoint, pipeline);
}
void CommandBuffer::bindVertexBuffers(uint32_t firstBinding, uint32_t bindingCount,
......@@ -822,7 +850,7 @@ void CommandBuffer::bindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkInde
void CommandBuffer::dispatch(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
{
UNIMPLEMENTED();
addCommand<Dispatch>(groupCountX, groupCountY, groupCountZ);
}
void CommandBuffer::dispatchIndirect(VkBuffer buffer, VkDeviceSize offset)
......
......@@ -15,6 +15,7 @@
#include "VkPipeline.hpp"
#include "VkPipelineLayout.hpp"
#include "VkShaderModule.hpp"
#include "Pipeline/ComputeProgram.hpp"
#include "Pipeline/SpirvShader.hpp"
#include "spirv-tools/optimizer.hpp"
......@@ -538,6 +539,7 @@ ComputePipeline::ComputePipeline(const VkComputePipelineCreateInfo* pCreateInfo,
// Destroys the pipeline's shader object. pAllocator is not used here.
void ComputePipeline::destroyPipeline(const VkAllocationCallbacks* pAllocator)
{
// The shader was allocated with plain `new` in compileShaders().
delete shader;
// NOTE(review): `routine` is not released here — confirm who owns it and
// whether it needs to be freed when the pipeline is destroyed.
}
size_t ComputePipeline::ComputeRequiredAllocationSize(const VkComputePipelineCreateInfo* pCreateInfo)
......@@ -545,4 +547,35 @@ size_t ComputePipeline::ComputeRequiredAllocationSize(const VkComputePipelineCre
return 0;
}
// Builds the SpirvShader and the compute routine for this pipeline from the
// shader stage described by pCreateInfo. pAllocator is currently unused (see
// the FIXME below).
void ComputePipeline::compileShaders(const VkAllocationCallbacks* pAllocator, const VkComputePipelineCreateInfo* pCreateInfo)
{
auto module = Cast(pCreateInfo->stage.module);
// Run the module's code through preprocessSpirv() together with the stage's
// specialization info.
auto code = preprocessSpirv(module->getCode(), pCreateInfo->stage.pSpecializationInfo);
// Nothing can be compiled from empty code.
ASSERT_OR_RETURN(code.size() > 0);
// The shader is expected to be created only once per pipeline.
ASSERT(shader == nullptr);
// FIXME (b/119409619): use allocator.
shader = new sw::SpirvShader(code);
sw::ComputeProgram program(shader, layout);
program.generate();
// TODO(bclayton): Cache program
// Obtain the routine from the program; "ComputeRoutine" is presumably the
// routine's name — TODO confirm.
routine = program("ComputeRoutine");
}
// Runs the compiled compute routine over a grid of
// groupCountX x groupCountY x groupCountZ workgroups, using the given
// descriptor sets. Requires compileShaders() to have produced a routine.
void ComputePipeline::run(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ,
	size_t numDescriptorSets, VkDescriptorSet *descriptorSets)
{
	ASSERT_OR_RETURN(routine != nullptr);

	// ComputeProgram::run() takes the sets as untyped pointers and the count
	// as a 32-bit value.
	void **rawDescriptorSets = reinterpret_cast<void**>(descriptorSets);
	const uint32_t descriptorSetCount = static_cast<uint32_t>(numDescriptorSets);

	sw::ComputeProgram::run(routine, descriptorSetCount, rawDescriptorSets,
		groupCountX, groupCountY, groupCountZ);
}
} // namespace vk
......@@ -100,6 +100,15 @@ public:
#endif
static size_t ComputeRequiredAllocationSize(const VkComputePipelineCreateInfo* pCreateInfo);
void compileShaders(const VkAllocationCallbacks* pAllocator, const VkComputePipelineCreateInfo* pCreateInfo);
void run(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ,
size_t numDescriptorSets, VkDescriptorSet *descriptorSets);
protected:
sw::SpirvShader *shader = nullptr;
rr::Routine *routine = nullptr;
};
static inline Pipeline* Cast(VkPipeline object)
......
......@@ -1019,7 +1019,11 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateGraphicsPipelines(VkDevice device, VkPipe
for(uint32_t i = 0; i < createInfoCount; i++)
{
VkResult result = vk::GraphicsPipeline::Create(pAllocator, &pCreateInfos[i], &pPipelines[i]);
if(result != VK_SUCCESS)
if(result == VK_SUCCESS)
{
static_cast<vk::GraphicsPipeline*>(vk::Cast(pPipelines[i]))->compileShaders(pAllocator, &pCreateInfos[i]);
}
else
{
// According to the Vulkan spec, section 9.4. Multiple Pipeline Creation
// "When an application attempts to create many pipelines in a single command,
......@@ -1032,10 +1036,6 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateGraphicsPipelines(VkDevice device, VkPipe
pPipelines[i] = VK_NULL_HANDLE;
errorResult = result;
}
else
{
static_cast<vk::GraphicsPipeline*>(vk::Cast(pPipelines[i]))->compileShaders(pAllocator, &pCreateInfos[i]);
}
}
return errorResult;
......@@ -1052,7 +1052,11 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateComputePipelines(VkDevice device, VkPipel
for(uint32_t i = 0; i < createInfoCount; i++)
{
VkResult result = vk::ComputePipeline::Create(pAllocator, &pCreateInfos[i], &pPipelines[i]);
if(result != VK_SUCCESS)
if(result == VK_SUCCESS)
{
static_cast<vk::ComputePipeline*>(vk::Cast(pPipelines[i]))->compileShaders(pAllocator, &pCreateInfos[i]);
}
else
{
// According to the Vulkan spec, section 9.4. Multiple Pipeline Creation
// "When an application attempts to create many pipelines in a single command,
......
......@@ -141,6 +141,7 @@ copy "$(OutDir)vk_swiftshader.dll" "$(SolutionDir)out\$(Configuration)_$(Platfor
<ClCompile Include="..\Device\SwiftConfig.cpp" />
<ClCompile Include="..\Device\Vector.cpp" />
<ClCompile Include="..\Device\VertexProcessor.cpp" />
<ClCompile Include="..\Pipeline\ComputeProgram.cpp" />
<ClCompile Include="..\Pipeline\Constants.cpp" />
<ClCompile Include="..\Pipeline\PixelProgram.cpp" />
<ClCompile Include="..\Pipeline\PixelRoutine.cpp" />
......@@ -252,6 +253,7 @@ copy "$(OutDir)vk_swiftshader.dll" "$(SolutionDir)out\$(Configuration)_$(Platfor
<ClInclude Include="..\Device\Vector.hpp" />
<ClInclude Include="..\Device\Vertex.hpp" />
<ClInclude Include="..\Device\VertexProcessor.hpp" />
<ClInclude Include="..\Pipeline\ComputeProgram.hpp" />
<ClInclude Include="..\Pipeline\Constants.hpp" />
<ClInclude Include="..\Pipeline\PixelProgram.hpp" />
<ClInclude Include="..\Pipeline\PixelRoutine.hpp" />
......
......@@ -120,6 +120,9 @@
<ClCompile Include="..\Pipeline\PixelProgram.cpp">
<Filter>Source Files\Pipeline</Filter>
</ClCompile>
<ClCompile Include="..\Pipeline\ComputeProgram.cpp">
<Filter>Source Files\Pipeline</Filter>
</ClCompile>
<ClCompile Include="..\Pipeline\Constants.cpp">
<Filter>Source Files\Pipeline</Filter>
</ClCompile>
......@@ -491,6 +494,9 @@
<ClInclude Include="..\Pipeline\PixelProgram.hpp">
<Filter>Header Files\Pipeline</Filter>
</ClInclude>
<ClInclude Include="..\Pipeline\ComputeProgram.hpp">
<Filter>Header Files\Pipeline</Filter>
</ClInclude>
<ClInclude Include="..\Pipeline\Constants.hpp">
<Filter>Header Files\Pipeline</Filter>
</ClInclude>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment