Commit f7b7b706 by Ben Clayton

Pipeline: Use Yarn to make compute multi-threaded.

Bug: b/139142453
Change-Id: I466b7c935db03104cb4df90735fafe10905bef9e
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/35568
Tested-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
parent d6c61361
@@ -18,6 +18,10 @@
#include "Vulkan/VkDebug.hpp" #include "Vulkan/VkDebug.hpp"
#include "Vulkan/VkPipelineLayout.hpp" #include "Vulkan/VkPipelineLayout.hpp"
#include "Yarn/Defer.hpp"
#include "Yarn/Trace.hpp"
#include "Yarn/WaitGroup.hpp"
#include <queue> #include <queue>
namespace namespace
@@ -40,6 +44,8 @@ namespace sw
void ComputeProgram::generate() void ComputeProgram::generate()
{ {
YARN_SCOPED_EVENT("ComputeProgram::generate");
SpirvRoutine routine(pipelineLayout); SpirvRoutine routine(pipelineLayout);
shader->emitProlog(&routine); shader->emitProlog(&routine);
emit(&routine); emit(&routine);
@@ -199,11 +205,6 @@ namespace sw
auto invocationsPerWorkgroup = modes.WorkgroupSizeX * modes.WorkgroupSizeY * modes.WorkgroupSizeZ; auto invocationsPerWorkgroup = modes.WorkgroupSizeX * modes.WorkgroupSizeY * modes.WorkgroupSizeZ;
auto subgroupsPerWorkgroup = (invocationsPerWorkgroup + invocationsPerSubgroup - 1) / invocationsPerSubgroup; auto subgroupsPerWorkgroup = (invocationsPerWorkgroup + invocationsPerSubgroup - 1) / invocationsPerSubgroup;
// We're sharing a buffer here across all workgroups.
// We can only do this because we know a single workgroup is in flight
// at any time.
std::vector<uint8_t> workgroupMemory(shader->workgroupMemory.size());
Data data; Data data;
data.descriptorSets = descriptorSets; data.descriptorSets = descriptorSets;
data.descriptorDynamicOffsets = descriptorDynamicOffsets; data.descriptorDynamicOffsets = descriptorDynamicOffsets;
@@ -221,14 +222,33 @@ namespace sw
data.pushConstants = pushConstants; data.pushConstants = pushConstants;
data.constants = &sw::constants; data.constants = &sw::constants;
for (uint32_t groupZ = baseGroupZ; groupZ < baseGroupZ + groupCountZ; groupZ++) yarn::WaitGroup wg;
const uint32_t batchCount = 16;
auto groupCount = groupCountX * groupCountY * groupCountZ;
for (uint32_t batchID = 0; batchID < batchCount && batchID < groupCount; batchID++)
{ {
for (uint32_t groupY = baseGroupY; groupY < baseGroupY + groupCountY; groupY++) wg.add(1);
yarn::schedule([=, &data]
{ {
for (uint32_t groupX = baseGroupX; groupX < baseGroupX + groupCountX; groupX++) defer(wg.done());
std::vector<uint8_t> workgroupMemory(shader->workgroupMemory.size());
for (uint32_t groupIndex = batchID; groupIndex < groupCount; groupIndex += batchCount)
{ {
auto modulo = groupIndex;
auto groupOffsetZ = modulo / (groupCountX * groupCountY);
modulo -= groupOffsetZ * (groupCountX * groupCountY);
auto groupOffsetY = modulo / groupCountX;
modulo -= groupOffsetY * groupCountX;
auto groupOffsetX = modulo;
auto groupZ = baseGroupZ + groupOffsetZ;
auto groupY = baseGroupY + groupOffsetY;
auto groupX = baseGroupX + groupOffsetX;
YARN_SCOPED_EVENT("groupX: %d, groupY: %d, groupZ: %d", groupX, groupY, groupZ);
// TODO(bclayton): Split work across threads.
using Coroutine = std::unique_ptr<rr::Stream<SpirvShader::YieldResult>>; using Coroutine = std::unique_ptr<rr::Stream<SpirvShader::YieldResult>>;
std::queue<Coroutine> coroutines; std::queue<Coroutine> coroutines;
@@ -261,10 +281,11 @@ namespace sw
coroutines.push(std::move(coroutine)); coroutines.push(std::move(coroutine));
} }
} }
}
});
}
} // groupX wg.wait();
} // groupY
} // groupZ
} }
} // namespace sw } // namespace sw
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment