Commit e1f51b9e by Alexis Hetu Committed by Alexis Hétu

Threaded Queue submit with events and fences

This cl does 3 main things: - It pushes the queue submit operation to its own thread - It implements events - It implements fences Some details: - Because we can have N async draw operations and we need to signal the fence only after all operations are completed, fences have a add/done mechanism to allow signaling the fence only when all draw operations are completed. - Device::waitForFences() detects large timeouts to avoid integer overflow if now+timeout is bigger than the remaining nanoseconds available in a long long. Bug b/117835459 Change-Id: I2f02c3b4bb9d9ac9037909b02b0601e1bae15d21 Tests: dEQP-VK.synchronization.* Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/29769 Presubmit-Ready: Alexis Hétu <sugoi@google.com> Reviewed-by: 's avatarBen Clayton <bclayton@google.com> Reviewed-by: 's avatarNicolas Capens <nicolascapens@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com> Tested-by: 's avatarAlexis Hétu <sugoi@google.com>
parent b317d96a
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "System/Timer.hpp" #include "System/Timer.hpp"
#include "Vulkan/VkConfig.h" #include "Vulkan/VkConfig.h"
#include "Vulkan/VkDebug.hpp" #include "Vulkan/VkDebug.hpp"
#include "Vulkan/VkFence.hpp"
#include "Vulkan/VkImageView.hpp" #include "Vulkan/VkImageView.hpp"
#include "Vulkan/VkQueryPool.hpp" #include "Vulkan/VkQueryPool.hpp"
#include "Pipeline/SpirvShader.hpp" #include "Pipeline/SpirvShader.hpp"
...@@ -185,6 +186,8 @@ namespace sw ...@@ -185,6 +186,8 @@ namespace sw
references = -1; references = -1;
fence = nullptr;
data = (DrawData*)allocate(sizeof(DrawData)); data = (DrawData*)allocate(sizeof(DrawData));
data->constants = &constants; data->constants = &constants;
} }
...@@ -294,7 +297,7 @@ namespace sw ...@@ -294,7 +297,7 @@ namespace sw
sw::deallocate(mem); sw::deallocate(mem);
} }
void Renderer::draw(VkPrimitiveTopology topology, VkIndexType indexType, unsigned int count, int baseVertex, bool update) void Renderer::draw(VkPrimitiveTopology topology, VkIndexType indexType, unsigned int count, int baseVertex, vk::Fence* fence, bool update)
{ {
#ifndef NDEBUG #ifndef NDEBUG
if(count < minPrimitives || count > maxPrimitives) if(count < minPrimitives || count > maxPrimitives)
...@@ -409,6 +412,13 @@ namespace sw ...@@ -409,6 +412,13 @@ namespace sw
data->descriptorSets = context->descriptorSets; data->descriptorSets = context->descriptorSets;
data->descriptorDynamicOffsets = context->descriptorDynamicOffsets; data->descriptorDynamicOffsets = context->descriptorDynamicOffsets;
if(fence)
{
fence->add();
}
ASSERT(!draw->fence);
draw->fence = fence;
for(int i = 0; i < MAX_VERTEX_INPUTS; i++) for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
{ {
data->input[i] = context->input[i].buffer; data->input[i] = context->input[i].buffer;
...@@ -888,6 +898,12 @@ namespace sw ...@@ -888,6 +898,12 @@ namespace sw
draw.setupRoutine->unbind(); draw.setupRoutine->unbind();
draw.pixelRoutine->unbind(); draw.pixelRoutine->unbind();
if(draw.fence)
{
draw.fence->done();
draw.fence = nullptr;
}
sync->unlock(); sync->unlock();
draw.references = -1; draw.references = -1;
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
namespace vk namespace vk
{ {
class DescriptorSet; class DescriptorSet;
class Fence;
struct Query; struct Query;
} }
...@@ -199,7 +200,7 @@ namespace sw ...@@ -199,7 +200,7 @@ namespace sw
void *operator new(size_t size); void *operator new(size_t size);
void operator delete(void * mem); void operator delete(void * mem);
void draw(VkPrimitiveTopology topology, VkIndexType indexType, unsigned int count, int baseVertex, bool update = true); void draw(VkPrimitiveTopology topology, VkIndexType indexType, unsigned int count, int baseVertex, vk::Fence* fence, bool update = true);
void setContext(const sw::Context& context); void setContext(const sw::Context& context);
...@@ -351,6 +352,7 @@ namespace sw ...@@ -351,6 +352,7 @@ namespace sw
vk::ImageView *renderTarget[RENDERTARGETS]; vk::ImageView *renderTarget[RENDERTARGETS];
vk::ImageView *depthBuffer; vk::ImageView *depthBuffer;
vk::ImageView *stencilBuffer; vk::ImageView *stencilBuffer;
vk::Fence* fence;
std::list<vk::Query*> *queries; std::list<vk::Query*> *queries;
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "VkCommandBuffer.hpp" #include "VkCommandBuffer.hpp"
#include "VkBuffer.hpp" #include "VkBuffer.hpp"
#include "VkEvent.hpp" #include "VkEvent.hpp"
#include "VkFence.hpp"
#include "VkFramebuffer.hpp" #include "VkFramebuffer.hpp"
#include "VkImage.hpp" #include "VkImage.hpp"
#include "VkImageView.hpp" #include "VkImageView.hpp"
...@@ -523,7 +524,8 @@ struct DrawBase : public CommandBuffer::Command ...@@ -523,7 +524,8 @@ struct DrawBase : public CommandBuffer::Command
for(uint32_t instance = firstInstance; instance != firstInstance + instanceCount; instance++) for(uint32_t instance = firstInstance; instance != firstInstance + instanceCount; instance++)
{ {
executionState.renderer->setInstanceID(instance); executionState.renderer->setInstanceID(instance);
executionState.renderer->draw(context.topology, executionState.indexType, primitiveCount, vertexOffset); executionState.renderer->draw(context.topology, executionState.indexType, primitiveCount, vertexOffset,
executionState.fence);
executionState.renderer->advanceInstanceAttributes(); executionState.renderer->advanceInstanceAttributes();
} }
} }
...@@ -843,11 +845,8 @@ struct SignalEvent : public CommandBuffer::Command ...@@ -843,11 +845,8 @@ struct SignalEvent : public CommandBuffer::Command
void play(CommandBuffer::ExecutionState& executionState) override void play(CommandBuffer::ExecutionState& executionState) override
{ {
if(Cast(ev)->signal()) executionState.renderer->synchronize();
{ Cast(ev)->signal();
// Was waiting for signal on this event, sync now
executionState.renderer->synchronize();
}
} }
private: private:
...@@ -879,11 +878,8 @@ struct WaitEvent : public CommandBuffer::Command ...@@ -879,11 +878,8 @@ struct WaitEvent : public CommandBuffer::Command
void play(CommandBuffer::ExecutionState& executionState) override void play(CommandBuffer::ExecutionState& executionState) override
{ {
if(!Cast(ev)->wait()) executionState.renderer->synchronize();
{ Cast(ev)->wait();
// Already signaled, sync now
executionState.renderer->synchronize();
}
} }
private: private:
......
...@@ -31,6 +31,7 @@ namespace sw ...@@ -31,6 +31,7 @@ namespace sw
namespace vk namespace vk
{ {
class Fence;
class Framebuffer; class Framebuffer;
class Pipeline; class Pipeline;
class RenderPass; class RenderPass;
...@@ -132,6 +133,7 @@ public: ...@@ -132,6 +133,7 @@ public:
}; };
sw::Renderer* renderer = nullptr; sw::Renderer* renderer = nullptr;
Fence* fence = nullptr;
RenderPass* renderPass = nullptr; RenderPass* renderPass = nullptr;
Framebuffer* renderPassFramebuffer = nullptr; Framebuffer* renderPassFramebuffer = nullptr;
std::array<PipelineState, VK_PIPELINE_BIND_POINT_RANGE_SIZE> pipelineState; std::array<PipelineState, VK_PIPELINE_BIND_POINT_RANGE_SIZE> pipelineState;
......
...@@ -17,11 +17,22 @@ ...@@ -17,11 +17,22 @@
#include "VkConfig.h" #include "VkConfig.h"
#include "VkDebug.hpp" #include "VkDebug.hpp"
#include "VkDescriptorSetLayout.hpp" #include "VkDescriptorSetLayout.hpp"
#include "VkFence.hpp"
#include "VkQueue.hpp" #include "VkQueue.hpp"
#include "Device/Blitter.hpp" #include "Device/Blitter.hpp"
#include <chrono>
#include <climits>
#include <new> // Must #include this to use "placement new" #include <new> // Must #include this to use "placement new"
namespace
{
std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds> now()
{
return std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now());
}
}
namespace vk namespace vk
{ {
...@@ -42,7 +53,7 @@ Device::Device(const Device::CreateInfo* info, void* mem) ...@@ -42,7 +53,7 @@ Device::Device(const Device::CreateInfo* info, void* mem)
for(uint32_t j = 0; j < queueCreateInfo.queueCount; j++, queueID++) for(uint32_t j = 0; j < queueCreateInfo.queueCount; j++, queueID++)
{ {
new (&queues[queueID]) Queue(queueCreateInfo.queueFamilyIndex, queueCreateInfo.pQueuePriorities[j]); new (&queues[queueID]) Queue();
} }
} }
...@@ -86,17 +97,85 @@ VkQueue Device::getQueue(uint32_t queueFamilyIndex, uint32_t queueIndex) const ...@@ -86,17 +97,85 @@ VkQueue Device::getQueue(uint32_t queueFamilyIndex, uint32_t queueIndex) const
return queues[queueIndex]; return queues[queueIndex];
} }
void Device::waitForFences(uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout) VkResult Device::waitForFences(uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout)
{ {
// FIXME(b/117835459) : noop const time_point start = now();
const uint64_t max_timeout = (LLONG_MAX - start.time_since_epoch().count());
bool infiniteTimeout = (timeout > max_timeout);
const time_point end_ns = start + std::chrono::nanoseconds(std::min(max_timeout, timeout));
if(waitAll) // All fences must be signaled
{
for(uint32_t i = 0; i < fenceCount; i++)
{
if(timeout == 0)
{
if(Cast(pFences[i])->getStatus() != VK_SUCCESS) // At least one fence is not signaled
{
return VK_TIMEOUT;
}
}
else if(infiniteTimeout)
{
if(Cast(pFences[i])->wait() != VK_SUCCESS) // At least one fence is not signaled
{
return VK_TIMEOUT;
}
}
else
{
if(Cast(pFences[i])->waitUntil(end_ns) != VK_SUCCESS) // At least one fence is not signaled
{
return VK_TIMEOUT;
}
}
}
return VK_SUCCESS;
}
else // At least one fence must be signaled
{
// Start by quickly checking the status of all fences, as only one is required
for(uint32_t i = 0; i < fenceCount; i++)
{
if(Cast(pFences[i])->getStatus() == VK_SUCCESS) // At least one fence is signaled
{
return VK_SUCCESS;
}
}
if(timeout > 0)
{
for(uint32_t i = 0; i < fenceCount; i++)
{
if(infiniteTimeout)
{
if(Cast(pFences[i])->wait() == VK_SUCCESS) // At least one fence is signaled
{
return VK_SUCCESS;
}
}
else
{
if(Cast(pFences[i])->waitUntil(end_ns) == VK_SUCCESS) // At least one fence is signaled
{
return VK_SUCCESS;
}
}
}
}
return VK_TIMEOUT;
}
} }
void Device::waitIdle() VkResult Device::waitIdle()
{ {
for(uint32_t i = 0; i < queueCount; i++) for(uint32_t i = 0; i < queueCount; i++)
{ {
queues[i].waitIdle(); queues[i].waitIdle();
} }
return VK_SUCCESS;
} }
void Device::getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateInfo* pCreateInfo, void Device::getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
......
...@@ -44,8 +44,8 @@ public: ...@@ -44,8 +44,8 @@ public:
static size_t ComputeRequiredAllocationSize(const CreateInfo* info); static size_t ComputeRequiredAllocationSize(const CreateInfo* info);
VkQueue getQueue(uint32_t queueFamilyIndex, uint32_t queueIndex) const; VkQueue getQueue(uint32_t queueFamilyIndex, uint32_t queueIndex) const;
void waitForFences(uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout); VkResult waitForFences(uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout);
void waitIdle(); VkResult waitIdle();
void getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateInfo* pCreateInfo, void getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
VkDescriptorSetLayoutSupport* pSupport) const; VkDescriptorSetLayoutSupport* pSupport) const;
VkPhysicalDevice getPhysicalDevice() const { return physicalDevice; } VkPhysicalDevice getPhysicalDevice() const { return physicalDevice; }
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
#define VK_EVENT_HPP_ #define VK_EVENT_HPP_
#include "VkObject.hpp" #include "VkObject.hpp"
#include <condition_variable>
#include <mutex>
namespace vk namespace vk
{ {
...@@ -34,37 +36,38 @@ public: ...@@ -34,37 +36,38 @@ public:
return 0; return 0;
} }
bool signal() void signal()
{ {
std::unique_lock<std::mutex> lock(mutex);
status = VK_EVENT_SET; status = VK_EVENT_SET;
bool wasWaiting = waiting; lock.unlock();
waiting = false; condition.notify_all();
return wasWaiting;
} }
void reset() void reset()
{ {
std::unique_lock<std::mutex> lock(mutex);
status = VK_EVENT_RESET; status = VK_EVENT_RESET;
} }
VkResult getStatus() const VkResult getStatus()
{ {
return status; std::unique_lock<std::mutex> lock(mutex);
auto result = status;
lock.unlock();
return result;
} }
bool wait() void wait()
{ {
if(status != VK_EVENT_SET) std::unique_lock<std::mutex> lock(mutex);
{ condition.wait(lock, [this] { return status == VK_EVENT_SET; });
waiting = true;
}
return waiting;
} }
private: private:
VkResult status = VK_EVENT_RESET; VkResult status = VK_EVENT_RESET; // guarded by mutex
bool waiting = false; std::mutex mutex;
std::condition_variable condition;
}; };
static inline Event* Cast(VkEvent object) static inline Event* Cast(VkEvent object)
......
...@@ -16,42 +16,94 @@ ...@@ -16,42 +16,94 @@
#define VK_FENCE_HPP_ #define VK_FENCE_HPP_
#include "VkObject.hpp" #include "VkObject.hpp"
#include <chrono>
#include <condition_variable>
#include <mutex>
namespace vk namespace vk
{ {
using time_point = std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;
class Fence : public Object<Fence, VkFence> class Fence : public Object<Fence, VkFence>
{ {
public: public:
Fence() : status(VK_NOT_READY) {}
Fence(const VkFenceCreateInfo* pCreateInfo, void* mem) : Fence(const VkFenceCreateInfo* pCreateInfo, void* mem) :
status((pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) ? VK_SUCCESS : VK_NOT_READY) status((pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) ? VK_SUCCESS : VK_NOT_READY)
{ {
} }
~Fence() = delete;
static size_t ComputeRequiredAllocationSize(const VkFenceCreateInfo* pCreateInfo) static size_t ComputeRequiredAllocationSize(const VkFenceCreateInfo* pCreateInfo)
{ {
return 0; return 0;
} }
void add()
{
std::unique_lock<std::mutex> lock(mutex);
++count;
}
void done()
{
std::unique_lock<std::mutex> lock(mutex);
ASSERT(count > 0);
--count;
if(count == 0)
{
// signal the fence, without the unlock/lock required to call signal() here
status = VK_SUCCESS;
lock.unlock();
condition.notify_all();
}
}
void signal() void signal()
{ {
std::unique_lock<std::mutex> lock(mutex);
status = VK_SUCCESS; status = VK_SUCCESS;
lock.unlock();
condition.notify_all();
} }
void reset() void reset()
{ {
std::unique_lock<std::mutex> lock(mutex);
ASSERT(count == 0);
status = VK_NOT_READY; status = VK_NOT_READY;
} }
VkResult getStatus() const VkResult getStatus()
{
std::unique_lock<std::mutex> lock(mutex);
auto out = status;
lock.unlock();
return out;
}
VkResult wait()
{
std::unique_lock<std::mutex> lock(mutex);
condition.wait(lock, [this] { return status == VK_SUCCESS; });
auto out = status;
lock.unlock();
return out;
}
VkResult waitUntil(const time_point& timeout_ns)
{ {
return status; std::unique_lock<std::mutex> lock(mutex);
return condition.wait_until(lock, timeout_ns, [this] { return status == VK_SUCCESS; }) ?
VK_SUCCESS : VK_TIMEOUT;
} }
private: private:
VkResult status = VK_NOT_READY; VkResult status = VK_NOT_READY; // guarded by mutex
int32_t count = 0; // guarded by mutex
std::mutex mutex;
std::condition_variable condition;
}; };
static inline Fence* Cast(VkFence object) static inline Fence* Cast(VkFence object)
......
...@@ -16,30 +16,115 @@ ...@@ -16,30 +16,115 @@
#include "VkFence.hpp" #include "VkFence.hpp"
#include "VkQueue.hpp" #include "VkQueue.hpp"
#include "VkSemaphore.hpp" #include "VkSemaphore.hpp"
#include "Device/Renderer.hpp"
#include "WSI/VkSwapchainKHR.hpp" #include "WSI/VkSwapchainKHR.hpp"
#include <cstring>
namespace
{
VkSubmitInfo* DeepCopySubmitInfo(uint32_t submitCount, const VkSubmitInfo* pSubmits)
{
size_t submitSize = sizeof(VkSubmitInfo) * submitCount;
size_t totalSize = submitSize;
for(uint32_t i = 0; i < submitCount; i++)
{
totalSize += pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore);
totalSize += pSubmits[i].waitSemaphoreCount * sizeof(VkPipelineStageFlags);
totalSize += pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore);
totalSize += pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer);
}
uint8_t* mem = static_cast<uint8_t*>(
vk::allocate(totalSize, vk::REQUIRED_MEMORY_ALIGNMENT, vk::DEVICE_MEMORY, vk::Fence::GetAllocationScope()));
auto submits = new (mem) VkSubmitInfo[submitCount];
memcpy(mem, pSubmits, submitSize);
mem += submitSize;
for(uint32_t i = 0; i < submitCount; i++)
{
size_t size = pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore);
submits[i].pWaitSemaphores = new (mem) VkSemaphore[pSubmits[i].waitSemaphoreCount];
memcpy(mem, pSubmits[i].pWaitSemaphores, size);
mem += size;
size = pSubmits[i].waitSemaphoreCount * sizeof(VkPipelineStageFlags);
submits[i].pWaitDstStageMask = new (mem) VkPipelineStageFlags[pSubmits[i].waitSemaphoreCount];
memcpy(mem, pSubmits[i].pWaitDstStageMask, size);
mem += size;
size = pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore);
submits[i].pSignalSemaphores = new (mem) VkSemaphore[pSubmits[i].signalSemaphoreCount];
memcpy(mem, pSubmits[i].pSignalSemaphores, size);
mem += size;
size = pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer);
submits[i].pCommandBuffers = new (mem) VkCommandBuffer[pSubmits[i].commandBufferCount];
memcpy(mem, pSubmits[i].pCommandBuffers, size);
mem += size;
}
return submits;
}
} // anonymous namespace
namespace vk namespace vk
{ {
Queue::Queue(uint32_t pFamilyIndex, float pPriority) : familyIndex(pFamilyIndex), priority(pPriority) Queue::Queue()
{ {
// FIXME (b/119409619): use an allocator here so we can control all memory allocations context.reset(new sw::Context());
context = new sw::Context(); renderer.reset(new sw::Renderer(context.get(), sw::OpenGL, true));
renderer = new sw::Renderer(context, sw::OpenGL, true);
queueThread = std::thread(TaskLoop, this);
} }
void Queue::destroy() void Queue::destroy()
{ {
delete context; Task task;
delete renderer; task.type = Task::KILL_THREAD;
pending.put(task);
queueThread.join();
ASSERT_MSG(pending.count() == 0, "queue has work after worker thread shutdown");
garbageCollect();
renderer.reset(nullptr);
context.reset(nullptr);
} }
void Queue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) VkResult Queue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence)
{ {
for(uint32_t i = 0; i < submitCount; i++) garbageCollect();
Task task;
task.submitCount = submitCount;
task.pSubmits = DeepCopySubmitInfo(submitCount, pSubmits);
task.fence = (fence != VK_NULL_HANDLE) ? vk::Cast(fence) : nullptr;
if(task.fence)
{
task.fence->add();
}
pending.put(task);
return VK_SUCCESS;
}
void Queue::TaskLoop(vk::Queue* queue)
{
queue->taskLoop();
}
void Queue::submitQueue(const Task& task)
{
for(uint32_t i = 0; i < task.submitCount; i++)
{ {
auto& submitInfo = pSubmits[i]; auto& submitInfo = task.pSubmits[i];
for(uint32_t j = 0; j < submitInfo.waitSemaphoreCount; j++) for(uint32_t j = 0; j < submitInfo.waitSemaphoreCount; j++)
{ {
vk::Cast(submitInfo.pWaitSemaphores[j])->wait(submitInfo.pWaitDstStageMask[j]); vk::Cast(submitInfo.pWaitSemaphores[j])->wait(submitInfo.pWaitDstStageMask[j]);
...@@ -47,7 +132,8 @@ void Queue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence f ...@@ -47,7 +132,8 @@ void Queue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence f
{ {
CommandBuffer::ExecutionState executionState; CommandBuffer::ExecutionState executionState;
executionState.renderer = renderer; executionState.renderer = renderer.get();
executionState.fence = task.fence;
for(uint32_t j = 0; j < submitInfo.commandBufferCount; j++) for(uint32_t j = 0; j < submitInfo.commandBufferCount; j++)
{ {
vk::Cast(submitInfo.pCommandBuffers[j])->submit(executionState); vk::Cast(submitInfo.pCommandBuffers[j])->submit(executionState);
...@@ -60,21 +146,66 @@ void Queue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence f ...@@ -60,21 +146,66 @@ void Queue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence f
} }
} }
// FIXME (b/117835459): signal the fence only once the work is completed if (task.pSubmits)
if(fence != VK_NULL_HANDLE) {
toDelete.put(task.pSubmits);
}
if(task.fence)
{
task.fence->done();
}
}
void Queue::taskLoop()
{
while(true)
{ {
vk::Cast(fence)->signal(); Task task = pending.take();
switch(task.type)
{
case Task::KILL_THREAD:
ASSERT_MSG(pending.count() == 0, "queue has remaining work!");
return;
case Task::SUBMIT_QUEUE:
submitQueue(task);
break;
default:
UNIMPLEMENTED("task.type %d", static_cast<int>(task.type));
break;
}
} }
} }
void Queue::waitIdle() VkResult Queue::waitIdle()
{ {
// equivalent to submitting a fence to a queue and waiting // Wait for task queue to flush.
// with an infinite timeout for that fence to signal vk::Fence fence;
fence.add();
Task task;
task.fence = &fence;
pending.put(task);
// FIXME (b/117835459): implement once we have working fences fence.wait();
// Wait for all draw operations to complete, if any
renderer->synchronize(); renderer->synchronize();
garbageCollect();
return VK_SUCCESS;
}
void Queue::garbageCollect()
{
while (true)
{
auto v = toDelete.tryTake();
if (!v.second) { break; }
vk::deallocate(v.first, DEVICE_MEMORY);
}
} }
#ifndef __ANDROID__ #ifndef __ANDROID__
......
...@@ -16,6 +16,9 @@ ...@@ -16,6 +16,9 @@
#define VK_QUEUE_HPP_ #define VK_QUEUE_HPP_
#include "VkObject.hpp" #include "VkObject.hpp"
#include "Device/Renderer.hpp"
#include <queue>
#include <thread>
#include <vulkan/vk_icd.h> #include <vulkan/vk_icd.h>
namespace sw namespace sw
...@@ -27,12 +30,97 @@ namespace sw ...@@ -27,12 +30,97 @@ namespace sw
namespace vk namespace vk
{ {
// Chan is a thread-safe FIFO queue of type T.
// Chan takes its name after Golang's chan.
template <typename T>
class Chan
{
public:
Chan();
// take returns the next item in the chan, blocking until an item is
// available.
T take();
// tryTake returns a <T, bool> pair.
// If the chan is not empty, then the next item and true are returned.
// If the chan is empty, then a default-initialized T and false are returned.
std::pair<T, bool> tryTake();
// put places an item into the chan, blocking if the chan is bounded and
// full.
void put(const T &v);
// Returns the number of items in the chan.
// Note: that this may change as soon as the function returns, so should
// only be used for debugging.
size_t count();
private:
std::queue<T> queue;
std::mutex mutex;
std::condition_variable added;
std::condition_variable removed;
};
template <typename T>
Chan<T>::Chan() {}
template <typename T>
T Chan<T>::take()
{
std::unique_lock<std::mutex> lock(mutex);
if (queue.size() == 0)
{
// Chan empty. Wait for item to be added.
added.wait(lock, [this] { return queue.size() > 0; });
}
T out = queue.front();
queue.pop();
lock.unlock();
removed.notify_one();
return out;
}
template <typename T>
std::pair<T, bool> Chan<T>::tryTake()
{
std::unique_lock<std::mutex> lock(mutex);
if (queue.size() == 0)
{
return std::make_pair(T{}, false);
}
T out = queue.front();
queue.pop();
lock.unlock();
removed.notify_one();
return std::make_pair(out, true);
}
template <typename T>
void Chan<T>::put(const T &item)
{
std::unique_lock<std::mutex> lock(mutex);
queue.push(item);
lock.unlock();
added.notify_one();
}
template <typename T>
size_t Chan<T>::count()
{
std::unique_lock<std::mutex> lock(mutex);
auto out = queue.size();
lock.unlock();
return out;
}
class Queue class Queue
{ {
VK_LOADER_DATA loaderData = { ICD_LOADER_MAGIC }; VK_LOADER_DATA loaderData = { ICD_LOADER_MAGIC };
public: public:
Queue(uint32_t pFamilyIndex, float pPriority); Queue();
~Queue() = delete; ~Queue() = delete;
operator VkQueue() operator VkQueue()
...@@ -41,17 +129,33 @@ public: ...@@ -41,17 +129,33 @@ public:
} }
void destroy(); void destroy();
void submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence); VkResult submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence);
void waitIdle(); VkResult waitIdle();
#ifndef __ANDROID__ #ifndef __ANDROID__
void present(const VkPresentInfoKHR* presentInfo); void present(const VkPresentInfoKHR* presentInfo);
#endif #endif
private: private:
sw::Context* context = nullptr; struct Task
sw::Renderer* renderer = nullptr; {
uint32_t familyIndex = 0; uint32_t submitCount = 0;
float priority = 0.0f; VkSubmitInfo* pSubmits = nullptr;
Fence* fence = nullptr;
enum Type { KILL_THREAD, SUBMIT_QUEUE };
Type type = SUBMIT_QUEUE;
};
static void TaskLoop(vk::Queue* queue);
void taskLoop();
void garbageCollect();
void submitQueue(const Task& task);
std::unique_ptr<sw::Context> context;
std::unique_ptr<sw::Renderer> renderer;
Chan<Task> pending;
Chan<VkSubmitInfo*> toDelete;
std::thread queueThread;
}; };
static inline Queue* Cast(VkQueue object) static inline Queue* Cast(VkQueue object)
......
...@@ -522,27 +522,21 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(VkQueue queue, uint32_t submitCount ...@@ -522,27 +522,21 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(VkQueue queue, uint32_t submitCount
TRACE("(VkQueue queue = %p, uint32_t submitCount = %d, const VkSubmitInfo* pSubmits = %p, VkFence fence = %p)", TRACE("(VkQueue queue = %p, uint32_t submitCount = %d, const VkSubmitInfo* pSubmits = %p, VkFence fence = %p)",
queue, submitCount, pSubmits, fence); queue, submitCount, pSubmits, fence);
vk::Cast(queue)->submit(submitCount, pSubmits, fence); return vk::Cast(queue)->submit(submitCount, pSubmits, fence);
return VK_SUCCESS;
} }
VKAPI_ATTR VkResult VKAPI_CALL vkQueueWaitIdle(VkQueue queue) VKAPI_ATTR VkResult VKAPI_CALL vkQueueWaitIdle(VkQueue queue)
{ {
TRACE("(VkQueue queue = %p)", queue); TRACE("(VkQueue queue = %p)", queue);
vk::Cast(queue)->waitIdle(); return vk::Cast(queue)->waitIdle();
return VK_SUCCESS;
} }
VKAPI_ATTR VkResult VKAPI_CALL vkDeviceWaitIdle(VkDevice device) VKAPI_ATTR VkResult VKAPI_CALL vkDeviceWaitIdle(VkDevice device)
{ {
TRACE("(VkDevice device = %p)", device); TRACE("(VkDevice device = %p)", device);
vk::Cast(device)->waitIdle(); return vk::Cast(device)->waitIdle();
return VK_SUCCESS;
} }
VKAPI_ATTR VkResult VKAPI_CALL vkAllocateMemory(VkDevice device, const VkMemoryAllocateInfo* pAllocateInfo, const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMemory) VKAPI_ATTR VkResult VKAPI_CALL vkAllocateMemory(VkDevice device, const VkMemoryAllocateInfo* pAllocateInfo, const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMemory)
...@@ -725,7 +719,6 @@ VKAPI_ATTR void VKAPI_CALL vkDestroyFence(VkDevice device, VkFence fence, const ...@@ -725,7 +719,6 @@ VKAPI_ATTR void VKAPI_CALL vkDestroyFence(VkDevice device, VkFence fence, const
TRACE("(VkDevice device = %p, VkFence fence = %p, const VkAllocationCallbacks* pAllocator = %p)", TRACE("(VkDevice device = %p, VkFence fence = %p, const VkAllocationCallbacks* pAllocator = %p)",
device, fence, pAllocator); device, fence, pAllocator);
vk::destroy(fence, pAllocator); vk::destroy(fence, pAllocator);
} }
...@@ -754,9 +747,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkWaitForFences(VkDevice device, uint32_t fenceCo ...@@ -754,9 +747,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkWaitForFences(VkDevice device, uint32_t fenceCo
TRACE("(VkDevice device = %p, uint32_t fenceCount = %d, const VkFence* pFences = %p, VkBool32 waitAll = %d, uint64_t timeout = %d)", TRACE("(VkDevice device = %p, uint32_t fenceCount = %d, const VkFence* pFences = %p, VkBool32 waitAll = %d, uint64_t timeout = %d)",
device, int(fenceCount), pFences, int(waitAll), int(timeout)); device, int(fenceCount), pFences, int(waitAll), int(timeout));
vk::Cast(device)->waitForFences(fenceCount, pFences, waitAll, timeout); return vk::Cast(device)->waitForFences(fenceCount, pFences, waitAll, timeout);
return VK_SUCCESS;
} }
VKAPI_ATTR VkResult VKAPI_CALL vkCreateSemaphore(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSemaphore* pSemaphore) VKAPI_ATTR VkResult VKAPI_CALL vkCreateSemaphore(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSemaphore* pSemaphore)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment