Commit 3f0c4a56 by Jamie Madill Committed by Commit Bot

Vulkan: Faster state transitions.

Implements a transition table from Pipeline Cache entry to state change neighbouring Pipeline Cache entries. We use a 64-bit mask to do a quick scan over the pipeline desc. This ends up being a lot faster than doing a full hash and memcmp over the pipeline description. Note that there could be future optimizations to this design. We might keep a hash map of the pipeline transitions instead of a list. Or use a sorted list. This could speed up the search when there are many transitions for cache entries. Also we could skip the transition table and opt to do a full hash when there are more than a configurable number of dirty states. This might be a bit faster in some cases. Likely this will be something we can add performance tests for in the future. Documentation is also added in a README file for the Vulkan back end. This will be extended over time. Improves performance about 30-35% on the VBO state change test. Bug: angleproject:3013 Change-Id: I793f9e3efd8887acf00ad60e4ac2502a54c95dee Reviewed-on: https://chromium-review.googlesource.com/c/1369287 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Yuly Novikov <ynovikov@chromium.org>
parent 80766cfa
......@@ -1271,6 +1271,11 @@ inline uint16_t RotR16(uint16_t x, int8_t r)
# define ANGLE_ROTR16(x, y) ::rx::RotR16(x, y)
#endif // namespace rx
// Returns floor(log2(bytes)) for bytes >= 1; the result is exact when |bytes| is a power of two.
// Zero has no logarithm: it is treated like 1 (result 0) so that the recursion always reaches its
// base case instead of halving 0 forever.
// Kept as a single return expression so it stays usable as a C++11-style constexpr function.
constexpr unsigned int Log2(unsigned int bytes)
{
    return bytes <= 1 ? 0 : (1 + Log2(bytes / 2));
}
} // namespace rx
#endif // COMMON_MATHUTIL_H_
......@@ -40,16 +40,11 @@ bool CheckedMathResult(const CheckedNumeric<GLuint> &value, GLuint *resultOut)
}
}
// Returns floor(log2(bytes)) for bytes >= 1; the result is exact when |bytes| is a power of two.
// Zero has no logarithm: it is treated like 1 (result 0) so that the recursion always reaches its
// base case instead of halving 0 forever.
constexpr GLuint Log2(GLuint bytes)
{
    return bytes <= 1 ? 0 : (1 + Log2(bytes / 2));
}
// Packs type information into a single 32-bit value: |bytes| is OR-ed in unshifted,
// Log2(bytes) is shifted left by 8 and |specialized| is shifted left by 16.
// Presumably |bytes| is a power of two small enough not to overlap the shifted fields (see the
// disabled static_assert below) - TODO confirm against callers.
constexpr uint32_t PackTypeInfo(GLuint bytes, bool specialized)
{
// static_assert within constexpr requires c++17
// static_assert(isPow2(bytes));
// NOTE(review): two consecutive return statements follow, so the second one is unreachable.
// This looks like the old/new line pair of a diff (unqualified Log2 vs. rx::Log2); confirm which
// one is meant to remain.
return bytes | (Log2(bytes) << 8) | (specialized << 16);
return bytes | (rx::Log2(bytes) << 8) | (specialized << 16);
}
} // anonymous namespace
......
......@@ -109,6 +109,7 @@ constexpr size_t kDefaultBufferSize = kDefaultValueSize * 16;
ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk *renderer)
: ContextImpl(state, errorSet),
vk::Context(renderer),
mCurrentPipeline(nullptr),
mCurrentDrawMode(gl::PrimitiveMode::InvalidEnum),
mVertexArray(nullptr),
mDrawFramebuffer(nullptr),
......@@ -253,7 +254,7 @@ angle::Result ContextVk::setupDraw(const gl::Context *context,
{
invalidateCurrentPipeline();
mCurrentDrawMode = mode;
mGraphicsPipelineDesc->updateTopology(mCurrentDrawMode);
mGraphicsPipelineDesc->updateTopology(&mGraphicsPipelineTransition, mCurrentDrawMode);
}
if (!mDrawFramebuffer->appendToStartedRenderPass(mRenderer, commandBufferOut))
......@@ -365,13 +366,34 @@ angle::Result ContextVk::handleDirtyPipeline(const gl::Context *context,
{
if (!mCurrentPipeline)
{
const vk::GraphicsPipelineDesc *descPtr;
// Draw call shader patching, shader compilation, and pipeline cache query.
ANGLE_TRY(mProgram->getGraphicsPipeline(this, mCurrentDrawMode, *mGraphicsPipelineDesc,
mProgram->getState().getActiveAttribLocationsMask(),
&mCurrentPipeline));
&descPtr, &mCurrentPipeline));
mGraphicsPipelineTransition.reset();
}
else if (mGraphicsPipelineTransition.any())
{
if (!mCurrentPipeline->findTransition(mGraphicsPipelineTransition, *mGraphicsPipelineDesc,
&mCurrentPipeline))
{
vk::PipelineHelper *oldPipeline = mCurrentPipeline;
const vk::GraphicsPipelineDesc *descPtr;
ANGLE_TRY(mProgram->getGraphicsPipeline(
this, mCurrentDrawMode, *mGraphicsPipelineDesc,
mProgram->getState().getActiveAttribLocationsMask(), &descPtr, &mCurrentPipeline));
oldPipeline->addTransition(mGraphicsPipelineTransition, descPtr, mCurrentPipeline);
}
mGraphicsPipelineTransition.reset();
}
commandBuffer->bindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, mCurrentPipeline->get());
commandBuffer->bindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, mCurrentPipeline->getPipeline());
// Update the queue serial for the pipeline object.
ASSERT(mCurrentPipeline && mCurrentPipeline->valid());
......@@ -623,7 +645,7 @@ void ContextVk::updateColorMask(const gl::BlendState &blendState)
blendState.colorMaskBlue, blendState.colorMaskAlpha);
FramebufferVk *framebufferVk = vk::GetImpl(mState.getDrawFramebuffer());
mGraphicsPipelineDesc->updateColorWriteMask(mClearColorMask,
mGraphicsPipelineDesc->updateColorWriteMask(&mGraphicsPipelineTransition, mClearColorMask,
framebufferVk->getEmulatedAlphaAttachmentMask());
}
......@@ -689,16 +711,20 @@ angle::Result ContextVk::syncState(const gl::Context *context,
updateDepthRange(glState.getNearPlane(), glState.getFarPlane());
break;
case gl::State::DIRTY_BIT_BLEND_ENABLED:
mGraphicsPipelineDesc->updateBlendEnabled(glState.isBlendEnabled());
mGraphicsPipelineDesc->updateBlendEnabled(&mGraphicsPipelineTransition,
glState.isBlendEnabled());
break;
case gl::State::DIRTY_BIT_BLEND_COLOR:
mGraphicsPipelineDesc->updateBlendColor(glState.getBlendColor());
mGraphicsPipelineDesc->updateBlendColor(&mGraphicsPipelineTransition,
glState.getBlendColor());
break;
case gl::State::DIRTY_BIT_BLEND_FUNCS:
mGraphicsPipelineDesc->updateBlendFuncs(glState.getBlendState());
mGraphicsPipelineDesc->updateBlendFuncs(&mGraphicsPipelineTransition,
glState.getBlendState());
break;
case gl::State::DIRTY_BIT_BLEND_EQUATIONS:
mGraphicsPipelineDesc->updateBlendEquations(glState.getBlendState());
mGraphicsPipelineDesc->updateBlendEquations(&mGraphicsPipelineTransition,
glState.getBlendState());
break;
case gl::State::DIRTY_BIT_COLOR_MASK:
updateColorMask(glState.getBlendState());
......@@ -714,61 +740,75 @@ angle::Result ContextVk::syncState(const gl::Context *context,
case gl::State::DIRTY_BIT_SAMPLE_MASK:
break;
case gl::State::DIRTY_BIT_DEPTH_TEST_ENABLED:
mGraphicsPipelineDesc->updateDepthTestEnabled(glState.getDepthStencilState(),
mGraphicsPipelineDesc->updateDepthTestEnabled(&mGraphicsPipelineTransition,
glState.getDepthStencilState(),
glState.getDrawFramebuffer());
break;
case gl::State::DIRTY_BIT_DEPTH_FUNC:
mGraphicsPipelineDesc->updateDepthFunc(glState.getDepthStencilState());
mGraphicsPipelineDesc->updateDepthFunc(&mGraphicsPipelineTransition,
glState.getDepthStencilState());
break;
case gl::State::DIRTY_BIT_DEPTH_MASK:
mGraphicsPipelineDesc->updateDepthWriteEnabled(glState.getDepthStencilState(),
mGraphicsPipelineDesc->updateDepthWriteEnabled(&mGraphicsPipelineTransition,
glState.getDepthStencilState(),
glState.getDrawFramebuffer());
break;
case gl::State::DIRTY_BIT_STENCIL_TEST_ENABLED:
mGraphicsPipelineDesc->updateStencilTestEnabled(glState.getDepthStencilState(),
mGraphicsPipelineDesc->updateStencilTestEnabled(&mGraphicsPipelineTransition,
glState.getDepthStencilState(),
glState.getDrawFramebuffer());
break;
case gl::State::DIRTY_BIT_STENCIL_FUNCS_FRONT:
mGraphicsPipelineDesc->updateStencilFrontFuncs(glState.getStencilRef(),
mGraphicsPipelineDesc->updateStencilFrontFuncs(&mGraphicsPipelineTransition,
glState.getStencilRef(),
glState.getDepthStencilState());
break;
case gl::State::DIRTY_BIT_STENCIL_FUNCS_BACK:
mGraphicsPipelineDesc->updateStencilBackFuncs(glState.getStencilBackRef(),
mGraphicsPipelineDesc->updateStencilBackFuncs(&mGraphicsPipelineTransition,
glState.getStencilBackRef(),
glState.getDepthStencilState());
break;
case gl::State::DIRTY_BIT_STENCIL_OPS_FRONT:
mGraphicsPipelineDesc->updateStencilFrontOps(glState.getDepthStencilState());
mGraphicsPipelineDesc->updateStencilFrontOps(&mGraphicsPipelineTransition,
glState.getDepthStencilState());
break;
case gl::State::DIRTY_BIT_STENCIL_OPS_BACK:
mGraphicsPipelineDesc->updateStencilBackOps(glState.getDepthStencilState());
mGraphicsPipelineDesc->updateStencilBackOps(&mGraphicsPipelineTransition,
glState.getDepthStencilState());
break;
case gl::State::DIRTY_BIT_STENCIL_WRITEMASK_FRONT:
mGraphicsPipelineDesc->updateStencilFrontWriteMask(glState.getDepthStencilState(),
mGraphicsPipelineDesc->updateStencilFrontWriteMask(&mGraphicsPipelineTransition,
glState.getDepthStencilState(),
glState.getDrawFramebuffer());
break;
case gl::State::DIRTY_BIT_STENCIL_WRITEMASK_BACK:
mGraphicsPipelineDesc->updateStencilBackWriteMask(glState.getDepthStencilState(),
mGraphicsPipelineDesc->updateStencilBackWriteMask(&mGraphicsPipelineTransition,
glState.getDepthStencilState(),
glState.getDrawFramebuffer());
break;
case gl::State::DIRTY_BIT_CULL_FACE_ENABLED:
case gl::State::DIRTY_BIT_CULL_FACE:
mGraphicsPipelineDesc->updateCullMode(glState.getRasterizerState());
mGraphicsPipelineDesc->updateCullMode(&mGraphicsPipelineTransition,
glState.getRasterizerState());
break;
case gl::State::DIRTY_BIT_FRONT_FACE:
mGraphicsPipelineDesc->updateFrontFace(glState.getRasterizerState(),
mGraphicsPipelineDesc->updateFrontFace(&mGraphicsPipelineTransition,
glState.getRasterizerState(),
isViewportFlipEnabledForDrawFBO());
break;
case gl::State::DIRTY_BIT_POLYGON_OFFSET_FILL_ENABLED:
mGraphicsPipelineDesc->updatePolygonOffsetFillEnabled(
glState.isPolygonOffsetFillEnabled());
&mGraphicsPipelineTransition, glState.isPolygonOffsetFillEnabled());
break;
case gl::State::DIRTY_BIT_POLYGON_OFFSET:
mGraphicsPipelineDesc->updatePolygonOffset(glState.getRasterizerState());
mGraphicsPipelineDesc->updatePolygonOffset(&mGraphicsPipelineTransition,
glState.getRasterizerState());
break;
case gl::State::DIRTY_BIT_RASTERIZER_DISCARD_ENABLED:
break;
case gl::State::DIRTY_BIT_LINE_WIDTH:
mGraphicsPipelineDesc->updateLineWidth(glState.getLineWidth());
mGraphicsPipelineDesc->updateLineWidth(&mGraphicsPipelineTransition,
glState.getLineWidth());
break;
case gl::State::DIRTY_BIT_PRIMITIVE_RESTART_ENABLED:
break;
......@@ -813,19 +853,26 @@ angle::Result ContextVk::syncState(const gl::Context *context,
updateViewport(mDrawFramebuffer, glState.getViewport(), glState.getNearPlane(),
glState.getFarPlane(), isViewportFlipEnabledForDrawFBO());
updateColorMask(glState.getBlendState());
mGraphicsPipelineDesc->updateCullMode(glState.getRasterizerState());
mGraphicsPipelineDesc->updateCullMode(&mGraphicsPipelineTransition,
glState.getRasterizerState());
updateScissor(glState);
mGraphicsPipelineDesc->updateDepthTestEnabled(glState.getDepthStencilState(),
mGraphicsPipelineDesc->updateDepthTestEnabled(&mGraphicsPipelineTransition,
glState.getDepthStencilState(),
glState.getDrawFramebuffer());
mGraphicsPipelineDesc->updateDepthWriteEnabled(glState.getDepthStencilState(),
mGraphicsPipelineDesc->updateDepthWriteEnabled(&mGraphicsPipelineTransition,
glState.getDepthStencilState(),
glState.getDrawFramebuffer());
mGraphicsPipelineDesc->updateStencilTestEnabled(glState.getDepthStencilState(),
mGraphicsPipelineDesc->updateStencilTestEnabled(&mGraphicsPipelineTransition,
glState.getDepthStencilState(),
glState.getDrawFramebuffer());
mGraphicsPipelineDesc->updateStencilFrontWriteMask(glState.getDepthStencilState(),
mGraphicsPipelineDesc->updateStencilFrontWriteMask(&mGraphicsPipelineTransition,
glState.getDepthStencilState(),
glState.getDrawFramebuffer());
mGraphicsPipelineDesc->updateStencilBackWriteMask(glState.getDepthStencilState(),
mGraphicsPipelineDesc->updateStencilBackWriteMask(&mGraphicsPipelineTransition,
glState.getDepthStencilState(),
glState.getDrawFramebuffer());
mGraphicsPipelineDesc->updateRenderPassDesc(mDrawFramebuffer->getRenderPassDesc());
mGraphicsPipelineDesc->updateRenderPassDesc(&mGraphicsPipelineTransition,
mDrawFramebuffer->getRenderPassDesc());
break;
}
case gl::State::DIRTY_BIT_RENDERBUFFER_BINDING:
......@@ -851,6 +898,8 @@ angle::Result ContextVk::syncState(const gl::Context *context,
bool useVertexBuffer = (mProgram->getState().getMaxActiveAttribLocation());
mNonIndexedDirtyBitsMask.set(DIRTY_BIT_VERTEX_BUFFERS, useVertexBuffer);
mIndexedDirtyBitsMask.set(DIRTY_BIT_VERTEX_BUFFERS, useVertexBuffer);
mCurrentPipeline = nullptr;
mGraphicsPipelineTransition.reset();
break;
}
case gl::State::DIRTY_BIT_TEXTURE_BINDINGS:
......@@ -1054,7 +1103,7 @@ void ContextVk::onFramebufferChange(const vk::RenderPassDesc &renderPassDesc)
{
// Ensure that the RenderPass description is updated.
invalidateCurrentPipeline();
mGraphicsPipelineDesc->updateRenderPassDesc(renderPassDesc);
mGraphicsPipelineDesc->updateRenderPassDesc(&mGraphicsPipelineTransition, renderPassDesc);
}
angle::Result ContextVk::dispatchCompute(const gl::Context *context,
......@@ -1279,4 +1328,5 @@ angle::Result ContextVk::updateDefaultAttribute(size_t attribIndex)
static_cast<uint32_t>(offset));
return angle::Result::Continue;
}
} // namespace rx
......@@ -179,7 +179,8 @@ class ContextVk : public ContextImpl, public vk::Context
GLuint relativeOffset)
{
invalidateVertexAndIndexBuffers();
mGraphicsPipelineDesc->updateVertexInput(static_cast<uint32_t>(attribIndex), stride,
mGraphicsPipelineDesc->updateVertexInput(&mGraphicsPipelineTransition,
static_cast<uint32_t>(attribIndex), stride,
divisor, format, relativeOffset);
}
......@@ -270,7 +271,6 @@ class ContextVk : public ContextImpl, public vk::Context
mDirtyBits.set(DIRTY_BIT_PIPELINE);
mDirtyBits.set(DIRTY_BIT_VIEWPORT);
mDirtyBits.set(DIRTY_BIT_SCISSOR);
mCurrentPipeline = nullptr;
}
void invalidateCurrentTextures();
......@@ -291,12 +291,13 @@ class ContextVk : public ContextImpl, public vk::Context
angle::Result handleDirtyViewport(const gl::Context *context, vk::CommandBuffer *commandBuffer);
angle::Result handleDirtyScissor(const gl::Context *context, vk::CommandBuffer *commandBuffer);
vk::PipelineAndSerial *mCurrentPipeline;
vk::PipelineHelper *mCurrentPipeline;
gl::PrimitiveMode mCurrentDrawMode;
// Keep a cached pipeline description structure that can be used to query the pipeline cache.
// Kept in a pointer so allocations can be aligned, and structs can be portably packed.
std::unique_ptr<vk::GraphicsPipelineDesc> mGraphicsPipelineDesc;
vk::GraphicsPipelineTransitionBits mGraphicsPipelineTransition;
// The descriptor pools are externally synchronized, so cannot be accessed from different
// threads simultaneously. Hence, we keep them in the ContextVk instead of the RendererVk.
......
......@@ -123,7 +123,8 @@ class ProgramVk : public ProgramImpl
gl::PrimitiveMode mode,
const vk::GraphicsPipelineDesc &desc,
const gl::AttributesMask &activeAttribLocations,
vk::PipelineAndSerial **pipelineOut)
const vk::GraphicsPipelineDesc **descPtrOut,
vk::PipelineHelper **pipelineOut)
{
vk::ShaderProgramHelper *shaderProgram;
ANGLE_TRY(initShaders(contextVk, mode, &shaderProgram));
......@@ -132,7 +133,7 @@ class ProgramVk : public ProgramImpl
return shaderProgram->getGraphicsPipeline(
contextVk, &renderer->getRenderPassCache(), renderer->getPipelineCache(),
renderer->getCurrentQueueSerial(), mPipelineLayout.get(), desc, activeAttribLocations,
pipelineOut);
descPtrOut, pipelineOut);
}
private:
......
# ANGLE: Vulkan Back-end
ANGLE's Vulkan back-end implementation lives in this folder.
[Vulkan](https://www.khronos.org/vulkan/) is an explicit graphics API. It has a lot in common with
other explicit APIs such as Microsoft's
[D3D12](https://docs.microsoft.com/en-us/windows/desktop/direct3d12/directx-12-programming-guide)
and Apple's [Metal](https://developer.apple.com/metal/). Compared to APIs like OpenGL or D3D11
explicit APIs can offer a number of significant benefits:
* Lower API call CPU overhead.
* A smaller API surface with more direct hardware control.
* Better support for multi-core programming.
* Vulkan in particular has open-source tooling and tests.
## Back-end Design
The [RendererVk](RendererVk.cpp) is a singleton. RendererVk owns shared global resources like the
[VkDevice](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkDevice.html),
[VkQueue](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkQueue.html), the
[Vulkan format tables](vk_format_utils.h) and [internal Vulkan shaders](shaders). The back-end
creates a new [ContextVk](ContextVk.cpp) instance to manage each allocated OpenGL Context. ContextVk
processes state changes and handles action commands like `glDrawArrays` and `glDrawElements`.
### Fast OpenGL State Transitions
Typical OpenGL programs issue a few small state change commands between draw call commands. We want
the typical app's use case to be as fast as possible so this leads to unique performance challenges.
Vulkan is quite different from OpenGL because it requires a separate compiled
[VkPipeline](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkPipeline.html)
for each state vector. Compiling VkPipelines is multiple orders of magnitude slower than enabling or
disabling an OpenGL render state. To speed this up we use three levels of caching when transitioning
states in the Vulkan back-end.
The first level is the driver's
[VkPipelineCache](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkPipelineCache.html). The driver cache reduces pipeline recompilation time
significantly. But even cached pipeline recompilations are orders of magnitude slower than OpenGL
state changes.
The second level cache is an ANGLE-owned hash map from OpenGL state vectors to compiled pipelines.
See
[GraphicsPipelineCache](https://chromium.googlesource.com/angle/angle/+/225f08bf85a368f905362cdd1366e4795680452c/src/libANGLE/renderer/vulkan/vk_cache_utils.h#498)
in [vk_cache_utils.h](vk_cache_utils.h). ANGLE's
[GraphicsPipelineDesc](https://chromium.googlesource.com/angle/angle/+/225f08bf85a368f905362cdd1366e4795680452c/src/libANGLE/renderer/vulkan/vk_cache_utils.h#244)
class is a tightly packed 256-byte description of the current OpenGL rendering state. We
also use [xxHash](https://github.com/Cyan4973/xxHash) for the fastest possible hash computation.
The hash map speeds up state changes considerably. But it is still significantly slower than OpenGL
implementations.
To get best performance we use a transition table from each OpenGL state vector to neighbouring
state vectors. The transition table points from GraphicsPipelineCache entries directly to
neighbouring VkPipeline objects. When the application changes state the state change bits are
recorded into a compact bit mask that covers the GraphicsPipelineDesc state vector. Then on the next
draw call we scan the transition bit mask and compare the GraphicsPipelineDesc of the current state
vector and the state vector of the cached transition. With the hash map we compute a hash over the
entire state vector and then do a 256-byte `memcmp` to guard against hash collisions. With the
transition table we will only compare as many bytes as were changed in the transition bit mask. By
skipping the expensive hashing and `memcmp` we can get performance as good as or better than
native OpenGL drivers.
Note that the current design of the transition table stores transitions in an unsorted list. If
applications map from one state to many this will slow down the transition time. This could be
improved in the future using a faster look up. For instance we could keep a sorted transition table
or use a small hash map for transitions.
......@@ -325,25 +325,30 @@ angle::Result UtilsVk::setupProgram(vk::Context *context,
Serial serial = renderer->getCurrentQueueSerial();
vk::PipelineAndSerial *pipelineAndSerial;
if (isCompute)
{
vk::PipelineAndSerial *pipelineAndSerial;
program->setShader(gl::ShaderType::Compute, fsCsShader);
ANGLE_TRY(program->getComputePipeline(context, pipelineLayout.get(), &pipelineAndSerial));
pipelineAndSerial->updateSerial(serial);
commandBuffer->bindPipeline(bindPoint, pipelineAndSerial->get());
}
else
{
program->setShader(gl::ShaderType::Vertex, vsShader);
program->setShader(gl::ShaderType::Fragment, fsCsShader);
// This value is not used but is passed to getGraphicsPipeline to avoid a nullptr check.
const vk::GraphicsPipelineDesc *descPtr;
vk::PipelineHelper *helper;
ANGLE_TRY(program->getGraphicsPipeline(
context, &renderer->getRenderPassCache(), renderer->getPipelineCache(), serial,
pipelineLayout.get(), *pipelineDesc, gl::AttributesMask(), &pipelineAndSerial));
pipelineLayout.get(), *pipelineDesc, gl::AttributesMask(), &descPtr, &helper));
helper->updateSerial(serial);
commandBuffer->bindPipeline(bindPoint, helper->getPipeline());
}
commandBuffer->bindPipeline(bindPoint, pipelineAndSerial->get());
pipelineAndSerial->updateSerial(serial);
if (descriptorSet != VK_NULL_HANDLE)
{
commandBuffer->bindDescriptorSets(bindPoint, pipelineLayout.get(), 0, 1, &descriptorSet, 0,
......@@ -619,8 +624,8 @@ angle::Result UtilsVk::clearImage(ContextVk *contextVk,
vk::GraphicsPipelineDesc pipelineDesc;
pipelineDesc.initDefaults();
pipelineDesc.updateColorWriteMask(params.colorMaskFlags, *params.alphaMask);
pipelineDesc.updateRenderPassDesc(*params.renderPassDesc);
pipelineDesc.setColorWriteMask(params.colorMaskFlags, *params.alphaMask);
pipelineDesc.setRenderPassDesc(*params.renderPassDesc);
vk::ShaderLibrary &shaderLibrary = renderer->getShaderLibrary();
vk::RefCounted<vk::ShaderAndSerial> *vertexShader = nullptr;
......@@ -707,7 +712,7 @@ angle::Result UtilsVk::copyImage(vk::Context *context,
vk::GraphicsPipelineDesc pipelineDesc;
pipelineDesc.initDefaults();
pipelineDesc.updateRenderPassDesc(renderPassDesc);
pipelineDesc.setRenderPassDesc(renderPassDesc);
gl::Rectangle renderArea;
renderArea.x = params.destOffset[0];
......
......@@ -158,10 +158,10 @@ void UnpackStencilState(const vk::PackedStencilOpState &packedState,
uint8_t stencilReference,
VkStencilOpState *stateOut)
{
stateOut->failOp = static_cast<VkStencilOp>(packedState.failOp);
stateOut->passOp = static_cast<VkStencilOp>(packedState.passOp);
stateOut->depthFailOp = static_cast<VkStencilOp>(packedState.depthFailOp);
stateOut->compareOp = static_cast<VkCompareOp>(packedState.compareOp);
stateOut->failOp = static_cast<VkStencilOp>(packedState.ops.fail);
stateOut->passOp = static_cast<VkStencilOp>(packedState.ops.pass);
stateOut->depthFailOp = static_cast<VkStencilOp>(packedState.ops.depthFail);
stateOut->compareOp = static_cast<VkCompareOp>(packedState.ops.compare);
stateOut->compareMask = packedState.compareMask;
stateOut->writeMask = packedState.writeMask;
stateOut->reference = stencilReference;
......@@ -281,6 +281,54 @@ DestT Int4Array_Get(const uint8_t *arrayBytes, uint32_t arrayIndex)
// Stores |rhs| into the field |lhs|, casting it to the field's own type, and then asserts that
// reading the field back (cast to the type of |rhs|) still compares equal to |rhs|, i.e. that the
// store did not lose information.
// The body is wrapped in do/while(0) so the expansion is exactly one statement; this keeps
// "if (c) SetBitField(a, b); else ..." well-formed. Call sites supply the trailing semicolon.
// Note: |rhs| is expanded more than once, so it should not have side effects.
#define SetBitField(lhs, rhs)                                             \
    do                                                                    \
    {                                                                     \
        lhs = static_cast<typename std::decay<decltype(lhs)>::type>(rhs); \
        ASSERT(static_cast<decltype(rhs)>(lhs) == rhs);                   \
    } while (0)
// When converting a byte number to a transition bit index we can shift instead of divide.
constexpr size_t kTransitionByteShift = Log2(kGraphicsPipelineDirtyBitBytes);
// When converting a number of bits offset to a transition bit index we can also shift.
constexpr size_t kBitsPerByte        = 8;
constexpr size_t kTransitionBitShift = kTransitionByteShift + Log2(kBitsPerByte);
// Helper macro to map from a PipelineDesc struct and field to a dirty bit index.
// Uses the 'offsetof' macro to compute the offset 'Member' within the PipelineDesc
// and the offset of 'Field' within 'Member'. We can optimize the dirty bit setting by computing
// the shifted dirty bit at compile time instead of calling "set".
// 'Member' and 'Field' are deliberately not parenthesized: they are used as an offsetof member
// designator and as a decltype operand, where extra parentheses would change the meaning.
#define ANGLE_GET_TRANSITION_BIT(Member, Field)                                      \
    ((offsetof(GraphicsPipelineDesc, Member) + offsetof(decltype(Member), Field)) >> \
     kTransitionByteShift)
// Indexed dirty bits cannot be entirely computed at compile time since the index is passed to
// the update function.
// 'Index' and 'BitWidth' are parenthesized so that expression arguments (e.g. "i + 1") are
// multiplied as a whole instead of being split by operator precedence.
#define ANGLE_GET_INDEXED_TRANSITION_BIT(Member, Field, Index, BitWidth) \
    ((((BitWidth) * (Index)) >> kTransitionBitShift) + ANGLE_GET_TRANSITION_BIT(Member, Field))
// Returns true when two recorded transitions are interchangeable: both carry the same set of
// transition bits, and the two pipeline descriptions hold equal 32-bit words at every index
// named by those bits. Words outside the bit set are not compared here.
bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
                                     GraphicsPipelineTransitionBits bitsB,
                                     const GraphicsPipelineDesc &descA,
                                     const GraphicsPipelineDesc &descB)
{
    // Each dirty bit currently stands for one 4-byte word of the pipeline description.
    // Design note: 8-byte words with a 32-bit mask would let the scan below cover twice as many
    // description bytes per iteration, which would speed up parts of the code. The trade-off is
    // that identical masks would then more often stand for different transitions, which may
    // cost more for applications that use many transitions. Worth revisiting with measurements
    // for different bit widths.
    static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");

    if (bitsA == bitsB)
    {
        // NOTE(review): getPtr<uint32_t>() presumably exposes the description as an array of
        // 32-bit words - confirm against GraphicsPipelineDesc.
        const uint32_t *wordsA = descA.getPtr<uint32_t>();
        const uint32_t *wordsB = descB.getPtr<uint32_t>();

        // Only visit the words flagged as changed; the first difference settles the answer.
        for (size_t wordIndex : bitsA)
        {
            if (wordsA[wordIndex] != wordsB[wordIndex])
            {
                return false;
            }
        }
        return true;
    }

    // Different masks can never describe the same transition.
    return false;
}
} // anonymous namespace
// RenderPassDesc implementation.
......@@ -391,53 +439,52 @@ bool GraphicsPipelineDesc::operator==(const GraphicsPipelineDesc &other) const
void GraphicsPipelineDesc::initDefaults()
{
mRasterizationAndMultisampleStateInfo.depthClampEnable = 0;
mRasterizationAndMultisampleStateInfo.rasterizationDiscardEnable = 0;
SetBitField(mRasterizationAndMultisampleStateInfo.polygonMode, VK_POLYGON_MODE_FILL);
SetBitField(mRasterizationAndMultisampleStateInfo.cullMode, VK_CULL_MODE_NONE);
SetBitField(mRasterizationAndMultisampleStateInfo.frontFace, VK_FRONT_FACE_CLOCKWISE);
mRasterizationAndMultisampleStateInfo.depthBiasEnable = 0;
mRasterizationAndMultisampleStateInfo.bits.depthClampEnable = 0;
mRasterizationAndMultisampleStateInfo.bits.rasterizationDiscardEnable = 0;
SetBitField(mRasterizationAndMultisampleStateInfo.bits.polygonMode, VK_POLYGON_MODE_FILL);
SetBitField(mRasterizationAndMultisampleStateInfo.bits.cullMode, VK_CULL_MODE_NONE);
SetBitField(mRasterizationAndMultisampleStateInfo.bits.frontFace, VK_FRONT_FACE_CLOCKWISE);
mRasterizationAndMultisampleStateInfo.bits.depthBiasEnable = 0;
mRasterizationAndMultisampleStateInfo.depthBiasConstantFactor = 0.0f;
mRasterizationAndMultisampleStateInfo.depthBiasClamp = 0.0f;
mRasterizationAndMultisampleStateInfo.depthBiasSlopeFactor = 0.0f;
mRasterizationAndMultisampleStateInfo.lineWidth = 1.0f;
mRasterizationAndMultisampleStateInfo.rasterizationSamples = 1;
mRasterizationAndMultisampleStateInfo.sampleShadingEnable = 0;
mRasterizationAndMultisampleStateInfo.minSampleShading = 0.0f;
mRasterizationAndMultisampleStateInfo.bits.rasterizationSamples = 1;
mRasterizationAndMultisampleStateInfo.bits.sampleShadingEnable = 0;
mRasterizationAndMultisampleStateInfo.minSampleShading = 0.0f;
for (uint32_t &sampleMask : mRasterizationAndMultisampleStateInfo.sampleMask)
{
sampleMask = 0;
}
mRasterizationAndMultisampleStateInfo.alphaToCoverageEnable = 0;
mRasterizationAndMultisampleStateInfo.alphaToOneEnable = 0;
mRasterizationAndMultisampleStateInfo.bits.alphaToCoverageEnable = 0;
mRasterizationAndMultisampleStateInfo.bits.alphaToOneEnable = 0;
mDepthStencilStateInfo.depthTestEnable = 0;
mDepthStencilStateInfo.depthWriteEnable = 1;
mDepthStencilStateInfo.enable.depthTest = 0;
mDepthStencilStateInfo.enable.depthWrite = 1;
SetBitField(mDepthStencilStateInfo.depthCompareOp, VK_COMPARE_OP_LESS);
mDepthStencilStateInfo.depthBoundsTestEnable = 0;
mDepthStencilStateInfo.stencilTestEnable = 0;
mDepthStencilStateInfo.minDepthBounds = 0.0f;
mDepthStencilStateInfo.maxDepthBounds = 0.0f;
SetBitField(mDepthStencilStateInfo.front.failOp, VK_STENCIL_OP_KEEP);
SetBitField(mDepthStencilStateInfo.front.passOp, VK_STENCIL_OP_KEEP);
SetBitField(mDepthStencilStateInfo.front.depthFailOp, VK_STENCIL_OP_KEEP);
SetBitField(mDepthStencilStateInfo.front.compareOp, VK_COMPARE_OP_ALWAYS);
mDepthStencilStateInfo.enable.depthBoundsTest = 0;
mDepthStencilStateInfo.enable.stencilTest = 0;
mDepthStencilStateInfo.minDepthBounds = 0.0f;
mDepthStencilStateInfo.maxDepthBounds = 0.0f;
SetBitField(mDepthStencilStateInfo.front.ops.fail, VK_STENCIL_OP_KEEP);
SetBitField(mDepthStencilStateInfo.front.ops.pass, VK_STENCIL_OP_KEEP);
SetBitField(mDepthStencilStateInfo.front.ops.depthFail, VK_STENCIL_OP_KEEP);
SetBitField(mDepthStencilStateInfo.front.ops.compare, VK_COMPARE_OP_ALWAYS);
SetBitField(mDepthStencilStateInfo.front.compareMask, 0xFF);
SetBitField(mDepthStencilStateInfo.front.writeMask, 0xFF);
mDepthStencilStateInfo.frontStencilReference = 0;
SetBitField(mDepthStencilStateInfo.back.failOp, VK_STENCIL_OP_KEEP);
SetBitField(mDepthStencilStateInfo.back.passOp, VK_STENCIL_OP_KEEP);
SetBitField(mDepthStencilStateInfo.back.depthFailOp, VK_STENCIL_OP_KEEP);
SetBitField(mDepthStencilStateInfo.back.compareOp, VK_COMPARE_OP_ALWAYS);
SetBitField(mDepthStencilStateInfo.back.ops.fail, VK_STENCIL_OP_KEEP);
SetBitField(mDepthStencilStateInfo.back.ops.pass, VK_STENCIL_OP_KEEP);
SetBitField(mDepthStencilStateInfo.back.ops.depthFail, VK_STENCIL_OP_KEEP);
SetBitField(mDepthStencilStateInfo.back.ops.compare, VK_COMPARE_OP_ALWAYS);
SetBitField(mDepthStencilStateInfo.back.compareMask, 0xFF);
SetBitField(mDepthStencilStateInfo.back.writeMask, 0xFF);
mDepthStencilStateInfo.backStencilReference = 0;
PackedInputAssemblyAndColorBlendStateInfo &inputAndBlend =
mInputAssembltyAndColorBlendStateInfo;
inputAndBlend.logicOpEnable = 0;
inputAndBlend.logicOp = static_cast<uint32_t>(VK_LOGIC_OP_CLEAR);
PackedInputAssemblyAndColorBlendStateInfo &inputAndBlend = mInputAssemblyAndColorBlendStateInfo;
inputAndBlend.logic.opEnable = 0;
inputAndBlend.logic.op = static_cast<uint32_t>(VK_LOGIC_OP_CLEAR);
inputAndBlend.blendEnableMask = 0;
inputAndBlend.blendConstants[0] = 0.0f;
inputAndBlend.blendConstants[1] = 0.0f;
......@@ -464,8 +511,8 @@ void GraphicsPipelineDesc::initDefaults()
&inputAndBlend.attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS],
blendAttachmentState);
inputAndBlend.topology = static_cast<uint16_t>(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
inputAndBlend.primitiveRestartEnable = 0;
inputAndBlend.primitive.topology = static_cast<uint16_t>(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
inputAndBlend.primitive.restartEnable = 0;
}
angle::Result GraphicsPipelineDesc::initializePipeline(
......@@ -550,9 +597,9 @@ angle::Result GraphicsPipelineDesc::initializePipeline(
inputAssemblyState.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
inputAssemblyState.flags = 0;
inputAssemblyState.topology =
static_cast<VkPrimitiveTopology>(mInputAssembltyAndColorBlendStateInfo.topology);
static_cast<VkPrimitiveTopology>(mInputAssemblyAndColorBlendStateInfo.primitive.topology);
inputAssemblyState.primitiveRestartEnable =
static_cast<VkBool32>(mInputAssembltyAndColorBlendStateInfo.primitiveRestartEnable);
static_cast<VkBool32>(mInputAssemblyAndColorBlendStateInfo.primitive.restartEnable);
// Set initial viewport and scissor state.
......@@ -569,13 +616,13 @@ angle::Result GraphicsPipelineDesc::initializePipeline(
// Rasterizer state.
rasterState.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
rasterState.flags = 0;
rasterState.depthClampEnable = static_cast<VkBool32>(rasterAndMS.depthClampEnable);
rasterState.depthClampEnable = static_cast<VkBool32>(rasterAndMS.bits.depthClampEnable);
rasterState.rasterizerDiscardEnable =
static_cast<VkBool32>(rasterAndMS.rasterizationDiscardEnable);
rasterState.polygonMode = static_cast<VkPolygonMode>(rasterAndMS.polygonMode);
rasterState.cullMode = static_cast<VkCullModeFlags>(rasterAndMS.cullMode);
rasterState.frontFace = static_cast<VkFrontFace>(rasterAndMS.frontFace);
rasterState.depthBiasEnable = static_cast<VkBool32>(rasterAndMS.depthBiasEnable);
static_cast<VkBool32>(rasterAndMS.bits.rasterizationDiscardEnable);
rasterState.polygonMode = static_cast<VkPolygonMode>(rasterAndMS.bits.polygonMode);
rasterState.cullMode = static_cast<VkCullModeFlags>(rasterAndMS.bits.cullMode);
rasterState.frontFace = static_cast<VkFrontFace>(rasterAndMS.bits.frontFace);
rasterState.depthBiasEnable = static_cast<VkBool32>(rasterAndMS.bits.depthBiasEnable);
rasterState.depthBiasConstantFactor = rasterAndMS.depthBiasConstantFactor;
rasterState.depthBiasClamp = rasterAndMS.depthBiasClamp;
rasterState.depthBiasSlopeFactor = rasterAndMS.depthBiasSlopeFactor;
......@@ -584,28 +631,30 @@ angle::Result GraphicsPipelineDesc::initializePipeline(
// Multisample state.
multisampleState.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
multisampleState.flags = 0;
multisampleState.rasterizationSamples = gl_vk::GetSamples(rasterAndMS.rasterizationSamples);
multisampleState.sampleShadingEnable = static_cast<VkBool32>(rasterAndMS.sampleShadingEnable);
multisampleState.minSampleShading = rasterAndMS.minSampleShading;
multisampleState.rasterizationSamples =
gl_vk::GetSamples(rasterAndMS.bits.rasterizationSamples);
multisampleState.sampleShadingEnable =
static_cast<VkBool32>(rasterAndMS.bits.sampleShadingEnable);
multisampleState.minSampleShading = rasterAndMS.minSampleShading;
// TODO(jmadill): sample masks
multisampleState.pSampleMask = nullptr;
multisampleState.alphaToCoverageEnable =
static_cast<VkBool32>(rasterAndMS.alphaToCoverageEnable);
multisampleState.alphaToOneEnable = static_cast<VkBool32>(rasterAndMS.alphaToOneEnable);
static_cast<VkBool32>(rasterAndMS.bits.alphaToCoverageEnable);
multisampleState.alphaToOneEnable = static_cast<VkBool32>(rasterAndMS.bits.alphaToOneEnable);
// Depth/stencil state.
depthStencilState.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
depthStencilState.flags = 0;
depthStencilState.depthTestEnable =
static_cast<VkBool32>(mDepthStencilStateInfo.depthTestEnable);
static_cast<VkBool32>(mDepthStencilStateInfo.enable.depthTest);
depthStencilState.depthWriteEnable =
static_cast<VkBool32>(mDepthStencilStateInfo.depthWriteEnable);
static_cast<VkBool32>(mDepthStencilStateInfo.enable.depthWrite);
depthStencilState.depthCompareOp =
static_cast<VkCompareOp>(mDepthStencilStateInfo.depthCompareOp);
depthStencilState.depthBoundsTestEnable =
static_cast<VkBool32>(mDepthStencilStateInfo.depthBoundsTestEnable);
static_cast<VkBool32>(mDepthStencilStateInfo.enable.depthBoundsTest);
depthStencilState.stencilTestEnable =
static_cast<VkBool32>(mDepthStencilStateInfo.stencilTestEnable);
static_cast<VkBool32>(mDepthStencilStateInfo.enable.stencilTest);
UnpackStencilState(mDepthStencilStateInfo.front, mDepthStencilStateInfo.frontStencilReference,
&depthStencilState.front);
UnpackStencilState(mDepthStencilStateInfo.back, mDepthStencilStateInfo.backStencilReference,
......@@ -614,12 +663,12 @@ angle::Result GraphicsPipelineDesc::initializePipeline(
depthStencilState.maxDepthBounds = mDepthStencilStateInfo.maxDepthBounds;
const PackedInputAssemblyAndColorBlendStateInfo &inputAndBlend =
mInputAssembltyAndColorBlendStateInfo;
mInputAssemblyAndColorBlendStateInfo;
blendState.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
blendState.flags = 0;
blendState.logicOpEnable = static_cast<VkBool32>(inputAndBlend.logicOpEnable);
blendState.logicOp = static_cast<VkLogicOp>(inputAndBlend.logicOp);
blendState.logicOpEnable = static_cast<VkBool32>(inputAndBlend.logic.opEnable);
blendState.logicOp = static_cast<VkLogicOp>(inputAndBlend.logic.op);
blendState.attachmentCount = mRenderPassDesc.colorAttachmentCount();
blendState.pAttachments = blendAttachmentState.data();
......@@ -672,7 +721,8 @@ angle::Result GraphicsPipelineDesc::initializePipeline(
return angle::Result::Continue;
}
void GraphicsPipelineDesc::updateVertexInput(uint32_t attribIndex,
void GraphicsPipelineDesc::updateVertexInput(GraphicsPipelineTransitionBits *transition,
uint32_t attribIndex,
GLuint stride,
GLuint divisor,
VkFormat format,
......@@ -682,90 +732,124 @@ void GraphicsPipelineDesc::updateVertexInput(uint32_t attribIndex,
bindingDesc.stride = static_cast<uint16_t>(stride);
bindingDesc.inputRate = static_cast<uint16_t>(divisor > 0 ? VK_VERTEX_INPUT_RATE_INSTANCE
: VK_VERTEX_INPUT_RATE_VERTEX);
constexpr size_t kBindingBaseBit =
offsetof(GraphicsPipelineDesc, mVertexInputBindings) >> kTransitionByteShift;
transition->set(kBindingBaseBit + attribIndex);
static_assert(kVertexInputBindingSize == 4, "Size mismatch");
ASSERT(format <= std::numeric_limits<uint16_t>::max());
if (format == VK_FORMAT_UNDEFINED)
{
UNIMPLEMENTED();
}
mVertexInputAttribs.formats[attribIndex] = static_cast<uint8_t>(format);
mVertexInputAttribs.offsets[attribIndex] = static_cast<uint16_t>(relativeOffset);
SetBitField(mVertexInputAttribs.formats[attribIndex], format);
SetBitField(mVertexInputAttribs.offsets[attribIndex], relativeOffset);
transition->set(ANGLE_GET_INDEXED_TRANSITION_BIT(mVertexInputAttribs, formats, attribIndex, 8));
transition->set(
ANGLE_GET_INDEXED_TRANSITION_BIT(mVertexInputAttribs, offsets, attribIndex, 16));
}
void GraphicsPipelineDesc::updateTopology(gl::PrimitiveMode drawMode)
void GraphicsPipelineDesc::updateTopology(GraphicsPipelineTransitionBits *transition,
gl::PrimitiveMode drawMode)
{
mInputAssembltyAndColorBlendStateInfo.topology =
static_cast<uint32_t>(gl_vk::GetPrimitiveTopology(drawMode));
VkPrimitiveTopology vkTopology = gl_vk::GetPrimitiveTopology(drawMode);
SetBitField(mInputAssemblyAndColorBlendStateInfo.primitive.topology, vkTopology);
transition->set(ANGLE_GET_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo, primitive));
}
void GraphicsPipelineDesc::updateCullMode(const gl::RasterizerState &rasterState)
void GraphicsPipelineDesc::updateCullMode(GraphicsPipelineTransitionBits *transition,
const gl::RasterizerState &rasterState)
{
mRasterizationAndMultisampleStateInfo.cullMode =
mRasterizationAndMultisampleStateInfo.bits.cullMode =
static_cast<uint16_t>(gl_vk::GetCullMode(rasterState));
transition->set(ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, bits));
}
void GraphicsPipelineDesc::updateFrontFace(const gl::RasterizerState &rasterState,
void GraphicsPipelineDesc::updateFrontFace(GraphicsPipelineTransitionBits *transition,
const gl::RasterizerState &rasterState,
bool invertFrontFace)
{
mRasterizationAndMultisampleStateInfo.frontFace =
mRasterizationAndMultisampleStateInfo.bits.frontFace =
static_cast<uint16_t>(gl_vk::GetFrontFace(rasterState.frontFace, invertFrontFace));
transition->set(ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, bits));
}
void GraphicsPipelineDesc::updateLineWidth(float lineWidth)
// Records a new line width in the packed rasterization state and marks the transition bit
// that covers the lineWidth field.
void GraphicsPipelineDesc::updateLineWidth(GraphicsPipelineTransitionBits *transition,
                                           float lineWidth)
{
    const size_t dirtyBit =
        ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, lineWidth);
    transition->set(dirtyBit);

    mRasterizationAndMultisampleStateInfo.lineWidth = lineWidth;
}
const RenderPassDesc &GraphicsPipelineDesc::getRenderPassDesc() const
void GraphicsPipelineDesc::updateBlendColor(GraphicsPipelineTransitionBits *transition,
const gl::ColorF &color)
{
return mRenderPassDesc;
}
mInputAssemblyAndColorBlendStateInfo.blendConstants[0] = color.red;
mInputAssemblyAndColorBlendStateInfo.blendConstants[1] = color.green;
mInputAssemblyAndColorBlendStateInfo.blendConstants[2] = color.blue;
mInputAssemblyAndColorBlendStateInfo.blendConstants[3] = color.alpha;
constexpr size_t kSize = sizeof(mInputAssemblyAndColorBlendStateInfo.blendConstants[0]) * 8;
void GraphicsPipelineDesc::updateBlendColor(const gl::ColorF &color)
{
mInputAssembltyAndColorBlendStateInfo.blendConstants[0] = color.red;
mInputAssembltyAndColorBlendStateInfo.blendConstants[1] = color.green;
mInputAssembltyAndColorBlendStateInfo.blendConstants[2] = color.blue;
mInputAssembltyAndColorBlendStateInfo.blendConstants[3] = color.alpha;
for (int index = 0; index < 4; ++index)
{
const size_t kBit = ANGLE_GET_INDEXED_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo,
blendConstants, index, kSize);
transition->set(kBit);
}
}
void GraphicsPipelineDesc::updateBlendEnabled(bool isBlendEnabled)
void GraphicsPipelineDesc::updateBlendEnabled(GraphicsPipelineTransitionBits *transition,
bool isBlendEnabled)
{
gl::DrawBufferMask blendEnabled;
if (isBlendEnabled)
blendEnabled.set();
mInputAssembltyAndColorBlendStateInfo.blendEnableMask =
mInputAssemblyAndColorBlendStateInfo.blendEnableMask =
static_cast<uint8_t>(blendEnabled.bits());
transition->set(
ANGLE_GET_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo, blendEnableMask));
}
void GraphicsPipelineDesc::updateBlendEquations(const gl::BlendState &blendState)
void GraphicsPipelineDesc::updateBlendEquations(GraphicsPipelineTransitionBits *transition,
const gl::BlendState &blendState)
{
for (PackedColorBlendAttachmentState &blendAttachmentState :
mInputAssembltyAndColorBlendStateInfo.attachments)
constexpr size_t kSize = sizeof(PackedColorBlendAttachmentState) * 8;
for (size_t attachmentIndex = 0; attachmentIndex < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
++attachmentIndex)
{
PackedColorBlendAttachmentState &blendAttachmentState =
mInputAssemblyAndColorBlendStateInfo.attachments[attachmentIndex];
blendAttachmentState.colorBlendOp = PackGLBlendOp(blendState.blendEquationRGB);
blendAttachmentState.alphaBlendOp = PackGLBlendOp(blendState.blendEquationAlpha);
transition->set(ANGLE_GET_INDEXED_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo,
attachments, attachmentIndex, kSize));
}
}
void GraphicsPipelineDesc::updateBlendFuncs(const gl::BlendState &blendState)
void GraphicsPipelineDesc::updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
const gl::BlendState &blendState)
{
for (PackedColorBlendAttachmentState &blendAttachmentState :
mInputAssembltyAndColorBlendStateInfo.attachments)
constexpr size_t kSize = sizeof(PackedColorBlendAttachmentState) * 8;
for (size_t attachmentIndex = 0; attachmentIndex < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
++attachmentIndex)
{
PackedColorBlendAttachmentState &blendAttachmentState =
mInputAssemblyAndColorBlendStateInfo.attachments[attachmentIndex];
blendAttachmentState.srcColorBlendFactor = PackGLBlendFactor(blendState.sourceBlendRGB);
blendAttachmentState.dstColorBlendFactor = PackGLBlendFactor(blendState.destBlendRGB);
blendAttachmentState.srcAlphaBlendFactor = PackGLBlendFactor(blendState.sourceBlendAlpha);
blendAttachmentState.dstAlphaBlendFactor = PackGLBlendFactor(blendState.destBlendAlpha);
transition->set(ANGLE_GET_INDEXED_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo,
attachments, attachmentIndex, kSize));
}
}
void GraphicsPipelineDesc::updateColorWriteMask(VkColorComponentFlags colorComponentFlags,
const gl::DrawBufferMask &alphaMask)
void GraphicsPipelineDesc::setColorWriteMask(VkColorComponentFlags colorComponentFlags,
const gl::DrawBufferMask &alphaMask)
{
PackedInputAssemblyAndColorBlendStateInfo &inputAndBlend =
mInputAssembltyAndColorBlendStateInfo;
PackedInputAssemblyAndColorBlendStateInfo &inputAndBlend = mInputAssemblyAndColorBlendStateInfo;
uint8_t colorMask = static_cast<uint8_t>(colorComponentFlags);
for (size_t colorIndex = 0; colorIndex < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS; colorIndex++)
......@@ -775,105 +859,163 @@ void GraphicsPipelineDesc::updateColorWriteMask(VkColorComponentFlags colorCompo
}
}
void GraphicsPipelineDesc::updateDepthTestEnabled(const gl::DepthStencilState &depthStencilState,
// Transition-tracking variant of setColorWriteMask: applies the mask, then marks the
// transition bit for every draw buffer's 4-bit slot in colorWriteMaskBits.
void GraphicsPipelineDesc::updateColorWriteMask(GraphicsPipelineTransitionBits *transition,
                                                VkColorComponentFlags colorComponentFlags,
                                                const gl::DrawBufferMask &alphaMask)
{
    setColorWriteMask(colorComponentFlags, alphaMask);

    size_t drawBuffer = 0;
    while (drawBuffer < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS)
    {
        const size_t dirtyBit = ANGLE_GET_INDEXED_TRANSITION_BIT(
            mInputAssemblyAndColorBlendStateInfo, colorWriteMaskBits, drawBuffer, 4);
        transition->set(dirtyBit);
        ++drawBuffer;
    }
}
void GraphicsPipelineDesc::updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer)
{
// Only enable the depth test if the draw framebuffer has a depth buffer. It's possible that
// we're emulating a stencil-only buffer with a depth-stencil buffer
mDepthStencilStateInfo.depthTestEnable =
mDepthStencilStateInfo.enable.depthTest =
static_cast<uint8_t>(depthStencilState.depthTest && drawFramebuffer->hasDepth());
transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, enable));
}
void GraphicsPipelineDesc::updateDepthFunc(const gl::DepthStencilState &depthStencilState)
// Packs the GL depth function into the depth/stencil state and marks the transition bit
// that covers depthCompareOp.
void GraphicsPipelineDesc::updateDepthFunc(GraphicsPipelineTransitionBits *transition,
                                           const gl::DepthStencilState &depthStencilState)
{
    const size_t dirtyBit = ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, depthCompareOp);
    transition->set(dirtyBit);

    mDepthStencilStateInfo.depthCompareOp = PackGLCompareFunc(depthStencilState.depthFunc);
}
void GraphicsPipelineDesc::updateDepthWriteEnabled(const gl::DepthStencilState &depthStencilState,
void GraphicsPipelineDesc::updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer)
{
// Don't write to depth buffers that should not exist
mDepthStencilStateInfo.depthWriteEnable =
mDepthStencilStateInfo.enable.depthWrite =
static_cast<uint8_t>(drawFramebuffer->hasDepth() ? depthStencilState.depthMask : 0);
transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, enable));
}
void GraphicsPipelineDesc::updateStencilTestEnabled(const gl::DepthStencilState &depthStencilState,
void GraphicsPipelineDesc::updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer)
{
// Only enable the stencil test if the draw framebuffer has a stencil buffer. It's possible
// that we're emulating a depth-only buffer with a depth-stencil buffer
mDepthStencilStateInfo.stencilTestEnable =
mDepthStencilStateInfo.enable.stencilTest =
static_cast<uint8_t>(depthStencilState.stencilTest && drawFramebuffer->hasStencil());
transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, enable));
}
void GraphicsPipelineDesc::updateStencilFrontFuncs(GLint ref,
void GraphicsPipelineDesc::updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
GLint ref,
const gl::DepthStencilState &depthStencilState)
{
mDepthStencilStateInfo.frontStencilReference = static_cast<uint8_t>(ref);
mDepthStencilStateInfo.front.compareOp = PackGLCompareFunc(depthStencilState.stencilFunc);
mDepthStencilStateInfo.front.ops.compare = PackGLCompareFunc(depthStencilState.stencilFunc);
mDepthStencilStateInfo.front.compareMask = static_cast<uint8_t>(depthStencilState.stencilMask);
transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, front));
transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, frontStencilReference));
}
void GraphicsPipelineDesc::updateStencilBackFuncs(GLint ref,
void GraphicsPipelineDesc::updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
GLint ref,
const gl::DepthStencilState &depthStencilState)
{
mDepthStencilStateInfo.backStencilReference = static_cast<uint8_t>(ref);
mDepthStencilStateInfo.back.compareOp = PackGLCompareFunc(depthStencilState.stencilBackFunc);
mDepthStencilStateInfo.back.ops.compare = PackGLCompareFunc(depthStencilState.stencilBackFunc);
mDepthStencilStateInfo.back.compareMask =
static_cast<uint8_t>(depthStencilState.stencilBackMask);
transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, back));
transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, backStencilReference));
}
void GraphicsPipelineDesc::updateStencilFrontOps(const gl::DepthStencilState &depthStencilState)
void GraphicsPipelineDesc::updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
const gl::DepthStencilState &depthStencilState)
{
mDepthStencilStateInfo.front.passOp = PackGLStencilOp(depthStencilState.stencilPassDepthPass);
mDepthStencilStateInfo.front.failOp = PackGLStencilOp(depthStencilState.stencilFail);
mDepthStencilStateInfo.front.depthFailOp =
mDepthStencilStateInfo.front.ops.pass = PackGLStencilOp(depthStencilState.stencilPassDepthPass);
mDepthStencilStateInfo.front.ops.fail = PackGLStencilOp(depthStencilState.stencilFail);
mDepthStencilStateInfo.front.ops.depthFail =
PackGLStencilOp(depthStencilState.stencilPassDepthFail);
transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, front));
}
void GraphicsPipelineDesc::updateStencilBackOps(const gl::DepthStencilState &depthStencilState)
void GraphicsPipelineDesc::updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
const gl::DepthStencilState &depthStencilState)
{
mDepthStencilStateInfo.back.passOp =
mDepthStencilStateInfo.back.ops.pass =
PackGLStencilOp(depthStencilState.stencilBackPassDepthPass);
mDepthStencilStateInfo.back.failOp = PackGLStencilOp(depthStencilState.stencilBackFail);
mDepthStencilStateInfo.back.depthFailOp =
mDepthStencilStateInfo.back.ops.fail = PackGLStencilOp(depthStencilState.stencilBackFail);
mDepthStencilStateInfo.back.ops.depthFail =
PackGLStencilOp(depthStencilState.stencilBackPassDepthFail);
transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, back));
}
// Updates the front-face stencil write mask and marks the transition bit covering the
// front stencil state. The mask is forced to zero when the draw framebuffer reports no
// stencil buffer, so we never write to a stencil buffer that should not exist.
void GraphicsPipelineDesc::updateStencilFrontWriteMask(
    GraphicsPipelineTransitionBits *transition,
    const gl::DepthStencilState &depthStencilState,
    const gl::Framebuffer *drawFramebuffer)
{
    uint8_t effectiveMask = 0;
    if (drawFramebuffer->hasStencil())
    {
        effectiveMask = static_cast<uint8_t>(depthStencilState.stencilWritemask);
    }
    mDepthStencilStateInfo.front.writeMask = effectiveMask;

    transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, front));
}
// Updates the back-face stencil write mask and marks the transition bit covering the
// back stencil state. The mask is forced to zero when the draw framebuffer reports no
// stencil buffer, so we never write to a stencil buffer that should not exist.
void GraphicsPipelineDesc::updateStencilBackWriteMask(
    GraphicsPipelineTransitionBits *transition,
    const gl::DepthStencilState &depthStencilState,
    const gl::Framebuffer *drawFramebuffer)
{
    uint8_t effectiveMask = 0;
    if (drawFramebuffer->hasStencil())
    {
        effectiveMask = static_cast<uint8_t>(depthStencilState.stencilBackWritemask);
    }
    mDepthStencilStateInfo.back.writeMask = effectiveMask;

    transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, back));
}
void GraphicsPipelineDesc::updatePolygonOffsetFillEnabled(bool enabled)
void GraphicsPipelineDesc::updatePolygonOffsetFillEnabled(
GraphicsPipelineTransitionBits *transition,
bool enabled)
{
mRasterizationAndMultisampleStateInfo.depthBiasEnable = enabled;
mRasterizationAndMultisampleStateInfo.bits.depthBiasEnable = enabled;
transition->set(ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, bits));
}
void GraphicsPipelineDesc::updatePolygonOffset(const gl::RasterizerState &rasterState)
// Copies the GL polygon offset parameters into the packed rasterization state
// (factor -> depthBiasSlopeFactor, units -> depthBiasConstantFactor) and marks the
// transition bit for each of the two fields.
void GraphicsPipelineDesc::updatePolygonOffset(GraphicsPipelineTransitionBits *transition,
                                               const gl::RasterizerState &rasterState)
{
    auto &rasterAndMS = mRasterizationAndMultisampleStateInfo;

    rasterAndMS.depthBiasSlopeFactor = rasterState.polygonOffsetFactor;
    const size_t slopeFactorBit =
        ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, depthBiasSlopeFactor);
    transition->set(slopeFactorBit);

    rasterAndMS.depthBiasConstantFactor = rasterState.polygonOffsetUnits;
    const size_t constantFactorBit =
        ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, depthBiasConstantFactor);
    transition->set(constantFactorBit);
}
void GraphicsPipelineDesc::updateRenderPassDesc(const RenderPassDesc &renderPassDesc)
// Replaces the stored render pass description without marking any transition bits.
// updateRenderPassDesc is the sibling that also records the state transition.
void GraphicsPipelineDesc::setRenderPassDesc(const RenderPassDesc &renderPassDesc)
{
mRenderPassDesc = renderPassDesc;
}
// Stores the new render pass description and marks every transition bit that overlaps
// mRenderPassDesc inside the pipeline description.
void GraphicsPipelineDesc::updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
                                                const RenderPassDesc &renderPassDesc)
{
    setRenderPassDesc(renderPassDesc);

    // The RenderPass is a special case where it spans multiple bits but has no member.
    constexpr size_t kBeginBit =
        offsetof(GraphicsPipelineDesc, mRenderPassDesc) >> kTransitionByteShift;
    constexpr size_t kEndBit = kBeginBit + (kRenderPassDescSize >> kTransitionByteShift);

    for (size_t dirtyBit = kBeginBit; dirtyBit < kEndBit; ++dirtyBit)
    {
        transition->set(dirtyBit);
    }
}
// AttachmentOpsArray implementation.
AttachmentOpsArray::AttachmentOpsArray()
{
......@@ -1026,6 +1168,41 @@ const PushConstantRangeArray<PackedPushConstantRange> &PipelineLayoutDesc::getPu
{
return mPushConstantRanges;
}
// PipelineHelper implementation.
// Construction and destruction are compiler-generated; releasing the Vulkan pipeline is
// done explicitly through destroy().
PipelineHelper::PipelineHelper() = default;
PipelineHelper::~PipelineHelper() = default;
// Forwards to mPipeline.destroy() with the given device.
void PipelineHelper::destroy(VkDevice device)
{
mPipeline.destroy(device);
}
// Looks through the recorded transitions for one that matches |bits| and |desc|.
// On a match, writes the transition's target pipeline to |pipelineOut| and returns true.
// Returns false (leaving |pipelineOut| untouched) when no recorded transition matches.
bool PipelineHelper::findTransition(GraphicsPipelineTransitionBits bits,
                                    const GraphicsPipelineDesc &desc,
                                    PipelineHelper **pipelineOut) const
{
    // Linear scan. Search could be improved using sorting or hashing.
    for (const GraphicsPipelineTransition &candidate : mTransitions)
    {
        if (!GraphicsPipelineTransitionMatch(candidate.bits, bits, *candidate.desc, desc))
        {
            continue;
        }

        *pipelineOut = candidate.target;
        return true;
    }

    return false;
}
// Appends a transition entry made of the dirty |bits|, the |desc| pointer and the target
// |pipeline| to this helper's transition list. Only the pointers are stored.
void PipelineHelper::addTransition(GraphicsPipelineTransitionBits bits,
                                   const GraphicsPipelineDesc *desc,
                                   PipelineHelper *pipeline)
{
    const GraphicsPipelineTransition newEntry = {bits, desc, pipeline};
    mTransitions.push_back(newEntry);
}
} // namespace vk
// RenderPassCache implementation.
......@@ -1129,8 +1306,8 @@ void GraphicsPipelineCache::destroy(VkDevice device)
{
for (auto &item : mPayload)
{
vk::PipelineAndSerial &pipeline = item.second;
pipeline.get().destroy(device);
vk::PipelineHelper &pipeline = item.second;
pipeline.destroy(device);
}
mPayload.clear();
......@@ -1140,8 +1317,8 @@ void GraphicsPipelineCache::release(RendererVk *renderer)
{
for (auto &item : mPayload)
{
vk::PipelineAndSerial &pipeline = item.second;
renderer->releaseObject(pipeline.getSerial(), &pipeline.get());
vk::PipelineHelper &pipeline = item.second;
renderer->releaseObject(pipeline.getSerial(), &pipeline.getPipeline());
}
mPayload.clear();
......@@ -1156,7 +1333,8 @@ angle::Result GraphicsPipelineCache::insertPipeline(
const vk::ShaderModule &vertexModule,
const vk::ShaderModule &fragmentModule,
const vk::GraphicsPipelineDesc &desc,
vk::PipelineAndSerial **pipelineOut)
const vk::GraphicsPipelineDesc **descPtrOut,
vk::PipelineHelper **pipelineOut)
{
vk::Pipeline newPipeline;
......@@ -1169,9 +1347,9 @@ angle::Result GraphicsPipelineCache::insertPipeline(
}
// The Serial will be updated outside of this query.
auto insertedItem =
mPayload.emplace(desc, vk::PipelineAndSerial(std::move(newPipeline), Serial()));
*pipelineOut = &insertedItem.first->second;
auto insertedItem = mPayload.emplace(desc, std::move(newPipeline));
*descPtrOut = &insertedItem.first->first;
*pipelineOut = &insertedItem.first->second;
return angle::Result::Continue;
}
......@@ -1184,7 +1362,7 @@ void GraphicsPipelineCache::populate(const vk::GraphicsPipelineDesc &desc, vk::P
return;
}
mPayload.emplace(desc, vk::PipelineAndSerial(std::move(pipeline), Serial()));
mPayload.emplace(desc, std::move(pipeline));
}
// DescriptorSetLayoutCache implementation.
......
......@@ -124,15 +124,6 @@ bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);
static_assert(sizeof(AttachmentOpsArray) == 80, "Size check failed");
struct PackedShaderStageInfo final
{
uint32_t stage;
uint32_t moduleSerial;
// TODO(jmadill): Do we want specialization constants?
};
static_assert(sizeof(PackedShaderStageInfo) == 8, "Size check failed");
struct PackedVertexInputBindingDesc final
{
// Although technically stride can be any value in ES 2.0, in practice supporting stride
......@@ -142,11 +133,22 @@ struct PackedVertexInputBindingDesc final
uint16_t inputRate;
};
static_assert(sizeof(PackedVertexInputBindingDesc) == 4, "Size check failed");
constexpr size_t kVertexInputBindingSize = sizeof(PackedVertexInputBindingDesc);
static_assert(kVertexInputBindingSize == 4, "Size check failed");
struct PackedRasterizationAndMultisampleStateInfo final
using VertexInputBindings = gl::AttribArray<PackedVertexInputBindingDesc>;
constexpr size_t kVertexInputBindingsSize = sizeof(VertexInputBindings);
// Packed per-attribute vertex input state: one format and one relative offset for each
// of the gl::MAX_VERTEX_ATTRIBS attribute slots. Both arrays are indexed by attribute
// index (see GraphicsPipelineDesc::updateVertexInput).
struct VertexInputAttributes final
{
uint8_t formats[gl::MAX_VERTEX_ATTRIBS];
uint16_t offsets[gl::MAX_VERTEX_ATTRIBS]; // can only take 11 bits on NV
};
// Byte size of the attribute block; contributes to kGraphicsPipelineDescSumOfSizes.
constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
struct RasterizationStateBits final
{
// Padded to ensure there's no gaps in this structure or those that use it.
uint32_t depthClampEnable : 4;
uint32_t rasterizationDiscardEnable : 4;
uint32_t polygonMode : 4;
......@@ -157,42 +159,66 @@ struct PackedRasterizationAndMultisampleStateInfo final
uint32_t sampleShadingEnable : 1;
uint32_t alphaToCoverageEnable : 1;
uint32_t alphaToOneEnable : 2;
};
constexpr size_t kRasterizationStateBitsSize = sizeof(RasterizationStateBits);
static_assert(kRasterizationStateBitsSize == 4, "Size check failed");
struct PackedRasterizationAndMultisampleStateInfo final
{
RasterizationStateBits bits;
// Padded to ensure there's no gaps in this structure or those that use it.
float minSampleShading;
uint32_t sampleMask[gl::MAX_SAMPLE_MASK_WORDS];
float depthBiasConstantFactor;
// Note: depth bias clamp is only exposed in a 3.1 extension, but left here for completeness.
float depthBiasClamp;
float depthBiasConstantFactor;
float depthBiasSlopeFactor;
float lineWidth;
};
static constexpr size_t kPackedRasterizationAndMultisampleStateSize =
constexpr size_t kPackedRasterizationAndMultisampleStateSize =
sizeof(PackedRasterizationAndMultisampleStateInfo);
static_assert(kPackedRasterizationAndMultisampleStateSize == 32, "Size check failed");
// Stencil operations and compare function for one face, packed as four 4-bit fields.
// fail/pass/depthFail hold values produced by PackGLStencilOp and compare holds a value
// produced by PackGLCompareFunc (see the updateStencil*Ops / updateStencil*Funcs methods).
struct StencilOps final
{
uint8_t fail : 4;
uint8_t pass : 4;
uint8_t depthFail : 4;
uint8_t compare : 4;
};
constexpr size_t kStencilOpsSize = sizeof(StencilOps);
// The four nibbles must occupy exactly two bytes with no padding.
static_assert(kStencilOpsSize == 2, "Size check failed");
struct PackedStencilOpState final
{
uint8_t failOp : 4;
uint8_t passOp : 4;
uint8_t depthFailOp : 4;
uint8_t compareOp : 4;
StencilOps ops;
uint8_t compareMask;
uint8_t writeMask;
};
static constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(sizeof(PackedStencilOpState) == 4, "Size check failed");
constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 4, "Size check failed");
// Enable flags for the depth test, depth write, depth bounds test and stencil test,
// packed into a single byte. Each flag is converted to a VkBool32 when the pipeline is
// initialized.
struct DepthStencilEnableFlags final
{
uint8_t depthTest : 2; // these only need one bit each. the extra is used as padding.
uint8_t depthWrite : 2;
uint8_t depthBoundsTest : 2;
uint8_t stencilTest : 2;
};
constexpr size_t kDepthStencilEnableFlagsSize = sizeof(DepthStencilEnableFlags);
// All four 2-bit flags must fit in exactly one byte.
static_assert(kDepthStencilEnableFlagsSize == 1, "Size check failed");
struct PackedDepthStencilStateInfo final
{
uint8_t depthTestEnable : 1;
uint8_t depthWriteEnable : 1;
uint8_t depthCompareOp : 4;
uint8_t depthBoundsTestEnable : 1;
uint8_t stencilTestEnable : 1;
DepthStencilEnableFlags enable;
uint8_t frontStencilReference;
uint8_t backStencilReference;
uint8_t padding;
uint8_t depthCompareOp; // only needs 4 bits. extra used as padding.
float minDepthBounds;
float maxDepthBounds;
PackedStencilOpState front;
......@@ -202,6 +228,15 @@ struct PackedDepthStencilStateInfo final
constexpr size_t kPackedDepthStencilStateSize = sizeof(PackedDepthStencilStateInfo);
static_assert(kPackedDepthStencilStateSize == 20, "Size check failed");
// Color blend logic-op state packed into one byte: a 1-bit enable flag and a 7-bit
// operation value. initializePipeline casts these to VkBool32 and VkLogicOp respectively.
struct LogicOpState final
{
uint8_t opEnable : 1;
uint8_t op : 7;
};
constexpr size_t kLogicOpStateSize = sizeof(LogicOpState);
// Both bitfields must share a single byte.
static_assert(kLogicOpStateSize == 1, "Size check failed");
struct PackedColorBlendAttachmentState final
{
uint16_t srcColorBlendFactor : 5;
......@@ -212,35 +247,50 @@ struct PackedColorBlendAttachmentState final
uint16_t alphaBlendOp : 6;
};
static_assert(sizeof(PackedColorBlendAttachmentState) == 4, "Size check failed");
constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");
// Input assembly state packed into two bytes: a 15-bit primitive topology and a 1-bit
// primitive-restart flag. initializePipeline casts these to VkPrimitiveTopology and
// VkBool32 respectively.
struct PrimitiveState final
{
uint16_t topology : 15;
uint16_t restartEnable : 1;
};
constexpr size_t kPrimitiveStateSize = sizeof(PrimitiveState);
// Both bitfields must share a single 16-bit word.
static_assert(kPrimitiveStateSize == 2, "Size check failed");
struct PackedInputAssemblyAndColorBlendStateInfo final
{
uint8_t logicOpEnable : 1;
uint8_t logicOp : 7;
uint8_t blendEnableMask;
uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
uint16_t topology : 15;
uint16_t primitiveRestartEnable : 1;
float blendConstants[4];
LogicOpState logic;
uint8_t blendEnableMask;
PrimitiveState primitive;
};
constexpr size_t kPackedInputAssemblyAndColorBlendStateSize =
sizeof(PackedInputAssemblyAndColorBlendStateInfo);
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");
using VertexInputBindings = gl::AttribArray<PackedVertexInputBindingDesc>;
// Total byte size of the packed state blocks that make up a GraphicsPipelineDesc. Used to
// derive the number of transition (dirty) bits.
constexpr size_t kGraphicsPipelineDescSumOfSizes = kVertexInputBindingsSize +
                                                   kVertexInputAttributesSize +
                                                   kPackedInputAssemblyAndColorBlendStateSize +
                                                   kPackedRasterizationAndMultisampleStateSize +
                                                   kPackedDepthStencilStateSize +
                                                   kRenderPassDescSize;
struct VertexInputAttributes final
{
uint8_t formats[gl::MAX_VERTEX_ATTRIBS];
uint16_t offsets[gl::MAX_VERTEX_ATTRIBS]; // can only take 11 bits on NV
};
// Granularity of a transition bit: each dirty bit stands for this many bytes of the
// pipeline description.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;

// Number of dirty bits in the dirty bit set. The set is scanned as a 64-bit mask, so the
// whole description must be representable in at most 64 bits.
constexpr size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");
constexpr size_t kVertexInputBindingsSize = sizeof(VertexInputBindings);
constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
// Set of dirty bits. Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
// The update* methods of GraphicsPipelineDesc set bits in this mask for the fields they
// modify.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;
// State changes are applied through the update methods. Each update method can also have a
// sibling method that applies the update without marking a state transition. The non-transition
// update methods are used for internal shader pipelines. Not every non-transition update method
// is implemented yet as not every state is used in internal shaders.
class GraphicsPipelineDesc final
{
public:
......@@ -258,6 +308,13 @@ class GraphicsPipelineDesc final
void initDefaults();
// For custom comparisons. Reinterprets the start of this description as a read-only
// array of T; the caller picks the word type it wants to compare with.
template <typename T>
const T *getPtr() const
{
    const void *rawDesc = this;
    return static_cast<const T *>(rawDesc);
}
angle::Result initializePipeline(vk::Context *context,
const vk::PipelineCache &pipelineCacheVk,
const RenderPass &compatibleRenderPass,
......@@ -267,53 +324,78 @@ class GraphicsPipelineDesc final
const ShaderModule &fragmentModule,
Pipeline *pipelineOut) const;
// Vertex input state
void updateVertexInput(uint32_t attribIndex,
// Vertex input state. For ES 3.1 this should be separated into binding and attribute.
void updateVertexInput(GraphicsPipelineTransitionBits *transition,
uint32_t attribIndex,
GLuint stride,
GLuint divisor,
VkFormat format,
GLuint relativeOffset);
// Input assembly info
void updateTopology(gl::PrimitiveMode drawMode);
void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
// Raster states
void updateCullMode(const gl::RasterizerState &rasterState);
void updateFrontFace(const gl::RasterizerState &rasterState, bool invertFrontFace);
void updateLineWidth(float lineWidth);
void updateCullMode(GraphicsPipelineTransitionBits *transition,
const gl::RasterizerState &rasterState);
void updateFrontFace(GraphicsPipelineTransitionBits *transition,
const gl::RasterizerState &rasterState,
bool invertFrontFace);
void updateLineWidth(GraphicsPipelineTransitionBits *transition, float lineWidth);
// RenderPass description.
const RenderPassDesc &getRenderPassDesc() const;
void updateRenderPassDesc(const RenderPassDesc &renderPassDesc);
const RenderPassDesc &getRenderPassDesc() const { return mRenderPassDesc; }
void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
const RenderPassDesc &renderPassDesc);
// Blend states
// Blend-state updaters: blend enable, blend color, blend functions, blend equations and the
// color write mask. Declarations appear both without and with a leading
// GraphicsPipelineTransitionBits out-parameter.
// NOTE(review): the versions without |transition| are probably pre-change lines from the diff
// rendering - confirm against the real header.
void updateBlendEnabled(bool isBlendEnabled);
void updateBlendColor(const gl::ColorF &color);
void updateBlendFuncs(const gl::BlendState &blendState);
void updateBlendEquations(const gl::BlendState &blendState);
// NOTE(review): the next line is an unterminated declaration (no closing parenthesis); it looks
// like the first line of the old updateColorWriteMask signature whose tail was not captured.
void updateColorWriteMask(VkColorComponentFlags colorComponentFlags,
void updateBlendEnabled(GraphicsPipelineTransitionBits *transition, bool isBlendEnabled);
void updateBlendColor(GraphicsPipelineTransitionBits *transition, const gl::ColorF &color);
void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
const gl::BlendState &blendState);
void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
const gl::BlendState &blendState);
// Color write mask: takes the Vulkan color component flags and a draw-buffer alpha mask.
// The set* form has no transition-bits argument; the update* form does.
void setColorWriteMask(VkColorComponentFlags colorComponentFlags,
const gl::DrawBufferMask &alphaMask);
void updateColorWriteMask(GraphicsPipelineTransitionBits *transition,
VkColorComponentFlags colorComponentFlags,
const gl::DrawBufferMask &alphaMask);
// Depth/stencil states.
// Depth/stencil updaters. The complete declarations below all take a leading
// GraphicsPipelineTransitionBits out-parameter plus the GL depth/stencil state; several also
// take the draw framebuffer, and the stencil func updaters take a reference value.
// NOTE(review): lines that begin a declaration but never close its parenthesis look like the
// first line of a pre-change signature left behind by the diff rendering. Each one is marked
// below; confirm against the real header.
// NOTE(review): unterminated pre-change line.
void updateDepthTestEnabled(const gl::DepthStencilState &depthStencilState,
void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer);
void updateDepthFunc(const gl::DepthStencilState &depthStencilState);
// NOTE(review): unterminated pre-change line.
void updateDepthWriteEnabled(const gl::DepthStencilState &depthStencilState,
void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
const gl::DepthStencilState &depthStencilState);
void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer);
// NOTE(review): unterminated pre-change line.
void updateStencilTestEnabled(const gl::DepthStencilState &depthStencilState,
void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer);
// Complete declarations without the transition-bits argument (probably the pre-change forms).
void updateStencilFrontFuncs(GLint ref, const gl::DepthStencilState &depthStencilState);
void updateStencilBackFuncs(GLint ref, const gl::DepthStencilState &depthStencilState);
void updateStencilFrontOps(const gl::DepthStencilState &depthStencilState);
void updateStencilBackOps(const gl::DepthStencilState &depthStencilState);
// NOTE(review): unterminated pre-change line.
void updateStencilFrontWriteMask(const gl::DepthStencilState &depthStencilState,
// Front/back stencil funcs, ops and write masks, each with the transition-bits argument.
void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
GLint ref,
const gl::DepthStencilState &depthStencilState);
void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
GLint ref,
const gl::DepthStencilState &depthStencilState);
void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
const gl::DepthStencilState &depthStencilState);
void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
const gl::DepthStencilState &depthStencilState);
void updateStencilFrontWriteMask(GraphicsPipelineTransitionBits *transition,
const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer);
// NOTE(review): unterminated pre-change line.
void updateStencilBackWriteMask(const gl::DepthStencilState &depthStencilState,
void updateStencilBackWriteMask(GraphicsPipelineTransitionBits *transition,
const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer);
// Depth offset.
// Polygon-offset updaters: one takes an enable flag, the other the GL rasterizer state.
// Each appears without and with a leading GraphicsPipelineTransitionBits out-parameter.
// NOTE(review): the versions without |transition| are probably pre-change lines from the diff
// rendering - confirm against the real header.
void updatePolygonOffsetFillEnabled(bool enabled);
void updatePolygonOffset(const gl::RasterizerState &rasterState);
void updatePolygonOffsetFillEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
void updatePolygonOffset(GraphicsPipelineTransitionBits *transition,
const gl::RasterizerState &rasterState);
private:
VertexInputBindings mVertexInputBindings;
......@@ -321,7 +403,7 @@ class GraphicsPipelineDesc final
RenderPassDesc mRenderPassDesc;
PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo;
PackedDepthStencilStateInfo mDepthStencilStateInfo;
PackedInputAssemblyAndColorBlendStateInfo mInputAssembltyAndColorBlendStateInfo;
PackedInputAssemblyAndColorBlendStateInfo mInputAssemblyAndColorBlendStateInfo;
// Viewport and scissor are applied as dynamic state.
};
......@@ -329,12 +411,7 @@ class GraphicsPipelineDesc final
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
constexpr size_t kGraphicsPipelineDescSumOfSizes =
kVertexInputBindingsSize + kVertexInputAttributesSize +
kPackedInputAssemblyAndColorBlendStateSize + kPackedRasterizationAndMultisampleStateSize +
kPackedDepthStencilStateSize + kRenderPassDescSize;
static constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");
constexpr uint32_t kMaxDescriptorSetLayoutBindings = gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES;
......@@ -430,6 +507,43 @@ static_assert(sizeof(PipelineLayoutDesc) ==
// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS
class PipelineHelper;
// One entry of a PipelineHelper's transition list: a set of transition bits, a pipeline
// description, and the PipelineHelper associated with them.
// The struct declares no constructor or destructor, so it neither initializes nor frees the
// two pointers itself.
struct GraphicsPipelineTransition
{
// Transition bits for this entry.
// NOTE(review): presumably the mask of description regions that differ - confirm.
GraphicsPipelineTransitionBits bits;
// Description this entry refers to (read-only through this pointer).
const GraphicsPipelineDesc *desc;
// Pipeline helper this entry points at.
PipelineHelper *target;
};
// Bundles a Pipeline with a Serial and a list of GraphicsPipelineTransition entries.
// Non-copyable (inherits angle::NonCopyable).
class PipelineHelper final : angle::NonCopyable
{
  public:
    // Construction / destruction.
    PipelineHelper();
    // Moves |pipeline| into this helper.
    explicit PipelineHelper(Pipeline &&pipeline) : mPipeline(std::move(pipeline)) {}
    ~PipelineHelper();

    // Releases resources using |device|; defined out of line.
    void destroy(VkDevice device);

    // Wrapped pipeline access. valid() forwards to the pipeline's own valid().
    Pipeline &getPipeline() { return mPipeline; }
    bool valid() const { return mPipeline.valid(); }

    // Serial bookkeeping: plain getter and setter for mSerial.
    Serial getSerial() const { return mSerial; }
    void updateSerial(Serial serial) { mSerial = serial; }

    // Transition list maintenance; both are defined out of line.
    // NOTE(review): findTransition presumably reports through its bool result whether a
    // matching entry exists and writes it to |pipelineOut| - confirm against the definition.
    void addTransition(GraphicsPipelineTransitionBits bits,
                       const GraphicsPipelineDesc *desc,
                       PipelineHelper *pipeline);
    bool findTransition(GraphicsPipelineTransitionBits bits,
                        const GraphicsPipelineDesc &desc,
                        PipelineHelper **pipelineOut) const;

  private:
    // Member order is kept as originally declared; it fixes initialization order and layout.
    std::vector<GraphicsPipelineTransition> mTransitions;
    Serial mSerial;
    Pipeline mPipeline;
};
} // namespace vk
} // namespace rx
......@@ -538,18 +652,20 @@ class GraphicsPipelineCache final : angle::NonCopyable
const vk::ShaderModule &vertexModule,
const vk::ShaderModule &fragmentModule,
const vk::GraphicsPipelineDesc &desc,
vk::PipelineAndSerial **pipelineOut)
const vk::GraphicsPipelineDesc **descPtrOut,
vk::PipelineHelper **pipelineOut)
{
auto item = mPayload.find(desc);
if (item != mPayload.end())
{
*descPtrOut = &item->first;
*pipelineOut = &item->second;
return angle::Result::Continue;
}
return insertPipeline(context, pipelineCacheVk, compatibleRenderPass, pipelineLayout,
activeAttribLocationsMask, vertexModule, fragmentModule, desc,
pipelineOut);
descPtrOut, pipelineOut);
}
private:
......@@ -561,9 +677,10 @@ class GraphicsPipelineCache final : angle::NonCopyable
const vk::ShaderModule &vertexModule,
const vk::ShaderModule &fragmentModule,
const vk::GraphicsPipelineDesc &desc,
vk::PipelineAndSerial **pipelineOut);
const vk::GraphicsPipelineDesc **descPtrOut,
vk::PipelineHelper **pipelineOut);
std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineAndSerial> mPayload;
std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper> mPayload;
};
class DescriptorSetLayoutCache final : angle::NonCopyable
......
......@@ -641,7 +641,8 @@ class ShaderProgramHelper : angle::NonCopyable
const PipelineLayout &pipelineLayout,
const GraphicsPipelineDesc &pipelineDesc,
const gl::AttributesMask &activeAttribLocationsMask,
PipelineAndSerial **pipelineOut)
const vk::GraphicsPipelineDesc **descPtrOut,
PipelineHelper **pipelineOut)
{
// Pull in a compatible RenderPass.
vk::RenderPass *compatibleRenderPass = nullptr;
......@@ -651,7 +652,7 @@ class ShaderProgramHelper : angle::NonCopyable
return mGraphicsPipelines.getPipeline(
context, pipelineCache, *compatibleRenderPass, pipelineLayout,
activeAttribLocationsMask, mShaders[gl::ShaderType::Vertex].get().get(),
mShaders[gl::ShaderType::Fragment].get().get(), pipelineDesc, pipelineOut);
mShaders[gl::ShaderType::Fragment].get().get(), pipelineDesc, descPtrOut, pipelineOut);
}
angle::Result getComputePipeline(Context *context,
......@@ -661,6 +662,8 @@ class ShaderProgramHelper : angle::NonCopyable
private:
gl::ShaderMap<BindingPointer<ShaderAndSerial>> mShaders;
GraphicsPipelineCache mGraphicsPipelines;
// We should probably use PipelineHelper here so we can remove PipelineAndSerial.
PipelineAndSerial mComputePipeline;
};
} // namespace vk
......
......@@ -83,14 +83,15 @@ void VulkanPipelineCachePerfTest::step()
vk::PipelineLayout pl;
vk::PipelineCache pc;
vk::ShaderModule sm;
vk::PipelineAndSerial *result = nullptr;
const vk::GraphicsPipelineDesc *desc = nullptr;
vk::PipelineHelper *result = nullptr;
gl::AttributesMask am;
for (unsigned int iteration = 0; iteration < kIterationsPerStep; ++iteration)
{
for (const auto &hit : mCacheHits)
{
(void)mCache.getPipeline(VK_NULL_HANDLE, pc, rp, pl, am, sm, sm, hit, &result);
(void)mCache.getPipeline(VK_NULL_HANDLE, pc, rp, pl, am, sm, sm, hit, &desc, &result);
}
}
......@@ -98,7 +99,7 @@ void VulkanPipelineCachePerfTest::step()
++missCount, ++mMissIndex)
{
const auto &miss = mCacheMisses[mMissIndex];
(void)mCache.getPipeline(VK_NULL_HANDLE, pc, rp, pl, am, sm, sm, miss, &result);
(void)mCache.getPipeline(VK_NULL_HANDLE, pc, rp, pl, am, sm, sm, miss, &desc, &result);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment