Commit 7403dd2c by Jamie Madill Committed by Commit Bot

Vulkan: Inline transition query.

Slight speed improvement in the Vulkan vertex array state change test.

Bug: angleproject:3014
Change-Id: I4a5cd26849cd247b7e67cd6bda083aabeb4e34c0
Reviewed-on: https://chromium-review.googlesource.com/c/1406890
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Yuly Novikov <ynovikov@chromium.org>
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
parent a63cc59f
...@@ -301,34 +301,6 @@ constexpr size_t kTransitionBitShift = kTransitionByteShift + Log2(kBitsPerByte) ...@@ -301,34 +301,6 @@ constexpr size_t kTransitionBitShift = kTransitionByteShift + Log2(kBitsPerByte)
// the update function. // the update function.
#define ANGLE_GET_INDEXED_TRANSITION_BIT(Member, Field, Index, BitWidth) \ #define ANGLE_GET_INDEXED_TRANSITION_BIT(Member, Field, Index, BitWidth) \
(((BitWidth * Index) >> kTransitionBitShift) + ANGLE_GET_TRANSITION_BIT(Member, Field)) (((BitWidth * Index) >> kTransitionBitShift) + ANGLE_GET_TRANSITION_BIT(Member, Field))
bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
                                     GraphicsPipelineTransitionBits bitsB,
                                     const GraphicsPipelineDesc &descA,
                                     const GraphicsPipelineDesc &descB)
{
    // Two transitions can only be equivalent if they dirty exactly the same bits.
    if (bitsA != bitsB)
        return false;

    // Each dirty bit currently masks a 4-byte word of the pipeline description.
    // Using 8-byte words with a mask of 32 bits is a possible alternative: the loop
    // below would cover twice as much data per iteration, but identical dirty-bit
    // masks would collide more often across genuinely different transitions, which
    // could cost more for applications that use many transitions. Revisiting the
    // choice of bit width is left as future work.
    static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");
    const uint32_t *wordsA = descA.getPtr<uint32_t>();
    const uint32_t *wordsB = descB.getPtr<uint32_t>();

    // Compare only the description words touched by the dirty bits.
    for (size_t dirtyBit : bitsA)
    {
        if (wordsA[dirtyBit] == wordsB[dirtyBit])
            continue;
        return false;
    }

    return true;
}
} // anonymous namespace } // anonymous namespace
// RenderPassDesc implementation. // RenderPassDesc implementation.
...@@ -1222,23 +1194,6 @@ void PipelineHelper::destroy(VkDevice device) ...@@ -1222,23 +1194,6 @@ void PipelineHelper::destroy(VkDevice device)
mPipeline.destroy(device); mPipeline.destroy(device);
} }
bool PipelineHelper::findTransition(GraphicsPipelineTransitionBits bits,
                                    const GraphicsPipelineDesc &desc,
                                    PipelineHelper **pipelineOut) const
{
    // Linear scan over the recorded transitions; could be improved with sorting
    // or hashing if the list grows large.
    for (const GraphicsPipelineTransition &candidate : mTransitions)
    {
        if (!GraphicsPipelineTransitionMatch(candidate.bits, bits, *candidate.desc, desc))
        {
            continue;
        }
        *pipelineOut = candidate.target;
        return true;
    }

    return false;
}
void PipelineHelper::addTransition(GraphicsPipelineTransitionBits bits, void PipelineHelper::addTransition(GraphicsPipelineTransitionBits bits,
const GraphicsPipelineDesc *desc, const GraphicsPipelineDesc *desc,
PipelineHelper *pipeline) PipelineHelper *pipeline)
......
...@@ -549,6 +549,34 @@ ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition( ...@@ -549,6 +549,34 @@ ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
: bits(bits), desc(desc), target(pipeline) : bits(bits), desc(desc), target(pipeline)
{} {}
ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
                                                  GraphicsPipelineTransitionBits bitsB,
                                                  const GraphicsPipelineDesc &descA,
                                                  const GraphicsPipelineDesc &descB)
{
    // Equivalent transitions must dirty exactly the same set of bits.
    if (bitsA != bitsB)
        return false;

    // Each dirty bit currently masks a 4-byte word of the pipeline description.
    // Using 8-byte words with a mask of 32 bits is a possible alternative: the loop
    // below would cover twice as much data per iteration, but identical dirty-bit
    // masks would collide more often across genuinely different transitions, which
    // could cost more for applications that use many transitions. Revisiting the
    // choice of bit width is left as future work.
    static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");
    const uint32_t *wordsA = descA.getPtr<uint32_t>();
    const uint32_t *wordsB = descB.getPtr<uint32_t>();

    // Compare only the description words touched by the dirty bits.
    for (size_t dirtyBit : bitsA)
    {
        if (wordsA[dirtyBit] == wordsB[dirtyBit])
            continue;
        return false;
    }

    return true;
}
class PipelineHelper final : angle::NonCopyable class PipelineHelper final : angle::NonCopyable
{ {
public: public:
...@@ -563,9 +591,23 @@ class PipelineHelper final : angle::NonCopyable ...@@ -563,9 +591,23 @@ class PipelineHelper final : angle::NonCopyable
Serial getSerial() const { return mSerial; } Serial getSerial() const { return mSerial; }
Pipeline &getPipeline() { return mPipeline; } Pipeline &getPipeline() { return mPipeline; }
bool findTransition(GraphicsPipelineTransitionBits bits, ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
const GraphicsPipelineDesc &desc, const GraphicsPipelineDesc &desc,
PipelineHelper **pipelineOut) const; PipelineHelper **pipelineOut) const
{
// Search could be improved using sorting or hashing.
for (const GraphicsPipelineTransition &transition : mTransitions)
{
if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
{
*pipelineOut = transition.target;
return true;
}
}
return false;
}
void addTransition(GraphicsPipelineTransitionBits bits, void addTransition(GraphicsPipelineTransitionBits bits,
const GraphicsPipelineDesc *desc, const GraphicsPipelineDesc *desc,
PipelineHelper *pipeline); PipelineHelper *pipeline);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment