Commit cc37cbf2 by Markus Tavenrath Committed by Commit Bot

Apply several small micro-optimizations to ValidateDrawElementsCommon

* Remove indirection when getting ContextState from Context
* Reduce number of branches from 2 to 1 when checking for an InvalidEnum in FromGLenum<DrawElementsType>
* Provide IndexRange constructor which doesn't initialize the member variables
* Reduce number of branches to 1 when checking for BasicDrawStateErrors for the fast path
* Remove a few branches and reduce math cost during the buffer range check in ValidateDrawElementsCommon

Bug: angleproject:2966
Change-Id: Ibdbed8a59ad9d7abce76622e9f507498e67ab997
Reviewed-on: https://chromium-review.googlesource.com/c/1392805
Commit-Queue: Markus Tavenrath <matavenrath@nvidia.com>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
parent 60a50cfc
......@@ -281,13 +281,19 @@ enum class DrawElementsType : size_t
// Specialization that converts a GLenum index type into a DrawElementsType.
// Inputs that do not correspond to a valid enumerator produce DrawElementsType::InvalidEnum.
//
// NOTE(review): this listing comes from a diff whose +/- markers were lost. The statements from
// the first `GLenum scaled` through the closing brace of the `if` look like the removed
// two-condition check; the second `GLenum scaled` declaration appears to start the replacement
// implementation -- confirm against the original commit.
template <>
constexpr DrawElementsType FromGLenum<DrawElementsType>(GLenum from)
{
GLenum scaled = (from - GL_UNSIGNED_BYTE);
GLenum packed = (scaled >> 1);
// Two separate conditions: an odd offset from GL_UNSIGNED_BYTE, or a halved offset that is out of
// range.
if ((scaled & 1) != 0 || packed >= static_cast<GLenum>(DrawElementsType::EnumCount))
{
return DrawElementsType::InvalidEnum;
}
GLenum scaled = (from - GL_UNSIGNED_BYTE);
// This code sequence generates a ROR instruction on x86/arm. We want to check if the lowest bit
// of scaled is set and if (scaled >> 1) is greater than a non-pot (non-power-of-two) value. If we
// rotate the lowest bit to the highest bit both conditions can be checked with a single test.
static_assert(sizeof(GLenum) == 4, "Update (scaled << 31) to sizeof(GLenum) * 8 - 1");
// Rotate right by one: an odd `scaled` sets bit 31 of `packed`, which is then compared against
// EnumCount below together with the range check.
GLenum packed = (scaled >> 1) | (scaled << 31);
// operator ? with a simple assignment usually translates to a cmov instruction and thus avoids
// a branch.
packed = (packed >= static_cast<GLenum>(DrawElementsType::EnumCount))
? static_cast<GLenum>(DrawElementsType::InvalidEnum)
: packed;
return static_cast<DrawElementsType>(packed);
}
......
......@@ -718,6 +718,9 @@ typedef Range<unsigned int> RangeUI;
struct IndexRange
{
struct Undefined
{};
IndexRange(Undefined) {}
IndexRange() : IndexRange(0, 0, 0) {}
IndexRange(size_t start_, size_t end_, size_t vertexIndexCount_)
: start(start_), end(end_), vertexIndexCount(vertexIndexCount_)
......
......@@ -133,6 +133,19 @@ class StateCache final : angle::NonCopyable
// 12. onActiveTransformFeedbackChange.
// 13. onUniformBufferStateChange.
// 14. onBufferBindingChange.
// Reports whether the basic draw states currently carry an error, without handing the error
// value itself back to the caller.
bool hasBasicDrawStatesError(Context *context) const
{
    // A cached value of zero is reported as "no error" straight away.
    if (mCachedBasicDrawStatesError == 0)
    {
        return false;
    }

    // Any cached value other than kInvalidPointer is reported as an error without further work.
    // Only when the cache holds kInvalidPointer (presumably the "not yet computed" marker --
    // confirm against the rest of StateCache) is getBasicDrawStatesErrorImpl() consulted.
    return mCachedBasicDrawStatesError != kInvalidPointer ||
           getBasicDrawStatesErrorImpl(context) != 0;
}
intptr_t getBasicDrawStatesError(Context *context) const
{
if (mCachedBasicDrawStatesError != kInvalidPointer)
......@@ -1695,7 +1708,7 @@ class Context final : public egl::LabeledObject, angle::NonCopyable, public angl
// Read-only accessors for client version, GL state, caps and extensions. Except where noted
// below, each one forwards to the corresponding getter on mState.
GLint getClientMajorVersion() const { return mState.getClientMajorVersion(); }
GLint getClientMinorVersion() const { return mState.getClientMinorVersion(); }
const Version &getClientVersion() const { return mState.getClientVersion(); }
// NOTE(review): the next two lines appear to be the before/after pair of this hunk (the diff
// markers were lost). The first reaches the State through mState.getState(); the second returns
// the mGLState member directly, skipping that indirection -- confirm against the original commit.
const State &getGLState() const { return mState.getState(); }
const State &getGLState() const { return mGLState; }
const Caps &getCaps() const { return mState.getCaps(); }
const TextureCapsMap &getTextureCaps() const { return mState.getTextureCaps(); }
const Extensions &getExtensions() const { return mState.getExtensions(); }
......
......@@ -2858,7 +2858,7 @@ bool ValidateDrawArraysCommon(Context *context,
if (context->getStateCache().isTransformFeedbackActiveUnpaused())
{
const State &state = context->getGLState();
const State &state = context->getGLState();
TransformFeedback *curTransformFeedback = state.getCurrentTransformFeedback();
if (!curTransformFeedback->checkBufferSpaceForDraw(count, primcount))
{
......@@ -3007,15 +3007,12 @@ bool ValidateDrawElementsCommon(Context *context,
return false;
}
const State &state = context->getGLState();
const VertexArray *vao = state.getVertexArray();
Buffer *elementArrayBuffer = vao->getElementArrayBuffer();
GLuint typeBytes = GetDrawElementsTypeSize(type);
ASSERT(isPow2(typeBytes) && typeBytes > 0);
ASSERT(isPow2(GetDrawElementsTypeSize(type)) && GetDrawElementsTypeSize(type) > 0);
if (context->getExtensions().webglCompatibility)
{
GLuint typeBytes = GetDrawElementsTypeSize(type);
if ((reinterpret_cast<uintptr_t>(indices) & static_cast<uintptr_t>(typeBytes - 1)) != 0)
{
// [WebGL 1.0] Section 6.4 Buffer Offset and Stride Requirements
......@@ -3041,6 +3038,10 @@ bool ValidateDrawElementsCommon(Context *context,
return true;
}
const State &state = context->getGLState();
const VertexArray *vao = state.getVertexArray();
Buffer *elementArrayBuffer = vao->getElementArrayBuffer();
if (!elementArrayBuffer)
{
if (!indices)
......@@ -3061,22 +3062,21 @@ bool ValidateDrawElementsCommon(Context *context,
constexpr uint64_t kUint64Max = std::numeric_limits<uint64_t>::max();
static_assert(kIntMax < kUint64Max / kMaxTypeSize, "");
uint64_t typeSize = typeBytes;
uint64_t elementCount = static_cast<uint64_t>(count);
ASSERT(elementCount > 0 && typeSize <= kMaxTypeSize);
ASSERT(elementCount > 0 && GetDrawElementsTypeSize(type) <= kMaxTypeSize);
// Doing the multiplication here is overflow-safe
uint64_t elementDataSizeNoOffset = typeSize * elementCount;
uint64_t elementDataSizeNoOffset = elementCount << GetDrawElementsTypeShift(type);
// The offset can be any value, check for overflows
uint64_t offset = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(indices));
if (elementDataSizeNoOffset > kUint64Max - offset)
uint64_t elementDataSizeWithOffset = elementDataSizeNoOffset + offset;
if (elementDataSizeWithOffset < elementDataSizeNoOffset)
{
context->validationError(GL_INVALID_OPERATION, kIntegerOverflow);
return false;
}
uint64_t elementDataSizeWithOffset = elementDataSizeNoOffset + offset;
if (elementDataSizeWithOffset > static_cast<uint64_t>(elementArrayBuffer->getSize()))
{
context->validationError(GL_INVALID_OPERATION, kInsufficientBufferSize);
......@@ -3087,7 +3087,7 @@ bool ValidateDrawElementsCommon(Context *context,
if (!context->getExtensions().robustBufferAccessBehavior && primcount > 0)
{
// Use the parameter buffer to retrieve and cache the index range.
IndexRange indexRange;
IndexRange indexRange{IndexRange::Undefined()};
ANGLE_VALIDATION_TRY(vao->getIndexRange(context, type, count, indices, &indexRange));
// If we use an index greater than our maximum supported index range, return an error.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment