Commit cb9609fe by Markus Tavenrath Committed by Commit Bot

Optimize glDrawElements performance

A call to glDrawElements results in a calling depth of up to 4:

* glDrawElements
* gl::Context::DrawElements
* rx::ContextGL::DrawElements
* VertexArrayGL::syncDrawState

Each function call has to save and restore a lot of registers, which results in a stall in the prologue of rx::ContextGL::DrawElements due to memory bandwidth limitations.

The main change is that gl::Context::DrawElements is now inlined, which reduces the calling depth by one. In addition, the call to VertexArrayGL::syncDrawElementsState is now guarded so that it is only made when it is required. Finally, a few small getter functions have been inlined where the calling code was bigger than the function itself.

In total this change improves the performance of the DrawElementsPerfBenchmark.Run/gl benchmark by 16%.

Bug: angleproject:2966
Change-Id: I423d18452f2f5b520ab52850fda2054e1da86991
Reviewed-on: https://chromium-review.googlesource.com/c/1389988
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Commit-Queue: Markus Tavenrath <matavenrath@nvidia.com>
parent 7f6b3674
......@@ -254,6 +254,7 @@ template_header_includes = """#include <GLES{major}/gl{major}{minor}.h>
template_sources_includes = """#include "libGLESv2/entry_points_gles_{}_autogen.h"
#include "libANGLE/Context.h"
#include "libANGLE/Context.inl.h"
#include "libANGLE/validationES{}{}.h"
#include "libGLESv2/entry_points_utils.h"
#include "libGLESv2/global_state.h"
......
......@@ -18,7 +18,7 @@
"D3D11 format:src/libANGLE/renderer/d3d/d3d11/gen_texture_format_table.py":
"15fb2a9b3f81e39a22090bce2f071185",
"D3D11 format:src/libANGLE/renderer/d3d/d3d11/texture_format_data.json":
"70a1798cb4e6ebabe31be5bec40c6d6f",
"d7483ece817e819588f4ca157716dc7b",
"D3D11 format:src/libANGLE/renderer/d3d/d3d11/texture_format_map.json":
"805d30e2443935e3a3bd68839699e171",
"DXGI format support:src/libANGLE/renderer/d3d/d3d11/dxgi_support_data.json":
......@@ -62,7 +62,7 @@
"GL/EGL entry points:scripts/entry_point_packed_gl_enums.json":
"0554a67f70407e82c872010014721099",
"GL/EGL entry points:scripts/generate_entry_points.py":
"9fc8f8bd28f5511108b9046d9066774c",
"a959669b31f086510fb60c5b55de56d1",
"GL/EGL entry points:scripts/gl.xml":
"b470cb06b06cbbe7adb2c8129ec85708",
"GL/EGL entry points:scripts/gl_angle_ext.xml":
......
......@@ -8,6 +8,7 @@
// rendering operations. It is the GLES2 specific implementation of EGLContext.
#include "libANGLE/Context.h"
#include "libANGLE/Context.inl.h"
#include <string.h>
#include <iterator>
......@@ -25,7 +26,6 @@
#include "libANGLE/Fence.h"
#include "libANGLE/Framebuffer.h"
#include "libANGLE/FramebufferAttachment.h"
#include "libANGLE/GLES1Renderer.h"
#include "libANGLE/Path.h"
#include "libANGLE/Program.h"
#include "libANGLE/ProgramPipeline.h"
......@@ -42,7 +42,6 @@
#include "libANGLE/queryconversions.h"
#include "libANGLE/queryutils.h"
#include "libANGLE/renderer/BufferImpl.h"
#include "libANGLE/renderer/ContextImpl.h"
#include "libANGLE/renderer/EGLImplFactory.h"
#include "libANGLE/renderer/Format.h"
#include "libANGLE/validationES.h"
......@@ -2241,21 +2240,6 @@ void Context::drawArraysInstanced(PrimitiveMode mode,
MarkTransformFeedbackBufferUsage(this, count, instanceCount);
}
// Issues an indexed draw through the backend implementation.
//
// Nothing is drawn when noopDraw(mode, count) reports that the call draws no
// primitives. Otherwise the context state is synced with prepareForDraw(mode) and the
// draw is forwarded to mImplementation->drawElements.
//
// NOTE(review): ANGLE_CONTEXT_TRY is expected to return from this function when the
// wrapped call fails, so a failing prepareForDraw skips the backend draw - confirm
// against the macro definition.
void Context::drawElements(PrimitiveMode mode,
GLsizei count,
DrawElementsType type,
const void *indices)
{
// No-op if count draws no primitives for given mode
if (noopDraw(mode, count))
{
return;
}
ANGLE_CONTEXT_TRY(prepareForDraw(mode));
ANGLE_CONTEXT_TRY(mImplementation->drawElements(this, mode, count, type, indices));
}
void Context::drawElementsInstanced(PrimitiveMode mode,
GLsizei count,
DrawElementsType type,
......@@ -3516,40 +3500,6 @@ bool Context::noopDrawInstanced(PrimitiveMode mode, GLsizei count, GLsizei insta
return (instanceCount == 0) || noopDraw(mode, count);
}
// Hands every currently dirty state bit to the backend implementation and then clears
// the dirty bits on mGLState. mAllDirtyBits is passed as the mask argument of syncState.
// Returns angle::Result::Continue once the bits have been cleared.
// NOTE(review): ANGLE_TRY presumably returns early on failure, leaving the dirty bits
// set - confirm against the macro definition.
ANGLE_INLINE angle::Result Context::syncDirtyBits()
{
const State::DirtyBits &dirtyBits = mGLState.getDirtyBits();
ANGLE_TRY(mImplementation->syncState(this, dirtyBits, mAllDirtyBits));
mGLState.clearDirtyBits();
return angle::Result::Continue;
}
// Masked variant: only the dirty bits that are also set in bitMask are handed to the
// backend implementation, and only those bits are cleared afterwards.
// bitMask is also forwarded as the mask argument of syncState.
// Returns angle::Result::Continue once the selected bits have been cleared.
ANGLE_INLINE angle::Result Context::syncDirtyBits(const State::DirtyBits &bitMask)
{
const State::DirtyBits &dirtyBits = (mGLState.getDirtyBits() & bitMask);
ANGLE_TRY(mImplementation->syncState(this, dirtyBits, bitMask));
mGLState.clearDirtyBits(dirtyBits);
return angle::Result::Continue;
}
// Forwards to mGLState.syncDirtyObjects for the objects selected by objectMask and
// returns its result unchanged.
ANGLE_INLINE angle::Result Context::syncDirtyObjects(const State::DirtyObjects &objectMask)
{
return mGLState.syncDirtyObjects(this, objectMask);
}
// Brings the context state up to date before a draw call, in this order:
//   1. lets the GLES1 renderer prepare for the draw, when one exists;
//   2. syncs the dirty objects selected by mDrawDirtyObjects;
//   3. syncs all dirty bits via syncDirtyBits(), whose result is returned.
ANGLE_INLINE angle::Result Context::prepareForDraw(PrimitiveMode mode)
{
if (mGLES1Renderer)
{
ANGLE_TRY(mGLES1Renderer->prepareForDraw(mode, this, &mGLState));
}
ANGLE_TRY(syncDirtyObjects(mDrawDirtyObjects));
// With robust resource init enabled, the draw framebuffer must have no resource left
// that still needs initialization once the dirty objects are synced.
ASSERT(!isRobustResourceInitEnabled() ||
!mGLState.getDrawFramebuffer()->hasResourceThatNeedsInit());
return syncDirtyBits();
}
angle::Result Context::prepareForClear(GLbitfield mask)
{
ANGLE_TRY(syncDirtyObjects(mClearDirtyObjects));
......@@ -8495,7 +8445,7 @@ void StateCache::updateValidDrawElementsTypes(Context *context)
// Refreshes the cached flag that records whether a current transform feedback object
// exists, is active and is not paused.
void StateCache::updateTransformFeedbackActiveUnpaused(Context *context)
{
// NOTE(review): the next two identical lines look like the removed/added pair of a
// whitespace-only change in this diff hunk; presumably only one of them exists in the
// actual file.
TransformFeedback *xfb = context->getGLState().getCurrentTransformFeedback();
TransformFeedback *xfb = context->getGLState().getCurrentTransformFeedback();
mCachedTransformFeedbackActiveUnpaused = xfb && xfb->isActive() && !xfb->isPaused();
}
} // namespace gl
......@@ -1912,6 +1912,7 @@ class Context final : public egl::LabeledObject, angle::NonCopyable, public angl
std::shared_ptr<angle::WorkerThreadPool> mThreadPool;
};
} // namespace gl
#endif // LIBANGLE_CONTEXT_H_
//
// Copyright (c) 2018 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Context.inl.h: Defines inline functions of gl::Context class
// Has to be included after libANGLE/Context.h when using one
// of the defined functions
#ifndef LIBANGLE_CONTEXT_INL_H_
#define LIBANGLE_CONTEXT_INL_H_
#include "libANGLE/GLES1Renderer.h"
#include "libANGLE/renderer/ContextImpl.h"
// Error handler passed to ANGLE_TRY_TEMPLATE: discards the error value X and returns
// from the enclosing function. Because it expands to a bare `return;`, it is only
// usable inside functions that return void.
// NOTE(review): these macros live in a header, so they are visible to every file that
// includes it.
#define ANGLE_HANDLE_ERR(X) \
(void)(X); \
return;
// Evaluates EXPR through ANGLE_TRY_TEMPLATE with ANGLE_HANDLE_ERR as the handler.
// NOTE(review): the expansion already ends with ';', so a call site written as
// `ANGLE_CONTEXT_TRY(...);` yields an extra empty statement.
#define ANGLE_CONTEXT_TRY(EXPR) ANGLE_TRY_TEMPLATE(EXPR, ANGLE_HANDLE_ERR);
namespace gl
{
// Hands every currently dirty state bit to the backend implementation and then clears
// the dirty bits on mGLState. mAllDirtyBits is passed as the mask argument of syncState.
// Returns angle::Result::Continue once the bits have been cleared.
// NOTE(review): ANGLE_TRY presumably returns early on failure, leaving the dirty bits
// set - confirm against the macro definition.
ANGLE_INLINE angle::Result Context::syncDirtyBits()
{
const State::DirtyBits &dirtyBits = mGLState.getDirtyBits();
ANGLE_TRY(mImplementation->syncState(this, dirtyBits, mAllDirtyBits));
mGLState.clearDirtyBits();
return angle::Result::Continue;
}
// Masked variant: only the dirty bits that are also set in bitMask are handed to the
// backend implementation, and only those bits are cleared afterwards.
// bitMask is also forwarded as the mask argument of syncState.
// Returns angle::Result::Continue once the selected bits have been cleared.
ANGLE_INLINE angle::Result Context::syncDirtyBits(const State::DirtyBits &bitMask)
{
const State::DirtyBits &dirtyBits = (mGLState.getDirtyBits() & bitMask);
ANGLE_TRY(mImplementation->syncState(this, dirtyBits, bitMask));
mGLState.clearDirtyBits(dirtyBits);
return angle::Result::Continue;
}
// Forwards to mGLState.syncDirtyObjects for the objects selected by objectMask and
// returns its result unchanged.
ANGLE_INLINE angle::Result Context::syncDirtyObjects(const State::DirtyObjects &objectMask)
{
return mGLState.syncDirtyObjects(this, objectMask);
}
// Brings the context state up to date before a draw call, in this order:
//   1. lets the GLES1 renderer prepare for the draw, when one exists;
//   2. syncs the dirty objects selected by mDrawDirtyObjects;
//   3. syncs all dirty bits via syncDirtyBits(), whose result is returned.
ANGLE_INLINE angle::Result Context::prepareForDraw(PrimitiveMode mode)
{
if (mGLES1Renderer)
{
ANGLE_TRY(mGLES1Renderer->prepareForDraw(mode, this, &mGLState));
}
ANGLE_TRY(syncDirtyObjects(mDrawDirtyObjects));
// With robust resource init enabled, the draw framebuffer must have no resource left
// that still needs initialization once the dirty objects are synced.
ASSERT(!isRobustResourceInitEnabled() ||
!mGLState.getDrawFramebuffer()->hasResourceThatNeedsInit());
return syncDirtyBits();
}
// Issues an indexed draw through the backend implementation. Marked ANGLE_INLINE and
// defined in this header so that including translation units can inline the call.
//
// Nothing is drawn when noopDraw(mode, count) reports that the call draws no
// primitives. Otherwise the context state is synced with prepareForDraw(mode) and the
// draw is forwarded to mImplementation->drawElements.
//
// ANGLE_CONTEXT_TRY uses ANGLE_HANDLE_ERR, which discards the error and returns.
// NOTE(review): this relies on ANGLE_TRY_TEMPLATE invoking the handler only on
// failure - confirm against its definition.
ANGLE_INLINE void Context::drawElements(PrimitiveMode mode,
GLsizei count,
DrawElementsType type,
const void *indices)
{
// No-op if count draws no primitives for given mode
if (noopDraw(mode, count))
{
return;
}
ANGLE_CONTEXT_TRY(prepareForDraw(mode));
ANGLE_CONTEXT_TRY(mImplementation->drawElements(this, mode, count, type, indices));
}
} // namespace gl
#endif // LIBANGLE_CONTEXT_INL_H_
......@@ -1741,11 +1741,6 @@ const std::vector<GLenum> &Program::getOutputVariableTypes() const
ASSERT(mLinkResolved);
return mState.mOutputVariableTypes;
}
// Returns mState.mActiveOutputVariables by value.
// Asserts that the program's link has been resolved (mLinkResolved) before reading it.
DrawBufferMask Program::getActiveOutputVariables() const
{
ASSERT(mLinkResolved);
return mState.mActiveOutputVariables;
}
template <typename T>
void Program::getResourceName(GLuint index,
......@@ -1842,12 +1837,6 @@ const ProgramBindings &Program::getFragmentInputBindings() const
return mFragmentInputBindings;
}
// Returns the value of mState.getNumViews().
// Asserts that the program's link has been resolved (mLinkResolved) before reading it.
int Program::getNumViews() const
{
ASSERT(mLinkResolved);
return mState.getNumViews();
}
ComponentTypeMask Program::getDrawBufferTypeMask() const
{
ASSERT(mLinkResolved);
......
......@@ -580,7 +580,11 @@ class Program final : angle::NonCopyable, public LabeledObject
GLint getFragDataLocation(const std::string &name) const;
size_t getOutputResourceCount() const;
const std::vector<GLenum> &getOutputVariableTypes() const;
DrawBufferMask getActiveOutputVariables() const;
// Returns mState.mActiveOutputVariables by value. Defined in the class body.
// Asserts that the program's link has been resolved (mLinkResolved) before reading it.
DrawBufferMask getActiveOutputVariables() const
{
ASSERT(mLinkResolved);
return mState.mActiveOutputVariables;
}
// EXT_blend_func_extended
GLint getFragDataIndex(const std::string &name) const;
......@@ -817,7 +821,12 @@ class Program final : angle::NonCopyable, public LabeledObject
const ProgramBindings &getUniformLocationBindings() const;
const ProgramBindings &getFragmentInputBindings() const;
int getNumViews() const;
// Returns the value of mState.getNumViews(). Defined in the class body.
// Asserts that the program's link has been resolved (mLinkResolved) before reading it.
int getNumViews() const
{
ASSERT(mLinkResolved);
return mState.getNumViews();
}
bool usesMultiview() const { return mState.usesMultiview(); }
ComponentTypeMask getDrawBufferTypeMask() const;
......
......@@ -214,11 +214,19 @@ ANGLE_INLINE angle::Result ContextGL::setDrawElementsState(const gl::Context *co
const gl::Program *program = glState.getProgram();
const gl::VertexArray *vao = glState.getVertexArray();
const VertexArrayGL *vaoGL = GetImplAs<VertexArrayGL>(vao);
ANGLE_TRY(vaoGL->syncDrawElementsState(context, program->getActiveAttribLocationsMask(), count,
type, indices, instanceCount,
glState.isPrimitiveRestartEnabled(), outIndices));
const gl::StateCache &stateCache = context->getStateCache();
if (stateCache.hasAnyActiveClientAttrib() || vao->getElementArrayBuffer() == nullptr)
{
const VertexArrayGL *vaoGL = GetImplAs<VertexArrayGL>(vao);
ANGLE_TRY(vaoGL->syncDrawElementsState(context, program->getActiveAttribLocationsMask(),
count, type, indices, instanceCount,
glState.isPrimitiveRestartEnabled(), outIndices));
}
else
{
*outIndices = indices;
}
if (context->getExtensions().webglCompatibility)
{
......@@ -286,7 +294,8 @@ angle::Result ContextGL::drawElements(const gl::Context *context,
gl::DrawElementsType type,
const void *indices)
{
const gl::Program *program = context->getGLState().getProgram();
const gl::State &glState = context->getGLState();
const gl::Program *program = glState.getProgram();
const bool usesMultiview = program->usesMultiview();
const GLsizei instanceCount = usesMultiview ? program->getNumViews() : 0;
const void *drawIndexPtr = nullptr;
......
......@@ -112,19 +112,6 @@ angle::Result VertexArrayGL::syncClientSideData(const gl::Context *context,
gl::DrawElementsType::InvalidEnum, nullptr, instanceCount, false, nullptr);
}
// Thin wrapper over syncDrawState for indexed draws: forwards every argument unchanged
// and returns syncDrawState's result.
// NOTE(review): the literal 0 passed as the third argument is presumably the
// first-vertex offset, which indexed draws do not use - confirm against syncDrawState.
angle::Result VertexArrayGL::syncDrawElementsState(const gl::Context *context,
const gl::AttributesMask &activeAttributesMask,
GLsizei count,
gl::DrawElementsType type,
const void *indices,
GLsizei instanceCount,
bool primitiveRestartEnabled,
const void **outIndices) const
{
return syncDrawState(context, activeAttributesMask, 0, count, type, indices, instanceCount,
primitiveRestartEnabled, outIndices);
}
void VertexArrayGL::updateElementArrayBufferBinding(const gl::Context *context) const
{
gl::Buffer *elementArrayBuffer = mState.getElementArrayBuffer();
......
......@@ -11,6 +11,10 @@
#include "libANGLE/renderer/VertexArrayImpl.h"
#include "common/mathutil.h"
#include "libANGLE/Context.h"
#include "libANGLE/renderer/gl/ContextGL.h"
namespace rx
{
......@@ -131,6 +135,21 @@ class VertexArrayGL : public VertexArrayImpl
mutable size_t mStreamingArrayBufferSize;
mutable GLuint mStreamingArrayBuffer;
};
// Thin wrapper over syncDrawState for indexed draws: forwards every argument unchanged
// and returns syncDrawState's result. Marked ANGLE_INLINE and defined in this header,
// after the class, so that callers can inline the forwarding call.
// NOTE(review): the literal 0 passed as the third argument is presumably the
// first-vertex offset, which indexed draws do not use - confirm against syncDrawState.
ANGLE_INLINE angle::Result VertexArrayGL::syncDrawElementsState(
const gl::Context *context,
const gl::AttributesMask &activeAttributesMask,
GLsizei count,
gl::DrawElementsType type,
const void *indices,
GLsizei instanceCount,
bool primitiveRestartEnabled,
const void **outIndices) const
{
return syncDrawState(context, activeAttributesMask, 0, count, type, indices, instanceCount,
primitiveRestartEnabled, outIndices);
}
} // namespace rx
#endif // LIBANGLE_RENDERER_GL_VERTEXARRAYGL_H_
......@@ -11,6 +11,7 @@
#include "libGLESv2/entry_points_gles_1_0_autogen.h"
#include "libANGLE/Context.h"
#include "libANGLE/Context.inl.h"
#include "libANGLE/validationES1.h"
#include "libGLESv2/entry_points_utils.h"
#include "libGLESv2/global_state.h"
......
......@@ -11,6 +11,7 @@
#include "libGLESv2/entry_points_gles_2_0_autogen.h"
#include "libANGLE/Context.h"
#include "libANGLE/Context.inl.h"
#include "libANGLE/validationES2.h"
#include "libGLESv2/entry_points_utils.h"
#include "libGLESv2/global_state.h"
......
......@@ -11,6 +11,7 @@
#include "libGLESv2/entry_points_gles_3_0_autogen.h"
#include "libANGLE/Context.h"
#include "libANGLE/Context.inl.h"
#include "libANGLE/validationES3.h"
#include "libGLESv2/entry_points_utils.h"
#include "libGLESv2/global_state.h"
......
......@@ -11,6 +11,7 @@
#include "libGLESv2/entry_points_gles_3_1_autogen.h"
#include "libANGLE/Context.h"
#include "libANGLE/Context.inl.h"
#include "libANGLE/validationES31.h"
#include "libGLESv2/entry_points_utils.h"
#include "libGLESv2/global_state.h"
......
......@@ -11,6 +11,7 @@
#include "libGLESv2/entry_points_gles_ext_autogen.h"
#include "libANGLE/Context.h"
#include "libANGLE/Context.inl.h"
#include "libANGLE/validationESEXT.h"
#include "libGLESv2/entry_points_utils.h"
#include "libGLESv2/global_state.h"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment