Commit cb9609fe by Markus Tavenrath Committed by Commit Bot

Optimize glDrawElements performance

A call to glDrawElements results in a calling depth of up to 4:
* glDrawElements
* gl::Context::DrawElements
* rx::ContextGL::DrawElements
* VertexArrayGL::syncDrawState

Each function call has to save/restore a lot of registers, which results in a stall in the prologue of rx::ContextGL::DrawElements due to memory bandwidth limitations.

The main change is that the function gl::Context::DrawElements is now inlined to reduce the calling depth by one. In addition, the call to ContextGL::syncDrawElementsState is now protected so that it gets called only if it's required. Finally, a few small getter functions have been inlined where the calling code was bigger than the actual function.

In total this change improves performance of the DrawElementsPerfBenchmark.Run/gl benchmark by 16%.

Bug: angleproject:2966
Change-Id: I423d18452f2f5b520ab52850fda2054e1da86991
Reviewed-on: https://chromium-review.googlesource.com/c/1389988
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Commit-Queue: Markus Tavenrath <matavenrath@nvidia.com>
parent 7f6b3674
...@@ -254,6 +254,7 @@ template_header_includes = """#include <GLES{major}/gl{major}{minor}.h> ...@@ -254,6 +254,7 @@ template_header_includes = """#include <GLES{major}/gl{major}{minor}.h>
template_sources_includes = """#include "libGLESv2/entry_points_gles_{}_autogen.h" template_sources_includes = """#include "libGLESv2/entry_points_gles_{}_autogen.h"
#include "libANGLE/Context.h" #include "libANGLE/Context.h"
#include "libANGLE/Context.inl.h"
#include "libANGLE/validationES{}{}.h" #include "libANGLE/validationES{}{}.h"
#include "libGLESv2/entry_points_utils.h" #include "libGLESv2/entry_points_utils.h"
#include "libGLESv2/global_state.h" #include "libGLESv2/global_state.h"
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
"D3D11 format:src/libANGLE/renderer/d3d/d3d11/gen_texture_format_table.py": "D3D11 format:src/libANGLE/renderer/d3d/d3d11/gen_texture_format_table.py":
"15fb2a9b3f81e39a22090bce2f071185", "15fb2a9b3f81e39a22090bce2f071185",
"D3D11 format:src/libANGLE/renderer/d3d/d3d11/texture_format_data.json": "D3D11 format:src/libANGLE/renderer/d3d/d3d11/texture_format_data.json":
"70a1798cb4e6ebabe31be5bec40c6d6f", "d7483ece817e819588f4ca157716dc7b",
"D3D11 format:src/libANGLE/renderer/d3d/d3d11/texture_format_map.json": "D3D11 format:src/libANGLE/renderer/d3d/d3d11/texture_format_map.json":
"805d30e2443935e3a3bd68839699e171", "805d30e2443935e3a3bd68839699e171",
"DXGI format support:src/libANGLE/renderer/d3d/d3d11/dxgi_support_data.json": "DXGI format support:src/libANGLE/renderer/d3d/d3d11/dxgi_support_data.json":
...@@ -62,7 +62,7 @@ ...@@ -62,7 +62,7 @@
"GL/EGL entry points:scripts/entry_point_packed_gl_enums.json": "GL/EGL entry points:scripts/entry_point_packed_gl_enums.json":
"0554a67f70407e82c872010014721099", "0554a67f70407e82c872010014721099",
"GL/EGL entry points:scripts/generate_entry_points.py": "GL/EGL entry points:scripts/generate_entry_points.py":
"9fc8f8bd28f5511108b9046d9066774c", "a959669b31f086510fb60c5b55de56d1",
"GL/EGL entry points:scripts/gl.xml": "GL/EGL entry points:scripts/gl.xml":
"b470cb06b06cbbe7adb2c8129ec85708", "b470cb06b06cbbe7adb2c8129ec85708",
"GL/EGL entry points:scripts/gl_angle_ext.xml": "GL/EGL entry points:scripts/gl_angle_ext.xml":
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
// rendering operations. It is the GLES2 specific implementation of EGLContext. // rendering operations. It is the GLES2 specific implementation of EGLContext.
#include "libANGLE/Context.h" #include "libANGLE/Context.h"
#include "libANGLE/Context.inl.h"
#include <string.h> #include <string.h>
#include <iterator> #include <iterator>
...@@ -25,7 +26,6 @@ ...@@ -25,7 +26,6 @@
#include "libANGLE/Fence.h" #include "libANGLE/Fence.h"
#include "libANGLE/Framebuffer.h" #include "libANGLE/Framebuffer.h"
#include "libANGLE/FramebufferAttachment.h" #include "libANGLE/FramebufferAttachment.h"
#include "libANGLE/GLES1Renderer.h"
#include "libANGLE/Path.h" #include "libANGLE/Path.h"
#include "libANGLE/Program.h" #include "libANGLE/Program.h"
#include "libANGLE/ProgramPipeline.h" #include "libANGLE/ProgramPipeline.h"
...@@ -42,7 +42,6 @@ ...@@ -42,7 +42,6 @@
#include "libANGLE/queryconversions.h" #include "libANGLE/queryconversions.h"
#include "libANGLE/queryutils.h" #include "libANGLE/queryutils.h"
#include "libANGLE/renderer/BufferImpl.h" #include "libANGLE/renderer/BufferImpl.h"
#include "libANGLE/renderer/ContextImpl.h"
#include "libANGLE/renderer/EGLImplFactory.h" #include "libANGLE/renderer/EGLImplFactory.h"
#include "libANGLE/renderer/Format.h" #include "libANGLE/renderer/Format.h"
#include "libANGLE/validationES.h" #include "libANGLE/validationES.h"
...@@ -2241,21 +2240,6 @@ void Context::drawArraysInstanced(PrimitiveMode mode, ...@@ -2241,21 +2240,6 @@ void Context::drawArraysInstanced(PrimitiveMode mode,
MarkTransformFeedbackBufferUsage(this, count, instanceCount); MarkTransformFeedbackBufferUsage(this, count, instanceCount);
} }
// Out-of-line (pre-change) definition of the indexed draw entry on gl::Context.
// Returns immediately for draws that noopDraw() reports as drawing nothing; otherwise
// prepares state via prepareForDraw() and forwards the call to the backend
// (mImplementation->drawElements).
void Context::drawElements(PrimitiveMode mode,
GLsizei count,
DrawElementsType type,
const void *indices)
{
// No-op if count draws no primitives for given mode
if (noopDraw(mode, count))
{
return;
}
// NOTE(review): ANGLE_CONTEXT_TRY presumably bails out of this void function when the
// wrapped expression fails, so the backend draw is skipped if preparation fails - confirm
// against the macro definition.
ANGLE_CONTEXT_TRY(prepareForDraw(mode));
ANGLE_CONTEXT_TRY(mImplementation->drawElements(this, mode, count, type, indices));
}
void Context::drawElementsInstanced(PrimitiveMode mode, void Context::drawElementsInstanced(PrimitiveMode mode,
GLsizei count, GLsizei count,
DrawElementsType type, DrawElementsType type,
...@@ -3516,40 +3500,6 @@ bool Context::noopDrawInstanced(PrimitiveMode mode, GLsizei count, GLsizei insta ...@@ -3516,40 +3500,6 @@ bool Context::noopDrawInstanced(PrimitiveMode mode, GLsizei count, GLsizei insta
return (instanceCount == 0) || noopDraw(mode, count); return (instanceCount == 0) || noopDraw(mode, count);
} }
// Passes the dirty bits currently recorded in mGLState to the backend's syncState(),
// using mAllDirtyBits as the mask argument, and then clears the recorded dirty bits.
// Returns angle::Result::Continue once the clear has happened.
ANGLE_INLINE angle::Result Context::syncDirtyBits()
{
const State::DirtyBits &dirtyBits = mGLState.getDirtyBits();
// NOTE(review): ANGLE_TRY presumably returns early on failure, leaving the dirty bits
// uncleared - confirm against the macro definition.
ANGLE_TRY(mImplementation->syncState(this, dirtyBits, mAllDirtyBits));
mGLState.clearDirtyBits();
return angle::Result::Continue;
}
// Masked variant: only the dirty bits that are also set in bitMask are passed to the
// backend's syncState() and subsequently cleared from mGLState.
// Returns angle::Result::Continue once the clear has happened.
ANGLE_INLINE angle::Result Context::syncDirtyBits(const State::DirtyBits &bitMask)
{
// The const reference binds to the temporary produced by operator&.
const State::DirtyBits &dirtyBits = (mGLState.getDirtyBits() & bitMask);
// NOTE(review): ANGLE_TRY presumably returns early on failure, leaving the selected
// dirty bits uncleared - confirm against the macro definition.
ANGLE_TRY(mImplementation->syncState(this, dirtyBits, bitMask));
mGLState.clearDirtyBits(dirtyBits);
return angle::Result::Continue;
}
// Thin forwarder: delegates to mGLState.syncDirtyObjects() with this context and the
// given object mask, and returns its result unchanged.
ANGLE_INLINE angle::Result Context::syncDirtyObjects(const State::DirtyObjects &objectMask)
{
return mGLState.syncDirtyObjects(this, objectMask);
}
// Brings context state up to date before a draw call with the given primitive mode.
// Order of operations: optional GLES1 renderer preparation, dirty-object sync for the
// draw object set (mDrawDirtyObjects), then a full dirty-bit sync whose result is returned.
ANGLE_INLINE angle::Result Context::prepareForDraw(PrimitiveMode mode)
{
// Only contexts that own a GLES1 renderer run its per-draw preparation.
if (mGLES1Renderer)
{
ANGLE_TRY(mGLES1Renderer->prepareForDraw(mode, this, &mGLState));
}
ANGLE_TRY(syncDirtyObjects(mDrawDirtyObjects));
// With robust resource init enabled, the draw framebuffer must have no resource still
// awaiting initialization at this point.
ASSERT(!isRobustResourceInitEnabled() ||
!mGLState.getDrawFramebuffer()->hasResourceThatNeedsInit());
return syncDirtyBits();
}
angle::Result Context::prepareForClear(GLbitfield mask) angle::Result Context::prepareForClear(GLbitfield mask)
{ {
ANGLE_TRY(syncDirtyObjects(mClearDirtyObjects)); ANGLE_TRY(syncDirtyObjects(mClearDirtyObjects));
...@@ -8495,7 +8445,7 @@ void StateCache::updateValidDrawElementsTypes(Context *context) ...@@ -8495,7 +8445,7 @@ void StateCache::updateValidDrawElementsTypes(Context *context)
void StateCache::updateTransformFeedbackActiveUnpaused(Context *context) void StateCache::updateTransformFeedbackActiveUnpaused(Context *context)
{ {
TransformFeedback *xfb = context->getGLState().getCurrentTransformFeedback(); TransformFeedback *xfb = context->getGLState().getCurrentTransformFeedback();
mCachedTransformFeedbackActiveUnpaused = xfb && xfb->isActive() && !xfb->isPaused(); mCachedTransformFeedbackActiveUnpaused = xfb && xfb->isActive() && !xfb->isPaused();
} }
} // namespace gl } // namespace gl
...@@ -1912,6 +1912,7 @@ class Context final : public egl::LabeledObject, angle::NonCopyable, public angl ...@@ -1912,6 +1912,7 @@ class Context final : public egl::LabeledObject, angle::NonCopyable, public angl
std::shared_ptr<angle::WorkerThreadPool> mThreadPool; std::shared_ptr<angle::WorkerThreadPool> mThreadPool;
}; };
} // namespace gl } // namespace gl
#endif // LIBANGLE_CONTEXT_H_ #endif // LIBANGLE_CONTEXT_H_
//
// Copyright (c) 2018 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Context.inl.h: Defines inline functions of gl::Context class
// Has to be included after libANGLE/Context.h when using one
// of the defined functions
#ifndef LIBANGLE_CONTEXT_INL_H_
#define LIBANGLE_CONTEXT_INL_H_
#include "libANGLE/GLES1Renderer.h"
#include "libANGLE/renderer/ContextImpl.h"
// Error handler used by ANGLE_CONTEXT_TRY: explicitly discards its argument with a void
// cast and returns from the enclosing function, so it is only usable in functions
// returning void.
#define ANGLE_HANDLE_ERR(X) \
(void)(X); \
return;
// Wraps ANGLE_TRY_TEMPLATE with ANGLE_HANDLE_ERR as the handler. Note that the expansion
// already ends in a semicolon.
// NOTE(review): ANGLE_TRY_TEMPLATE is defined elsewhere; presumably it evaluates EXPR and
// invokes the handler only on failure - confirm.
#define ANGLE_CONTEXT_TRY(EXPR) ANGLE_TRY_TEMPLATE(EXPR, ANGLE_HANDLE_ERR);
namespace gl
{
// Passes the dirty bits currently recorded in mGLState to the backend's syncState(),
// using mAllDirtyBits as the mask argument, and then clears the recorded dirty bits.
// Returns angle::Result::Continue once the clear has happened.
ANGLE_INLINE angle::Result Context::syncDirtyBits()
{
const State::DirtyBits &dirtyBits = mGLState.getDirtyBits();
// NOTE(review): ANGLE_TRY presumably returns early on failure, leaving the dirty bits
// uncleared - confirm against the macro definition.
ANGLE_TRY(mImplementation->syncState(this, dirtyBits, mAllDirtyBits));
mGLState.clearDirtyBits();
return angle::Result::Continue;
}
// Masked variant: only the dirty bits that are also set in bitMask are passed to the
// backend's syncState() and subsequently cleared from mGLState.
// Returns angle::Result::Continue once the clear has happened.
ANGLE_INLINE angle::Result Context::syncDirtyBits(const State::DirtyBits &bitMask)
{
// The const reference binds to the temporary produced by operator&.
const State::DirtyBits &dirtyBits = (mGLState.getDirtyBits() & bitMask);
// NOTE(review): ANGLE_TRY presumably returns early on failure, leaving the selected
// dirty bits uncleared - confirm against the macro definition.
ANGLE_TRY(mImplementation->syncState(this, dirtyBits, bitMask));
mGLState.clearDirtyBits(dirtyBits);
return angle::Result::Continue;
}
// Thin forwarder: delegates to mGLState.syncDirtyObjects() with this context and the
// given object mask, and returns its result unchanged.
ANGLE_INLINE angle::Result Context::syncDirtyObjects(const State::DirtyObjects &objectMask)
{
return mGLState.syncDirtyObjects(this, objectMask);
}
// Brings context state up to date before a draw call with the given primitive mode.
// Order of operations: optional GLES1 renderer preparation, dirty-object sync for the
// draw object set (mDrawDirtyObjects), then a full dirty-bit sync whose result is returned.
ANGLE_INLINE angle::Result Context::prepareForDraw(PrimitiveMode mode)
{
// Only contexts that own a GLES1 renderer run its per-draw preparation.
if (mGLES1Renderer)
{
ANGLE_TRY(mGLES1Renderer->prepareForDraw(mode, this, &mGLState));
}
ANGLE_TRY(syncDirtyObjects(mDrawDirtyObjects));
// With robust resource init enabled, the draw framebuffer must have no resource still
// awaiting initialization at this point.
ASSERT(!isRobustResourceInitEnabled() ||
!mGLState.getDrawFramebuffer()->hasResourceThatNeedsInit());
return syncDirtyBits();
}
// Header-inline definition of the indexed draw entry on gl::Context.
// Returns immediately for draws that noopDraw() reports as drawing nothing; otherwise
// prepares state via prepareForDraw() and forwards the call to the backend
// (mImplementation->drawElements).
ANGLE_INLINE void Context::drawElements(PrimitiveMode mode,
GLsizei count,
DrawElementsType type,
const void *indices)
{
// No-op if count draws no primitives for given mode
if (noopDraw(mode, count))
{
return;
}
// NOTE(review): ANGLE_CONTEXT_TRY presumably bails out of this void function when the
// wrapped expression fails, so the backend draw is skipped if preparation fails - confirm
// against ANGLE_TRY_TEMPLATE.
ANGLE_CONTEXT_TRY(prepareForDraw(mode));
ANGLE_CONTEXT_TRY(mImplementation->drawElements(this, mode, count, type, indices));
}
} // namespace gl
#endif // LIBANGLE_CONTEXT_INL_H_
...@@ -1741,11 +1741,6 @@ const std::vector<GLenum> &Program::getOutputVariableTypes() const ...@@ -1741,11 +1741,6 @@ const std::vector<GLenum> &Program::getOutputVariableTypes() const
ASSERT(mLinkResolved); ASSERT(mLinkResolved);
return mState.mOutputVariableTypes; return mState.mOutputVariableTypes;
} }
// Returns the mActiveOutputVariables mask stored in the program state.
// Asserts that mLinkResolved is set before reading it.
DrawBufferMask Program::getActiveOutputVariables() const
{
ASSERT(mLinkResolved);
return mState.mActiveOutputVariables;
}
template <typename T> template <typename T>
void Program::getResourceName(GLuint index, void Program::getResourceName(GLuint index,
...@@ -1842,12 +1837,6 @@ const ProgramBindings &Program::getFragmentInputBindings() const ...@@ -1842,12 +1837,6 @@ const ProgramBindings &Program::getFragmentInputBindings() const
return mFragmentInputBindings; return mFragmentInputBindings;
} }
// Returns the view count reported by the program state (mState.getNumViews()).
// Asserts that mLinkResolved is set before reading it.
int Program::getNumViews() const
{
ASSERT(mLinkResolved);
return mState.getNumViews();
}
ComponentTypeMask Program::getDrawBufferTypeMask() const ComponentTypeMask Program::getDrawBufferTypeMask() const
{ {
ASSERT(mLinkResolved); ASSERT(mLinkResolved);
......
...@@ -580,7 +580,11 @@ class Program final : angle::NonCopyable, public LabeledObject ...@@ -580,7 +580,11 @@ class Program final : angle::NonCopyable, public LabeledObject
GLint getFragDataLocation(const std::string &name) const; GLint getFragDataLocation(const std::string &name) const;
size_t getOutputResourceCount() const; size_t getOutputResourceCount() const;
const std::vector<GLenum> &getOutputVariableTypes() const; const std::vector<GLenum> &getOutputVariableTypes() const;
DrawBufferMask getActiveOutputVariables() const; DrawBufferMask getActiveOutputVariables() const
{
ASSERT(mLinkResolved);
return mState.mActiveOutputVariables;
}
// EXT_blend_func_extended // EXT_blend_func_extended
GLint getFragDataIndex(const std::string &name) const; GLint getFragDataIndex(const std::string &name) const;
...@@ -817,7 +821,12 @@ class Program final : angle::NonCopyable, public LabeledObject ...@@ -817,7 +821,12 @@ class Program final : angle::NonCopyable, public LabeledObject
const ProgramBindings &getUniformLocationBindings() const; const ProgramBindings &getUniformLocationBindings() const;
const ProgramBindings &getFragmentInputBindings() const; const ProgramBindings &getFragmentInputBindings() const;
int getNumViews() const; int getNumViews() const
{
ASSERT(mLinkResolved);
return mState.getNumViews();
}
bool usesMultiview() const { return mState.usesMultiview(); } bool usesMultiview() const { return mState.usesMultiview(); }
ComponentTypeMask getDrawBufferTypeMask() const; ComponentTypeMask getDrawBufferTypeMask() const;
......
...@@ -214,11 +214,19 @@ ANGLE_INLINE angle::Result ContextGL::setDrawElementsState(const gl::Context *co ...@@ -214,11 +214,19 @@ ANGLE_INLINE angle::Result ContextGL::setDrawElementsState(const gl::Context *co
const gl::Program *program = glState.getProgram(); const gl::Program *program = glState.getProgram();
const gl::VertexArray *vao = glState.getVertexArray(); const gl::VertexArray *vao = glState.getVertexArray();
const VertexArrayGL *vaoGL = GetImplAs<VertexArrayGL>(vao);
ANGLE_TRY(vaoGL->syncDrawElementsState(context, program->getActiveAttribLocationsMask(), count, const gl::StateCache &stateCache = context->getStateCache();
type, indices, instanceCount, if (stateCache.hasAnyActiveClientAttrib() || vao->getElementArrayBuffer() == nullptr)
glState.isPrimitiveRestartEnabled(), outIndices)); {
const VertexArrayGL *vaoGL = GetImplAs<VertexArrayGL>(vao);
ANGLE_TRY(vaoGL->syncDrawElementsState(context, program->getActiveAttribLocationsMask(),
count, type, indices, instanceCount,
glState.isPrimitiveRestartEnabled(), outIndices));
}
else
{
*outIndices = indices;
}
if (context->getExtensions().webglCompatibility) if (context->getExtensions().webglCompatibility)
{ {
...@@ -286,7 +294,8 @@ angle::Result ContextGL::drawElements(const gl::Context *context, ...@@ -286,7 +294,8 @@ angle::Result ContextGL::drawElements(const gl::Context *context,
gl::DrawElementsType type, gl::DrawElementsType type,
const void *indices) const void *indices)
{ {
const gl::Program *program = context->getGLState().getProgram(); const gl::State &glState = context->getGLState();
const gl::Program *program = glState.getProgram();
const bool usesMultiview = program->usesMultiview(); const bool usesMultiview = program->usesMultiview();
const GLsizei instanceCount = usesMultiview ? program->getNumViews() : 0; const GLsizei instanceCount = usesMultiview ? program->getNumViews() : 0;
const void *drawIndexPtr = nullptr; const void *drawIndexPtr = nullptr;
......
...@@ -112,19 +112,6 @@ angle::Result VertexArrayGL::syncClientSideData(const gl::Context *context, ...@@ -112,19 +112,6 @@ angle::Result VertexArrayGL::syncClientSideData(const gl::Context *context,
gl::DrawElementsType::InvalidEnum, nullptr, instanceCount, false, nullptr); gl::DrawElementsType::InvalidEnum, nullptr, instanceCount, false, nullptr);
} }
// Indexed-draw convenience wrapper around syncDrawState(): forwards every argument
// unchanged and supplies a literal 0 for syncDrawState's third parameter.
// NOTE(review): the third parameter is presumably the first-vertex offset, which has no
// meaning for indexed draws - confirm against syncDrawState's declaration.
angle::Result VertexArrayGL::syncDrawElementsState(const gl::Context *context,
const gl::AttributesMask &activeAttributesMask,
GLsizei count,
gl::DrawElementsType type,
const void *indices,
GLsizei instanceCount,
bool primitiveRestartEnabled,
const void **outIndices) const
{
return syncDrawState(context, activeAttributesMask, 0, count, type, indices, instanceCount,
primitiveRestartEnabled, outIndices);
}
void VertexArrayGL::updateElementArrayBufferBinding(const gl::Context *context) const void VertexArrayGL::updateElementArrayBufferBinding(const gl::Context *context) const
{ {
gl::Buffer *elementArrayBuffer = mState.getElementArrayBuffer(); gl::Buffer *elementArrayBuffer = mState.getElementArrayBuffer();
......
...@@ -11,6 +11,10 @@ ...@@ -11,6 +11,10 @@
#include "libANGLE/renderer/VertexArrayImpl.h" #include "libANGLE/renderer/VertexArrayImpl.h"
#include "common/mathutil.h"
#include "libANGLE/Context.h"
#include "libANGLE/renderer/gl/ContextGL.h"
namespace rx namespace rx
{ {
...@@ -131,6 +135,21 @@ class VertexArrayGL : public VertexArrayImpl ...@@ -131,6 +135,21 @@ class VertexArrayGL : public VertexArrayImpl
mutable size_t mStreamingArrayBufferSize; mutable size_t mStreamingArrayBufferSize;
mutable GLuint mStreamingArrayBuffer; mutable GLuint mStreamingArrayBuffer;
}; };
// Header-inline indexed-draw wrapper around syncDrawState(): forwards every argument
// unchanged and supplies a literal 0 for syncDrawState's third parameter.
// NOTE(review): the third parameter is presumably the first-vertex offset, which has no
// meaning for indexed draws - confirm against syncDrawState's declaration.
ANGLE_INLINE angle::Result VertexArrayGL::syncDrawElementsState(
const gl::Context *context,
const gl::AttributesMask &activeAttributesMask,
GLsizei count,
gl::DrawElementsType type,
const void *indices,
GLsizei instanceCount,
bool primitiveRestartEnabled,
const void **outIndices) const
{
return syncDrawState(context, activeAttributesMask, 0, count, type, indices, instanceCount,
primitiveRestartEnabled, outIndices);
}
} // namespace rx } // namespace rx
#endif // LIBANGLE_RENDERER_GL_VERTEXARRAYGL_H_ #endif // LIBANGLE_RENDERER_GL_VERTEXARRAYGL_H_
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "libGLESv2/entry_points_gles_1_0_autogen.h" #include "libGLESv2/entry_points_gles_1_0_autogen.h"
#include "libANGLE/Context.h" #include "libANGLE/Context.h"
#include "libANGLE/Context.inl.h"
#include "libANGLE/validationES1.h" #include "libANGLE/validationES1.h"
#include "libGLESv2/entry_points_utils.h" #include "libGLESv2/entry_points_utils.h"
#include "libGLESv2/global_state.h" #include "libGLESv2/global_state.h"
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "libGLESv2/entry_points_gles_2_0_autogen.h" #include "libGLESv2/entry_points_gles_2_0_autogen.h"
#include "libANGLE/Context.h" #include "libANGLE/Context.h"
#include "libANGLE/Context.inl.h"
#include "libANGLE/validationES2.h" #include "libANGLE/validationES2.h"
#include "libGLESv2/entry_points_utils.h" #include "libGLESv2/entry_points_utils.h"
#include "libGLESv2/global_state.h" #include "libGLESv2/global_state.h"
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "libGLESv2/entry_points_gles_3_0_autogen.h" #include "libGLESv2/entry_points_gles_3_0_autogen.h"
#include "libANGLE/Context.h" #include "libANGLE/Context.h"
#include "libANGLE/Context.inl.h"
#include "libANGLE/validationES3.h" #include "libANGLE/validationES3.h"
#include "libGLESv2/entry_points_utils.h" #include "libGLESv2/entry_points_utils.h"
#include "libGLESv2/global_state.h" #include "libGLESv2/global_state.h"
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "libGLESv2/entry_points_gles_3_1_autogen.h" #include "libGLESv2/entry_points_gles_3_1_autogen.h"
#include "libANGLE/Context.h" #include "libANGLE/Context.h"
#include "libANGLE/Context.inl.h"
#include "libANGLE/validationES31.h" #include "libANGLE/validationES31.h"
#include "libGLESv2/entry_points_utils.h" #include "libGLESv2/entry_points_utils.h"
#include "libGLESv2/global_state.h" #include "libGLESv2/global_state.h"
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "libGLESv2/entry_points_gles_ext_autogen.h" #include "libGLESv2/entry_points_gles_ext_autogen.h"
#include "libANGLE/Context.h" #include "libANGLE/Context.h"
#include "libANGLE/Context.inl.h"
#include "libANGLE/validationESEXT.h" #include "libANGLE/validationESEXT.h"
#include "libGLESv2/entry_points_utils.h" #include "libGLESv2/entry_points_utils.h"
#include "libGLESv2/global_state.h" #include "libGLESv2/global_state.h"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment