Commit 4166f014, authored by Jamie Madill and committed by Commit Bot

D3D11: Optimize Renderer11::drawArrays.

Uses inlining and more efficient computation re-use to streamline this function.

Bug: angleproject:2575
Change-Id: Ib13e32811f56ec9a010ed66f298d4235e5c6807d
Reviewed-on: https://chromium-review.googlesource.com/1067120
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Commit-Queue: Jamie Madill <jmadill@chromium.org>
parent bf7b95db
......@@ -1327,14 +1327,9 @@ void State::setTransformFeedbackBinding(const Context *context,
mDirtyBits.set(DIRTY_BIT_TRANSFORM_FEEDBACK_BINDING);
}
TransformFeedback *State::getCurrentTransformFeedback() const
{
return mTransformFeedback.get();
}
bool State::isTransformFeedbackActiveUnpaused() const
{
TransformFeedback *curTransformFeedback = getCurrentTransformFeedback();
TransformFeedback *curTransformFeedback = mTransformFeedback.get();
return curTransformFeedback && curTransformFeedback->isActive() &&
!curTransformFeedback->isPaused();
}
......
......@@ -215,7 +215,8 @@ class State : public angle::ObserverInterface, angle::NonCopyable
// Transform feedback object (not buffer) binding manipulation
void setTransformFeedbackBinding(const Context *context, TransformFeedback *transformFeedback);
TransformFeedback *getCurrentTransformFeedback() const;
TransformFeedback *getCurrentTransformFeedback() const { return mTransformFeedback.get(); }
bool isTransformFeedbackActiveUnpaused() const;
bool removeTransformFeedbackBinding(const Context *context, GLuint transformFeedback);
......
......@@ -100,12 +100,6 @@ GLint DrawCallParams::firstVertex() const
return mFirstVertex;
}
size_t DrawCallParams::vertexCount() const
{
ASSERT(!isDrawElements() || mIndexRange.valid());
return mVertexCount;
}
GLsizei DrawCallParams::indexCount() const
{
ASSERT(isDrawElements());
......
......@@ -99,7 +99,12 @@ class DrawCallParams final : angle::NonCopyable
// This value is the sum of 'baseVertex' and the first indexed vertex for DrawElements calls.
GLint firstVertex() const;
size_t vertexCount() const;
size_t vertexCount() const
{
ASSERT(!isDrawElements() || mIndexRange.valid());
return mVertexCount;
}
GLsizei indexCount() const;
GLint baseVertex() const;
GLenum type() const;
......
......@@ -296,6 +296,8 @@ class ProgramD3D : public ProgramImpl
const std::vector<D3DUniform *> &getD3DUniforms() const { return mD3DUniforms; }
void markUniformsClean();
const gl::ProgramState &getState() const { return mState; }
private:
// These forward-declared tasks are used for multi-thread shader compiles.
class GetExecutableTask;
......
......@@ -14,9 +14,9 @@
#include "common/debug.h"
#include "libANGLE/Framebuffer.h"
#include "libANGLE/FramebufferAttachment.h"
#include "libANGLE/renderer/d3d/FramebufferD3D.h"
#include "libANGLE/renderer/d3d/d3d11/renderer11_utils.h"
#include "libANGLE/renderer/d3d/d3d11/Framebuffer11.h"
#include "libANGLE/renderer/d3d/d3d11/Renderer11.h"
#include "libANGLE/renderer/d3d/d3d11/renderer11_utils.h"
namespace rx
{
......@@ -44,11 +44,11 @@ void RenderStateCache::clear()
// static
d3d11::BlendStateKey RenderStateCache::GetBlendStateKey(const gl::Context *context,
FramebufferD3D *framebufferD3D,
Framebuffer11 *framebuffer11,
const gl::BlendState &blendState)
{
d3d11::BlendStateKey key;
const gl::AttachmentList &colorbuffers = framebufferD3D->getColorAttachmentsForRender(context);
const gl::AttachmentList &colorbuffers = framebuffer11->getColorAttachmentsForRender(context);
const UINT8 blendStateMask =
gl_d3d11::ConvertColorMask(blendState.colorMaskRed, blendState.colorMaskGreen,
blendState.colorMaskBlue, blendState.colorMaskAlpha);
......
......@@ -18,11 +18,6 @@
#include <unordered_map>
namespace gl
{
class Framebuffer;
}
namespace std
{
template <>
......@@ -61,7 +56,7 @@ struct hash<gl::SamplerState>
namespace rx
{
class FramebufferD3D;
class Framebuffer11;
class Renderer11;
class RenderStateCache : angle::NonCopyable
......@@ -73,7 +68,7 @@ class RenderStateCache : angle::NonCopyable
void clear();
static d3d11::BlendStateKey GetBlendStateKey(const gl::Context *context,
FramebufferD3D *framebufferD3D,
Framebuffer11 *framebuffer11,
const gl::BlendState &blendState);
gl::Error getBlendState(Renderer11 *renderer,
const d3d11::BlendStateKey &key,
......
......@@ -368,17 +368,17 @@ bool IsArrayRTV(ID3D11RenderTargetView *rtv)
return false;
}
int GetAdjustedInstanceCount(const gl::Program *program, int instanceCount)
GLsizei GetAdjustedInstanceCount(const ProgramD3D *program, GLsizei instanceCount)
{
if (!program->usesMultiview())
if (!program->getState().usesMultiview())
{
return instanceCount;
}
if (instanceCount == 0)
{
return program->getNumViews();
return program->getState().getNumViews();
}
return program->getNumViews() * instanceCount;
return program->getState().getNumViews() * instanceCount;
}
const uint32_t ScratchMemoryBufferLifetime = 1000;
......@@ -1401,129 +1401,139 @@ void *Renderer11::getD3DDevice()
return reinterpret_cast<void *>(mDevice);
}
gl::Error Renderer11::drawArrays(const gl::Context *context, const gl::DrawCallParams &params)
gl::Error Renderer11::drawWithGeometryShaderAndTransformFeedback(const gl::Context *context,
gl::PrimitiveMode mode,
UINT instanceCount,
UINT vertexCount)
{
if (params.vertexCount() < static_cast<size_t>(mStateManager.getCurrentMinimumDrawCount()))
const gl::State &glState = context->getGLState();
ProgramD3D *programD3D = mStateManager.getProgramD3D();
// Since we use a geometry if-and-only-if we rewrite vertex streams, transform feedback
// won't get the correct output. To work around this, draw with *only* the stream out
// first (no pixel shader) to feed the stream out buffers and then draw again with the
// geometry shader + pixel shader to rasterize the primitives.
mStateManager.setPixelShader(nullptr);
if (instanceCount > 0)
{
return gl::NoError();
mDeviceContext->DrawInstanced(vertexCount, instanceCount, 0, 0);
}
const auto &glState = context->getGLState();
if (glState.isTransformFeedbackActiveUnpaused())
else
{
ANGLE_TRY(markTransformFeedbackUsage(context));
mDeviceContext->Draw(vertexCount, 0);
}
gl::Program *program = glState.getProgram();
ASSERT(program != nullptr);
GLsizei adjustedInstanceCount = GetAdjustedInstanceCount(program, params.instances());
ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program);
// Note: vertex indexes can be arbitrarily large.
UINT clampedVertexCount = params.getClampedVertexCount<UINT>();
rx::ShaderExecutableD3D *pixelExe = nullptr;
ANGLE_TRY(programD3D->getPixelExecutableForCachedOutputLayout(&pixelExe, nullptr));
if (programD3D->usesGeometryShader(params.mode()) &&
glState.isTransformFeedbackActiveUnpaused())
// Skip the draw call if rasterizer discard is enabled (or no fragment shader).
if (!pixelExe || glState.getRasterizerState().rasterizerDiscard)
{
// Since we use a geometry if-and-only-if we rewrite vertex streams, transform feedback
// won't get the correct output. To work around this, draw with *only* the stream out
// first (no pixel shader) to feed the stream out buffers and then draw again with the
// geometry shader + pixel shader to rasterize the primitives.
mStateManager.setPixelShader(nullptr);
if (adjustedInstanceCount > 0)
{
mDeviceContext->DrawInstanced(clampedVertexCount, adjustedInstanceCount, 0, 0);
}
else
{
mDeviceContext->Draw(clampedVertexCount, 0);
}
rx::ShaderExecutableD3D *pixelExe = nullptr;
ANGLE_TRY(programD3D->getPixelExecutableForCachedOutputLayout(&pixelExe, nullptr));
// Skip the draw call if rasterizer discard is enabled (or no fragment shader).
if (!pixelExe || glState.getRasterizerState().rasterizerDiscard)
{
return gl::NoError();
}
return gl::NoError();
}
mStateManager.setPixelShader(&GetAs<ShaderExecutable11>(pixelExe)->getPixelShader());
mStateManager.setPixelShader(&GetAs<ShaderExecutable11>(pixelExe)->getPixelShader());
// Retrieve the geometry shader.
rx::ShaderExecutableD3D *geometryExe = nullptr;
ANGLE_TRY(programD3D->getGeometryExecutableForPrimitiveType(context, params.mode(),
&geometryExe, nullptr));
// Retrieve the geometry shader.
rx::ShaderExecutableD3D *geometryExe = nullptr;
ANGLE_TRY(
programD3D->getGeometryExecutableForPrimitiveType(context, mode, &geometryExe, nullptr));
mStateManager.setGeometryShader(
&GetAs<ShaderExecutable11>(geometryExe)->getGeometryShader());
mStateManager.setGeometryShader(&GetAs<ShaderExecutable11>(geometryExe)->getGeometryShader());
if (adjustedInstanceCount > 0)
{
mDeviceContext->DrawInstanced(clampedVertexCount, adjustedInstanceCount, 0, 0);
}
else
{
mDeviceContext->Draw(clampedVertexCount, 0);
}
return gl::NoError();
if (instanceCount > 0)
{
mDeviceContext->DrawInstanced(vertexCount, instanceCount, 0, 0);
}
if (params.mode() == gl::PrimitiveMode::LineLoop)
else
{
return drawLineLoop(context, clampedVertexCount, GL_NONE, nullptr, 0,
adjustedInstanceCount);
mDeviceContext->Draw(vertexCount, 0);
}
if (params.mode() == gl::PrimitiveMode::TriangleFan)
return gl::NoError();
}
gl::Error Renderer11::drawArrays(const gl::Context *context, const gl::DrawCallParams &params)
{
if (params.vertexCount() < static_cast<size_t>(mStateManager.getCurrentMinimumDrawCount()))
{
return drawTriangleFan(context, clampedVertexCount, GL_NONE, nullptr, 0,
adjustedInstanceCount);
return gl::NoError();
}
bool useInstancedPointSpriteEmulation =
programD3D->usesPointSize() && getWorkarounds().useInstancedPointSpriteEmulation;
ProgramD3D *programD3D = mStateManager.getProgramD3D();
GLsizei adjustedInstanceCount = GetAdjustedInstanceCount(programD3D, params.instances());
// Note: vertex indexes can be arbitrarily large.
UINT clampedVertexCount = params.getClampedVertexCount<UINT>();
if (params.mode() != gl::PrimitiveMode::Points || !useInstancedPointSpriteEmulation)
const auto &glState = context->getGLState();
if (glState.getCurrentTransformFeedback() && glState.isTransformFeedbackActiveUnpaused())
{
if (adjustedInstanceCount == 0)
{
mDeviceContext->Draw(clampedVertexCount, 0);
}
else
ANGLE_TRY(markTransformFeedbackUsage(context));
if (programD3D->usesGeometryShader(params.mode()))
{
mDeviceContext->DrawInstanced(clampedVertexCount, adjustedInstanceCount, 0, 0);
return drawWithGeometryShaderAndTransformFeedback(
context, params.mode(), adjustedInstanceCount, clampedVertexCount);
}
return gl::NoError();
}
// This code should not be reachable by multi-view programs.
ASSERT(program->usesMultiview() == false);
switch (params.mode())
{
case gl::PrimitiveMode::LineLoop:
return drawLineLoop(context, clampedVertexCount, GL_NONE, nullptr, 0,
adjustedInstanceCount);
case gl::PrimitiveMode::TriangleFan:
return drawTriangleFan(context, clampedVertexCount, GL_NONE, nullptr, 0,
adjustedInstanceCount);
case gl::PrimitiveMode::Points:
if (getWorkarounds().useInstancedPointSpriteEmulation)
{
// This code should not be reachable by multi-view programs.
ASSERT(programD3D->getState().usesMultiview() == false);
// If the shader is writing to gl_PointSize, then pointsprites are being rendered.
// Emulating instanced point sprites for FL9_3 requires the topology to be
// D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST and DrawIndexedInstanced is called instead.
// If the shader is writing to gl_PointSize, then pointsprites are being rendered.
// Emulating instanced point sprites for FL9_3 requires the topology to be
// D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST and DrawIndexedInstanced is called instead.
if (adjustedInstanceCount == 0)
{
mDeviceContext->DrawIndexedInstanced(6, clampedVertexCount, 0, 0, 0);
return gl::NoError();
}
// If pointsprite emulation is used with glDrawArraysInstanced then we need to take
// a less efficent code path. Instanced rendering of emulated pointsprites requires
// a loop to draw each batch of points. An offset into the instanced data buffer is
// calculated and applied on each iteration to ensure all instances are rendered
// correctly. Each instance being rendered requires the inputlayout cache to reapply
// buffers and offsets.
for (GLsizei i = 0; i < params.instances(); i++)
{
ANGLE_TRY(mStateManager.updateVertexOffsetsForPointSpritesEmulation(
params.baseVertex(), i));
mDeviceContext->DrawIndexedInstanced(6, clampedVertexCount, 0, 0, 0);
}
// This required by updateVertexOffsets... above but is outside of the loop for
// speed.
mStateManager.invalidateVertexBuffer();
return gl::NoError();
}
break;
default:
break;
}
// "Normal" draw case.
if (adjustedInstanceCount == 0)
{
mDeviceContext->DrawIndexedInstanced(6, clampedVertexCount, 0, 0, 0);
return gl::NoError();
mDeviceContext->Draw(clampedVertexCount, 0);
}
// If pointsprite emulation is used with glDrawArraysInstanced then we need to take a less
// efficent code path. Instanced rendering of emulated pointsprites requires a loop to draw each
// batch of points. An offset into the instanced data buffer is calculated and applied on each
// iteration to ensure all instances are rendered correctly. Each instance being rendered
// requires the inputlayout cache to reapply buffers and offsets.
for (GLsizei i = 0; i < params.instances(); i++)
else
{
ANGLE_TRY(
mStateManager.updateVertexOffsetsForPointSpritesEmulation(params.baseVertex(), i));
mDeviceContext->DrawIndexedInstanced(6, clampedVertexCount, 0, 0, 0);
mDeviceContext->DrawInstanced(clampedVertexCount, adjustedInstanceCount, 0, 0);
}
// This required by updateVertexOffsets... above but is outside of the loop for speed.
mStateManager.invalidateVertexBuffer();
return gl::NoError();
}
......@@ -1544,8 +1554,8 @@ gl::Error Renderer11::drawElements(const gl::Context *context, const gl::DrawCal
int startVertex = static_cast<int>(params.firstVertex() - params.baseVertex());
int baseVertex = -startVertex;
const gl::Program *program = glState.getProgram();
GLsizei adjustedInstanceCount = GetAdjustedInstanceCount(program, params.instances());
const ProgramD3D *programD3D = mStateManager.getProgramD3D();
GLsizei adjustedInstanceCount = GetAdjustedInstanceCount(programD3D, params.instances());
if (params.mode() == gl::PrimitiveMode::LineLoop)
{
......@@ -1559,8 +1569,6 @@ gl::Error Renderer11::drawElements(const gl::Context *context, const gl::DrawCal
baseVertex, adjustedInstanceCount);
}
const ProgramD3D *programD3D = GetImplAs<ProgramD3D>(glState.getProgram());
if (params.mode() != gl::PrimitiveMode::Points ||
!programD3D->usesInstancedPointSpriteEmulation())
{
......@@ -1577,7 +1585,7 @@ gl::Error Renderer11::drawElements(const gl::Context *context, const gl::DrawCal
}
// This code should not be reachable by multi-view programs.
ASSERT(program->usesMultiview() == false);
ASSERT(programD3D->getState().usesMultiview() == false);
// If the shader is writing to gl_PointSize, then pointsprites are being rendered.
// Emulating instanced point sprites for FL9_3 requires the topology to be
......
......@@ -501,6 +501,10 @@ class Renderer11 : public RendererD3D
d3d11::ANGLED3D11DeviceType getDeviceType() const;
gl::Error markTransformFeedbackUsage(const gl::Context *context);
gl::Error drawWithGeometryShaderAndTransformFeedback(const gl::Context *context,
gl::PrimitiveMode mode,
UINT instanceCount,
UINT vertexCount);
HMODULE mD3d11Module;
HMODULE mDxgiModule;
......
......@@ -248,6 +248,8 @@ class StateManager11 final : angle::NonCopyable
GLsizei getCurrentMinimumDrawCount() const { return mCurrentMinimumDrawCount; }
VertexDataManager *getVertexDataManager() { return &mVertexDataManager; }
ProgramD3D *getProgramD3D() const { return mProgramD3D; }
private:
template <typename SRVType>
void setShaderResourceInternal(gl::ShaderType shaderType,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment