Commit 4166f014 by Jamie Madill Committed by Commit Bot

D3D11: Optimize Renderer11::drawArrays.

Uses inlining and more efficient computation re-use to streamline this function.

Bug: angleproject:2575
Change-Id: Ib13e32811f56ec9a010ed66f298d4235e5c6807d
Reviewed-on: https://chromium-review.googlesource.com/1067120
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Commit-Queue: Jamie Madill <jmadill@chromium.org>
parent bf7b95db
...@@ -1327,14 +1327,9 @@ void State::setTransformFeedbackBinding(const Context *context, ...@@ -1327,14 +1327,9 @@ void State::setTransformFeedbackBinding(const Context *context,
mDirtyBits.set(DIRTY_BIT_TRANSFORM_FEEDBACK_BINDING); mDirtyBits.set(DIRTY_BIT_TRANSFORM_FEEDBACK_BINDING);
} }
TransformFeedback *State::getCurrentTransformFeedback() const
{
return mTransformFeedback.get();
}
bool State::isTransformFeedbackActiveUnpaused() const bool State::isTransformFeedbackActiveUnpaused() const
{ {
TransformFeedback *curTransformFeedback = getCurrentTransformFeedback(); TransformFeedback *curTransformFeedback = mTransformFeedback.get();
return curTransformFeedback && curTransformFeedback->isActive() && return curTransformFeedback && curTransformFeedback->isActive() &&
!curTransformFeedback->isPaused(); !curTransformFeedback->isPaused();
} }
......
...@@ -215,7 +215,8 @@ class State : public angle::ObserverInterface, angle::NonCopyable ...@@ -215,7 +215,8 @@ class State : public angle::ObserverInterface, angle::NonCopyable
// Transform feedback object (not buffer) binding manipulation // Transform feedback object (not buffer) binding manipulation
void setTransformFeedbackBinding(const Context *context, TransformFeedback *transformFeedback); void setTransformFeedbackBinding(const Context *context, TransformFeedback *transformFeedback);
TransformFeedback *getCurrentTransformFeedback() const; TransformFeedback *getCurrentTransformFeedback() const { return mTransformFeedback.get(); }
bool isTransformFeedbackActiveUnpaused() const; bool isTransformFeedbackActiveUnpaused() const;
bool removeTransformFeedbackBinding(const Context *context, GLuint transformFeedback); bool removeTransformFeedbackBinding(const Context *context, GLuint transformFeedback);
......
...@@ -100,12 +100,6 @@ GLint DrawCallParams::firstVertex() const ...@@ -100,12 +100,6 @@ GLint DrawCallParams::firstVertex() const
return mFirstVertex; return mFirstVertex;
} }
size_t DrawCallParams::vertexCount() const
{
ASSERT(!isDrawElements() || mIndexRange.valid());
return mVertexCount;
}
GLsizei DrawCallParams::indexCount() const GLsizei DrawCallParams::indexCount() const
{ {
ASSERT(isDrawElements()); ASSERT(isDrawElements());
......
...@@ -99,7 +99,12 @@ class DrawCallParams final : angle::NonCopyable ...@@ -99,7 +99,12 @@ class DrawCallParams final : angle::NonCopyable
// This value is the sum of 'baseVertex' and the first indexed vertex for DrawElements calls. // This value is the sum of 'baseVertex' and the first indexed vertex for DrawElements calls.
GLint firstVertex() const; GLint firstVertex() const;
size_t vertexCount() const; size_t vertexCount() const
{
ASSERT(!isDrawElements() || mIndexRange.valid());
return mVertexCount;
}
GLsizei indexCount() const; GLsizei indexCount() const;
GLint baseVertex() const; GLint baseVertex() const;
GLenum type() const; GLenum type() const;
......
...@@ -296,6 +296,8 @@ class ProgramD3D : public ProgramImpl ...@@ -296,6 +296,8 @@ class ProgramD3D : public ProgramImpl
const std::vector<D3DUniform *> &getD3DUniforms() const { return mD3DUniforms; } const std::vector<D3DUniform *> &getD3DUniforms() const { return mD3DUniforms; }
void markUniformsClean(); void markUniformsClean();
const gl::ProgramState &getState() const { return mState; }
private: private:
// These forward-declared tasks are used for multi-thread shader compiles. // These forward-declared tasks are used for multi-thread shader compiles.
class GetExecutableTask; class GetExecutableTask;
......
...@@ -14,9 +14,9 @@ ...@@ -14,9 +14,9 @@
#include "common/debug.h" #include "common/debug.h"
#include "libANGLE/Framebuffer.h" #include "libANGLE/Framebuffer.h"
#include "libANGLE/FramebufferAttachment.h" #include "libANGLE/FramebufferAttachment.h"
#include "libANGLE/renderer/d3d/FramebufferD3D.h" #include "libANGLE/renderer/d3d/d3d11/Framebuffer11.h"
#include "libANGLE/renderer/d3d/d3d11/renderer11_utils.h"
#include "libANGLE/renderer/d3d/d3d11/Renderer11.h" #include "libANGLE/renderer/d3d/d3d11/Renderer11.h"
#include "libANGLE/renderer/d3d/d3d11/renderer11_utils.h"
namespace rx namespace rx
{ {
...@@ -44,11 +44,11 @@ void RenderStateCache::clear() ...@@ -44,11 +44,11 @@ void RenderStateCache::clear()
// static // static
d3d11::BlendStateKey RenderStateCache::GetBlendStateKey(const gl::Context *context, d3d11::BlendStateKey RenderStateCache::GetBlendStateKey(const gl::Context *context,
FramebufferD3D *framebufferD3D, Framebuffer11 *framebuffer11,
const gl::BlendState &blendState) const gl::BlendState &blendState)
{ {
d3d11::BlendStateKey key; d3d11::BlendStateKey key;
const gl::AttachmentList &colorbuffers = framebufferD3D->getColorAttachmentsForRender(context); const gl::AttachmentList &colorbuffers = framebuffer11->getColorAttachmentsForRender(context);
const UINT8 blendStateMask = const UINT8 blendStateMask =
gl_d3d11::ConvertColorMask(blendState.colorMaskRed, blendState.colorMaskGreen, gl_d3d11::ConvertColorMask(blendState.colorMaskRed, blendState.colorMaskGreen,
blendState.colorMaskBlue, blendState.colorMaskAlpha); blendState.colorMaskBlue, blendState.colorMaskAlpha);
......
...@@ -18,11 +18,6 @@ ...@@ -18,11 +18,6 @@
#include <unordered_map> #include <unordered_map>
namespace gl
{
class Framebuffer;
}
namespace std namespace std
{ {
template <> template <>
...@@ -61,7 +56,7 @@ struct hash<gl::SamplerState> ...@@ -61,7 +56,7 @@ struct hash<gl::SamplerState>
namespace rx namespace rx
{ {
class FramebufferD3D; class Framebuffer11;
class Renderer11; class Renderer11;
class RenderStateCache : angle::NonCopyable class RenderStateCache : angle::NonCopyable
...@@ -73,7 +68,7 @@ class RenderStateCache : angle::NonCopyable ...@@ -73,7 +68,7 @@ class RenderStateCache : angle::NonCopyable
void clear(); void clear();
static d3d11::BlendStateKey GetBlendStateKey(const gl::Context *context, static d3d11::BlendStateKey GetBlendStateKey(const gl::Context *context,
FramebufferD3D *framebufferD3D, Framebuffer11 *framebuffer11,
const gl::BlendState &blendState); const gl::BlendState &blendState);
gl::Error getBlendState(Renderer11 *renderer, gl::Error getBlendState(Renderer11 *renderer,
const d3d11::BlendStateKey &key, const d3d11::BlendStateKey &key,
......
...@@ -368,17 +368,17 @@ bool IsArrayRTV(ID3D11RenderTargetView *rtv) ...@@ -368,17 +368,17 @@ bool IsArrayRTV(ID3D11RenderTargetView *rtv)
return false; return false;
} }
int GetAdjustedInstanceCount(const gl::Program *program, int instanceCount) GLsizei GetAdjustedInstanceCount(const ProgramD3D *program, GLsizei instanceCount)
{ {
if (!program->usesMultiview()) if (!program->getState().usesMultiview())
{ {
return instanceCount; return instanceCount;
} }
if (instanceCount == 0) if (instanceCount == 0)
{ {
return program->getNumViews(); return program->getState().getNumViews();
} }
return program->getNumViews() * instanceCount; return program->getState().getNumViews() * instanceCount;
} }
const uint32_t ScratchMemoryBufferLifetime = 1000; const uint32_t ScratchMemoryBufferLifetime = 1000;
...@@ -1401,43 +1401,27 @@ void *Renderer11::getD3DDevice() ...@@ -1401,43 +1401,27 @@ void *Renderer11::getD3DDevice()
return reinterpret_cast<void *>(mDevice); return reinterpret_cast<void *>(mDevice);
} }
gl::Error Renderer11::drawArrays(const gl::Context *context, const gl::DrawCallParams &params) gl::Error Renderer11::drawWithGeometryShaderAndTransformFeedback(const gl::Context *context,
gl::PrimitiveMode mode,
UINT instanceCount,
UINT vertexCount)
{ {
if (params.vertexCount() < static_cast<size_t>(mStateManager.getCurrentMinimumDrawCount())) const gl::State &glState = context->getGLState();
{ ProgramD3D *programD3D = mStateManager.getProgramD3D();
return gl::NoError();
}
const auto &glState = context->getGLState();
if (glState.isTransformFeedbackActiveUnpaused())
{
ANGLE_TRY(markTransformFeedbackUsage(context));
}
gl::Program *program = glState.getProgram();
ASSERT(program != nullptr);
GLsizei adjustedInstanceCount = GetAdjustedInstanceCount(program, params.instances());
ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program);
// Note: vertex indexes can be arbitrarily large.
UINT clampedVertexCount = params.getClampedVertexCount<UINT>();
if (programD3D->usesGeometryShader(params.mode()) &&
glState.isTransformFeedbackActiveUnpaused())
{
// Since we use a geometry if-and-only-if we rewrite vertex streams, transform feedback // Since we use a geometry if-and-only-if we rewrite vertex streams, transform feedback
// won't get the correct output. To work around this, draw with *only* the stream out // won't get the correct output. To work around this, draw with *only* the stream out
// first (no pixel shader) to feed the stream out buffers and then draw again with the // first (no pixel shader) to feed the stream out buffers and then draw again with the
// geometry shader + pixel shader to rasterize the primitives. // geometry shader + pixel shader to rasterize the primitives.
mStateManager.setPixelShader(nullptr); mStateManager.setPixelShader(nullptr);
if (adjustedInstanceCount > 0) if (instanceCount > 0)
{ {
mDeviceContext->DrawInstanced(clampedVertexCount, adjustedInstanceCount, 0, 0); mDeviceContext->DrawInstanced(vertexCount, instanceCount, 0, 0);
} }
else else
{ {
mDeviceContext->Draw(clampedVertexCount, 0); mDeviceContext->Draw(vertexCount, 0);
} }
rx::ShaderExecutableD3D *pixelExe = nullptr; rx::ShaderExecutableD3D *pixelExe = nullptr;
...@@ -1453,53 +1437,61 @@ gl::Error Renderer11::drawArrays(const gl::Context *context, const gl::DrawCallP ...@@ -1453,53 +1437,61 @@ gl::Error Renderer11::drawArrays(const gl::Context *context, const gl::DrawCallP
// Retrieve the geometry shader. // Retrieve the geometry shader.
rx::ShaderExecutableD3D *geometryExe = nullptr; rx::ShaderExecutableD3D *geometryExe = nullptr;
ANGLE_TRY(programD3D->getGeometryExecutableForPrimitiveType(context, params.mode(), ANGLE_TRY(
&geometryExe, nullptr)); programD3D->getGeometryExecutableForPrimitiveType(context, mode, &geometryExe, nullptr));
mStateManager.setGeometryShader( mStateManager.setGeometryShader(&GetAs<ShaderExecutable11>(geometryExe)->getGeometryShader());
&GetAs<ShaderExecutable11>(geometryExe)->getGeometryShader());
if (adjustedInstanceCount > 0) if (instanceCount > 0)
{ {
mDeviceContext->DrawInstanced(clampedVertexCount, adjustedInstanceCount, 0, 0); mDeviceContext->DrawInstanced(vertexCount, instanceCount, 0, 0);
} }
else else
{ {
mDeviceContext->Draw(clampedVertexCount, 0); mDeviceContext->Draw(vertexCount, 0);
} }
return gl::NoError(); return gl::NoError();
} }
if (params.mode() == gl::PrimitiveMode::LineLoop) gl::Error Renderer11::drawArrays(const gl::Context *context, const gl::DrawCallParams &params)
{
if (params.vertexCount() < static_cast<size_t>(mStateManager.getCurrentMinimumDrawCount()))
{ {
return drawLineLoop(context, clampedVertexCount, GL_NONE, nullptr, 0, return gl::NoError();
adjustedInstanceCount);
} }
if (params.mode() == gl::PrimitiveMode::TriangleFan) ProgramD3D *programD3D = mStateManager.getProgramD3D();
{ GLsizei adjustedInstanceCount = GetAdjustedInstanceCount(programD3D, params.instances());
return drawTriangleFan(context, clampedVertexCount, GL_NONE, nullptr, 0,
adjustedInstanceCount);
}
bool useInstancedPointSpriteEmulation = // Note: vertex indexes can be arbitrarily large.
programD3D->usesPointSize() && getWorkarounds().useInstancedPointSpriteEmulation; UINT clampedVertexCount = params.getClampedVertexCount<UINT>();
if (params.mode() != gl::PrimitiveMode::Points || !useInstancedPointSpriteEmulation) const auto &glState = context->getGLState();
{ if (glState.getCurrentTransformFeedback() && glState.isTransformFeedbackActiveUnpaused())
if (adjustedInstanceCount == 0)
{ {
mDeviceContext->Draw(clampedVertexCount, 0); ANGLE_TRY(markTransformFeedbackUsage(context));
}
else if (programD3D->usesGeometryShader(params.mode()))
{ {
mDeviceContext->DrawInstanced(clampedVertexCount, adjustedInstanceCount, 0, 0); return drawWithGeometryShaderAndTransformFeedback(
context, params.mode(), adjustedInstanceCount, clampedVertexCount);
} }
return gl::NoError();
} }
switch (params.mode())
{
case gl::PrimitiveMode::LineLoop:
return drawLineLoop(context, clampedVertexCount, GL_NONE, nullptr, 0,
adjustedInstanceCount);
case gl::PrimitiveMode::TriangleFan:
return drawTriangleFan(context, clampedVertexCount, GL_NONE, nullptr, 0,
adjustedInstanceCount);
case gl::PrimitiveMode::Points:
if (getWorkarounds().useInstancedPointSpriteEmulation)
{
// This code should not be reachable by multi-view programs. // This code should not be reachable by multi-view programs.
ASSERT(program->usesMultiview() == false); ASSERT(programD3D->getState().usesMultiview() == false);
// If the shader is writing to gl_PointSize, then pointsprites are being rendered. // If the shader is writing to gl_PointSize, then pointsprites are being rendered.
// Emulating instanced point sprites for FL9_3 requires the topology to be // Emulating instanced point sprites for FL9_3 requires the topology to be
...@@ -1510,21 +1502,39 @@ gl::Error Renderer11::drawArrays(const gl::Context *context, const gl::DrawCallP ...@@ -1510,21 +1502,39 @@ gl::Error Renderer11::drawArrays(const gl::Context *context, const gl::DrawCallP
return gl::NoError(); return gl::NoError();
} }
// If pointsprite emulation is used with glDrawArraysInstanced then we need to take a less // If pointsprite emulation is used with glDrawArraysInstanced then we need to take
// efficient code path. Instanced rendering of emulated pointsprites requires a loop to draw each // a less efficient code path. Instanced rendering of emulated pointsprites requires
// batch of points. An offset into the instanced data buffer is calculated and applied on each // a loop to draw each batch of points. An offset into the instanced data buffer is
// iteration to ensure all instances are rendered correctly. Each instance being rendered // calculated and applied on each iteration to ensure all instances are rendered
// requires the inputlayout cache to reapply buffers and offsets. // correctly. Each instance being rendered requires the inputlayout cache to reapply
// buffers and offsets.
for (GLsizei i = 0; i < params.instances(); i++) for (GLsizei i = 0; i < params.instances(); i++)
{ {
ANGLE_TRY( ANGLE_TRY(mStateManager.updateVertexOffsetsForPointSpritesEmulation(
mStateManager.updateVertexOffsetsForPointSpritesEmulation(params.baseVertex(), i)); params.baseVertex(), i));
mDeviceContext->DrawIndexedInstanced(6, clampedVertexCount, 0, 0, 0); mDeviceContext->DrawIndexedInstanced(6, clampedVertexCount, 0, 0, 0);
} }
// This is required by updateVertexOffsets... above but is outside of the loop for speed. // This is required by updateVertexOffsets... above but is outside of the loop for
// speed.
mStateManager.invalidateVertexBuffer(); mStateManager.invalidateVertexBuffer();
return gl::NoError(); return gl::NoError();
}
break;
default:
break;
}
// "Normal" draw case.
if (adjustedInstanceCount == 0)
{
mDeviceContext->Draw(clampedVertexCount, 0);
}
else
{
mDeviceContext->DrawInstanced(clampedVertexCount, adjustedInstanceCount, 0, 0);
}
return gl::NoError();
} }
gl::Error Renderer11::drawElements(const gl::Context *context, const gl::DrawCallParams &params) gl::Error Renderer11::drawElements(const gl::Context *context, const gl::DrawCallParams &params)
...@@ -1544,8 +1554,8 @@ gl::Error Renderer11::drawElements(const gl::Context *context, const gl::DrawCal ...@@ -1544,8 +1554,8 @@ gl::Error Renderer11::drawElements(const gl::Context *context, const gl::DrawCal
int startVertex = static_cast<int>(params.firstVertex() - params.baseVertex()); int startVertex = static_cast<int>(params.firstVertex() - params.baseVertex());
int baseVertex = -startVertex; int baseVertex = -startVertex;
const gl::Program *program = glState.getProgram(); const ProgramD3D *programD3D = mStateManager.getProgramD3D();
GLsizei adjustedInstanceCount = GetAdjustedInstanceCount(program, params.instances()); GLsizei adjustedInstanceCount = GetAdjustedInstanceCount(programD3D, params.instances());
if (params.mode() == gl::PrimitiveMode::LineLoop) if (params.mode() == gl::PrimitiveMode::LineLoop)
{ {
...@@ -1559,8 +1569,6 @@ gl::Error Renderer11::drawElements(const gl::Context *context, const gl::DrawCal ...@@ -1559,8 +1569,6 @@ gl::Error Renderer11::drawElements(const gl::Context *context, const gl::DrawCal
baseVertex, adjustedInstanceCount); baseVertex, adjustedInstanceCount);
} }
const ProgramD3D *programD3D = GetImplAs<ProgramD3D>(glState.getProgram());
if (params.mode() != gl::PrimitiveMode::Points || if (params.mode() != gl::PrimitiveMode::Points ||
!programD3D->usesInstancedPointSpriteEmulation()) !programD3D->usesInstancedPointSpriteEmulation())
{ {
...@@ -1577,7 +1585,7 @@ gl::Error Renderer11::drawElements(const gl::Context *context, const gl::DrawCal ...@@ -1577,7 +1585,7 @@ gl::Error Renderer11::drawElements(const gl::Context *context, const gl::DrawCal
} }
// This code should not be reachable by multi-view programs. // This code should not be reachable by multi-view programs.
ASSERT(program->usesMultiview() == false); ASSERT(programD3D->getState().usesMultiview() == false);
// If the shader is writing to gl_PointSize, then pointsprites are being rendered. // If the shader is writing to gl_PointSize, then pointsprites are being rendered.
// Emulating instanced point sprites for FL9_3 requires the topology to be // Emulating instanced point sprites for FL9_3 requires the topology to be
......
...@@ -501,6 +501,10 @@ class Renderer11 : public RendererD3D ...@@ -501,6 +501,10 @@ class Renderer11 : public RendererD3D
d3d11::ANGLED3D11DeviceType getDeviceType() const; d3d11::ANGLED3D11DeviceType getDeviceType() const;
gl::Error markTransformFeedbackUsage(const gl::Context *context); gl::Error markTransformFeedbackUsage(const gl::Context *context);
gl::Error drawWithGeometryShaderAndTransformFeedback(const gl::Context *context,
gl::PrimitiveMode mode,
UINT instanceCount,
UINT vertexCount);
HMODULE mD3d11Module; HMODULE mD3d11Module;
HMODULE mDxgiModule; HMODULE mDxgiModule;
......
...@@ -248,6 +248,8 @@ class StateManager11 final : angle::NonCopyable ...@@ -248,6 +248,8 @@ class StateManager11 final : angle::NonCopyable
GLsizei getCurrentMinimumDrawCount() const { return mCurrentMinimumDrawCount; } GLsizei getCurrentMinimumDrawCount() const { return mCurrentMinimumDrawCount; }
VertexDataManager *getVertexDataManager() { return &mVertexDataManager; } VertexDataManager *getVertexDataManager() { return &mVertexDataManager; }
ProgramD3D *getProgramD3D() const { return mProgramD3D; }
private: private:
template <typename SRVType> template <typename SRVType>
void setShaderResourceInternal(gl::ShaderType shaderType, void setShaderResourceInternal(gl::ShaderType shaderType,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment