Commit 3f2e61de by Jamie Madill

Enable MRT pixel shader rewriting.

Writing to all 8 pixel outputs was causing performance problems on Intel and AMD. Enabling Geoff's work to rewrite our pixel shaders solves the regression. This patch also includes a workaround for the NVIDIA driver bug in which NULL RT values passed to OMSetRenderTargets are ignored: the RT list is compacted to skip NULL values.
BUG=angle:705
BUG=365078
Change-Id: Ia68af6f0ccd5f10c484d6f76297a0bec694948f0
Reviewed-on: https://chromium-review.googlesource.com/214852
Tested-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
parent aef95dec
......@@ -649,10 +649,12 @@ ColorbufferInfo Framebuffer::getColorbuffersForRender() const
ASSERT(drawBufferState == GL_BACK || drawBufferState == (GL_COLOR_ATTACHMENT0_EXT + colorAttachment));
colorbuffersForRender.push_back(colorbuffer);
}
#if (ANGLE_MRT_PERF_WORKAROUND == ANGLE_WORKAROUND_DISABLED)
else
{
colorbuffersForRender.push_back(NULL);
}
#endif
}
return colorbuffersForRender;
......
......@@ -82,7 +82,7 @@ unsigned int ParseAndStripArrayIndex(std::string* name)
return subscript;
}
void GetInputLayoutFromShader(const std::vector<sh::Attribute> &shaderAttributes, VertexFormat inputLayout[MAX_VERTEX_ATTRIBS])
void GetDefaultInputLayoutFromShader(const std::vector<sh::Attribute> &shaderAttributes, VertexFormat inputLayout[MAX_VERTEX_ATTRIBS])
{
size_t layoutIndex = 0;
for (size_t attributeIndex = 0; attributeIndex < shaderAttributes.size(); attributeIndex++)
......@@ -108,6 +108,24 @@ void GetInputLayoutFromShader(const std::vector<sh::Attribute> &shaderAttributes
}
}
// Builds the output layout that is used to fetch the default pixel executable at link time.
//
// The layout has a single entry when ANGLE_MRT_PERF_WORKAROUND is enabled and
// IMPLEMENTATION_MAX_DRAW_BUFFERS entries otherwise. Every entry is GL_NONE except the
// first, which is bound to the color attachment matching the first shader output variable.
//
// A shader that declares no outputs yields an all-GL_NONE layout instead of reading
// shaderOutputVars[0] out of bounds (the previous ASSERT did not protect builds in which
// assertions are compiled out).
std::vector<GLenum> GetDefaultOutputLayoutFromShader(const std::vector<rx::PixelShaderOuputVariable> &shaderOutputVars)
{
#if (ANGLE_MRT_PERF_WORKAROUND == ANGLE_WORKAROUND_ENABLED)
    const size_t layoutSize = 1;
#else
    const size_t layoutSize = IMPLEMENTATION_MAX_DRAW_BUFFERS;
#endif

    // Start with every slot disabled.
    std::vector<GLenum> defaultPixelOutput(layoutSize, GL_NONE);

    if (!shaderOutputVars.empty())
    {
        defaultPixelOutput[0] = GL_COLOR_ATTACHMENT0 + shaderOutputVars[0].outputIndex;
    }

    return defaultPixelOutput;
}
bool IsRowMajorLayout(const sh::InterfaceBlockField &var)
{
return var.isRowMajorLayout;
......@@ -261,7 +279,9 @@ rx::ShaderExecutable *ProgramBinary::getPixelExecutableForOutputLayout(const std
{
for (size_t executableIndex = 0; executableIndex < mPixelExecutables.size(); executableIndex++)
{
#if (ANGLE_MRT_PERF_WORKAROUND == ANGLE_WORKAROUND_ENABLED)
if (mPixelExecutables[executableIndex]->matchesSignature(outputSignature))
#endif
{
return mPixelExecutables[executableIndex]->shaderExecutable();
}
......@@ -1701,14 +1721,10 @@ bool ProgramBinary::link(InfoLog &infoLog, const AttributeBindings &attributeBin
if (success)
{
VertexFormat defaultInputLayout[MAX_VERTEX_ATTRIBS];
GetInputLayoutFromShader(vertexShader->getActiveAttributes(), defaultInputLayout);
GetDefaultInputLayoutFromShader(vertexShader->getActiveAttributes(), defaultInputLayout);
rx::ShaderExecutable *defaultVertexExecutable = getVertexExecutableForInputLayout(defaultInputLayout);
std::vector<GLenum> defaultPixelOutput(IMPLEMENTATION_MAX_DRAW_BUFFERS);
for (size_t i = 0; i < defaultPixelOutput.size(); i++)
{
defaultPixelOutput[i] = (i == 0) ? GL_COLOR_ATTACHMENT0 : GL_NONE;
}
std::vector<GLenum> defaultPixelOutput = GetDefaultOutputLayoutFromShader(mPixelShaderKey);
rx::ShaderExecutable *defaultPixelExecutable = getPixelExecutableForOutputLayout(defaultPixelOutput);
if (usesGeometryShader())
......
......@@ -24,6 +24,11 @@
#include <string>
#include <vector>
// TODO(jmadill): place this in workarounds library
// Possible states of a workaround switch; code tests a switch with
// '#if (SWITCH == ANGLE_WORKAROUND_ENABLED)' or '... == ANGLE_WORKAROUND_DISABLED'.
// NOTE(review): both states are presumably non-zero so that an undefined switch, which
// evaluates to 0 inside #if, matches neither of them -- confirm.
#define ANGLE_WORKAROUND_ENABLED 1
#define ANGLE_WORKAROUND_DISABLED 2
// State of the MRT (multiple render target) performance workaround: currently enabled.
#define ANGLE_MRT_PERF_WORKAROUND ANGLE_WORKAROUND_ENABLED
namespace sh
{
class HLSLBlockEncoder;
......@@ -271,8 +276,7 @@ class ProgramBinary : public RefCountObject
PixelExecutable(const std::vector<GLenum> &outputSignature, rx::ShaderExecutable *shaderExecutable);
~PixelExecutable();
// FIXME(geofflang): Work around NVIDIA driver bug by repacking buffers
bool matchesSignature(const std::vector<GLenum> &signature) const { return true; /* mOutputSignature == signature; */ }
bool matchesSignature(const std::vector<GLenum> &signature) const { return mOutputSignature == signature; }
const std::vector<GLenum> &outputSignature() const { return mOutputSignature; }
rx::ShaderExecutable *shaderExecutable() const { return mShaderExecutable; }
......
......@@ -22,7 +22,7 @@ META_ASSERT(GL_INVALID_INDEX == UINT_MAX);
using namespace gl;
namespace gl_d3d
namespace
{
std::string HLSLComponentTypeString(GLenum componentType)
......@@ -70,6 +70,21 @@ std::string HLSLTypeString(GLenum type)
return HLSLComponentTypeString(gl::VariableComponentType(type), gl::VariableComponentCount(type));
}
// Returns the pixel shader output variable whose outputIndex equals 'location'.
// Not finding a match trips UNREACHABLE(); the function then falls back to returning
// the first element of 'outputVariables'.
const rx::PixelShaderOuputVariable &GetOutputAtLocation(const std::vector<rx::PixelShaderOuputVariable> &outputVariables,
                                                        unsigned int location)
{
    typedef std::vector<rx::PixelShaderOuputVariable>::const_iterator OutputIterator;

    const OutputIterator end = outputVariables.end();
    for (OutputIterator candidate = outputVariables.begin(); candidate != end; ++candidate)
    {
        if (candidate->outputIndex == location)
        {
            return *candidate;
        }
    }

    UNREACHABLE();
    return outputVariables[0];
}
}
namespace rx
......@@ -328,7 +343,7 @@ std::string DynamicHLSL::generateVaryingHLSL(const ShaderD3D *shader) const
{
GLenum componentType = VariableComponentType(transposedType);
int columnCount = VariableColumnCount(transposedType);
typeString = gl_d3d::HLSLComponentTypeString(componentType, columnCount);
typeString = HLSLComponentTypeString(componentType, columnCount);
}
varyingHLSL += typeString + " v" + n + " : " + varyingSemantic + n + ";\n";
}
......@@ -361,12 +376,12 @@ std::string DynamicHLSL::generateVertexShaderForInputLayout(const std::string &s
if (IsMatrixType(shaderAttribute.type))
{
// Matrix types are always transposed
structHLSL += " " + gl_d3d::HLSLMatrixTypeString(TransposeMatrixType(shaderAttribute.type));
structHLSL += " " + HLSLMatrixTypeString(TransposeMatrixType(shaderAttribute.type));
}
else
{
GLenum componentType = mRenderer->getVertexComponentType(vertexFormat);
structHLSL += " " + gl_d3d::HLSLComponentTypeString(componentType, VariableComponentCount(shaderAttribute.type));
structHLSL += " " + HLSLComponentTypeString(componentType, VariableComponentCount(shaderAttribute.type));
}
structHLSL += " " + decorateVariable(shaderAttribute.name) + " : TEXCOORD" + Str(semanticIndex) + ";\n";
......@@ -421,17 +436,19 @@ std::string DynamicHLSL::generatePixelShaderForOutputSignature(const std::string
std::string declarationHLSL;
std::string copyHLSL;
for (size_t i = 0; i < outputVariables.size(); i++)
for (size_t layoutIndex = 0; layoutIndex < outputLayout.size(); ++layoutIndex)
{
const PixelShaderOuputVariable& outputVariable = outputVariables[i];
ASSERT(outputLayout.size() > outputVariable.outputIndex);
GLenum binding = outputLayout[layoutIndex];
// FIXME(geofflang): Work around NVIDIA driver bug by repacking buffers
bool outputIndexEnabled = true; // outputLayout[outputVariable.outputIndex] != GL_NONE
if (outputIndexEnabled)
if (binding != GL_NONE)
{
declarationHLSL += " " + gl_d3d::HLSLTypeString(outputVariable.type) + " " + outputVariable.name +
" : " + targetSemantic + Str(outputVariable.outputIndex) + ";\n";
unsigned int location = (binding - GL_COLOR_ATTACHMENT0);
const PixelShaderOuputVariable &outputVariable = GetOutputAtLocation(outputVariables, location);
declarationHLSL += " " + HLSLTypeString(outputVariable.type) + " " + outputVariable.name +
" : " + targetSemantic + Str(layoutIndex) + ";\n";
copyHLSL += " output." + outputVariable.name + " = " + outputVariable.source + ";\n";
}
......
......@@ -9,6 +9,7 @@
#include "libGLESv2/renderer/d3d/d3d11/renderer11_utils.h"
#include "libGLESv2/renderer/d3d/d3d11/formatutils11.h"
#include "libGLESv2/ProgramBinary.h"
#include "common/debug.h"
......@@ -392,9 +393,13 @@ static size_t GetMaximumSimultaneousRenderTargets(D3D_FEATURE_LEVEL featureLevel
case D3D_FEATURE_LEVEL_11_1:
case D3D_FEATURE_LEVEL_11_0: return D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT;
// FIXME(geofflang): Work around NVIDIA driver bug by repacking buffers
case D3D_FEATURE_LEVEL_10_1:
case D3D_FEATURE_LEVEL_10_0: return 1; /* D3D10_SIMULTANEOUS_RENDER_TARGET_COUNT; */
case D3D_FEATURE_LEVEL_10_0:
#if (ANGLE_MRT_PERF_WORKAROUND == ANGLE_WORKAROUND_ENABLED)
return D3D10_SIMULTANEOUS_RENDER_TARGET_COUNT;
#else
return 1;
#endif
case D3D_FEATURE_LEVEL_9_3: return D3D_FL9_3_SIMULTANEOUS_RENDER_TARGET_COUNT;
case D3D_FEATURE_LEVEL_9_2:
......
#include "ANGLETest.h"
class DrawBuffersTest : public ANGLETest
{
protected:
DrawBuffersTest(int clientVersion)
{
setWindowWidth(128);
setWindowHeight(128);
setConfigRedBits(8);
setConfigGreenBits(8);
setConfigBlueBits(8);
setConfigAlphaBits(8);
setConfigDepthBits(24);
setClientVersion(clientVersion);
}
virtual void SetUp()
{
ANGLETest::SetUp();
glGenFramebuffers(1, &mFBO);
glBindFramebuffer(GL_FRAMEBUFFER, mFBO);
glGenTextures(4, mTextures);
for (size_t texIndex = 0; texIndex < ArraySize(mTextures); texIndex++)
{
glBindTexture(GL_TEXTURE_2D, mTextures[texIndex]);
glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8, getWindowWidth(), getWindowHeight());
}
GLfloat data[] =
{
-1.0f, 1.0f,
-1.0f, -2.0f,
2.0f, 1.0f
};
glGenBuffers(1, &mBuffer);
glBindBuffer(GL_ARRAY_BUFFER, mBuffer);
glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * 6, data, GL_STATIC_DRAW);
GLint maxDrawBuffers;
glGetIntegerv(GL_MAX_DRAW_BUFFERS, &maxDrawBuffers);
ASSERT_EQ(maxDrawBuffers, 8);
ASSERT_GL_NO_ERROR();
}
virtual void TearDown()
{
glDeleteFramebuffers(1, &mFBO);
glDeleteTextures(4, mTextures);
glDeleteBuffers(1, &mBuffer);
ANGLETest::TearDown();
}
void setupMRTProgramESSL3(bool bufferEnabled[8], GLuint *programOut)
{
const std::string vertexShaderSource =
"#version 300 es\n"
"in vec4 position;\n"
"void main() {\n"
" gl_Position = position;\n"
"}\n";
std::stringstream strstr;
strstr << "#version 300 es\n"
"precision highp float;\n";
for (unsigned int index = 0; index < 8; index++)
{
if (bufferEnabled[index])
{
strstr << "layout(location = " << index << ") "
"out vec4 value" << index << ";\n";
}
}
strstr << "void main()\n"
"{\n";
for (unsigned int index = 0; index < 8; index++)
{
if (bufferEnabled[index])
{
unsigned int r = (index + 1) & 1;
unsigned int g = (index + 1) & 2;
unsigned int b = (index + 1) & 4;
strstr << " value" << index << " = vec4("
<< r << ".0, " << g << ".0, "
<< b << ".0, 1.0);\n";
}
}
strstr << "}\n";
*programOut = CompileProgram(vertexShaderSource, strstr.str());
if (*programOut == 0)
{
FAIL() << "shader compilation failed.";
}
glUseProgram(*programOut);
GLint location = glGetAttribLocation(*programOut, "position");
ASSERT_NE(location, -1);
glBindBuffer(GL_ARRAY_BUFFER, mBuffer);
glVertexAttribPointer(location, 2, GL_FLOAT, GL_FALSE, 8, NULL);
glEnableVertexAttribArray(location);
}
void setupMRTProgramESSL1(bool bufferEnabled[8], GLuint *programOut)
{
const std::string vertexShaderSource =
"attribute vec4 position;\n"
"void main() {\n"
" gl_Position = position;\n"
"}\n";
std::stringstream strstr;
strstr << "#extension GL_EXT_draw_buffers : enable\n"
"precision highp float;\n"
"void main()\n"
"{\n";
for (unsigned int index = 0; index < 8; index++)
{
if (bufferEnabled[index])
{
unsigned int r = (index + 1) & 1;
unsigned int g = (index + 1) & 2;
unsigned int b = (index + 1) & 4;
strstr << " gl_FragData[" << index << "] = vec4("
<< r << ".0, " << g << ".0, "
<< b << ".0, 1.0);\n";
}
}
strstr << "}\n";
*programOut = CompileProgram(vertexShaderSource, strstr.str());
if (*programOut == 0)
{
FAIL() << "shader compilation failed.";
}
glUseProgram(*programOut);
GLint location = glGetAttribLocation(*programOut, "position");
ASSERT_NE(location, -1);
glBindBuffer(GL_ARRAY_BUFFER, mBuffer);
glVertexAttribPointer(location, 2, GL_FLOAT, GL_FALSE, 8, NULL);
glEnableVertexAttribArray(location);
}
void setupMRTProgram(bool bufferEnabled[8], GLuint *programOut)
{
if (getClientVersion() == 3)
{
setupMRTProgramESSL3(bufferEnabled, programOut);
}
else
{
ASSERT_EQ(getClientVersion(), 2);
setupMRTProgramESSL1(bufferEnabled, programOut);
}
}
void verifyAttachment(unsigned int index, GLuint textureName)
{
for (unsigned int colorAttachment = 0; colorAttachment < 8; colorAttachment++)
{
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + colorAttachment, GL_TEXTURE_2D, 0, 0);
}
glBindTexture(GL_TEXTURE_2D, textureName);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, textureName, 0);
unsigned int r = (((index + 1) & 1) > 0) ? 255 : 0;
unsigned int g = (((index + 1) & 2) > 0) ? 255 : 0;
unsigned int b = (((index + 1) & 4) > 0) ? 255 : 0;
EXPECT_PIXEL_EQ(getWindowWidth() / 2, getWindowHeight() / 2, r, g, b, 255);
}
void gapsTest()
{
glBindTexture(GL_TEXTURE_2D, mTextures[0]);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, mTextures[0], 0);
bool flags[8] = { false, true };
GLuint program;
setupMRTProgram(flags, &program);
const GLenum bufs[] =
{
GL_NONE,
GL_COLOR_ATTACHMENT1
};
glUseProgram(program);
glDrawBuffersEXT(2, bufs);
glDrawArrays(GL_TRIANGLES, 0, 3);
verifyAttachment(1, mTextures[0]);
glDeleteProgram(program);
}
void firstAndLastTest()
{
glBindTexture(GL_TEXTURE_2D, mTextures[0]);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, mTextures[0], 0);
glBindTexture(GL_TEXTURE_2D, mTextures[1]);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT3, GL_TEXTURE_2D, mTextures[1], 0);
bool flags[8] = { true, false, false, true };
GLuint program;
setupMRTProgram(flags, &program);
const GLenum bufs[] =
{
GL_COLOR_ATTACHMENT0,
GL_NONE,
GL_NONE,
GL_COLOR_ATTACHMENT3
};
glUseProgram(program);
glDrawBuffersEXT(4, bufs);
glDrawArrays(GL_TRIANGLES, 0, 3);
verifyAttachment(0, mTextures[0]);
verifyAttachment(3, mTextures[1]);
EXPECT_GL_NO_ERROR();
glDeleteProgram(program);
}
void firstHalfNULLTest()
{
bool flags[8] = { false };
GLenum bufs[8] = { GL_NONE };
for (unsigned int texIndex = 0; texIndex < 4; texIndex++)
{
glBindTexture(GL_TEXTURE_2D, mTextures[texIndex]);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT4 + texIndex, GL_TEXTURE_2D, mTextures[texIndex], 0);
flags[texIndex + 4] = true;
bufs[texIndex + 4] = GL_COLOR_ATTACHMENT4 + texIndex;
}
GLuint program;
setupMRTProgram(flags, &program);
glUseProgram(program);
glDrawBuffersEXT(8, bufs);
glDrawArrays(GL_TRIANGLES, 0, 3);
for (unsigned int texIndex = 0; texIndex < 4; texIndex++)
{
verifyAttachment(texIndex + 4, mTextures[texIndex]);
}
EXPECT_GL_NO_ERROR();
glDeleteProgram(program);
}
GLuint mFBO;
GLuint mTextures[4];
GLuint mBuffer;
};
// DrawBuffersTest fixture instantiated with client version 3.
class DrawBuffersTestESSL3 : public DrawBuffersTest
{
  protected:
    DrawBuffersTestESSL3() : DrawBuffersTest(3) {}
};
// DrawBuffersTest fixture instantiated with client version 2.
class DrawBuffersTestESSL1 : public DrawBuffersTest
{
  protected:
    DrawBuffersTestESSL1() : DrawBuffersTest(2) {}
};
// Runs gapsTest() on the client version 3 fixture.
TEST_F(DrawBuffersTestESSL3, Gaps)
{
gapsTest();
}
// Runs gapsTest() on the client version 2 fixture.
TEST_F(DrawBuffersTestESSL1, Gaps)
{
gapsTest();
}
// Runs firstAndLastTest() on the client version 3 fixture.
TEST_F(DrawBuffersTestESSL3, FirstAndLast)
{
firstAndLastTest();
}
// Runs firstAndLastTest() on the client version 2 fixture.
TEST_F(DrawBuffersTestESSL1, FirstAndLast)
{
firstAndLastTest();
}
// Runs firstHalfNULLTest() on the client version 3 fixture.
TEST_F(DrawBuffersTestESSL3, FirstHalfNULL)
{
firstHalfNULLTest();
}
// Runs firstHalfNULLTest() on the client version 2 fixture.
TEST_F(DrawBuffersTestESSL1, FirstHalfNULL)
{
firstHalfNULLTest();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment