Commit 09e2d93b by Jamie Madill

D3D11: Further optimize input layout cache.

*re-land with fix for matrix attributes* *re-re-land with fix for attributes with BindAttribLocation* Using the new vertex format type enum, we can shrink the size of the input layout tables and reduce draw call overhead further. BUG=angleproject:959 Change-Id: I181acd3d7d519f5587cbe180fb1bca8530b7cfc2 Reviewed-on: https://chromium-review.googlesource.com/285348 Reviewed-by: Geoff Lang <geofflang@chromium.org> Tested-by: Jamie Madill <jmadill@chromium.org>
parent c1ac41bb
......@@ -195,6 +195,7 @@ class Program : angle::NonCopyable
void getActiveAttribute(GLuint index, GLsizei bufsize, GLsizei *length, GLint *size, GLenum *type, GLchar *name);
GLint getActiveAttributeCount();
GLint getActiveAttributeMaxLength();
// Read-only accessor: hands back mLinkedAttribute as a pointer to const sh::Attribute.
const sh::Attribute *getLinkedAttributes() const { return mLinkedAttribute; }
GLint getSamplerMapping(SamplerType type, unsigned int samplerIndex, const Caps &caps);
GLenum getSamplerTextureType(SamplerType type, unsigned int samplerIndex);
......
......@@ -647,6 +647,58 @@ const FormatSet &GetAllSizedInternalFormats()
return formatSet;
}
// Translates a GL attribute type enum (GL_FLOAT, GL_INT_VEC3, GL_FLOAT_MAT2x4, ...) into the
// corresponding AttributeType enumerator. Any value outside the handled set trips UNREACHABLE()
// and yields ATTRIBUTE_FLOAT.
AttributeType GetAttributeType(GLenum enumValue)
{
    AttributeType result = ATTRIBUTE_FLOAT;

    switch (enumValue)
    {
        // Float scalar and vectors
        case GL_FLOAT:             result = ATTRIBUTE_FLOAT;  break;
        case GL_FLOAT_VEC2:        result = ATTRIBUTE_VEC2;   break;
        case GL_FLOAT_VEC3:        result = ATTRIBUTE_VEC3;   break;
        case GL_FLOAT_VEC4:        result = ATTRIBUTE_VEC4;   break;

        // Signed integer scalar and vectors
        case GL_INT:               result = ATTRIBUTE_INT;    break;
        case GL_INT_VEC2:          result = ATTRIBUTE_IVEC2;  break;
        case GL_INT_VEC3:          result = ATTRIBUTE_IVEC3;  break;
        case GL_INT_VEC4:          result = ATTRIBUTE_IVEC4;  break;

        // Unsigned integer scalar and vectors
        case GL_UNSIGNED_INT:      result = ATTRIBUTE_UINT;   break;
        case GL_UNSIGNED_INT_VEC2: result = ATTRIBUTE_UVEC2;  break;
        case GL_UNSIGNED_INT_VEC3: result = ATTRIBUTE_UVEC3;  break;
        case GL_UNSIGNED_INT_VEC4: result = ATTRIBUTE_UVEC4;  break;

        // Square matrices
        case GL_FLOAT_MAT2:        result = ATTRIBUTE_MAT2;   break;
        case GL_FLOAT_MAT3:        result = ATTRIBUTE_MAT3;   break;
        case GL_FLOAT_MAT4:        result = ATTRIBUTE_MAT4;   break;

        // Non-square matrices
        case GL_FLOAT_MAT2x3:      result = ATTRIBUTE_MAT2x3; break;
        case GL_FLOAT_MAT2x4:      result = ATTRIBUTE_MAT2x4; break;
        case GL_FLOAT_MAT3x2:      result = ATTRIBUTE_MAT3x2; break;
        case GL_FLOAT_MAT3x4:      result = ATTRIBUTE_MAT3x4; break;
        case GL_FLOAT_MAT4x2:      result = ATTRIBUTE_MAT4x2; break;
        case GL_FLOAT_MAT4x3:      result = ATTRIBUTE_MAT4x3; break;

        default:
            // Not a type this mapping handles; result keeps its ATTRIBUTE_FLOAT fallback.
            UNREACHABLE();
            break;
    }

    return result;
}
VertexFormatType GetVertexFormatType(GLenum type, GLboolean normalized, GLuint components, bool pureInteger)
{
switch (type)
......
......@@ -76,6 +76,37 @@ GLenum GetSizedInternalFormat(GLenum internalFormat, GLenum type);
typedef std::set<GLenum> FormatSet;
const FormatSet &GetAllSizedInternalFormats();
// Enumerates every type a vertex shader input may have. From the ESSL 3.00.4 spec:
// vertex shader inputs can only be float, floating-point vectors, matrices, signed and unsigned
// integers and integer vectors. Vertex shader inputs cannot be arrays or structures.
//
// The numeric values are spelled out explicitly; they equal the default sequential numbering.
enum AttributeType
{
    // Float scalar and vectors
    ATTRIBUTE_FLOAT  = 0,
    ATTRIBUTE_VEC2   = 1,
    ATTRIBUTE_VEC3   = 2,
    ATTRIBUTE_VEC4   = 3,

    // Signed integer scalar and vectors
    ATTRIBUTE_INT    = 4,
    ATTRIBUTE_IVEC2  = 5,
    ATTRIBUTE_IVEC3  = 6,
    ATTRIBUTE_IVEC4  = 7,

    // Unsigned integer scalar and vectors
    ATTRIBUTE_UINT   = 8,
    ATTRIBUTE_UVEC2  = 9,
    ATTRIBUTE_UVEC3  = 10,
    ATTRIBUTE_UVEC4  = 11,

    // Square matrices
    ATTRIBUTE_MAT2   = 12,
    ATTRIBUTE_MAT3   = 13,
    ATTRIBUTE_MAT4   = 14,

    // Non-square matrices
    ATTRIBUTE_MAT2x3 = 15,
    ATTRIBUTE_MAT2x4 = 16,
    ATTRIBUTE_MAT3x2 = 17,
    ATTRIBUTE_MAT3x4 = 18,
    ATTRIBUTE_MAT4x2 = 19,
    ATTRIBUTE_MAT4x3 = 20,
};
AttributeType GetAttributeType(GLenum enumValue);
enum VertexFormatType
{
VERTEX_FORMAT_INVALID,
......
......@@ -8,16 +8,17 @@
// D3D11 input layouts.
#include "libANGLE/renderer/d3d/d3d11/InputLayoutCache.h"
#include "libANGLE/renderer/d3d/d3d11/VertexBuffer11.h"
#include "common/utilities.h"
#include "libANGLE/Program.h"
#include "libANGLE/VertexAttribute.h"
#include "libANGLE/renderer/d3d/IndexDataManager.h"
#include "libANGLE/renderer/d3d/ProgramD3D.h"
#include "libANGLE/renderer/d3d/VertexDataManager.h"
#include "libANGLE/renderer/d3d/d3d11/Buffer11.h"
#include "libANGLE/renderer/d3d/d3d11/ShaderExecutable11.h"
#include "libANGLE/renderer/d3d/d3d11/VertexBuffer11.h"
#include "libANGLE/renderer/d3d/d3d11/formatutils11.h"
#include "libANGLE/renderer/d3d/ProgramD3D.h"
#include "libANGLE/renderer/d3d/VertexDataManager.h"
#include "libANGLE/renderer/d3d/IndexDataManager.h"
#include "libANGLE/Program.h"
#include "libANGLE/VertexAttribute.h"
#include "third_party/murmurhash/MurmurHash3.h"
namespace rx
......@@ -48,40 +49,80 @@ void GetInputLayout(const TranslatedAttribute *translatedAttributes[gl::MAX_VERT
}
}
// Finds the GLSL type that owns input row number `index`.
//
// Walks linkedAttributes in slot order, ignoring slots whose type is GL_NONE. Each remaining
// attribute contributes gl::VariableRowCount() rows of its gl::TransposeMatrixType()'d type, so
// a matrix attribute accounts for several consecutive rows. The transposed type of the first
// attribute whose running row total exceeds `index` is returned.
//
// linkedAttributes must hold at least gl::MAX_VERTEX_ATTRIBS entries. If no attribute covers
// `index`, UNREACHABLE() fires and GL_NONE is returned.
GLenum GetNextGLSLAttributeType(const sh::Attribute *linkedAttributes, int index)
{
    int rowsSeen = 0;

    for (int slot = 0; slot < gl::MAX_VERTEX_ATTRIBS; ++slot)
    {
        const GLenum linkedType = linkedAttributes[slot].type;

        // Empty slots (e.g. gaps between explicitly bound locations) take up no rows.
        if (linkedType != GL_NONE)
        {
            const GLenum rowType = gl::TransposeMatrixType(linkedType);
            rowsSeen += gl::VariableRowCount(rowType);

            if (index < rowsSeen)
            {
                return rowType;
            }
        }
    }

    UNREACHABLE();
    return GL_NONE;
}
// NOTE(review): presumably the initial value for the cache's mCacheSize (see setCacheSize);
// the use site is not visible in this hunk — confirm.
const unsigned int kDefaultCacheSize = 1024;
// One attribute's contribution to an input layout cache key, stored as four single-byte fields.
// PackedAttributeLayout::addAttributeData() fills one of these in, static_asserts that the
// struct is exactly 32 bits, and bit-casts it into a uint32_t entry of attributeData. Field
// order and width therefore define the key's bit layout.
struct PackedAttribute
{
// gl::AttributeType of the shader input, narrowed to 8 bits.
uint8_t attribType;
// Semantic index of the input element, narrowed to 8 bits.
uint8_t semanticIndex;
// gl::VertexFormatType of the vertex data, narrowed to 8 bits.
uint8_t vertexFormatType;
// Instancing divisor, narrowed to 8 bits; addAttributeData() ASSERTs the value survived the cast.
uint8_t divisor;
};
} // anonymous namespace
bool InputLayoutCache::PackedAttributeComparator::operator()(const PackedAttributeLayout &a,
const PackedAttributeLayout &b) const
void InputLayoutCache::PackedAttributeLayout::addAttributeData(
GLenum glType,
UINT semanticIndex,
gl::VertexFormatType vertexFormatType,
unsigned int divisor)
{
if (a.numAttributes != b.numAttributes)
{
return a.numAttributes < b.numAttributes;
}
gl::AttributeType attribType = gl::GetAttributeType(glType);
PackedAttribute packedAttrib;
packedAttrib.attribType = static_cast<uint8_t>(attribType);
packedAttrib.semanticIndex = static_cast<uint8_t>(semanticIndex);
packedAttrib.vertexFormatType = static_cast<uint8_t>(vertexFormatType);
packedAttrib.divisor = static_cast<uint8_t>(divisor);
ASSERT(static_cast<gl::AttributeType>(packedAttrib.attribType) == attribType);
ASSERT(static_cast<UINT>(packedAttrib.semanticIndex) == semanticIndex);
ASSERT(static_cast<gl::VertexFormatType>(packedAttrib.vertexFormatType) == vertexFormatType);
ASSERT(static_cast<unsigned int>(packedAttrib.divisor) == divisor);
static_assert(sizeof(uint32_t) == sizeof(PackedAttribute), "PackedAttributes must be 32-bits exactly.");
attributeData[numAttributes++] = gl::bitCast<uint32_t>(packedAttrib);
}
if (a.flags != b.flags)
bool InputLayoutCache::PackedAttributeLayout::operator<(const PackedAttributeLayout &other) const
{
if (numAttributes != other.numAttributes)
{
return a.flags < b.flags;
return numAttributes < other.numAttributes;
}
for (size_t attribIndex = 0; attribIndex < a.numAttributes; attribIndex++)
if (flags != other.flags)
{
const auto &attribA = a.attributeData[attribIndex];
const auto &attribB = b.attributeData[attribIndex];
if (attribA.glType != attribB.glType)
return attribA.glType < attribB.glType;
if (attribA.semanticIndex != attribB.semanticIndex)
return attribA.semanticIndex < attribB.semanticIndex;
if (attribA.dxgiFormat != attribB.dxgiFormat)
return attribA.dxgiFormat < attribB.dxgiFormat;
if (attribA.divisor != attribB.divisor)
return attribA.divisor < attribB.divisor;
return flags < other.flags;
}
// Equal
return false;
return memcmp(attributeData, other.attributeData, sizeof(uint32_t) * numAttributes) < 0;
}
InputLayoutCache::InputLayoutCache()
......@@ -154,9 +195,8 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri
return gl::Error(GL_OUT_OF_MEMORY, "Internal input layout cache is not initialized.");
}
InputLayoutKey ilKey;
ilKey.elementCount = 0;
unsigned int inputElementCount = 0;
D3D11_INPUT_ELEMENT_DESC inputElements[gl::MAX_VERTEX_ATTRIBS];
PackedAttributeLayout layout;
static const char* semanticName = "TEXCOORD";
......@@ -165,6 +205,8 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri
unsigned int firstInstancedElement = gl::MAX_VERTEX_ATTRIBS;
unsigned int nextAvailableInputSlot = 0;
const sh::Attribute *linkedAttributes = program->getLinkedAttributes();
for (unsigned int i = 0; i < unsortedAttributes.size(); i++)
{
if (sortedAttributes[i]->active)
......@@ -176,35 +218,34 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri
gl::VertexFormatType vertexFormatType = gl::GetVertexFormatType(*sortedAttributes[i]->attribute, sortedAttributes[i]->currentValueType);
const d3d11::VertexFormat &vertexFormatInfo = d3d11::GetVertexFormatInfo(vertexFormatType, mFeatureLevel);
// Record the type of the associated vertex shader vector in our key
// This will prevent mismatched vertex shaders from using the same input layout
GLint attributeSize;
program->getActiveAttribute(ilKey.elementCount, 0, NULL, &attributeSize, &ilKey.elements[ilKey.elementCount].glslElementType, NULL);
ilKey.elements[ilKey.elementCount].desc.SemanticName = semanticName;
ilKey.elements[ilKey.elementCount].desc.SemanticIndex = sortedSemanticIndices[i];
ilKey.elements[ilKey.elementCount].desc.Format = vertexFormatInfo.nativeFormat;
ilKey.elements[ilKey.elementCount].desc.InputSlot = i;
ilKey.elements[ilKey.elementCount].desc.AlignedByteOffset = 0;
ilKey.elements[ilKey.elementCount].desc.InputSlotClass = inputClass;
ilKey.elements[ilKey.elementCount].desc.InstanceDataStepRate = instancedPointSpritesActive ? 1 : sortedAttributes[i]->divisor;
inputElements[inputElementCount].SemanticName = semanticName;
inputElements[inputElementCount].SemanticIndex = sortedSemanticIndices[i];
inputElements[inputElementCount].Format = vertexFormatInfo.nativeFormat;
inputElements[inputElementCount].InputSlot = i;
inputElements[inputElementCount].AlignedByteOffset = 0;
inputElements[inputElementCount].InputSlotClass = inputClass;
inputElements[inputElementCount].InstanceDataStepRate = instancedPointSpritesActive ? 1 : sortedAttributes[i]->divisor;
if (inputClass == D3D11_INPUT_PER_VERTEX_DATA && firstIndexedElement == gl::MAX_VERTEX_ATTRIBS)
{
firstIndexedElement = ilKey.elementCount;
firstIndexedElement = inputElementCount;
}
else if (inputClass == D3D11_INPUT_PER_INSTANCE_DATA && firstInstancedElement == gl::MAX_VERTEX_ATTRIBS)
{
firstInstancedElement = ilKey.elementCount;
firstInstancedElement = inputElementCount;
}
ilKey.elementCount++;
nextAvailableInputSlot = i + 1;
// Record the type of the associated vertex shader vector in our key
// This will prevent mismatched vertex shaders from using the same input layout
GLenum glslElementType = GetNextGLSLAttributeType(linkedAttributes, inputElementCount);
layout.addAttributeData(ilKey.elements[ilKey.elementCount].glslElementType,
layout.addAttributeData(glslElementType,
sortedSemanticIndices[i],
vertexFormatInfo.nativeFormat,
vertexFormatType,
sortedAttributes[i]->divisor);
inputElementCount++;
nextAvailableInputSlot = i + 1;
}
}
......@@ -213,33 +254,33 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri
// We do this even if mode != GL_POINTS, since the shader signature has these inputs, and the input layout must match the shader
if (programUsesInstancedPointSprites)
{
ilKey.elements[ilKey.elementCount].desc.SemanticName = "SPRITEPOSITION";
ilKey.elements[ilKey.elementCount].desc.SemanticIndex = 0;
ilKey.elements[ilKey.elementCount].desc.Format = DXGI_FORMAT_R32G32B32_FLOAT;
ilKey.elements[ilKey.elementCount].desc.InputSlot = nextAvailableInputSlot;
ilKey.elements[ilKey.elementCount].desc.AlignedByteOffset = 0;
ilKey.elements[ilKey.elementCount].desc.InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
ilKey.elements[ilKey.elementCount].desc.InstanceDataStepRate = 0;
inputElements[inputElementCount].SemanticName = "SPRITEPOSITION";
inputElements[inputElementCount].SemanticIndex = 0;
inputElements[inputElementCount].Format = DXGI_FORMAT_R32G32B32_FLOAT;
inputElements[inputElementCount].InputSlot = nextAvailableInputSlot;
inputElements[inputElementCount].AlignedByteOffset = 0;
inputElements[inputElementCount].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
inputElements[inputElementCount].InstanceDataStepRate = 0;
// The new elements are D3D11_INPUT_PER_VERTEX_DATA data so the indexed element
// tracking must be applied. This ensures that the instancing specific
// buffer swapping logic continues to work.
if (firstIndexedElement == gl::MAX_VERTEX_ATTRIBS)
{
firstIndexedElement = ilKey.elementCount;
firstIndexedElement = inputElementCount;
}
ilKey.elementCount++;
inputElementCount++;
ilKey.elements[ilKey.elementCount].desc.SemanticName = "SPRITETEXCOORD";
ilKey.elements[ilKey.elementCount].desc.SemanticIndex = 0;
ilKey.elements[ilKey.elementCount].desc.Format = DXGI_FORMAT_R32G32_FLOAT;
ilKey.elements[ilKey.elementCount].desc.InputSlot = nextAvailableInputSlot;
ilKey.elements[ilKey.elementCount].desc.AlignedByteOffset = sizeof(float) * 3;
ilKey.elements[ilKey.elementCount].desc.InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
ilKey.elements[ilKey.elementCount].desc.InstanceDataStepRate = 0;
inputElements[inputElementCount].SemanticName = "SPRITETEXCOORD";
inputElements[inputElementCount].SemanticIndex = 0;
inputElements[inputElementCount].Format = DXGI_FORMAT_R32G32_FLOAT;
inputElements[inputElementCount].InputSlot = nextAvailableInputSlot;
inputElements[inputElementCount].AlignedByteOffset = sizeof(float) * 3;
inputElements[inputElementCount].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
inputElements[inputElementCount].InstanceDataStepRate = 0;
ilKey.elementCount++;
inputElementCount++;
}
// On 9_3, we must ensure that slot 0 contains non-instanced data.
......@@ -251,14 +292,14 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri
if (moveFirstIndexedIntoSlotZero)
{
ilKey.elements[firstInstancedElement].desc.InputSlot = ilKey.elements[firstIndexedElement].desc.InputSlot;
ilKey.elements[firstIndexedElement].desc.InputSlot = 0;
inputElements[firstInstancedElement].InputSlot = inputElements[firstIndexedElement].InputSlot;
inputElements[firstIndexedElement].InputSlot = 0;
// Instanced PointSprite emulation uses multiple layout entries across a single vertex buffer.
// If an index swap is performed, we need to ensure that all elements get the proper InputSlot.
if (programUsesInstancedPointSprites)
{
ilKey.elements[firstIndexedElement + 1].desc.InputSlot = 0;
inputElements[firstIndexedElement + 1].InputSlot = 0;
}
}
......@@ -299,12 +340,12 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri
ShaderExecutableD3D *shader11 = GetAs<ShaderExecutable11>(shader);
D3D11_INPUT_ELEMENT_DESC descs[gl::MAX_VERTEX_ATTRIBS];
for (unsigned int j = 0; j < ilKey.elementCount; ++j)
for (unsigned int j = 0; j < inputElementCount; ++j)
{
descs[j] = ilKey.elements[j].desc;
descs[j] = inputElements[j];
}
HRESULT result = mDevice->CreateInputLayout(descs, ilKey.elementCount, shader11->getFunction(), shader11->getLength(), &inputLayout);
HRESULT result = mDevice->CreateInputLayout(descs, inputElementCount, shader11->getFunction(), shader11->getLength(), &inputLayout);
if (FAILED(result))
{
return gl::Error(GL_OUT_OF_MEMORY, "Failed to create internal input layout, HRESULT: 0x%08x", result);
......
......@@ -10,16 +10,17 @@
#ifndef LIBANGLE_RENDERER_D3D_D3D11_INPUTLAYOUTCACHE_H_
#define LIBANGLE_RENDERER_D3D_D3D11_INPUTLAYOUTCACHE_H_
#include "libANGLE/Constants.h"
#include "libANGLE/Error.h"
#include "common/angleutils.h"
#include <GLES2/gl2.h>
#include <cstddef>
#include <map>
#include <unordered_map>
#include "common/angleutils.h"
#include "libANGLE/Constants.h"
#include "libANGLE/Error.h"
#include "libANGLE/formatutils.h"
namespace gl
{
class Program;
......@@ -48,28 +49,6 @@ class InputLayoutCache : angle::NonCopyable
void setCacheSize(unsigned int cacheSize) { mCacheSize = cacheSize; }
private:
// One entry of an InputLayoutKey: a D3D11 input element description paired with the GLSL type
// recorded for the same element.
struct InputLayoutElement
{
// Description handed to ID3D11Device::CreateInputLayout for this element.
D3D11_INPUT_ELEMENT_DESC desc;
// Type of the associated vertex shader input; applyVertexBuffers fills it in from
// Program::getActiveAttribute so mismatched vertex shaders do not share an input layout.
GLenum glslElementType;
};
// Cache key made of an element count followed by a fixed-capacity element array. begin()/end()
// expose the key as a raw byte range that starts at elementCount and stops just past the last
// element in use, so unused trailing array entries are left out of the range.
struct InputLayoutKey
{
    // Number of leading entries of `elements` that are in use.
    unsigned int elementCount;
    InputLayoutElement elements[gl::MAX_VERTEX_ATTRIBS];

    // First byte of the key: the address of elementCount.
    const char *begin() const { return reinterpret_cast<const char *>(&elementCount); }

    // One past the last byte of the key: the address of the first unused element.
    const char *end() const { return reinterpret_cast<const char *>(elements + elementCount); }
};
struct PackedAttributeLayout
{
PackedAttributeLayout()
......@@ -80,23 +59,10 @@ class InputLayoutCache : angle::NonCopyable
void addAttributeData(GLenum glType,
UINT semanticIndex,
DXGI_FORMAT dxgiFormat,
unsigned int divisor)
{
attributeData[numAttributes].glType = glType;
attributeData[numAttributes].semanticIndex = semanticIndex;
attributeData[numAttributes].dxgiFormat = dxgiFormat;
attributeData[numAttributes].divisor = divisor;
++numAttributes;
}
gl::VertexFormatType vertexFormatType,
unsigned int divisor);
struct PackedAttribute
{
GLenum glType;
UINT semanticIndex;
DXGI_FORMAT dxgiFormat;
unsigned int divisor;
};
bool operator<(const PackedAttributeLayout &other) const;
enum Flags
{
......@@ -107,15 +73,10 @@ class InputLayoutCache : angle::NonCopyable
size_t numAttributes;
unsigned int flags;
PackedAttribute attributeData[gl::MAX_VERTEX_ATTRIBS];
};
struct PackedAttributeComparator
{
bool operator()(const PackedAttributeLayout &a, const PackedAttributeLayout &b) const;
uint32_t attributeData[gl::MAX_VERTEX_ATTRIBS];
};
std::map<PackedAttributeLayout, ID3D11InputLayout *, PackedAttributeComparator> mLayoutMap;
std::map<PackedAttributeLayout, ID3D11InputLayout *> mLayoutMap;
ID3D11InputLayout *mCurrentIL;
ID3D11Buffer *mCurrentBuffers[gl::MAX_VERTEX_ATTRIBS];
......
......@@ -333,6 +333,22 @@ TEST_P(VertexAttributeTest, MaxAttribsPlusOne)
ASSERT_EQ(0u, program);
}
// Verifies that a draw still produces the expected pixel after attribute locations have been
// assigned explicitly through glBindAttribLocation.
TEST_P(VertexAttributeTest, SimpleBindAttribLocation)
{
    // Start from the shared multi-attribute program, pin "position" and "a0" to locations 2 and
    // 3, then relink: glBindAttribLocation only takes effect at the next link.
    GLuint boundProgram = compileMultiAttribProgram(1);
    glBindAttribLocation(boundProgram, 2, "position");
    glBindAttribLocation(boundProgram, 3, "a0");
    glLinkProgram(boundProgram);

    // Feed the attribute data and render the quad.
    setupMultiAttribs(boundProgram, 1, 0.5f);
    drawQuad(boundProgram, "position", 0.5f);

    // The draw must raise no GL error and the corner pixel must match the expected colour.
    EXPECT_GL_NO_ERROR();
    EXPECT_PIXEL_NEAR(0, 0, 128, 0, 0, 255, 1);
}
// Use this to select which configurations (e.g. which renderer, which GLES major version) these tests should be run against.
// D3D11 Feature Level 9_3 uses different D3D formats for vertex attribs compared to Feature Levels 10_0+, so we should test them separately.
ANGLE_INSTANTIATE_TEST(VertexAttributeTest, ES2_D3D9(), ES2_D3D11(), ES2_D3D11_FL9_3(), ES2_OPENGL(), ES3_OPENGL());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment