Commit 531d5f45 by Jamie Madill

D3D11: Refactor InputLayout cache.

*Re-land with fix for cache overflowing.* Using a much more compact input layout structure allows us to save quite a bit of time comparing input layouts or computing hashes. A subsequent patch shrinks the size of the structure further. BUG=angleproject:959 Change-Id: If240bb7c84d78fc8c9fb6f9049bf71d8a81c97c6 Reviewed-on: https://chromium-review.googlesource.com/283227 Reviewed-by: Geoff Lang <geofflang@chromium.org> Tested-by: Jamie Madill <jmadill@chromium.org>
parent 01e4bfe7
...@@ -42,11 +42,44 @@ void GetInputLayout(const TranslatedAttribute *translatedAttributes[gl::MAX_VERT ...@@ -42,11 +42,44 @@ void GetInputLayout(const TranslatedAttribute *translatedAttributes[gl::MAX_VERT
} }
} }
const unsigned int kDefaultCacheSize = 1024;
} // anonymous namespace } // anonymous namespace
const unsigned int InputLayoutCache::kMaxInputLayouts = 1024; bool InputLayoutCache::PackedAttributeComparator::operator()(const PackedAttributeLayout &a,
const PackedAttributeLayout &b) const
{
if (a.numAttributes != b.numAttributes)
{
return a.numAttributes < b.numAttributes;
}
if (a.flags != b.flags)
{
return a.flags < b.flags;
}
for (size_t attribIndex = 0; attribIndex < a.numAttributes; attribIndex++)
{
const auto &attribA = a.attributeData[attribIndex];
const auto &attribB = b.attributeData[attribIndex];
if (attribA.glType != attribB.glType)
return attribA.glType < attribB.glType;
if (attribA.semanticIndex != attribB.semanticIndex)
return attribA.semanticIndex < attribB.semanticIndex;
if (attribA.dxgiFormat != attribB.dxgiFormat)
return attribA.dxgiFormat < attribB.dxgiFormat;
if (attribA.divisor != attribB.divisor)
return attribA.divisor < attribB.divisor;
}
InputLayoutCache::InputLayoutCache() : mInputLayoutMap(kMaxInputLayouts, hashInputLayout, compareInputLayouts) // Equal
return false;
}
InputLayoutCache::InputLayoutCache()
: mCacheSize(kDefaultCacheSize)
{ {
mCounter = 0; mCounter = 0;
mDevice = NULL; mDevice = NULL;
...@@ -77,11 +110,11 @@ void InputLayoutCache::initialize(ID3D11Device *device, ID3D11DeviceContext *con ...@@ -77,11 +110,11 @@ void InputLayoutCache::initialize(ID3D11Device *device, ID3D11DeviceContext *con
void InputLayoutCache::clear() void InputLayoutCache::clear()
{ {
for (InputLayoutMap::iterator i = mInputLayoutMap.begin(); i != mInputLayoutMap.end(); i++) for (auto &layout : mLayoutMap)
{ {
SafeRelease(i->second.inputLayout); SafeRelease(layout.second);
} }
mInputLayoutMap.clear(); mLayoutMap.clear();
SafeRelease(mPointSpriteVertexBuffer); SafeRelease(mPointSpriteVertexBuffer);
SafeRelease(mPointSpriteIndexBuffer); SafeRelease(mPointSpriteIndexBuffer);
markDirty(); markDirty();
...@@ -115,7 +148,10 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri ...@@ -115,7 +148,10 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri
return gl::Error(GL_OUT_OF_MEMORY, "Internal input layout cache is not initialized."); return gl::Error(GL_OUT_OF_MEMORY, "Internal input layout cache is not initialized.");
} }
InputLayoutKey ilKey = { 0 }; InputLayoutKey ilKey;
ilKey.elementCount = 0;
PackedAttributeLayout layout;
static const char* semanticName = "TEXCOORD"; static const char* semanticName = "TEXCOORD";
...@@ -158,6 +194,11 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri ...@@ -158,6 +194,11 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri
ilKey.elementCount++; ilKey.elementCount++;
nextAvailableInputSlot = i + 1; nextAvailableInputSlot = i + 1;
layout.addAttributeData(ilKey.elements[ilKey.elementCount].glslElementType,
sortedSemanticIndices[i],
vertexFormatInfo.nativeFormat,
sortedAttributes[i]->divisor);
} }
} }
...@@ -215,20 +256,34 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri ...@@ -215,20 +256,34 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri
} }
} }
ID3D11InputLayout *inputLayout = NULL; if (programUsesInstancedPointSprites)
{
layout.flags |= PackedAttributeLayout::FLAG_USES_INSTANCED_SPRITES;
}
InputLayoutMap::iterator keyIter = mInputLayoutMap.find(ilKey); if (moveFirstIndexedIntoSlotZero)
if (keyIter != mInputLayoutMap.end())
{ {
inputLayout = keyIter->second.inputLayout; layout.flags |= PackedAttributeLayout::FLAG_MOVE_FIRST_INDEXED;
keyIter->second.lastUsedTime = mCounter++; }
if (instancedPointSpritesActive)
{
layout.flags |= PackedAttributeLayout::FLAG_INSTANCED_SPRITES_ACTIVE;
}
ID3D11InputLayout *inputLayout = nullptr;
auto layoutMapIt = mLayoutMap.find(layout);
if (layoutMapIt != mLayoutMap.end())
{
inputLayout = layoutMapIt->second;
} }
else else
{ {
gl::VertexFormat shaderInputLayout[gl::MAX_VERTEX_ATTRIBS]; gl::VertexFormat shaderInputLayout[gl::MAX_VERTEX_ATTRIBS];
GetInputLayout(sortedAttributes, unsortedAttributes.size(), shaderInputLayout); GetInputLayout(sortedAttributes, unsortedAttributes.size(), shaderInputLayout);
ShaderExecutableD3D *shader = NULL; ShaderExecutableD3D *shader = nullptr;
gl::Error error = programD3D->getVertexExecutableForInputLayout(shaderInputLayout, &shader, nullptr); gl::Error error = programD3D->getVertexExecutableForInputLayout(shaderInputLayout, &shader, nullptr);
if (error.isError()) if (error.isError())
{ {
...@@ -249,28 +304,26 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri ...@@ -249,28 +304,26 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri
return gl::Error(GL_OUT_OF_MEMORY, "Failed to create internal input layout, HRESULT: 0x%08x", result); return gl::Error(GL_OUT_OF_MEMORY, "Failed to create internal input layout, HRESULT: 0x%08x", result);
} }
if (mInputLayoutMap.size() >= kMaxInputLayouts) if (mLayoutMap.size() >= mCacheSize)
{ {
TRACE("Overflowed the limit of %u input layouts, removing the least recently used " TRACE("Overflowed the limit of %u input layouts, purging half the cache.", mCacheSize);
"to make room.", kMaxInputLayouts);
InputLayoutMap::iterator leastRecentlyUsed = mInputLayoutMap.begin(); // Randomly release every second element
for (InputLayoutMap::iterator i = mInputLayoutMap.begin(); i != mInputLayoutMap.end(); i++) auto it = mLayoutMap.begin();
while (it != mLayoutMap.end())
{ {
if (i->second.lastUsedTime < leastRecentlyUsed->second.lastUsedTime) it++;
if (it != mLayoutMap.end())
{ {
leastRecentlyUsed = i; // Calling std::map::erase invalidates the current iterator, so make a copy.
auto eraseIt = it++;
SafeRelease(eraseIt->second);
mLayoutMap.erase(eraseIt);
} }
} }
SafeRelease(leastRecentlyUsed->second.inputLayout);
mInputLayoutMap.erase(leastRecentlyUsed);
} }
InputLayoutCounterPair inputCounterPair; mLayoutMap[layout] = inputLayout;
inputCounterPair.inputLayout = inputLayout;
inputCounterPair.lastUsedTime = mCounter++;
mInputLayoutMap.insert(std::make_pair(ilKey, inputCounterPair));
} }
if (inputLayout != mCurrentIL) if (inputLayout != mCurrentIL)
...@@ -433,23 +486,4 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri ...@@ -433,23 +486,4 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri
return gl::Error(GL_NO_ERROR); return gl::Error(GL_NO_ERROR);
} }
std::size_t InputLayoutCache::hashInputLayout(const InputLayoutKey &inputLayout)
{
static const unsigned int seed = 0xDEADBEEF;
std::size_t hash = 0;
MurmurHash3_x86_32(inputLayout.begin(), static_cast<int>(inputLayout.end() - inputLayout.begin()), seed, &hash);
return hash;
}
bool InputLayoutCache::compareInputLayouts(const InputLayoutKey &a, const InputLayoutKey &b)
{
if (a.elementCount != b.elementCount)
{
return false;
}
return std::equal(a.begin(), a.end(), b.begin());
}
} }
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <GLES2/gl2.h> #include <GLES2/gl2.h>
#include <cstddef> #include <cstddef>
#include <map>
#include <unordered_map> #include <unordered_map>
namespace gl namespace gl
...@@ -43,6 +44,9 @@ class InputLayoutCache : angle::NonCopyable ...@@ -43,6 +44,9 @@ class InputLayoutCache : angle::NonCopyable
gl::Error applyVertexBuffers(const std::vector<TranslatedAttribute> &attributes, gl::Error applyVertexBuffers(const std::vector<TranslatedAttribute> &attributes,
GLenum mode, gl::Program *program, SourceIndexData *sourceInfo); GLenum mode, gl::Program *program, SourceIndexData *sourceInfo);
// Useful for testing
void setCacheSize(unsigned int cacheSize) { mCacheSize = cacheSize; }
private: private:
struct InputLayoutElement struct InputLayoutElement
{ {
...@@ -66,12 +70,53 @@ class InputLayoutCache : angle::NonCopyable ...@@ -66,12 +70,53 @@ class InputLayoutCache : angle::NonCopyable
} }
}; };
struct InputLayoutCounterPair struct PackedAttributeLayout
{ {
ID3D11InputLayout *inputLayout; PackedAttributeLayout()
unsigned long long lastUsedTime; : numAttributes(0),
flags(0)
{
}
void addAttributeData(GLenum glType,
UINT semanticIndex,
DXGI_FORMAT dxgiFormat,
unsigned int divisor)
{
attributeData[numAttributes].glType = glType;
attributeData[numAttributes].semanticIndex = semanticIndex;
attributeData[numAttributes].dxgiFormat = dxgiFormat;
attributeData[numAttributes].divisor = divisor;
++numAttributes;
}
struct PackedAttribute
{
GLenum glType;
UINT semanticIndex;
DXGI_FORMAT dxgiFormat;
unsigned int divisor;
};
enum Flags
{
FLAG_USES_INSTANCED_SPRITES = 0x1,
FLAG_MOVE_FIRST_INDEXED = 0x2,
FLAG_INSTANCED_SPRITES_ACTIVE = 0x4,
};
size_t numAttributes;
unsigned int flags;
PackedAttribute attributeData[gl::MAX_VERTEX_ATTRIBS];
};
struct PackedAttributeComparator
{
bool operator()(const PackedAttributeLayout &a, const PackedAttributeLayout &b) const;
}; };
std::map<PackedAttributeLayout, ID3D11InputLayout *, PackedAttributeComparator> mLayoutMap;
ID3D11InputLayout *mCurrentIL; ID3D11InputLayout *mCurrentIL;
ID3D11Buffer *mCurrentBuffers[gl::MAX_VERTEX_ATTRIBS]; ID3D11Buffer *mCurrentBuffers[gl::MAX_VERTEX_ATTRIBS];
UINT mCurrentVertexStrides[gl::MAX_VERTEX_ATTRIBS]; UINT mCurrentVertexStrides[gl::MAX_VERTEX_ATTRIBS];
...@@ -80,19 +125,7 @@ class InputLayoutCache : angle::NonCopyable ...@@ -80,19 +125,7 @@ class InputLayoutCache : angle::NonCopyable
ID3D11Buffer *mPointSpriteVertexBuffer; ID3D11Buffer *mPointSpriteVertexBuffer;
ID3D11Buffer *mPointSpriteIndexBuffer; ID3D11Buffer *mPointSpriteIndexBuffer;
static std::size_t hashInputLayout(const InputLayoutKey &inputLayout); unsigned int mCacheSize;
static bool compareInputLayouts(const InputLayoutKey &a, const InputLayoutKey &b);
typedef std::size_t (*InputLayoutHashFunction)(const InputLayoutKey &);
typedef bool (*InputLayoutEqualityFunction)(const InputLayoutKey &, const InputLayoutKey &);
typedef std::unordered_map<InputLayoutKey,
InputLayoutCounterPair,
InputLayoutHashFunction,
InputLayoutEqualityFunction> InputLayoutMap;
InputLayoutMap mInputLayoutMap;
static const unsigned int kMaxInputLayouts;
unsigned long long mCounter; unsigned long long mCounter;
ID3D11Device *mDevice; ID3D11Device *mDevice;
......
...@@ -264,6 +264,7 @@ class Renderer11 : public RendererD3D ...@@ -264,6 +264,7 @@ class Renderer11 : public RendererD3D
const Renderer11DeviceCaps &getRenderer11DeviceCaps() { return mRenderer11DeviceCaps; }; const Renderer11DeviceCaps &getRenderer11DeviceCaps() { return mRenderer11DeviceCaps; };
RendererClass getRendererClass() const override { return RENDERER_D3D11; } RendererClass getRendererClass() const override { return RENDERER_D3D11; }
InputLayoutCache *getInputLayoutCache() { return &mInputLayoutCache; }
protected: protected:
void createAnnotator() override; void createAnnotator() override;
......
...@@ -69,6 +69,7 @@ ...@@ -69,6 +69,7 @@
[ [
'<(angle_path)/src/tests/gl_tests/D3D11EmulatedIndexedBufferTest.cpp', '<(angle_path)/src/tests/gl_tests/D3D11EmulatedIndexedBufferTest.cpp',
'<(angle_path)/src/tests/gl_tests/D3D11FormatTablesTest.cpp', '<(angle_path)/src/tests/gl_tests/D3D11FormatTablesTest.cpp',
'<(angle_path)/src/tests/gl_tests/D3D11InputLayoutCacheTest.cpp',
'<(angle_path)/src/tests/gl_tests/QueryDisplayAttribTest.cpp', '<(angle_path)/src/tests/gl_tests/QueryDisplayAttribTest.cpp',
# TODO(cwallez) for Linux, requires a portable implementation of threads # TODO(cwallez) for Linux, requires a portable implementation of threads
'<(angle_path)/src/tests/egl_tests/EGLThreadTest.cpp', '<(angle_path)/src/tests/egl_tests/EGLThreadTest.cpp',
......
//
// Copyright 2015 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// D3D11InputLayoutCacheTest:
//   Stress test to reproduce a bug where we weren't flushing the cache correctly.
//
#include <sstream>
#include "libANGLE/Context.h"
#include "libANGLE/renderer/d3d/d3d11/Renderer11.h"
#include "test_utils/ANGLETest.h"
#include "test_utils/angle_test_instantiate.h"
using namespace angle;
namespace
{
// Test fixture that can build GLSL programs with a configurable number of float
// vertex attributes, so each program has a distinct vertex input signature.
class D3D11InputLayoutCacheTest : public ANGLETest
{
  protected:
    // Builds a program whose vertex shader declares "position" plus |attribCount|
    // float attributes named a0, a1, ... and accumulates them into the varying "v".
    // The fragment shader writes v / 255.0 to the red channel.
    // Returns the value produced by CompileProgram.
    GLuint makeProgramWithAttribCount(unsigned int attribCount)
    {
        std::ostringstream vertexSource;

        // Attribute declarations.
        vertexSource << "attribute vec2 position;\n";
        for (unsigned int i = 0; i != attribCount; ++i)
        {
            vertexSource << "attribute float a" << i << ";\n";
        }

        // Shader body: sum every extra attribute into the varying.
        vertexSource << "varying float v;\n"
                     << "void main() {\n"
                     << " v = 0.0;\n";
        for (unsigned int i = 0; i != attribCount; ++i)
        {
            vertexSource << " v += a" << i << ";\n";
        }
        vertexSource << " gl_Position = vec4(position, 0.0, 1.0);\n"
                     << "}\n";

        const std::string fragmentSource =
            "varying highp float v;\n"
            "void main() { gl_FragColor = vec4(v / 255.0, 0.0, 0.0, 1.0);\n"
            "}\n";

        return CompileProgram(vertexSource.str(), fragmentSource);
    }
};
// Thrashes the input layout cache: shrinks it to a handful of entries, then draws
// repeatedly with a series of programs that each have a different input signature.
TEST_P(D3D11InputLayoutCacheTest, StressTest)
{
    // Reach through the EGL window into ANGLE's internals to get the D3D11 renderer's
    // input layout cache.
    gl::Context *glContext = reinterpret_cast<gl::Context *>(getEGLWindow()->getContext());
    rx::Renderer11 *d3d11Renderer = rx::GetAs<rx::Renderer11>(glContext->getRenderer());
    rx::InputLayoutCache *layoutCache = d3d11Renderer->getInputLayoutCache();

    // Use a tiny cache so that the number of distinct layouts exceeds its size.
    layoutCache->setCacheSize(4);

    GLint maxAttribs = 0;
    glContext->getIntegerv(GL_MAX_VERTEX_ATTRIBS, &maxAttribs);

    // Leave room for the "position" attribute.
    // NOTE(review): two slots are subtracted although only one attribute is reserved
    // here -- confirm whether the extra slot is intentional.
    const unsigned int maxInputs = static_cast<unsigned int>(maxAttribs) - 2;

    // One program per attribute count in [0, maxInputs].
    std::vector<GLuint> programs;
    for (unsigned int count = 0; count <= maxInputs; ++count)
    {
        const GLuint newProgram = makeProgramWithAttribCount(count);
        ASSERT_NE(0u, newProgram);
        programs.push_back(newProgram);
    }

    // Repeatedly do a simple draw operation with every attribute count from 0..maxInputs.
    // This should thrash the cache.
    for (unsigned int pass = 0; pass < 10; ++pass)
    {
        ASSERT_GL_NO_ERROR();

        for (unsigned int count = 0; count <= maxInputs; ++count)
        {
            const GLuint current = programs[count];
            glUseProgram(current);

            // Give every extra attribute the constant value 1.0 with its array disabled,
            // so the shader's sum "v" equals the attribute count.
            for (unsigned int index = 0; index < count; ++index)
            {
                const std::string attribName = "a" + std::to_string(index);

                const GLint location = glGetAttribLocation(current, attribName.c_str());
                ASSERT_NE(-1, location);

                glVertexAttrib1f(location, 1.0f);
                glDisableVertexAttribArray(location);
            }

            drawQuad(current, "position", 0.5f);

            // The fragment shader writes v / 255.0 to red, so red should read back as count.
            EXPECT_PIXEL_EQ(0, 0, count, 0, 0, 255u);
        }
    }

    for (const GLuint handle : programs)
    {
        glDeleteProgram(handle);
    }
}
// Instantiate the D3D11InputLayoutCacheTest suite for the ES2_D3D11() configuration only.
ANGLE_INSTANTIATE_TEST(D3D11InputLayoutCacheTest, ES2_D3D11());
} // anonymous namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment