Use HashingMRUCache in InputLayoutCache.

This simplifies the state management code inside InputLayoutCache. It should also speed up lookups, since the cache now uses hashing instead of a tree (std::map) lookup.

BUG=angleproject:1156
BUG=angleproject:2044

Change-Id: I19ea8dbac6f2dfd7d30dd403d77b66ba0aa85d73
Reviewed-on: https://chromium-review.googlesource.com/527693
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Geoff Lang <geofflang@chromium.org>

Use HashingMRUCache in InputLayoutCache.
d222d454 · Jamie Madill · Commit Bot · 34ca4f5b · d222d454 · d222d454
Commit d222d454 authored Jun 06, 2017 by Jamie Madill Committed by Commit Bot Jun 13, 2017
5 changed files
--- a/src/libANGLE/SizedMRUCache.h
+++ b/src/libANGLE/SizedMRUCache.h
@@ -9,6 +9,7 @@
 #define LIBANGLE_SIZED_MRU_CACHE_H_
 #include <anglebase/containers/mru_cache.h>
+#include <cstdint>
+#include "common/third_party/murmurhash/MurmurHash3.h"
 namespace angle
 {
@@ -102,5 +103,29 @@ class SizedMRUCache final : angle::NonCopyable
    SizedMRUCacheStore mStore;
 };
+// Shared cache-trimming helper. Once |cache| holds at least |maxStates| / 2 + |gcLimit| entries,
+// logs a warning mentioning |name| and shrinks the cache to |maxStates| / 2 entries.
+// |T| must provide size() and ShrinkToSize().
+template <typename T>
+void TrimCache(size_t maxStates, size_t gcLimit, const char *name, T *cache)
+{
+    const size_t targetSize = maxStates / 2;
+    if (cache->size() < targetSize + gcLimit)
+    {
+        // Still below the garbage collection threshold; nothing to do.
+        return;
+    }
+    WARN() << "Overflowed the " << name << " cache limit of " << targetSize
+           << " elements, removing the least recently used to make room.";
+    cache->ShrinkToSize(targetSize);
+}
+// Hashes the object representation of |key| (all sizeof(T) bytes) with MurmurHash3_x86_32 and a
+// fixed seed, and returns the result widened to std::size_t.
+// NOTE(review): because every byte of |key| is hashed, any padding bytes inside |T| take part in
+// the hash as well; key types should have no indeterminate bytes if equal keys must hash equally.
+template <typename T>
+std::size_t ComputeGenericHash(const T &key)
+{
+    static const unsigned int seed = 0xABCDEF98;
+    // The x86_32 variant yields a 32-bit value. Receive it in a 32-bit object rather than writing
+    // it into part of a wider std::size_t, so the result does not depend on byte order.
+    uint32_t hash = 0;
+    MurmurHash3_x86_32(&key, sizeof(key), seed, &hash);
+    return hash;
+}
 }  // namespace angle
 #endif  // LIBANGLE_SIZED_MRU_CACHE_H_
--- a/src/libANGLE/renderer/d3d/d3d11/InputLayoutCache.cpp
+++ b/src/libANGLE/renderer/d3d/d3d11/InputLayoutCache.cpp
@@ -72,8 +72,6 @@ GLenum GetGLSLAttributeType(const std::vector<sh::Attribute> &shaderAttributes, 
    return GL_NONE;
 }
-const unsigned int kDefaultCacheSize = 1024;
 struct PackedAttribute
 {
    uint8_t attribType;
@@ -132,11 +130,14 @@ void SortAttributesByLayout(const gl::Program *program,
 } // anonymous namespace
-void InputLayoutCache::PackedAttributeLayout::addAttributeData(
+PackedAttributeLayout::PackedAttributeLayout() : numAttributes(0), flags(0), attributeData({})
-    GLenum glType,
+{
-    UINT semanticIndex,
+}
-    gl::VertexFormatType vertexFormatType,
-    unsigned int divisor)
+void PackedAttributeLayout::addAttributeData(GLenum glType,
+                                             UINT semanticIndex,
+                                             gl::VertexFormatType vertexFormatType,
+                                             unsigned int divisor)
 {
    gl::AttributeType attribType = gl::GetAttributeType(glType);
@@ -156,23 +157,14 @@ void InputLayoutCache::PackedAttributeLayout::addAttributeData(
    attributeData[numAttributes++] = gl::bitCast<uint32_t>(packedAttrib);
 }
-bool InputLayoutCache::PackedAttributeLayout::operator<(const PackedAttributeLayout &other) const
+bool PackedAttributeLayout::operator==(const PackedAttributeLayout &other) const
 {
-    if (numAttributes != other.numAttributes)
+    return (numAttributes == other.numAttributes) && (flags == other.flags) &&
-    {
+           (attributeData == other.attributeData);
-        return numAttributes < other.numAttributes;
-    }
-    if (flags != other.flags)
-    {
-        return flags < other.flags;
-    }
-    return memcmp(attributeData, other.attributeData, sizeof(uint32_t) * numAttributes) < 0;
 }
 InputLayoutCache::InputLayoutCache()
-    : mPointSpriteVertexBuffer(), mPointSpriteIndexBuffer(), mCacheSize(kDefaultCacheSize)
+    : mLayoutCache(kDefaultCacheSize * 2), mPointSpriteVertexBuffer(), mPointSpriteIndexBuffer()
 {
    mCurrentAttributes.reserve(gl::MAX_VERTEX_ATTRIBS);
 }
@@ -183,7 +175,7 @@ InputLayoutCache::~InputLayoutCache()
 void InputLayoutCache::clear()
 {
-    mLayoutMap.clear();
+    mLayoutCache.Clear();
    mPointSpriteVertexBuffer.reset();
    mPointSpriteIndexBuffer.reset();
 }
@@ -445,36 +437,21 @@ gl::Error InputLayoutCache::updateInputLayout(Renderer11 *renderer,
    const d3d11::InputLayout *inputLayout = nullptr;
    if (layout.numAttributes > 0 || layout.flags != 0)
    {
-        auto layoutMapIt = mLayoutMap.find(layout);
+        auto it = mLayoutCache.Get(layout);
-        if (layoutMapIt != mLayoutMap.end())
+        if (it != mLayoutCache.end())
        {
-            inputLayout = &layoutMapIt->second;
+            inputLayout = &it->second;
        }
        else
        {
+            angle::TrimCache(mLayoutCache.max_size() / 2, kGCLimit, "input layout", &mLayoutCache);
            d3d11::InputLayout newInputLayout;
            ANGLE_TRY(createInputLayout(renderer, sortedSemanticIndices, mode, program,
                                        numIndicesPerInstance, &newInputLayout));
-            if (mLayoutMap.size() >= mCacheSize)
-            {
-                WARN() << "Overflowed the limit of " << mCacheSize
-                       << " input layouts, purging half the cache.";
-                // Randomly release every second element
+            auto insertIt = mLayoutCache.Put(layout, std::move(newInputLayout));
-                auto it = mLayoutMap.begin();
+            inputLayout   = &insertIt->second;
-                while (it != mLayoutMap.end())
-                {
-                    it++;
-                    if (it != mLayoutMap.end())
-                    {
-                        // c++11 erase allows us to easily delete the current iterator.
-                        it = mLayoutMap.erase(it);
-                    }
-                }
-            }
-            auto result = mLayoutMap.insert(std::make_pair(layout, std::move(newInputLayout)));
-            inputLayout = &result.first->second;
        }
    }
@@ -584,4 +561,11 @@ gl::Error InputLayoutCache::createInputLayout(Renderer11 *renderer,
    return gl::NoError();
 }
+void InputLayoutCache::setCacheSize(size_t newCacheSize)
+{
+    // Resizing forces a reset: the current cache is swapped with a newly constructed one that
+    // was created with the requested size.
+    LayoutCache freshCache(newCacheSize);
+    mLayoutCache.Swap(freshCache);
+}
 }  // namespace rx
--- a/src/libANGLE/renderer/d3d/d3d11/InputLayoutCache.h
+++ b/src/libANGLE/renderer/d3d/d3d11/InputLayoutCache.h
@@ -20,14 +20,32 @@
 #include "common/angleutils.h"
 #include "libANGLE/Constants.h"
 #include "libANGLE/Error.h"
+#include "libANGLE/SizedMRUCache.h"
 #include "libANGLE/formatutils.h"
 #include "libANGLE/renderer/d3d/RendererD3D.h"
 #include "libANGLE/renderer/d3d/d3d11/ResourceManager11.h"
+namespace rx
+{
+struct PackedAttributeLayout;
+}  // namespace rx
+namespace std
+{
+// Allows rx::PackedAttributeLayout to be used as a key in hash-based containers. The hash is
+// produced by angle::ComputeGenericHash over the layout object.
+template <>
+struct hash<rx::PackedAttributeLayout>
+{
+    size_t operator()(const rx::PackedAttributeLayout &value) const
+    {
+        return angle::ComputeGenericHash(value);
+    }
+};
+}  // namespace std
 namespace gl
 {
 class Program;
-}
+}  // namespace gl
 namespace rx
 {
@@ -37,6 +55,28 @@ struct SourceIndexData;
 class ProgramD3D;
 class Renderer11;
+// Compact description of a set of vertex attributes plus flags. Used as the key type of the
+// input layout cache.
+struct PackedAttributeLayout
+{
+    PackedAttributeLayout();
+    // Packs one attribute and appends it to attributeData, incrementing numAttributes.
+    void addAttributeData(GLenum glType,
+                          UINT semanticIndex,
+                          gl::VertexFormatType vertexFormatType,
+                          unsigned int divisor);
+    // Two layouts are equal when numAttributes, flags and attributeData all match.
+    bool operator==(const PackedAttributeLayout &other) const;
+    enum Flags
+    {
+        FLAG_USES_INSTANCED_SPRITES     = 0x1,
+        FLAG_INSTANCED_SPRITES_ACTIVE   = 0x2,
+        FLAG_INSTANCED_RENDERING_ACTIVE = 0x4,
+    };
+    // The std::hash specialization for this type hashes the raw bytes of the whole struct, so the
+    // members are kept 4 bytes wide: mixing in a wider member such as size_t can introduce padding
+    // bytes that are never initialized and would make equal layouts hash differently.
+    uint32_t numAttributes;
+    unsigned int flags;
+    std::array<uint32_t, gl::MAX_VERTEX_ATTRIBS> attributeData;
+};
 class InputLayoutCache : angle::NonCopyable
 {
  public:
@@ -59,36 +99,9 @@ class InputLayoutCache : angle::NonCopyable
                                                          GLsizei emulatedInstanceId);
    // Useful for testing
-    void setCacheSize(unsigned int cacheSize) { mCacheSize = cacheSize; }
+    void setCacheSize(size_t newCacheSize);
  private:
-    struct PackedAttributeLayout
-    {
-        PackedAttributeLayout()
-            : numAttributes(0),
-              flags(0)
-        {
-        }
-        void addAttributeData(GLenum glType,
-                              UINT semanticIndex,
-                              gl::VertexFormatType vertexFormatType,
-                              unsigned int divisor);
-        bool operator<(const PackedAttributeLayout &other) const;
-        enum Flags
-        {
-            FLAG_USES_INSTANCED_SPRITES     = 0x1,
-            FLAG_INSTANCED_SPRITES_ACTIVE   = 0x2,
-            FLAG_INSTANCED_RENDERING_ACTIVE = 0x4,
-        };
-        size_t numAttributes;
-        unsigned int flags;
-        uint32_t attributeData[gl::MAX_VERTEX_ATTRIBS];
-    };
    gl::Error updateInputLayout(Renderer11 *renderer,
                                const gl::State &state,
                                GLenum mode,
@@ -101,14 +114,19 @@ class InputLayoutCache : angle::NonCopyable
                                GLsizei numIndicesPerInstance,
                                d3d11::InputLayout *inputLayoutOut);
-    std::map<PackedAttributeLayout, d3d11::InputLayout> mLayoutMap;
+    // Starting cache size.
+    static constexpr size_t kDefaultCacheSize = 1024;
+    // The cache tries to clean up this many states at once.
+    static constexpr size_t kGCLimit = 128;
+    using LayoutCache = angle::base::HashingMRUCache<PackedAttributeLayout, d3d11::InputLayout>;
+    LayoutCache mLayoutCache;
    std::vector<const TranslatedAttribute *> mCurrentAttributes;
    d3d11::Buffer mPointSpriteVertexBuffer;
    d3d11::Buffer mPointSpriteIndexBuffer;
-    unsigned int mCacheSize;
 };
 }  // namespace rx

--- a/src/libANGLE/renderer/d3d/d3d11/RenderStateCache.cpp
+++ b/src/libANGLE/renderer/d3d/d3d11/RenderStateCache.cpp
@@ -23,37 +23,6 @@ namespace rx
 {
 using namespace gl_d3d11;
-namespace
-{
-template <typename T>
-void TrimCache(unsigned int maxStates, unsigned int gcLimit, const char *name, T *cache)
-{
-    unsigned int kGarbageCollectionLimit = maxStates / 2 + gcLimit;
-    if (cache->size() >= kGarbageCollectionLimit)
-    {
-        WARN() << "Overflowed the limit of " << (maxStates / 2) << " " << name
-               << " states, removing the least recently used to make room.";
-        cache->ShrinkToSize(maxStates / 2);
-    }
-}
-}  // anonymous namespace
-template <typename T>
-std::size_t ComputeGenericHash(const T &key)
-{
-    static const unsigned int seed = 0xABCDEF98;
-    std::size_t hash = 0;
-    MurmurHash3_x86_32(&key, sizeof(key), seed, &hash);
-    return hash;
-}
-template std::size_t ComputeGenericHash(const rx::d3d11::BlendStateKey &);
-template std::size_t ComputeGenericHash(const rx::d3d11::RasterizerStateKey &);
-template std::size_t ComputeGenericHash(const gl::DepthStencilState &);
-template std::size_t ComputeGenericHash(const gl::SamplerState &);
 RenderStateCache::RenderStateCache()
    : mBlendStateCache(kMaxStates),
      mRasterizerStateCache(kMaxStates),
@@ -127,7 +96,7 @@ gl::Error RenderStateCache::getBlendState(Renderer11 *renderer,
        return gl::NoError();
    }
-    TrimCache(kMaxStates, kGCLimit, "blend", &mBlendStateCache);
+    TrimCache(kMaxStates, kGCLimit, "blend state", &mBlendStateCache);
    // Create a new blend state and insert it into the cache
    D3D11_BLEND_DESC blendDesc;
@@ -182,7 +151,7 @@ gl::Error RenderStateCache::getRasterizerState(Renderer11 *renderer,
        return gl::NoError();
    }
-    TrimCache(kMaxStates, kGCLimit, "rasterizer", &mRasterizerStateCache);
+    TrimCache(kMaxStates, kGCLimit, "rasterizer state", &mRasterizerStateCache);
    D3D11_CULL_MODE cullMode =
        gl_d3d11::ConvertCullMode(rasterState.cullFace, rasterState.cullMode);
@@ -234,7 +203,7 @@ gl::Error RenderStateCache::getDepthStencilState(Renderer11 *renderer,
        return gl::NoError();
    }
-    TrimCache(kMaxStates, kGCLimit, "depth stencil", &mDepthStencilStateCache);
+    TrimCache(kMaxStates, kGCLimit, "depth stencil state", &mDepthStencilStateCache);
    D3D11_DEPTH_STENCIL_DESC dsDesc     = {0};
    dsDesc.DepthEnable                  = glState.depthTest ? TRUE : FALSE;
@@ -271,7 +240,7 @@ gl::Error RenderStateCache::getSamplerState(Renderer11 *renderer,
        return gl::NoError();
    }
-    TrimCache(kMaxStates, kGCLimit, "sampler stencil", &mSamplerStateCache);
+    TrimCache(kMaxStates, kGCLimit, "sampler state", &mSamplerStateCache);
    const auto &featureLevel = renderer->getRenderer11DeviceCaps().featureLevel;

--- a/src/libANGLE/renderer/d3d/d3d11/RenderStateCache.h
+++ b/src/libANGLE/renderer/d3d/d3d11/RenderStateCache.h
@@ -12,10 +12,10 @@
 #include "common/angleutils.h"
 #include "libANGLE/Error.h"
+#include "libANGLE/SizedMRUCache.h"
 #include "libANGLE/angletypes.h"
 #include "libANGLE/renderer/d3d/d3d11/renderer11_utils.h"
-#include <anglebase/containers/mru_cache.h>
 #include <unordered_map>
 namespace gl
@@ -23,12 +23,6 @@ namespace gl
 class Framebuffer;
 }
-namespace rx
-{
-template <typename T>
-std::size_t ComputeGenericHash(const T &key);
-}  // namespace rx
 namespace std
 {
 template <>
@@ -36,7 +30,7 @@ struct hash<rx::d3d11::BlendStateKey>
 {
    size_t operator()(const rx::d3d11::BlendStateKey &key) const
    {
-        return rx::ComputeGenericHash(key);
+        return angle::ComputeGenericHash(key);
    }
 };
@@ -45,7 +39,7 @@ struct hash<rx::d3d11::RasterizerStateKey>
 {
    size_t operator()(const rx::d3d11::RasterizerStateKey &key) const
    {
-        return rx::ComputeGenericHash(key);
+        return angle::ComputeGenericHash(key);
    }
 };
@@ -54,14 +48,14 @@ struct hash<gl::DepthStencilState>
 {
    size_t operator()(const gl::DepthStencilState &key) const
    {
-        return rx::ComputeGenericHash(key);
+        return angle::ComputeGenericHash(key);
    }
 };
 template <>
 struct hash<gl::SamplerState>
 {
-    size_t operator()(const gl::SamplerState &key) const { return rx::ComputeGenericHash(key); }
+    size_t operator()(const gl::SamplerState &key) const { return angle::ComputeGenericHash(key); }
 };
 }  // namespace std