Commit ac43aa7c by Ben Clayton

Reimplement LRUCache, fold away LRUSnapshotCache, add tests.

LRUCache previously had a complexity of O(n). Reimplement using a `std::unordered_set` and a linked list to get this reduced to O(1). Renamed `LRUCache::query()` to `LRUCache::get()`, as this is a more common verb for a cache, and the name `query()` suggests it is side-effect free (when it actually makes the entry MRU). Move `LRUCache.hpp` from `src/Device` to `src/System` so it can be tested by `system-unittests`. Move the logic of `LRUSnapshotCache` into `VkDevice::SamplingRoutineCache`, as this was the only place it was used, and it made it exceptionally hard to separate mutex-locked data from non-locked data. This is part of the work to get our code statically thread-safe-verifiable. Bug: b/153194656 Change-Id: Ie02888ae6c7ed4066df77d692dfae28c3bc1664d Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/43489 Reviewed-by: Antonio Maiorano <amaiorano@google.com> Kokoro-Result: kokoro <noreply+kokoro@google.com> Presubmit-Ready: Ben Clayton <bclayton@google.com> Tested-by: Ben Clayton <bclayton@google.com>
parent 2430d665
......@@ -1682,7 +1682,7 @@ Blitter::BlitRoutineType Blitter::generate(const State &state)
Blitter::BlitRoutineType Blitter::getBlitRoutine(const State &state)
{
std::unique_lock<std::mutex> lock(blitMutex);
auto blitRoutine = blitCache.query(state);
auto blitRoutine = blitCache.lookup(state);
if(!blitRoutine)
{
......@@ -1696,7 +1696,7 @@ Blitter::BlitRoutineType Blitter::getBlitRoutine(const State &state)
Blitter::CornerUpdateRoutineType Blitter::getCornerUpdateRoutine(const State &state)
{
std::unique_lock<std::mutex> lock(cornerUpdateMutex);
auto cornerUpdateRoutine = cornerUpdateCache.query(state);
auto cornerUpdateRoutine = cornerUpdateCache.lookup(state);
if(!cornerUpdateRoutine)
{
......
......@@ -95,6 +95,7 @@ class Blitter
int destSamples = 0;
bool filter3D = false;
};
friend std::hash<Blitter::State>;
struct BlitData
{
......@@ -193,4 +194,22 @@ private:
} // namespace sw
namespace std {
// std::hash specialization for sw::Blitter::State.
template<>
struct hash<sw::Blitter::State>
{
	// Folds sourceFormat, destFormat, srcSamples, destSamples and filter3D
	// (in that order) into one value: each step multiplies the running
	// result by 31 and adds the next field.
	uint64_t operator()(const sw::Blitter::State &state) const
	{
		uint64_t result = state.sourceFormat;
		const auto mix = [&result](uint64_t field) { result = result * 31 + field; };

		mix(state.destFormat);
		mix(state.srcSamples);
		mix(state.destSamples);
		mix(state.filter3D);

		return result;
	}
};
} // namespace std
#endif // sw_Blitter_hpp
......@@ -31,7 +31,6 @@ set(DEVICE_SRC_FILES
Context.hpp
ETC_Decoder.cpp
ETC_Decoder.hpp
LRUCache.hpp
Memset.hpp
PixelProcessor.cpp
PixelProcessor.hpp
......
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef sw_LRUCache_hpp
#define sw_LRUCache_hpp
#include "System/Math.hpp"
#include <type_traits>
#include <unordered_map>
namespace sw {
// Fixed-capacity cache associating Key objects with Data objects.
//
// Storage is three heap arrays of 'size' elements each (key, ref, data) that
// are allocated in the constructor and released with delete[] in the
// destructor. 'top' and 'fill' track the most recently added slot and the
// number of occupied slots; 'mask' is 'size - 1' and is used to wrap indices.
template<class Key, class Data>
class LRUCache
{
public:
	// Creates a cache able to hold at least n entries.
	LRUCache(int n);
	virtual ~LRUCache();

	// The cache owns raw arrays that the destructor frees. A memberwise copy
	// would leave two objects deleting the same arrays, so copying is
	// disabled outright.
	LRUCache(const LRUCache &) = delete;
	LRUCache &operator=(const LRUCache &) = delete;

	// Returns the Data stored for key, or a value-initialized Data when the
	// key is not present.
	Data query(const Key &key) const;

	// Stores data under key and returns data.
	virtual Data add(const Key &key, const Data &data);

	// Number of slots in the cache.
	int getSize() const { return size; }

	// Direct access to the i'th element of the key array (no bounds check).
	Key &getKey(int i) { return key[i]; }

protected:
	int size;  // Number of slots in each array.
	int mask;  // size - 1; ANDed with an index to wrap it into range.
	int top;   // Slot written by the most recent add().
	int fill;  // Number of slots currently in use (never more than size).

	Key *key;    // Key storage.
	Key **ref;   // Per-slot pointer into 'key'; slots are reordered by swapping these.
	Data *data;  // Per-slot payload.
};
// An LRU cache which can take a 'snapshot' of its current state. This is useful
// for allowing concurrent read access without requiring a mutex to be locked.
//
// The snapshot is a separate std::unordered_map (hashed with Hasher) that is
// only rebuilt by updateSnapshot(); add() merely flags it as out of date.
template<class Key, class Data, class Hasher = std::hash<Key>>
class LRUSnapshotCache : public LRUCache<Key, Data>
{
// Shorthand for the base class; also used by the out-of-class member definitions.
using LRUBase = LRUCache<Key, Data>;
public:
// Forwards the requested capacity n to the base cache.
LRUSnapshotCache(int n)
: LRUBase(n)
{}
~LRUSnapshotCache() { clearSnapshot(); }
// Adds the entry to the base cache and marks the snapshot as stale.
// The snapshot itself is not modified here.
Data add(const Key &key, const Data &data) override
{
snapshotNeedsUpdate = true;
return LRUBase::add(key, data);
}
// Rebuilds the snapshot from the base cache if add() has been called since
// the last rebuild; otherwise does nothing.
void updateSnapshot();
// Looks key up in the snapshot only (the base cache is not consulted).
// Returns a reference to a value-initialized Data when key is absent.
const Data &querySnapshot(const Key &key) const;
private:
// Removes every entry from the snapshot.
void clearSnapshot();
// True when add() has been called since the snapshot was last rebuilt.
bool snapshotNeedsUpdate = false;
// Copy of the cache contents taken by updateSnapshot().
std::unordered_map<Key, Data, Hasher> snapshot;
};
} // namespace sw
namespace sw {
// Allocates the three backing arrays. The capacity is n passed through
// ceilPow2(), and mask is that capacity minus one. Every ref slot initially
// points at the key element with the same index.
template<class Key, class Data>
LRUCache<Key, Data>::LRUCache(int n)
    : size(ceilPow2(n))
    , mask(size - 1)
    , top(0)
    , fill(0)
    , key(new Key[size])
    , ref(new Key *[size])
    , data(new Data[size])
{
	for(int slot = 0; slot < size; ++slot)
	{
		ref[slot] = key + slot;
	}
}
// Releases the key, ref and data arrays (in that order) and leaves the three
// pointers null.
template<class Key, class Data>
LRUCache<Key, Data>::~LRUCache()
{
	delete[] key;
	delete[] ref;
	delete[] data;

	key = nullptr;
	ref = nullptr;
	data = nullptr;
}
// Searches the occupied slots for key, starting at 'top' and walking
// backwards over 'fill' slots (indices wrap via 'mask').
//
// On a hit that is not already at 'top', the entry is swapped with the slot
// one position above it, so repeatedly queried entries drift towards 'top'.
// Returns a copy of the stored Data, or a value-initialized Data on a miss.
template<class Key, class Data>
Data LRUCache<Key, Data>::query(const Key &key) const
{
	const int stop = top - fill;  // One past the oldest occupied position.

	for(int pos = top; pos > stop; --pos)
	{
		const int slot = pos & mask;

		if(key == *ref[slot])
		{
			Data found = data[slot];

			if(pos != top)
			{
				// Promote the hit by one position: exchange both the
				// payload and the key pointer with the next slot up.
				const int above = (slot + 1) & mask;

				Data displacedData = data[above];
				data[above] = data[slot];
				data[slot] = displacedData;

				Key *displacedKey = ref[above];
				ref[above] = ref[slot];
				ref[slot] = displacedKey;
			}

			return found;
		}
	}

	return {};  // Not found
}
// Advances 'top' by one slot (wrapping via 'mask') and overwrites that slot
// with the given key and data. 'fill' grows by one until it reaches 'size'.
// Returns the data that was passed in.
template<class Key, class Data>
Data LRUCache<Key, Data>::add(const Key &key, const Data &data)
{
	const int slot = (top + 1) & mask;
	top = slot;

	const int grown = fill + 1;
	fill = (grown < size) ? grown : size;

	// ref[slot] points into the key array; write the key through it.
	*ref[slot] = key;
	this->data[slot] = data;

	return data;
}
// Empties the snapshot map. The base cache's contents are left untouched.
template<class Key, class Data, class Hasher>
void LRUSnapshotCache<Key, Data, Hasher>::clearSnapshot()
{
snapshot.clear();
}
// Rebuilds the snapshot from the base cache when it has been flagged as stale.
// Every slot whose Data converts to true is copied into the snapshot under
// the key that slot's ref pointer refers to; the stale flag is then cleared.
template<class Key, class Data, class Hasher>
void LRUSnapshotCache<Key, Data, Hasher>::updateSnapshot()
{
	if(!snapshotNeedsUpdate)
	{
		return;  // Snapshot already matches the cache.
	}

	clearSnapshot();

	for(int slot = 0; slot < LRUBase::size; ++slot)
	{
		Data &entry = LRUBase::data[slot];
		if(entry)
		{
			snapshot[*LRUBase::ref[slot]] = entry;
		}
	}

	snapshotNeedsUpdate = false;
}
// Looks key up in the snapshot map only. Returns a reference to the stored
// Data on a hit, or to a function-local value-initialized Data on a miss.
template<class Key, class Data, class Hasher>
const Data &LRUSnapshotCache<Key, Data, Hasher>::querySnapshot(const Key &key) const
{
	const auto found = snapshot.find(key);
	static Data null = {};

	if(found == snapshot.end())
	{
		return null;
	}

	return found->second;
}
} // namespace sw
#endif // sw_LRUCache_hpp
......@@ -153,7 +153,7 @@ PixelProcessor::RoutineType PixelProcessor::routine(const State &state,
SpirvShader const *pixelShader,
const vk::DescriptorSet::Bindings &descriptorSets)
{
auto routine = routineCache->query(state);
auto routine = routineCache->lookup(state);
if(!routine)
{
......
......@@ -164,4 +164,17 @@ private:
} // namespace sw
namespace std {
// std::hash specialization for sw::PixelProcessor::State.
template<>
struct hash<sw::PixelProcessor::State>
{
// Returns the value already stored in the state's 'hash' member; nothing is
// computed here.
uint64_t operator()(const sw::PixelProcessor::State &state) const
{
return state.hash;
}
};
} // namespace std
#endif // sw_PixelProcessor_hpp
......@@ -15,7 +15,7 @@
#ifndef sw_RoutineCache_hpp
#define sw_RoutineCache_hpp
#include "LRUCache.hpp"
#include "System/LRUCache.hpp"
#include "Reactor/Reactor.hpp"
......
......@@ -100,7 +100,7 @@ SetupProcessor::State SetupProcessor::update(const sw::Context *context) const
SetupProcessor::RoutineType SetupProcessor::routine(const State &state)
{
auto routine = routineCache->query(state);
auto routine = routineCache->lookup(state);
if(!routine)
{
......
......@@ -85,4 +85,17 @@ private:
} // namespace sw
namespace std {
// std::hash specialization for sw::SetupProcessor::State.
template<>
struct hash<sw::SetupProcessor::State>
{
// Returns the value already stored in the state's 'hash' member; nothing is
// computed here.
uint64_t operator()(const sw::SetupProcessor::State &state) const
{
return state.hash;
}
};
} // namespace std
#endif // sw_SetupProcessor_hpp
......@@ -98,7 +98,7 @@ VertexProcessor::RoutineType VertexProcessor::routine(const State &state,
SpirvShader const *vertexShader,
const vk::DescriptorSet::Bindings &descriptorSets)
{
auto routine = routineCache->query(state);
auto routine = routineCache->lookup(state);
if(!routine) // Create one
{
......
......@@ -106,4 +106,17 @@ private:
} // namespace sw
namespace std {
// std::hash specialization for sw::VertexProcessor::State.
template<>
struct hash<sw::VertexProcessor::State>
{
// Returns the value already stored in the state's 'hash' member; nothing is
// computed here.
uint64_t operator()(const sw::VertexProcessor::State &state) const
{
return state.hash;
}
};
} // namespace std
#endif // sw_VertexProcessor_hpp
......@@ -40,11 +40,6 @@ SpirvShader::ImageSampler *SpirvShader::getImageSampler(uint32_t inst, vk::Sampl
ASSERT(imageDescriptor->device);
if(auto routine = imageDescriptor->device->querySnapshotCache(key))
{
return (ImageSampler *)(routine->getEntry());
}
vk::Device::SamplingRoutineCache *cache = imageDescriptor->device->getSamplingRoutineCache();
auto createSamplingRoutine = [&](const vk::Device::SamplingRoutineCache::Key &key) {
......
......@@ -21,6 +21,7 @@ swiftshader_source_set("System_headers") {
"Configurator.hpp",
"Debug.hpp",
"Half.hpp",
"LRUCache.hpp",
"Math.hpp",
"Memory.hpp",
"Socket.cpp",
......
......@@ -28,6 +28,7 @@ set(SYSTEM_SRC_FILES
Debug.hpp
Half.cpp
Half.hpp
LRUCache.hpp
Math.cpp
Math.hpp
Memory.cpp
......
......@@ -38,16 +38,21 @@ std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds> now
namespace vk {
// Looks key up via cache.querySnapshot() and returns the raw pointer obtained
// by calling get() on the result. No mutex is taken in this function.
rr::Routine *Device::SamplingRoutineCache::querySnapshot(const vk::Device::SamplingRoutineCache::Key &key) const
{
return cache.querySnapshot(key).get();
}
void Device::SamplingRoutineCache::updateSnapshot()
{
std::lock_guard<std::mutex> lock(mutex);
std::unique_lock<std::mutex> lock(mutex);
if(snapshotNeedsUpdate)
{
snapshot.clear();
for(auto it : cache)
{
snapshot[it.key()] = it.data();
}
cache.updateSnapshot();
snapshotNeedsUpdate = false;
}
}
Device::SamplerIndexer::~SamplerIndexer()
......@@ -57,7 +62,7 @@ Device::SamplerIndexer::~SamplerIndexer()
uint32_t Device::SamplerIndexer::index(const SamplerState &samplerState)
{
std::lock_guard<std::mutex> lock(mutex);
std::unique_lock<std::mutex> lock(mutex);
auto it = map.find(samplerState);
......@@ -76,7 +81,7 @@ uint32_t Device::SamplerIndexer::index(const SamplerState &samplerState)
void Device::SamplerIndexer::remove(const SamplerState &samplerState)
{
std::lock_guard<std::mutex> lock(mutex);
std::unique_lock<std::mutex> lock(mutex);
auto it = map.find(samplerState);
ASSERT(it != map.end());
......@@ -293,11 +298,6 @@ Device::SamplingRoutineCache *Device::getSamplingRoutineCache() const
return samplingRoutineCache.get();
}
// Forwards the lookup to the device's sampling routine cache and returns
// whatever its querySnapshot() returns for key.
rr::Routine *Device::querySnapshotCache(const SamplingRoutineCache::Key &key) const
{
return samplingRoutineCache->querySnapshot(key);
}
void Device::updateSamplingRoutineSnapshotCache()
{
samplingRoutineCache->updateSnapshot();
......
......@@ -17,12 +17,13 @@
#include "VkObject.hpp"
#include "VkSampler.hpp"
#include "Device/LRUCache.hpp"
#include "Reactor/Routine.hpp"
#include "System/LRUCache.hpp"
#include <map>
#include <memory>
#include <mutex>
#include <unordered_map>
namespace marl {
class Scheduler;
......@@ -95,29 +96,33 @@ public:
template<typename Function>
std::shared_ptr<rr::Routine> getOrCreate(const Key &key, Function &&createRoutine)
{
std::lock_guard<std::mutex> lock(mutex);
auto it = snapshot.find(key);
if(it != snapshot.end()) { return it->second; }
if(auto existingRoutine = cache.query(key))
std::unique_lock<std::mutex> lock(mutex);
if(auto existingRoutine = cache.lookup(key))
{
return existingRoutine;
}
std::shared_ptr<rr::Routine> newRoutine = createRoutine(key);
cache.add(key, newRoutine);
snapshotNeedsUpdate = true;
return newRoutine;
}
rr::Routine *querySnapshot(const Key &key) const;
void updateSnapshot();
private:
sw::LRUSnapshotCache<Key, std::shared_ptr<rr::Routine>, Key::Hash> cache; // guarded by mutex
bool snapshotNeedsUpdate = false;
std::unordered_map<Key, std::shared_ptr<rr::Routine>, Key::Hash> snapshot;
sw::LRUCache<Key, std::shared_ptr<rr::Routine>, Key::Hash> cache; // guarded by mutex
std::mutex mutex;
};
SamplingRoutineCache *getSamplingRoutineCache() const;
rr::Routine *querySnapshotCache(const SamplingRoutineCache::Key &key) const;
void updateSamplingRoutineSnapshotCache();
class SamplerIndexer
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "Device/LRUCache.hpp"
#include "System/LRUCache.hpp"
#include "benchmark/benchmark.h"
......@@ -106,7 +106,7 @@ BENCHMARK_DEFINE_F(LRUCacheBenchmark, GetIntCacheHit)
for(auto _ : state)
{
cache.query(rnd() % size);
cache.lookup(rnd() % size);
}
}
BENCHMARK_REGISTER_F(LRUCacheBenchmark, GetIntCacheHit)->RangeMultiplier(8)->Range(1, 0x100000)->ArgName("cache-size");
......@@ -123,7 +123,7 @@ BENCHMARK_DEFINE_F(LRUCacheBenchmark, GetIntCacheMiss)
for(auto _ : state)
{
cache.query(rnd() % size);
cache.lookup(rnd() % size);
}
}
BENCHMARK_REGISTER_F(LRUCacheBenchmark, GetIntCacheMiss)->RangeMultiplier(8)->Range(1, 0x100000)->ArgName("cache-size");
......@@ -131,7 +131,7 @@ BENCHMARK_REGISTER_F(LRUCacheBenchmark, GetIntCacheMiss)->RangeMultiplier(8)->Ra
BENCHMARK_DEFINE_F(LRUCacheBenchmark, AddRandomComplexKey)
(benchmark::State &state)
{
sw::LRUCache<ComplexKey, size_t> cache(size);
sw::LRUCache<ComplexKey, size_t, ComplexKeyHash> cache(size);
FastRnd rnd;
int i = 0;
......@@ -152,7 +152,7 @@ BENCHMARK_REGISTER_F(LRUCacheBenchmark, AddRandomComplexKey)->RangeMultiplier(8)
BENCHMARK_DEFINE_F(LRUCacheBenchmark, GetComplexKeyCacheHit)
(benchmark::State &state)
{
sw::LRUCache<ComplexKey, size_t> cache(size);
sw::LRUCache<ComplexKey, size_t, ComplexKeyHash> cache(size);
FastRnd rnd;
for(size_t i = 0; i < size; i++)
......@@ -174,7 +174,7 @@ BENCHMARK_DEFINE_F(LRUCacheBenchmark, GetComplexKeyCacheHit)
{
key.words[w] = i & (1U << w);
}
cache.query(key);
cache.lookup(key);
}
}
BENCHMARK_REGISTER_F(LRUCacheBenchmark, GetComplexKeyCacheHit)->RangeMultiplier(8)->Range(1, 0x100000)->ArgName("cache-size");
......@@ -182,7 +182,7 @@ BENCHMARK_REGISTER_F(LRUCacheBenchmark, GetComplexKeyCacheHit)->RangeMultiplier(
BENCHMARK_DEFINE_F(LRUCacheBenchmark, GetComplexKeyCacheMiss)
(benchmark::State &state)
{
sw::LRUCache<ComplexKey, size_t> cache(size);
sw::LRUCache<ComplexKey, size_t, ComplexKeyHash> cache(size);
FastRnd rnd;
for(size_t i = 0; i < size; i++)
......@@ -204,7 +204,7 @@ BENCHMARK_DEFINE_F(LRUCacheBenchmark, GetComplexKeyCacheMiss)
{
key.words[w] = i & (1U << w);
}
cache.query(key);
cache.lookup(key);
}
}
BENCHMARK_REGISTER_F(LRUCacheBenchmark, GetComplexKeyCacheMiss)->RangeMultiplier(8)->Range(1, 0x100000)->ArgName("cache-size");
......@@ -25,6 +25,7 @@ test("swiftshader_system_unittests") {
sources = [
"//gpu/swiftshader_tests_main.cc",
"LRUCacheTests.cpp",
"unittests.cpp",
]
......
......@@ -23,6 +23,7 @@ set(ROOT_PROJECT_LINK_LIBRARIES
)
set(SYSTEM_UNIT_TESTS_SRC_FILES
LRUCacheTests.cpp
main.cpp
unittests.cpp
)
......
......@@ -44,8 +44,8 @@ void checkRange(const Cache &cache, std::vector<std::pair<typename Cache::Key, t
TEST(LRUCache, Empty)
{
LRUCache<std::string, std::string> cache(8);
ASSERT_EQ(cache.get(""), "");
ASSERT_EQ(cache.get("123"), "");
ASSERT_EQ(cache.lookup(""), "");
ASSERT_EQ(cache.lookup("123"), "");
for(auto ignored : cache)
{
(void)ignored;
......@@ -62,10 +62,10 @@ TEST(LRUCache, AddNoEviction)
cache.add("3", "three");
cache.add("4", "four");
ASSERT_EQ(cache.get("1"), "one");
ASSERT_EQ(cache.get("2"), "two");
ASSERT_EQ(cache.get("3"), "three");
ASSERT_EQ(cache.get("4"), "four");
ASSERT_EQ(cache.lookup("1"), "one");
ASSERT_EQ(cache.lookup("2"), "two");
ASSERT_EQ(cache.lookup("3"), "three");
ASSERT_EQ(cache.lookup("4"), "four");
checkRange(cache, {
{ "4", "four" },
......@@ -86,12 +86,61 @@ TEST(LRUCache, AddWithEviction)
cache.add("5", "five");
cache.add("6", "six");
ASSERT_EQ(cache.get("1"), "");
ASSERT_EQ(cache.get("2"), "");
ASSERT_EQ(cache.get("3"), "three");
ASSERT_EQ(cache.get("4"), "four");
ASSERT_EQ(cache.get("5"), "five");
ASSERT_EQ(cache.get("6"), "six");
ASSERT_EQ(cache.lookup("1"), "");
ASSERT_EQ(cache.lookup("2"), "");
ASSERT_EQ(cache.lookup("3"), "three");
ASSERT_EQ(cache.lookup("4"), "four");
ASSERT_EQ(cache.lookup("5"), "five");
ASSERT_EQ(cache.lookup("6"), "six");
checkRange(cache, {
{ "6", "six" },
{ "5", "five" },
{ "4", "four" },
{ "3", "three" },
});
}
TEST(LRUCache, AddClearAdd)
{
LRUCache<std::string, std::string> cache(4);
// Add some data.
cache.add("1", "one");
cache.add("2", "two");
cache.add("3", "three");
cache.add("4", "four");
cache.add("5", "five");
cache.add("6", "six");
// Clear it.
cache.clear();
// Check has no data.
ASSERT_EQ(cache.lookup("1"), "");
ASSERT_EQ(cache.lookup("2"), "");
ASSERT_EQ(cache.lookup("3"), "");
ASSERT_EQ(cache.lookup("4"), "");
ASSERT_EQ(cache.lookup("5"), "");
ASSERT_EQ(cache.lookup("6"), "");
checkRange(cache, {});
// Add it again.
cache.add("1", "one");
cache.add("2", "two");
cache.add("3", "three");
cache.add("4", "four");
cache.add("5", "five");
cache.add("6", "six");
// Check has data.
ASSERT_EQ(cache.lookup("1"), "");
ASSERT_EQ(cache.lookup("2"), "");
ASSERT_EQ(cache.lookup("3"), "three");
ASSERT_EQ(cache.lookup("4"), "four");
ASSERT_EQ(cache.lookup("5"), "five");
ASSERT_EQ(cache.lookup("6"), "six");
checkRange(cache, {
{ "6", "six" },
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment