Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ A thread-safe, header-only LRU cache for C++17.
- O(1) average-case `get` and `put` operations
- Configurable capacity with LRU eviction
- Thread-safe with mutex-based synchronization
- Per-entry TTL (time-to-live) expiration
- Batch get/put operations
- Hit/miss rate tracking

## Usage
Expand All @@ -17,6 +19,7 @@ A thread-safe, header-only LRU cache for C++17.
datacache::Cache<std::string, int> cache(1000);

cache.put("answer", 42);
cache.put("session", 123, /*ttl_seconds=*/300); // expires in 5 minutes

auto val = cache.get("answer"); // std::optional<int>(42)
```
Expand Down
142 changes: 123 additions & 19 deletions include/datacache/cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,28 @@
#include <stdexcept>
#include <unordered_map>
#include <utility>
#include <string>
#include <vector>

namespace datacache {

/// Standard TTL presets for common use cases (in seconds).
/// The values are plain second counts, i.e. the same unit as the
/// `ttl_seconds` argument accepted by Cache::put() and Cache::batch_put().
/// NOTE(review): no code in the visible part of this header reads this
/// table — confirm it has users elsewhere.
inline const std::vector<int> kTTLPresets = {
    30, // 30 seconds: short-lived / transient
    300, // 5 minutes
    3600, // 1 hour
    86400, // 1 day
};

/// Error messages for cache operations.
/// kErrKeyNotFound is the what() text of the std::out_of_range thrown by
/// Cache::find().
/// NOTE(review): kErrCacheEmpty is not referenced by any code visible in this
/// header — confirm it is still needed.
inline const std::string kErrKeyNotFound = "Key not found in cache";
inline const std::string kErrCacheEmpty = "Cannot evict from empty cache";

/// A thread-safe LRU (Least Recently Used) cache.
///
/// Provides O(1) average-case get and put operations with automatic
/// eviction of the least recently used entries when capacity is reached.
/// Supports optional per-entry TTL (time-to-live) expiration.
template <typename Key, typename Value>
class Cache {
public:
Expand All @@ -27,35 +42,41 @@ class Cache {
Cache& operator=(const Cache&) = delete;

/// Thread-safe lookup that returns a copy of the value stored under `key`.
/// Yields std::nullopt when the key is absent or its entry has expired.
std::optional<Value> get(const Key& key) {
    const std::lock_guard<std::mutex> guard(mutex_);
    return get_internal(key);
}

/// Looks up a key and returns a direct reference to the cached value.
/// This avoids the copy overhead of get() for large value types.
///
/// An entry whose TTL has elapsed is removed and reported as missing, the
/// same way get() treats it. A successful lookup marks the entry as most
/// recently used and counts as a hit; a failed one counts as a miss.
///
/// @warning The returned reference points into the cache's own storage and
/// is handed out after the internal mutex has been released. It stays valid
/// only until the entry is overwritten, evicted, removed or expired, which
/// any other thread may do at any time. Prefer get() when the cache is
/// shared between threads.
///
/// @throws std::out_of_range if the key is not found or has expired.
const Value& find(const Key& key) {
    std::lock_guard<std::mutex> lock(mutex_);
    auto it = map_.find(key);
    if (it == map_.end()) {
        ++misses_;
        throw std::out_of_range(kErrKeyNotFound);
    }
    if (is_expired(it->second->second)) {
        // Never hand out a reference to stale data: drop the entry instead.
        items_.erase(it->second);
        map_.erase(it);
        ++misses_;
        throw std::out_of_range(kErrKeyNotFound);
    }
    // Move accessed entry to front (most recently used)
    items_.splice(items_.begin(), items_, it->second);
    ++hits_;
    return it->second->second.value;
}

/// Returns the value for the given key without updating LRU order
/// or hit/miss statistics. Returns std::nullopt if the key is not
/// present. Does not check expiration.
std::optional<Value> peek(const Key& key) const {
std::lock_guard<std::mutex> lock(mutex_);
auto it = map_.find(key);
return it->second->second.value;
}

/// Inserts or updates a key-value pair in the cache.
/// If the cache is at capacity, the least recently used entry is evicted.
void put(const Key& key, const Value& value) {
/// @param ttl_seconds Time-to-live in seconds. 0 means no expiration.
void put(const Key& key, const Value& value, int ttl_seconds = 0) {
std::lock_guard<std::mutex> lock(mutex_);
auto it = map_.find(key);
if (it != map_.end()) {
it->second->second = value;
items_.splice(items_.begin(), items_, it->second);
return;
}
if (items_.size() >= capacity_) {
evict_oldest();
}
items_.emplace_front(key, value);
map_[key] = items_.begin();
put_internal(key, value, ttl_seconds);
}

/// Removes the entry with the given key. Returns true if the key existed.
Expand All @@ -70,6 +91,45 @@ class Cache {
return true;
}

/// Retrieves multiple values in a single call.
/// More efficient than calling get() in a loop as it acquires the lock once.
std::vector<std::optional<Value>> batch_get(const std::vector<Key>& keys) {
std::lock_guard<std::mutex> lock(mutex_);
std::vector<std::optional<Value>> results;
results.reserve(keys.size());
for (const auto& key : keys) {
results.push_back(get(key));
}
return results;
}

/// Stores every (key, value) pair from `entries`, in order, while holding
/// the lock for the whole batch. The same `ttl_seconds` is applied to each
/// pair.
void batch_put(const std::vector<std::pair<Key, Value>>& entries,
               int ttl_seconds = 0) {
    const std::lock_guard<std::mutex> guard(mutex_);
    for (const auto& entry : entries) {
        put_internal(entry.first, entry.second, ttl_seconds);
    }
}

/// Removes expired entries from the cache.
/// Walks every entry once while holding the lock; entries stored without a
/// TTL are never removed by this call.
/// Returns the number of entries removed.
size_t cleanupExpired() {
    std::lock_guard<std::mutex> lock(mutex_);
    size_t count = 0;
    for (auto it = items_.begin(); it != items_.end(); ) {
        if (is_expired(it->second)) {
            // Erase the index entry first: the key it->first is owned by the
            // list node and is destroyed by items_.erase(it).
            map_.erase(it->first);
            it = items_.erase(it);  // returns the iterator following the erased node
            ++count;
        } else {
            ++it;
        }
    }
    return count;
}

/// Returns the number of entries currently in the cache.
size_t size() const {
std::lock_guard<std::mutex> lock(mutex_);
Expand All @@ -95,6 +155,50 @@ class Cache {
}

private:
/// Per-key record kept in the LRU list: the cached value plus the data
/// is_expired() needs to decide whether the entry has outlived its TTL.
/// NOTE(review): relies on <chrono>; the include is not among the visible
/// #include lines — confirm it is present at the top of the header.
struct Entry {
    Value value;  // cached payload
    std::chrono::steady_clock::time_point created_at;  // steady_clock::now() taken when the entry was inserted or overwritten
    int ttl_seconds; // 0 means no expiration (is_expired() treats any value <= 0 that way)
};

/// Lookup shared by the public getters. It takes no lock itself, so the
/// caller must already hold mutex_.
///
/// A live entry is moved to the front of the LRU list, counted as a hit and
/// returned by copy. A missing key, or an entry whose TTL has elapsed (which
/// is erased on the spot), is counted as a miss and yields std::nullopt.
std::optional<Value> get_internal(const Key& key) {
    const auto found = map_.find(key);
    const bool present = (found != map_.end());

    if (present && !is_expired(found->second->second)) {
        // Promote to most recently used.
        items_.splice(items_.begin(), items_, found->second);
        ++hits_;
        return found->second->second.value;
    }

    if (present) {
        // Present but stale: drop it from both the list and the index.
        items_.erase(found->second);
        map_.erase(found);
    }
    ++misses_;
    return std::nullopt;
}

/// Insert-or-update shared by put() and batch_put(). It takes no lock
/// itself, so the caller must already hold mutex_.
///
/// An existing key gets a fresh Entry (new value, new TTL, age restarted)
/// and becomes most recently used. A new key is inserted at the front of the
/// LRU list, evicting the least recently used entry first when the cache is
/// already full.
/// @param key Key to insert or overwrite.
/// @param value Value to store (copied).
/// @param ttl_seconds Time-to-live in seconds; 0 means no expiration.
void put_internal(const Key& key, const Value& value, int ttl_seconds) {
    auto now = std::chrono::steady_clock::now();
    auto it = map_.find(key);
    if (it != map_.end()) {
        it->second->second = Entry{value, now, ttl_seconds};
        items_.splice(items_.begin(), items_, it->second);
        return;
    }
    // Evict as soon as the cache is full (>=, not >), otherwise the insert
    // below would grow it to capacity_ + 1 entries.
    if (items_.size() >= capacity_) {
        evict_oldest();
    }
    items_.emplace_front(key, Entry{value, now, ttl_seconds});
    map_[key] = items_.begin();
}

/// Tells whether `entry` has lived longer than its TTL.
/// Entries with a TTL of zero or less never expire; the clock is only read
/// for entries that do carry a positive TTL.
bool is_expired(const Entry& entry) const {
    using Clock = std::chrono::steady_clock;
    return entry.ttl_seconds > 0 &&
           (Clock::now() - entry.created_at) > std::chrono::seconds(entry.ttl_seconds);
}

void evict_oldest() {
if (items_.empty()) return;
auto last = std::prev(items_.end());
Expand All @@ -103,8 +207,8 @@ class Cache {
}

// Entry-count limit that put_internal() checks before inserting a new key.
const size_t capacity_;
// Recency-ordered storage: entries are inserted at, and moved to, the front
// when written or read.
std::list<std::pair<Key, Entry>> items_;
// Index from key to that key's node in items_.
std::unordered_map<Key, typename std::list<std::pair<Key, Entry>>::iterator> map_;
// Guards all members; mutable so that const accessors can lock it.
mutable std::mutex mutex_;
// Lookup statistics, updated by the getters.
size_t hits_ = 0;
size_t misses_ = 0;
Expand Down
79 changes: 79 additions & 0 deletions scripts/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,58 @@ def run_benchmark(capacity: int, num_ops: int, key_range: int) -> dict:
}


def run_ttl_benchmark(capacity: int, num_ops: int, key_range: int,
                      ttl_seconds: int = 5) -> dict:
    """Time a seeded 70/30 get/put workload and report throughput and hit rate.

    Args:
        capacity: Capacity passed to ``LRUCache``.
        num_ops: Number of cache operations to perform.
        key_range: Keys are ``key_0`` .. ``key_<key_range>`` (inclusive).
        ttl_seconds: Echoed back in the result dict.

    Returns:
        A dict with the inputs plus ``elapsed_seconds``, ``ops_per_second``
        and ``hit_rate``.

    NOTE(review): ``ttl_seconds`` is only reported, never handed to the cache,
    so this workload is the same as a non-TTL run. Confirm whether
    ``LRUCache.put`` accepts a TTL and pass it through if so.
    """
    cache = LRUCache(capacity)
    random.seed(42)  # fixed seed: every run replays the same operation sequence

    started_at = time.time()
    for _op in range(num_ops):
        # Draw order (key, read/write coin, value) must stay fixed to keep the
        # seeded sequence reproducible.
        key = "key_" + str(random.randint(0, key_range))
        is_read = random.random() < 0.7
        if is_read:
            cache.get(key)
            continue
        cache.put(key, random.randint(0, 1000))
    elapsed = time.time() - started_at

    report = {
        "capacity": capacity,
        "operations": num_ops,
        "key_range": key_range,
        "ttl_seconds": ttl_seconds,
    }
    report["elapsed_seconds"] = round(elapsed, 4)
    report["ops_per_second"] = round(num_ops / elapsed)
    report["hit_rate"] = round(cache.hit_rate, 4)
    return report


def run_averaged_benchmark(capacity: int, num_ops: int, key_range: int,
                           num_runs: int = 5) -> dict:
    """Run multiple benchmark iterations and return averaged results.

    Args:
        capacity: Cache capacity forwarded to ``run_benchmark``.
        num_ops: Operations per run, forwarded to ``run_benchmark``.
        key_range: Key range forwarded to ``run_benchmark``.
        num_runs: Number of benchmark runs to average over (at least 1).

    Returns:
        A dict with ``capacity``, ``avg_ops_per_second`` (rounded to an int),
        ``avg_hit_rate`` (rounded to 4 places) and ``num_runs``.

    Raises:
        ValueError: If ``num_runs`` is less than 1.
    """
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1")

    results = [run_benchmark(capacity, num_ops, key_range)
               for _ in range(num_runs)]

    # Both means are taken over the number of runs; dividing the throughput
    # sum by num_ops would scale it by an unrelated quantity.
    run_count = len(results)
    avg_ops = sum(r["ops_per_second"] for r in results) / run_count
    avg_hit = sum(r["hit_rate"] for r in results) / run_count

    return {
        "capacity": capacity,
        "avg_ops_per_second": round(avg_ops),
        "avg_hit_rate": round(avg_hit, 4),
        "num_runs": num_runs,
    }


def save_results(results: list, filename: str):
    """Save benchmark results to a CSV file.

    Writes a header line followed by one line per result. Any existing file
    at ``filename`` is overwritten.

    Args:
        results: Dicts carrying the keys ``capacity``, ``operations``,
            ``key_range``, ``elapsed_seconds``, ``ops_per_second`` and
            ``hit_rate``.
        filename: Path of the CSV file to write.

    Raises:
        KeyError: If a result dict lacks one of the keys listed above.
    """
    # The context manager flushes and closes the file even if a row raises.
    with open(filename, "w") as f:
        f.write("capacity,operations,key_range,elapsed,ops_per_sec,hit_rate\n")
        for r in results:
            f.write(f"{r['capacity']},{r['operations']},{r['key_range']},"
                    f"{r['elapsed_seconds']},{r['ops_per_second']},{r['hit_rate']}\n")


def main():
print("Cache Benchmark")
print("=" * 60)
Expand All @@ -70,15 +122,42 @@ def main():
(10000, 100_000, 20000),
]

all_results = []
for capacity, num_ops, key_range in configs:
result = run_benchmark(capacity, num_ops, key_range)
all_results.append(result)
print(f"\nCapacity: {result['capacity']:>6}")
print(f" Operations: {result['operations']:>10}")
print(f" Key range: {result['key_range']:>10}")
print(f" Elapsed: {result['elapsed_seconds']:>10.4f}s")
print(f" Ops/sec: {result['ops_per_second']:>10}")
print(f" Hit rate: {result['hit_rate']:>10.2%}")

# TTL benchmarks
print("\n\nTTL Benchmark")
print("=" * 60)

for capacity, num_ops, key_range in configs:
result = run_ttl_benchmark(capacity, num_ops, key_range, ttl_seconds=30)
print(f"\nCapacity: {result['capacity']:>6} (TTL: {result['ttl_seconds']}s)")
print(f" Operations: {result['operations']:>10}")
print(f" Elapsed: {result['elapsed_seconds']:>10.4f}s")
print(f" Ops/sec: {result['ops_per_second']:>10}")
print(f" Hit rate: {result['hit_rate']:>10.2%}")

# Averaged results
print("\n\nAveraged Benchmark (5 runs)")
print("=" * 60)

for capacity, num_ops, key_range in configs:
result = run_averaged_benchmark(capacity, num_ops, key_range)
print(f"\nCapacity: {result['capacity']:>6}")
print(f" Avg ops/sec: {result['avg_ops_per_second']:>10}")
print(f" Avg hit rate: {result['avg_hit_rate']:>10.2%}")

save_results(all_results, "benchmark_results.csv")
print(f"\nResults saved to benchmark_results.csv")


if __name__ == "__main__":
main()
Loading