Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.3.1
current_version = 0.4.0
commit = True
tag = True

Expand Down
29 changes: 9 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,15 @@ Data is kept purely in RAM and is **volatile**: it is **not persisted across app
- **Zero I/O overhead**: pure in‑RAM storage (`dict`/`list` under the hood)
- **Commit/rollback support**
- **Index support**: indexes are recognized and used for faster lookups
- **Merge and `get()` support**: like real SQLAlchemy behavior
- **Lazy query evaluation**: supports generator pipelines and short-circuiting
- `first()`-style queries avoid scanning the full dataset
- Optimized for read-heavy workloads and streaming filters

## Benchmark

Curious how `sqlalchemy-memory` stacks up?

[View Benchmark Results](https://sqlalchemy-memory.readthedocs.io/en/latest/benchmarks.html) comparing `sqlalchemy-memory` to in-memory SQLite.

## Installation

Expand All @@ -48,25 +56,6 @@ pip install sqlalchemy-memory

[See the official documentation for usage examples](https://sqlalchemy-memory.readthedocs.io/en/latest/)


## Status

Currently supports basic functionality equivalent to:

- SQLite in-memory behavior for ORM + Core queries

- `declarative_base()` model support

Coming soon:

- `func.count()` / aggregations

- Joins and relationships (limited)

- Compound indexes

- Better expression support in `update(...).values()` (e.g., +=)

## Testing

Simply run `make tests`
Expand Down
59 changes: 46 additions & 13 deletions benchmark.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
from sqlalchemy import create_engine, Column, Integer, String, Boolean, select, Index, update, delete
from sqlalchemy import create_engine, Column, Integer, String, Boolean, select, Float, update, delete, bindparam, literal
from sqlalchemy.orm import declarative_base, sessionmaker
from sqlalchemy.sql import operators
from sqlalchemy.sql.elements import BinaryExpression
from sqlalchemy_memory import MemorySession
import argparse
import time
import random
from faker import Faker

try:
from sqlalchemy_memory import create_memory_engine
except ImportError:
create_memory_engine = None

random.seed(42)
Base = declarative_base()
fake = Faker()
CATEGORIES = list("ABCDEFGHIJK")
Expand All @@ -22,22 +21,46 @@ class Item(Base):
name = Column(String)
active = Column(Boolean, index=True)
category = Column(String, index=True)
price = Column(Float, index=True)
cost = Column(Float)

def generate_items(n):
    """Lazily yield ``n`` randomly generated ``Item`` rows.

    A generator is used (rather than a list) so callers can stream rows
    into ``session.add_all``/bulk inserts without holding all of them in RAM.
    """
    for _ in range(n):
        yield Item(
            name=fake.name(),
            active=random.choice([True, False]),
            category=random.choice(CATEGORIES),
            price=round(random.uniform(5, 500), 2),
            cost=round(random.uniform(1, 300), 2),
        )

def generate_random_select_query():
    """Build a random ``SELECT`` over ``Item`` with one or more WHERE clauses.

    Each call independently decides whether to filter on ``active`` (50%),
    ``category`` (70%), ``price`` (60%) and ``cost`` (30%); a fallback
    predicate guarantees the statement always has at least one clause.
    """
    clauses = []

    if random.random() < 0.5:
        val = random.choice([True, False])
        op = random.choice([operators.eq, operators.ne])
        clauses.append(BinaryExpression(Item.active, literal(val), op))

    if random.random() < 0.7:
        subset = random.sample(CATEGORIES, random.randint(1, 4))
        op = random.choice([operators.in_op, operators.notin_op])
        param = bindparam("category_list", subset, expanding=True)
        clauses.append(BinaryExpression(Item.category, param, op))

    if random.random() < 0.6:
        price_val = round(random.uniform(10, 400), 2)
        # BUG FIX: choice list was [gt, lt, le, gt] — `gt` duplicated, `ge`
        # missing, skewing the operator distribution.
        op = random.choice([operators.gt, operators.lt, operators.le, operators.ge])
        clauses.append(BinaryExpression(Item.price, literal(price_val), op))

    if random.random() < 0.3:
        cost_val = round(random.uniform(10, 200), 2)
        op = random.choice([operators.gt, operators.lt, operators.le, operators.ge])
        clauses.append(BinaryExpression(Item.cost, literal(cost_val), op))

    # BUG FIX: the fallback used to be `random.random() < 0.5 or not clauses`
    # on the FIRST branch, where `clauses` was still guaranteed empty — the
    # condition was always true, so the `active` filter was applied to every
    # query.  The fallback only makes sense after all random draws.
    if not clauses:
        clauses.append(BinaryExpression(Item.active, literal(True), operators.eq))

    return select(Item).where(*clauses)

def inserts(Session, count):
Expand All @@ -49,15 +72,24 @@ def inserts(Session, count):
print(f"Inserted {count} items in {insert_duration:.2f} seconds.")
return insert_duration

def selects(Session, count, fetch_type):
    """Execute ``count`` random SELECTs and return the elapsed wall-clock seconds.

    Query construction happens up front so only execution + fetching is timed.

    fetch_type:
      - ``"all"``:   materialize every matching row
      - ``"limit"``: append ``LIMIT 5`` before executing
      - ``"first"``: fetch only the first row (exercises short-circuiting)
    """
    queries = [generate_random_select_query() for _ in range(count)]

    query_start = time.time()
    with Session() as session:
        for stmt in queries:
            if fetch_type == "limit":
                stmt = stmt.limit(5)

            result = session.execute(stmt)

            if fetch_type == "first":
                result.first()
            else:
                list(result.scalars())

    query_duration = time.time() - query_start
    print(f"Executed {count} select queries ({fetch_type}) in {query_duration:.2f} seconds.")
    return query_duration

def updates(Session, random_ids):
Expand Down Expand Up @@ -105,7 +137,8 @@ def run_benchmark(db_type="sqlite", count=100_000):
Base.metadata.create_all(engine)

elapsed = inserts(Session, count)
elapsed += selects(Session, 500)
elapsed += selects(Session, 500, fetch_type="all")
elapsed += selects(Session, 500, fetch_type="limit")

random_ids = random.sample(range(1, count + 1), 500)
elapsed += updates(Session, random_ids)
Expand Down
29 changes: 17 additions & 12 deletions docs/benchmarks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ This benchmark compares `sqlalchemy-memory` to `in-memory SQLite` using 20,000 i

As the results show, `sqlalchemy-memory` **excels in read-heavy workloads**, delivering significantly faster query performance. While SQLite performs slightly better on update and delete operations, the overall runtime of `sqlalchemy-memory` remains substantially lower, making it a strong choice for prototyping and simulation.

`Check the benchmark script on GitHub <https://github.com/rundef/sqlalchemy-memory/blob/main/benchmark.py>`_

.. list-table::
:header-rows: 1
:widths: 25 25 25
Expand All @@ -13,17 +15,20 @@ As the results show, `sqlalchemy-memory` **excels in read-heavy workloads**, del
- SQLite (in-memory)
- sqlalchemy-memory
* - Insert
- 3.17 sec
- 2.70 sec
* - 500 Select Queries
- 26.37 sec
- 2.94 sec
- 3.30 sec
- **3.10 sec**
* - 500 Select Queries (all())
- 30.07 sec
- **4.14 sec**
* - 500 Select Queries (limit(5))
- **0.24** sec
- 0.30 sec
* - 500 Updates
- 0.26 sec
- 1.12 sec
- 0.25 sec
- **0.19** sec
* - 500 Deletes
- 0.09 sec
- 0.90 sec
* - **Total Runtime**
- **29.89 sec**
- **7.66 sec**
- **0.09** sec
- **0.09** sec
* - *Total Runtime*
- 33.95 sec
- **7.81 sec**
2 changes: 2 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ Welcome to sqlalchemy-memory's documentation!

`sqlalchemy-memory` is a pure in-memory backend for SQLAlchemy 2.0 that supports both sync and async modes, with full compatibility for SQLAlchemy Core and ORM.

📦 GitHub: https://github.com/rundef/sqlalchemy-memory

Quickstart: sync example
------------------------

Expand Down
1 change: 1 addition & 0 deletions docs/query.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Supported Functions

- `DATE(column)`
- `func.json_extract(col, '$.expr')`
- Aggregation functions: `func.count()` / `func.sum()` / `func.min()` / `func.max()` / `func.avg()`

Indexes
-------
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "sqlalchemy-memory"
version = "0.3.1"
version = "0.4.0"
dependencies = [
"sqlalchemy>=2.0,<3.0",
"sortedcontainers>=2.4.0"
Expand Down
2 changes: 1 addition & 1 deletion sqlalchemy_memory/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
"AsyncMemorySession",
]

__version__ = '0.3.1'
__version__ = '0.4.0'
83 changes: 51 additions & 32 deletions sqlalchemy_memory/base/indexes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections import defaultdict
from sortedcontainers import SortedDict
from typing import Any, List
from typing import Any, List, Generator
from itertools import chain
from sqlalchemy.sql import operators

from ..helpers.ordered_set import OrderedSet
Expand Down Expand Up @@ -108,62 +109,84 @@ def on_update(self, obj, updates):
self.hash_index.add(tablename, indexname, new_value, obj)
self.range_index.add(tablename, indexname, new_value, obj)

def query(self, collection, tablename, colname, operator, value, collection_is_full_table=False):
    """Resolve a single WHERE condition through the hash/range indexes.

    Returns ``None`` when the column is not indexed (caller falls back to a
    scan) or the operator is unsupported.  Otherwise returns either the raw
    index result (when ``collection_is_full_table`` — no intersection with
    ``collection`` is needed) or a lazy generator that filters ``collection``
    by index membership, preserving ``collection``'s order.
    """
    indexname = self._column_to_index(tablename, colname)
    if not indexname:
        return None

    def _keep(matches):
        # Keep only items of `collection` found by the index.
        if collection_is_full_table:
            # Fast path: the index result IS the answer.
            return matches
        # Convert to set ONCE so per-item membership is O(1); the original
        # eq branch probed a plain list here (O(n) per item).
        allowed = set(matches)
        return (item for item in collection if item in allowed)

    def _drop(matches):
        # Remove items of `collection` found by the index (negated ops).
        excluded = set(matches)
        return (item for item in collection if item not in excluded)

    # Hash index handles equality-style operators.
    if operator == operators.eq:
        return _keep(self.hash_index.query(tablename, indexname, value))

    elif operator == operators.ne:
        return _drop(self.hash_index.query(tablename, indexname, value))

    elif operator == operators.in_op:
        matches = chain.from_iterable(
            self.hash_index.query(tablename, indexname, v) for v in value
        )
        return _keep(matches)

    elif operator == operators.notin_op:
        excluded = chain.from_iterable(
            self.hash_index.query(tablename, indexname, v) for v in value
        )
        return _drop(excluded)

    # Range index handles ordering operators.
    elif operator == operators.gt:
        return _keep(self.range_index.query(tablename, indexname, gt=value))

    elif operator == operators.ge:
        return _keep(self.range_index.query(tablename, indexname, gte=value))

    elif operator == operators.lt:
        return _keep(self.range_index.query(tablename, indexname, lt=value))

    elif operator == operators.le:
        return _keep(self.range_index.query(tablename, indexname, lte=value))

    elif operator == operators.between_op and isinstance(value, (tuple, list)) and len(value) == 2:
        return _keep(self.range_index.query(tablename, indexname, gte=value[0], lte=value[1]))

    elif operator == operators.not_between_op and isinstance(value, (tuple, list)) and len(value) == 2:
        return _drop(self.range_index.query(tablename, indexname, gte=value[0], lte=value[1]))


def get_selectivity(self, tablename, colname, operator, value, total_count):
"""
Estimate selectivity: higher means worst filtering.
Estimate the selectivity of a single WHERE condition.

This method is used to rank or sort WHERE conditions by their estimated
filtering power. A lower selectivity value indicates that the condition
is expected to filter out more rows (i.e., fewer rows remain after applying it),
making it more selective.
"""

indexname = self._column_to_index(tablename, colname)
Expand Down Expand Up @@ -220,7 +243,7 @@ def remove(self, tablename: str, indexname: str, value: Any, obj: Any):
del self.index[tablename][indexname][value]

def query(self, tablename: str, indexname: str, value: Any) -> List[Any]:
return list(self.index[tablename][indexname].get(value, []))
return self.index[tablename][indexname].get(value, [])


class RangeIndex:
Expand Down Expand Up @@ -255,7 +278,7 @@ def remove(self, tablename: str, indexname: str, value: Any, obj: Any):
except ValueError:
pass

def query(self, tablename: str, indexname: str, gt=None, gte=None, lt=None, lte=None) -> List[Any]:
def query(self, tablename: str, indexname: str, gt=None, gte=None, lt=None, lte=None) -> Generator:
sd = self.index[tablename][indexname]

# Define range bounds
Expand All @@ -264,14 +287,10 @@ def query(self, tablename: str, indexname: str, gt=None, gte=None, lt=None, lte=
inclusive_min = gte is not None
inclusive_max = lte is not None

irange = sd.irange(
keys = sd.irange(
minimum=min_key,
maximum=max_key,
inclusive=(inclusive_min, inclusive_max)
)

result = []
for key in irange:
result.extend(sd[key])

return result
return chain.from_iterable(sd[key] for key in keys)
Loading