Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.3.1
current_version = 0.4.0
commit = True
tag = True

Expand Down
29 changes: 9 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,15 @@ Data is kept purely in RAM and is **volatile**: it is **not persisted across app
- **Zero I/O overhead**: pure in‑RAM storage (`dict`/`list` under the hood)
- **Commit/rollback support**
- **Index support**: indexes are recognized and used for faster lookups
- **Merge and `get()` support**: like real SQLAlchemy behavior
- **Lazy query evaluation**: supports generator pipelines and short-circuiting
- `first()`-style queries avoid scanning the full dataset
- Optimized for read-heavy workloads and streaming filters

## Benchmark

Curious how `sqlalchemy-memory` stacks up?

[View Benchmark Results](https://sqlalchemy-memory.readthedocs.io/en/latest/benchmarks.html) comparing `sqlalchemy-memory` to in-memory SQLite.

## Installation

Expand All @@ -48,25 +56,6 @@ pip install sqlalchemy-memory

[See the official documentation for usage examples](https://sqlalchemy-memory.readthedocs.io/en/latest/)


## Status

Currently supports basic functionality equivalent to:

- SQLite in-memory behavior for ORM + Core queries

- `declarative_base()` model support

Coming soon:

- `func.count()` / aggregations

- Joins and relationships (limited)

- Compound indexes

- Better expression support in `update(...).values()` (e.g., +=)

## Testing

Simply run `make tests`
Expand Down
59 changes: 46 additions & 13 deletions benchmark.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
from sqlalchemy import create_engine, Column, Integer, String, Boolean, select, Index, update, delete
from sqlalchemy import create_engine, Column, Integer, String, Boolean, select, Float, update, delete, bindparam, literal
from sqlalchemy.orm import declarative_base, sessionmaker
from sqlalchemy.sql import operators
from sqlalchemy.sql.elements import BinaryExpression
from sqlalchemy_memory import MemorySession
import argparse
import time
import random
from faker import Faker

try:
from sqlalchemy_memory import create_memory_engine
except ImportError:
create_memory_engine = None

random.seed(42)
Base = declarative_base()
fake = Faker()
CATEGORIES = list("ABCDEFGHIJK")
Expand All @@ -22,22 +21,46 @@ class Item(Base):
name = Column(String)
active = Column(Boolean, index=True)
category = Column(String, index=True)
price = Column(Float, index=True)
cost = Column(Float)

def generate_items(n):
    """Lazily yield ``n`` randomly generated ``Item`` rows.

    A generator is used (rather than a list) so callers can stream rows
    into ``session.add_all``/bulk inserts without holding all of them in RAM.
    """
    for _ in range(n):
        yield Item(
            name=fake.name(),
            active=random.choice([True, False]),
            category=random.choice(CATEGORIES),
            price=round(random.uniform(5, 500), 2),
            cost=round(random.uniform(1, 300), 2),
        )

def generate_random_select_query():
    """Build a random ``SELECT`` over ``Item`` with one or more WHERE clauses.

    Each call independently decides whether to filter on ``active`` (50%),
    ``category`` (70%), ``price`` (60%) and ``cost`` (30%); a fallback
    predicate guarantees the statement always has at least one clause.
    """
    clauses = []

    if random.random() < 0.5:
        val = random.choice([True, False])
        op = random.choice([operators.eq, operators.ne])
        clauses.append(BinaryExpression(Item.active, literal(val), op))

    if random.random() < 0.7:
        subset = random.sample(CATEGORIES, random.randint(1, 4))
        op = random.choice([operators.in_op, operators.notin_op])
        param = bindparam("category_list", subset, expanding=True)
        clauses.append(BinaryExpression(Item.category, param, op))

    if random.random() < 0.6:
        price_val = round(random.uniform(10, 400), 2)
        # BUG FIX: choice list was [gt, lt, le, gt] — `gt` duplicated, `ge`
        # missing, skewing the operator distribution.
        op = random.choice([operators.gt, operators.lt, operators.le, operators.ge])
        clauses.append(BinaryExpression(Item.price, literal(price_val), op))

    if random.random() < 0.3:
        cost_val = round(random.uniform(10, 200), 2)
        op = random.choice([operators.gt, operators.lt, operators.le, operators.ge])
        clauses.append(BinaryExpression(Item.cost, literal(cost_val), op))

    # BUG FIX: the fallback used to be `random.random() < 0.5 or not clauses`
    # on the FIRST branch, where `clauses` was still guaranteed empty — the
    # condition was always true, so the `active` filter was applied to every
    # query.  The fallback only makes sense after all random draws.
    if not clauses:
        clauses.append(BinaryExpression(Item.active, literal(True), operators.eq))

    return select(Item).where(*clauses)

def inserts(Session, count):
Expand All @@ -49,15 +72,24 @@ def inserts(Session, count):
print(f"Inserted {count} items in {insert_duration:.2f} seconds.")
return insert_duration

def selects(Session, count, fetch_type):
    """Execute ``count`` random SELECTs and return the elapsed wall-clock seconds.

    Query construction happens up front so only execution + fetching is timed.

    fetch_type:
      - ``"all"``:   materialize every matching row
      - ``"limit"``: append ``LIMIT 5`` before executing
      - ``"first"``: fetch only the first row (exercises short-circuiting)
    """
    queries = [generate_random_select_query() for _ in range(count)]

    query_start = time.time()
    with Session() as session:
        for stmt in queries:
            if fetch_type == "limit":
                stmt = stmt.limit(5)

            result = session.execute(stmt)

            if fetch_type == "first":
                result.first()
            else:
                list(result.scalars())

    query_duration = time.time() - query_start
    print(f"Executed {count} select queries ({fetch_type}) in {query_duration:.2f} seconds.")
    return query_duration

def updates(Session, random_ids):
Expand Down Expand Up @@ -105,7 +137,8 @@ def run_benchmark(db_type="sqlite", count=100_000):
Base.metadata.create_all(engine)

elapsed = inserts(Session, count)
elapsed += selects(Session, 500)
elapsed += selects(Session, 500, fetch_type="all")
elapsed += selects(Session, 500, fetch_type="limit")

random_ids = random.sample(range(1, count + 1), 500)
elapsed += updates(Session, random_ids)
Expand Down
29 changes: 17 additions & 12 deletions docs/benchmarks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ This benchmark compares `sqlalchemy-memory` to `in-memory SQLite` using 20,000 i

As the results show, `sqlalchemy-memory` **excels in read-heavy workloads**, delivering significantly faster query performance. While SQLite performs slightly better on update and delete operations, the overall runtime of `sqlalchemy-memory` remains substantially lower, making it a strong choice for prototyping and simulation.

`Check the benchmark script on GitHub <https://github.com/rundef/sqlalchemy-memory/blob/main/benchmark.py>`_

.. list-table::
:header-rows: 1
:widths: 25 25 25
Expand All @@ -13,17 +15,20 @@ As the results show, `sqlalchemy-memory` **excels in read-heavy workloads**, del
- SQLite (in-memory)
- sqlalchemy-memory
* - Insert
- 3.17 sec
- 2.70 sec
* - 500 Select Queries
- 26.37 sec
- 2.94 sec
- 3.30 sec
- **3.10 sec**
* - 500 Select Queries (all())
- 30.07 sec
- **4.14 sec**
* - 500 Select Queries (limit(5))
- **0.24** sec
- 0.30 sec
* - 500 Updates
- 0.26 sec
- 1.12 sec
- 0.25 sec
- **0.19** sec
* - 500 Deletes
- 0.09 sec
- 0.90 sec
* - **Total Runtime**
- **29.89 sec**
- **7.66 sec**
- **0.09** sec
- **0.09** sec
* - *Total Runtime*
- 33.95 sec
- **7.81 sec**
2 changes: 2 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ Welcome to sqlalchemy-memory's documentation!

`sqlalchemy-memory` is a pure in-memory backend for SQLAlchemy 2.0 that supports both sync and async modes, with full compatibility for SQLAlchemy Core and ORM.

📦 GitHub: https://github.com/rundef/sqlalchemy-memory

Quickstart: sync example
------------------------

Expand Down
1 change: 1 addition & 0 deletions docs/query.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Supported Functions

- `DATE(column)`
- `func.json_extract(col, '$.expr')`
- Aggregation functions: `func.count()` / `func.sum()` / `func.min()` / `func.max()` / `func.avg()`

Indexes
-------
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "sqlalchemy-memory"
version = "0.3.1"
version = "0.4.0"
dependencies = [
"sqlalchemy>=2.0,<3.0",
"sortedcontainers>=2.4.0"
Expand Down
2 changes: 1 addition & 1 deletion sqlalchemy_memory/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
"AsyncMemorySession",
]

__version__ = '0.3.1'
__version__ = '0.4.0'
83 changes: 51 additions & 32 deletions sqlalchemy_memory/base/indexes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections import defaultdict
from sortedcontainers import SortedDict
from typing import Any, List
from typing import Any, List, Generator
from itertools import chain
from sqlalchemy.sql import operators

from ..helpers.ordered_set import OrderedSet
Expand Down Expand Up @@ -108,62 +109,84 @@ def on_update(self, obj, updates):
self.hash_index.add(tablename, indexname, new_value, obj)
self.range_index.add(tablename, indexname, new_value, obj)

def query(self, collection, tablename, colname, operator, value, collection_is_full_table=False):
    """Resolve a single WHERE condition through the hash/range indexes.

    Returns ``None`` when the column is not indexed (caller falls back to a
    scan) or the operator is unsupported.  Otherwise returns either the raw
    index result (when ``collection_is_full_table`` — no intersection with
    ``collection`` is needed) or a lazy generator that filters ``collection``
    by index membership, preserving ``collection``'s order.
    """
    indexname = self._column_to_index(tablename, colname)
    if not indexname:
        return None

    def _keep(matches):
        # Keep only items of `collection` found by the index.
        if collection_is_full_table:
            # Fast path: the index result IS the answer.
            return matches
        # Convert to set ONCE so per-item membership is O(1); the original
        # eq branch probed a plain list here (O(n) per item).
        allowed = set(matches)
        return (item for item in collection if item in allowed)

    def _drop(matches):
        # Remove items of `collection` found by the index (negated ops).
        excluded = set(matches)
        return (item for item in collection if item not in excluded)

    # Hash index handles equality-style operators.
    if operator == operators.eq:
        return _keep(self.hash_index.query(tablename, indexname, value))

    elif operator == operators.ne:
        return _drop(self.hash_index.query(tablename, indexname, value))

    elif operator == operators.in_op:
        matches = chain.from_iterable(
            self.hash_index.query(tablename, indexname, v) for v in value
        )
        return _keep(matches)

    elif operator == operators.notin_op:
        excluded = chain.from_iterable(
            self.hash_index.query(tablename, indexname, v) for v in value
        )
        return _drop(excluded)

    # Range index handles ordering operators.
    elif operator == operators.gt:
        return _keep(self.range_index.query(tablename, indexname, gt=value))

    elif operator == operators.ge:
        return _keep(self.range_index.query(tablename, indexname, gte=value))

    elif operator == operators.lt:
        return _keep(self.range_index.query(tablename, indexname, lt=value))

    elif operator == operators.le:
        return _keep(self.range_index.query(tablename, indexname, lte=value))

    elif operator == operators.between_op and isinstance(value, (tuple, list)) and len(value) == 2:
        return _keep(self.range_index.query(tablename, indexname, gte=value[0], lte=value[1]))

    elif operator == operators.not_between_op and isinstance(value, (tuple, list)) and len(value) == 2:
        return _drop(self.range_index.query(tablename, indexname, gte=value[0], lte=value[1]))


def get_selectivity(self, tablename, colname, operator, value, total_count):
"""
Estimate selectivity: higher means worst filtering.
Estimate the selectivity of a single WHERE condition.

This method is used to rank or sort WHERE conditions by their estimated
filtering power. A lower selectivity value indicates that the condition
is expected to filter out more rows (i.e., fewer rows remain after applying it),
making it more selective.
"""

indexname = self._column_to_index(tablename, colname)
Expand Down Expand Up @@ -220,7 +243,7 @@ def remove(self, tablename: str, indexname: str, value: Any, obj: Any):
del self.index[tablename][indexname][value]

def query(self, tablename: str, indexname: str, value: Any) -> List[Any]:
return list(self.index[tablename][indexname].get(value, []))
return self.index[tablename][indexname].get(value, [])


class RangeIndex:
Expand Down Expand Up @@ -255,7 +278,7 @@ def remove(self, tablename: str, indexname: str, value: Any, obj: Any):
except ValueError:
pass

def query(self, tablename: str, indexname: str, gt=None, gte=None, lt=None, lte=None) -> List[Any]:
def query(self, tablename: str, indexname: str, gt=None, gte=None, lt=None, lte=None) -> Generator:
sd = self.index[tablename][indexname]

# Define range bounds
Expand All @@ -264,14 +287,10 @@ def query(self, tablename: str, indexname: str, gt=None, gte=None, lt=None, lte=
inclusive_min = gte is not None
inclusive_max = lte is not None

irange = sd.irange(
keys = sd.irange(
minimum=min_key,
maximum=max_key,
inclusive=(inclusive_min, inclusive_max)
)

result = []
for key in irange:
result.extend(sd[key])

return result
return chain.from_iterable(sd[key] for key in keys)
Loading