Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions tests/test_contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,66 @@ def test_chroma_rejects_unsupported_operator():
# Chroma's filter subset has no $exists -> a clear vd error, pre-query.
with pytest.raises(UnsupportedFilterError):
list(col.search([0.1, 0.2, 0.3], filter={"missing": {"$exists": True}}))


# --------------------------------------------------------------------------- #
# Score semantics — the cross-backend contract documented in vd.base.
# Pin the canonical scale on the in-memory reference adapter; distance-
# returning adapters route through score_from_distance and therefore match
# automatically. See vd/base.py "Score semantics" and issue #9.
# --------------------------------------------------------------------------- #


@pytest.mark.parametrize(
    "metric,query,expected_top_score",
    [
        # cosine: a vector compared with itself has similarity 1.0, the top
        # of the [-1, 1] range.
        ("cosine", [1.0, 0.0, 0.0], 1.0),
        # dot: the inner product of this unit vector with itself is 1.0; the
        # scale has no upper bound in general, so 1.0 is specific to this
        # input.
        ("dot", [1.0, 0.0, 0.0], 1.0),
        # l2: zero euclidean distance maps to 1 / (1 + 0) = 1.0.
        ("l2", [1.0, 0.0, 0.0], 1.0),
    ],
)
def test_score_contract_identical_query_returns_max_canonical_score(
    metric, query, expected_top_score
):
    """Searching with a stored vector ranks that document first at the expected score.

    One in-memory collection is created per metric, holding the query vector
    itself (id ``"a"``) and an orthogonal vector (id ``"b"``).
    """
    collection_name = f"score_contract_{metric}"
    collection = vd.connect("memory").create_collection(
        collection_name, metric=metric
    )
    collection["a"] = Document(id="a", text="match", vector=query)
    collection["b"] = Document(id="b", text="other", vector=[0.0, 1.0, 0.0])

    results = list(collection.search(query, limit=2))
    top_hit = results[0]

    assert top_hit["id"] == "a"
    assert top_hit["score"] == pytest.approx(expected_top_score)


def test_score_contract_cosine_orthogonal_is_zero():
    """Under cosine, an identical vector scores 1.0 and an orthogonal one 0.0."""
    collection = vd.connect("memory").create_collection(
        "score_contract_cos_ortho", metric="cosine"
    )
    collection["a"] = Document(id="a", text="x", vector=[1.0, 0.0])
    collection["b"] = Document(id="b", text="y", vector=[0.0, 1.0])

    # Index scores by document id so the assertions do not depend on the
    # order in which the hits come back.
    score_by_id = {}
    for hit in list(collection.search([1.0, 0.0], limit=2)):
        score_by_id[hit["id"]] = hit["score"]

    assert score_by_id["a"] == pytest.approx(1.0)
    assert score_by_id["b"] == pytest.approx(0.0)


def test_score_from_distance_helper_matches_documented_table():
    """Check ``score_from_distance`` against its per-metric formulas.

    Each case is ``(distance, metric, expected)``. ``expected`` is either a
    plain float (exact comparison) or a ``pytest.approx`` wrapper (tolerant
    comparison), matching how each value can be represented in binary
    floating point.
    """
    from vd.backends._helpers import score_from_distance

    cases = [
        # cosine: 1 - d, d in [0, 2] -> score in [-1, 1]
        (0.0, "cosine", 1.0),
        (1.0, "cosine", 0.0),
        (2.0, "cosine", -1.0),
        # dot: -d (undoes the negated-inner-product "distance" convention)
        (-0.7, "dot", pytest.approx(0.7)),
        (2.5, "dot", pytest.approx(-2.5)),
        # l2: 1 / (1 + d), d in [0, inf) -> score in (0, 1]
        (0.0, "l2", 1.0),
        (1.0, "l2", pytest.approx(0.5)),
        (9.0, "l2", pytest.approx(0.1)),
    ]
    for distance, metric, expected in cases:
        assert score_from_distance(distance, metric) == expected
42 changes: 37 additions & 5 deletions vd/backends/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,27 +21,59 @@

def score_from_distance(distance: float, metric: str) -> float:
    """
    Convert a raw backend distance to ``vd``'s canonical similarity score.

    Reference implementation of the cross-backend score contract documented
    in :mod:`vd.base` ("Score semantics"): every ``SearchResult`` ``score``
    is **higher-is-better** on a per-metric canonical scale, so fusion /
    dedup / threshold logic works the same way across adapters.

    Per-metric output:

    ============ =============================== ======================
    metric       formula                         range
    ============ =============================== ======================
    ``cosine``   ``1 - distance``                ``[-1, 1]``
    ``dot``      ``-distance``                   ``(-inf, +inf)``
    ``l2``       ``1 / (1 + distance)``          ``(0, 1]``
    ============ =============================== ======================

    Adapters whose backend already returns a higher-is-better number on a
    *different* per-metric scale (Elasticsearch kNN ``_score``, MongoDB
    Atlas ``vectorSearchScore``, Pinecone ``match.score``) **do not** route
    through this helper — they pass the native score through and document
    the deviation. Adapters whose backend returns a lower-is-better distance
    (Chroma, DuckDB, FAISS L2, LanceDB, Milvus L2, pgvector, Redis,
    sqlite-vec, Turbopuffer, Weaviate) call this helper to canonicalize.

    Parameters
    ----------
    distance : float
        The backend's raw distance (lower = closer). For ``dot``, this is
        the negated inner product that some backends report so that
        "distance" sorts the same way as the other metrics; for ``cosine``,
        the cosine distance in ``[0, 2]``; for ``l2``, the Euclidean (or
        squared Euclidean) distance in ``[0, +inf)``.
    metric : str
        ``"cosine"``, ``"dot"``, or ``"l2"``. Unknown metrics fall through
        to the ``l2`` formula so the result is at least bounded for
        non-negative distances.

    Returns
    -------
    float
        The higher-is-better score on the metric's canonical scale.

    Notes
    -----
    The ``l2`` / fallback formula is not guarded: a plain-float distance of
    exactly ``-1.0`` raises ``ZeroDivisionError``, and other negative
    distances fall outside the documented ``(0, 1]`` range.

    Examples
    --------
    >>> score_from_distance(0.0, 'cosine')
    1.0
    >>> round(score_from_distance(1.0, 'l2'), 3)
    0.5
    >>> score_from_distance(-0.7, 'dot')
    0.7
    """
    if metric == "cosine":
        # Cosine distance is in [0, 2]; similarity = 1 - distance ∈ [-1, 1].
        return 1.0 - distance
    if metric == "dot":
        # Many backends report negative inner product as the "distance" so
        # the smallest "distance" is the most similar; un-negate to recover
        # the raw inner product (vd's canonical dot score).
        return -distance
    # l2 (and any unknown): squash a non-negative distance into (0, 1].
    return 1.0 / (1.0 + distance)
Expand Down
24 changes: 20 additions & 4 deletions vd/backends/faiss.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@
"Install with: pip install faiss-cpu numpy"
) from e

from vd.backends._helpers import apply_client_filter, overfetch_limit
from vd.backends._helpers import (
apply_client_filter,
overfetch_limit,
score_from_distance,
)
from vd.base import (
AbstractClient,
AbstractCollection,
Expand Down Expand Up @@ -150,13 +154,25 @@ def _query(
if doc_id is None:
continue
doc = self._docs[doc_id]
# IndexFlatIP -> higher is better; IndexFlatL2 -> lower is better.
value = float(score)
# FAISS conventions vs. vd's canonical score (vd.base "Score
# semantics"):
# - IndexFlatIP (cosine/dot): returns inner product directly,
# higher-is-better. For cosine, vectors are L2-normalized at
# write time, so the inner product IS cosine similarity in
# [-1, 1] — matches vd's cosine score. For dot, the raw
# inner product matches vd's dot score (note: FAISS already
# returns the raw inner product here, so it must NOT be routed
# through `score_from_distance(..., "dot")`, which expects the
# negated inner product and would flip the sign).
# - IndexFlatL2: returns *squared* L2 distance, lower-is-better.
# Funnel through score_from_distance to canonicalize to
# 1/(1+d) ∈ (0, 1] like every other distance-returning adapter.
raw = float(score)
value = score_from_distance(raw, "l2") if self.metric == "l2" else raw
results.append(
{
"id": doc_id,
"text": doc.text,
"score": value if self.metric != "l2" else 1.0 / (1.0 + value),
"score": value,
"metadata": dict(doc.metadata),
}
)
Expand Down
9 changes: 9 additions & 0 deletions vd/backends/qdrant.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,15 @@ def _query(
for point in response.points:
payload = point.payload or {}
score = point.score
# Qdrant `point.score` per metric (see vd.base "Score semantics"):
# - cosine: cosine similarity in [-1, 1] → matches vd canonical
# - dot: raw inner product → matches vd canonical
# - euclid: a *distance* value (lower-is-better); Qdrant's
# own sort orders ascending in that case. The existing
# transform 1/(1+d) matches vd's canonical l2 score directly
# (no un-negation), so leave it as-is. If a future Qdrant
# client version switches Euclid to higher-is-better, this
# branch must be revisited.
results.append(
{
"id": payload.get(_ID_KEY, str(point.id)),
Expand Down
45 changes: 44 additions & 1 deletion vd/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,43 @@
#: spellings (e.g. ``"l2"`` -> Qdrant ``Distance.EUCLID``).
METRICS = frozenset({"cosine", "dot", "l2"})

# --------------------------------------------------------------------------- #
# Score semantics — the cross-backend contract for SearchResult["score"]
# --------------------------------------------------------------------------- #
#
# Every :data:`SearchResult` carries a ``score`` field. ``vd``'s contract for
# that number is **higher-is-better, per-metric canonical similarity**:
#
# ============ =============================== ======================
# metric canonical score range
# ============ =============================== ======================
# ``cosine`` ``1 - cosine_distance`` ``[-1, 1]``
# ``dot`` raw inner product ``(-inf, +inf)``
# ``l2`` ``1 / (1 + euclidean_distance)`` ``(0, 1]``
# ============ =============================== ======================
#
# Rationale:
#
# - **Same backend, different metrics** stay comparable (all three are
# higher-better).
# - **Same metric, different backends** stay comparable: ``vd``'s own
#   ``reciprocal_rank_fusion`` / ``deduplicate_results`` / ``multi_query_search``
#   helpers and consumers like ``ef.SearchHit`` all assume this scale. An
#   adapter that returned ``1 / (1 + raw_distance)`` for cosine instead of
#   ``1 - raw_distance`` would still order its own results correctly (both
#   transforms decrease as distance grows), but its scores would not be
#   comparable with other adapters' and would break any score-threshold
#   logic.
#
# The reference implementations are :func:`vd.backends.memory._similarity`
# (in-memory adapter) and :func:`vd.backends._helpers.score_from_distance`
# (distance-returning adapters). Adapters whose backend natively returns a
# higher-is-better score on a *different* per-metric scale (e.g. Elasticsearch
# kNN, MongoDB Atlas, Pinecone) **document the deviation in their adapter
# docstring** rather than silently rescaling, because rescaling a backend's
# own combined-ranking score can change ordering for ties. The deviation is
# the cost of using that backend's native scoring.
#
# See issue #9 for the history of this contract.

# Re-exported from vd.filters; imported lazily inside methods to avoid a cycle
# (vd.filters imports UnsupportedFilterError from this module).

Expand Down Expand Up @@ -705,7 +742,13 @@ def search(
------
dict
``{"id", "text", "score", "metadata"}`` — or whatever ``egress``
returns.
returns. ``score`` is a higher-is-better, per-metric canonical
similarity (see the "Score semantics" table at the top of
:mod:`vd.base`): cosine in ``[-1, 1]``, dot in ``(-inf, +inf)``,
l2 squashed to ``(0, 1]``. Adapters whose backend returns a
native combined-ranking score on a different scale (e.g.
Elasticsearch, Atlas, Pinecone) document the deviation in
their own docstring.
"""
from vd.filters import validate_filter

Expand Down
Loading