docs(embedded): sync metric alias list in type stub, switch argpartition pivot

polaz · polaz · commit 2042d0f37bc4 · 2026-05-23T15:41:01.000+03:00
- _coordinode_embedded.pyi: the type stub docstring for `metric` now lists
  every accepted alias (cosine/angular, euclidean/l2, dot/dot_product/
  ip/inner_product, manhattan/l1). The Rust parser and the constructor
  docstring on the Rust side already enumerate the full set; the .pyi
  stub had a stale subset that omitted `dot_product` and `ip`.

- tests/unit/test_hnsw.py: brute-force helper now passes `k - 1` as the
  argpartition pivot instead of `k`. Both forms yield identical sets
  for random Gaussian inputs (no ties) as long as k < len(X), but the
  `k - 1` form also stays in bounds when k == len(X), matches the most
  common Python phrasing of "k smallest via argpartition", and stops
  the static analyzer from flagging the same `k` vs `k - 1` concern on
  every review round.
diff --git a/coordinode-embedded/python/coordinode_embedded/_coordinode_embedded.pyi b/coordinode-embedded/python/coordinode_embedded/_coordinode_embedded.pyi
@@ -15,9 +15,11 @@ class Hnsw:
     Args:
         dim: Embedding dimension.  Must match the vectors passed to ``fit``
              and ``knn_query``.
-        metric: Distance metric — one of ``"cosine"`` / ``"angular"``,
-                ``"euclidean"`` / ``"l2"``, ``"dot"`` / ``"inner_product"``,
-                ``"manhattan"`` / ``"l1"``.
+        metric: Distance metric. Accepted spellings (case-insensitive):
+                  - cosine similarity: ``"cosine"``, ``"angular"``
+                  - Euclidean (L2):    ``"euclidean"``, ``"l2"``
+                  - dot product:       ``"dot"``, ``"dot_product"``, ``"ip"``, ``"inner_product"``
+                  - Manhattan (L1):    ``"manhattan"``, ``"l1"``
         M: Max connections per element per layer (HNSW spec). Default 16.
         ef_construction: Candidate list size during build. Default 200.
         max_elements: Hint to pre-allocate node storage. Default 1_000_000.
diff --git a/tests/unit/test_hnsw.py b/tests/unit/test_hnsw.py
@@ -16,14 +16,11 @@
 def _brute_force_topk(X, q, k: int):
     # argpartition gives the top-k indices in O(N), vs argsort's O(N log N).
     # We only need the SET of nearest k, ordering inside the set doesn't
-    # matter for the recall metric.
-    #
-    # The `k` argument to argpartition is the pivot index, NOT an off-by-one:
-    # numpy places the (k+1)-th smallest at position k, with everything
-    # smaller at positions 0..k-1.  So `[:k]` gives exactly the k smallest
-    # — verified empirically against np.argsort on random vectors.
+    # matter for the recall metric.  Pivot is k-1 (0-indexed) so the
+    # element at position k-1 lands at its sorted position and everything
+    # smaller is at 0..k-2 — slice [:k] yields the k smallest.
     dists = ((X - q) ** 2).sum(axis=1)
-    return set(np.argpartition(dists, k)[:k].tolist())
+    return set(np.argpartition(dists, k - 1)[:k].tolist())
 
 
 def test_metric_parsing_and_dim_validation() -> None: