Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 17 additions & 6 deletions docs/byok_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,18 @@ Both modes rely on:

Inline RAG additionally supports:
- **Score Multiplier**: Optional weight applied per BYOK vector store when mixing multiple sources. Allows custom prioritization of content.
- **Relevance cutoff score** (`relevance_cutoff_score` in `byok_rag`): Minimum raw similarity score for a chunk to be returned from that BYOK vector store. Chunks below the threshold are dropped before results are merged and ranked with other sources. Configure per knowledge source (each `byok_rag` entry has its own value). The default when omitted is `0.3` (see `DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE` in `src/constants.py`). This value is passed to Llama Stack as the vector search `score_threshold` for that store.

> [!NOTE]
> OKP and BYOK scores are not directly comparable (different scoring systems), so
> `score_multiplier` does not apply to OKP results. To control the amount of retrieved
> context, set the `BYOK_RAG_MAX_CHUNKS` and `OKP_RAG_MAX_CHUNKS` constants in `src/constants.py`
> (defaults: 10 and 5 respectively). For Tool RAG, use `TOOL_RAG_MAX_CHUNKS` (default: 10).

> [!NOTE]
> `relevance_cutoff_score` applies to Inline RAG only. When the model uses Tool RAG (`file_search`),
> Lightspeed Stack does not send this setting; retrieval uses Llama Stack’s default ranking for that path.
> Use Inline RAG if you need per-store cutoff behavior from configuration.

Comment thread
syedriko marked this conversation as resolved.
---

Expand Down Expand Up @@ -288,11 +294,16 @@ registered_resources:
> embedding_dimension: 768
> vector_db_id: your-index-id # Llama Stack vector store ID (from index generation)
> db_path: /path/to/vector_db/faiss_store.db
> score_multiplier: 1.0 # Optional: weight results when mixing multiple sources
> score_multiplier: 1.0 # Optional: weight results when mixing multiple BYOK sources (Inline RAG)
> relevance_cutoff_score: 0.3 # Optional: min raw similarity per chunk for this store (Inline RAG only; default 0.3)
> ```
>
> When multiple BYOK sources are configured, `score_multiplier` adjusts the relative importance of
> each store's results during Inline RAG retrieval. Values above 1.0 boost a store; below 1.0 reduce it.
>
> `relevance_cutoff_score` is interpreted in the score space of that store's vector backend,
> so values are not comparable across different vector stores or with OKP. Tune each
> `byok_rag` entry separately, based on retrieval quality for that corpus.

### Step 5: Configure RAG Strategy

Expand All @@ -319,10 +330,10 @@ okp:

Both modes can be enabled simultaneously. Choose based on your latency and control preferences:

| Mode | When context is fetched | Tool call needed | score_multiplier |
|------|------------------------|------------------|-----------------|
| Inline RAG | With every query | No | Yes (BYOK only) |
| Tool RAG | On LLM demand | Yes | No |
| Mode | When context is fetched | Tool call needed | score_multiplier | relevance_cutoff_score |
|------|------------------------|------------------|----------------|------------------------|
| Inline RAG | With every query | No | Yes (BYOK only) | Yes (BYOK only) |
| Tool RAG | On LLM demand | Yes | No | No |

> [!TIP]
> A ready-to-use example combining BYOK and OKP is available at
Expand Down Expand Up @@ -572,4 +583,4 @@ For additional support and advanced configurations, refer to:
- [Llama Stack Documentation](https://llama-stack.readthedocs.io/)
- [rag-content Tool Repository](https://github.com/lightspeed-core/rag-content)

Remember to regularly update your knowledge sources and monitor system performance to maintain optimal BYOK functionality.
Remember to regularly update your knowledge sources and monitor system performance to maintain optimal BYOK functionality.
7 changes: 7 additions & 0 deletions docs/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -11806,6 +11806,13 @@
"title": "Score multiplier",
"description": "Multiplier applied to relevance scores from this vector store. Used to weight results when querying multiple knowledge sources. Values > 1 boost this store's results; values < 1 reduce them.",
"default": 1.0
},
"relevance_cutoff_score": {
"type": "number",
"exclusiveMinimum": 0.0,
"title": "Relevance cutoff score",
"description": "Minimum raw similarity score to consider a result relevant. Results with a similarity score below this threshold are not returned.",
"default": 0.3
Comment thread
syedriko marked this conversation as resolved.
}
},
"additionalProperties": false,
Expand Down
3 changes: 3 additions & 0 deletions src/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,9 @@
BYOK_RAG_MAX_CHUNKS: Final[int] = 10 # retrieved from BYOK RAG
OKP_RAG_MAX_CHUNKS: Final[int] = 5 # retrieved from OKP RAG

# Default minimum raw similarity score for chunks returned from a BYOK vector store
DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE: Final[float] = 0.3

# Solr OKP constants
SOLR_VECTOR_SEARCH_DEFAULT_K: Final[int] = 5
SOLR_VECTOR_SEARCH_DEFAULT_SCORE_THRESHOLD: Final[float] = 0.3
Expand Down
8 changes: 8 additions & 0 deletions src/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1718,6 +1718,14 @@ class ByokRag(ConfigurationBase):
"Values > 1 boost this store's results; values < 1 reduce them.",
)

relevance_cutoff_score: float = Field(
constants.DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE,
gt=0,
title="Relevance cutoff score",
description="Minimum raw similarity score to consider a result relevant. "
"Results with a similarity score below this threshold are not returned.",
)


class QuotaLimiterConfiguration(ConfigurationBase):
"""Configuration for one quota limiter.
Expand Down
22 changes: 22 additions & 0 deletions src/utils/vector_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,24 @@
logger = get_logger(__name__)


def _relevance_cutoff_for_vector_store(vector_store_id: str) -> float:
    """Look up the relevance cutoff configured for a Llama Stack vector store.

    The configured BYOK RAG entries are scanned in order and the first one
    whose ``vector_db_id`` equals ``vector_store_id`` supplies the cutoff.

    Args:
        vector_store_id: Llama Stack vector store identifier, compared against
            the ``vector_db_id`` of each configured BYOK RAG entry.

    Returns:
        The ``relevance_cutoff_score`` of the first matching BYOK RAG entry, or
        ``constants.DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE`` when no entry
        matches.
    """
    # Lazy generator: iteration stops at the first matching entry.
    matching_cutoffs = (
        entry.relevance_cutoff_score
        for entry in configuration.configuration.byok_rag
        if entry.vector_db_id == vector_store_id
    )
    return next(matching_cutoffs, constants.DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE)
Comment thread
coderabbitai[bot] marked this conversation as resolved.


def _get_okp_base_url() -> AnyUrl:
"""Return OKP document base URL from configuration (rhokp_url), or default if unset.

Expand Down Expand Up @@ -180,6 +198,7 @@ async def _query_store_for_byok_rag(
vector_store_id: str,
query: str,
weight: float,
score_threshold: float,
) -> list[dict[str, Any]]:
"""Query a single vector store for BYOK RAG.

Expand All @@ -188,6 +207,7 @@ async def _query_store_for_byok_rag(
vector_store_id: ID of the vector store to query
query: Search query string
weight: Score multiplier to apply
score_threshold: Minimum raw similarity score (``relevance_cutoff_score``)

Returns:
List of weighted result dictionaries, or empty list on error
Expand All @@ -199,6 +219,7 @@ async def _query_store_for_byok_rag(
params={
"max_chunks": constants.BYOK_RAG_MAX_CHUNKS,
"mode": "vector",
"score_threshold": score_threshold,
},
)
return _extract_byok_rag_chunks(search_response, vector_store_id, weight)
Expand Down Expand Up @@ -410,6 +431,7 @@ async def _fetch_byok_rag(
vector_store_id,
query,
score_multiplier_mapping.get(vector_store_id, 1.0),
_relevance_cutoff_for_vector_store(vector_store_id),
)
for vector_store_id in vector_store_ids_to_query
]
Expand Down
19 changes: 19 additions & 0 deletions tests/unit/models/config/test_byok_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pydantic import ValidationError

from constants import (
DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE,
DEFAULT_EMBEDDING_DIMENSION,
DEFAULT_EMBEDDING_MODEL,
DEFAULT_RAG_TYPE,
Expand Down Expand Up @@ -35,6 +36,7 @@ def test_byok_rag_configuration_default_values() -> None:
assert byok_rag.vector_db_id == "vector_db_id"
assert byok_rag.db_path == "tests/configuration/rag.txt"
assert byok_rag.score_multiplier == DEFAULT_SCORE_MULTIPLIER
assert byok_rag.relevance_cutoff_score == DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE


def test_byok_rag_configuration_nondefault_values() -> None:
Expand All @@ -54,6 +56,7 @@ def test_byok_rag_configuration_nondefault_values() -> None:
vector_db_id="vector_db_id",
db_path="tests/configuration/rag.txt",
score_multiplier=1.0,
relevance_cutoff_score=0.72,
)
assert byok_rag is not None
assert byok_rag.rag_id == "rag_id"
Expand All @@ -62,6 +65,7 @@ def test_byok_rag_configuration_nondefault_values() -> None:
assert byok_rag.embedding_dimension == 1024
assert byok_rag.vector_db_id == "vector_db_id"
assert byok_rag.db_path == "tests/configuration/rag.txt"
assert byok_rag.relevance_cutoff_score == 0.72


def test_byok_rag_configuration_wrong_dimension() -> None:
Expand Down Expand Up @@ -199,3 +203,18 @@ def test_byok_rag_configuration_score_multiplier_must_be_positive() -> None:
db_path="tests/configuration/rag.txt",
score_multiplier=0.0,
)


def test_byok_rag_configuration_relevance_cutoff_must_be_positive() -> None:
    """Check that a relevance_cutoff_score of 0.0 is rejected by validation."""
    # Only relevance_cutoff_score is invalid here; score_multiplier is set to a
    # positive value so it cannot be the source of the validation error.
    with pytest.raises(ValidationError, match="greater than 0"):
        _ = ByokRag(
            relevance_cutoff_score=0.0,
            score_multiplier=1.0,
            rag_id="rag_id",
            rag_type="rag_type",
            embedding_model="embedding_model",
            embedding_dimension=1024,
            vector_db_id="vector_db_id",
            db_path="tests/configuration/rag.txt",
        )
2 changes: 2 additions & 0 deletions tests/unit/models/config/test_dump_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from pydantic import SecretStr

from constants import DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE
from models.config import (
ByokRag,
Configuration,
Expand Down Expand Up @@ -1024,6 +1025,7 @@ def test_dump_configuration_byok(tmp_path: Path) -> None:
"rag_type": "inline::faiss",
"vector_db_id": "vector_db_id",
"score_multiplier": 1.0,
"relevance_cutoff_score": DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE,
},
],
"quota_handlers": {
Expand Down
Loading
Loading