Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion integrations/ollama/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = ["haystack-ai>=2.22.0", "ollama>=0.5.0", "pydantic>=2.12.0", "tenacity>=8.2.3"]
dependencies = ["haystack-ai>=2.22.0", "ollama>=0.5.4", "pydantic>=2.12.0", "tenacity>=8.2.3"]

[project.urls]
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/ollama#readme"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def __init__(
meta_fields_to_embed: list[str] | None = None,
embedding_separator: str = "\n",
batch_size: int = 32,
dimensions: int | None = None,
) -> None:
"""
Create a new OllamaDocumentEmbedder instance.
Expand Down Expand Up @@ -76,6 +77,11 @@ def __init__(
Separator used to concatenate the metadata fields to the document text.
:param batch_size:
Number of documents to process at once.
:param dimensions:
The desired number of dimensions in the embedding output. Only supported by models
that implement Matryoshka Representation Learning (MRL), such as nomic-embed-text-v1.5,
mxbai-embed-large, and qwen3-embedding. If None (default), the full vector is returned.
Requires ollama-python >= 0.5.4 (the minimum version declared in pyproject.toml).
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's remove this line from the docstring and instead bump the version of ollama-python in pyproject.toml

"""
self.keep_alive = keep_alive
self.timeout = timeout
Expand All @@ -88,6 +94,7 @@ def __init__(
self.embedding_separator = embedding_separator
self.suffix = suffix
self.prefix = prefix
self.dimensions = dimensions

self._client = Client(host=self.url, timeout=self.timeout)
self._async_client = AsyncClient(host=self.url, timeout=self.timeout)
Expand Down Expand Up @@ -145,6 +152,7 @@ def _embed_batch(
input=batch,
options=generation_kwargs,
keep_alive=self.keep_alive,
dimensions=self.dimensions,
)
all_embeddings.extend(result["embeddings"])

Expand All @@ -166,6 +174,7 @@ async def _embed_batch_async(
input=batch,
options=generation_kwargs,
keep_alive=self.keep_alive,
dimensions=self.dimensions,
)
for batch in batches
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@
@component
class OllamaTextEmbedder:
"""
Computes the embeddings of a list of Documents and stores the obtained vectors in each Document's embedding field.

It uses embedding models compatible with the Ollama Library.
Computes the embeddings of a string using embedding models compatible with the Ollama Library.

Usage example:
```python
Expand All @@ -29,6 +27,7 @@ def __init__(
generation_kwargs: dict[str, Any] | None = None,
timeout: int = 120,
keep_alive: float | str | None = None,
dimensions: int | None = None,
) -> None:
"""
Create a new OllamaTextEmbedder instance.
Expand All @@ -51,12 +50,17 @@ def __init__(
- a number in seconds (such as 3600)
- any negative number which will keep the model loaded in memory (e.g. -1 or "-1m")
- '0' which will unload the model immediately after generating a response.
:param dimensions:
The desired number of dimensions in the embedding output. Only supported by models
that implement Matryoshka Representation Learning (MRL), such as nomic-embed-text-v1.5,
mxbai-embed-large, and qwen3-embedding. If None (default), the full vector is returned.
"""
self.keep_alive = keep_alive
self.timeout = timeout
self.generation_kwargs = generation_kwargs or {}
self.url = url
self.model = model
self.dimensions = dimensions

self._client = Client(host=self.url, timeout=self.timeout)
self._async_client = AsyncClient(host=self.url, timeout=self.timeout)
Expand All @@ -78,15 +82,15 @@ def run(
- `embedding`: The computed embeddings
- `meta`: The metadata collected during the embedding process
"""
result = self._client.embeddings(
result = self._client.embed(
model=self.model,
prompt=text,
input=text,
options=generation_kwargs,
keep_alive=self.keep_alive,
).model_dump()
result["meta"] = {"model": self.model}
dimensions=self.dimensions,
)

return result
return {"embedding": result["embeddings"][0], "meta": {"model": self.model}}

@component.output_types(embedding=list[float], meta=dict[str, Any])
async def run_async(
Expand All @@ -105,13 +109,12 @@ async def run_async(
- `embedding`: The computed embeddings
- `meta`: The metadata collected during the embedding process
"""
response = await self._async_client.embeddings(
result = await self._async_client.embed(
model=self.model,
prompt=text,
input=text,
options=generation_kwargs,
keep_alive=self.keep_alive,
dimensions=self.dimensions,
)
result = response.model_dump()
result["meta"] = {"model": self.model}

return result
return {"embedding": result["embeddings"][0], "meta": {"model": self.model}}
92 changes: 92 additions & 0 deletions integrations/ollama/tests/test_document_embedder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from unittest.mock import AsyncMock, MagicMock

import pytest
from haystack import Document
from haystack.core.serialization import default_from_dict, default_to_dict
from ollama._types import ResponseError

from haystack_integrations.components.embedders.ollama import OllamaDocumentEmbedder
Expand Down Expand Up @@ -74,3 +77,92 @@ async def test_run_async(self):
documents = result["documents"]
assert len(documents) == 3
assert all(isinstance(element, float) for document in documents for element in document.embedding)

def test_dimensions_default_is_none(self):
    """An embedder built without arguments leaves ``dimensions`` unset (``None``)."""
    assert OllamaDocumentEmbedder().dimensions is None

def test_dimensions_stored_on_instance(self):
    """The ``dimensions`` constructor argument is exposed as an instance attribute."""
    configured = OllamaDocumentEmbedder(dimensions=512)
    assert configured.dimensions == 512

def test_dimensions_passed_to_embed_client(self):
    """The configured ``dimensions`` value is forwarded as a keyword to the sync client's ``embed``."""
    embed_mock = MagicMock(return_value={"embeddings": [[0.1, 0.2, 0.3]]})
    embedder = OllamaDocumentEmbedder(dimensions=512)
    # Stub the client method on this instance so the call can be inspected afterwards.
    embedder._client.embed = embed_mock

    embedder._embed_batch(["hello world"], batch_size=32)

    # ``call_args`` unpacks to (positional args, keyword args) of the last call.
    _, forwarded_kwargs = embed_mock.call_args
    assert forwarded_kwargs["dimensions"] == 512

def test_none_dimensions_passed_to_embed_client(self):
    """An explicit ``dimensions=None`` is still forwarded (as ``None``) to the sync client's ``embed``."""
    embed_mock = MagicMock(return_value={"embeddings": [[0.1, 0.2, 0.3]]})
    embedder = OllamaDocumentEmbedder(dimensions=None)
    # Stub the client method on this instance so the call can be inspected afterwards.
    embedder._client.embed = embed_mock

    embedder._embed_batch(["hello"], batch_size=32)

    # ``call_args`` unpacks to (positional args, keyword args) of the last call.
    _, forwarded_kwargs = embed_mock.call_args
    assert forwarded_kwargs["dimensions"] is None

@pytest.mark.asyncio
async def test_dimensions_passed_to_async_embed_client(self):
    """The configured ``dimensions`` value is forwarded as a keyword to the async client's ``embed``."""
    embed_mock = AsyncMock(return_value={"embeddings": [[0.1, 0.2, 0.3]]})
    embedder = OllamaDocumentEmbedder(dimensions=256)
    # Stub the async client method on this instance so the call can be inspected afterwards.
    embedder._async_client.embed = embed_mock

    await embedder._embed_batch_async(["hello"], batch_size=32)

    # ``call_args`` unpacks to (positional args, keyword args) of the last call.
    _, forwarded_kwargs = embed_mock.call_args
    assert forwarded_kwargs["dimensions"] == 256

def test_to_dict_contains_dimensions(self):
    """
    Serializing the component includes the configured ``dimensions`` value.

    The component is serialized through ``component_to_dict`` instead of handing every
    init parameter (including ``dimensions``) to ``default_to_dict`` by hand: with the
    hand-written call the assertion would pass even if the component's own serialization
    dropped the parameter, so it only re-checked the attribute assignment.
    """
    # Imported locally so this test stands on its own regardless of the module-level imports.
    from haystack.core.serialization import component_to_dict

    embedder = OllamaDocumentEmbedder(dimensions=512)
    embedder_dict = component_to_dict(embedder, "embedder")
    assert embedder_dict["init_parameters"]["dimensions"] == 512

def test_to_dict_contains_dimensions_none(self):
    """
    Serializing a default-constructed component records ``dimensions`` as ``None``.

    The component is serialized through ``component_to_dict`` instead of handing every
    init parameter (including ``dimensions``) to ``default_to_dict`` by hand: with the
    hand-written call the assertion would pass even if the component's own serialization
    dropped the parameter, so it only re-checked the attribute assignment.
    """
    # Imported locally so this test stands on its own regardless of the module-level imports.
    from haystack.core.serialization import component_to_dict

    embedder = OllamaDocumentEmbedder()
    embedder_dict = component_to_dict(embedder, "embedder")
    # Index directly (not ``.get``) so a missing key fails instead of matching ``None``.
    assert embedder_dict["init_parameters"]["dimensions"] is None

def test_from_dict_restores_dimensions(self):
    """Deserializing a dict whose init parameters carry ``dimensions`` restores it on the instance."""
    init_parameters = {
        "model": "nomic-embed-text",
        "url": "http://localhost:11434",
        "dimensions": 512,
    }
    serialized = {
        "type": "haystack_integrations.components.embedders.ollama.document_embedder.OllamaDocumentEmbedder",
        "init_parameters": init_parameters,
    }

    restored = default_from_dict(OllamaDocumentEmbedder, serialized)

    assert restored.dimensions == 512
86 changes: 86 additions & 0 deletions integrations/ollama/tests/test_text_embedder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
import asyncio
from unittest.mock import AsyncMock, MagicMock

import pytest
from haystack.core.serialization import default_from_dict, default_to_dict
from ollama._types import ResponseError

from haystack_integrations.components.embedders.ollama import OllamaTextEmbedder
Expand Down Expand Up @@ -58,3 +62,85 @@ async def test_run_async(self):
assert isinstance(reply, dict)
assert all(isinstance(element, float) for element in reply["embedding"])
assert reply["meta"]["model"] == "all-minilm"

def test_dimensions_default_is_none(self):
    """An embedder built without arguments leaves ``dimensions`` unset (``None``)."""
    assert OllamaTextEmbedder().dimensions is None

def test_dimensions_stored_on_instance(self):
    """The ``dimensions`` constructor argument is exposed as an instance attribute."""
    configured = OllamaTextEmbedder(dimensions=256)
    assert configured.dimensions == 256

def test_dimensions_passed_to_embed_client(self):
    """The configured ``dimensions`` value is forwarded as a keyword to the sync client's ``embed``."""
    embed_mock = MagicMock(return_value={"embeddings": [[0.1, 0.2, 0.3]]})
    embedder = OllamaTextEmbedder(dimensions=256)
    # Stub the client method on this instance so the call can be inspected afterwards.
    embedder._client.embed = embed_mock

    embedder.run(text="hello world")

    # ``call_args`` unpacks to (positional args, keyword args) of the last call.
    _, forwarded_kwargs = embed_mock.call_args
    assert forwarded_kwargs["dimensions"] == 256

def test_none_dimensions_passed_to_embed_client(self):
    """An explicit ``dimensions=None`` is still forwarded (as ``None``) to the sync client's ``embed``."""
    embed_mock = MagicMock(return_value={"embeddings": [[0.1, 0.2, 0.3]]})
    embedder = OllamaTextEmbedder(dimensions=None)
    # Stub the client method on this instance so the call can be inspected afterwards.
    embedder._client.embed = embed_mock

    embedder.run(text="hello")

    # ``call_args`` unpacks to (positional args, keyword args) of the last call.
    _, forwarded_kwargs = embed_mock.call_args
    assert forwarded_kwargs["dimensions"] is None

def test_dimensions_passed_to_async_embed_client(self):
    """The configured ``dimensions`` value is forwarded as a keyword to the async client's ``embed``."""
    embed_mock = AsyncMock(return_value={"embeddings": [[0.1, 0.2, 0.3]]})
    embedder = OllamaTextEmbedder(dimensions=128)
    # Stub the async client method on this instance so the call can be inspected afterwards.
    embedder._async_client.embed = embed_mock

    # Drive the coroutine to completion from this synchronous test.
    asyncio.run(embedder.run_async(text="hello"))

    # ``call_args`` unpacks to (positional args, keyword args) of the last call.
    _, forwarded_kwargs = embed_mock.call_args
    assert forwarded_kwargs["dimensions"] == 128

def test_to_dict_contains_dimensions(self):
    """
    Serializing the component includes the configured ``dimensions`` value.

    The component is serialized through ``component_to_dict`` instead of handing every
    init parameter (including ``dimensions``) to ``default_to_dict`` by hand: with the
    hand-written call the assertion would pass even if the component's own serialization
    dropped the parameter, so it only re-checked the attribute assignment.
    """
    # Imported locally so this test stands on its own regardless of the module-level imports.
    from haystack.core.serialization import component_to_dict

    embedder = OllamaTextEmbedder(dimensions=256)
    embedder_dict = component_to_dict(embedder, "embedder")
    assert embedder_dict["init_parameters"]["dimensions"] == 256

def test_to_dict_contains_dimensions_none(self):
    """
    Serializing a default-constructed component records ``dimensions`` as ``None``.

    The component is serialized through ``component_to_dict`` instead of handing every
    init parameter (including ``dimensions``) to ``default_to_dict`` by hand: with the
    hand-written call the assertion would pass even if the component's own serialization
    dropped the parameter, so it only re-checked the attribute assignment.
    """
    # Imported locally so this test stands on its own regardless of the module-level imports.
    from haystack.core.serialization import component_to_dict

    embedder = OllamaTextEmbedder()
    embedder_dict = component_to_dict(embedder, "embedder")
    # Index directly (not ``.get``) so a missing key fails instead of matching ``None``.
    assert embedder_dict["init_parameters"]["dimensions"] is None

def test_from_dict_restores_dimensions(self):
    """Deserializing a dict whose init parameters carry ``dimensions`` restores it on the instance."""
    init_parameters = {
        "model": "nomic-embed-text",
        "url": "http://localhost:11434",
        "dimensions": 256,
    }
    serialized = {
        "type": "haystack_integrations.components.embedders.ollama.text_embedder.OllamaTextEmbedder",
        "init_parameters": init_parameters,
    }

    restored = default_from_dict(OllamaTextEmbedder, serialized)

    assert restored.dimensions == 256
Loading