deepset-ai · bogdankostic · May 21, 2026 · May 16, 2026 · May 19, 2026 · May 20, 2026
@@ -27,7 +27,7 @@ classifiers = [
     "Programming Language :: Python :: Implementation :: CPython",
     "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai>=2.22.0", "ollama>=0.5.0", "pydantic>=2.12.0", "tenacity>=8.2.3"]
+dependencies = ["haystack-ai>=2.22.0", "ollama>=0.5.4", "pydantic>=2.12.0", "tenacity>=8.2.3"]
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/ollama#readme"

@@ -41,6 +41,7 @@ def __init__(
         meta_fields_to_embed: list[str] | None = None,
         embedding_separator: str = "\n",
         batch_size: int = 32,
+        dimensions: int | None = None,
     ) -> None:
         """
         Create a new OllamaDocumentEmbedder instance.
@@ -76,6 +77,11 @@ def __init__(
             Separator used to concatenate the metadata fields to the document text.
         :param batch_size:
             Number of documents to process at once.
+        :param dimensions:
+            The desired number of dimensions in the embedding output. Only supported by models
+            that implement Matryoshka Representation Learning (MRL), such as nomic-embed-text-v1.5,
+            mxbai-embed-large, and qwen3-embedding. If None (default), the full vector is returned.
+            Requires ollama-python >= 0.5.4.
         """
         self.keep_alive = keep_alive
         self.timeout = timeout
@@ -88,6 +94,7 @@ def __init__(
         self.embedding_separator = embedding_separator
         self.suffix = suffix
         self.prefix = prefix
+        self.dimensions = dimensions
 
         self._client = Client(host=self.url, timeout=self.timeout)
         self._async_client = AsyncClient(host=self.url, timeout=self.timeout)
@@ -145,6 +152,7 @@ def _embed_batch(
                 input=batch,
                 options=generation_kwargs,
                 keep_alive=self.keep_alive,
+                dimensions=self.dimensions,
             )
             all_embeddings.extend(result["embeddings"])
 
@@ -166,6 +174,7 @@ async def _embed_batch_async(
                 input=batch,
                 options=generation_kwargs,
                 keep_alive=self.keep_alive,
+                dimensions=self.dimensions,
             )
             for batch in batches
         ]

@@ -8,9 +8,7 @@
 @component
 class OllamaTextEmbedder:
     """
-    Computes the embeddings of a list of Documents and stores the obtained vectors in each Document's embedding field.
-
-    It uses embedding models compatible with the Ollama Library.
+    Computes the embeddings of a string using embedding models compatible with the Ollama Library.
 
     Usage example:
     ```python
@@ -29,6 +27,7 @@ def __init__(
         generation_kwargs: dict[str, Any] | None = None,
         timeout: int = 120,
         keep_alive: float | str | None = None,
+        dimensions: int | None = None,
     ) -> None:
         """
         Create a new OllamaTextEmbedder instance.
@@ -51,12 +50,17 @@ def __init__(
             - a number in seconds (such as 3600)
             - any negative number which will keep the model loaded in memory (e.g. -1 or "-1m")
             - '0' which will unload the model immediately after generating a response.
+        :param dimensions:
+            The desired number of dimensions in the embedding output. Only supported by models
+            that implement Matryoshka Representation Learning (MRL), such as nomic-embed-text-v1.5,
+            mxbai-embed-large, and qwen3-embedding. If None (default), the full vector is returned.
         """
         self.keep_alive = keep_alive
         self.timeout = timeout
         self.generation_kwargs = generation_kwargs or {}
         self.url = url
         self.model = model
+        self.dimensions = dimensions
 
         self._client = Client(host=self.url, timeout=self.timeout)
         self._async_client = AsyncClient(host=self.url, timeout=self.timeout)
@@ -78,15 +82,15 @@ def run(
             - `embedding`: The computed embeddings
             - `meta`: The metadata collected during the embedding process
         """
-        result = self._client.embeddings(
+        result = self._client.embed(
             model=self.model,
-            prompt=text,
+            input=text,
             options=generation_kwargs,
             keep_alive=self.keep_alive,
-        ).model_dump()
-        result["meta"] = {"model": self.model}
+            dimensions=self.dimensions,
+        )
 
-        return result
+        return {"embedding": result["embeddings"][0], "meta": {"model": self.model}}
 
     @component.output_types(embedding=list[float], meta=dict[str, Any])
     async def run_async(
@@ -105,13 +109,12 @@ async def run_async(
             - `embedding`: The computed embeddings
             - `meta`: The metadata collected during the embedding process
         """
-        response = await self._async_client.embeddings(
+        result = await self._async_client.embed(
             model=self.model,
-            prompt=text,
+            input=text,
             options=generation_kwargs,
             keep_alive=self.keep_alive,
+            dimensions=self.dimensions,
         )
-        result = response.model_dump()
-        result["meta"] = {"model": self.model}
 
-        return result
+        return {"embedding": result["embeddings"][0], "meta": {"model": self.model}}
@@ -1,5 +1,8 @@
+from unittest.mock import AsyncMock, MagicMock
+
 import pytest
 from haystack import Document
+from haystack.core.serialization import default_from_dict, default_to_dict
 from ollama._types import ResponseError
 
 from haystack_integrations.components.embedders.ollama import OllamaDocumentEmbedder
@@ -74,3 +77,92 @@ async def test_run_async(self):
         documents = result["documents"]
         assert len(documents) == 3
         assert all(isinstance(element, float) for document in documents for element in document.embedding)
+
+    def test_dimensions_default_is_none(self):
+        """Without an explicit argument, `dimensions` is left as None."""
+        assert OllamaDocumentEmbedder().dimensions is None
+
+    def test_dimensions_stored_on_instance(self):
+        """The constructor keeps the requested `dimensions` on the instance."""
+        assert OllamaDocumentEmbedder(dimensions=512).dimensions == 512
+
+    def test_dimensions_passed_to_embed_client(self):
+        """`_embed_batch` hands the configured `dimensions` to the sync client's `embed` call."""
+        doc_embedder = OllamaDocumentEmbedder(dimensions=512)
+        doc_embedder._client.embed = MagicMock(return_value={"embeddings": [[0.1, 0.2, 0.3]]})
+
+        doc_embedder._embed_batch(["hello world"], batch_size=32)
+
+        forwarded_kwargs = doc_embedder._client.embed.call_args.kwargs
+        assert forwarded_kwargs["dimensions"] == 512
+
+    def test_none_dimensions_passed_to_embed_client(self):
+        """With `dimensions=None`, `_embed_batch` still passes the keyword, with value None."""
+        doc_embedder = OllamaDocumentEmbedder(dimensions=None)
+        doc_embedder._client.embed = MagicMock(return_value={"embeddings": [[0.1, 0.2, 0.3]]})
+
+        doc_embedder._embed_batch(["hello"], batch_size=32)
+
+        forwarded_kwargs = doc_embedder._client.embed.call_args.kwargs
+        assert forwarded_kwargs["dimensions"] is None
+
+    @pytest.mark.asyncio
+    async def test_dimensions_passed_to_async_embed_client(self):
+        """`_embed_batch_async` hands the configured `dimensions` to the async client's `embed` call."""
+        doc_embedder = OllamaDocumentEmbedder(dimensions=256)
+        doc_embedder._async_client.embed = AsyncMock(return_value={"embeddings": [[0.1, 0.2, 0.3]]})
+
+        await doc_embedder._embed_batch_async(["hello"], batch_size=32)
+
+        forwarded_kwargs = doc_embedder._async_client.embed.call_args.kwargs
+        assert forwarded_kwargs["dimensions"] == 256
+
+    def test_to_dict_contains_dimensions(self):
+        """An explicit `dimensions` value shows up in the `init_parameters` built by `default_to_dict`."""
+        # NOTE(review): every init parameter is passed to `default_to_dict` by hand here, so this
+        # exercises the helper rather than how the component itself serializes — confirm intent.
+        doc_embedder = OllamaDocumentEmbedder(dimensions=512)
+        init_parameters = default_to_dict(
+            doc_embedder,
+            model=doc_embedder.model,
+            url=doc_embedder.url,
+            generation_kwargs=doc_embedder.generation_kwargs,
+            timeout=doc_embedder.timeout,
+            keep_alive=doc_embedder.keep_alive,
+            prefix=doc_embedder.prefix,
+            suffix=doc_embedder.suffix,
+            progress_bar=doc_embedder.progress_bar,
+            meta_fields_to_embed=doc_embedder.meta_fields_to_embed,
+            embedding_separator=doc_embedder.embedding_separator,
+            batch_size=doc_embedder.batch_size,
+            dimensions=doc_embedder.dimensions,
+        )["init_parameters"]
+        assert init_parameters["dimensions"] == 512
+
+    def test_to_dict_contains_dimensions_none(self):
+        """A default (None) `dimensions` is kept as None in the `init_parameters` built by `default_to_dict`."""
+        # NOTE(review): every init parameter is passed to `default_to_dict` by hand here, so this
+        # exercises the helper rather than how the component itself serializes — confirm intent.
+        doc_embedder = OllamaDocumentEmbedder()
+        init_parameters = default_to_dict(
+            doc_embedder,
+            model=doc_embedder.model,
+            url=doc_embedder.url,
+            generation_kwargs=doc_embedder.generation_kwargs,
+            timeout=doc_embedder.timeout,
+            keep_alive=doc_embedder.keep_alive,
+            prefix=doc_embedder.prefix,
+            suffix=doc_embedder.suffix,
+            progress_bar=doc_embedder.progress_bar,
+            meta_fields_to_embed=doc_embedder.meta_fields_to_embed,
+            embedding_separator=doc_embedder.embedding_separator,
+            batch_size=doc_embedder.batch_size,
+            dimensions=doc_embedder.dimensions,
+        )["init_parameters"]
+        assert init_parameters["dimensions"] is None
+
+    def test_from_dict_restores_dimensions(self):
+        """`default_from_dict` rebuilds the embedder with the serialized `dimensions` value."""
+        init_parameters = {"model": "nomic-embed-text", "url": "http://localhost:11434", "dimensions": 512}
+        serialized = {
+            "type": "haystack_integrations.components.embedders.ollama.document_embedder.OllamaDocumentEmbedder",
+            "init_parameters": init_parameters,
+        }
+        restored = default_from_dict(OllamaDocumentEmbedder, serialized)
+        assert restored.dimensions == 512
@@ -1,4 +1,8 @@
+import asyncio
+from unittest.mock import AsyncMock, MagicMock
+
 import pytest
+from haystack.core.serialization import default_from_dict, default_to_dict
 from ollama._types import ResponseError
 
 from haystack_integrations.components.embedders.ollama import OllamaTextEmbedder
@@ -58,3 +62,85 @@ async def test_run_async(self):
         assert isinstance(reply, dict)
         assert all(isinstance(element, float) for element in reply["embedding"])
         assert reply["meta"]["model"] == "all-minilm"
+
+    def test_dimensions_default_is_none(self):
+        """Without an explicit argument, `dimensions` is left as None."""
+        assert OllamaTextEmbedder().dimensions is None
+
+    def test_dimensions_stored_on_instance(self):
+        """The constructor keeps the requested `dimensions` on the instance."""
+        assert OllamaTextEmbedder(dimensions=256).dimensions == 256
+
+    def test_dimensions_passed_to_embed_client(self):
+        """`run` hands the configured `dimensions` to the sync client's `embed` call."""
+        text_embedder = OllamaTextEmbedder(dimensions=256)
+        text_embedder._client.embed = MagicMock(return_value={"embeddings": [[0.1, 0.2, 0.3]]})
+
+        text_embedder.run(text="hello world")
+
+        forwarded_kwargs = text_embedder._client.embed.call_args.kwargs
+        assert forwarded_kwargs["dimensions"] == 256
+
+    def test_none_dimensions_passed_to_embed_client(self):
+        """With `dimensions=None`, `run` still passes the keyword, with value None."""
+        text_embedder = OllamaTextEmbedder(dimensions=None)
+        text_embedder._client.embed = MagicMock(return_value={"embeddings": [[0.1, 0.2, 0.3]]})
+
+        text_embedder.run(text="hello")
+
+        forwarded_kwargs = text_embedder._client.embed.call_args.kwargs
+        assert forwarded_kwargs["dimensions"] is None
+
+    def test_dimensions_passed_to_async_embed_client(self):
+        """`run_async` hands the configured `dimensions` to the async client's `embed` call."""
+        text_embedder = OllamaTextEmbedder(dimensions=128)
+        text_embedder._async_client.embed = AsyncMock(return_value={"embeddings": [[0.1, 0.2, 0.3]]})
+
+        asyncio.run(text_embedder.run_async(text="hello"))
+
+        forwarded_kwargs = text_embedder._async_client.embed.call_args.kwargs
+        assert forwarded_kwargs["dimensions"] == 128
+
+    def test_to_dict_contains_dimensions(self):
+        """An explicit `dimensions` value shows up in the `init_parameters` built by `default_to_dict`."""
+        # NOTE(review): every init parameter is passed to `default_to_dict` by hand here, so this
+        # exercises the helper rather than how the component itself serializes — confirm intent.
+        text_embedder = OllamaTextEmbedder(dimensions=256)
+        init_parameters = default_to_dict(
+            text_embedder,
+            model=text_embedder.model,
+            url=text_embedder.url,
+            generation_kwargs=text_embedder.generation_kwargs,
+            timeout=text_embedder.timeout,
+            keep_alive=text_embedder.keep_alive,
+            dimensions=text_embedder.dimensions,
+        )["init_parameters"]
+        assert init_parameters["dimensions"] == 256
+
+    def test_to_dict_contains_dimensions_none(self):
+        """A default (None) `dimensions` is kept as None in the `init_parameters` built by `default_to_dict`."""
+        # NOTE(review): every init parameter is passed to `default_to_dict` by hand here, so this
+        # exercises the helper rather than how the component itself serializes — confirm intent.
+        text_embedder = OllamaTextEmbedder()
+        init_parameters = default_to_dict(
+            text_embedder,
+            model=text_embedder.model,
+            url=text_embedder.url,
+            generation_kwargs=text_embedder.generation_kwargs,
+            timeout=text_embedder.timeout,
+            keep_alive=text_embedder.keep_alive,
+            dimensions=text_embedder.dimensions,
+        )["init_parameters"]
+        assert init_parameters["dimensions"] is None
+
+    def test_from_dict_restores_dimensions(self):
+        """`default_from_dict` rebuilds the embedder with the serialized `dimensions` value."""
+        init_parameters = {"model": "nomic-embed-text", "url": "http://localhost:11434", "dimensions": 256}
+        serialized = {
+            "type": "haystack_integrations.components.embedders.ollama.text_embedder.OllamaTextEmbedder",
+            "init_parameters": init_parameters,
+        }
+        restored = default_from_dict(OllamaTextEmbedder, serialized)
+        assert restored.dimensions == 256