Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
63e66be
test(chroma): use async DocumentStore mixin tests
ShubhamGond105 Apr 27, 2026
28d58bc
chore(chroma): bump minimum haystack-ai version to 2.28.0
ShubhamGond105 Apr 28, 2026
b8fb275
test(chroma): fix lint issues
ShubhamGond105 Apr 28, 2026
1a238a5
test(chroma): override duplicate fail test for Chroma behaviour
ShubhamGond105 Apr 29, 2026
c2ac6f6
test(chroma): remove WriteDocumentsAsyncTest mixin - Chroma has custo…
ShubhamGond105 Apr 29, 2026
494a433
feat(supabase): add SupabaseGroongaDocumentStore and SupabaseGroongaR…
ShubhamGond105 May 2, 2026
17877e7
chore: resolve merge conflict in chroma test file
ShubhamGond105 May 11, 2026
7ddf365
fix(supabase): fix lint errors and add supabase test dependency
ShubhamGond105 May 11, 2026
e8f53bb
fix(supabase): fix mypy type errors in groonga document store
ShubhamGond105 May 11, 2026
446088c
fix(supabase): fix mypy union-attr error and count_documents implemen…
ShubhamGond105 May 11, 2026
5c8d270
Merge branch 'main' into feat/supabase-groonga
davidsbatista May 19, 2026
ceb8394
Merge branch 'main' into feat/supabase-groonga
davidsbatista May 19, 2026
3ae853c
fix(supabase): address reviewer feedback - lazy init, DocumentStore b…
ShubhamGond105 May 19, 2026
5be352e
fix(supabase): fix lint errors - imports, assert, formatting
ShubhamGond105 May 19, 2026
d679975
fix(supabase): fix mypy type errors - CountMethod and union-attr
ShubhamGond105 May 19, 2026
3887dff
Merge branch 'main' into feat/supabase-groonga
davidsbatista May 20, 2026
78231ce
converting methods to static
davidsbatista May 20, 2026
6d9ef28
Merge branch 'main' into feat/supabase-groonga
davidsbatista May 20, 2026
22005a5
Merge branch 'main' into feat/supabase-groonga
davidsbatista May 21, 2026
310aa13
Merge branch 'main' into feat/supabase-groonga
davidsbatista May 21, 2026
95808fb
Merge branch 'main' into feat/supabase-groonga
davidsbatista May 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions integrations/supabase/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ dependencies = [
"pytest-rerunfailures",
"mypy",
"pip",
"supabase",
]

[tool.hatch.envs.test.scripts]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from .embedding_retriever import SupabasePgvectorEmbeddingRetriever
from .groonga_retriever import SupabaseGroongaRetriever
from .keyword_retriever import SupabasePgvectorKeywordRetriever

__all__ = ["SupabasePgvectorEmbeddingRetriever", "SupabasePgvectorKeywordRetriever"]
__all__ = [
"SupabaseGroongaRetriever",
"SupabasePgvectorEmbeddingRetriever",
"SupabasePgvectorKeywordRetriever",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import copy
from typing import Any

from haystack import component, default_from_dict, default_to_dict
from haystack.dataclasses import Document
from haystack.document_stores.types import FilterPolicy
from haystack.document_stores.types.filter_policy import apply_filter_policy

from haystack_integrations.document_stores.supabase import SupabaseGroongaDocumentStore


@component
class SupabaseGroongaRetriever:
    """
    Retrieves documents from SupabaseGroongaDocumentStore using PGroonga full-text search.

    This retriever works without embeddings — it searches documents using plain text queries.
    It can be used alongside SupabasePgvectorEmbeddingRetriever in hybrid search pipelines.

    Note: `run_async()` is available so the component can be used from async code, but it is
    not natively asynchronous. The supabase-py sync client does not expose awaitable query
    methods, so `run_async()` executes the same synchronous retrieval as `run()` and blocks
    the event loop while the query is in flight.

    Example usage:

    ```python
    from haystack_integrations.document_stores.supabase import SupabaseGroongaDocumentStore
    from haystack_integrations.components.retrievers.supabase import SupabaseGroongaRetriever
    from haystack.utils import Secret

    document_store = SupabaseGroongaDocumentStore(
        supabase_url="https://<project>.supabase.co",
        supabase_key=Secret.from_env_var("SUPABASE_SERVICE_KEY"),
        table_name="haystack_fts_documents",
    )
    document_store.warm_up()

    retriever = SupabaseGroongaRetriever(document_store=document_store, top_k=10)
    result = retriever.run(query="python programming")
    print(result["documents"])
    ```
    """

    def __init__(
        self,
        *,
        document_store: SupabaseGroongaDocumentStore,
        filters: dict[str, Any] | None = None,
        top_k: int = 10,
        filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
    ) -> None:
        """
        Initialize the SupabaseGroongaRetriever.

        :param document_store: An instance of SupabaseGroongaDocumentStore.
        :param filters: Optional filters applied to retrieved Documents. Stored as an empty
            dict when not provided.
        :param top_k: Maximum number of Documents to return. Defaults to 10.
        :param filter_policy: Policy to determine how init-time and runtime filters are combined.
            Accepts a FilterPolicy member or its string form.
        :raises ValueError: If document_store is not an instance of SupabaseGroongaDocumentStore.
        """
        if not isinstance(document_store, SupabaseGroongaDocumentStore):
            msg = "document_store must be an instance of SupabaseGroongaDocumentStore"
            raise ValueError(msg)

        self.document_store = document_store
        self.filters = filters or {}
        self.top_k = top_k
        # Normalise to the enum so later comparisons and `.value` serialization are uniform.
        self.filter_policy = (
            filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy)
        )

    @component.output_types(documents=list[Document])
    def run(
        self,
        query: str,
        filters: dict[str, Any] | None = None,
        top_k: int | None = None,
    ) -> dict[str, list[Document]]:
        """
        Runs the retriever on the given query.

        :param query: The text query to search for. An empty query returns no documents
            without contacting the document store.
        :param filters: Optional runtime filters. Merged or replaced based on filter_policy.
        :param top_k: Optional override for maximum number of documents to return.
        :returns: Dictionary with key "documents" containing list of matching Documents.
        """
        if not query:
            return {"documents": []}

        merged_filters = self._merge_filters(filters)
        # `is not None` (rather than truthiness) so an explicit runtime top_k always wins.
        effective_top_k = top_k if top_k is not None else self.top_k

        documents = self.document_store._groonga_retrieval(
            query=query,
            top_k=effective_top_k,
            filters=merged_filters,
        )

        return {"documents": documents}

    @component.output_types(documents=list[Document])
    async def run_async(
        self,
        query: str,
        filters: dict[str, Any] | None = None,
        top_k: int | None = None,
    ) -> dict[str, list[Document]]:
        """
        Async version of run().

        Note: supabase-py's sync client does not support native async queries.
        This method runs the synchronous retrieval and returns the result, so the
        event loop is blocked while the query executes.
        For fully async support, consider using acreate_client() from supabase-py
        and refactoring the document store accordingly.

        :param query: The text query to search for.
        :param filters: Optional runtime filters. Merged or replaced based on filter_policy.
        :param top_k: Optional override for maximum number of documents to return.
        :returns: Dictionary with key "documents" containing list of matching Documents.
        """
        return self.run(query=query, filters=filters, top_k=top_k)

    def _merge_filters(self, runtime_filters: dict[str, Any] | None) -> dict[str, Any]:
        """
        Combines runtime filters with init-time filters according to filter_policy.

        The combination is delegated to Haystack's `apply_filter_policy` instead of a shallow
        dict merge. Haystack filters share the top-level keys "operator" and "conditions", so
        a shallow `{**init, **runtime}` merge lets the runtime "conditions" list overwrite the
        init-time one. For example, an init-time `meta.language == "en"` condition would be
        silently dropped when a runtime `meta.topic == "python"` filter is supplied under
        FilterPolicy.MERGE; the expected result keeps both conditions under a single AND.

        :param runtime_filters: Filters passed at runtime, or None to use only init-time filters.
        :returns: The filters to send to the document store; an empty dict when there are none.
        """
        # `apply_filter_policy` is typed as returning an optional dict; fall back to an empty
        # dict so this method keeps its always-a-dict return contract.
        return apply_filter_policy(self.filter_policy, self.filters, runtime_filters) or {}

    def to_dict(self) -> dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns: Dictionary with serialized data.
        """
        return default_to_dict(
            self,
            filters=self.filters,
            top_k=self.top_k,
            filter_policy=self.filter_policy.value,
            document_store=self.document_store.to_dict(),
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "SupabaseGroongaRetriever":
        """
        Deserializes the component from a dictionary.

        :param data: Dictionary to deserialize from. It is deep-copied first, so the caller's
            dictionary is not modified.
        :returns: Deserialized component.
        """
        data = copy.deepcopy(data)
        init_params = data["init_parameters"]
        init_params["document_store"] = SupabaseGroongaDocumentStore.from_dict(init_params["document_store"])
        # filter_policy is serialized as its string value; convert it back to the enum if present.
        if filter_policy := init_params.get("filter_policy"):
            init_params["filter_policy"] = FilterPolicy.from_str(filter_policy)
        return default_from_dict(cls, data)
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,9 @@
#
# SPDX-License-Identifier: Apache-2.0
from .document_store import SupabasePgvectorDocumentStore
from .groonga_document_store import SupabaseGroongaDocumentStore

__all__ = ["SupabasePgvectorDocumentStore"]
__all__ = [
"SupabaseGroongaDocumentStore",
"SupabasePgvectorDocumentStore",
]
Loading
Loading