Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to
### Added

- 🔧(backend) settings CONVERSION_UPLOAD_ENABLED to control usage of docspec
- ✨(backend) add a public_search API view to the Document viewset #2068

### Changed

Expand Down
40 changes: 40 additions & 0 deletions src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,9 @@ class DocumentViewSet(
15. **AI Proxy**: Proxy an AI request to an external AI service.
Example: POST /api/v1.0/documents/<resource_id>/ai-proxy

16. **Public Search**: Search within a public document and the related tree.
Example: GET /api/v1.0/documents/<resource_id>/public_search/?q=search_text

### Ordering: created_at, updated_at, is_favorite, title

Example:
Expand Down Expand Up @@ -1536,6 +1539,43 @@ def _list_descendants(self, request, validated_data):
queryset = filterset.qs
return self.get_response_for_queryset(queryset)

@drf.decorators.action(detail=True, methods=["get"], url_path="public_search")
def public_search(self, request, *args, **kwargs):
    """
    Returns a DRF response containing the filtered and ordered document list
    for public search on the tree of a given public document.

    The search text is read from the request parameter 'q', validated by
    `SearchDocumentSerializer` (an invalid payload raises a validation error).

    The search scope is the highest public ancestor of the requested document
    (or the document itself when it is public) together with its descendants,
    excluding documents whose `ancestors_deleted_at` is set.

    The filtering is done on the model field 'title', there is no full text search.

    The ordering is always by the most recent first (`-updated_at`).
    """
    document = self.get_object()

    params = serializers.SearchDocumentSerializer(data=request.query_params)
    params.is_valid(raise_exception=True)
    text = params.validated_data["q"]

    public_root = document.get_highest_public_ancestor()
    if public_root is None:
        # `get_highest_public_ancestor` may return None when nothing in the
        # lineage is public; refuse explicitly instead of failing with an
        # AttributeError on the next line.
        raise drf.exceptions.PermissionDenied()

    # We limit the queryset to the current public tree, filtering out deleted documents.
    queryset = public_root.get_descendants(include_self=True).filter(
        ancestors_deleted_at__isnull=True
    )

    filterset = DocumentFilter({"title": text}, queryset=queryset)
    if not filterset.is_valid():
        raise drf.exceptions.ValidationError(filterset.errors)

    # The deleted-documents exclusion is already part of `queryset`, so it is
    # not re-applied after filtering on the title.
    queryset = filterset.filter_queryset(queryset)

    return self.get_response_for_queryset(queryset.order_by("-updated_at"))

@drf.decorators.action(detail=True, methods=["get"], url_path="versions")
def versions_list(self, request, *args, **kwargs):
"""
Expand Down
19 changes: 19 additions & 0 deletions src/backend/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1018,6 +1018,22 @@ def content(self, content):

self._content = content

def get_highest_public_ancestor(self):
    """
    Get the highest ancestor of the document that has a public link reach.
    If the document itself has a public link reach, it will be returned.
    If there is no public ancestor, None will be returned.
    """
    if self.link_reach == LinkReachChoices.PUBLIC:
        return self

    # NOTE(review): assumes `path` is a materialized path in which an
    # ancestor's path is a prefix of its descendants' paths — confirm.
    # Under that assumption ascending order puts the ancestor closest to the
    # root first, which is the "highest" one; descending order would return
    # the nearest public ancestor instead.
    return (
        self.get_ancestors()
        .filter(link_reach=LinkReachChoices.PUBLIC)
        .order_by("path")
        .first()
    )

def get_content_response(self, version_id=""):
"""Get the content in a specific version of the document"""
params = {
Expand Down Expand Up @@ -1283,6 +1299,8 @@ def get_abilities(self, user):
else (is_owner_or_admin or (user.is_authenticated and self.creator == user))
) and not is_deleted

is_public = link_reach == LinkReachChoices.PUBLIC

ai_allow_reach_from = settings.AI_ALLOW_REACH_FROM
ai_access = any(
[
Expand Down Expand Up @@ -1319,6 +1337,7 @@ def get_abilities(self, user):
"mask": can_get and user.is_authenticated,
"move": is_owner_or_admin and not is_deleted,
"partial_update": can_update,
"public_search": is_public and not is_deleted,
"restore": is_owner,
"retrieve": retrieve,
"media_auth": can_get,
Expand Down
288 changes: 288 additions & 0 deletions src/backend/core/tests/documents/test_api_documents_public_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,288 @@
"""
Tests for Documents API endpoint: public_search action.
"""

import datetime

from django.utils import timezone

import pytest
from rest_framework.test import APIClient

from core import factories, models

pytestmark = pytest.mark.django_db


def test_api_documents_public_search_missing_q():
    """A request without the `q` query parameter is rejected with a 400."""
    api_client = APIClient()

    public_doc = factories.DocumentFactory(link_reach="public")
    url = f"/api/v1.0/documents/{public_doc.id}/public_search/"

    resp = api_client.get(url, data={})

    assert resp.status_code == 400
    assert resp.json() == {"q": ["This field is required."]}


def test_api_documents_public_search_blank_q():
    """A whitespace-only `q` returns every document of the public tree."""
    api_client = APIClient()
    root = factories.DocumentFactory(link_reach="public")
    child = factories.DocumentFactory(parent=root)
    url = f"/api/v1.0/documents/{root.id}/public_search/"

    resp = api_client.get(url, data={"q": " "})

    assert resp.status_code == 200
    # Both the public root and its child must be returned, and nothing else.
    found_ids = {item["id"] for item in resp.json()["results"]}
    assert found_ids == {str(root.id), str(child.id)}


# ---------------------------------------------------------------------------
# Permissions
# ---------------------------------------------------------------------------


def test_api_documents_public_search_anonymous_on_public_document_tree():
    """An anonymous client may search the tree of a public document."""
    api_client = APIClient()

    root = factories.DocumentFactory(link_reach="public")
    matching = factories.DocumentFactory(parent=root, title="match me")
    non_matching = factories.DocumentFactory(parent=root, title="don't find me")
    url = f"/api/v1.0/documents/{root.id}/public_search/"

    resp = api_client.get(url, data={"q": "match"})
    assert resp.status_code == 200

    # Only the child whose title contains the query is expected.
    found_ids = {item["id"] for item in resp.json()["results"]}
    assert found_ids == {str(matching.id)}
    assert str(non_matching.id) not in found_ids


def test_api_documents_public_search_anonymous_on_restricted_document():
    """An anonymous client gets a 401 when searching a restricted document."""
    api_client = APIClient()
    restricted_doc = factories.DocumentFactory(link_reach="restricted")
    url = f"/api/v1.0/documents/{restricted_doc.id}/public_search/"

    resp = api_client.get(url, data={"q": "anything"})

    expected_body = {"detail": "Authentication credentials were not provided."}
    assert resp.status_code == 401
    assert resp.json() == expected_body


def test_api_documents_public_search_anonymous_on_authenticated_document():
    """An anonymous client gets a 401 when searching an authenticated-only document."""
    api_client = APIClient()
    authenticated_doc = factories.DocumentFactory(link_reach="authenticated")
    url = f"/api/v1.0/documents/{authenticated_doc.id}/public_search/"

    resp = api_client.get(url, data={"q": "anything"})

    expected_body = {"detail": "Authentication credentials were not provided."}
    assert resp.status_code == 401
    assert resp.json() == expected_body


def test_api_documents_public_search_authenticated_on_restricted_document():
    """A logged-in user gets a 403 when searching a restricted document they don't own."""
    logged_in_user = factories.UserFactory()
    restricted_doc = factories.DocumentFactory(link_reach="restricted")
    url = f"/api/v1.0/documents/{restricted_doc.id}/public_search/"

    api_client = APIClient()
    api_client.force_login(logged_in_user)
    resp = api_client.get(url, data={"q": "anything"})

    expected_body = {"detail": "You do not have permission to perform this action."}
    assert resp.status_code == 403
    assert resp.json() == expected_body
Comment thread
Ash-Crow marked this conversation as resolved.


def test_api_documents_public_search_authenticated_on_authenticated_document():
    """A logged-in user gets a 403 when searching an authenticated document they don't own."""
    logged_in_user = factories.UserFactory()
    authenticated_doc = factories.DocumentFactory(link_reach="authenticated")
    url = f"/api/v1.0/documents/{authenticated_doc.id}/public_search/"

    api_client = APIClient()
    api_client.force_login(logged_in_user)
    resp = api_client.get(url, data={"q": "anything"})

    expected_body = {"detail": "You do not have permission to perform this action."}
    assert resp.status_code == 403
    assert resp.json() == expected_body


# ---------------------------------------------------------------------------
# Public via ancestor
# ---------------------------------------------------------------------------


def test_api_documents_public_search_document_public_via_ancestor():
    """
    Searching from a restricted child of a public root is allowed, and the
    search covers the whole tree of that public root.
    """
    api_client = APIClient()

    public_root = factories.DocumentFactory(link_reach="public", title="root")
    restricted_child = factories.DocumentFactory(
        parent=public_root, link_reach="restricted", title="child alpha"
    )
    sibling = factories.DocumentFactory(parent=public_root, title="sibling alpha")
    grand_child = factories.DocumentFactory(
        parent=restricted_child, title="grand alpha"
    )

    # Sanity check: the child inherits the public reach from the root.
    assert restricted_child.computed_link_reach == models.LinkReachChoices.PUBLIC

    url = f"/api/v1.0/documents/{restricted_child.id}/public_search/"
    resp = api_client.get(url, data={"q": "alpha"})
    assert resp.status_code == 200

    payload = resp.json()
    found_ids = {item["id"] for item in payload["results"]}

    # Every document under the root whose title matches "alpha" is returned;
    # the root itself ("root") does not match.
    expected_ids = {
        str(restricted_child.id),
        str(sibling.id),
        str(grand_child.id),
    }
    assert found_ids == expected_ids


def test_api_documents_public_search_scope_limited_to_public_tree():
    """
    Documents outside the public tree should not appear in results, even if they
    match the query.
    """
    client = APIClient()

    private_root = factories.DocumentFactory(
        link_reach="restricted", title="private root"
    )
    public_doc = factories.DocumentFactory(
        parent=private_root, link_reach="public", title="public doc"
    )
    inside = factories.DocumentFactory(parent=public_doc, title="alpha inside")

    # Separate tree — should never appear
    other_root = factories.DocumentFactory(link_reach="public", title="other root")
    outside = factories.DocumentFactory(parent=other_root, title="alpha outside")

    response = client.get(
        f"/api/v1.0/documents/{public_doc.id}/public_search/",
        data={"q": "alpha"},
    )
    assert response.status_code == 200

    # Only the matching document located under `public_doc` is expected; the
    # matching document of the unrelated public tree must be absent.
    result_ids = {r["id"] for r in response.json()["results"]}
    assert len(result_ids) == 1
    assert str(inside.id) in result_ids
    assert str(outside.id) not in result_ids


def test_api_documents_public_search_excludes_deleted_documents():
    """A soft-deleted document is left out of the search results."""
    api_client = APIClient()
    public_root = factories.DocumentFactory(link_reach="public")
    live_doc = factories.DocumentFactory(parent=public_root, title="alive alpha")
    removed_doc = factories.DocumentFactory(
        parent=public_root,
        title="deleted alpha",
        deleted_at="2024-01-01T00:00:00Z",
        ancestors_deleted_at="2024-01-01T00:00:00Z",
    )
    url = f"/api/v1.0/documents/{public_root.id}/public_search/"

    resp = api_client.get(url, data={"q": "alpha"})
    assert resp.status_code == 200

    # Both titles match "alpha" but only the non-deleted one may be listed.
    found_ids = {item["id"] for item in resp.json()["results"]}
    assert found_ids == {str(live_doc.id)}
    assert str(removed_doc.id) not in found_ids


def test_api_documents_public_search_excludes_documents_with_deleted_ancestor():
    """A document located under a deleted ancestor is left out of the results."""
    api_client = APIClient()
    public_root = factories.DocumentFactory(link_reach="public")
    removed_parent = factories.DocumentFactory(
        parent=public_root,
        title="deleted parent",
        deleted_at="2024-01-01T00:00:00Z",
        ancestors_deleted_at="2024-01-01T00:00:00Z",
    )
    # Not deleted itself, but flagged through `ancestors_deleted_at`.
    orphaned_doc = factories.DocumentFactory(
        parent=removed_parent,
        title="orphan alpha",
        ancestors_deleted_at="2024-01-01T00:00:00Z",
    )
    live_doc = factories.DocumentFactory(parent=public_root, title="alive alpha")
    url = f"/api/v1.0/documents/{public_root.id}/public_search/"

    resp = api_client.get(url, data={"q": "alpha"})

    assert resp.status_code == 200

    found_ids = {item["id"] for item in resp.json()["results"]}
    assert found_ids == {str(live_doc.id)}
    assert str(orphaned_doc.id) not in found_ids


# ---------------------------------------------------------------------------
# Ordering
# ---------------------------------------------------------------------------


def test_api_documents_public_search_ordered_by_most_recent_first():
    """The most recently updated document comes first in the results."""
    api_client = APIClient()

    public_root = factories.DocumentFactory(link_reach="public")
    old = factories.DocumentFactory(parent=public_root, title="old alpha")
    new = factories.DocumentFactory(parent=public_root, title="new alpha")

    # Write `updated_at` through queryset updates so the two documents get
    # clearly distinct timestamps, ten days apart.
    ten_days_ago = timezone.now() - datetime.timedelta(days=10)
    models.Document.objects.filter(pk=old.pk).update(updated_at=ten_days_ago)
    models.Document.objects.filter(pk=new.pk).update(updated_at=timezone.now())

    url = f"/api/v1.0/documents/{public_root.id}/public_search/"
    resp = api_client.get(url, data={"q": "alpha"})
    assert resp.status_code == 200

    ordered_ids = [item["id"] for item in resp.json()["results"]]
    assert len(ordered_ids) == 2
    newest_position = ordered_ids.index(str(new.id))
    oldest_position = ordered_ids.index(str(old.id))
    assert newest_position < oldest_position
Loading
Loading