Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
0473c0e
feat: #253 new branch to reduce noise; adding BE logic to implement c…
tbain Mar 18, 2026
2c0b959
feat: #253 Fixing API tests with regard to count logic changes
tbain Mar 23, 2026
8846805
Merge branch 'main' of https://github.com/openedx/openedx-core into t…
tbain Mar 25, 2026
b01964b
feat: #253 Resolving merge conflict with upstream main branch
tbain Mar 26, 2026
a23afe8
feat: #253 Fixing pylint issues
tbain Mar 26, 2026
3df68ab
feat: #253 Fixing pycodestyle issue
tbain Mar 26, 2026
435808c
feat: #253 Fixing pycodestyle issue
tbain Mar 26, 2026
457313b
feat: #253 Addressing first round Code review comments
tbain Mar 27, 2026
a14c56e
feat: #253 fixing count depth issue and updating appropriate unit tests
tbain Mar 27, 2026
2055a07
feat: #253 fixing spelling errors in comments
tbain Mar 27, 2026
939f18c
feat: #253 Fixing code review comments; fix incorrect unit test & fil…
tbain Mar 30, 2026
5762c33
feat: #253 adjusting comments per code review feedback
tbain Apr 1, 2026
79be83a
Merge branch 'main' of https://github.com/openedx/openedx-core into t…
tbain Apr 1, 2026
c2f79d2
feat: #253 fixing unit tests to work with upstream updates
tbain Apr 1, 2026
c017e8a
feat: #253 Changing usage_count to being in-mem/python based instead …
tbain Apr 3, 2026
7d42793
feat: #253 Fixing code quality pipeline issues
tbain Apr 3, 2026
1e47967
feat: #253 Moving usage_count logic out to API level, cleaning up/add…
tbain Apr 8, 2026
a87c877
Merge branch 'main' of https://github.com/openedx/openedx-core into t…
tbain Apr 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 7 additions & 29 deletions src/openedx_tagging/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ def get_filtered_tags( # pylint: disable=too-many-positional-arguments
def _get_filtered_tags_free_text(
self,
search_term: str | None,
include_counts: bool,
include_counts: bool, # pylint: disable=unused-argument
) -> TagDataQuerySet:
"""
Implementation of get_filtered_tags() for free text taxonomies.
Expand All @@ -499,16 +499,14 @@ def _get_filtered_tags_free_text(
_id=Value(None, output_field=models.CharField()),
)
qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id").order_by("value")
if include_counts:
return qs.annotate(usage_count=models.Count("value"))
else:
return qs.distinct() # type: ignore[return-value]

return qs.distinct() # type: ignore[return-value]

def _get_filtered_tags_one_level(
self,
parent_tag_value: str | None,
search_term: str | None,
include_counts: bool,
include_counts: bool, # pylint: disable=unused-argument
) -> TagDataQuerySet:
"""
Implementation of get_filtered_tags() for closed taxonomies, where
Expand All @@ -531,24 +529,14 @@ def _get_filtered_tags_one_level(
qs = qs.annotate(_id=F("id")) # ID has an underscore to encourage use of 'value' rather than this internal ID
qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id")
qs = qs.order_by("value")
if include_counts:
# We need to include the count of how many times this tag is used to tag objects.
# You'd think we could just use:
# qs = qs.annotate(usage_count=models.Count("objecttag__pk"))
# but that adds another join which starts creating a cross product and the children and usage_count become
# intertwined and multiplied with each other. So we use a subquery.
obj_tags = ObjectTag.objects.filter(tag_id=models.OuterRef("pk")).order_by().annotate(
# We need to use Func() to get Count() without GROUP BY - see https://stackoverflow.com/a/69031027
count=models.Func(F('id'), function='Count')
)
qs = qs.annotate(usage_count=models.Subquery(obj_tags.values('count')))

return qs # type: ignore[return-value]

def _get_filtered_tags_deep(
self,
parent_tag_value: str | None,
search_term: str | None,
include_counts: bool,
include_counts: bool, # pylint: disable=unused-argument
excluded_values: list[str] | None,
) -> TagDataQuerySet:
"""
Expand Down Expand Up @@ -615,17 +603,7 @@ def _get_filtered_tags_deep(
# lineage is a case-insensitive column storing "Root\tParent\t...\tThisValue\t", so
# ordering by it gives the tree sort order that we want.
qs = qs.order_by("lineage")
if include_counts:
# Including the counts is a bit tricky; see the comment above in _get_filtered_tags_one_level()
obj_tags = (
ObjectTag.objects.filter(tag_id=models.OuterRef("pk"))
.order_by()
.annotate(
# We need to use Func() to get Count() without GROUP BY - see https://stackoverflow.com/a/69031027
count=models.Func(F("id"), function="Count")
)
)
qs = qs.annotate(usage_count=models.Subquery(obj_tags.values("count")))

return qs # type: ignore[return-value]

def add_tag(
Expand Down
41 changes: 40 additions & 1 deletion src/openedx_tagging/rest_api/v1/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
"""
from __future__ import annotations

from collections import Counter, defaultdict

from django.core import exceptions
from django.db import models
from django.http import Http404, HttpResponse
Expand Down Expand Up @@ -847,17 +849,54 @@ def get_queryset(self) -> TagDataQuerySet:
)
if depth == 1:
# We're already returning just a single level. It will be paginated normally.
if include_counts:
return self._add_counts(results)
return results
elif full_depth_threshold and len(results) < full_depth_threshold:
# We can load and display all the tags in this (sub)tree at once:
self.pagination_class = DisabledTagsPagination
if include_counts:
return self._add_counts(results)
return results
else:
# We had to do a deep query, but we will only return one level of results.
# This is because the user did not request a deep response (via full_depth_threshold) or the result was too
# large (larger than the threshold).
# It will be paginated normally.
return results.filter(parent_value=parent_tag_value)
filtered_results = results.filter(parent_value=parent_tag_value)
if include_counts:
return self._add_counts(filtered_results)

return filtered_results

def _add_counts(self, tag_data: TagDataQuerySet) -> TagDataQuerySet:
"""
Add usage counts to a list of tag data dictionaries. For performance
reasons, we call this function with the list result of the
QuerySet so we can then add the counts in-memory rather than to a
QuerySet which would require a very expensive annotation to join the
in-memory data to the original QuerySet.
"""

taxonomy = self.get_taxonomy()
object_tags = taxonomy.objecttag_set.values_list("object_id", "tag__lineage")
tag_counts: Counter[str] = Counter()
object_tag_lineage_seen: defaultdict[str, set] = defaultdict(set)

for object_id, tag_lineage in object_tags:
# split the lineages to get a dict of {tag.value: [lineages]}
lineage_tags = list(tag_lineage.split('\t')) if tag_lineage else []
# de-duplicate based on if the lineage is already 'seen' per object
unseen_tags = [t for t in lineage_tags if t not in object_tag_lineage_seen[object_id]]

tag_counts.update(unseen_tags)
object_tag_lineage_seen[object_id].update(unseen_tags)

# In-memory 'annotation'; this is faster than using annotate() on the QuerySet.
for row in tag_data:
row["usage_count"] = tag_counts.get(row["value"], 0)

return tag_data

def post(self, request, *args, **kwargs):
"""
Expand Down
82 changes: 41 additions & 41 deletions tests/openedx_tagging/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,53 +752,53 @@ def get_object_tags():

@ddt.data(
("ChA", [
"Archaea (used: 1, children: 2)",
" Euryarchaeida (used: 0, children: 0)",
" Proteoarchaeota (used: 0, children: 0)",
"Bacteria (used: 0, children: 1)", # does not contain "cha" but a child does
" Archaebacteria (used: 1, children: 0)",
"Archaea (children: 2)",
" Euryarchaeida (children: 0)",
" Proteoarchaeota (children: 0)",
"Bacteria (children: 1)", # does not contain "cha" but a child does
" Archaebacteria (children: 0)",
]),
("ar", [
"Archaea (used: 1, children: 2)",
" Euryarchaeida (used: 0, children: 0)",
" Proteoarchaeota (used: 0, children: 0)",
"Bacteria (used: 0, children: 1)", # does not contain "ar" but a child does
" Archaebacteria (used: 1, children: 0)",
"Eukaryota (used: 0, children: 1)",
" Animalia (used: 1, children: 2)", # does not contain "ar" but a child does
" Arthropoda (used: 1, children: 0)",
" Cnidaria (used: 0, children: 0)",
"Archaea (children: 2)",
" Euryarchaeida (children: 0)",
" Proteoarchaeota (children: 0)",
"Bacteria (children: 1)", # does not contain "ar" but a child does
" Archaebacteria (children: 0)",
"Eukaryota (children: 1)",
" Animalia (children: 2)", # does not contain "ar" but a child does
" Arthropoda (children: 0)",
" Cnidaria (children: 0)",
]),
("aE", [
"Archaea (used: 1, children: 2)",
" Euryarchaeida (used: 0, children: 0)",
" Proteoarchaeota (used: 0, children: 0)",
"Bacteria (used: 0, children: 1)", # does not contain "ae" but a child does
" Archaebacteria (used: 1, children: 0)",
"Eukaryota (used: 0, children: 1)", # does not contain "ae" but a child does
" Plantae (used: 1, children: 0)",
"Archaea (children: 2)",
" Euryarchaeida (children: 0)",
" Proteoarchaeota (children: 0)",
"Bacteria (children: 1)", # does not contain "ae" but a child does
" Archaebacteria (children: 0)",
"Eukaryota (children: 1)", # does not contain "ae" but a child does
" Plantae (children: 0)",
]),
("a", [
"Archaea (used: 1, children: 3)",
" DPANN (used: 0, children: 0)",
" Euryarchaeida (used: 0, children: 0)",
" Proteoarchaeota (used: 0, children: 0)",
"Bacteria (used: 0, children: 2)",
" Archaebacteria (used: 1, children: 0)",
" Eubacteria (used: 0, children: 0)",
"Eukaryota (used: 0, children: 4)",
" Animalia (used: 1, children: 7)",
" Arthropoda (used: 1, children: 0)",
" Chordata (used: 0, children: 1)",
" Mammalia (used: 0, children: 0)",
" Cnidaria (used: 0, children: 0)",
" Ctenophora (used: 0, children: 0)",
" Gastrotrich (used: 1, children: 0)",
" Placozoa (used: 1, children: 0)",
" Porifera (used: 0, children: 0)",
" Monera (used: 1, children: 0)",
" Plantae (used: 1, children: 0)",
" Protista (used: 0, children: 0)",
"Archaea (children: 3)",
" DPANN (children: 0)",
" Euryarchaeida (children: 0)",
" Proteoarchaeota (children: 0)",
"Bacteria (children: 2)",
" Archaebacteria (children: 0)",
" Eubacteria (children: 0)",
"Eukaryota (children: 4)",
" Animalia (children: 7)",
" Arthropoda (children: 0)",
" Chordata (children: 1)",
" Mammalia (children: 0)",
" Cnidaria (children: 0)",
" Ctenophora (children: 0)",
" Gastrotrich (children: 0)",
" Placozoa (children: 0)",
" Porifera (children: 0)",
" Monera (children: 0)",
" Plantae (children: 0)",
" Protista (children: 0)",
]),
)
@ddt.unpack
Expand Down
53 changes: 6 additions & 47 deletions tests/openedx_tagging/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,8 +339,8 @@ def test_get_child_tags_one_level(self) -> None:
Test basic retrieval of tags one level below the "Eukaryota" root tag in
the closed taxonomy, using get_filtered_tags(). With counts included.
"""
result = list(self.taxonomy.get_filtered_tags(depth=1, parent_tag_value="Eukaryota", include_counts=True))
common_fields = {"depth": 1, "parent_value": "Eukaryota", "usage_count": 0, "external_id": None}
result = list(self.taxonomy.get_filtered_tags(depth=1, parent_tag_value="Eukaryota"))
common_fields = {"depth": 1, "parent_value": "Eukaryota", "external_id": None}
for r in result:
del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them
assert result == [
Expand Down Expand Up @@ -376,13 +376,12 @@ def test_get_depth_1_search_term(self) -> None:
"""
Filter the root tags to only those that match a search term
"""
result = list(self.taxonomy.get_filtered_tags(depth=1, search_term="ARCH", include_counts=True))
result = list(self.taxonomy.get_filtered_tags(depth=1, search_term="ARCH"))
assert result == [
{
"value": "Archaea",
"child_count": 3,
"depth": 0,
"usage_count": 0,
"parent_value": None,
"external_id": None,
"_id": 2, # These IDs are hard-coded in the test fixture file
Expand Down Expand Up @@ -501,13 +500,12 @@ def test_tags_deep(self) -> None:
"""
Test getting a deep tag in the taxonomy
"""
result = list(self.taxonomy.get_filtered_tags(parent_tag_value="Chordata", include_counts=True))
result = list(self.taxonomy.get_filtered_tags(parent_tag_value="Chordata"))
assert result == [
{
"value": "Mammalia",
"parent_value": "Chordata",
"depth": 3,
"usage_count": 0,
"child_count": 0,
"external_id": None,
"_id": 21, # These IDs are hard-coded in the test fixture file
Expand Down Expand Up @@ -540,29 +538,6 @@ def test_get_external_id(self) -> None:
assert result[0]["value"] == "Bacteria"
assert result[0]["external_id"] == "bct001"

def test_usage_count(self) -> None:
"""
Test that the usage count in the results is right
"""
api.tag_object(object_id="obj01", taxonomy=self.taxonomy, tags=["Bacteria"])
api.tag_object(object_id="obj02", taxonomy=self.taxonomy, tags=["Bacteria"])
api.tag_object(object_id="obj03", taxonomy=self.taxonomy, tags=["Bacteria"])
api.tag_object(object_id="obj04", taxonomy=self.taxonomy, tags=["Eubacteria"])
# Now the API should reflect these usage counts:
result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="bacteria", include_counts=True))
assert result == [
"Bacteria (None) (used: 3, children: 2)",
" Archaebacteria (Bacteria) (used: 0, children: 0)",
" Eubacteria (Bacteria) (used: 1, children: 0)",
]
# Same with depth=1, which uses a different query internally:
result1 = pretty_format_tags(
self.taxonomy.get_filtered_tags(search_term="bacteria", include_counts=True, depth=1)
)
assert result1 == [
"Bacteria (None) (used: 3, children: 2)",
]

def test_tree_sort(self) -> None:
"""
Verify that taxonomies can be sorted correctly in tree orer (case insensitive).
Expand Down Expand Up @@ -623,29 +598,13 @@ def test_get_filtered_tags(self):
{"value": "triple", **common_fields},
]

def test_get_filtered_tags_with_count(self):
"""
Test basic retrieval of all tags in the taxonomy.
Without counts included.
"""
result = list(self.taxonomy.get_filtered_tags(include_counts=True))
common_fields = {"child_count": 0, "depth": 0, "parent_value": None, "external_id": None, "_id": None}
assert result == [
# These should appear in alphabetical order:
{"value": "double", "usage_count": 2, **common_fields},
{"value": "solo", "usage_count": 1, **common_fields},
{"value": "triple", "usage_count": 3, **common_fields},
]

def test_get_filtered_tags_num_queries(self):
"""
Test that the number of queries used by get_filtered_tags() is fixed
and not O(n) or worse.
"""
with self.assertNumQueries(1):
self.test_get_filtered_tags()
with self.assertNumQueries(1):
self.test_get_filtered_tags_with_count()

def test_get_filtered_tags_with_search(self) -> None:
"""
Expand All @@ -655,8 +614,8 @@ def test_get_filtered_tags_with_search(self) -> None:
common_fields = {"child_count": 0, "depth": 0, "parent_value": None, "external_id": None, "_id": None}
assert result1 == [
# These should appear in alphabetical order:
{"value": "double", "usage_count": 2, **common_fields},
{"value": "triple", "usage_count": 3, **common_fields},
{"value": "double", **common_fields},
{"value": "triple", **common_fields},
]
# And it should be case insensitive:
result2 = list(self.taxonomy.get_filtered_tags(search_term="LE", include_counts=True))
Expand Down
Loading