Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.claude
*.py[cod]
__pycache__
.pytest_cache
Expand Down
22 changes: 11 additions & 11 deletions olx_importer/management/commands/load_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,33 +61,33 @@ def init_known_types(self):

def add_arguments(self, parser):
parser.add_argument("course_data_path", type=pathlib.Path)
parser.add_argument("learning_package_key", type=str)
parser.add_argument("learning_package_ref", type=str)

def handle(self, course_data_path, learning_package_key, **options):
def handle(self, course_data_path, learning_package_ref, **options):
self.course_data_path = course_data_path
self.learning_package_key = learning_package_key
self.load_course_data(learning_package_key)
self.learning_package_ref = learning_package_ref
self.load_course_data(learning_package_ref)

def get_course_title(self):
course_type_dir = self.course_data_path / "course"
course_xml_file = next(course_type_dir.glob("*.xml"))
course_root = ET.parse(course_xml_file).getroot()
return course_root.attrib.get("display_name", "Unknown Course")

def load_course_data(self, learning_package_key):
def load_course_data(self, learning_package_ref):
print(f"Importing course from: {self.course_data_path}")
now = datetime.now(timezone.utc)
title = self.get_course_title()

if content_api.learning_package_exists(learning_package_key):
if content_api.learning_package_exists(learning_package_ref):
raise CommandError(
f"{learning_package_key} already exists. "
f"{learning_package_ref} already exists. "
"This command currently only supports initial import."
)

with transaction.atomic():
self.learning_package = content_api.create_learning_package(
learning_package_key, title, created=now,
learning_package_ref, title, created=now,
)
for block_type in SUPPORTED_TYPES:
self.import_block_type(block_type, now) #, publish_log_entry)
Expand Down Expand Up @@ -140,7 +140,7 @@ def import_block_type(self, block_type_name, now): # , publish_log_entry):

for xml_file_path in block_data_path.glob("*.xml"):
components_found += 1
local_key = xml_file_path.stem
component_code = xml_file_path.stem

# Do some basic parsing of the content to see if it's even well
# constructed enough to add (or whether we should skip/error on it).
Expand All @@ -155,7 +155,7 @@ def import_block_type(self, block_type_name, now): # , publish_log_entry):
_component, component_version = content_api.create_component_and_version(
self.learning_package.id,
component_type=block_type,
local_key=local_key,
component_code=component_code,
title=display_name,
created=now,
created_by=None,
Expand All @@ -173,7 +173,7 @@ def import_block_type(self, block_type_name, now): # , publish_log_entry):
content_api.create_component_version_media(
component_version,
text_content.pk,
key="block.xml",
path="block.xml",
)

# Cycle through static assets references and add those as well...
Expand Down
14 changes: 8 additions & 6 deletions src/openedx_content/applets/backup_restore/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,28 @@

from django.contrib.auth.models import User as UserType # pylint: disable=imported-auth-user

from ..publishing.api import get_learning_package_by_key
from ..publishing.api import get_learning_package_by_ref
from .zipper import LearningPackageUnzipper, LearningPackageZipper


def create_zip_file(lp_key: str, path: str, user: UserType | None = None, origin_server: str | None = None) -> None:
def create_zip_file(
package_ref: str, path: str, user: UserType | None = None, origin_server: str | None = None
) -> None:
"""
Creates a dump zip file for the given learning package key at the given path.
The zip file contains a TOML representation of the learning package and its contents.

Can throw a NotFoundError at get_learning_package_by_key
Can throw a NotFoundError at get_learning_package_by_ref
"""
learning_package = get_learning_package_by_key(lp_key)
learning_package = get_learning_package_by_ref(package_ref)
LearningPackageZipper(learning_package, user, origin_server).create_zip(path)


def load_learning_package(path: str, key: str | None = None, user: UserType | None = None) -> dict:
def load_learning_package(path: str, package_ref: str | None = None, user: UserType | None = None) -> dict:
"""
Loads a learning package from a zip file at the given path.
Restores the learning package and its contents to the database.
Returns a dictionary with the status of the operation and any errors encountered.
"""
with zipfile.ZipFile(path, "r") as zipf:
return LearningPackageUnzipper(zipf, key, user).load()
return LearningPackageUnzipper(zipf, package_ref, user).load()
149 changes: 120 additions & 29 deletions src/openedx_content/applets/backup_restore/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,30 @@ class LearningPackageSerializer(serializers.Serializer): # pylint: disable=abst
"""
Serializer for learning packages.

Archives created in Verawood or later write ``package_ref``. Archives
created in Ulmo write ``key``. Both are accepted; ``package_ref`` takes
precedence.

Note:
The `key` field is serialized, but it is generally not trustworthy for restoration.
During restore, a new key may be generated or overridden.
The ref/key field is serialized but is generally not trustworthy for
restoration. During restore, a new ref may be generated or overridden.
"""

title = serializers.CharField(required=True)
key = serializers.CharField(required=True)
package_ref = serializers.CharField(required=False)
key = serializers.CharField(required=False)
description = serializers.CharField(required=True, allow_blank=True)
created = serializers.DateTimeField(required=True, default_timezone=timezone.utc)

def validate(self, attrs):
package_ref = attrs.pop("package_ref", None)
legacy_key = attrs.pop("key", None)
ref = package_ref or legacy_key
if not ref:
raise serializers.ValidationError("Either 'package_ref' or 'key' is required.")
attrs["package_ref"] = ref # Normalise to 'package_ref' for create_learning_package.
return attrs


class LearningPackageMetadataSerializer(serializers.Serializer): # pylint: disable=abstract-method
"""
Expand All @@ -40,40 +55,91 @@ class LearningPackageMetadataSerializer(serializers.Serializer): # pylint: disa
class EntitySerializer(serializers.Serializer): # pylint: disable=abstract-method
"""
Serializer for publishable entities.

Archives created in Verawood or later write ``entity_ref``. Archives
created in Ulmo use ``key``. Both are accepted; ``entity_ref`` takes
precedence.
"""

can_stand_alone = serializers.BooleanField(required=True)
key = serializers.CharField(required=True)
entity_ref = serializers.CharField(required=False)
key = serializers.CharField(required=False)
created = serializers.DateTimeField(required=True, default_timezone=timezone.utc)

def validate(self, attrs):
entity_ref = attrs.pop("entity_ref", None)
legacy_key = attrs.pop("key", None)
ref = entity_ref or legacy_key
if not ref:
raise serializers.ValidationError("Either 'entity_ref' or 'key' is required.")
attrs["entity_ref"] = ref
return attrs


class EntityVersionSerializer(serializers.Serializer): # pylint: disable=abstract-method
"""
Serializer for publishable entity versions.
"""
title = serializers.CharField(required=True)
entity_key = serializers.CharField(required=True)
entity_ref = serializers.CharField(required=True)
created = serializers.DateTimeField(required=True, default_timezone=timezone.utc)
version_num = serializers.IntegerField(required=True)


class ComponentSerializer(EntitySerializer): # pylint: disable=abstract-method
"""
Serializer for components.
Contains logic to convert entity_key to component_type and local_key.

Extracts component_type and component_code from the [entity.component]
section if present (archives created in Verawood or later). Falls back to
parsing the entity key for archives created in Ulmo.
"""

component = serializers.DictField(required=False)

def validate(self, attrs):
"""
Custom validation logic:
parse the entity_key into (component_type, local_key).
Custom validation logic: resolve component_type and component_code.

Archives created in Verawood or later supply an [entity.component]
section with ``component_type`` (e.g. "xblock.v1:problem") and
``component_code`` (e.g. "my_example"). Archives created in Ulmo only
have the entity ``key`` in the format
``"{namespace}:{type_name}:{component_code}"``, so we fall back to
parsing that for backwards compatibility.
"""
entity_key = attrs["key"]
try:
component_type_obj, local_key = components_api.get_or_create_component_type_by_entity_key(entity_key)
attrs["component_type"] = component_type_obj
attrs["local_key"] = local_key
except ValueError as exc:
raise serializers.ValidationError({"key": str(exc)})
super().validate(attrs)
component_section = attrs.pop("component", None)
if component_section:
# Verawood+ format: component_type and component_code are explicit.
component_type_str = component_section.get("component_type", "")
component_code = component_section.get("component_code", "")
try:
namespace, type_name = component_type_str.split(":", 1)
except ValueError as exc:
raise serializers.ValidationError(
{"component": f"Invalid component_type format: {component_type_str!r}. "
"Expected '{namespace}:{type_name}'."}
) from exc
component_type_obj = components_api.get_or_create_component_type(namespace, type_name)
else:
            # Ulmo (legacy) format: parse the entity_ref (which was normalized
            # from "key" in super().validate()) assuming the format:
# (namespace, type_name, component_code). This parsing is
# intentionally only here — entity_ref must not be parsed anywhere
# else in the codebase. Verawood+ archives may not follow this
# convention.
entity_ref = attrs["entity_ref"]
try:
namespace, type_name, component_code = entity_ref.split(":", 2)
except ValueError as exc:
raise serializers.ValidationError(
{"key": f"Invalid entity key format: {entity_ref!r}. "
"Expected '{namespace}:{type_name}:{component_code}'."}
) from exc
component_type_obj = components_api.get_or_create_component_type(namespace, type_name)
attrs["component_type"] = component_type_obj
attrs["component_code"] = component_code
return attrs


Expand All @@ -86,35 +152,46 @@ class ComponentVersionSerializer(EntityVersionSerializer): # pylint: disable=ab
class ContainerSerializer(EntitySerializer): # pylint: disable=abstract-method
"""
Serializer for containers.

Extracts container_code from the [entity.container] section.
Archives created in Verawood or later include an explicit
``container_code`` field. Archives created in Ulmo do not, so we
fall back to using the entity key as the container_code.
"""

container = serializers.DictField(required=True)

def validate_container(self, value):
"""
Custom validation logic for the container field.
Ensures that the container dict has exactly one key which is one of
"section", "subsection", or "unit" values.
Ensures that the container dict has exactly one type key ("section",
"subsection", or "unit"), optionally alongside "container_code".
"""
errors = []
if not isinstance(value, dict) or len(value) != 1:
errors.append("Container must be a dict with exactly one key.")
if len(value) == 1: # Only check the key if there is exactly one
container_type = list(value.keys())[0]
if container_type not in ("section", "subsection", "unit"):
errors.append(f"Invalid container value: {container_type}")
type_keys = [k for k in value if k in ("section", "subsection", "unit")]
if len(type_keys) != 1:
errors.append(
"Container must have exactly one type key: 'section', 'subsection', or 'unit'."
)
if errors:
raise serializers.ValidationError(errors)
return value

def validate(self, attrs):
"""
Custom validation logic:
parse the container dict to extract the container type.
Custom validation logic: extract container_type and container_code.

Archives created in Verawood or later supply an explicit
``container_code`` field inside [entity.container]. Archives created
in Ulmo do not, so we fall back to using the entity key.
"""
container = attrs["container"]
container_type = list(container.keys())[0] # It is safe to do this after validate_container
super().validate(attrs)
container = attrs.pop("container")
# It is safe to do this after validate_container
container_type = next(k for k in container if k in ("section", "subsection", "unit"))
attrs["container_type"] = container_type
attrs.pop("container") # Remove the container field after processing
# Verawood+: container_code is explicit. Ulmo: fall back to entity_ref.
attrs["container_code"] = container.get("container_code") or attrs["entity_ref"]
return attrs


Expand Down Expand Up @@ -156,10 +233,24 @@ class CollectionSerializer(serializers.Serializer): # pylint: disable=abstract-
Serializer for collections.
"""
title = serializers.CharField(required=True)
key = serializers.CharField(required=True)
# 'collection_code' is the current field name; 'key' is the old name kept for
# back-compat with archives written before the rename. At least one must be present.
collection_code = serializers.CharField(required=False)
key = serializers.CharField(required=False)
description = serializers.CharField(required=True, allow_blank=True)
entities = serializers.ListField(
child=serializers.CharField(),
required=True,
allow_empty=True,
)

def validate(self, attrs):
# Prefer 'collection_code'; fall back to legacy 'key'. Always remove
# both so only the normalised 'collection_code' key reaches the caller.
code = attrs.pop("collection_code", None)
legacy_key = attrs.pop("key", None)
code = code or legacy_key
if not code:
raise serializers.ValidationError("Either 'collection_code' or 'key' is required.")
attrs["collection_code"] = code
return attrs
Loading