Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 40 additions & 17 deletions app/tasks/validation_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import logging
import os
import shutil
import json
from typing import Optional

from rocrate_validator import services
Expand All @@ -22,7 +23,6 @@
find_validation_object_on_minio
)
from app.utils.webhook_utils import send_webhook_notification
from app.utils.file_utils import build_metadata_only_rocrate

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -98,7 +98,7 @@ def process_validation_task_by_id(

@celery.task
def process_validation_task_by_metadata(
crate_json: str, profile_name: str | None, webhook_url: str | None
crate_json: str, profile_name: str | None, webhook_url: str | None, profiles_path: Optional[str] = None
) -> ValidationResult | str:
"""
Background task to process the RO-Crate validation for a given json metadata string.
Expand All @@ -111,19 +111,13 @@ def process_validation_task_by_metadata(
:todo: Replace the Crate ID with a more comprehensive system, and replace profile name with URI.
"""

skip_checks_list = ['ro-crate-1.1_12.1']
file_path = None

try:
# Fetch the RO-Crate from MinIO using the provided ID:
file_path = build_metadata_only_rocrate(crate_json)

logging.info(f"Processing validation task for {file_path}")
logging.info(f"Processing validation task for provided metadata string")

# Perform validation:
validation_result = perform_ro_crate_validation(file_path,
validation_result = perform_metadata_validation(crate_json,
profile_name,
skip_checks_list
profiles_path
)

if isinstance(validation_result, str):
Expand All @@ -132,9 +126,9 @@ def process_validation_task_by_metadata(
raise Exception(f"Validation failed: {validation_result}")

if not validation_result.has_issues():
logging.info(f"RO Crate {file_path} is valid.")
logging.info("RO Crate metadata is valid.")
else:
logging.info(f"RO Crate {file_path} is invalid.")
logging.info("RO Crate metadata is invalid.")

if webhook_url:
send_webhook_notification(webhook_url, validation_result.to_json())
Expand All @@ -148,10 +142,6 @@ def process_validation_task_by_metadata(
send_webhook_notification(webhook_url, error_data)

finally:
# Clean up the temporary file if it was created:
if file_path and os.path.exists(file_path):
shutil.rmtree(file_path)

if isinstance(validation_result, str):
return validation_result
else:
Expand Down Expand Up @@ -196,6 +186,39 @@ def perform_ro_crate_validation(
return str(e)


def perform_metadata_validation(
    crate_json: str,
    profile_name: str | None,
    skip_checks_list: Optional[list] = None,
    profiles_path: Optional[str] = None,
) -> ValidationResult | str:
    """
    Validates only RO-Crate metadata provided as a json string.

    No exception propagates to the caller: any failure (malformed JSON, metadata that is not a JSON object,
    an error while building the validation settings, or an error raised by the validator itself) is logged
    and returned as an error message string instead.

    :param crate_json: The JSON string containing the metadata
    :param profile_name: The name of the validation profile to use. If None (or empty), no profile identifier
        is passed to the validator settings.
    :param skip_checks_list: A list of checks to skip, if needed. Omitted from the settings when None or empty.
        NOTE: this is the third positional parameter, so ``profiles_path`` should be passed by keyword.
    :param profiles_path: The path to the profiles definition directory. Omitted from the settings when
        None or empty.
    :return: The validation result, or a string describing the error if validation could not be performed.
    """

    try:
        logging.info(f"Validating ro-crate metadata with profile {profile_name}")

        # Parse up front so a malformed payload is reported before any validator object is built.
        metadata = json.loads(crate_json)
        if not isinstance(metadata, dict):
            raise ValueError(
                f"RO-Crate metadata must be a JSON object, got {type(metadata).__name__}"
            )

        # Mandatory settings first; optional ones are only added when a value was supplied.
        options = {"metadata_only": True, "metadata_dict": metadata}
        if profile_name:
            options["profile_identifier"] = profile_name
        if skip_checks_list:
            options["skip_checks"] = skip_checks_list
        if profiles_path:
            options["profiles_path"] = profiles_path

        settings = services.ValidationSettings(**options)

        return services.validate(settings)

    except Exception as e:
        # Callers distinguish failure from success by the str return type, so never re-raise here.
        logging.error(f"Unexpected error during validation: {e}")
        return str(e)


def check_ro_crate_exists(
minio_client: object,
bucket_name: str,
Expand Down
53 changes: 0 additions & 53 deletions app/utils/file_utils.py

This file was deleted.

3 changes: 2 additions & 1 deletion tests/test_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ def test_queue_metadata(flask_app, crate_json: dict, profile: str, webhook: str,
"{}",
422, "Required parameter crate_json is empty"
),
]
],
ids=["missing_crate_json","invalid_json","empty_json"]
)
def test_queue_metadata_json_errors(flask_app, crate_json: str, status_code: int, response_error: str):
response, status = queue_ro_crate_metadata_validation_task(crate_json)
Expand Down
114 changes: 82 additions & 32 deletions tests/test_validation_tasks.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from unittest import mock
import pytest
import json

from app.tasks.validation_tasks import (
process_validation_task_by_id,
perform_ro_crate_validation,
perform_metadata_validation,
return_ro_crate_validation,
process_validation_task_by_metadata,
check_ro_crate_exists,
Expand Down Expand Up @@ -227,34 +229,28 @@ def test_process_validation_failure(
# Test function: process_validation_task_by_metadata

@pytest.mark.parametrize(
"crate_json, profile_name, webhook_url, mock_path, validation_json, validation_value, os_path_exists",
"crate_json, profile_name, webhook_url, validation_json, validation_value",
[
(
'{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}',
"test-profile", "https://example.com/webhook", "/tmp/crate",
'{"status": "valid"}', False, True
"test-profile", "https://example.com/webhook",
'{"status": "valid"}', False
),
(
'{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}',
"test-profile", "https://example.com/webhook", "/tmp/crate",
'{"status": "invalid"}', True, True
"test-profile", "https://example.com/webhook",
'{"status": "invalid"}', True
)
],
ids=["success_no_issues", "success_with_issues"]
)
@mock.patch("app.tasks.validation_tasks.shutil.rmtree")
@mock.patch("app.tasks.validation_tasks.os.path.exists")
@mock.patch("app.tasks.validation_tasks.send_webhook_notification")
@mock.patch("app.tasks.validation_tasks.perform_ro_crate_validation")
@mock.patch("app.tasks.validation_tasks.build_metadata_only_rocrate")
@mock.patch("app.tasks.validation_tasks.perform_metadata_validation")
def test_metadata_validation(
mock_build, mock_validate, mock_webhook, mock_exists, mock_rmtree,
crate_json: str, profile_name: str, webhook_url: str, mock_path: str,
validation_json: str, validation_value: bool, os_path_exists: bool
mock_validate, mock_webhook,
crate_json: str, profile_name: str, webhook_url: str,
validation_json: str, validation_value: bool,
):
mock_exists.return_value = os_path_exists
mock_build.return_value = mock_path

mock_result = mock.Mock()
mock_result.has_issues.return_value = validation_value
mock_result.to_json.return_value = validation_json
Expand All @@ -263,39 +259,33 @@ def test_metadata_validation(
result = process_validation_task_by_metadata(crate_json, profile_name, webhook_url)

assert result == validation_json
mock_build.assert_called_once_with(crate_json)
mock_validate.assert_called_once()
mock_webhook.assert_called_once_with(webhook_url, validation_json)
mock_rmtree.assert_called_once_with(mock_path)


@pytest.mark.parametrize(
"crate_json, profile_name, webhook_url, mock_path, validation_message, os_path_exists",
"crate_json, profile_name, webhook_url, validation_message",
[
(
'{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}',
"test-profile", "https://example.com/webhook", "/tmp/crate",
"Validation error", True
"test-profile", "https://example.com/webhook",
"Validation error"
),
(
'{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}',
"test-profile", None, "/tmp/crate",
"Validation error", True
"test-profile", None,
"Validation error"
)
],
ids=["validation_fails", "validation_fails_no_webhook"]
)
@mock.patch("app.tasks.validation_tasks.shutil.rmtree")
@mock.patch("app.tasks.validation_tasks.os.path.exists", return_value=True)
@mock.patch("app.tasks.validation_tasks.send_webhook_notification")
@mock.patch("app.tasks.validation_tasks.perform_ro_crate_validation")
@mock.patch("app.tasks.validation_tasks.build_metadata_only_rocrate")
@mock.patch("app.tasks.validation_tasks.perform_metadata_validation")
def test_validation_fails_and_sends_error_notification_to_webhook(
mock_build, mock_validate, mock_webhook, mock_exists, mock_rmtree,
crate_json: str, profile_name: str, webhook_url: str, mock_path: str,
validation_message: str, os_path_exists: bool
mock_validate, mock_webhook,
crate_json: str, profile_name: str, webhook_url: str,
validation_message: str
):
mock_build.return_value = mock_path

mock_validate.return_value = validation_message

Expand All @@ -313,8 +303,6 @@ def test_validation_fails_and_sends_error_notification_to_webhook(
# Make sure webhook not sent
mock_webhook.assert_not_called()

mock_rmtree.assert_called_once_with(mock_path)


# Test function: perform_ro_crate_validation

Expand Down Expand Up @@ -378,6 +366,68 @@ def test_validation_settings_error(mock_validation_settings, mock_validate):
mock_validate.assert_not_called()


# Test function: perform_metadata_validation

@pytest.mark.parametrize(
    "crate_json, profile_name, skip_checks",
    [
        ('{"id":"dummy json"}', "ro_profile", ["check1", "check2"]),
        ('{"id":"dummy json"}', None, None)
    ],
    ids=["success_with_all_args", "success_with_only_crate"]
)
@mock.patch("app.tasks.validation_tasks.services.validate")
@mock.patch("app.tasks.validation_tasks.services.ValidationSettings")
def test_metadata_validation_success_with_all_args(
    mock_validation_settings, mock_validate,
    crate_json: str, profile_name: str | None, skip_checks: list | None
):
    """
    perform_metadata_validation builds ValidationSettings from keyword arguments only and returns
    whatever services.validate returns.

    Both ValidationSettings and validate are patched, so only the wiring between them is checked:
    mandatory keys are always present, optional keys appear only when a value was supplied.
    """
    mock_result = mock.Mock()
    mock_validate.return_value = mock_result

    result = perform_metadata_validation(crate_json, profile_name, skip_checks)

    # Assert that result was returned
    assert result == mock_result

    # Validate proper construction of ValidationSettings
    mock_validation_settings.assert_called_once()
    args, kwargs = mock_validation_settings.call_args
    assert args == ()

    # Mandatory settings for the metadata-only path
    assert kwargs["metadata_only"] is True
    assert kwargs["metadata_dict"] == json.loads(crate_json)

    # Optional settings are passed through only when provided
    if profile_name is not None:
        assert kwargs["profile_identifier"] == profile_name
    else:
        assert "profile_identifier" not in kwargs
    if skip_checks is not None:
        assert kwargs["skip_checks"] == skip_checks
    else:
        assert "skip_checks" not in kwargs

    # profiles_path is never supplied by this test, so it must not leak into the settings
    assert "profiles_path" not in kwargs

    mock_validate.assert_called_once_with(mock_validation_settings.return_value)


@mock.patch("app.tasks.validation_tasks.services.validate", side_effect=RuntimeError("Validation error"))
@mock.patch("app.tasks.validation_tasks.services.ValidationSettings")
def test_metadata_validation_raises_exception_and_returns_string(mock_validation_settings, mock_validate):
    """
    When the patched services.validate raises, perform_metadata_validation returns the error
    message as a string rather than propagating the exception.
    """
    outcome = perform_metadata_validation(
        '{"id":"test metadata"}',
        "profile",
        skip_checks_list=None,
    )

    # The failure is reported through the return value
    assert isinstance(outcome, str)
    assert "Validation error" in outcome

    # The validator was reached exactly once before failing
    mock_validate.assert_called_once()


@mock.patch("app.tasks.validation_tasks.services.validate")
@mock.patch("app.tasks.validation_tasks.services.ValidationSettings", side_effect=ValueError("Bad config"))
def test_metadata_validation_settings_error(mock_validation_settings, mock_validate):
    """
    When constructing the patched ValidationSettings raises, perform_metadata_validation returns
    the error message as a string and never calls services.validate.
    """
    metadata_text = '{"id":"test metadata"}'

    outcome = perform_metadata_validation(metadata_text, None)

    # The settings error is surfaced through the return value
    assert isinstance(outcome, str)
    assert "Bad config" in outcome

    # Validation must not run once settings construction has failed
    mock_validate.assert_not_called()


# Test function: return_ro_crate_validation

@mock.patch("app.tasks.validation_tasks.get_validation_status_from_minio")
Expand Down