-
Notifications
You must be signed in to change notification settings - Fork 0
Implement Metadata File expectations #49
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ec02130
fd7046d
b16679f
c9697bc
6a73738
4ab9ea0
3d59e30
f3cd29c
ee18c9f
6ded827
0dfbc83
26563c9
8f87ff2
f6df284
729a2be
2e8f9ac
a39c810
2615878
8c61b6a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| # Copyright (c) 2024-2025 CRS4 | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import re | ||
|
|
||
| import rocrate_validator.log as logging | ||
| from rocrate_validator.models import Severity, ValidationContext | ||
| from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement | ||
|
|
||
| # set up logging | ||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
@requirement(name="RO-Crate context version")
class FileDescriptorContextVersion(PyFunctionCheck):
    """The RO-Crate metadata file MUST include the RO-Crate context version 1.2
    (or later minor version) in `@context`"""

    # NOTE(review): the docstrings in this class are kept verbatim because the
    # validator may surface them as requirement/check descriptions -- confirm
    # before rewording them. Implementation notes live in `#` comments instead.

    @check(name="RO-Crate context version", severity=Severity.REQUIRED)
    def test_existence(self, context: ValidationContext) -> bool:
        """
        The RO-Crate metadata file MUST include the RO-Crate context version 1.2
        (or later minor version) in `@context`
        """
        # Returns True when at least one string entry of `@context` is exactly
        # an RO-Crate 1.x context URI with x >= 2 (optionally "-DRAFT").
        # Returns False, after registering an issue on `context.result`, when
        # no entry qualifies.
        #
        # `(?:[2-9]|[1-9]\d+)` accepts minor versions 2-9 and any multi-digit
        # minor version (1.10, 1.11, ...), so "later minor version" is not
        # capped at 1.9.
        pattern = re.compile(
            r"https://w3id\.org/ro/crate/1\.(?:[2-9]|[1-9]\d+)(?:-DRAFT)?/context"
        )

        # Keep the try body limited to reading the metadata. Failures here
        # (metadata that cannot be loaded, no `@context` key) are deliberately
        # not reported by this check: per the review discussion on this
        # change, the base profile is expected to flag those cases.
        try:
            json_dict = context.ro_crate.metadata.as_dict()
            context_value = json_dict["@context"]
        except Exception as e:
            if logger.isEnabledFor(logging.DEBUG):
                logger.exception(e)
            return True

        # `@context` may be a single value or a list mixing URIs and inline
        # term-definition objects. Normalise to a list and only test strings,
        # so a lone dict (or None) is reported as a failure instead of raising
        # TypeError inside the regex call and being swallowed as a pass.
        entries = context_value if isinstance(context_value, list) else [context_value]

        # fullmatch: the whole entry must be the context URI; a prefix match
        # would also accept e.g. ".../1.2/context-something-else".
        passed = any(
            isinstance(entry, str) and pattern.fullmatch(entry) is not None
            for entry in entries
        )
        if not passed:
            context.result.add_issue(
                "The RO-Crate metadata file MUST include the RO-Crate context "
                "version 1.2 (or later minor version) in `@context`",
                self,
            )
        return passed
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we want to pass the validation if there is an issue with the metadata? What I mean is that we can't tell if the exception is:
e.g. if the validated file has an issue such that
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The base profile has a few checks for this sort of thing - valid JSON-LD, flattened & compacted, has a |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| # Copyright (c) 2025 eScience Lab, The University of Manchester | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| @prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> . | ||
| @prefix five-safes-crate: <https://github.com/eScienceLab/rocrate-validator/profiles/five-safes-crate/> . | ||
| @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . | ||
| @prefix schema: <http://schema.org/> . | ||
| @prefix purl: <http://purl.org/dc/terms/> . | ||
| @prefix sh: <http://www.w3.org/ns/shacl#> . | ||
| @prefix validator: <https://github.com/crs4/rocrate-validator/> . | ||
| @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . | ||
| @prefix dct: <http://purl.org/dc/terms/> . | ||
|
|
||
# Requires every RO-Crate Metadata File Descriptor to carry a `conformsTo`
# IRI naming RO-Crate specification version 1.2 or a later 1.x minor version
# (optionally with the "-DRAFT" suffix).
five-safes-crate:MetadataFileDescriptorProperties a sh:NodeShape ;
    sh:name "RO-Crate conforms to 1.2 or later minor version" ;
    sh:description """The RO-Crate metadata file descriptor MUST have a `conformsTo` property with RO-Crate specification version 1.2 or later minor version""";
    sh:targetClass ro-crate:ROCrateMetadataFileDescriptor ;
    sh:property [
        a sh:PropertyShape ;
        sh:name "RO-Crate conforms to 1.2 or later minor version" ;
        sh:description "The RO-Crate metadata file descriptor MUST have a `conformsTo` property with RO-Crate specification version 1.2 or later minor version" ;
        sh:minCount 1 ;
        sh:nodeKind sh:IRI ;
        sh:path dct:conformsTo ;
        # sh:pattern follows SPARQL REGEX semantics, i.e. it matches anywhere
        # in the value unless anchored. `^...$` makes the whole IRI match, so
        # values such as ".../1.2/context" or ".../1.25-foo" are rejected.
        # `([2-9]|[1-9][0-9]+)` accepts minor versions 2-9 and any multi-digit
        # minor version (1.10, 1.11, ...).
        sh:pattern "^https://w3id\\.org/ro/crate/1\\.([2-9]|[1-9][0-9]+)(-DRAFT)?$" ;
        sh:severity sh:Violation;
        sh:message "The RO-Crate metadata file descriptor MUST have a `conformsTo` property with RO-Crate specification version 1.2 or later minor version" ;
    ] .

# NOTE(review): presumably this switches off the `conformsTo` shape inherited
# from the base `ro-crate` profile so that only the stricter shape above
# applies -- confirm against the base profile definition.
ro-crate:conformsToROCrateSpec sh:deactivated true .
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,169 @@ | ||
| { | ||
| "@context": ["https://w3id.org/ro/crate/1.1/context", "http://schema.org", {"test": "http://schema.org/test"}], | ||
| "@graph": [ | ||
| { | ||
| "@type": "CreativeWork", | ||
| "@id": "ro-crate-metadata.json", | ||
| "about": { | ||
| "@id": "./" | ||
| }, | ||
| "conformsTo": { | ||
| "@id": "https://w3id.org/ro/crate/1.2" | ||
| } | ||
| }, | ||
| { | ||
| "@id": "./", | ||
| "@type": "Dataset", | ||
| "name": "5-Safe RO-Crate Request", | ||
| "description": "example 5-Safe RO-Crate request metadata for testing", | ||
| "license": "Apache-2.0", | ||
| "datePublished": "2025-09-20T14:38:00+00:00", | ||
| "conformsTo": { | ||
| "@id": "https://w3id.org/5s-crate/0.4" | ||
| }, | ||
| "hasPart": [ | ||
| { | ||
| "@id": "https://workflowhub.eu/workflows/289?version=1" | ||
| }, | ||
| { | ||
| "@id": "input1.txt" | ||
| } | ||
| ], | ||
| "mainEntity": { | ||
| "@id": "https://workflowhub.eu/workflows/289?version=1" | ||
| }, | ||
| "mentions": { | ||
| "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538" | ||
| }, | ||
| "sourceOrganization": { | ||
| "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" | ||
| } | ||
| }, | ||
| { | ||
| "@id": "https://w3id.org/5s-crate/0.4", | ||
| "@type": "Profile", | ||
| "name": "Five Safes RO-Crate profile" | ||
| }, | ||
| { | ||
| "@id": "https://workflowhub.eu/workflows/289?version=1", | ||
| "@type": "Dataset", | ||
| "name": "CWL Protein MD Setup tutorial with mutations", | ||
| "conformsTo": { | ||
| "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" | ||
| }, | ||
| "distribution": { | ||
| "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" | ||
| } | ||
| }, | ||
| { | ||
| "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1", | ||
| "@type": "DataDownload", | ||
| "conformsTo": { | ||
| "@id": "https://w3id.org/ro/crate" | ||
| }, | ||
| "encodingFormat": "application/zip" | ||
| }, | ||
| { | ||
| "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538", | ||
| "@type": "CreateAction", | ||
| "actionStatus": "http://schema.org/PotentialActionStatus", | ||
| "agent": { | ||
| "@id": "https://orcid.org/0000-0001-9842-9718" | ||
| }, | ||
| "instrument": { | ||
| "@id": "https://workflowhub.eu/workflows/289?version=1" | ||
| }, | ||
| "name": "Execute query 12389 on workflow ", | ||
| "object": [ | ||
| { | ||
| "@id": "input1.txt" | ||
| }, | ||
| { | ||
| "@id": "#enableFastMode" | ||
| } | ||
| ] | ||
| }, | ||
| { | ||
| "@id": "https://orcid.org/0000-0001-9842-9718", | ||
| "@type": "Person", | ||
| "name": "Stian Soiland-Reyes", | ||
| "affiliation": { | ||
| "@id": "https://ror.org/027m9bs27" | ||
| }, | ||
| "memberOf": [ | ||
| { | ||
| "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" | ||
| } | ||
| ] | ||
| }, | ||
| { | ||
| "@id": "https://ror.org/027m9bs27", | ||
| "@type": "Organization", | ||
| "name": "The University of Manchester" | ||
| }, | ||
| { | ||
| "@id": "https://ror.org/01ee9ar58", | ||
| "@type": "Organization", | ||
| "name": "University of Nottingham" | ||
| }, | ||
| { | ||
| "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70", | ||
| "@type": "Project", | ||
| "name": "Investigation of cancer (TRE72 project 81)", | ||
| "identifier": [ | ||
| { | ||
| "@id": "_:localid:tre72:project81" | ||
| } | ||
| ], | ||
| "funding": { | ||
| "@id": "https://gtr.ukri.org/projects?ref=10038961" | ||
| }, | ||
| "member": [ | ||
| { | ||
| "@id": "https://ror.org/027m9bs27" | ||
| }, | ||
| { | ||
| "@id": "https://ror.org/01ee9ar58" | ||
| } | ||
| ] | ||
| }, | ||
| { | ||
| "@id": "_:localid:tre72:project81", | ||
| "@type": "PropertyValue", | ||
| "name": "tre72", | ||
| "value": "project81" | ||
| }, | ||
| { | ||
| "@id": "https://gtr.ukri.org/projects?ref=10038961", | ||
| "@type": "Grant", | ||
| "name": "EOSC4Cancer" | ||
| }, | ||
| { | ||
| "@id": "input1.txt", | ||
| "@type": "File", | ||
| "name": "input1", | ||
| "exampleOfWork": { | ||
| "@id": "#sequence" | ||
| } | ||
| }, | ||
| { | ||
| "@id": "#enableFastMode", | ||
| "@type": "PropertyValue", | ||
| "name": "--fast-mode", | ||
| "value": "True", | ||
| "exampleOfWork": { | ||
| "@id": "#fast" | ||
| } | ||
| }, | ||
| { | ||
| "@id": "#sequence", | ||
| "@type": "FormalParameter", | ||
| "name": "input-sequence" | ||
| }, | ||
| { | ||
| "@id": "#fast", | ||
| "@type": "FormalParameter", | ||
| "name": "fast-mode" | ||
| } | ||
| ] | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd maybe add an `if not context_value` check for safety here, in case the context is missing, None or empty :-) and add an issue.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This one's caught in the base profile too - https://github.com/crs4/rocrate-validator/blob/dfe0136517196802b0a976e54da3f84a572780b6/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py#L128