Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion scripts/properties_to_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
import csv
import json
import pathlib
import re

DIR = pathlib.Path(__file__).parent.parent
CSV_PATH = DIR / "data/properties_description/"
Expand All @@ -71,6 +72,10 @@
CSV_PATH.glob("*.csv"), key=lambda p: float(p.stem.lstrip("v")), reverse=True
)

def canonicalize(s):
    """Return *s* lower-cased with every non-word character removed.

    "Non-word" is the regex class ``\\W``: whitespace and punctuation are
    dropped, while letters, digits and underscores are kept.  This lets two
    strings that differ only in spacing, punctuation or letter case compare
    equal.
    """
    # Strip first, then lower-case: the order matches the original behaviour.
    return re.sub(r"\W", "", s).lower()

for csv_path in paths:
version = csv_path.stem
# header = ["Parent Type", "Property", "Type", "Description"]
Expand All @@ -85,12 +90,32 @@
# Look for a similar existing item from a newer CodeMeta version
for existing_item in json_items:
if existing_item.items() >= item.items():
# We found an existing item, add this version to its list
# We found an identical existing item, add this version to its list
assert (
version not in existing_item["versions"]
), f"CodeMeta {version} has duplicated property {item}"
existing_item["versions"].append(version)
# Check for existing properties that have differing types or descriptions.
# Values already recorded from newer versions of properties_description.json
# take precedence over those of the older version being processed.
# Update the versions for these here and break to avoid duplicate rows.
if item["Property"] == existing_item["Property"] and item["Parent Type"] == existing_item["Parent Type"]:
if canonicalize(item["Type"]) != canonicalize(existing_item["Type"]):
# both types meaningfully differ
item["versions"] = [version]
json_items.append(item)
else:
item["Type"] = existing_item["Type"]
if version not in existing_item["versions"]:
existing_item["versions"].append(version)

if item["Description"] != existing_item["Description"] and item["Type"] == existing_item["Type"]:
item["Description"] = existing_item["Description"]
if version not in existing_item["versions"]:
existing_item["versions"].append(version)

break

else:
# No similar item, create a new one
item["versions"] = [version]
Expand Down