Skip to content

Commit 19778e3

Browse files
Merge pull request #72 from francoto/add_codemeta_generation
Add codemeta generation flag
2 parents b16b673 + 267ecbb commit 19778e3

4 files changed

Lines changed: 194 additions & 27 deletions

File tree

src/rsmetacheck/cli.py

Lines changed: 64 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,76 @@
11
import argparse
22
import os
33
from pathlib import Path
4-
from rsmetacheck.run_somef import run_somef_batch, run_somef_single, ensure_somef_configured
4+
55
from rsmetacheck.run_analyzer import run_analysis
6+
from rsmetacheck.run_somef import (
7+
ensure_somef_configured,
8+
run_somef_batch,
9+
run_somef_single,
10+
)
611

712

813
def cli():
9-
parser = argparse.ArgumentParser(description="Detect metadata pitfalls in software repositories using SoMEF.")
14+
parser = argparse.ArgumentParser(
15+
description="Detect metadata pitfalls in software repositories using SoMEF."
16+
)
1017
parser.add_argument(
1118
"--input",
1219
nargs="+",
1320
required=True,
14-
help="One or more: GitHub/GitLab URLs, JSON files containing repositories, OR existing SoMEF output files when using --skip-somef."
21+
help="One or more: GitHub/GitLab URLs, JSON files containing repositories, OR existing SoMEF output files when using --skip-somef.",
1522
)
1623
parser.add_argument(
1724
"--skip-somef",
1825
action="store_true",
19-
help="Skip SoMEF execution and analyze existing SoMEF output files directly. --input should point to SoMEF JSON files."
26+
help="Skip SoMEF execution and analyze existing SoMEF output files directly. --input should point to SoMEF JSON files.",
2027
)
2128
parser.add_argument(
2229
"--pitfalls-output",
2330
default=os.path.join(os.getcwd(), "pitfalls_outputs"),
24-
help="Directory to store pitfall JSON-LD files (default: ./pitfalls_outputs)."
31+
help="Directory to store pitfall JSON-LD files (default: ./pitfalls_outputs).",
2532
)
2633
parser.add_argument(
2734
"--somef-output",
2835
default=os.path.join(os.getcwd(), "somef_outputs"),
29-
help="Directory to store SoMEF output files (default: ./somef_outputs)."
36+
help="Directory to store SoMEF output files (default: ./somef_outputs).",
3037
)
3138
parser.add_argument(
3239
"--analysis-output",
3340
default=os.path.join(os.getcwd(), "analysis_results.json"),
34-
help="File path for summary results (default: ./analysis_results.json)."
41+
help="File path for summary results (default: ./analysis_results.json).",
3542
)
3643
parser.add_argument(
3744
"--threshold",
3845
type=float,
3946
default=0.8,
40-
help="SoMEF confidence threshold (default: 0.8). Only used when running SoMEF."
47+
help="SoMEF confidence threshold (default: 0.8). Only used when running SoMEF.",
48+
)
49+
parser.add_argument(
50+
"-b",
51+
"--branch",
52+
help="Branch of the repository to analyze. Overrides the default branch. Only used when running SoMEF.",
4153
)
54+
4255
parser.add_argument(
43-
"-b", "--branch",
44-
help="Branch of the repository to analyze. Overrides the default branch. Only used when running SoMEF."
56+
"-c",
57+
"--generate-codemeta",
58+
action="store_true",
59+
help="Generate codemeta files for each repository. Only used when running SoMEF.",
4560
)
4661

4762
parser.add_argument(
4863
"--verbose",
4964
action="store_true",
50-
help="Include both detected AND undetected pitfalls in the output JSON-LD."
65+
help="Include both detected AND undetected pitfalls in the output JSON-LD.",
5166
)
5267

5368
args = parser.parse_args()
5469

5570
if args.skip_somef:
56-
print(f"Skipping SoMEF execution. Analyzing {len(args.input)} existing SoMEF output files...")
71+
print(
72+
f"Skipping SoMEF execution. Analyzing {len(args.input)} existing SoMEF output files..."
73+
)
5774

5875
somef_json_paths = []
5976
for json_path in args.input:
@@ -67,29 +84,58 @@ def cli():
6784
return
6885

6986
print(f"Analyzing {len(somef_json_paths)} SoMEF output files...")
70-
run_analysis(somef_json_paths, args.pitfalls_output, args.analysis_output, verbose=args.verbose)
87+
run_analysis(
88+
somef_json_paths,
89+
args.pitfalls_output,
90+
args.analysis_output,
91+
verbose=args.verbose,
92+
)
7193

7294
else:
7395
ensure_somef_configured()
7496

7597
threshold = args.threshold
7698
somef_output_dir = args.somef_output
99+
generate_codemeta = args.generate_codemeta
77100

78101
print(f"Detected {len(args.input)} input(s):")
102+
if generate_codemeta:
103+
print(
104+
"Codemeta generation is ENABLED. Codemeta files will be created for each repository."
105+
)
79106

80107
for input_item in args.input:
81108
if input_item.startswith("http://") or input_item.startswith("https://"):
82109
print(f"Processing repository URL: {input_item}")
83-
run_somef_single(input_item, somef_output_dir, threshold, branch=args.branch)
110+
run_somef_single(
111+
input_item,
112+
somef_output_dir,
113+
threshold,
114+
branch=args.branch,
115+
generate_codemeta=generate_codemeta,
116+
)
84117
elif os.path.exists(input_item):
85118
print(f"Processing repositories from file: {input_item}")
86-
run_somef_batch(input_item, somef_output_dir, threshold, branch=args.branch)
119+
run_somef_batch(
120+
input_item,
121+
somef_output_dir,
122+
threshold,
123+
branch=args.branch,
124+
generate_codemeta=generate_codemeta,
125+
)
87126
else:
88-
print(f"Warning: Skipping invalid input (not a URL or existing file): {input_item}")
127+
print(
128+
f"Warning: Skipping invalid input (not a URL or existing file): {input_item}"
129+
)
89130

90131
print(f"\nRunning analysis on outputs in {somef_output_dir}...")
91-
run_analysis(somef_output_dir, args.pitfalls_output, args.analysis_output, verbose=args.verbose)
132+
run_analysis(
133+
somef_output_dir,
134+
args.pitfalls_output,
135+
args.analysis_output,
136+
verbose=args.verbose,
137+
)
92138

93139

94140
if __name__ == "__main__":
95-
cli()
141+
cli()

src/rsmetacheck/detect_pitfalls_main.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
from pathlib import Path
33
from typing import Iterable, Union
4+
from rsmetacheck.run_somef import CODEMETA_DEFAULT_NAME
45
from rsmetacheck.utils.pitfall_utils import extract_programming_languages
56
from rsmetacheck.utils.json_ld_utils import create_pitfall_jsonld, save_individual_pitfall_jsonld
67
from rsmetacheck.utils.somef_compat import normalize_somef_data
@@ -467,7 +468,10 @@ def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_outp
467468
if not input_dir.exists():
468469
print(f"Error: Directory not found: {input_dir}")
469470
return
470-
json_files = list(input_dir.glob("*.json"))
471+
json_files = [
472+
f for f in input_dir.glob("*.json")
473+
if not f.stem.endswith(CODEMETA_DEFAULT_NAME)
474+
]
471475
print(f"Found {len(json_files)} JSON files in {input_dir}")
472476
else:
473477
print("Error: No input directory or JSON file list provided.")

src/rsmetacheck/run_somef.py

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1-
import os
21
import json
2+
import os
33
import subprocess
4-
54
from pathlib import Path
65

6+
# Base file name (without the ".json" extension) used for codemeta files requested from SoMEF.
CODEMETA_DEFAULT_NAME = "somef_generated_codemeta"
7+
8+
79
def ensure_somef_configured():
810
"""Run 'somef configure -a' only if it hasn't been configured yet."""
911
config_file = Path.home() / ".somef" / "config.json"
@@ -18,11 +20,14 @@ def ensure_somef_configured():
1820
return False
1921
return True
2022

21-
def run_somef(repo_url, output_file, threshold, branch=None):
23+
24+
def run_somef(repo_url, output_file, threshold, branch=None, codemeta_file=None):
2225
"""Run SoMEF on a given repository and save results."""
2326
cmd = ["somef", "describe", "-r", repo_url, "-o", output_file, "-t", str(threshold)]
2427
if branch:
2528
cmd.extend(["-b", branch])
29+
if codemeta_file:
30+
cmd.extend(["-c", codemeta_file])
2631
try:
2732
subprocess.run(cmd, check=True)
2833
print(f"SoMEF finished for: {repo_url}")
@@ -31,16 +36,38 @@ def run_somef(repo_url, output_file, threshold, branch=None):
3136
print(f"Error running SoMEF for {repo_url}: {e}")
3237
return False
3338

def run_somef_single(
    repo_url,
    output_dir="somef_outputs",
    threshold=0.8,
    branch=None,
    generate_codemeta=False,
):
    """Analyze one repository with SoMEF, writing results into *output_dir*.

    The SoMEF report is always written to ``output_1.json`` inside
    *output_dir* (the directory is created if missing). When
    ``generate_codemeta`` is true, a codemeta file named
    ``CODEMETA_DEFAULT_NAME + ".json"`` is requested in the same directory.

    Returns *output_dir* when ``run_somef`` reports success, otherwise ``None``.
    """
    os.makedirs(output_dir, exist_ok=True)
    report_path = os.path.join(output_dir, "output_1.json")

    # Only hand a codemeta path to SoMEF when the caller asked for one.
    codemeta_path = None
    if generate_codemeta:
        codemeta_path = os.path.join(output_dir, CODEMETA_DEFAULT_NAME + ".json")

    print(f"Running SoMEF for {repo_url}...")

    if run_somef(repo_url, report_path, threshold, branch, codemeta_file=codemeta_path):
        return output_dir
    return None
4262

43-
def run_somef_batch(json_file, output_dir="somef_outputs", threshold=0.8, branch=None):
63+
64+
def run_somef_batch(
65+
json_file,
66+
output_dir="somef_outputs",
67+
threshold=0.8,
68+
branch=None,
69+
generate_codemeta=False,
70+
):
4471
"""Run SoMEF for all repositories listed in a JSON file."""
4572
os.makedirs(output_dir, exist_ok=True)
4673

@@ -57,8 +84,26 @@ def run_somef_batch(json_file, output_dir="somef_outputs", threshold=0.8, branch
5784

5885
for idx, repo_url in enumerate(repos, start=1):
5986
output_file = os.path.join(output_dir, f"{base_name}_output_{idx}.json")
87+
codemeta_file = os.path.join(
88+
output_dir, f"{base_name}_{CODEMETA_DEFAULT_NAME}_{idx}.json"
89+
)
6090
print(f"[{idx}/{len(repos)}] {repo_url}")
61-
run_somef(repo_url, output_file, threshold, branch)
91+
run_somef(
92+
repo_url,
93+
output_file,
94+
threshold,
95+
branch,
96+
codemeta_file=codemeta_file if generate_codemeta else None,
97+
)
6298

6399
print(f"Completed SoMEF for {base_name}. Results in {output_dir}")
64-
return True
100+
return True
101+
102+
success = run_somef(
103+
repo_url,
104+
output_file,
105+
threshold,
106+
branch,
107+
codemeta_file=codemeta_file if generate_codemeta else None,
108+
)
109+
return output_dir if success else None

tests/test_cli.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""Unit tests to verify CLI behavior for codemeta generation."""
2+
3+
import importlib
4+
from unittest.mock import MagicMock
5+
6+
cli_module = importlib.import_module("rsmetacheck.cli")
7+
8+
9+
REPO_URL = "https://github.com/SoftwareUnderstanding/sw-metadata-bot"
10+
11+
12+
def test_cli_with_generate_codemeta_adds_codemeta_output(monkeypatch, tmp_path):
13+
"""Ensure --generate-codemeta requests codemeta output in SoMEF command."""
14+
somef_output_dir = tmp_path / "somef_outputs"
15+
expected_codemeta = str(somef_output_dir / "somef_generated_codemeta.json")
16+
17+
run_analysis_mock = MagicMock()
18+
subprocess_run_mock = MagicMock()
19+
20+
monkeypatch.setattr(
21+
"sys.argv",
22+
[
23+
"rsmetacheck",
24+
"--input",
25+
REPO_URL,
26+
"--somef-output",
27+
str(somef_output_dir),
28+
"--generate-codemeta",
29+
],
30+
)
31+
monkeypatch.setattr(cli_module, "ensure_somef_configured", lambda: True)
32+
monkeypatch.setattr(cli_module, "run_analysis", run_analysis_mock)
33+
monkeypatch.setattr("rsmetacheck.run_somef.subprocess.run", subprocess_run_mock)
34+
35+
cli_module.cli()
36+
37+
command = subprocess_run_mock.call_args.args[0]
38+
assert command[0:2] == ["somef", "describe"]
39+
assert "-c" in command
40+
assert expected_codemeta in command
41+
42+
run_analysis_mock.assert_called_once()
43+
44+
45+
def test_cli_without_generate_codemeta_keeps_default_behavior(monkeypatch, tmp_path):
    """Check that a plain CLI call never passes the codemeta flag to SoMEF."""
    out_dir = tmp_path / "somef_outputs"
    argv = [
        "rsmetacheck",
        "--input",
        REPO_URL,
        "--somef-output",
        str(out_dir),
    ]
    fake_analysis = MagicMock()
    fake_subprocess_run = MagicMock()

    # Replace every external effect: CLI arguments, SoMEF configuration,
    # the analysis step and the actual SoMEF subprocess call.
    monkeypatch.setattr("sys.argv", argv)
    monkeypatch.setattr(cli_module, "ensure_somef_configured", lambda: True)
    monkeypatch.setattr(cli_module, "run_analysis", fake_analysis)
    monkeypatch.setattr("rsmetacheck.run_somef.subprocess.run", fake_subprocess_run)

    cli_module.cli()

    # The first positional argument of subprocess.run is the command list.
    somef_cmd = fake_subprocess_run.call_args.args[0]
    assert somef_cmd[0:2] == ["somef", "describe"]
    assert "-c" not in somef_cmd

    fake_analysis.assert_called_once()

0 commit comments

Comments
 (0)