Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions cr_checker/cr_checker.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def copyright_checker(
visibility,
template,
config,
exclusion = None,
extensions = [],
offset = 0,
remove_offset = 0,
Expand All @@ -40,6 +41,8 @@ def copyright_checker(
Defaults to "//tools/cr_checker/resources:templates".
config (str, optional): Path to the config resource used for project variables.
Defaults to "//tools/cr_checker/resources:config".
exclusion (str, optional): Path to a text file listing files to be excluded from the copyright check.
File format: one path per line, relative to the repository root.
extensions (list, optional): A list of file extensions to filter the source files.
Defaults to an empty list, meaning all files are checked.
offset (int, optional): The line offset for applying checks or modifications.
Expand All @@ -65,12 +68,14 @@ def copyright_checker(
"-t $(location {})".format(template),
"-c $(location {})".format(config),
]
data = []
if len(extensions):
args.append("-e {exts}".format(
exts = " ".join([exts for exts in extensions]),
))

if exclusion:
args.append("--exclusion-file $(location {})".format(exclusion))

if offset:
args.append("--offset {}".format(offset))

Expand All @@ -89,17 +94,18 @@ def copyright_checker(
if remove_offset:
args.append("--remove_offset {}".format(remove_offset))

data = srcs + [template, config]
if exclusion:
data.append(exclusion)

py_binary(
name = t_name,
main = "cr_checker.py",
srcs = [
"@score_tooling//cr_checker/tool:cr_checker_lib",
],
args = args,
data = srcs + [
template,
config,
],
data = data,
visibility = visibility,
)

Expand Down
4 changes: 2 additions & 2 deletions cr_checker/tests/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
load("@score_tooling//python_basics:defs.bzl", "score_py_pytest")

score_py_pytest(
name = "shebang_unit_tests",
name = "unit_tests",
srcs = [
"test_shebang_handling.py",
"test_cr_checker.py",
],
deps = [
"@score_tooling//cr_checker/tool:cr_checker_lib",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import importlib.util
import json
import pytest
from datetime import datetime
from pathlib import Path

Expand All @@ -32,15 +33,15 @@ def load_cr_checker_module():


# load the license template
def load_py_template() -> str:
def load_template(extension: str) -> str:
cr_checker = load_cr_checker_module()
template_file = Path(__file__).resolve().parents[1] / "resources" / "templates.ini"
templates = cr_checker.load_templates(template_file)
return templates["py"]
return templates[extension]


# write the config file here so that the year is always up to date with the year
# written in the mock "script.py" file
# written in the test file
def write_config(path: Path, author: str) -> Path:
config_path = path / "config.json"
config_path.write_text(json.dumps({"author": author}), encoding="utf-8")
Expand All @@ -61,11 +62,110 @@ def test_detect_shebang_offset_counts_trailing_newlines(tmp_path):
assert offset == len("#!/usr/bin/env python3\n\n".encode("utf-8"))


@pytest.fixture(
    params=[
        "cpp",
        "c",
        "h",
        "hpp",
        "py",
        "sh",
        "bzl",
        "ini",
        "yml",
        "BUILD",
        "bazel",
        "rs",
        "rst",
    ]
)
def prepare_test_with_header(request, tmp_path):
    """Create one test file per parametrized extension that starts with a header.

    The header is the extension's template rendered with the current year and
    the author "Author", followed by a single line of content.

    Returns:
        tuple: (path of the created file, extension, unrendered header template)
    """
    extension = request.param
    header_template = load_template(extension)
    rendered_header = header_template.format(
        year=datetime.now().year,
        author="Author",
    )
    test_file = tmp_path / f"file.{extension}"
    test_file.write_text(f"{rendered_header}some content\n", encoding="utf-8")
    return test_file, extension, header_template

@pytest.fixture(
    params=[
        "cpp",
        "c",
        "h",
        "hpp",
        "py",
        "sh",
        "bzl",
        "ini",
        "yml",
        "BUILD",
        "bazel",
        "rs",
        "rst",
    ]
)
def prepare_test_no_header(request, tmp_path):
    """Create one test file per parametrized extension without any header.

    The file only holds a single line of content, so a copyright check on it
    is expected to report a missing header.

    Returns:
        tuple: (path of the created file, extension, unrendered header template,
        the temporary directory the file was created in)
    """
    extension = request.param
    test_file = tmp_path / ("file." + extension)
    header_template = load_template(extension)
    # No header is rendered here, so the current year is not needed.
    test_file.write_text(
        "some content\n",
        encoding="utf-8",
    )
    return test_file, extension, header_template, tmp_path

def test_process_files_detects_header(prepare_test_with_header):
    """A file that starts with a rendered header is not counted as missing one."""
    file_with_header, extension, header_template = prepare_test_with_header
    checker = load_cr_checker_module()

    results = checker.process_files(
        files=[file_with_header],
        templates={extension: header_template},
        exclusion=[],
        fix=False,
        use_mmap=False,
        encoding="utf-8",
        offset=0,
        remove_offset=0,
    )

    assert results["no_copyright"] == 0

def test_process_files_detects_missing_header(prepare_test_no_header):
    """A file without a header is counted once under "no_copyright"."""
    headerless_file, extension, header_template, _ = prepare_test_no_header
    checker = load_cr_checker_module()

    results = checker.process_files(
        files=[headerless_file],
        templates={extension: header_template},
        exclusion=[],
        fix=False,
        use_mmap=False,
        encoding="utf-8",
        offset=0,
        remove_offset=0,
    )

    assert results["no_copyright"] == 1

def test_process_files_inserts_missing_header(prepare_test_no_header):
    """In fix mode a headerless file is reported, fixed, and starts with the header."""
    headerless_file, extension, header_template, work_dir = prepare_test_no_header
    checker = load_cr_checker_module()
    author = "Author"
    config = write_config(work_dir, author)

    results = checker.process_files(
        files=[headerless_file],
        templates={extension: header_template},
        exclusion=[],
        fix=True,
        config=config,
        use_mmap=False,
        encoding="utf-8",
        offset=0,
        remove_offset=0,
    )

    assert results["no_copyright"] == 1
    assert results["fixed"] == 1
    # The header written by the fix must match the template rendered with the
    # same author and the current year.
    expected_header = header_template.format(year=datetime.now().year, author=author)
    written_text = headerless_file.read_text(encoding="utf-8")
    assert written_text.startswith(expected_header)

def test_process_files_skips_exclusion_with_missing_header(prepare_test_no_header):
    """A headerless file listed in the exclusion list is not reported."""
    headerless_file, extension, header_template, _ = prepare_test_no_header
    checker = load_cr_checker_module()
    # The exclusion list holds paths as strings.
    excluded_paths = [str(headerless_file)]

    results = checker.process_files(
        files=[headerless_file],
        templates={extension: header_template},
        exclusion=excluded_paths,
        fix=False,
        use_mmap=False,
        encoding="utf-8",
        offset=0,
        remove_offset=0,
    )

    assert results["no_copyright"] == 0

# test that process_files function validates a license header after the shebang line
def test_process_files_accepts_header_after_shebang(tmp_path):
cr_checker = load_cr_checker_module()
script = tmp_path / "script.py"
header_template = load_py_template()
header_template = load_template("py")
current_year = datetime.now().year
header = header_template.format(year=current_year, author="Author")
script.write_text(
Expand All @@ -76,6 +176,7 @@ def test_process_files_accepts_header_after_shebang(tmp_path):
results = cr_checker.process_files(
[script],
{"py": header_template},
[],
False,
use_mmap=False,
encoding="utf-8",
Expand All @@ -94,14 +195,15 @@ def test_process_files_fix_inserts_header_after_shebang(tmp_path):
"#!/usr/bin/env python3\nprint('hi')\n",
encoding="utf-8",
)
header_template = load_py_template()
header_template = load_template("py")
current_year = datetime.now().year
author = "Author"
config = write_config(tmp_path, author)

results = cr_checker.process_files(
[script],
{"py": header_template},
[],
True,
config,
use_mmap=False,
Expand All @@ -122,14 +224,15 @@ def test_process_files_fix_inserts_header_after_shebang(tmp_path):
def test_process_files_accepts_header_without_shebang(tmp_path):
cr_checker = load_cr_checker_module()
script = tmp_path / "script.py"
header_template = load_py_template()
header_template = load_template("py")
current_year = datetime.now().year
header = header_template.format(year=current_year, author="Author")
script.write_text(header + "print('hi')\n", encoding="utf-8")

results = cr_checker.process_files(
[script],
{"py": header_template},
[],
False,
use_mmap=False,
encoding="utf-8",
Expand All @@ -145,14 +248,15 @@ def test_process_files_fix_inserts_header_without_shebang(tmp_path):
cr_checker = load_cr_checker_module()
script = tmp_path / "script.py"
script.write_text("print('hi')\n", encoding="utf-8")
header_template = load_py_template()
header_template = load_template("py")
current_year = datetime.now().year
author = "Author"
config = write_config(tmp_path, author)

results = cr_checker.process_files(
[script],
{"py": header_template},
[],
True,
config,
use_mmap=False,
Expand Down
62 changes: 61 additions & 1 deletion cr_checker/tool/cr_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,39 @@ def add_template_for_extensions(templates: dict, extensions: list, template: str
LOGGER.debug(templates)
return templates

def load_exclusion(path):
    """
    Loads the list of files being excluded from the copyright check.

    The exclusion file is read as UTF-8 text with one path per line. Blank
    lines are ignored. Every remaining entry must point to an existing regular
    file; entries that do not are logged as errors and dropped from the result.

    Args:
        path (str): Path to the exclusion file.

    Returns:
        tuple(list, bool): a list of files that are excluded from the copyright check and a boolean indicating whether
        all paths listed in the exclusion file exist and are files.

    Raises:
        OSError: If the exclusion file itself cannot be opened or read.
    """
    with open(path, "r", encoding="utf-8") as file:
        entries = file.read().splitlines()

    exclusion = []
    valid = True
    # Collect valid entries into a new list. Removing items from the list that
    # is being iterated would skip the entry following each removed one, so
    # invalid paths could slip through unchecked.
    for item in entries:
        if not item.strip():
            # An empty line is formatting, not an excluded path.
            continue

        candidate = Path(item)
        if not candidate.exists():
            LOGGER.error("Excluded file %s does not exist.", item)
            valid = False
            continue
        if not candidate.is_file():
            LOGGER.error("Excluded file %s is not a file.", item)
            valid = False
            continue
        exclusion.append(item)

    LOGGER.debug(exclusion)
    return exclusion, valid


def configure_logging(log_file_path=None, verbose=False):
"""
Expand Down Expand Up @@ -489,6 +522,7 @@ def fix_copyright(path, copyright_text, encoding, offset, config=None):
def process_files(
files,
templates,
exclusion,
fix,
config=None,
use_mmap=False,
Expand All @@ -504,6 +538,8 @@ def process_files(
templates (dict): A dictionary where keys are file extensions
(e.g., '.py', '.txt') and values are strings or patterns
representing the required copyright text.
exclusion (list): A list of paths to files to be excluded from the copyright
check.
config (Path): Path to the config JSON file where configuration
variables are stored (e.g. years for copyright headers).
use_mmap (bool): Flag for using mmap function for reading files
Expand All @@ -528,6 +564,11 @@ def process_files(
)
continue


if str(item) in exclusion:
logging.debug("Skipped due to exclusion: %s", item)
continue

if os.path.getsize(item) == 0:
# No need to add copyright headers to empty files
continue
Expand Down Expand Up @@ -576,6 +617,13 @@ def parse_arguments(argv):
help="Path to the template file",
)

parser.add_argument(
"--exclusion-file",
type=Path,
required=False,
help="Path to the file listing file paths excluded from the copyright check.",
)

parser.add_argument(
"-c",
"--config-file",
Expand Down Expand Up @@ -675,6 +723,15 @@ def main(argv=None):
LOGGER.error("Failed to load copyright text: %s", err)
return err.errno

exclusion = []
exclusion_valid = True
if args.exclusion_file:
try:
exclusion, exclusion_valid = load_exclusion(args.exclusion_file)
except IOError as err:
LOGGER.error("Failed to load exclusion list: %s", err)
return err.errno

try:
files = collect_inputs(args.inputs, args.extensions)
except IOError as err:
Expand All @@ -694,6 +751,7 @@ def main(argv=None):
results = process_files(
files,
templates,
exclusion,
args.fix,
args.config_file,
args.use_memory_map,
Expand All @@ -712,6 +770,8 @@ def main(argv=None):
total_no,
COLORS["ENDC"],
)
if not exclusion_valid:
LOGGER.info("The exclusion file contains paths that do not exist.")
if args.fix:
total_not_fixed = total_no - total_fixes
LOGGER.info(
Expand All @@ -728,7 +788,7 @@ def main(argv=None):
)
LOGGER.info("=" * 64)

return 0 if total_no == 0 else 1
return 0 if (total_no == 0 and exclusion_valid) else 1


if __name__ == "__main__":
Expand Down
Loading