Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 33 additions & 38 deletions cfbs/masterfiles/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,19 @@ def versions_checksums_files(

if version not in versions_dict["versions"]:
versions_dict["versions"][version] = {}
if "files" not in versions_dict["versions"][version]:
versions_dict["versions"][version]["files"] = {}
versions_dict["versions"][version]["files"][tarball_relpath] = file_checksum
versions_dict["versions"][version][tarball_relpath] = file_checksum

if not file_checksum in checksums_dict["checksums"]:
checksums_dict["checksums"][file_checksum] = []
checksums_dict["checksums"][file_checksum].append(
{
"file": tarball_relpath,
"version": version,
}
)
checksums_dict["checksums"][file_checksum] = {}
if not tarball_relpath in checksums_dict["checksums"][file_checksum]:
checksums_dict["checksums"][file_checksum][tarball_relpath] = []
checksums_dict["checksums"][file_checksum][tarball_relpath].append(version)

if not tarball_relpath in files_dict["files"]:
files_dict["files"][tarball_relpath] = []
files_dict["files"][tarball_relpath].append(
{
"checksum": file_checksum,
"version": version,
}
)
files_dict["files"][tarball_relpath] = {}
if not file_checksum in files_dict["files"][tarball_relpath]:
files_dict["files"][tarball_relpath][file_checksum] = []
files_dict["files"][tarball_relpath][file_checksum].append(version)

return versions_dict, checksums_dict, files_dict

Expand All @@ -53,42 +45,45 @@ def finalize_vcf(versions_dict, checksums_dict, files_dict):

# checksums.json:
working_dict = checksums_dict["checksums"]
# sort each list, first by version descending, then by filepath alphabetically
for k in working_dict.keys():
working_dict[k] = sorted(
working_dict[k],
key=lambda d: (
version_as_comparable_list_negated(d["version"]),
d["file"],
),
)
for c in working_dict.keys():
for f in working_dict[c].keys():
# sort each version list, descending
working_dict[c][f] = sorted(
working_dict[c][f],
key=lambda v: version_as_comparable_list(v),
reverse=True,
)
# sort filepaths, alphabetically
working_dict[c] = dict_sorted_by_key(working_dict[c])
# sort checksums
checksums_dict["checksums"] = dict_sorted_by_key(working_dict)

# files.json:
working_dict = files_dict["files"]
# sort each list, first by version descending, then by checksum
for k in working_dict.keys():
working_dict[k] = sorted(
working_dict[k],
key=lambda d: (
version_as_comparable_list_negated(d["version"]),
d["checksum"],
),
)
for f in working_dict.keys():
for c in working_dict[f].keys():
# sort each version list, descending
working_dict[f][c] = sorted(
working_dict[f][c],
key=lambda v: version_as_comparable_list(v),
reverse=True,
)
# sort checksums
working_dict[f] = dict_sorted_by_key(working_dict[f])
# sort files, alphabetically
files_dict["files"] = dict_sorted_by_key(working_dict)

# versions.json:
working_dict = versions_dict["versions"]
# sort files of each version
for k in working_dict.keys():
working_dict[k]["files"] = dict_sorted_by_key(working_dict[k]["files"])
for v in working_dict.keys():
working_dict[v] = dict_sorted_by_key(working_dict[v])
# sort version numbers, in decreasing order
versions_dict["versions"] = OrderedDict(
sorted(
versions_dict["versions"].items(),
key=lambda p: (version_as_comparable_list(p[0]), p[1]),
working_dict.items(),
key=lambda p: version_as_comparable_list(p[0]),
reverse=True,
)
)
Expand Down
97 changes: 74 additions & 23 deletions cfbs/masterfiles/check_download_matches_git.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from collections import OrderedDict

from cfbs.utils import dict_diff, read_json, user_error
from cfbs.masterfiles.analyze import version_as_comparable_list
from cfbs.utils import dict_diff, read_json, user_error, write_json


def check_download_matches_git(versions):
Expand All @@ -14,33 +15,83 @@ def check_download_matches_git(versions):
download_versions_dict = read_json("versions.json")
git_versions_dict = read_json("versions-git.json")

os.makedirs("differences", exist_ok=True)
diffs_dict = {"differences": {}}

nonmatching_versions = []
extraneous_count = 0
differing_count = 0

for version in versions:
download_version_dict = download_versions_dict["versions"][version]["files"]
git_version_dict = git_versions_dict["versions"][version]["files"]
dl_version_files_dict = download_versions_dict["versions"][version]
git_version_files_dict = git_versions_dict["versions"][version]

# normalize downloaded version dictionary filepaths
# necessary because the downloaded version and git version dictionaries have filepaths of different forms
new_download_dict = {}
for key, value in download_version_dict.items():
for key, value in dl_version_files_dict.items():
if key.startswith("masterfiles/"):
key = key[12:]
new_download_dict[key] = value
download_version_dict = new_download_dict

with open("differences/difference-" + version + ".txt", "w") as f:
only_dl, only_git, value_diff = dict_diff(
download_version_dict, git_version_dict
)

print("Files only in the downloaded version:", only_dl, file=f)
print("Files only in the git version:", only_git, file=f)
print("Files with different contents:", value_diff, file=f)

if len(only_dl) > 0 or len(value_diff) > 0:
user_error(
"Downloadable files of version "
+ version
+ " do not match git files"
)
dl_version_files_dict = new_download_dict

version_diffs_dict = {}
version_diffs_dict["files_only_in_downloads"] = []
version_diffs_dict["files_only_in_git"] = []
version_diffs_dict["files_with_different_content"] = []

only_dl, only_git, value_diff = dict_diff(
dl_version_files_dict, git_version_files_dict
)

for filepath in only_dl:
version_diffs_dict["files_only_in_downloads"].append(filepath)
for filepath in only_git:
version_diffs_dict["files_only_in_git"].append(filepath)
for filepath, _, _ in value_diff:
version_diffs_dict["files_with_different_content"].append(filepath)

diffs_dict["differences"][version] = version_diffs_dict

if len(only_dl) > 0 or len(value_diff) > 0:
nonmatching_versions.append(version)
extraneous_count += len(only_dl)
differing_count += len(value_diff)

nonmatching_versions.sort(key=lambda v: version_as_comparable_list(v), reverse=True)

# fully sort differences.json:
working_dict = diffs_dict["differences"]
# sort filepaths of each version, alphabetically
for k in working_dict.keys():
working_dict[k]["files_only_in_downloads"].sort()
working_dict[k]["files_only_in_git"].sort()
working_dict[k]["files_with_different_content"].sort()
# sort version numbers, in decreasing order
diffs_dict["differences"] = OrderedDict(
sorted(
working_dict.items(),
key=lambda p: version_as_comparable_list(p[0]),
reverse=True,
)
)

write_json("differences.json", diffs_dict)

if len(nonmatching_versions) > 0:
user_error(
"The masterfiles downloaded from github.com and cfengine.com do not match - found "
+ str(extraneous_count)
+ " extraneous file"
+ ("" if extraneous_count == 1 else "s")
+ " and "
+ str(differing_count)
+ " differing file"
+ ("" if differing_count == 1 else "s")
+ " across "
+ str(len(nonmatching_versions))
+ " version"
+ ("" if len(nonmatching_versions) == 1 else "s")
+ " ("
+ ", ".join(nonmatching_versions)
+ "). See ./differences.json"
)
Loading