Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 28 additions & 11 deletions dir_content_diff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import re
from pathlib import Path

from dir_content_diff.base_comparators import BaseComparator
from dir_content_diff.base_comparators import DefaultComparator
from dir_content_diff.base_comparators import IniComparator
from dir_content_diff.base_comparators import JsonComparator
Expand All @@ -29,7 +30,6 @@

__version__ = importlib.metadata.version("dir-content-diff")


_DEFAULT_COMPARATORS = {
None: DefaultComparator(),
".cfg": IniComparator(), # luigi config files
Expand Down Expand Up @@ -224,12 +224,34 @@ def export_formatted_file(file, formatted_file, comparator, **kwargs):
),
)
else:
LOGGER.info(
LOGGER.debug(
"Skip formatting for '%s' because the comparator has no saving capability.",
file,
)


def pick_comparator(comparator=None, suffix=None, comparators=None):
    """Select the comparator to use, from an instance, a class name or a file suffix.

    The lookup is attempted in this order:

    1. ``comparator`` itself, when it is already a ``BaseComparator`` instance;
    2. the first value of ``comparators`` whose class name equals ``comparator``;
    3. the value registered in ``comparators`` under ``suffix``;
    4. the entry stored under the ``None`` key of the module-level ``_COMPARATORS``.

    Args:
        comparator: A ``BaseComparator`` instance, the class name of a registered comparator,
            or ``None``.
        suffix: The key looked up in ``comparators`` (e.g. a file suffix), or ``None``.
        comparators: The mapping in which the comparator is searched. When ``None``, the
            mapping returned by ``get_comparators()`` is used.

    Returns:
        The selected comparator.
    """
    if isinstance(comparator, BaseComparator):
        return comparator

    registry = get_comparators() if comparators is None else comparators

    if comparator is not None:
        # A private sentinel distinguishes "no match" from any registered value.
        not_found = object()
        matching = next(
            (
                candidate
                for candidate in registry.values()
                if candidate.__class__.__name__ == comparator
            ),
            not_found,
        )
        if matching is not not_found:
            return matching
        LOGGER.debug(
            "Could not find the comparator named '%s' in the given comparators",
            comparator,
        )

    if suffix is not None:
        if suffix not in registry:
            LOGGER.debug("Could not find the comparator for the '%s' suffix", suffix)
        else:
            return registry.get(suffix)

    # Neither the name nor the suffix gave a result: fall back to the default entry.
    LOGGER.debug("Returning the default comparator")
    return _COMPARATORS.get(None)


def compare_trees(
ref_path,
comp_path,
Expand Down Expand Up @@ -290,9 +312,6 @@ def compare_trees(
difference messages. If the directories are considered as equal, an empty ``dict`` is
returned.
"""
if comparators is None:
comparators = _COMPARATORS

ref_path = Path(ref_path)
comp_path = Path(comp_path)
formatted_data_path = comp_path.with_name(
Expand Down Expand Up @@ -332,12 +351,10 @@ def compare_trees(
break
if specific_file_args is None:
specific_file_args = {}
comparator = specific_file_args.pop(
"comparator",
comparators.get(
ref_file.suffix,
_COMPARATORS.get(None),
),
comparator = pick_comparator(
comparator=specific_file_args.pop("comparator", None),
suffix=ref_file.suffix,
comparators=comparators,
)
comparator_args = specific_file_args.pop("args", [])
res = compare_files(
Expand Down
100 changes: 90 additions & 10 deletions dir_content_diff/base_comparators.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@
from xml.etree import ElementTree

import dictdiffer
import diff_pdf_visually
import jsonpath_ng
import yaml
from dicttoxml import dicttoxml
from diff_pdf_visually import pdf_similar
from diff_pdf_visually import pdfdiff_pages

from dir_content_diff.util import diff_msg_formatter

Expand Down Expand Up @@ -618,35 +619,114 @@ def diff(self, ref, comp, *args, **kwargs):

Keyword Args:
threshold (int): The threshold used to compare the images.
tempdir (pathlib.Path): Empty directory where the temporary images will be exported.
tempdir (pathlib.Path): Directory in which a new ``dir-diff`` directory will be created
to export the debug images.
dpi (int): The resolution used to convert the PDF files into images.
verbosity (int): The log verbosity.
max_report_pagenos (int): Only this number of the different pages will be logged (only
used if the verbosity is greater than 1).
num_threads (int): If set to 2 (the default), the image conversion are processed in
parallel. If set to 1 it is processed sequentially.
"""
res = pdfdiff_pages(ref, comp, *args, **kwargs)
if not res:
return False
return res

def __call__(self, ref_file, comp_file, *args, **kwargs):
# NOTE(review): this span looks like a diff view in which removed and added lines are
# interleaved and the indentation is lost (two consecutive ``for`` headers, uses of both
# ``ref``/``comp`` and ``ref_file``/``comp_file``) -- confirm every remark below against the
# properly indented file.
"""Process arguments before calling the diff method."""
# Take "tempdir" out of the keyword arguments; a derived directory is stored back under the
# same key further down.
tempdir = kwargs.pop("tempdir", None)
if tempdir is not None:
relative_parts = []
# Walk the parts of both paths starting from the last one and collect the parts that are
# equal, stopping at the first mismatch.
# NOTE(review): the next line uses ``ref``/``comp``, which are not parameters of this method;
# presumably it is the removed version of the loop header that follows it.
for i, j in zip(ref.parts[::-1], comp.parts[::-1]): # pragma: no branch
for i, j in zip(
ref_file.parts[::-1], comp_file.parts[::-1]
): # pragma: no branch
if i != j:
break
relative_parts.append(i)
# Discard the last collected part when it equals the root of ``tempdir``.
if relative_parts and relative_parts[-1] == Path(tempdir).root:
relative_parts.pop()
# NOTE(review): two naming schemes are visible below ("diff-pdf-" prefix on the last part vs.
# an extra "diff-pdf" part); presumably only one of them belongs to the current code.
if not relative_parts:
relative_parts.append(comp.name)
relative_parts[-1] = "diff-pdf-" + relative_parts[-1]
relative_parts.append(comp_file.name)
relative_parts.append("diff-pdf")
# The collected parts are reversed again so that the directory is built in path order.
new_tempdir = Path(tempdir) / Path(*relative_parts[::-1])

# Deduplicate name if needed
# The loop only ends through the ``break`` that follows a successful ``mkdir``; because
# ``exist_ok=False``, an already existing directory raises ``FileExistsError`` instead.
last_part = str(relative_parts[-1])
num = 1
while True:
try:
root = Path(tempdir) / relative_parts[-1]
if not root.exists():
new_tempdir.mkdir(parents=True, exist_ok=False)
break
except FileExistsError:
# NOTE(review): two renaming strategies follow (``with_name`` on the directory vs. rebuilding
# the path from ``last_part`` plus a numeric suffix), each with its own ``num += 1`` --
# presumably the old and the new version; confirm which lines are inside the handler.
new_tempdir = new_tempdir.with_name(new_tempdir.name + f"_{num}")
num += 1
relative_parts[-1] = last_part + f"_{num}"
new_tempdir = Path(tempdir) / Path(*relative_parts[::-1])
num += 1

kwargs["tempdir"] = new_tempdir
# NOTE(review): ``ref`` and ``comp`` are not defined in this method; this return presumably
# belongs to the removed side of the diff.
return not pdf_similar(ref, comp, *args, **kwargs)

try:
# Update default verbosity
# Only done when the caller did not pass "verbosity": the current library default is
# remembered, then either the library constant is set to 0 or ``verbosity=0`` is passed
# explicitly, depending on whether the second default of ``pdfdiff_pages`` is ``None``.
if "verbosity" not in kwargs: # pragma: no branch
current_default_verbosity = int(
diff_pdf_visually.constants.DEFAULT_VERBOSITY
)
try:
if (
diff_pdf_visually.diff.pdfdiff_pages.__defaults__[1] is None
): # pragma: no cover
diff_pdf_visually.constants.DEFAULT_VERBOSITY = 0
else:
kwargs["verbosity"] = 0
finally:
diff_pdf_visually.constants.DEFAULT_VERBOSITY = (
current_default_verbosity
)
# Delegate the actual comparison to the parent class.
return super().__call__(ref_file, comp_file, *args, **kwargs)
finally:
# NOTE(review): ``current_default_verbosity`` is only assigned in the
# ``"verbosity" not in kwargs`` branch; if this ``finally`` belongs to the outer ``try``, it
# would raise ``UnboundLocalError`` when the caller passes ``verbosity`` -- confirm.
diff_pdf_visually.constants.DEFAULT_VERBOSITY = current_default_verbosity

def report(
self,
ref_file,
comp_file,
formatted_differences,
diff_args,
diff_kwargs,
load_kwargs=None,
format_data_kwargs=None,
filter_kwargs=None,
format_diff_kwargs=None,
sort_kwargs=None,
concat_kwargs=None,
**kwargs,
): # pylint: disable=too-many-arguments
"""Add specific information before calling the default method.

When ``formatted_differences`` is a non-empty string, it is prefixed with a sentence stating
that the listed pages are the most different, and its newlines are replaced by ``", "``.
When ``diff_kwargs`` contains a ``tempdir`` key, a line giving that directory is appended to
``formatted_differences``.

All the arguments, with the possibly updated ``formatted_differences``, are then forwarded
unchanged to the ``report`` method of the parent class, whose result is returned.
"""
# Only a non-empty string is rewritten; any other value is left untouched by this branch.
if formatted_differences and isinstance(formatted_differences, str):
formatted_differences = (
"The following pages are the most different: "
+ formatted_differences.replace("\n", ", ")
)
# NOTE(review): the indentation is not visible here, so it is unclear whether this check is
# nested in the branch above; if it is not, ``+=`` is applied to whatever type
# ``formatted_differences`` has -- confirm against the indented file.
if "tempdir" in diff_kwargs:
formatted_differences += (
"\nThe visual differences can be found here: "
+ str(diff_kwargs["tempdir"])
)
return super().report(
ref_file,
comp_file,
formatted_differences,
diff_args,
diff_kwargs,
load_kwargs=load_kwargs,
format_data_kwargs=format_data_kwargs,
filter_kwargs=filter_kwargs,
format_diff_kwargs=format_diff_kwargs,
sort_kwargs=sort_kwargs,
concat_kwargs=concat_kwargs,
**kwargs,
)

def format_diff(self, difference, **kwargs):
    """Return the text form of a single difference element.

    Args:
        difference: The element to format; it is converted with ``str``.
        **kwargs: Accepted but not used by this implementation.

    Returns:
        str: The string representation of ``difference``.
    """
    as_text = str(difference)
    return as_text
Loading