Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 43 additions & 68 deletions mapillary_tools/process_geotag_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def _parse_source_options(

for s in video_geotag_source:
for video_option in parse_source_option(s):
video_option.filetypes = _combine_filetypes(
video_option.filetypes = types.combine_filetype_filters(
video_option.filetypes, {types.FileType.VIDEO}
)
parsed_options.append(video_option)
Expand All @@ -69,35 +69,6 @@ def _parse_source_options(
return parsed_options


# Assume {GOPRO, VIDEO} are the NATIVE_VIDEO_FILETYPES:
# a | b = result
# {CAMM} | {GOPRO} = {}
# {CAMM} | {GOPRO, VIDEO} = {CAMM}
# {GOPRO} | {GOPRO, VIDEO} = {GOPRO}
# {GOPRO} | {VIDEO} = {GOPRO}
# {CAMM, GOPRO} | {VIDEO} = {CAMM, GOPRO}
# {VIDEO} | {VIDEO} = {CAMM, GOPRO, VIDEO}
def _combine_filetypes(
a: set[types.FileType] | None, b: set[types.FileType] | None
) -> set[types.FileType] | None:
if a is None:
return b

if b is None:
return a

# VIDEO is a superset of NATIVE_VIDEO_FILETYPES,
# so we add NATIVE_VIDEO_FILETYPES to each set for intersection later

if types.FileType.VIDEO in a:
a = a | types.NATIVE_VIDEO_FILETYPES

if types.FileType.VIDEO in b:
b = b | types.NATIVE_VIDEO_FILETYPES

return a.intersection(b)


def process_geotag_properties(
import_path: Path | T.Sequence[Path],
filetypes: set[types.FileType] | None,
Expand Down Expand Up @@ -135,7 +106,7 @@ def process_geotag_properties(
)

for option in options:
option.filetypes = _combine_filetypes(option.filetypes, filetypes)
option.filetypes = types.combine_filetype_filters(option.filetypes, filetypes)
option.num_processes = num_processes
if option.interpolation is None:
option.interpolation = InterpolationOption(
Expand Down Expand Up @@ -236,9 +207,7 @@ def _write_metadatas(
LOG.info("Check the description file for details: %s", desc_path)


def _is_error_skipped(
error_type: str, skipped_process_errors: T.Set[T.Type[Exception]]
):
def _is_error_skipped(error_type: str, skipped_process_errors: set[T.Type[Exception]]):
skipped_process_error_names = set(err.__name__ for err in skipped_process_errors)
skip_all = Exception in skipped_process_errors
return skip_all or error_type in skipped_process_error_names
Expand All @@ -248,17 +217,13 @@ def _show_stats(
metadatas: T.Sequence[types.MetadataOrError],
skipped_process_errors: T.Set[T.Type[Exception]],
) -> None:
metadatas_by_filetype: T.Dict[types.FileType, list[types.MetadataOrError]] = {}
metadatas_by_filetype: dict[types.FileType, list[types.MetadataOrError]] = {}
for metadata in metadatas:
filetype: types.FileType | None
if isinstance(metadata, types.ImageMetadata):
filetype = types.FileType.IMAGE
else:
filetype = metadata.filetype
if filetype:
metadatas_by_filetype.setdefault(types.FileType(filetype), []).append(
metadata
)
metadatas_by_filetype.setdefault(filetype, []).append(metadata)

for filetype, group in metadatas_by_filetype.items():
_show_stats_per_filetype(group, filetype, skipped_process_errors)
Expand All @@ -278,19 +243,16 @@ def _show_stats(


def _show_stats_per_filetype(
metadatas: T.Sequence[types.MetadataOrError],
metadatas: T.Collection[types.MetadataOrError],
filetype: types.FileType,
skipped_process_errors: T.Set[T.Type[Exception]],
):
good_metadatas: list[T.Union[types.VideoMetadata, types.ImageMetadata]] = []
filesize_to_upload = 0
error_metadatas: list[types.ErrorMetadata] = []
for metadata in metadatas:
if isinstance(metadata, types.ErrorMetadata):
error_metadatas.append(metadata)
else:
good_metadatas.append(metadata)
filesize_to_upload += metadata.filesize or 0
good_metadatas: list[types.Metadata]
good_metadatas, error_metadatas = types.separate_errors(metadatas)

filesize_to_upload = sum(
[0 if m.filesize is None else m.filesize for m in good_metadatas]
)

LOG.info("%8d %s(s) read in total", len(metadatas), filetype.value)
if good_metadatas:
Expand All @@ -317,8 +279,10 @@ def _show_stats_per_filetype(


def _validate_metadatas(
metadatas: T.Sequence[types.MetadataOrError], num_processes: int | None
metadatas: T.Collection[types.MetadataOrError], num_processes: int | None
) -> list[types.MetadataOrError]:
LOG.debug("Validating %d metadatas", len(metadatas))

# validating metadatas is slow, hence multiprocessing

# Do not pass error metadatas where the error object can not be pickled for multiprocessing to work
Expand Down Expand Up @@ -361,34 +325,45 @@ def process_finalize(
desc_path: str | None = None,
num_processes: int | None = None,
) -> list[types.MetadataOrError]:
image_metadatas: list[types.ImageMetadata] = []
video_metadatas: list[types.VideoMetadata] = []

for metadata in metadatas:
if isinstance(metadata, types.VideoMetadata):
if device_make is not None:
metadata.make = device_make
if device_model is not None:
metadata.model = device_model
video_metadatas.append(metadata)
elif isinstance(metadata, types.ImageMetadata):
if device_make is not None:
metadata.MAPDeviceMake = device_make
if device_model is not None:
metadata.MAPDeviceModel = device_model
image_metadatas.append(metadata)

for metadata in video_metadatas:
if device_make is not None:
metadata.make = device_make
if device_model is not None:
metadata.model = device_model

for metadata in image_metadatas:
if device_make is not None:
metadata.MAPDeviceMake = device_make
if device_model is not None:
metadata.MAPDeviceModel = device_model
# Add the basename
metadata.MAPFilename = metadata.filename.name

# modified in place
_apply_offsets(
[
metadata
for metadata in metadatas
if isinstance(metadata, types.ImageMetadata)
],
image_metadatas,
offset_time=offset_time,
offset_angle=offset_angle,
)

LOG.debug("Validating %d metadatas", len(metadatas))
metadatas = _validate_metadatas(metadatas, num_processes=num_processes)

# image_metadatas and video_metadatas get stale after the validation,
# hence delete them to avoid confusion
del image_metadatas
del video_metadatas

_overwrite_exif_tags(
# search image metadatas again because some of them might have been failed
# Search image metadatas again because some of them might have been failed
[
metadata
for metadata in metadatas
Expand Down Expand Up @@ -426,10 +401,10 @@ def process_finalize(
# write descs first because _show_stats() may raise an exception
_write_metadatas(metadatas, desc_path)

# show stats
# Show stats
skipped_process_errors: T.Set[T.Type[Exception]]
if skip_process_errors:
# skip all exceptions
# Skip all exceptions
skipped_process_errors = {Exception}
else:
skipped_process_errors = {exceptions.MapillaryDuplicationError}
Expand Down
52 changes: 40 additions & 12 deletions mapillary_tools/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ class ImageMetadata(geo.Point):
MAPOrientation: T.Optional[int] = None
# deprecated since v0.10.0; keep here for compatibility
MAPMetaTags: T.Optional[T.Dict] = None
# deprecated since v0.10.0; keep here for compatibility
MAPFilename: T.Optional[str] = None
filesize: T.Optional[int] = None

Expand Down Expand Up @@ -105,7 +104,7 @@ def update_md5sum(self) -> None:
@dataclasses.dataclass
class ErrorMetadata:
filename: Path
filetype: T.Optional[FileType]
filetype: FileType
error: Exception


Expand All @@ -115,6 +114,35 @@ class ErrorMetadata:
MetadataOrError = T.Union[Metadata, ErrorMetadata]


# Assume {GOPRO, VIDEO} are the NATIVE_VIDEO_FILETYPES:
# a | b = result
# {CAMM} | {GOPRO} = {}
# {CAMM} | {GOPRO, VIDEO} = {CAMM}
# {GOPRO} | {GOPRO, VIDEO} = {GOPRO}
# {GOPRO} | {VIDEO} = {GOPRO}
# {CAMM, GOPRO} | {VIDEO} = {CAMM, GOPRO}
# {VIDEO} | {VIDEO} = {CAMM, GOPRO, VIDEO}
def combine_filetype_filters(
a: set[FileType] | None, b: set[FileType] | None
) -> set[FileType] | None:
if a is None:
return b

if b is None:
return a

# VIDEO is a superset of NATIVE_VIDEO_FILETYPES,
# so we add NATIVE_VIDEO_FILETYPES to each set for intersection later

if FileType.VIDEO in a:
a = a | NATIVE_VIDEO_FILETYPES

if FileType.VIDEO in b:
b = b | NATIVE_VIDEO_FILETYPES

return a.intersection(b)


class UserItem(TypedDict, total=False):
MAPOrganizationKey: T.Union[int, str]
# Not in use. Keep here for back-compatibility
Expand Down Expand Up @@ -239,7 +267,7 @@ def _describe_error_desc(


def describe_error_metadata(
exc: Exception, filename: Path, filetype: T.Optional[FileType]
exc: Exception, filename: Path, filetype: FileType
) -> ErrorMetadata:
return ErrorMetadata(filename=filename, filetype=filetype, error=exc)

Expand Down Expand Up @@ -307,7 +335,6 @@ def describe_error_metadata(
"MAPDeviceModel": {"type": "string"},
"MAPGPSAccuracyMeters": {"type": "number"},
"MAPCameraUUID": {"type": "string"},
# deprecated since v0.10.0; keep here for compatibility
"MAPFilename": {
"type": "string",
"description": "The base filename of the image",
Expand Down Expand Up @@ -673,15 +700,16 @@ def validate_and_fail_metadata(metadata: MetadataOrError) -> MetadataOrError:
if isinstance(metadata, ErrorMetadata):
return metadata

filetype: T.Optional[FileType] = None
if isinstance(metadata, ImageMetadata):
filetype = FileType.IMAGE
validate = validate_image_desc
else:
assert isinstance(metadata, VideoMetadata)
filetype = metadata.filetype
validate = validate_video_desc

try:
if isinstance(metadata, ImageMetadata):
filetype = FileType.IMAGE
validate_image_desc(as_desc(metadata))
else:
assert isinstance(metadata, VideoMetadata)
filetype = metadata.filetype
validate_video_desc(as_desc(metadata))
validate(as_desc(metadata))
except exceptions.MapillaryMetadataValidationError as ex:
# rethrow because the original error is too verbose
return describe_error_metadata(
Expand Down
9 changes: 8 additions & 1 deletion tests/integration/test_process_and_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,14 @@ def _validate_uploads(upload_dir: py.path.local, expected):
else:
raise Exception(f"invalid file {file}")

excludes = ["filename", "filesize", "md5sum", "MAPMetaTags", "MAPSequenceUUID"]
excludes = [
"filename",
"filesize",
"md5sum",
"MAPMetaTags",
"MAPSequenceUUID",
"MAPFilename",
]

actual = {}
for desc in descs:
Expand Down
1 change: 1 addition & 0 deletions tests/unit/test_sequence_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ def test_process_finalize(setup_data):
"filename": str(test_exif),
"filetype": "image",
"filesize": None,
"MAPFilename": "test_exif.jpg",
"MAPLatitude": 1,
"MAPLongitude": 1,
"MAPCaptureTime": "1970_01_01_00_00_02_000",
Expand Down
Loading