Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions mapillary_tools/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ def _parse_scaled_integers(
CUTOFF_TIME = float(os.getenv(_ENV_PREFIX + "CUTOFF_TIME", 60))
DUPLICATE_DISTANCE = float(os.getenv(_ENV_PREFIX + "DUPLICATE_DISTANCE", 0.1))
DUPLICATE_ANGLE = float(os.getenv(_ENV_PREFIX + "DUPLICATE_ANGLE", 5))
MAX_AVG_SPEED = float(
os.getenv(_ENV_PREFIX + "MAX_AVG_SPEED", 400_000 / 3600)
MAX_CAPTURE_SPEED_KMH = float(
os.getenv(_ENV_PREFIX + "MAX_CAPTURE_SPEED_KMH", 400)
) # 400 KM/h
# WARNING: Changing the following envvars might result in failed uploads
# Max number of images per sequence
Expand Down
4 changes: 4 additions & 0 deletions mapillary_tools/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ class MapillaryVideoGPSNotFoundError(MapillaryDescriptionError):
pass


class MapillaryInvalidVideoError(MapillaryDescriptionError):
    """Raised when a video file is structurally invalid, e.g. a required
    MP4 box cannot be located while parsing."""


class MapillaryGPXEmptyError(MapillaryDescriptionError):
    """Raised when a GPX source yields no usable track data.

    NOTE(review): inferred from the class name — raise sites are not
    visible here; confirm against callers.
    """

Expand Down
9 changes: 8 additions & 1 deletion mapillary_tools/geotag/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,14 @@ def process(
reprocessable_paths = set(paths)

for idx, option in enumerate(options):
LOG.debug("Processing %d files with %s", len(reprocessable_paths), option)
if LOG.getEffectiveLevel() <= logging.DEBUG:
LOG.info(
f"==> Processing {len(reprocessable_paths)} files with source {option}..."
)
else:
LOG.info(
f"==> Processing {len(reprocessable_paths)} files with source {option.source.value}..."
)

image_videos, video_paths = _filter_images_and_videos(
reprocessable_paths, option.filetypes
Expand Down
25 changes: 25 additions & 0 deletions mapillary_tools/geotag/video_extractors/native.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ... import blackvue_parser, exceptions, geo, telemetry, types, utils
from ...camm import camm_parser
from ...gpmf import gpmf_gps_filter, gpmf_parser
from ...mp4 import construct_mp4_parser, simple_mp4_parser
from .base import BaseVideoExtractor


Expand Down Expand Up @@ -113,20 +114,44 @@ def extract(self) -> types.VideoMetadata:
extractor = GoProVideoExtractor(self.video_path)
try:
return extractor.extract()
except simple_mp4_parser.BoxNotFoundError as ex:
raise exceptions.MapillaryInvalidVideoError(
f"Invalid video: {ex}"
) from ex
except construct_mp4_parser.BoxNotFoundError as ex:
raise exceptions.MapillaryInvalidVideoError(
f"Invalid video: {ex}"
) from ex
except exceptions.MapillaryVideoGPSNotFoundError:
pass

if ft is None or types.FileType.VIDEO in ft or types.FileType.CAMM in ft:
extractor = CAMMVideoExtractor(self.video_path)
try:
return extractor.extract()
except simple_mp4_parser.BoxNotFoundError as ex:
raise exceptions.MapillaryInvalidVideoError(
f"Invalid video: {ex}"
) from ex
except construct_mp4_parser.BoxNotFoundError as ex:
raise exceptions.MapillaryInvalidVideoError(
f"Invalid video: {ex}"
) from ex
except exceptions.MapillaryVideoGPSNotFoundError:
pass

if ft is None or types.FileType.VIDEO in ft or types.FileType.BLACKVUE in ft:
extractor = BlackVueVideoExtractor(self.video_path)
try:
return extractor.extract()
except simple_mp4_parser.BoxNotFoundError as ex:
raise exceptions.MapillaryInvalidVideoError(
f"Invalid video: {ex}"
) from ex
except construct_mp4_parser.BoxNotFoundError as ex:
raise exceptions.MapillaryInvalidVideoError(
f"Invalid video: {ex}"
) from ex
except exceptions.MapillaryVideoGPSNotFoundError:
pass

Expand Down
6 changes: 5 additions & 1 deletion mapillary_tools/mp4/construct_mp4_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,10 @@ class BoxDict(T.TypedDict, total=True):
SwitchMapType = T.Dict[BoxType, T.Union[C.Construct, "SwitchMapType"]]


class BoxNotFoundError(Exception):
    """Raised when an MP4 box expected at a given box path is not present."""


class Box64ConstructBuilder:
"""
Build a box struct that **parses** MP4 boxes with both 32-bit and 64-bit sizes.
Expand Down Expand Up @@ -591,7 +595,7 @@ def find_box_at_pathx(
) -> BoxDict:
found = find_box_at_path(box, path)
if found is None:
raise ValueError(f"box at path {path} not found")
raise BoxNotFoundError(f"box at path {path} not found")
return found


Expand Down
47 changes: 22 additions & 25 deletions mapillary_tools/process_geotag_properties.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from __future__ import annotations

import collections
import datetime
import logging
import typing as T
from pathlib import Path

import humanize
from tqdm import tqdm

from . import constants, exceptions, exif_write, types, utils
Expand Down Expand Up @@ -217,17 +217,19 @@ def _write_metadatas(
LOG.info("Check the description file for details: %s", desc_path)


def _is_error_skipped(error_type: str, skipped_process_errors: set[T.Type[Exception]]):
skipped_process_error_names = set(err.__name__ for err in skipped_process_errors)
skip_all = Exception in skipped_process_errors
return skip_all or error_type in skipped_process_error_names
def _is_error_skipped(
error_type: type[Exception], skipped_process_errors: set[type[Exception]]
):
return (Exception in skipped_process_errors) or (
error_type in skipped_process_errors
)


def _show_stats(
metadatas: T.Sequence[types.MetadataOrError],
skipped_process_errors: set[T.Type[Exception]],
) -> None:
LOG.info("========== Process summary ==========")
LOG.info("==> Process summary")

metadatas_by_filetype: dict[types.FileType, list[types.MetadataOrError]] = {}
for metadata in metadatas:
Expand All @@ -244,9 +246,7 @@ def _show_stats(
metadata
for metadata in metadatas
if isinstance(metadata, types.ErrorMetadata)
and not _is_error_skipped(
metadata.error.__class__.__name__, skipped_process_errors
)
and not _is_error_skipped(type(metadata.error), skipped_process_errors)
]
if critical_error_metadatas:
raise exceptions.MapillaryProcessError(
Expand All @@ -262,38 +262,35 @@ def _show_stats_per_filetype(
good_metadatas: list[types.Metadata]
good_metadatas, error_metadatas = types.separate_errors(metadatas)

filesize_to_upload = sum(
[0 if m.filesize is None else m.filesize for m in good_metadatas]
)

LOG.info("%8d %s(s) read in total", len(metadatas), filetype.value)
LOG.info(f"{len(metadatas)} {filetype.value} read in total")
if good_metadatas:
total_filesize = sum(
[0 if m.filesize is None else m.filesize for m in good_metadatas]
)
LOG.info(
"\t %8d %s(s) (%s MB) are ready to be uploaded",
len(good_metadatas),
filetype.value,
round(filesize_to_upload / 1024 / 1024, 1),
f"\t{len(good_metadatas)} ({humanize.naturalsize(total_filesize)}) ready"
)

error_counter = collections.Counter(
metadata.error.__class__.__name__ for metadata in error_metadatas
)
errors_by_type: dict[type[Exception], list[types.ErrorMetadata]] = {}
for metadata in error_metadatas:
errors_by_type.setdefault(type(metadata.error), []).append(metadata)

for error_type, count in error_counter.items():
for error_type, errors in errors_by_type.items():
total_filesize = sum([utils.get_file_size_quietly(m.filename) for m in errors])
if _is_error_skipped(error_type, skipped_process_errors):
LOG.warning(
"\t %8d %s(s) skipped due to %s", count, filetype.value, error_type
f"\t{len(errors)} ({humanize.naturalsize(total_filesize)}) {error_type.__name__}"
)
else:
LOG.error(
"\t %8d %s(s) failed due to %s", count, filetype.value, error_type
f"\t{len(errors)} ({humanize.naturalsize(total_filesize)}) {error_type.__name__}"
)


def _validate_metadatas(
metadatas: T.Collection[types.MetadataOrError], num_processes: int | None
) -> list[types.MetadataOrError]:
LOG.debug("Validating %d metadatas", len(metadatas))
LOG.info(f"==> Validating {len(metadatas)} metadatas...")

# validating metadatas is slow, hence multiprocessing

Expand Down
55 changes: 33 additions & 22 deletions mapillary_tools/process_sequence_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import os
import typing as T

import humanize

from . import constants, exceptions, geo, types, utils
from .serializer.description import DescriptionJSONSerializer

Expand Down Expand Up @@ -215,7 +217,7 @@ def _is_video_stationary(
def _check_video_limits(
video_metadatas: T.Iterable[types.VideoMetadata],
max_sequence_filesize_in_bytes: int | None,
max_avg_speed: float,
max_capture_speed_kmh: float,
max_radius_for_stationary_check: float,
) -> tuple[list[types.VideoMetadata], list[types.ErrorMetadata]]:
output_video_metadatas: list[types.VideoMetadata] = []
Expand All @@ -238,7 +240,7 @@ def _check_video_limits(
)
if video_filesize > max_sequence_filesize_in_bytes:
raise exceptions.MapillaryFileTooLargeError(
f"Video file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
f"Video file size {humanize.naturalsize(video_filesize)} exceeds max allowed {humanize.naturalsize(max_sequence_filesize_in_bytes)}",
)

contains_null_island = any(
Expand All @@ -249,15 +251,19 @@ def _check_video_limits(
"GPS coordinates in Null Island (0, 0)"
)

avg_speed_kmh = (
geo.avg_speed(video_metadata.points) * 3.6
) # Convert m/s to km/h
too_fast = (
len(video_metadata.points) >= 2
and geo.avg_speed(video_metadata.points) > max_avg_speed
and avg_speed_kmh > max_capture_speed_kmh
)
if too_fast:
raise exceptions.MapillaryCaptureSpeedTooFastError(
f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
f"Capture speed {avg_speed_kmh:.3f} km/h exceeds max allowed {max_capture_speed_kmh:.3f} km/h",
)
except exceptions.MapillaryDescriptionError as ex:
LOG.error(f"{_video_name(video_metadata)}: {ex}")
error_metadatas.append(
types.describe_error_metadata(
exc=ex,
Expand All @@ -268,18 +274,17 @@ def _check_video_limits(
else:
output_video_metadatas.append(video_metadata)

if error_metadatas:
LOG.info(
f"Video validation: {len(output_video_metadatas)} valid, {len(error_metadatas)} errors"
)

return output_video_metadatas, error_metadatas


def _video_name(video_metadata: types.VideoMetadata) -> str:
return video_metadata.filename.name


def _check_sequences_by_limits(
input_sequences: T.Sequence[PointSequence],
max_sequence_filesize_in_bytes: int | None,
max_avg_speed: float,
max_capture_speed_kmh: float,
) -> tuple[list[PointSequence], list[types.ErrorMetadata]]:
output_sequences: list[PointSequence] = []
output_errors: list[types.ErrorMetadata] = []
Expand All @@ -295,7 +300,7 @@ def _check_sequences_by_limits(
)
if sequence_filesize > max_sequence_filesize_in_bytes:
raise exceptions.MapillaryFileTooLargeError(
f"Sequence file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
f"Sequence file size {humanize.naturalsize(sequence_filesize)} exceeds max allowed {humanize.naturalsize(max_sequence_filesize_in_bytes)}",
)

contains_null_island = any(
Expand All @@ -306,12 +311,14 @@ def _check_sequences_by_limits(
"GPS coordinates in Null Island (0, 0)"
)

too_fast = len(sequence) >= 2 and geo.avg_speed(sequence) > max_avg_speed
avg_speed_kmh = geo.avg_speed(sequence) * 3.6 # Convert m/s to km/h
too_fast = len(sequence) >= 2 and avg_speed_kmh > max_capture_speed_kmh
if too_fast:
raise exceptions.MapillaryCaptureSpeedTooFastError(
f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
f"Capture speed {avg_speed_kmh:.3f} km/h exceeds max allowed {max_capture_speed_kmh:.3f} km/h",
)
except exceptions.MapillaryDescriptionError as ex:
LOG.error(f"{_sequence_name(sequence)}: {ex}")
for image in sequence:
output_errors.append(
types.describe_error_metadata(
Expand All @@ -326,14 +333,16 @@ def _check_sequences_by_limits(
len(s) for s in input_sequences
)

if output_errors:
LOG.info(
f"Sequence validation: {len(output_sequences)} valid, {len(output_errors)} errors"
)

return output_sequences, output_errors


def _sequence_name(sequence: T.Sequence[types.ImageMetadata]) -> str:
if not sequence:
return "N/A"
image = sequence[0]
return f"{image.filename.parent.name}/{image.filename.name}"


def _group_by_folder_and_camera(
image_metadatas: list[types.ImageMetadata],
) -> list[list[types.ImageMetadata]]:
Expand Down Expand Up @@ -594,8 +603,10 @@ def process_sequence_properties(
interpolate_directions: bool = False,
duplicate_distance: float = constants.DUPLICATE_DISTANCE,
duplicate_angle: float = constants.DUPLICATE_ANGLE,
max_avg_speed: float = constants.MAX_AVG_SPEED,
max_capture_speed_kmh: float = constants.MAX_CAPTURE_SPEED_KMH,
) -> list[types.MetadataOrError]:
LOG.info("==> Processing sequences...")

max_sequence_filesize_in_bytes = constants.MAX_SEQUENCE_FILESIZE
max_sequence_pixels = constants.MAX_SEQUENCE_PIXELS

Expand All @@ -611,14 +622,14 @@ def process_sequence_properties(
elif isinstance(metadata, types.VideoMetadata):
video_metadatas.append(metadata)
else:
raise RuntimeError(f"invalid metadata type: {metadata}")
raise ValueError(f"invalid metadata type: {metadata}")

if video_metadatas:
# Check limits for videos
video_metadatas, video_error_metadatas = _check_video_limits(
video_metadatas,
max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
max_avg_speed=max_avg_speed,
max_capture_speed_kmh=max_capture_speed_kmh,
max_radius_for_stationary_check=10.0,
)
error_metadatas.extend(video_error_metadatas)
Expand Down Expand Up @@ -668,7 +679,7 @@ def process_sequence_properties(
sequences, errors = _check_sequences_by_limits(
sequences,
max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
max_avg_speed=max_avg_speed,
max_capture_speed_kmh=max_capture_speed_kmh,
)
error_metadatas.extend(errors)

Expand Down
Loading
Loading