Skip to content

Commit 3f054bd

Browse files
committed
refactor image transform code and fix a bunch of edge case resizing/cropping bugs, avoid downloading the same file multiple times, add unit tests and much debug logging
1 parent 00ed4da commit 3f054bd

File tree

7 files changed

+213
-79
lines changed

7 files changed

+213
-79
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,5 @@ repos:
1616
- "exifread"
1717
- "httpx"
1818
- "pillow"
19+
exclude: "tests/.*"
1920
...

pyproject.toml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ dev = [
2828
"pre-commit==4.0.0",
2929
"setuptools-scm==8.0.4",
3030
]
31+
test = [
32+
"pytest==8.3.3",
33+
"pytest-mock==3.14.0",
34+
]
3135

3236
[project.urls]
3337
homepage = "https://github.com/bornhack/bma-client-lib-python"
@@ -63,3 +67,14 @@ line-length = 120
6367

6468
[tool.ruff.lint.pydocstyle]
6569
convention = "google"
70+
71+
[tool.ruff.lint.per-file-ignores]
72+
"*/tests/*" = [
73+
"S101", # https://docs.astral.sh/ruff/rules/assert/
74+
"ANN002", # https://docs.astral.sh/ruff/rules/missing-type-args/
75+
"ANN003", # https://docs.astral.sh/ruff/rules/missing-type-kwargs/
76+
]
77+
78+
[tool.mypy]
79+
mypy_path = "src"
80+
strict = true

src/bma_client_lib/bma_client.py

Lines changed: 31 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import json
44
import logging
5-
import math
65
import time
76
import uuid
87
from fractions import Fraction
@@ -39,7 +38,7 @@
3938

4039
# get version
4140
try:
42-
__version__ = version("bma-client-lib")
41+
__version__ = version("bma_client_lib")
4342
except PackageNotFoundError:
4443
__version__ = "0.0.0"
4544

@@ -79,6 +78,11 @@ def __init__(
7978
self.skip_exif_tags = SKIP_EXIF_TAGS
8079
self.get_server_settings()
8180
self.__version__ = __version__
81+
# build client object
82+
self.clientjson = {
83+
"client_uuid": self.uuid,
84+
"client_version": f"bma-client-lib {__version__}",
85+
}
8286

8387
def update_access_token(self) -> None:
8488
"""Set or update self.access_token using self.refresh_token."""
@@ -104,54 +108,57 @@ def get_server_settings(self) -> dict[str, dict[str, dict[str, list[str]]]]:
104108
self.base_url + "/api/v1/json/jobs/settings/",
105109
).raise_for_status()
106110
self.settings = r.json()["bma_response"]
107-
return r.json()
111+
return self.settings # type: ignore[no-any-return]
108112

109113
def get_jobs(self, job_filter: str = "?limit=0") -> list[Job]:
110114
"""Get a filtered list of the jobs this user has access to."""
111115
r = self.client.get(self.base_url + f"/api/v1/json/jobs/{job_filter}").raise_for_status()
112116
response = r.json()["bma_response"]
113117
logger.debug(f"Returning {len(response)} jobs with filter {job_filter}")
114-
return response
118+
return response # type: ignore[no-any-return]
115119

116120
def get_file_info(self, file_uuid: uuid.UUID) -> dict[str, str]:
117121
"""Get metadata for a file."""
118122
r = self.client.get(self.base_url + f"/api/v1/json/files/{file_uuid}/").raise_for_status()
119-
return r.json()["bma_response"]
123+
return r.json()["bma_response"] # type: ignore[no-any-return]
120124

121125
def download(self, url: str, path: Path) -> Path:
122126
"""Download a file to a path."""
123127
r = self.client.get(url).raise_for_status()
124128
logger.debug(f"Done downloading {len(r.content)} bytes from {url}, saving to {path}")
129+
path.parent.mkdir(parents=True, exist_ok=True)
125130
with path.open("wb") as f:
126131
f.write(r.content)
127132
return path
128133

129134
def download_job_source(self, job: Job) -> Path:
130135
"""Download the file needed to do a job."""
136+
# skip the leading slash when using url as a local path
137+
path = self.path / job.source_url[1:]
138+
if path.exists():
139+
# file was downloaded previously
140+
return path
141+
# get the file
131142
return self.download(
132143
url=self.base_url + job.source_url,
133-
path=self.path / job.source_filename,
144+
path=path,
134145
)
135146

136147
def get_job_assignment(self, job_filter: str = "") -> list[Job]:
137148
"""Ask for new job(s) from the API."""
138149
url = self.base_url + "/api/v1/json/jobs/assign/"
139150
if job_filter:
140151
url += job_filter
141-
data = {
142-
"client_uuid": self.uuid,
143-
"client_version": f"bma-client-lib {__version__}",
144-
}
145152
try:
146-
r = self.client.post(url, json=data).raise_for_status()
153+
r = self.client.post(url, json=self.clientjson).raise_for_status()
147154
response = r.json()["bma_response"]
148155
except httpx.HTTPStatusError as e:
149156
if e.response.status_code == HTTPStatus.NOT_FOUND:
150157
response = []
151158
else:
152159
raise
153160
logger.debug(f"Returning {len(response)} assigned jobs")
154-
return response
161+
return response # type: ignore[no-any-return]
155162

156163
def unassign_job(self, job: Job) -> bool:
157164
"""Unassign a job."""
@@ -163,7 +170,7 @@ def unassign_job(self, job: Job) -> bool:
163170

164171
def upload_file(self, path: Path, attribution: str, file_license: str) -> dict[str, dict[str, str]]:
165172
"""Upload a file."""
166-
# get mimetype
173+
# get mimetype using magic on the first 2kb of the file
167174
with path.open("rb") as fh:
168175
mimetype = magic.from_buffer(fh.read(2048), mime=True)
169176

@@ -209,11 +216,11 @@ def upload_file(self, path: Path, attribution: str, file_license: str) -> dict[s
209216
# doit
210217
r = self.client.post(
211218
self.base_url + "/api/v1/json/files/upload/",
212-
data={"metadata": json.dumps(data)},
219+
data={"f_metadata": json.dumps(data), "client": json.dumps(self.clientjson)},
213220
files=files,
214221
timeout=30,
215222
)
216-
return r.json()
223+
return r.json() # type: ignore[no-any-return]
217224

218225
def handle_job(self, job: Job) -> None:
219226
"""Do the thing and upload the result."""
@@ -236,7 +243,7 @@ def handle_job(self, job: Job) -> None:
236243
raise JobNotSupportedError(job=job)
237244
source = self.download_job_source(job)
238245
result = self.create_thumbnail_source(job=job)
239-
filename = job.source_filename
246+
filename = job.source_url
240247

241248
else:
242249
raise JobNotSupportedError(job=job)
@@ -264,6 +271,7 @@ def write_and_upload_result(self, job: Job, result: "JobResult", filename: str)
264271
kwargs["append_images"] = image[1:]
265272
kwargs["save_all"] = True
266273
image[0].save(buf, format=job.filetype, exif=exif, **kwargs)
274+
metadata = {"width": image[0].width, "height": image[0].height, "mimetype": job.mimetype}
267275

268276
elif isinstance(job, ImageExifExtractionJob):
269277
logger.debug(f"Got exif data {result}")
@@ -329,6 +337,7 @@ def handle_image_conversion_job(
329337
logger.debug(f"Desired image size is {size}, aspect ratio: {ratio} ({orig_str}), converting image...")
330338
start = time.time()
331339
images = transform_image(original_img=image, crop_w=size[0], crop_h=size[1])
340+
logger.debug(f"Result image size is {images[0].width}*{images[0].height}")
332341
logger.debug(f"Converting image size and AR took {time.time() - start} seconds")
333342

334343
logger.debug("Done, returning result...")
@@ -340,20 +349,15 @@ def upload_job_result(
340349
buf: "BytesIO",
341350
filename: str,
342351
metadata: dict[str, str | int] | None = None,
343-
) -> dict:
352+
) -> dict[str, str]:
344353
"""Upload the result of a job."""
345354
size = buf.getbuffer().nbytes
346355
logger.debug(f"Uploading {size} bytes result for job {job.job_uuid} with filename {filename}")
347356
start = time.time()
348357
files = {"f": (filename, buf)}
349-
# build client object
350-
client = {
351-
"client_uuid": self.uuid,
352-
"client_version": "bma-client-lib {__version__}",
353-
}
354-
data = {"client": json.dumps(client)}
355-
if isinstance(job, ThumbnailSourceJob):
356-
# ThumbnailSourceJob needs a metadata object as well
358+
data = {"client": json.dumps(self.clientjson)}
359+
if isinstance(job, ThumbnailJob | ThumbnailSourceJob | ImageConversionJob):
360+
# Image generating jobs needs a metadata object as well
357361
data["metadata"] = json.dumps(metadata)
358362
# doit
359363
r = self.client.post(
@@ -363,7 +367,7 @@ def upload_job_result(
363367
).raise_for_status()
364368
t = time.time() - start
365369
logger.debug(f"Done, it took {t} seconds to upload {size} bytes, speed {round(size/t)} bytes/sec")
366-
return r.json()
370+
return r.json() # type: ignore[no-any-return]
367371

368372
def get_exif(self, fname: Path) -> "ExifExtractionJobResult":
369373
"""Return a dict with exif data as read by exifread from the file.
@@ -398,27 +402,9 @@ def create_album(self, file_uuids: list[uuid.UUID], title: str, description: str
398402
"description": description,
399403
}
400404
r = self.client.post(url, json=data).raise_for_status()
401-
return r.json()["bma_response"]
405+
return r.json()["bma_response"] # type: ignore[no-any-return]
402406

403407
def create_thumbnail_source(self, job: ThumbnailSourceJob) -> "ThumbnailSourceJobResult":
404408
"""Create a thumbnail source for this file."""
405-
info = self.get_file_info(file_uuid=job.basefile_uuid)
406-
if info["filetype"] == "image":
407-
# use a max 500px wide version of the image as thumbnail source
408-
path = self.path / info["filename"]
409-
original_ratio = Fraction(int(info["width"]), int(info["height"]))
410-
height = math.floor(500 / original_ratio)
411-
# just call the regular image conversion method to make a thumbnail
412-
return self.handle_image_conversion_job(
413-
job=ImageConversionJob(
414-
**job.__dict__,
415-
width=500,
416-
height=height,
417-
custom_aspect_ratio=False,
418-
filetype="WEBP",
419-
mimetype="image/webp",
420-
),
421-
orig=path,
422-
)
423409
# unsupported filetype
424410
raise JobNotSupportedError(job=job)

src/bma_client_lib/datastructures.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@ class BaseJob:
1919
client_version: str
2020
finished: bool
2121
source_url: str
22-
source_filename: str
23-
source_mimetype: str
2422
schema_name: str
2523

2624

src/bma_client_lib/pillow_resize_and_crop.py

Lines changed: 74 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,106 @@
11
"""Pillow cropping with sequence (gif, webp) support.
22
3-
Borrowed from https://gist.github.com/muratgozel/ce1aa99f97fc1a99b3f3ec90cf77e5f5
3+
Originally based on https://gist.github.com/muratgozel/ce1aa99f97fc1a99b3f3ec90cf77e5f5
44
"""
55

6-
from math import fabs, floor
6+
import logging
7+
from fractions import Fraction
8+
from math import floor
79

810
from PIL import Image, ImageFile, ImageSequence
911

12+
logger = logging.getLogger("bma_client")
1013

11-
def transform_image(original_img: Image.Image, crop_w: int, crop_h: int, center_point: tuple[int, int] = (0.5,0.5)) -> list[Image.Image | ImageFile.ImageFile]:
12-
"""Resizes and crops the image to the specified crop_w and crop_h if necessary.
14+
15+
def transform_image(
16+
original_img: Image.Image, crop_w: int, crop_h: int, center_point: tuple[float, float] = (0.5, 0.5)
17+
) -> list[Image.Image | ImageFile.ImageFile]:
18+
"""Shrinks and crops the image to the specified crop_w and crop_h if necessary.
1319
1420
Works with multi frame gif and webp images.
1521
1622
Args:
1723
original_img(Image.Image): is the image instance created by pillow ( Image.open(filepath) )
18-
crop_w(int): is the width in pixels for the image that will be resized and cropped
19-
crop_h(int): is the height in pixels for the image that will be resized and cropped
24+
crop_w(int): is the desired width in pixels
25+
crop_h(int): is the desired height in pixels
26+
center_point(tuple[float,float]): The center point of cropping as a percentage.
2027
2128
returns:
22-
Instance of an Image or list of frames which they are instances of an Image individually
29+
List of one or more Image instances
2330
"""
2431
img_w, img_h = (original_img.size[0], original_img.size[1])
2532
# sequence?
2633
n_frames = getattr(original_img, "n_frames", 1)
2734

2835
def transform_frame(frame: Image.Image) -> Image.Image | ImageFile.ImageFile:
2936
"""Resizes and crops the individual frame in the image."""
30-
# resize the image to the specified height if crop_w is null in the recipe
31-
if crop_w is None:
32-
if crop_h == img_h:
33-
return frame
34-
new_w = floor(img_w * crop_h / img_h)
35-
new_h = crop_h
36-
return frame.resize((new_w, new_h), resample=Image.Resampling.LANCZOS)
37-
3837
# return the original image if crop size is equal to img size
3938
if crop_w == img_w and crop_h == img_h:
39+
logger.debug(
40+
f"Image size and requested size are the same ({crop_w}*{crop_h}), returning image without resizing"
41+
)
4042
return frame
4143

42-
# first resize to get most visible area of the image and then crop
43-
w_diff = fabs(crop_w - img_w)
44-
h_diff = fabs(crop_h - img_h)
45-
enlarge_image = bool(crop_w > img_w or crop_h > img_h)
46-
shrink_image = bool(crop_w < img_w or crop_h < img_h)
47-
48-
if enlarge_image is True:
49-
new_w = floor(crop_h * img_w / img_h) if h_diff > w_diff else crop_w
50-
new_h = floor(crop_w * img_h / img_w) if h_diff < w_diff else crop_h
51-
52-
if shrink_image is True:
53-
new_w = crop_w if h_diff > w_diff else floor(crop_h * img_w / img_h)
54-
new_h = crop_h if h_diff < w_diff else floor(crop_w * img_h / img_w)
55-
56-
left = (new_w - crop_w) * center_point[0]
44+
# resizing is required before cropping only if both image dimensions are bigger than the crop size
45+
if crop_w < img_w and crop_h < img_h:
46+
# if calculated height is bigger than requested crop height
47+
if floor(crop_w * img_h / img_w) > crop_h:
48+
# then resize image to requested crop width keeping proportional height
49+
new_w = crop_w
50+
new_h = floor(crop_w * img_h / img_w)
51+
else:
52+
# else resize the image to requested crop height keeping proportional width
53+
new_w = floor(crop_h * img_w / img_h)
54+
new_h = crop_h
55+
else:
56+
# keep size since one or both dimensions is <= crop size
57+
new_w = img_w
58+
new_h = img_h
59+
60+
# get crop coordinates
61+
left = floor((new_w - crop_w) * center_point[0])
62+
top = floor((new_h - crop_h) * center_point[1])
5763
right = left + crop_w
58-
top = (new_h - crop_h) * center_point[1]
5964
bottom = top + crop_h
6065

61-
return frame.resize((new_w, new_h), resample=Image.Resampling.LANCZOS).crop((left, top, right, bottom))
66+
orig_ratio = Fraction(img_w, img_h)
67+
new_ratio = Fraction(new_w, new_h)
68+
logger.debug(
69+
f"Original size is {img_w}*{img_h} ({orig_ratio}), requested size is {crop_w}*{crop_h}, resizing "
70+
f"to {new_w}*{new_h} ({new_ratio}), then cropping - initial crop coord are {left, top, right, bottom}"
71+
)
72+
73+
# make sure any space outside the image is transparent
74+
t_left = max(left, 0)
75+
t_top = max(top, 0)
76+
t_right = min(right, new_w)
77+
t_bottom = min(bottom, new_h)
78+
logger.debug(f"after transparency adjustments crop coords are {t_left, t_top, t_right, t_bottom}")
79+
80+
# resize and crop the image
81+
frame = frame.resize((new_w, new_h), resample=Image.Resampling.LANCZOS).crop((t_left, t_top, t_right, t_bottom))
82+
logger.debug(f"Result frame size is {frame.width}*{frame.height}")
83+
84+
logger.debug(f"new {crop_w}*{crop_h} orig {img_w}*{img_h}")
85+
if crop_w > img_w or crop_h > img_h:
86+
# original image has one or both dimensions smaller than the requested size,
87+
# paste the image onto a transparent canvas exactly as big as requested
88+
canvas = Image.new("RGBA", (crop_w, crop_h), (0, 0, 0, 0))
89+
logger.debug(f"Transparent canvas size is {canvas.width}*{canvas.height}")
90+
c_left = t_left - left
91+
c_top = t_top - top
92+
logger.debug(f"Positioning image on canvas at {c_left} {c_top}")
93+
canvas.paste(frame, (c_left, c_top))
94+
return canvas
95+
# original image larger than the requested size in both dimensions,
96+
# no transparent canvas needed, just return the resized frame as-is
97+
# frame might be 1px smaller than requested in one or both dimensions
98+
# due to rounding
99+
logger.debug(
100+
f"Image has been downsized from {img_w}*{img_h} to {frame.width}*{frame.height} - "
101+
f"requested size was {crop_w}*{crop_h}."
102+
)
103+
return frame
62104

63105
# single frame image
64106
if n_frames == 1:
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""This __init__.py is empty."""

0 commit comments

Comments
 (0)