Skip to content

Commit 3c0cc49

Browse files
committed
handle ThumbnailSourceJob and ThumbnailJob, improve typing for job handling
1 parent 66a099e commit 3c0cc49

File tree

2 files changed

+169
-51
lines changed

2 files changed

+169
-51
lines changed

src/bma_client_lib/bma_client.py

Lines changed: 109 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,29 @@
22

33
import json
44
import logging
5+
import math
56
import time
67
import uuid
78
from fractions import Fraction
89
from http import HTTPStatus
910
from importlib.metadata import PackageNotFoundError, version
1011
from io import BytesIO
1112
from pathlib import Path
12-
from typing import TYPE_CHECKING, TypeAlias
13+
from typing import TYPE_CHECKING
1314

1415
import exifread
1516
import httpx
1617
import magic
1718
from PIL import Image, ImageOps
1819

20+
from .datastructures import ImageConversionJob, ImageExifExtractionJob, Job, ThumbnailJob, ThumbnailSourceJob
21+
1922
logger = logging.getLogger("bma_client")
2023

2124
if TYPE_CHECKING:
2225
from django.http import HttpRequest
2326

24-
ImageConversionJobResult: TypeAlias = tuple[Image.Image, Image.Exif]
25-
ExifExtractionJobResult: TypeAlias = dict[str, dict[str, str]]
26-
JobResult: TypeAlias = ImageConversionJobResult | ExifExtractionJobResult
27+
from .datastructures import ExifExtractionJobResult, ImageConversionJobResult, JobResult, ThumbnailSourceJobResult
2728

2829
# maybe these should come from server settings
2930
SKIP_EXIF_TAGS = ["JPEGThumbnail", "TIFFThumbnail", "Filename"]
@@ -56,7 +57,7 @@ def __init__(
5657
oauth_client_id: str,
5758
refresh_token: str,
5859
path: Path,
59-
base_url: str = "https://media.bornhack.dk",
60+
base_url: str,
6061
client_uuid: uuid.UUID | None = None,
6162
) -> None:
6263
"""Save refresh token, get access token, get or set client uuid."""
@@ -109,20 +110,22 @@ def get_file_info(self, file_uuid: uuid.UUID) -> dict[str, str]:
109110
r = self.client.get(self.base_url + f"/api/v1/json/files/{file_uuid}/").raise_for_status()
110111
return r.json()["bma_response"]
111112

112-
def download(self, file_uuid: uuid.UUID) -> dict[str, str]:
113-
"""Download a file from BMA."""
114-
info = self.get_file_info(file_uuid=file_uuid)
115-
path = self.path / info["filename"]
116-
if not path.exists():
117-
url = self.base_url + info["links"]["downloads"]["original"] # type: ignore[index]
118-
logger.debug(f"Downloading file {url} ...")
119-
r = self.client.get(url).raise_for_status()
120-
logger.debug(f"Done downloading {len(r.content)} bytes, saving to {path}")
121-
with path.open("wb") as f:
122-
f.write(r.content)
123-
return info
124-
125-
def get_job_assignment(self, file_uuid: uuid.UUID | None = None) -> list[dict[str, dict[str, str]]]:
113+
def download(self, url: str, path: Path) -> Path:
    """Download a file to a path.

    Args:
        url: The URL to fetch through ``self.client``.
        path: The local file the response body is written to. The file is
            opened for binary writing, so existing content is replaced.

    Returns:
        The ``path`` that was passed in.

    Raises:
        Whatever ``raise_for_status()`` raises for an error response
        (``httpx.HTTPStatusError`` if ``self.client`` is an httpx client).
    """
    r = self.client.get(url).raise_for_status()
    logger.debug(f"Done downloading {len(r.content)} bytes from {url}, saving to {path}")
    path.write_bytes(r.content)
    return path
120+
121+
def download_job_source(self, job: Job) -> Path:
    """Download the file needed to do a job.

    The URL is ``self.base_url`` followed by ``job.source_url`` and the file
    is stored as ``job.source_filename`` below ``self.path``.

    Args:
        job: The job whose source file should be fetched.

    Returns:
        The local path the source file was written to.
    """
    return self.download(
        url=self.base_url + job.source_url,
        path=self.path / job.source_filename,
    )
127+
128+
def get_job_assignment(self, file_uuid: uuid.UUID | None = None) -> list[Job]:
126129
"""Ask for new job(s) from the API."""
127130
url = self.base_url + "/api/v1/json/jobs/assign/"
128131
if file_uuid:
@@ -195,46 +198,67 @@ def upload_file(self, path: Path, attribution: str, file_license: str) -> dict[s
195198
)
196199
return r.json()
197200

198-
def handle_job(self, job: dict[str, str], orig: Path) -> None:
201+
def handle_job(self, job: Job) -> None:
    """Do the thing and upload the result.

    Downloads the job's source file, produces the result matching the job
    class, then hands result and upload filename to
    ``write_and_upload_result()``.

    Args:
        job: The job to process.

    Raises:
        TypeError: If ``job`` is not an instance of a supported job class.
    """
    # make sure the source file for the job is available
    source = self.download_job_source(job)

    # do it
    result: JobResult
    if isinstance(job, ImageConversionJob | ThumbnailJob):
        result = self.handle_image_conversion_job(job=job, orig=source)
        filename = f"{job.job_uuid}.{job.filetype.lower()}"

    elif isinstance(job, ImageExifExtractionJob):
        result = self.get_exif(fname=source)
        filename = "exif.json"

    elif isinstance(job, ThumbnailSourceJob):
        result = self.create_thumbnail_source(job=job)
        filename = job.source_filename

    else:
        raise TypeError(f"Unsupported job type: {type(job).__name__}")

    self.write_and_upload_result(job=job, result=result, filename=filename)
212223

213-
def write_and_upload_result(self, job: dict[str, str], result: JobResult, filename: str) -> None:
224+
def write_and_upload_result(self, job: Job, result: "JobResult", filename: str) -> None:
    """Encode and write the job result to a buffer, then upload.

    Args:
        job: The job the result belongs to; its class selects the encoding.
        result: An ``(image, exif)`` pair for image conversion, thumbnail and
            thumbnail source jobs, or the exif dict for exif extraction jobs.
        filename: The filename to upload the encoded result under.

    Raises:
        TypeError: If an image job's result is not an
            ``(Image.Image, Image.Exif)`` pair, or the job class is unsupported.
    """
    with BytesIO() as buf:
        metadata: dict[str, int | str] = {}
        if isinstance(job, ImageConversionJob | ThumbnailJob):
            image, exif = result
            if not isinstance(image, Image.Image) or not isinstance(exif, Image.Exif):
                raise TypeError(
                    f"Expected an (Image.Image, Image.Exif) result for {job.job_type}, "
                    f"got ({type(image).__name__}, {type(exif).__name__})"
                )
            # apply format specific encoding options
            kwargs = {}
            if job.mimetype in self.settings["encoding"]["images"]:
                # this format has custom encoding options, like quality/lossless, apply them
                kwargs.update(self.settings["encoding"]["images"][job.mimetype])
                logger.debug(f"Format {job.mimetype} has custom encoding settings, kwargs is now: {kwargs}")
            else:
                logger.debug(f"No custom settings for format {job.mimetype}")
            image.save(buf, format=job.filetype, exif=exif, **kwargs)

        elif isinstance(job, ImageExifExtractionJob):
            logger.debug(f"Got exif data {result}")
            buf.write(json.dumps(result).encode())

        elif isinstance(job, ThumbnailSourceJob):
            image, exif = result
            if not isinstance(image, Image.Image) or not isinstance(exif, Image.Exif):
                raise TypeError(
                    f"Expected an (Image.Image, Image.Exif) result for {job.job_type}, "
                    f"got ({type(image).__name__}, {type(exif).__name__})"
                )
            image.save(buf, format="WEBP", lossless=True, quality=1)
            # report the real dimensions of the encoded image: Image.thumbnail()
            # never enlarges, so a narrow original ends up less than 500px wide
            metadata = {"width": image.width, "height": image.height, "mimetype": "image/webp"}

        else:
            logger.error("Unsupported job type")
            raise TypeError(job.job_type)

        # upload inside the with block, the buffer is closed when it exits
        self.upload_job_result(job=job, buf=buf, filename=filename, metadata=metadata)
258+
259+
def handle_image_conversion_job(
260+
self, job: ImageConversionJob, orig: Path, crop_center: tuple[float, float] = (0.5, 0.5)
261+
) -> "ImageConversionJobResult":
238262
"""Handle image conversion job."""
239263
start = time.time()
240264
logger.debug(f"Opening original image {orig}...")
@@ -258,49 +282,60 @@ def handle_image_conversion_job(self, job: dict[str, str], orig: Path) -> ImageC
258282
exif = image.getexif()
259283
logger.debug(f"Getting exif data took {time.time() - start} seconds")
260284

261-
size = int(job["width"]), int(job["height"])
285+
size = int(job.width), int(job.height)
262286
ratio = Fraction(*size)
263287

264-
if job["custom_aspect_ratio"]:
288+
if job.custom_aspect_ratio:
265289
orig_str = "custom"
266290
else:
267291
orig_str = "original"
268292
if orig_ar != ratio:
269293
orig_str += "(ish)"
294+
270295
logger.debug(f"Desired image size is {size}, aspect ratio: {ratio} ({orig_str}), converting image...")
271296
start = time.time()
272297
# custom AR or not?
273-
if job["custom_aspect_ratio"]:
274-
image = ImageOps.fit(image, size) # type: ignore[assignment]
298+
if job.custom_aspect_ratio:
299+
image = ImageOps.fit(image=image, size=size, method=Image.Resampling.LANCZOS, centering=crop_center) # type: ignore[assignment]
275300
else:
276-
image.thumbnail(size)
301+
image.thumbnail(size=size, resample=Image.Resampling.LANCZOS)
277302
logger.debug(f"Converting image size and AR took {time.time() - start} seconds")
278303

279304
logger.debug("Done, returning result...")
280305
return image, exif
281306

282-
def upload_job_result(self, job_uuid: uuid.UUID, buf: "BytesIO", filename: str) -> dict:
307+
def upload_job_result(
    self,
    job: Job,
    buf: "BytesIO",
    filename: str,
    metadata: dict[str, str | int] | None = None,
) -> dict:
    """Upload the result of a job.

    Posts ``buf`` as the multipart file field ``f`` together with a JSON
    encoded ``client`` form field to the job's result endpoint. For a
    ``ThumbnailSourceJob`` a JSON encoded ``metadata`` form field is added.

    Args:
        job: The job the result belongs to.
        buf: Buffer holding the encoded result.
        filename: Filename to send along with the buffer.
        metadata: Extra metadata, only sent for ``ThumbnailSourceJob``.

    Returns:
        The decoded JSON body of the response.
    """
    size = buf.getbuffer().nbytes
    logger.debug(f"Uploading {size} bytes result for job {job.job_uuid} with filename {filename}")
    start = time.time()
    files = {"f": (filename, buf)}
    # build client object
    client = {
        # str() keeps this JSON serialisable whether self.uuid is a uuid.UUID or a string
        "client_uuid": str(self.uuid),
        # NOTE(review): assumes a module-level __version__ is defined in this file - confirm
        "client_version": f"bma-client-lib {__version__}",
    }
    data = {"client": json.dumps(client)}
    if isinstance(job, ThumbnailSourceJob):
        # ThumbnailSourceJob needs a metadata object as well
        data["metadata"] = json.dumps(metadata)
    # doit
    r = self.client.post(
        self.base_url + f"/api/v1/json/jobs/{job.job_uuid}/result/",
        data=data,
        files=files,
    ).raise_for_status()
    t = time.time() - start
    # a coarse clock can report zero elapsed time, avoid dividing by it
    speed = round(size / t) if t > 0 else 0
    logger.debug(f"Done, it took {t} seconds to upload {size} bytes, speed {speed} bytes/sec")
    return r.json()
302337

303-
def get_exif(self, fname: Path) -> ExifExtractionJobResult:
338+
def get_exif(self, fname: Path) -> "ExifExtractionJobResult":
304339
"""Return a dict with exif data as read by exifread from the file.
305340
306341
exifread returns a flat dict of key: value pairs where the key
@@ -334,3 +369,26 @@ def create_album(self, file_uuids: list[uuid.UUID], title: str, description: str
334369
}
335370
r = self.client.post(url, json=data).raise_for_status()
336371
return r.json()["bma_response"]
372+
373+
def create_thumbnail_source(self, job: ThumbnailSourceJob) -> "ThumbnailSourceJobResult":
    """Create a thumbnail source for this file.

    For image files the thumbnail source is a WEBP version of the job's
    source file that is at most 500 pixels wide, produced by
    ``handle_image_conversion_job()``.

    Args:
        job: The thumbnail source job to handle.

    Returns:
        The ``(image, exif)`` pair returned by ``handle_image_conversion_job()``.

    Raises:
        ValueError: If the file's ``filetype`` is not ``"image"``.
    """
    max_width = 500
    info = self.get_file_info(file_uuid=job.basefile_uuid)
    if info["filetype"] != "image":
        # unsupported filetype
        raise ValueError(info["filetype"])

    # use a max 500px wide version of the image as thumbnail source
    original_ratio = Fraction(int(info["width"]), int(info["height"]))
    # extremely wide images would floor to 0, which is not a usable height
    height = max(1, math.floor(max_width / original_ratio))
    # just call the regular image conversion method to make a thumbnail
    return self.handle_image_conversion_job(
        job=ImageConversionJob(
            **job.__dict__,
            width=max_width,
            height=height,
            custom_aspect_ratio=False,
            filetype="WEBP",
            mimetype="image/webp",
        ),
        # this is the location download_job_source() saves the job's source file to
        orig=self.path / job.source_filename,
    )
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
"""Datastructures used in bma_client_lib."""
2+
3+
import uuid
4+
from dataclasses import dataclass
5+
from typing import TypeAlias
6+
7+
from PIL import Image
8+
9+
10+
@dataclass
class BaseJob:
    """Base class holding the fields shared by all job classes in this module."""

    job_type: str
    job_uuid: uuid.UUID
    basefile_uuid: uuid.UUID
    user_uuid: uuid.UUID
    client_uuid: uuid.UUID
    client_version: str
    finished: bool
    # the client appends this to its base url to download the source file of the job
    source_url: str
    # the client saves the downloaded source file under this name
    source_filename: str
    schema_name: str


@dataclass
class ImageConversionJob(BaseJob):
    """Represent an ImageConversionJob."""

    filetype: str
    width: int
    height: int
    mimetype: str
    custom_aspect_ratio: bool


@dataclass
class ImageExifExtractionJob(BaseJob):
    """Represent an ImageExifExtractionJob."""


@dataclass
class ThumbnailSourceJob(BaseJob):
    """Represent a ThumbnailSourceJob."""


@dataclass
class ThumbnailJob(ImageConversionJob):
    """Represent a ThumbnailJob."""
47+
48+
49+
# any of the job classes defined in this module
Job: TypeAlias = ImageConversionJob | ImageExifExtractionJob | ThumbnailSourceJob | ThumbnailJob

# look up a job class by its name
job_types: dict[str, type[BaseJob]] = {
    "ImageConversionJob": ImageConversionJob,
    "ImageExifExtractionJob": ImageExifExtractionJob,
    "ThumbnailSourceJob": ThumbnailSourceJob,
    "ThumbnailJob": ThumbnailJob,
}

# result types, one per kind of job
ImageConversionJobResult: TypeAlias = tuple[Image.Image, Image.Exif]
ThumbnailSourceJobResult: TypeAlias = ImageConversionJobResult
ExifExtractionJobResult: TypeAlias = dict[str, dict[str, str]]
JobResult: TypeAlias = ImageConversionJobResult | ExifExtractionJobResult | ThumbnailSourceJobResult

0 commit comments

Comments
 (0)