Skip to content

Commit 44738a0

Browse files
committed
AP-687
- given a UTC modified datetime string for the requested file, we now check if the file exists locally and has a newer mtime
1 parent 5c27a31 commit 44738a0

2 files changed

Lines changed: 31 additions & 3 deletions

File tree

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,17 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [Unreleased (Targeting 0.2.5)]
9+
10+
### Added
11+
- Optional `modified` parameter to `fetch_file()` carrying the modified time of the remote file, as pulled from the TIND API
12+
- `fetch_file()` uses this to avoid unnecessary downloads if a file already exists at the target
13+
location and has a modified time that is the same as or newer than that of the requested file
14+
15+
### Changed
16+
- `fetch_file()` now raises `RecordNotFoundError` if `tind_download()` fails to return a written file path
17+
18+
819
## [0.2.4]
920

1021
### Added

tind_client/client.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,22 @@
33
"""
44

55
import json
6+
import logging
67
import os
78
import re
89
from io import StringIO
910
from pathlib import Path
1011
from typing import Any, Iterator
1112
import xml.etree.ElementTree as E
13+
from datetime import datetime, timezone
1214

1315
from pymarc import Record
1416
from pymarc.marcxml import parse_xml_to_array
1517

1618
from .api import tind_get, tind_download
1719
from .errors import RecordNotFoundError, TINDError
1820

21+
logger = logging.getLogger(__name__)
1922

2023
NS = "http://www.loc.gov/MARC21/slim"
2124
E.register_namespace("", NS)
@@ -69,12 +72,15 @@ def fetch_metadata(self, record: str) -> Record:
6972

7073
return records[0]
7174

72-
def fetch_file(self, file_url: str, output_dir: str = "") -> str:
73-
"""Download a file from TIND and save it locally.
75+
def fetch_file(self, file_url: str, output_dir: str = "", modified: str = "") -> str:
76+
"""Download a file from TIND and save it locally. If the file already exists in the output
77+
directory and has a local modified timestamp that is newer than supplied ``modified``
78+
timestamp, the file will not be re-downloaded.
7479
7580
:param str file_url: The TIND file download URL.
7681
:param str output_dir: Directory in which to save the file.
7782
Falls back to ``default_storage_dir`` when empty.
83+
:param str modified: Optional modified timestamp from the file metadata returned by TIND
7884
:raises AuthorizationError: When the TIND API key is invalid or the file is restricted.
7985
:raises ValueError: When ``file_url`` is not a valid TIND file download URL.
8086
:raises RecordNotFoundError: When the file is invalid or not found.
@@ -84,9 +90,20 @@ def fetch_file(self, file_url: str, output_dir: str = "") -> str:
8490
raise ValueError("URL is not a valid TIND file download URL.")
8591

8692
output_target = output_dir or self.default_storage_dir
93+
94+
expected_filename = file_url.rstrip("/").split("/")[-2]
95+
expected_path = Path(output_target) / expected_filename
96+
97+
if modified and expected_path.exists():
98+
meta_mtime = datetime.fromisoformat(modified).replace(tzinfo=timezone.utc)
99+
local_mtime = datetime.fromtimestamp(expected_path.stat().st_mtime, tz=timezone.utc)
100+
if local_mtime >= meta_mtime:
101+
logger.debug("Cached file at (%s) is newer; skipping download.", expected_path)
102+
return str(expected_path)
103+
87104
(status, saved_to) = tind_download(file_url, output_dir=output_target, api_key=self.api_key)
88105

89-
if status != 200:
106+
if status != 200 or not saved_to:
90107
raise RecordNotFoundError("Referenced file could not be downloaded.")
91108

92109
return saved_to

0 commit comments

Comments
 (0)