33"""
44
55import json
6+ import logging
67import os
78import re
89from io import StringIO
910from pathlib import Path
1011from typing import Any , Iterator
1112import xml .etree .ElementTree as E
13+ from datetime import datetime , timezone
1214
1315from pymarc import Record
1416from pymarc .marcxml import parse_xml_to_array
1517
1618from .api import tind_get , tind_download
1719from .errors import RecordNotFoundError , TINDError
1820
21+ logger = logging .getLogger (__name__ )
1922
2023NS = "http://www.loc.gov/MARC21/slim"
2124E .register_namespace ("" , NS )
@@ -69,12 +72,15 @@ def fetch_metadata(self, record: str) -> Record:
6972
7073 return records [0 ]
7174
72- def fetch_file (self , file_url : str , output_dir : str = "" ) -> str :
73- """Download a file from TIND and save it locally.
75+ def fetch_file (self , file_url : str , output_dir : str = "" , modified : str = "" ) -> str :
76+ """Download a file from TIND and save it locally. If the file already exists in the output
77+ directory and its local modification time is at or after the supplied ``modified``
78+ timestamp, the file will not be re-downloaded and the existing local path is returned.
7479
7580 :param str file_url: The TIND file download URL.
7681 :param str output_dir: Directory in which to save the file.
7782 Falls back to ``default_storage_dir`` when empty.
83+ :param str modified: Optional ISO 8601 modified timestamp from the file metadata returned by TIND; interpreted as UTC.
7884 :raises AuthorizationError: When the TIND API key is invalid or the file is restricted.
7985 :raises ValueError: When ``file_url`` is not a valid TIND file download URL.
8086 :raises RecordNotFoundError: When the file is invalid or not found.
@@ -84,9 +90,20 @@ def fetch_file(self, file_url: str, output_dir: str = "") -> str:
8490 raise ValueError ("URL is not a valid TIND file download URL." )
8591
8692 output_target = output_dir or self .default_storage_dir
93+
94+ expected_filename = file_url .rstrip ("/" ).split ("/" )[- 2 ]
95+ expected_path = Path (output_target ) / expected_filename
96+
97+ if modified and expected_path .exists ():
98+ meta_mtime = datetime .fromisoformat (modified ).replace (tzinfo = timezone .utc )
99+ local_mtime = datetime .fromtimestamp (expected_path .stat ().st_mtime , tz = timezone .utc )
100+ if local_mtime >= meta_mtime :
101+ logger .debug ("Cached file at (%s) is newer; skipping download." , expected_path )
102+ return str (expected_path )
103+
87104 (status , saved_to ) = tind_download (file_url , output_dir = output_target , api_key = self .api_key )
88105
89- if status != 200 :
106+ if status != 200 or not saved_to :
90107 raise RecordNotFoundError ("Referenced file could not be downloaded." )
91108
92109 return saved_to
0 commit comments