Skip to content

Commit 5f7ea0f

Browse files
committed
fixes from PR
- reverts max-line-length preference to 100 - fixes some tests - clarifies a couple of edge cases
1 parent ac28514 commit 5f7ea0f

5 files changed

Lines changed: 70 additions & 47 deletions

File tree

.flake8

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
[flake8]
2-
max-line-length = 120
2+
max-line-length = 100
33
# Equivalent to allow-init-docstring, which we set on pydoclint.
44
extend-ignore = DOC301,F401

CHANGELOG.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1515

1616
### Changed
1717
- README examples
18-
- lengthened max-line-length for pylint/flake8 to 120 to accomodate added typing
1918

2019
### Deprecated
2120
- N/A

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,4 @@ skip-checking-raises = true
6262
style = "sphinx"
6363

6464
[tool.pylint.format]
65-
max-line-length = 120
65+
max-line-length = 100

tests/test_fetch.py

Lines changed: 44 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,7 @@ def test_fetch_metadata_success(
2828
client: TINDClient,
2929
) -> None:
3030
"""fetch_metadata returns a PyMARC Record for a valid record ID."""
31-
requests_mock.get(
32-
f"{BASE_URL}/record/12345/", text=sample_marc_xml, status_code=200
33-
)
31+
requests_mock.get(f"{BASE_URL}/record/12345/", text=sample_marc_xml, status_code=200)
3432
record = client.fetch_metadata("12345")
3533
assert record["245"]["a"] == "Sample Title"
3634

@@ -42,9 +40,7 @@ def test_fetch_metadata_404(requests_mock: req_mock.Mocker, client: TINDClient)
4240
client.fetch_metadata("99999")
4341

4442

45-
def test_fetch_metadata_empty_body(
46-
requests_mock: req_mock.Mocker, client: TINDClient
47-
) -> None:
43+
def test_fetch_metadata_empty_body(requests_mock: req_mock.Mocker, client: TINDClient) -> None:
4844
"""fetch_metadata raises RecordNotFoundError when the response body is empty."""
4945
requests_mock.get(f"{BASE_URL}/record/11111/", text=" ", status_code=200)
5046
with pytest.raises(RecordNotFoundError):
@@ -96,9 +92,7 @@ def test_fetch_file_not_found(
9692
# ---------------------------------------------------------------------------
9793

9894

99-
def test_fetch_file_metadata_success(
100-
requests_mock: req_mock.Mocker, client: TINDClient
101-
) -> None:
95+
def test_fetch_file_metadata_success(requests_mock: req_mock.Mocker, client: TINDClient) -> None:
10296
"""fetch_file_metadata returns a list of file metadata dicts."""
10397
payload = [{"name": "file.pdf", "size": 1024}]
10498
requests_mock.get(
@@ -110,9 +104,7 @@ def test_fetch_file_metadata_success(
110104
assert result[0]["name"] == "file.pdf"
111105

112106

113-
def test_fetch_file_metadata_error(
114-
requests_mock: req_mock.Mocker, client: TINDClient
115-
) -> None:
107+
def test_fetch_file_metadata_error(requests_mock: req_mock.Mocker, client: TINDClient) -> None:
116108
"""fetch_file_metadata raises TINDError on non-200 responses."""
117109
requests_mock.get(
118110
f"{BASE_URL}/record/12345/files",
@@ -128,9 +120,7 @@ def test_fetch_file_metadata_error(
128120
# ---------------------------------------------------------------------------
129121

130122

131-
def test_fetch_ids_search_success(
132-
requests_mock: req_mock.Mocker, client: TINDClient
133-
) -> None:
123+
def test_fetch_ids_search_success(requests_mock: req_mock.Mocker, client: TINDClient) -> None:
134124
"""fetch_ids_search returns the list of record IDs from the search response."""
135125
requests_mock.get(
136126
f"{BASE_URL}/search",
@@ -141,9 +131,7 @@ def test_fetch_ids_search_success(
141131
assert ids == ["1", "2", "3"]
142132

143133

144-
def test_fetch_ids_search_error(
145-
requests_mock: req_mock.Mocker, client: TINDClient
146-
) -> None:
134+
def test_fetch_ids_search_error(requests_mock: req_mock.Mocker, client: TINDClient) -> None:
147135
"""fetch_ids_search raises TINDError on non-200 responses."""
148136
requests_mock.get(
149137
f"{BASE_URL}/search",
@@ -184,6 +172,7 @@ def test_search_returns_xml(
184172
assert len(results) >= 1
185173
assert requests_mock.call_count == 1
186174

175+
187176
# ---------------------------------------------------------------------------
188177
# write_search_results_to_file / _iter_xml_records
189178
# ---------------------------------------------------------------------------
@@ -197,7 +186,7 @@ def test_write_search_results_to_file_with_malformed_output_filename(
197186
) -> None:
198187
"""write_search_results_to_file raises ValueError for a malformed output filename."""
199188
with pytest.raises(ValueError, match="output_file_name"):
200-
client.write_search_results_to_file("", output_file_name = malformed_filename)
189+
client.write_search_results_to_file("", output_file_name=malformed_filename)
201190

202191

203192
def test_write_search_results_to_file_empty_filename(client: TINDClient) -> None:
@@ -247,15 +236,18 @@ def test_write_search_results_to_file_success(
247236
tree = E.parse(tmp_path / "out.xml")
248237
records = tree.getroot().findall(f"{{{marc21_ns}}}record")
249238
assert len(records) == 3
250-
assert tree.getroot().findtext(f"{{{marc21_ns}}}record/{{{marc21_ns}}}controlfield[@tag='001']") == "27320"
239+
assert (
240+
tree.getroot().findtext(f"{{{marc21_ns}}}record/{{{marc21_ns}}}controlfield[@tag='001']")
241+
== "27320"
242+
)
251243

252244

253-
def test_write_search_results_to_file_count_mismatch(
245+
def test_write_search_results_to_file_matched_but_no_records_returned(
254246
requests_mock: req_mock.Mocker,
255247
client: TINDClient,
256248
tmp_path: Path,
257249
) -> None:
258-
"""write_search_results_to_file raises TINDError when streamed record count != ID count."""
250+
"""write_search_results_to_file raises TINDError when API returns no records for matched IDs"""
259251
client.default_storage_dir = str(tmp_path)
260252
requests_mock.get(
261253
f"{BASE_URL}/search",
@@ -266,13 +258,39 @@ def test_write_search_results_to_file_count_mismatch(
266258
{"text": (FIXTURES / "end-of-batch-tind-response.xml").read_text(), "status_code": 200},
267259
],
268260
)
269-
with pytest.raises(TINDError, match="Expected 3 records"):
261+
with pytest.raises(TINDError, match="API did not return any."):
262+
client.write_search_results_to_file("collection:'test'", "mismatch.xml")
263+
264+
265+
def test_write_search_results_to_file_matched_but_api_mismatch(
266+
requests_mock: req_mock.Mocker,
267+
client: TINDClient,
268+
tmp_path: Path,
269+
) -> None:
270+
"""write_search_results_to_file raises TINDError when streamed record count != ID count."""
271+
client.default_storage_dir = str(tmp_path)
272+
requests_mock.get(
273+
f"{BASE_URL}/search",
274+
response_list=[
275+
# fetch_ids_search says 3 hits
276+
{
277+
"text": json.dumps({"hits": ["27320", "28819", "29563", "123123"]}),
278+
"status_code": 200,
279+
},
280+
# first paginated XML batch, but only 3 records instead of 4 as expected from the IDs
281+
{"text": (FIXTURES / "1st-batch-tind-response.xml").read_text(), "status_code": 200},
282+
# but the XML stream returns nothing immediately
283+
{"text": (FIXTURES / "end-of-batch-tind-response.xml").read_text(), "status_code": 200},
284+
],
285+
)
286+
with pytest.raises(TINDError, match="Expected 4 records"):
270287
client.write_search_results_to_file("collection:'test'", "mismatch.xml")
271288

289+
272290
def test_write_search_results_to_file_malformed_xml_response(
273-
requests_mock: req_mock.Mocker,
274-
client: TINDClient,
275-
tmp_path: Path,
291+
requests_mock: req_mock.Mocker,
292+
client: TINDClient,
293+
tmp_path: Path,
276294
) -> None:
277295
"""write_search_results_to_file raises TINDError when the API returns malformed XML."""
278296
client.default_storage_dir = str(tmp_path)

tind_client/client.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import os
77
import re
88
from io import StringIO
9+
from pathlib import Path
910
from typing import Any, Iterator
1011
import xml.etree.ElementTree as E
1112

@@ -22,6 +23,7 @@
2223
# remove namespace that ElementTree adds to records when passed
2324
_NS_DECL: str = f' xmlns="{NS}"'
2425

26+
2527
class TINDClient:
2628
"""Client for interacting with a TIND DA instance.
2729
@@ -63,9 +65,7 @@ def fetch_metadata(self, record: str) -> Record:
6365
# records. Additionally, if the XML is malformed, the parser function may return
6466
# multiple records. We need to ensure that exactly one record is parsed.
6567
if len(records) != 1:
66-
raise RecordNotFoundError(
67-
f"Record {record} did not match exactly one record in TIND."
68-
)
68+
raise RecordNotFoundError(f"Record {record} did not match exactly one record in TIND.")
6969

7070
return records[0]
7171

@@ -84,9 +84,7 @@ def fetch_file(self, file_url: str, output_dir: str = "") -> str:
8484
raise ValueError("URL is not a valid TIND file download URL.")
8585

8686
output_target = output_dir or self.default_storage_dir
87-
(status, saved_to) = tind_download(
88-
file_url, output_dir=output_target, api_key=self.api_key
89-
)
87+
(status, saved_to) = tind_download(file_url, output_dir=output_target, api_key=self.api_key)
9088

9189
if status != 200:
9290
raise RecordNotFoundError("Referenced file could not be downloaded.")
@@ -179,9 +177,11 @@ def search(self, query: str, result_format: str = "xml") -> list[Any]:
179177

180178
return recs
181179

182-
def write_search_results_to_file(self, query: str = "", output_file_name: str = "tind.xml") -> int:
180+
def write_search_results_to_file(
181+
self, query: str = "", output_file_name: str = "tind.xml"
182+
) -> int:
183183
"""Search TIND and stream results to an XML file.
184-
184+
185185
:param str query: A TIND search query string.
186186
:param str output_file_name: filename for the output XML file.
187187
:returns int: The number of records written to the file.
@@ -195,23 +195,29 @@ def write_search_results_to_file(self, query: str = "", output_file_name: str =
195195

196196
recs_written = 0
197197
output_path = os.path.join(self.default_storage_dir, output_file_name)
198-
with open(output_path, "w", encoding="utf-8") as f:
199-
f.write(f'<?xml version="1.0" encoding="UTF-8"?>\n<collection xmlns="{NS}">\n')
200-
for record in self._iter_xml_records(query):
201-
record_xml = E.tostring(record, encoding="unicode")
202-
f.write(record_xml.replace(_NS_DECL, ""))
203-
f.write("\n")
204-
recs_written += 1
205-
206-
f.write("</collection>\n")
198+
try:
199+
with open(output_path, "w", encoding="utf-8") as f:
200+
f.write(f'<?xml version="1.0" encoding="UTF-8"?>\n<collection xmlns="{NS}">\n')
201+
for record in self._iter_xml_records(query):
202+
record_xml = E.tostring(record, encoding="unicode")
203+
f.write(record_xml.replace(_NS_DECL, ""))
204+
f.write("\n")
205+
recs_written += 1
206+
if recs_written == 0:
207+
# We expected records but didn't receive any through pagination
208+
raise TINDError(f"Matched {total_hits} tind ids, but API did not return any.")
209+
f.write("</collection>\n")
210+
except Exception:
211+
Path(output_path).unlink(missing_ok=True)
212+
raise
207213

208214
if recs_written != total_hits:
209215
raise TINDError(f"Expected {total_hits} records, but wrote {recs_written} to file.")
210216
return recs_written
211217

212218
def _iter_xml_records(self, query: str) -> Iterator[E.Element]:
213219
"""Yield every ``<record>`` element from all pages of a search.
214-
220+
215221
Issues the initial search request, then yields records one at a time,
216222
and continues to issue paginated search requests until all records have been yielded.
217223
:param str query: A TIND search query string.

0 commit comments

Comments
 (0)