Skip to content

Commit e96e291

Browse files
committed
Skip checkpoint-equal incremental items
1 parent cbc4005 commit e96e291

4 files changed

Lines changed: 90 additions & 7 deletions

File tree

CHANGES.rst

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,9 @@ Unreleased
1313
legacy file is removed once existing issue/pull backups have resource
1414
checkpoints (#62).
1515
- Stop paginating pull requests during incremental backups once the sorted
16-
results are older than the active checkpoint.
16+
results are at or older than the active checkpoint.
17+
- Avoid re-fetching discussions and pull requests whose ``updated_at`` exactly
18+
matches the active incremental checkpoint.
1719
- Avoid extra release asset list requests by using asset metadata already
1820
included in GitHub's releases response.
1921
- Add ``--token-from-gh`` to read authentication from ``gh auth token``.

github_backup/github_backup.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2513,7 +2513,7 @@ def retrieve_discussion_summaries(args, repository, since=None):
25132513
if updated_at and (newest_seen is None or updated_at > newest_seen):
25142514
newest_seen = updated_at
25152515

2516-
if since and updated_at and updated_at < since:
2516+
if since and updated_at and updated_at <= since:
25172517
stop = True
25182518
break
25192519

@@ -2899,7 +2899,7 @@ def track_newest_pull_update(pull):
28992899
newest_pull_update = updated_at
29002900

29012901
def pull_is_due_for_repository_checkpoint(pull):
2902-
return not repository_since or pull["updated_at"] >= repository_since
2902+
return not repository_since or pull["updated_at"] > repository_since
29032903

29042904
if not args.include_pull_details:
29052905
pull_states = ["open", "closed"]
@@ -2909,18 +2909,18 @@ def pull_is_due_for_repository_checkpoint(pull):
29092909
args, _pulls_template, query_args=query_args, lazy=True
29102910
):
29112911
track_newest_pull_update(pull)
2912-
if pulls_since and pull["updated_at"] < pulls_since:
2912+
if pulls_since and pull["updated_at"] <= pulls_since:
29132913
break
2914-
if not pulls_since or pull["updated_at"] >= pulls_since:
2914+
if not pulls_since or pull["updated_at"] > pulls_since:
29152915
pulls[pull["number"]] = pull
29162916
else:
29172917
for pull in retrieve_data(
29182918
args, _pulls_template, query_args=query_args, lazy=True
29192919
):
29202920
track_newest_pull_update(pull)
2921-
if pulls_since and pull["updated_at"] < pulls_since:
2921+
if pulls_since and pull["updated_at"] <= pulls_since:
29222922
break
2923-
if not pulls_since or pull["updated_at"] >= pulls_since:
2923+
if not pulls_since or pull["updated_at"] > pulls_since:
29242924
if pull_is_due_for_repository_checkpoint(pull):
29252925
pulls[pull["number"]] = retrieve_data(
29262926
args,

tests/test_discussions.py

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,41 @@ def test_retrieve_discussion_summaries_stops_at_incremental_since(create_args):
4646
assert mock_retrieve.call_count == 1
4747

4848

49+
def test_retrieve_discussion_summaries_excludes_checkpoint_timestamp(create_args):
50+
args = create_args()
51+
repository = {"full_name": "owner/repo"}
52+
53+
page = {
54+
"repository": {
55+
"hasDiscussionsEnabled": True,
56+
"discussions": {
57+
"totalCount": 1,
58+
"nodes": [
59+
{
60+
"number": 1,
61+
"title": "already backed up",
62+
"updatedAt": "2026-01-01T00:00:00Z",
63+
},
64+
],
65+
"pageInfo": {"hasNextPage": True, "endCursor": "NEXT"},
66+
},
67+
}
68+
}
69+
70+
with patch(
71+
"github_backup.github_backup.retrieve_graphql_data", return_value=page
72+
) as mock_retrieve:
73+
summaries, newest, enabled, total = github_backup.retrieve_discussion_summaries(
74+
args, repository, since="2026-01-01T00:00:00Z"
75+
)
76+
77+
assert enabled is True
78+
assert total == 1
79+
assert newest == "2026-01-01T00:00:00Z"
80+
assert summaries == []
81+
assert mock_retrieve.call_count == 1
82+
83+
4984
def test_retrieve_discussion_summaries_disabled_discussions(create_args):
5085
args = create_args()
5186
repository = {"full_name": "owner/repo"}

tests/test_pull_incremental_pagination.py

Lines changed: 46 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,52 @@ def headers(self):
3131
return headers
3232

3333

34+
def test_backup_pulls_incremental_excludes_checkpoint_timestamp(create_args, tmp_path):
35+
args = create_args(include_pulls=True, incremental=True)
36+
args.since = "2026-04-26T08:13:46Z"
37+
repository = {"full_name": "owner/repo"}
38+
39+
responses = [
40+
MockHTTPResponse([]),
41+
MockHTTPResponse(
42+
[
43+
{
44+
"number": 1,
45+
"title": "already backed up",
46+
"updated_at": "2026-04-26T08:13:46Z",
47+
},
48+
],
49+
link_header='<https://api.github.com/repos/owner/repo/pulls?per_page=100&state=closed&page=2>; rel="next"',
50+
),
51+
MockHTTPResponse(
52+
[
53+
{
54+
"number": 0,
55+
"title": "older pull on page 2",
56+
"updated_at": "2026-04-25T07:00:00Z",
57+
}
58+
]
59+
),
60+
]
61+
requests_made = []
62+
63+
def mock_urlopen(request, *args, **kwargs):
64+
requests_made.append(request.get_full_url())
65+
return responses[len(requests_made) - 1]
66+
67+
with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
68+
github_backup.backup_pulls(
69+
args, tmp_path, repository, "https://api.github.com/repos"
70+
)
71+
72+
assert len(requests_made) == 2
73+
assert "state=open" in requests_made[0]
74+
assert "state=closed" in requests_made[1]
75+
assert all("page=2" not in url for url in requests_made)
76+
assert not os.path.exists(tmp_path / "pulls" / "1.json")
77+
assert not os.path.exists(tmp_path / "pulls" / "0.json")
78+
79+
3480
def test_backup_pulls_incremental_stops_before_fetching_old_pages(
3581
create_args, tmp_path
3682
):

0 commit comments

Comments
 (0)