@@ -286,6 +286,12 @@ def parse_args(args=None):
286286 dest = "include_pull_comments" ,
287287 help = "include pull request review comments in backup" ,
288288 )
289+ parser .add_argument (
290+ "--pull-reviews" ,
291+ action = "store_true" ,
292+ dest = "include_pull_reviews" ,
293+ help = "include pull request reviews in backup" ,
294+ )
289295 parser .add_argument (
290296 "--pull-commits" ,
291297 action = "store_true" ,
@@ -2672,6 +2678,57 @@ def backup_issues(args, repo_cwd, repository, repos_template):
26722678 os .replace (issue_file + ".temp" , issue_file ) # Atomic write
26732679
26742680
# Name of the per-repository checkpoint file, stored inside the pulls backup
# directory, that holds the timestamp of the last successful reviews backup.
PULL_REVIEWS_LAST_UPDATE_FILENAME = "reviews_last_update"

# Keys that backup_pulls() attaches to a pull request only when the matching
# option is enabled. Values already present in an earlier backup file are
# carried over for these keys when the current run does not fetch them.
PULL_OPTIONAL_DATA_KEYS = (
    "comment_regular_data",  # comments from the issues API
    "comment_data",  # review comments from the pulls API
    "commit_data",  # commits belonging to the pull request
    "review_data",  # pull request reviews
)
2688+
2689+
def read_json_file_if_exists(path):
    """Return the decoded JSON content of ``path``, or ``None`` if unavailable.

    Args:
        path: Filesystem path of the JSON file to read.

    Returns:
        The value produced by ``json.load`` for the file, or ``None`` when
        ``path`` is not an existing regular file, or when the file cannot be
        read, is not valid UTF-8, or does not contain valid JSON.
    """
    if not os.path.isfile(path):
        return None

    try:
        # Built-in open() instead of codecs.open(), which is deprecated since
        # Python 3.14; decoding behaviour for UTF-8 JSON is the same.
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
        # Best effort: an unreadable previous backup is treated as absent
        # rather than aborting the current run.
        logger.debug("Error reading existing JSON file {0}: {1}".format(path, e))
        return None
2700+
2701+
def restore_existing_pull_optional_data(pull, existing_pull):
    """Carry optional data over from a previous backup into ``pull``.

    For every key in ``PULL_OPTIONAL_DATA_KEYS`` that ``pull`` lacks but
    ``existing_pull`` has, the old value is copied into ``pull`` in place, so
    data fetched by an earlier run is not lost when this run skips it.

    Args:
        pull: Mutable mapping for the pull request about to be written.
        existing_pull: Previously saved pull request data, or ``None``.
            Empty values and values that are not mappings are ignored.

    Returns:
        None. ``pull`` is modified in place.
    """
    from collections.abc import Mapping

    # A previous backup file may decode to something other than an object
    # (list, string, number); membership tests or subscripting on those would
    # raise TypeError, so treat them like a missing backup.
    if not existing_pull or not isinstance(existing_pull, Mapping):
        return

    for key in PULL_OPTIONAL_DATA_KEYS:
        if key not in pull and key in existing_pull:
            pull[key] = existing_pull[key]
2709+
2710+
def get_pull_reviews_since(args, pulls_cwd):
    """Compute the cut-off timestamps used when backing up pull reviews.

    Args:
        args: Parsed command-line arguments. ``args.incremental`` is read
            directly; ``args.since`` is optional.
        pulls_cwd: Directory holding the pull request backup; the reviews
            checkpoint file lives directly inside it.

    Returns:
        A 3-tuple ``(pulls_since, reviews_since, reviews_last_update_path)``:

        * ``pulls_since`` - oldest ``updated_at`` value to include when
          listing pull requests, or a falsy value for no lower bound.
        * ``reviews_since`` - content of the reviews checkpoint file, or
          ``None`` when not running incrementally or no checkpoint exists.
        * ``reviews_last_update_path`` - path of the checkpoint file, or
          ``None`` when not running incrementally.
    """
    args_since = getattr(args, "since", None)
    if not args.incremental:
        return args_since, None, None

    reviews_last_update_path = os.path.join(
        pulls_cwd, PULL_REVIEWS_LAST_UPDATE_FILENAME
    )
    try:
        # Context manager closes the handle deterministically; the explicit
        # encoding keeps the read independent of the locale.
        with open(reviews_last_update_path, encoding="utf-8") as f:
            reviews_since = f.read().strip()
    except FileNotFoundError:
        # One-time backfill for existing incremental backups: if the user adds
        # --pull-reviews after a repository checkpoint already exists, the
        # repository-level checkpoint would otherwise skip old PRs forever.
        return None, None, reviews_last_update_path

    if args_since and reviews_since:
        # Timestamps are compared as strings; the older of the two wins so
        # that neither checkpoint causes pull requests to be skipped.
        return min(args_since, reviews_since), reviews_since, reviews_last_update_path

    return args_since or reviews_since, reviews_since, reviews_last_update_path
2730+
2731+
26752732def backup_pulls (args , repo_cwd , repository , repos_template ):
26762733 has_pulls_dir = os .path .isdir ("{0}/pulls/.git" .format (repo_cwd ))
26772734 if args .skip_existing and has_pulls_dir :
@@ -2681,7 +2738,20 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
26812738 pulls_cwd = os .path .join (repo_cwd , "pulls" )
26822739 mkdir_p (repo_cwd , pulls_cwd )
26832740
2741+ include_pull_reviews = args .include_pull_reviews or args .include_everything
2742+ repository_since = getattr (args , "since" , None )
2743+ pulls_since = repository_since
2744+ pull_reviews_since = None
2745+ pull_reviews_last_update_path = None
2746+ if include_pull_reviews :
2747+ (
2748+ pulls_since ,
2749+ pull_reviews_since ,
2750+ pull_reviews_last_update_path ,
2751+ ) = get_pull_reviews_since (args , pulls_cwd )
2752+
26842753 pulls = {}
2754+ newest_pull_update = None
26852755 _pulls_template = "{0}/{1}/pulls" .format (repos_template , repository ["full_name" ])
26862756 _issue_template = "{0}/{1}/issues" .format (repos_template , repository ["full_name" ])
26872757 query_args = {
@@ -2691,27 +2761,43 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
26912761 "direction" : "desc" ,
26922762 }
26932763
2764+ def track_newest_pull_update (pull ):
2765+ nonlocal newest_pull_update
2766+ updated_at = pull .get ("updated_at" )
2767+ if updated_at and (
2768+ newest_pull_update is None or updated_at > newest_pull_update
2769+ ):
2770+ newest_pull_update = updated_at
2771+
2772+ def pull_is_due_for_repository_checkpoint (pull ):
2773+ return not repository_since or pull ["updated_at" ] >= repository_since
2774+
26942775 if not args .include_pull_details :
26952776 pull_states = ["open" , "closed" ]
26962777 for pull_state in pull_states :
26972778 query_args ["state" ] = pull_state
26982779 _pulls = retrieve_data (args , _pulls_template , query_args = query_args )
26992780 for pull in _pulls :
2700- if args .since and pull ["updated_at" ] < args .since :
2781+ track_newest_pull_update (pull )
2782+ if pulls_since and pull ["updated_at" ] < pulls_since :
27012783 break
2702- if not args . since or pull ["updated_at" ] >= args . since :
2784+ if not pulls_since or pull ["updated_at" ] >= pulls_since :
27032785 pulls [pull ["number" ]] = pull
27042786 else :
27052787 _pulls = retrieve_data (args , _pulls_template , query_args = query_args )
27062788 for pull in _pulls :
2707- if args .since and pull ["updated_at" ] < args .since :
2789+ track_newest_pull_update (pull )
2790+ if pulls_since and pull ["updated_at" ] < pulls_since :
27082791 break
2709- if not args .since or pull ["updated_at" ] >= args .since :
2710- pulls [pull ["number" ]] = retrieve_data (
2711- args ,
2712- _pulls_template + "/{}" .format (pull ["number" ]),
2713- paginated = False ,
2714- )[0 ]
2792+ if not pulls_since or pull ["updated_at" ] >= pulls_since :
2793+ if pull_is_due_for_repository_checkpoint (pull ):
2794+ pulls [pull ["number" ]] = retrieve_data (
2795+ args ,
2796+ _pulls_template + "/{}" .format (pull ["number" ]),
2797+ paginated = False ,
2798+ )[0 ]
2799+ else :
2800+ pulls [pull ["number" ]] = pull
27152801
27162802 logger .info ("Saving {0} pull requests to disk" .format (len (list (pulls .keys ()))))
27172803 # Comments from pulls API are only _review_ comments
@@ -2721,35 +2807,73 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
27212807 comments_regular_template = _issue_template + "/{0}/comments"
27222808 comments_template = _pulls_template + "/{0}/comments"
27232809 commits_template = _pulls_template + "/{0}/commits"
2810+ reviews_template = _pulls_template + "/{0}/reviews"
2811+ pull_review_errors = False
2812+
27242813 for number , pull in list (pulls .items ()):
27252814 pull_file = "{0}/{1}.json" .format (pulls_cwd , number )
2815+ existing_pull = read_json_file_if_exists (pull_file )
2816+ needs_review_backfill = (
2817+ include_pull_reviews
2818+ and (not existing_pull or "review_data" not in existing_pull )
2819+ )
2820+
27262821 if args .incremental_by_files and os .path .isfile (pull_file ):
27272822 modified = os .path .getmtime (pull_file )
27282823 modified = datetime .fromtimestamp (modified ).strftime ("%Y-%m-%dT%H:%M:%SZ" )
2729- if modified > pull ["updated_at" ]:
2824+ if modified > pull ["updated_at" ] and not needs_review_backfill :
27302825 logger .info (
27312826 "Skipping pull request {0} because it wasn't modified since last backup" .format (
27322827 number
27332828 )
27342829 )
27352830 continue
2736- if args .include_pull_comments or args .include_everything :
2831+
2832+ should_fetch_non_review_data = pull_is_due_for_repository_checkpoint (pull )
2833+ if (
2834+ args .include_pull_comments or args .include_everything
2835+ ) and should_fetch_non_review_data :
27372836 template = comments_regular_template .format (number )
27382837 pulls [number ]["comment_regular_data" ] = retrieve_data (args , template )
27392838 template = comments_template .format (number )
27402839 pulls [number ]["comment_data" ] = retrieve_data (args , template )
2741- if args .include_pull_commits or args .include_everything :
2840+ if include_pull_reviews :
2841+ template = reviews_template .format (number )
2842+ try :
2843+ pulls [number ]["review_data" ] = retrieve_data (args , template )
2844+ except Exception as e :
2845+ pull_review_errors = True
2846+ logger .warning (
2847+ "Unable to retrieve reviews for pull request {0}#{1}, skipping reviews: {2}" .format (
2848+ repository ["full_name" ], number , e
2849+ )
2850+ )
2851+ if (
2852+ args .include_pull_commits or args .include_everything
2853+ ) and should_fetch_non_review_data :
27422854 template = commits_template .format (number )
27432855 pulls [number ]["commit_data" ] = retrieve_data (args , template )
27442856 if args .include_attachments :
27452857 download_attachments (
27462858 args , pulls_cwd , pulls [number ], number , repository , item_type = "pull"
27472859 )
27482860
2861+ restore_existing_pull_optional_data (pull , existing_pull )
2862+
27492863 with codecs .open (pull_file + ".temp" , "w" , encoding = "utf-8" ) as f :
27502864 json_dump (pull , f )
27512865 os .replace (pull_file + ".temp" , pull_file ) # Atomic write
27522866
2867+ if (
2868+ include_pull_reviews
2869+ and args .incremental
2870+ and pull_reviews_last_update_path
2871+ and newest_pull_update
2872+ and not pull_review_errors
2873+ and (not pull_reviews_since or newest_pull_update > pull_reviews_since )
2874+ ):
2875+ open (pull_reviews_last_update_path , "w" ).write (newest_pull_update )
2876+
27532877
27542878def backup_milestones (args , repo_cwd , repository , repos_template ):
27552879 milestone_cwd = os .path .join (repo_cwd , "milestones" )
0 commit comments