148 changes: 128 additions & 20 deletions get-deployment-metrics.py
@@ -1,14 +1,32 @@
#!/usr/bin/env python3

import os
import sys
import logging
import logging.handlers
import argparse
import fnmatch
import time
from agithub.GitHub import GitHub
from dotenv import load_dotenv


def make_verbose_rate_limit_handler(client):
"""Patch a GitHub client to print rate limit messages."""

def verbose_sleep():
seconds = client.ratelimit_seconds_remaining()
reset_time = time.strftime("%H:%M:%S", time.localtime(time.time() + seconds))
print(
f"Rate limited by GitHub API. Sleeping for {seconds} seconds until {reset_time}...",
file=sys.stderr,
flush=True,
)
time.sleep(seconds)

client.sleep_until_more_ratelimit = verbose_sleep
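# agithub's client calls sleep_until_more_ratelimit() whenever the API
# reports it is rate limited, so replacing it with verbose_sleep makes the
# wait visible on stderr instead of stalling silently.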


def get_mins_secs_str(duration_in_ms):
duration_secs, duration_in_ms = divmod(duration_in_ms, 1000)
duration_mins, duration_secs = divmod(duration_secs, 60)
@@ -25,6 +43,27 @@ def format_number(float_val):
return str(return_val)


def is_rate_limited(status, response):
"""Check if a GitHub API response indicates rate limiting."""
if status == 429:
return True
if status == 403:
message = response.get("message", "") if isinstance(response, dict) else ""
if "rate limit" in message.lower():
return True
return False


# Global list to collect output for file writing
output_lines = []


def output(message=""):
"""Print to stdout and collect for file output."""
print(message)
output_lines.append(message)


def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
# Pagination does not work on this call
# https://github.com/mozilla/agithub/issues/76
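# ...so we page through the results manually, incrementing the `page`
# parameter until all total_count runs have been collected.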
@@ -42,6 +81,34 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
.runs.get(created=date_filter, page=page_to_get)
)

# Check for rate limiting
if is_rate_limited(gh_status, workflow_runs):
print(
"WARNING: GitHub API rate limit exceeded while fetching workflow runs for {}/{}. Results may be incomplete.".format(
org_name, repo_name
)
)
logger.warning("Rate limit exceeded - stopping pagination")
break

# Check for API errors (non-2xx status codes)
if gh_status < 200 or gh_status >= 300:
logger.warning(
"GitHub API returned status {} for workflow {} in {}/{}: {}".format(
gh_status, workflow_id, org_name, repo_name, workflow_runs
)
)
break

# Handle unexpected response format
if "workflow_runs" not in workflow_runs:
logger.warning(
"Unexpected API response for workflow {} in {}/{}: {}".format(
workflow_id, org_name, repo_name, workflow_runs
)
)
break

runs = runs + workflow_runs["workflow_runs"]

total_runs = workflow_runs["total_count"]
@@ -102,6 +169,12 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
parser.add_argument(
"--verbose", help="Turn on DEBUG logging", action="store_true", required=False
)
parser.add_argument(
"--output-file",
help="Write results to the specified file",
dest="output_file",
required=False,
)

args = parser.parse_args()

@@ -129,13 +202,20 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
logger.error("Missing GITHUB_PAT environment variable - unable to continue")
exit(1)

# Initialize connection to Github API
# Initialize connection to GitHub API with verbose rate limit handling
github_handle = GitHub(token=github_pat, paginate=True)
make_verbose_rate_limit_handler(github_handle.client)

# Get all the repos in the org
# /orgs/{org}/repos
gh_status, repo_data = github_handle.orgs[args.org_name].repos.get()

if is_rate_limited(gh_status, repo_data):
print(
"ERROR: GitHub API rate limit exceeded while fetching repos. Please wait and try again."
)
exit(1)
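# Without the repo list there is nothing to report, so this failure is
# fatal; later rate-limit hits only degrade the results instead.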

for repo in repo_data:
repo_name = repo["name"]
repo_printed = False
@@ -152,6 +232,22 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
repo_name
].actions.workflows.get()

if is_rate_limited(gh_status, workflow_data):
print(
"WARNING: GitHub API rate limit exceeded while fetching workflows for {}. Results may be incomplete.".format(
repo_name
)
)
continue

if "workflows" not in workflow_data:
logger.warning(
"Unexpected API response for workflows in {}: {}".format(
repo_name, workflow_data
)
)
continue

for workflow in workflow_data["workflows"]:
# Possible states: success, failure, cancelled, skipped, timed_out, action_required, neutral

@@ -201,7 +297,7 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
# Were there any runs for this workflow in this time period?
if total_workflow_runs > 0:
if args.detailed and not repo_printed:
print("{}".format(repo_name))
output("{}".format(repo_name))
repo_printed = True

# Initialize our summary stats dict
@@ -262,6 +358,12 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
.timing.get()
)

if is_rate_limited(gh_status, workflow_durations):
print(
"WARNING: GitHub API rate limit exceeded while fetching timing data. Results may be incomplete."
)
break

# Some jobs may not have run at all
if "run_duration_ms" in workflow_durations:
job_duration = workflow_durations["run_duration_ms"]
@@ -306,23 +408,23 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
summary_stats[repo_name][workflow_summary_name] = stat

if args.detailed:
print("\t{}:".format(workflow_name))
print("\t\tRuns: {}".format(total_workflow_runs))
print("\t\tSuccessful: {}".format(workflow_success_count))
print("\t\tFailed: {}".format(workflow_fail_count))
print("\t\tSuccess Rate: {}%".format(workflow_success_rate))
print(
output("\t{}:".format(workflow_name))
output("\t\tRuns: {}".format(total_workflow_runs))
output("\t\tSuccessful: {}".format(workflow_success_count))
output("\t\tFailed: {}".format(workflow_fail_count))
output("\t\tSuccess Rate: {}%".format(workflow_success_rate))
output(
"\t\tAvg Duration:: {:.0f} ms ({})".format(
workflow_avg_duration,
get_mins_secs_str(workflow_avg_duration),
)
)
print("\t\tDeployers:")
output("\t\tDeployers:")
sorted_deployers = sorted(
deployers.items(), key=lambda item: item[1], reverse=True
)
for deploy_user, deploy_count in sorted_deployers:
print("\t\t\t{}:{}".format(deploy_user, deploy_count))
output("\t\t\t{}:{}".format(deploy_user, deploy_count))

# now we can process the stats we have gathered and get the overall averages
workflow_count = 0
@@ -362,9 +464,9 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
else:
overall_deployers[deploy_user] = deploy_count

print("\n")
print("-------- SUMMARY ---------")
print(
output("\n")
output("-------- SUMMARY ---------")
output(
"For the period {} with workflows matching {}".format(
args.date_filter, args.workflow_pattern
)
@@ -380,20 +482,26 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
)
overall_average_duration_ms = overall_duration_ms_sum / workflow_count

print("Total Runs: {}".format(overall_run_count))
print("Avg Success Rate: {}%".format(overall_average_success_rate))
print("Avg Failure Rate: {}%".format(overall_average_failure_rate))
print(
output("Total Runs: {}".format(overall_run_count))
output("Avg Success Rate: {}%".format(overall_average_success_rate))
output("Avg Failure Rate: {}%".format(overall_average_failure_rate))
output(
"Avg Duration:: {:.0f} ms ({})".format(
overall_average_duration_ms,
get_mins_secs_str(overall_average_duration_ms),
)
)
print("Non 'Branch Deploy' Deployers:")
output("Non 'Branch Deploy' Deployers:")
sorted_overall_deployers = sorted(
overall_deployers.items(), key=lambda item: item[1], reverse=True
)
for deploy_user, deploy_count in sorted_overall_deployers:
print("\t{}:{}".format(deploy_user, deploy_count))
output("\t{}:{}".format(deploy_user, deploy_count))
else:
print("No workflows found matching the filter and/or date critiera")
output("No workflows found matching the filter and/or date criteria")

# Write results to file if requested
if args.output_file:
with open(args.output_file, "w") as f:
f.write("\n".join(output_lines))
print("\nResults written to {}".format(args.output_file))