@@ -138,3 +138,87 @@ legend.click_policy = "mute"
138138p.add_layout(legend, "below")
139139show(p)
140140```
141+
142+ % TODO: automate project generation based on which data files are in devstats-data
143+
144+ ``` {code-cell} ipython3
145+ ---
146+ tags: [remove-cell]
147+ ---
148+ import json
149+ import datetime
150+ import itertools
151+ from dateutil.parser import isoparse
152+ import numpy as np
153+ import matplotlib.pyplot as plt
154+
# Projects to summarize; each one needs a ../devstats-data/<name>_prs.json file.
projects = [
    "numpy", "scipy", "matplotlib", "pandas", "scikit-learn", "scikit-image", "networkx"
]

# Filter settings and the time window are identical for every project, so they
# are defined once, outside the loop. `today` and `year` are also read by the
# plotting cell that follows, so they must exist even if `projects` is empty.
default_branches = {"main", "master"}
bot_filter = {"dependabot-preview"}
today = np.datetime64(datetime.datetime.now(), "D")
year = np.timedelta64(365, "D")

# Maps project name -> {"open_prs": [...], "merged_prs": [...]}
project_prs = {}
for proj in projects:
    # Explicit encoding so the result does not depend on the platform default.
    with open(f"../devstats-data/{proj}_prs.json", encoding="utf-8") as fh:
        data = [item["node"] for item in json.load(fh)]

    # Only consider prs to the main development branch
    prs = [pr for pr in data if pr["baseRefName"] in default_branches]

    # Ignore PRs with unknown author
    prs = [pr for pr in prs if pr["author"]]  # Failed author query results in None

    # Ignore bots
    prs = [pr for pr in prs if pr["author"]["login"] not in bot_filter]

    # Split into merged and open
    merged_prs = [pr for pr in prs if pr["state"] == "MERGED"]
    open_prs = [pr for pr in prs if pr["state"] == "OPEN"]

    # Only look at PRs that have been created or merged in the last year.
    # Timestamps are truncated to whole days before comparing against `today`.
    merged_prs = [
        pr for pr in merged_prs
        if (today - np.datetime64(pr["mergedAt"], "D")) < year
    ]
    open_prs = [
        pr for pr in open_prs
        if (today - np.datetime64(pr["createdAt"], "D")) < year
    ]

    project_prs[proj] = {
        "open_prs": open_prs,
        "merged_prs": merged_prs,
    }
195+ ```
196+
197+ ``` {code-cell} ipython3
198+ ---
199+ tags: [remove-input]
200+ ---
# Num merged PRs per month
# Twelve consecutive 30-day bins starting at `today - year`; `today`, `year`
# and `project_prs` are defined in the preceding data-loading cell.
start_date = today - year
bedges = start_date + np.arange(13) * np.timedelta64(30, "D")
# Proxy date for center of bin
x = bedges[:-1] + np.timedelta64(15, "D")
# NOTE(review): 12 bins x 30 days span 360 days, so with a 365-day `year` the
# PRs merged in the last few days before `today` fall outside every bin.

fig, ax = plt.subplots(figsize=(16, 12))
ax.set_title("Merged PRs", fontsize=24)

# NOTE: np.histogram doesn't work on datetimes
for proj, data in project_prs.items():
    merged_prs = data["merged_prs"]
    merge_dates = np.array([pr["mergedAt"] for pr in merged_prs], dtype="M8[D]")
    # Bins are half-open, [lo, hi): a PR merged exactly on an edge date is
    # counted once, in the bin that starts on that date. (Strict comparisons
    # on both sides would silently drop those PRs.)
    num_merged_per_month = [
        int(np.count_nonzero((merge_dates >= lo) & (merge_dates < hi)))
        for lo, hi in itertools.pairwise(bedges)
    ]
    ax.plot(x, num_merged_per_month, label=proj)
ax.legend()
plt.show()
224+ ```
0 commit comments