zenhub-cli/zh_api.py at develop · daniel-pittman/zenhub-cli · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
"""Direct ZenHub GraphQL client for the MCP server.

The MCP server used to shell out to `zh --machine` and parse TSV/RESULT
lines. Four rounds of release review on v1.5.0's sub-issue feature
surfaced ~25 findings, roughly half architectural: every time the bash
emitter and the Python parser drifted, the wrapper lied. v1.6.0 replaces
the text-contract layer with this module — direct HTTP calls to ZenHub's
GraphQL API, native Python data structures all the way out.

This module is deliberately dependency-light:

  - urllib.request for HTTP (no third-party HTTP client)
  - json from stdlib
  - re from stdlib
  - subprocess only for the `git remote get-url` fallback when no
    explicit owner/repo is given

This keeps the MCP venv bootstrap path narrow (no extra wheels to
download on first run) and means the same module is testable without
network access by mocking `graphql_request`.

Auth + config resolution mirrors what `zh` (bash) does so users with an
existing `~/.config/zh/config` get the same behavior from MCP. Repo and
workspace resolution use the same GraphQL queries the bash script uses,
ported to Python.
"""

from __future__ import annotations

import json
import os
import re
import subprocess
import urllib.error
import urllib.request
from pathlib import Path

# =============================================================================
# Configuration / auth
# =============================================================================

# Default location of the zh config file; load_config() reads this path
# when the caller does not pass an explicit one.
DEFAULT_CONFIG_PATH = Path.home() / ".config" / "zh" / "config"
# ZenHub public GraphQL endpoint; the default `url` of graphql_request().
ZH_GRAPHQL_URL = "https://api.zenhub.com/public/graphql"

# Matches a single `KEY=value` config line. Group 1 is the key (uppercase
# letters, digits and underscores, not starting with a digit); group 2 is
# the value with surrounding whitespace trimmed. Whitespace around the
# `=` is tolerated.
_CONFIG_KEY_RE = re.compile(r"^\s*([A-Z_][A-Z0-9_]*)\s*=\s*(.*?)\s*$")


class ZhApiError(RuntimeError):
    """Anything wrong with config, transport, or the GraphQL response.

    The one exception type this module raises deliberately: a missing
    token, a failed git/gh lookup, HTTP and transport failures, non-JSON
    bodies, and GraphQL `errors` payloads are all reported through it.
    """


def _strip_quotes(s: str) -> str:
    """Remove one matching pair of enclosing quotes, if there is one.

    Only a value that both starts and ends with the same quote character
    (`"` or `'`) is unwrapped, and only a single layer is removed.
    Anything else, including strings shorter than two characters, is
    returned unchanged.
    """
    if len(s) < 2:
        return s
    opener, closer = s[0], s[-1]
    if opener == closer and opener in ('"', "'"):
        return s[1:-1]
    return s


def load_config(config_path: Path | str | None = None) -> dict[str, str]:
    """Read ~/.config/zh/config into a dict.

    Mirrors the bash `source` semantics conservatively: KEY=value pairs,
    lines starting with `#` are comments, surrounding quotes are stripped.
    Env-var-style export prefixes (`export KEY=value`) are accepted.

    Args:
        config_path: Optional override. Falls back to DEFAULT_CONFIG_PATH.

    Returns:
        Dict of config values. Missing file returns {} (caller decides
        whether absence is fatal).
    """
    path = Path(config_path) if config_path else DEFAULT_CONFIG_PATH
    if not path.exists():
        return {}

    out: dict[str, str] = {}
    for raw in path.read_text().splitlines():
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("export "):
            line = line[len("export "):].lstrip()
        m = _CONFIG_KEY_RE.match(line)
        if not m:
            continue
        out[m.group(1)] = _strip_quotes(m.group(2))
    return out


def resolve_token(config: dict[str, str] | None = None) -> str:
    """Resolve the ZenHub GraphQL token.

    Priority: ZH_TOKEN env var > config file. Raises ZhApiError if absent
    (or empty), since every GraphQL call needs it.
    """
    if env_token := os.environ.get("ZH_TOKEN", "").strip():
        return env_token
    if config is None:
        config = load_config()
    token = config.get("ZH_TOKEN", "").strip()
    if not token:
        raise ZhApiError(
            "ZH_TOKEN not set. Create ~/.config/zh/config with:\n"
            "  ZH_TOKEN=your_graphql_token\n"
            "Generate at: https://app.zenhub.com/settings/tokens"
        )
    return token


# =============================================================================
# GraphQL transport
# =============================================================================

def graphql_request(
    query: str,
    variables: dict | None = None,
    *,
    token: str | None = None,
    timeout: float = 30.0,
    url: str = ZH_GRAPHQL_URL,
) -> dict:
    """Send a GraphQL request and return the parsed JSON response.

    Errors at every layer are raised as ZhApiError. The caller is
    responsible for interpreting `errors` arrays inside the response
    (some ZenHub queries return partial data with errors).

    Args:
        query: GraphQL query string.
        variables: Optional variables dict.
        token: Optional explicit token (else uses resolve_token()).
        timeout: HTTP timeout seconds.
        url: GraphQL endpoint URL (default ZenHub public endpoint).

    Returns:
        Parsed JSON body (typically with `data` and possibly `errors`).

    Raises:
        ZhApiError: no token could be resolved, the server answered with
            an HTTP error status, the connection failed or timed out
            (while connecting or while reading the response), or the
            body was not a JSON object.
    """
    # Function-scope import: only the exception class is needed here.
    from http.client import HTTPException

    if token is None:
        token = resolve_token()
    payload = {"query": query}
    if variables is not None:
        payload["variables"] = variables
    data = json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(
        url,
        data=data,
        headers={
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:  # noqa: S310
            raw = resp.read()
    except urllib.error.HTTPError as e:
        # Try to extract the body for a meaningful error
        try:
            err_body = e.read().decode("utf-8", errors="replace")
        except Exception:
            err_body = ""
        raise ZhApiError(
            f"HTTP {e.code} from ZenHub GraphQL: {err_body or e.reason}"
        ) from e
    except urllib.error.URLError as e:
        raise ZhApiError(f"Transport error to ZenHub GraphQL: {e.reason}") from e
    except (OSError, HTTPException) as e:
        # Not every transport failure arrives as URLError: a timeout or a
        # dropped connection while waiting for / reading the response can
        # surface as a plain OSError (e.g. TimeoutError) or an
        # http.client exception. Wrap those too so callers only ever
        # need to catch ZhApiError.
        raise ZhApiError(f"Transport error to ZenHub GraphQL: {e}") from e

    # Decode leniently: a stray invalid byte should end up in the
    # "non-JSON" diagnostic below, not escape as UnicodeDecodeError.
    body = raw.decode("utf-8", errors="replace")
    try:
        parsed = json.loads(body)
    except json.JSONDecodeError as e:
        raise ZhApiError(f"Non-JSON response from ZenHub: {body[:200]!r}") from e
    if not isinstance(parsed, dict):
        # Valid JSON but not an object (e.g. `null` or a list); callers
        # rely on dict access, so reject it here with a clear message.
        raise ZhApiError(
            f"Unexpected non-object JSON response from ZenHub: {body[:200]!r}"
        )
    return parsed


def check_graphql_errors(response: dict, *, context: str = "") -> None:
    """Fail loudly when a GraphQL response carries top-level `errors`.

    GraphQL permits responses that contain both `data` and `errors`
    (partial results). The mutations and structural queries the MCP
    depends on cannot tolerate that, so any non-empty `errors` value is
    treated as fatal.

    Args:
        response: Parsed GraphQL response body.
        context: Optional label prepended to the error message.

    Raises:
        ZhApiError: `errors` is present and non-empty. The message joins
            each dict entry's `message` (or its string form when the key
            is absent); if no entry is a dict the raw `errors` value is
            shown instead.
    """
    errors = response.get("errors") or []
    if not errors:
        return

    messages = [
        err.get("message", str(err)) for err in errors if isinstance(err, dict)
    ]
    msg = "; ".join(messages)
    prefix = f"{context}: " if context else ""
    raise ZhApiError(f"{prefix}GraphQL errors: {msg or errors}")


# =============================================================================
# Repo + workspace resolution
# =============================================================================

# Parses a GitHub remote URL — SSH (`git@github.com:owner/repo`) or
# http(s) (`https://github.com/owner/repo`) — into the named groups
# `owner` and `repo`. A trailing `.git` and/or `/` is not part of `repo`.
_GH_URL_RE = re.compile(
    # Repo names on GitHub can contain dots ("docs.github.io",
    # "my.tool", "internal.docs"). Match anything that isn't a `/`,
    # then strip an optional trailing `.git` and trailing `/`. A
    # non-greedy `[^/]+?` ensures the optional `\.git` suffix is
    # claimed at the end rather than absorbed into the name.
    #
    # The `^` anchor (round-5 #6) rejects garbage prefixes — without
    # it `re.search` would accept "prefix-junk-git@github.com:owner/
    # repo" by matching the substring starting at `git@`. The
    # canonical contract is: input must START with one of the two
    # accepted scheme forms.
    r"^(?:git@github\.com:|https?://github\.com/)"
    r"(?P<owner>[^/]+)/"
    r"(?P<repo>[^/]+?)(?:\.git)?/?$"
)


def get_owner_repo_from_git(cwd: str | os.PathLike | None = None) -> str:
    """Resolve `owner/repo` from the cwd's git origin remote.

    Runs `git remote get-url origin` in `cwd` (or the current process
    directory when `cwd` is None) and parses the result as a GitHub URL.

    Args:
        cwd: Directory to run git in.

    Returns:
        The repository as an "owner/repo" string.

    Raises:
        ZhApiError: git could not be run (not installed, `cwd` missing,
            not a directory, or not accessible), the directory is not a
            git repo / has no origin remote, or the origin URL doesn't
            parse as a GitHub URL.
    """
    try:
        out = subprocess.check_output(
            ["git", "remote", "get-url", "origin"],
            cwd=cwd,
            stderr=subprocess.DEVNULL,
            text=True,
        ).strip()
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError (not just FileNotFoundError): a `cwd` that is a regular
        # file or is unreadable raises NotADirectoryError/PermissionError
        # when the process is spawned, and should produce the same
        # actionable message as a missing git binary or directory.
        raise ZhApiError(
            "Could not determine repository from git remote. "
            "Run this MCP tool with an explicit repo_path that points to "
            "a git checkout with a GitHub remote, or pass owner_repo "
            "directly to the underlying helper."
        ) from e

    m = _GH_URL_RE.search(out)
    if not m:
        raise ZhApiError(
            f"Origin remote URL did not parse as GitHub URL: {out!r}"
        )
    return f"{m.group('owner')}/{m.group('repo')}"


def get_gh_repo_id(owner_repo: str, *, gh_token: str | None = None) -> int:
    """Get the GitHub numeric repo ID via the GitHub REST API.

    Used as the input to ZenHub's `repositoryByGhId` / `repositoriesByGhId`
    queries.

    Args:
        owner_repo: "owner/repo" string. Case is preserved on input but
            GitHub matches case-insensitively.
        gh_token: Optional GitHub token. Falls back to `gh auth token`
            (matches the bash script's behaviour).

    Returns:
        The repository's numeric GitHub id.

    Raises:
        ZhApiError: no token could be obtained from the gh CLI, the
            GitHub API request failed (HTTP status, transport error or
            timeout), or the response was not a JSON object with an
            integer `id`.
    """
    # Function-scope imports: only these two names are needed here.
    from http.client import HTTPException
    from urllib.parse import quote

    if gh_token is None:
        try:
            gh_token = subprocess.check_output(
                ["gh", "auth", "token"], text=True, stderr=subprocess.DEVNULL
            ).strip()
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing gh binary as well as one that
            # exists but cannot be executed.
            raise ZhApiError(
                "Could not get GitHub token via `gh auth token`. "
                "Either authenticate the gh CLI or pass gh_token explicitly."
            ) from e

    # Percent-encode everything except the owner/repo separator so a
    # malformed value (spaces, `?`, `#`, control characters) cannot
    # rewrite the request URL; valid GitHub names pass through unchanged.
    url = "https://api.github.com/repos/" + quote(owner_repo, safe="/")
    req = urllib.request.Request(
        url,
        headers={
            "Authorization": f"token {gh_token}",
            "Accept": "application/vnd.github+json",
        },
    )
    try:
        with urllib.request.urlopen(req, timeout=15.0) as resp:  # noqa: S310
            raw = resp.read()
    except urllib.error.HTTPError as e:
        raise ZhApiError(
            f"GitHub API HTTP {e.code} for repos/{owner_repo}: {e.reason}"
        ) from e
    except urllib.error.URLError as e:
        raise ZhApiError(f"GitHub API transport error: {e.reason}") from e
    except (OSError, HTTPException) as e:
        # Failures while waiting for / reading the response (timeouts,
        # dropped connections) may not be wrapped in URLError.
        raise ZhApiError(f"GitHub API transport error: {e}") from e

    try:
        body = json.loads(raw.decode("utf-8", errors="replace"))
    except json.JSONDecodeError as e:
        raise ZhApiError(
            f"GitHub API returned a non-JSON body for repos/{owner_repo}"
        ) from e

    repo_id = body.get("id") if isinstance(body, dict) else None
    # bool is a subclass of int; `true`/`false` is not a usable repo id.
    if not isinstance(repo_id, int) or isinstance(repo_id, bool):
        raise ZhApiError(
            f"GitHub API returned no numeric id for repos/{owner_repo}"
        )
    return repo_id


# Looks up ZenHub repositories by GitHub numeric repo id(s). Variable:
# `$ghIds`. Each result carries the ZenHub `id` plus `name`/`ownerName`.
_REPO_ID_QUERY = """
query($ghIds: [Int!]!) {
  repositoriesByGhId(ghIds: $ghIds) {
    id
    name
    ownerName
  }
}
"""


def get_zenhub_repo_id(
    owner_repo: str, *, token: str | None = None, gh_token: str | None = None
) -> str:
    """Map a GitHub `owner/repo` to its ZenHub repository ID.

    Two-step resolution, same as the bash `zh` script: the GitHub REST
    API yields the numeric repo id, which is then fed to ZenHub's
    `repositoriesByGhId` query. The first repository returned wins.

    Args:
        owner_repo: "owner/repo" string.
        token: Optional ZenHub token, forwarded to graphql_request().
        gh_token: Optional GitHub token, forwarded to get_gh_repo_id().

    Raises:
        ZhApiError: the GraphQL response has errors, or ZenHub knows no
            repository for that GitHub id.
    """
    gh_id = get_gh_repo_id(owner_repo, gh_token=gh_token)
    variables = {"ghIds": [gh_id]}
    resp = graphql_request(_REPO_ID_QUERY, variables, token=token)
    check_graphql_errors(resp, context="repositoriesByGhId")

    data = resp.get("data") or {}
    matches = data.get("repositoriesByGhId") or []
    if matches:
        return matches[0]["id"]
    raise ZhApiError(
        f"No ZenHub repository found for GitHub {owner_repo} (id {gh_id}). "
        "Connect the repo to a ZenHub workspace first."
    )


# Lists the workspaces a repository (looked up by GitHub numeric id) is
# connected to, 50 per page. Variables: `$ghIds`, and `$after` — the
# pagination cursor (null for the first page).
_WORKSPACE_QUERY = """
query($ghIds: [Int!]!, $after: String) {
  repositoriesByGhId(ghIds: $ghIds) {
    id
    workspacesConnection(first: 50, after: $after) {
      pageInfo { hasNextPage endCursor }
      nodes {
        id
        name
      }
    }
  }
}
"""

# Hard cap on workspace-list pagination iterations. 50 pages × 50 nodes
# = 2,500 workspaces — far past any plausible per-repo count, so a cap
# this size only ever fires on a stuck cursor.
_WORKSPACE_PAGINATION_CAP = 50


def list_workspaces(
    owner_repo: str,
    *,
    token: str | None = None,
    gh_token: str | None = None,
) -> list[dict]:
    """List all workspaces the given repo is connected to.

    Follows `workspacesConnection` pagination until the server reports
    `hasNextPage=false`. Without the walk, repos attached to more than
    one page of workspaces would be silently truncated.

    The walk stops early, returning what it has so far, when:
      - the page budget (`_WORKSPACE_PAGINATION_CAP`) is used up,
      - the server claims another page but gives no cursor or repeats
        the cursor just used (stuck cursor), or
      - a later page comes back without the repository although earlier
        pages already produced workspaces.

    Args:
        owner_repo: "owner/repo" string.
        token: Optional ZenHub token, forwarded to graphql_request().
        gh_token: Optional GitHub token, forwarded to get_gh_repo_id().

    Returns:
        The workspace nodes in the order the API returned them.

    Raises:
        ZhApiError: a response carries GraphQL errors, or the repository
            is missing from a response before any workspace was
            collected.
    """
    gh_id = get_gh_repo_id(owner_repo, gh_token=gh_token)
    workspaces: list[dict] = []
    cursor: str | None = None

    # Bounded loop: the range is the iteration cap. It is a defensive
    # limit that should never be reached in practice.
    for _ in range(_WORKSPACE_PAGINATION_CAP):
        resp = graphql_request(
            _WORKSPACE_QUERY,
            {"ghIds": [gh_id], "after": cursor},
            token=token,
        )
        check_graphql_errors(resp, context="workspacesConnection")

        repos = (resp.get("data") or {}).get("repositoriesByGhId") or []
        if not repos:
            # Round-6 #12: with results already in hand, an empty
            # response is treated as a transient API blip — keep what
            # was fetched instead of discarding it and forcing a retry.
            if workspaces:
                break
            raise ZhApiError(
                f"No ZenHub repository found for GitHub {owner_repo}"
            )

        conn = repos[0].get("workspacesConnection") or {}
        workspaces.extend(conn.get("nodes") or [])

        page_info = conn.get("pageInfo") or {}
        if not page_info.get("hasNextPage"):
            break

        next_cursor = page_info.get("endCursor")
        # Stuck-cursor defense: a missing cursor, or the same one that
        # was just sent, would make the loop spin on the same page.
        if not next_cursor or next_cursor == cursor:
            break
        cursor = next_cursor

    return workspaces


def get_workspace_id(
    owner_repo: str,
    *,
    workspace_name: str | None = None,
    token: str | None = None,
    gh_token: str | None = None,
) -> str:
    """Return the ZenHub workspace ID to use for a repo.

    With a `workspace_name`, the first workspace whose name matches it
    case-insensitively is chosen. Without one, the first workspace the
    API lists is used (same as the bash script).

    Every page of `workspacesConnection` is consulted, so a workspace
    beyond the first page still resolves.

    Args:
        owner_repo: "owner/repo" string.
        workspace_name: Optional workspace name to select.
        token: Optional ZenHub token, forwarded to list_workspaces().
        gh_token: Optional GitHub token, forwarded to list_workspaces().

    Raises:
        ZhApiError: the repo has no workspace at all, or none of its
            workspaces carries the requested name.
    """
    workspaces = list_workspaces(owner_repo, token=token, gh_token=gh_token)
    if not workspaces:
        raise ZhApiError(f"No workspace found for {owner_repo}")

    # No name requested: first workspace wins.
    if not workspace_name:
        return workspaces[0]["id"]

    target = workspace_name.lower()
    match = next(
        (ws for ws in workspaces if (ws.get("name") or "").lower() == target),
        None,
    )
    if match is not None:
        return match["id"]

    # List what does exist so the caller can correct the name.
    available = ", ".join(ws.get("name") or "?" for ws in workspaces)
    raise ZhApiError(
        f"Workspace {workspace_name!r} not found for {owner_repo}. "
        f"Available: {available}"
    )


# =============================================================================
# Issue resolution
# =============================================================================

# Fetches one issue by ZenHub repository id and issue number. Variables:
# `$repoId`, `$issueNumber`. Returns the issue's id/number/title/state
# and repository, plus the same identifying fields for its parent issue.
_ISSUE_BY_INFO_QUERY = """
query($repoId: ID!, $issueNumber: Int!) {
  issueByInfo(repositoryId: $repoId, issueNumber: $issueNumber) {
    id
    number
    title
    state
    repository {
      ownerName
      name
    }
    parentIssue {
      id
      number
      title
      repository {
        ownerName
        name
      }
    }
  }
}
"""


def repos_match(a: dict | None, owner_repo: str) -> bool:
    """Case-insensitive comparison of a node's `.repository` vs `owner/repo`.

    Round-4 finding #4 caught a case-sensitive bug on the bash side; we
    do not reproduce it here. `a` is expected to look like
    `{"ownerName": "...", "name": "..."}`.
    """
    if not a:
        return False
    owner, _, repo = owner_repo.partition("/")
    return (
        (a.get("ownerName") or "").lower() == owner.lower()
        and (a.get("name") or "").lower() == repo.lower()
    )


# =============================================================================
# Resolver: bundle the common (owner_repo, repo_id, workspace_id) tuple
# =============================================================================

class RepoContext:
    """Resolved (owner_repo, repo_id, workspace_id) for a working directory.

    The MCP tools each need all three. Bundling them lets the tool body
    fail fast with a single resolution step instead of three round-trips
    that each report their own error.

    The `query` method is regular attribute access (not `__slots__`-pinned)
    so test suites can `patch.object(ctx, "query", ...)` and inject mock
    GraphQL responses without monkey-patching `urllib`.
    """

    def __init__(
        self,
        owner_repo: str,
        repo_id: str,
        workspace_id: str,
        token: str,
    ) -> None:
        """Store the already-resolved values; performs no I/O itself."""
        # GitHub "owner/repo" string.
        self.owner_repo = owner_repo
        # ZenHub repository id.
        self.repo_id = repo_id
        # ZenHub workspace id.
        self.workspace_id = workspace_id
        # ZenHub GraphQL token, reused by every query() call.
        self.token = token

    def query(self, query: str, variables: dict | None = None) -> dict:
        """Run a GraphQL request with this context's token.

        Thin pass-through to graphql_request(); returns its parsed
        response and propagates whatever it raises.
        """
        return graphql_request(query, variables, token=self.token)


def resolve_context(
    cwd: str | os.PathLike | None = None,
    *,
    owner_repo: str | None = None,
    workspace_name: str | None = None,
    config: dict[str, str] | None = None,
) -> RepoContext:
    """Resolve token, repo and workspace in one step.

    Args:
        cwd: Directory whose `git remote` is consulted. Only used when
            no other repo source yields a value.
        owner_repo: Explicit "owner/repo"; see the precedence list below
            for the fallbacks used when it is None.
        workspace_name: Explicit workspace name for multi-workspace
            repos; see the precedence list below for the fallbacks.
        config: Pre-loaded config dict (loaded from the default path
            when None).

    Returns:
        A RepoContext holding owner_repo, the ZenHub repo id, the ZenHub
        workspace id and the token.

    Environment-variable contract (kept in sync with `zh`'s bash side,
    see `main()` and `get_repo_info` / `get_workspace_id` there):

      Repo selection (highest precedence first):
        1. owner_repo arg
        2. ZH_REPO_OVERRIDE env (set by bash `-r` / `--repo` flag)
        3. ZH_REPO env
        4. ZH_REPO config entry
        5. git remote of cwd

      Workspace selection (highest precedence first):
        1. workspace_name arg
        2. ZH_WORKSPACE_NAME env (set by bash `-w` / `--workspace` flag)
        3. ZH_WORKSPACE env
        4. ZH_WORKSPACE config entry
        5. First workspace returned by the API

    Values are whitespace-trimmed and blank ones are skipped. Reading
    the same variables in the same order as bash means calling Python
    directly (e.g. from the MCP server when invoked from a
    `-r owner/repo`-style bash wrapper) honors the same flags.
    """
    if config is None:
        config = load_config()
    token = resolve_token(config)
    env = os.environ

    def _first_set(*candidates: str) -> str | None:
        # First candidate that is non-empty after trimming, else None.
        for value in candidates:
            value = value.strip()
            if value:
                return value
        return None

    # Repo: explicit arg > ZH_REPO_OVERRIDE > ZH_REPO env > config > git
    if owner_repo is None:
        owner_repo = _first_set(
            env.get("ZH_REPO_OVERRIDE", ""),
            env.get("ZH_REPO", ""),
            config.get("ZH_REPO", ""),
        )
    if owner_repo is None:
        owner_repo = get_owner_repo_from_git(cwd=cwd)

    # Workspace: explicit arg > ZH_WORKSPACE_NAME > ZH_WORKSPACE > config
    if workspace_name is None:
        workspace_name = _first_set(
            env.get("ZH_WORKSPACE_NAME", ""),
            env.get("ZH_WORKSPACE", ""),
            config.get("ZH_WORKSPACE", ""),
        )

    repo_id = get_zenhub_repo_id(owner_repo, token=token)
    workspace_id = get_workspace_id(
        owner_repo, workspace_name=workspace_name, token=token
    )
    return RepoContext(owner_repo, repo_id, workspace_id, token)