Skip to content

Commit 4515bfa

Browse files
k4cper-g and claude committed
Add vocabulary short codes, simplify detail levels to compact/full
Add ROLE_CODES, STATE_CODES, ACTION_CODES dicts for compact output. Remove "minimal" detail level — consolidate to "compact" (default) and "full". Only emit bounds for interactable nodes. Update docs, MCP server defaults, and tests to match. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 9041087 commit 4515bfa

File tree

6 files changed

+155
-142
lines changed

6 files changed

+155
-142
lines changed

README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,13 @@ Output:
5959
# app: Spotify
6060
# 63 nodes (280 before pruning)
6161
62-
[e0] window "Spotify" @120,40 1680x1020
63-
[e1] document "Spotify" @120,40 1680x1020
64-
[e2] button "Back" @132,52 32x32 [click]
65-
[e3] button "Forward" @170,52 32x32 {disabled} [click]
66-
[e7] navigation "Main" @120,88 240x972
67-
[e8] link "Home" @132,100 216x40 {selected} [click]
68-
[e9] link "Search" @132,148 216x40 [click]
62+
[e0] win "Spotify"
63+
[e1] doc "Spotify"
64+
[e2] btn "Back" 132,52 32x32 [clk]
65+
[e3] btn "Forward" 170,52 32x32 {dis} [clk]
66+
[e7] nav "Main"
67+
[e8] lnk "Home" 132,100 216x40 {sel} [clk]
68+
[e9] lnk "Search" 132,148 216x40 [clk]
6969
```
7070

7171
## CLI

cup/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def snapshot(
142142
app: str | None = None,
143143
max_depth: int = 999,
144144
compact: bool = True,
145-
detail: Detail = "standard",
145+
detail: Detail = "compact",
146146
) -> str | dict:
147147
"""Capture the accessibility tree.
148148
@@ -156,7 +156,7 @@ def snapshot(
156156
max_depth: Maximum tree depth.
157157
compact: If True, return compact LLM text; if False, return
158158
the full CUP envelope dict.
159-
detail: Pruning level ("standard", "minimal", or "full").
159+
detail: Pruning level ("compact" or "full").
160160
161161
Returns:
162162
Compact text string or CUP envelope dict.

cup/format.py

Lines changed: 125 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import time
1111
from typing import Literal
1212

13-
Detail = Literal["standard", "minimal", "full"]
13+
Detail = Literal["compact", "full"]
1414

1515

1616
# ---------------------------------------------------------------------------
@@ -122,6 +122,110 @@ def _count_nodes(nodes: list[dict]) -> int:
122122

123123
_CHROME_ROLES = frozenset({"scrollbar", "separator", "titlebar", "tooltip", "status"})
124124

125+
# ---------------------------------------------------------------------------
126+
# Vocabulary short codes — compact aliases for roles, states, and actions.
127+
# These reduce per-node token cost by ~50% on role/state/action strings.
128+
# ---------------------------------------------------------------------------
129+
130+
ROLE_CODES: dict[str, str] = {
131+
"alert": "alrt",
132+
"alertdialog": "adlg",
133+
"application": "app",
134+
"banner": "bnr",
135+
"button": "btn",
136+
"cell": "cel",
137+
"checkbox": "chk",
138+
"columnheader": "colh",
139+
"combobox": "cmb",
140+
"complementary": "cmp",
141+
"contentinfo": "ci",
142+
"dialog": "dlg",
143+
"document": "doc",
144+
"form": "frm",
145+
"generic": "gen",
146+
"grid": "grd",
147+
"group": "grp",
148+
"heading": "hdg",
149+
"img": "img",
150+
"link": "lnk",
151+
"list": "lst",
152+
"listitem": "li",
153+
"log": "log",
154+
"main": "main",
155+
"marquee": "mrq",
156+
"menu": "mnu",
157+
"menubar": "mnub",
158+
"menuitem": "mi",
159+
"menuitemcheckbox": "mic",
160+
"menuitemradio": "mir",
161+
"navigation": "nav",
162+
"none": "none",
163+
"option": "opt",
164+
"progressbar": "pbar",
165+
"radio": "rad",
166+
"region": "rgn",
167+
"row": "row",
168+
"rowheader": "rowh",
169+
"scrollbar": "sb",
170+
"search": "srch",
171+
"searchbox": "sbx",
172+
"separator": "sep",
173+
"slider": "sld",
174+
"spinbutton": "spn",
175+
"status": "sts",
176+
"switch": "sw",
177+
"tab": "tab",
178+
"table": "tbl",
179+
"tablist": "tabs",
180+
"tabpanel": "tpnl",
181+
"text": "txt",
182+
"textbox": "tbx",
183+
"timer": "tmr",
184+
"titlebar": "ttlb",
185+
"toolbar": "tlbr",
186+
"tooltip": "ttp",
187+
"tree": "tre",
188+
"treeitem": "ti",
189+
"window": "win",
190+
}
191+
192+
STATE_CODES: dict[str, str] = {
193+
"busy": "bsy",
194+
"checked": "chk",
195+
"collapsed": "col",
196+
"disabled": "dis",
197+
"editable": "edt",
198+
"expanded": "exp",
199+
"focused": "foc",
200+
"hidden": "hid",
201+
"mixed": "mix",
202+
"modal": "mod",
203+
"multiselectable": "msel",
204+
"offscreen": "off",
205+
"pressed": "prs",
206+
"readonly": "ro",
207+
"required": "req",
208+
"selected": "sel",
209+
}
210+
211+
ACTION_CODES: dict[str, str] = {
212+
"click": "clk",
213+
"collapse": "col",
214+
"decrement": "dec",
215+
"dismiss": "dsm",
216+
"doubleclick": "dbl",
217+
"expand": "exp",
218+
"focus": "foc",
219+
"increment": "inc",
220+
"longpress": "lp",
221+
"rightclick": "rclk",
222+
"scroll": "scr",
223+
"select": "sel",
224+
"setvalue": "sv",
225+
"toggle": "tog",
226+
"type": "typ",
227+
}
228+
125229

126230
def _should_skip(node: dict, parent: dict | None, siblings: int) -> bool:
127231
"""Decide if a node should be pruned (entire subtree is dropped)."""
@@ -346,64 +450,28 @@ def _has_meaningful_actions(node: dict) -> bool:
346450
return any(a != "focus" for a in actions)
347451

348452

349-
def _prune_minimal_node(node: dict) -> dict | None:
350-
"""Minimal pruning: keep only nodes with meaningful actions + ancestors.
351-
352-
Returns a pruned copy of the node if it or any descendant has meaningful
353-
actions, or None if the entire subtree can be dropped.
354-
"""
355-
children = node.get("children", [])
356-
357-
# Recursively prune children first
358-
kept_children = []
359-
for child in children:
360-
pruned_child = _prune_minimal_node(child)
361-
if pruned_child is not None:
362-
kept_children.append(pruned_child)
363-
364-
# Keep this node if it has meaningful actions OR if any child was kept
365-
if _has_meaningful_actions(node) or kept_children:
366-
pruned = {k: v for k, v in node.items() if k != "children"}
367-
if kept_children:
368-
pruned["children"] = kept_children
369-
return pruned
370-
371-
return None
372-
373-
374453
def prune_tree(
375454
tree: list[dict],
376455
*,
377-
detail: Detail = "standard",
456+
detail: Detail = "compact",
378457
screen: dict | None = None,
379458
) -> list[dict]:
380459
"""Apply pruning to a CUP tree, returning a new pruned tree.
381460
382461
Args:
383462
tree: List of root CUP node dicts.
384463
detail: Pruning level:
385-
"standard" — Remove unnamed generics, decorative images, empty
386-
text, offscreen noise, etc. (default)
387-
"minimal" — Keep only nodes with meaningful actions (not just
388-
focus) and their ancestors. Dramatically reduces
389-
token count.
390-
"full" — No pruning; return every node from the raw tree.
464+
"compact" — Remove unnamed generics, decorative images, empty
465+
text, offscreen noise, etc. (default)
466+
"full" — No pruning; return every node from the raw tree.
391467
screen: Screen dimensions dict with "w" and "h" keys. When provided,
392468
elements entirely outside the screen bounds are clipped even
393469
if no scrollable ancestor is present.
394470
"""
395471
if detail == "full":
396472
return copy.deepcopy(tree)
397473

398-
if detail == "minimal":
399-
result = []
400-
for root in tree:
401-
pruned = _prune_minimal_node(root)
402-
if pruned is not None:
403-
result.append(pruned)
404-
return result
405-
406-
# "standard" — use screen as baseline viewport so elements far offscreen
474+
# "compact" — use screen as baseline viewport so elements far offscreen
407475
# (e.g. in web-based apps with virtual scroll) are clipped even when no
408476
# ancestor exposes the "scroll" action.
409477
screen_viewport = None
@@ -417,7 +485,8 @@ def prune_tree(
417485

418486
def _format_line(node: dict) -> str:
419487
"""Format a single CUP node as a compact one-liner."""
420-
parts = [f"[{node['id']}]", node["role"]]
488+
role = node["role"]
489+
parts = [f"[{node['id']}]", ROLE_CODES.get(role, role)]
421490

422491
name = node.get("name", "")
423492
if name:
@@ -426,22 +495,26 @@ def _format_line(node: dict) -> str:
426495
truncated = truncated.replace("\\", "\\\\").replace('"', '\\"').replace("\n", " ")
427496
parts.append(f'"{truncated}"')
428497

498+
# Actions (drop "focus" -- it's noise)
499+
actions = [a for a in node.get("actions", []) if a != "focus"]
500+
501+
# Only include bounds for interactable nodes (nodes with meaningful actions).
502+
# Non-interactable nodes are context-only — agents reference them by ID, not
503+
# by coordinates, so spatial info adds tokens without value.
429504
bounds = node.get("bounds")
430-
if bounds:
431-
parts.append(f"@{bounds['x']},{bounds['y']} {bounds['w']}x{bounds['h']}")
505+
if bounds and actions:
506+
parts.append(f"{bounds['x']},{bounds['y']} {bounds['w']}x{bounds['h']}")
432507

433508
states = node.get("states", [])
434509
if states:
435-
parts.append("{" + ",".join(states) + "}")
510+
parts.append("{" + ",".join(STATE_CODES.get(s, s) for s in states) + "}")
436511

437-
# Actions (drop "focus" -- it's noise)
438-
actions = [a for a in node.get("actions", []) if a != "focus"]
439512
if actions:
440-
parts.append("[" + ",".join(actions) + "]")
513+
parts.append("[" + ",".join(ACTION_CODES.get(a, a) for a in actions) + "]")
441514

442515
# Value for input-type elements
443516
value = node.get("value", "")
444-
if value and node["role"] in ("textbox", "searchbox", "combobox", "spinbutton", "slider"):
517+
if value and role in ("textbox", "searchbox", "combobox", "spinbutton", "slider"):
445518
truncated_val = value[:120] + ("..." if len(value) > 120 else "")
446519
truncated_val = truncated_val.replace('"', '\\"').replace("\n", " ")
447520
parts.append(f'val="{truncated_val}"')
@@ -513,7 +586,7 @@ def serialize_compact(
513586
envelope: dict,
514587
*,
515588
window_list: list[dict] | None = None,
516-
detail: Detail = "standard",
589+
detail: Detail = "compact",
517590
max_chars: int = MAX_OUTPUT_CHARS,
518591
) -> str:
519592
"""Serialize a CUP envelope to compact LLM-friendly text.
@@ -526,7 +599,7 @@ def serialize_compact(
526599
envelope: CUP envelope dict with tree data.
527600
window_list: Optional list of open windows to include in header
528601
for situational awareness (used by foreground scope).
529-
detail: Pruning level ("standard", "minimal", or "full").
602+
detail: Pruning level ("compact" or "full").
530603
max_chars: Hard character limit for output. When exceeded, the
531604
output is truncated with a diagnostic message.
532605
"""

cup/mcp/server.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def snapshot() -> str:
8686
scope="foreground",
8787
max_depth=999,
8888
compact=True,
89-
detail="standard",
89+
detail="compact",
9090
)
9191

9292

@@ -114,7 +114,7 @@ def snapshot_app(app: str) -> str:
114114
app=app,
115115
max_depth=999,
116116
compact=True,
117-
detail="standard",
117+
detail="compact",
118118
)
119119

120120

@@ -134,7 +134,7 @@ def snapshot_desktop() -> str:
134134
scope="desktop",
135135
max_depth=999,
136136
compact=True,
137-
detail="standard",
137+
detail="compact",
138138
)
139139

140140

docs/api-reference.md

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ result = session.snapshot(
2525
app=None, # filter by window title (scope="full" only)
2626
max_depth=999, # maximum tree depth
2727
compact=True, # True → compact text, False → CUP envelope dict
28-
detail="standard", # "standard" | "minimal" | "full"
28+
detail="compact", # "compact" | "full"
2929
)
3030
```
3131

@@ -44,8 +44,7 @@ result = session.snapshot(
4444

4545
| Level | Behavior |
4646
|-------|----------|
47-
| `standard` | Prunes unnamed generics, empty text, decorative images (~75% smaller) |
48-
| `minimal` | Keep only interactive nodes and their ancestors |
47+
| `compact` | Prunes unnamed generics, empty text, decorative images (~75% smaller) |
4948
| `full` | No pruning — every node included |
5049

5150
---
@@ -274,14 +273,14 @@ The text format returned by `session.snapshot(compact=True)`. Optimized for LLM
274273
# app: Discord
275274
# 87 nodes (353 before pruning)
276275
277-
[e0] window "Discord" @509,62 1992x1274
278-
[e1] document "General" @509,62 1992x1274 {readonly}
279-
[e2] button "Back" @518,66 26x24 [click]
280-
[e7] tree "Servers" @509,94 72x1242
281-
[e8] treeitem "Lechownia" @513,190 64x48 {selected} [click,select]
276+
[e0] win "Discord"
277+
[e1] doc "General" {ro}
278+
[e2] btn "Back" 518,66 26x24 [clk]
279+
[e7] tre "Servers"
280+
[e8] ti "Lechownia" 513,190 64x48 {sel} [clk,sel]
282281
```
283282

284-
Line format: `[id] role "name" @x,y wxh {states} [actions] val="value" (attrs)`
283+
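Line format: `[id] role "name" x,y wxh {states} [actions] val="value" (attrs)` — the `x,y wxh` bounds are emitted only for nodes that have at least one action other than `focus`.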
285284

286285
Full spec: [compact.md](https://github.com/computeruseprotocol/computeruseprotocol/blob/main/schema/compact.md)
287286

0 commit comments

Comments
 (0)