Skip to content

Commit a4df96a

Browse files
committed
yeast: Support capturing unnamed nodes in queries
Three improvements to the query parser, all aimed at allowing query patterns to refer to unnamed tokens:

1. Bare-literal capture: `"=" @op` now captures the unnamed `=` token, matching the parenthesized form `("=") @op`. Previously the literal branch in parse_query_list skipped the maybe_wrap_capture call, so the `@op` was a leftover token and would error.

2. Bare `_` matches any node, named or unnamed. Previously bare `_` and `(_)` both produced QueryNode::Any with the same matches_named_only behaviour, so bare `_` would skip unnamed children. Now Any carries a match_unnamed flag: false for `(_)` (named-only, tree-sitter default) and true for bare `_` (any node).

3. Named fields and bare child patterns may be intermixed in any order. Previously, once parse_query_fields saw a bare pattern it would stop accepting named fields. The fix accumulates bare patterns into the implicit `child` field and keeps parsing. Each named field independently selects its target field for matching, so the source-order of fields in the query is purely cosmetic and intermixing is safe.

Add tests covering parenthesized capture, bare-literal capture, and the named-vs-any distinction between `(_)` and bare `_`. Update query-syntax docs to reflect all three.
1 parent a0a0e9e commit a4df96a

5 files changed

Lines changed: 200 additions & 27 deletions

File tree

shared/yeast-macros/src/lib.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,21 @@ mod parse;
99
///
1010
/// ```text
1111
/// (_) - match any named node (skips unnamed tokens)
12+
/// _ - match any node, named or unnamed
1213
/// (kind) - match a named node of the given kind
1314
/// ("literal") - match an unnamed token by its text
15+
/// "literal" - shorthand for `("literal")`
1416
/// (kind field: (pattern)) - match with named field
15-
/// (kind (pat) (pat)...) - match unnamed children (after all fields)
17+
/// (kind field: _) - bare `_` and bare literals work in field position too
18+
/// (kind (pat) (pat)...) - match unnamed children
1619
/// (pattern) @capture - capture the matched node
20+
/// "literal" @capture - capture an unnamed token
21+
/// _ @capture - capture any node
1722
/// (pattern)* @capture - capture each repeated match
1823
/// (pattern)? - zero or one
1924
/// ```
25+
///
26+
/// Named fields and bare child patterns may be intermixed in any order.
2027
#[proc_macro]
2128
pub fn query(input: TokenStream) -> TokenStream {
2229
let input2: TokenStream2 = input.into();

shared/yeast-macros/src/parse.rs

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ fn parse_query_node(tokens: &mut Tokens) -> Result<TokenStream> {
3838
}
3939
}
4040

41-
/// Parse a query atom: `(kind fields...)` or `(kind fields... bare_children...)`.
41+
/// Parse a query atom: a parenthesized node, a bare `_` (any node), or a
42+
/// bare string literal (unnamed token).
4243
/// Does not handle `@capture` — that's handled by the caller as a postfix.
4344
fn parse_query_atom(tokens: &mut Tokens) -> Result<TokenStream> {
4445
match tokens.peek() {
@@ -58,9 +59,17 @@ fn parse_query_atom(tokens: &mut Tokens) -> Result<TokenStream> {
5859
}
5960
Ok(result)
6061
}
62+
Some(TokenTree::Ident(id)) if *id == "_" => {
63+
tokens.next();
64+
Ok(quote! { yeast::query::QueryNode::Any { match_unnamed: true } })
65+
}
66+
Some(TokenTree::Literal(_)) => {
67+
let lit = expect_literal(tokens)?;
68+
Ok(quote! { yeast::query::QueryNode::UnnamedNode { kind: #lit } })
69+
}
6170
Some(tok) => Err(syn::Error::new_spanned(
6271
tok.clone(),
63-
"expected `(` in query; use `(_) @name` to capture a wildcard",
72+
"expected `(`, `_`, or string literal in query",
6473
)),
6574
}
6675
}
@@ -74,7 +83,7 @@ fn parse_query_node_inner(tokens: &mut Tokens) -> Result<TokenStream> {
7483
)),
7584
Some(TokenTree::Ident(id)) if *id == "_" => {
7685
tokens.next();
77-
Ok(quote! { yeast::query::QueryNode::Any() })
86+
Ok(quote! { yeast::query::QueryNode::Any { match_unnamed: false } })
7887
}
7988
Some(TokenTree::Literal(_)) => {
8089
let lit = expect_literal(tokens)?;
@@ -98,11 +107,14 @@ fn parse_query_node_inner(tokens: &mut Tokens) -> Result<TokenStream> {
98107
}
99108
}
100109

101-
/// Parse zero or more field specifications and trailing bare patterns.
102-
/// Named fields: `name: pattern` or `name*: (list...)`.
103-
/// Bare patterns (no field name) become implicit `child` field entries.
110+
/// Parse zero or more field specifications and bare patterns.
111+
/// Named fields: `name: pattern`. Bare patterns (no field name) become
112+
/// implicit `child` field entries. Named fields and bare patterns may
113+
/// appear in any order; bare patterns are accumulated and emitted as a
114+
/// single `("child", ...)` entry.
104115
fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
105116
let mut fields = Vec::new();
117+
let mut bare_children: Vec<TokenStream> = Vec::new();
106118
while tokens.peek().is_some() {
107119
if peek_is_field(tokens) {
108120
let field_name = expect_ident(tokens, "expected field name")?;
@@ -115,16 +127,21 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
115127
(#field_str, vec![yeast::query::QueryListElem::SingleNode(#child)])
116128
});
117129
} else {
118-
// Bare patterns — collect as implicit `child` field
130+
// Bare patterns — accumulate into the implicit `child` field.
131+
// We don't break here, so we can interleave with named fields.
119132
let elems = parse_query_list(tokens)?;
120-
if !elems.is_empty() {
121-
fields.push(quote! {
122-
("child", vec![#(#elems),*])
123-
});
133+
if elems.is_empty() {
134+
// Nothing more we can parse at this level.
135+
break;
124136
}
125-
break;
137+
bare_children.extend(elems);
126138
}
127139
}
140+
if !bare_children.is_empty() {
141+
fields.push(quote! {
142+
("child", vec![#(#bare_children),*])
143+
});
144+
}
128145
Ok(fields)
129146
}
130147

@@ -178,10 +195,11 @@ fn parse_query_list(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
178195
continue;
179196
}
180197

181-
// Check for string literal (unnamed node)
198+
// Check for string literal (unnamed node), optionally followed by @capture
182199
if peek_is_literal(tokens) {
183200
let lit = expect_literal(tokens)?;
184201
let node = quote! { yeast::query::QueryNode::UnnamedNode { kind: #lit } };
202+
let node = maybe_wrap_capture(tokens, node)?;
185203
let elem = maybe_wrap_repetition(
186204
tokens,
187205
quote! {
@@ -192,10 +210,12 @@ fn parse_query_list(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
192210
continue;
193211
}
194212

195-
// Check for bare _ (wildcard), possibly followed by @capture
213+
// Check for bare `_` (any node, named or unnamed), possibly followed by @capture.
214+
// Distinct from `(_)` which only matches named nodes — this matches
215+
// tree-sitter query semantics.
196216
if peek_is_underscore(tokens) {
197217
tokens.next();
198-
let node = quote! { yeast::query::QueryNode::Any() };
218+
let node = quote! { yeast::query::QueryNode::Any { match_unnamed: true } };
199219
let node = maybe_wrap_capture(tokens, node)?;
200220
let elem = maybe_wrap_repetition(
201221
tokens,

shared/yeast/doc/yeast.md

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -103,19 +103,30 @@ Captures bind matched nodes to names for use in the transform. A capture
103103
(identifier) @name // capture an identifier node
104104
(_) @value // capture any named node
105105
(identifier)* @items // capture each repeated match
106+
("=") @op // capture an unnamed token by its text
107+
"=" @op // shorthand for the line above
108+
_ @anything // capture any node, named or unnamed
106109
```
107110

108-
### Unnamed children
111+
### Named vs unnamed children
109112

110-
Patterns that appear after all named fields match unnamed (positional)
111-
children. Named node patterns like `(_)` automatically skip unnamed tokens
112-
(keywords, operators, punctuation), matching tree-sitter semantics:
113+
The two wildcard forms `(_)` and bare `_` differ:
114+
115+
- `(_)` matches only **named** nodes. When used as a positional pattern,
116+
unnamed children (keywords, operators, punctuation) are skipped over to
117+
find the next named child.
118+
- Bare `_` matches **any** node, named or unnamed, taking whatever is next
119+
in the child list.
120+
121+
Similarly, named-kind patterns like `(call ...)` skip unnamed children;
122+
unnamed-kind patterns like `("end")` or `"end"` consume the next child
123+
unconditionally:
113124

114125
```rust
115126
(for
116-
pattern: (_) @pat // named field
117-
value: (in (_) @val) // "in" token is skipped automatically
118-
body: (do (_)* @body) // "do" and "end" tokens skipped
127+
pattern: (_) @pat // named field, captures any named node
128+
value: (in (_) @val) // "in" wrapper is a named node here
129+
body: (do (_)* @body) // "do" and "end" tokens skipped by (_)
119130
)
120131
```
121132

shared/yeast/src/query.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,13 @@ use crate::{captures::Captures, Ast, Id};
22

33
#[derive(Debug, Clone)]
44
pub enum QueryNode {
5-
Any(),
5+
/// A wildcard. With `match_unnamed = false` (the default for `(_)`),
6+
/// only matches named nodes when used positionally — unnamed children
7+
/// are skipped over. With `match_unnamed = true` (for bare `_`), the
8+
/// wildcard consumes whatever the next child is, named or unnamed.
9+
Any {
10+
match_unnamed: bool,
11+
},
612
Node {
713
kind: &'static str,
814
children: Vec<(&'static str, Vec<QueryListElem>)>,
@@ -24,7 +30,7 @@ impl QueryNode {
2430
QueryNode::Node { kind, .. } => Some(kind),
2531
QueryNode::UnnamedNode { kind } => Some(kind),
2632
QueryNode::Capture { node, .. } => node.root_kind(),
27-
QueryNode::Any() => None,
33+
QueryNode::Any { .. } => None,
2834
}
2935
}
3036
}
@@ -51,7 +57,7 @@ impl QueryNode {
5157
/// semantics where `(_)` only matches named nodes.
5258
fn matches_named_only(&self) -> bool {
5359
match self {
54-
QueryNode::Any() => true,
60+
QueryNode::Any { match_unnamed } => !match_unnamed,
5561
QueryNode::Node { .. } => true,
5662
QueryNode::UnnamedNode { .. } => false,
5763
QueryNode::Capture { node, .. } => node.matches_named_only(),
@@ -60,7 +66,7 @@ impl QueryNode {
6066

6167
pub fn do_match(&self, ast: &Ast, node: Id, matches: &mut Captures) -> Result<bool, String> {
6268
match self {
63-
QueryNode::Any() => Ok(true),
69+
QueryNode::Any { .. } => Ok(true),
6470
QueryNode::Node { kind, children } => {
6571
let node = ast.get_node(node).unwrap();
6672
let target_kind = ast

shared/yeast/tests/test.rs

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,135 @@ fn test_query_repeated_capture() {
170170
assert_eq!(captures.get_all("names").len(), 3);
171171
}
172172

173+
#[test]
174+
fn test_capture_unnamed_node_parenthesized() {
175+
// `("=") @op` captures the unnamed `=` token between left and right.
176+
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
177+
let ast = runner.run("x = 1").unwrap();
178+
179+
let query = yeast::query!(
180+
(assignment
181+
left: (_) @lhs
182+
("=") @op
183+
right: (_) @rhs
184+
)
185+
);
186+
187+
let mut cursor = AstCursor::new(&ast);
188+
cursor.goto_first_child();
189+
let assignment_id = cursor.node().id();
190+
191+
let mut captures = yeast::captures::Captures::new();
192+
let matched = query.do_match(&ast, assignment_id, &mut captures).unwrap();
193+
assert!(matched);
194+
let op_id = captures.get_var("op").unwrap();
195+
let op_node = ast.get_node(op_id).unwrap();
196+
assert_eq!(op_node.kind(), "=");
197+
assert!(!op_node.is_named());
198+
}
199+
200+
#[test]
201+
fn test_capture_unnamed_node_bare_literal() {
202+
// `"=" @op` (without surrounding parens) is the same as `("=") @op`.
203+
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
204+
let ast = runner.run("x = 1").unwrap();
205+
206+
let query = yeast::query!(
207+
(assignment
208+
left: (_) @lhs
209+
"=" @op
210+
right: (_) @rhs
211+
)
212+
);
213+
214+
let mut cursor = AstCursor::new(&ast);
215+
cursor.goto_first_child();
216+
let assignment_id = cursor.node().id();
217+
218+
let mut captures = yeast::captures::Captures::new();
219+
let matched = query.do_match(&ast, assignment_id, &mut captures).unwrap();
220+
assert!(matched);
221+
let op_id = captures.get_var("op").unwrap();
222+
let op_node = ast.get_node(op_id).unwrap();
223+
assert_eq!(op_node.kind(), "=");
224+
assert!(!op_node.is_named());
225+
}
226+
227+
#[test]
228+
fn test_bare_underscore_matches_unnamed() {
229+
// Bare `_` matches any node, including unnamed tokens, while `(_)`
230+
// matches only named nodes. Demonstrate by matching the unnamed `=`
231+
// token in the implicit `child` field of an `assignment`.
232+
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
233+
let ast = runner.run("x = 1").unwrap();
234+
235+
let mut cursor = AstCursor::new(&ast);
236+
cursor.goto_first_child();
237+
let assignment_id = cursor.node().id();
238+
239+
// `(_)` skips unnamed children, so a query containing a single `(_)`
240+
// bare pattern fails to match the assignment (whose only unfielded
241+
// child is the unnamed `=`).
242+
let query_named = yeast::query!((assignment (_) @any));
243+
let mut captures = yeast::captures::Captures::new();
244+
let matched = query_named
245+
.do_match(&ast, assignment_id, &mut captures)
246+
.unwrap();
247+
assert!(
248+
!matched,
249+
"(_) should skip the unnamed `=` and fail to match"
250+
);
251+
252+
// Bare `_` accepts the next child whatever it is, so it matches the
253+
// unnamed `=` token.
254+
let query_any = yeast::query!((assignment _ @any));
255+
let mut captures = yeast::captures::Captures::new();
256+
let matched = query_any
257+
.do_match(&ast, assignment_id, &mut captures)
258+
.unwrap();
259+
assert!(matched, "_ should match the unnamed `=`");
260+
let any_node = ast.get_node(captures.get_var("any").unwrap()).unwrap();
261+
assert_eq!(any_node.kind(), "=");
262+
assert!(!any_node.is_named());
263+
}
264+
265+
#[test]
266+
fn test_bare_forms_in_field_position() {
267+
// The bare `_` and bare-literal forms should be accepted as a
268+
// field's value, not just in the bare-children position. A bare
269+
// literal is shorthand for `("…")`; bare `_` is like `(_)` except that
270+
// it also admits unnamed nodes.
271+
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
272+
let ast = runner.run("x = 1").unwrap();
273+
274+
let mut cursor = AstCursor::new(&ast);
275+
cursor.goto_first_child();
276+
let assignment_id = cursor.node().id();
277+
278+
// Bare `_` in field position. Captures the named `identifier "x"`
279+
// child of the `left` field — bare `_` admits unnamed too, but the
280+
// first child of `left` happens to be named.
281+
let query = yeast::query!((assignment left: _ @lhs));
282+
let mut captures = yeast::captures::Captures::new();
283+
let matched = query.do_match(&ast, assignment_id, &mut captures).unwrap();
284+
assert!(matched);
285+
assert_eq!(
286+
ast.get_node(captures.get_var("lhs").unwrap())
287+
.unwrap()
288+
.kind(),
289+
"identifier"
290+
);
291+
292+
// Bare literal in field position. Equivalent to `("=") @op`.
293+
let query = yeast::query!((assignment child: "=" @op));
294+
let mut captures = yeast::captures::Captures::new();
295+
let matched = query.do_match(&ast, assignment_id, &mut captures).unwrap();
296+
assert!(matched);
297+
let op = ast.get_node(captures.get_var("op").unwrap()).unwrap();
298+
assert_eq!(op.kind(), "=");
299+
assert!(!op.is_named());
300+
}
301+
173302
// ---- Tree builder tests ----
174303

175304
#[test]

0 commit comments

Comments
 (0)