Skip to content

Commit b027ac3

Browse files
authored
Merge pull request #21809 from github/tausbn/yeast-add-support-for-desugaring-phases
Yeast: Two small improvements
2 parents f9e42ac + e0d663f commit b027ac3

3 files changed

Lines changed: 267 additions & 45 deletions

File tree

shared/yeast/doc/yeast.md

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,22 @@ rule matches, the node is kept and its children are processed recursively.
6161
A rule can replace one node with zero nodes (deletion), one node (rewriting),
6262
or multiple nodes (expansion).
6363

64+
By default a rule fires **at most once on a given node**: after firing, the
65+
engine will not re-try that same rule on the result root. Other rules may
66+
still fire on the result, and the rule may still fire on different nodes
67+
(including the result's children). To opt into iterative behaviour — when a
68+
rule's output is intentionally re-matched by the same rule — call
69+
`.repeated()` on the constructed `Rule`:
70+
71+
```rust
72+
let r = yeast::rule!((foo ...) => (foo ...)).repeated();
73+
```
74+
75+
Without `.repeated()`, a rule whose output happens to match its own query
76+
simply fires once and stops. With `.repeated()`, the rule is allowed to
77+
re-match indefinitely; the runner still enforces a global rewrite-depth
78+
limit (currently 100) as a safety net against accidental cycles.
79+
6480
## Query language
6581

6682
Queries use a syntax inspired by
@@ -303,11 +319,17 @@ capture name to a field of the same name on the output node.
303319
## Integration with the extractor
304320

305321
A YEAST desugaring pass is configured with a [`DesugaringConfig`], which
306-
carries the rules and an optional output node-types schema (in YAML
307-
format). Attach it to a language spec to enable rewriting:
322+
carries one or more named [`Phase`]s of rules and an optional output
323+
node-types schema (in YAML format). Each phase is a complete traversal
324+
that runs to completion before the next phase starts; only the current
325+
phase's rules are considered during that traversal. Attach the config to
326+
a language spec
327+
to enable rewriting:
308328

309329
```rust
310-
let desugar = yeast::DesugaringConfig::new(my_rules)
330+
let desugar = yeast::DesugaringConfig::new()
331+
.add_phase("cleanup", cleanup_rules())
332+
.add_phase("desugar", desugar_rules())
311333
.with_output_node_types_yaml(include_str!("output-node-types.yml"));
312334

313335
let lang = simple::LanguageSpec {
@@ -319,11 +341,14 @@ let lang = simple::LanguageSpec {
319341
};
320342
```
321343

344+
A single-phase config is just `.add_phase(...)` called once. Phase names
345+
appear in error messages so you can tell which phase failed.
346+
322347
The same YAML node-types schema is used for both the runtime yeast `Schema` (so
323348
rules can refer to output-only kinds and fields) and TRAP validation (it
324349
is converted to JSON internally).
325350

326351
For the dbscheme/QL code generator, set `Language::desugar` to a
327352
`DesugaringConfig` carrying the same YAML; the generator converts it to
328-
JSON for downstream code generation. The `rules` field of the config is
353+
JSON for downstream code generation. The `phases` field of the config is
329354
unused at code-generation time.

shared/yeast/src/lib.rs

Lines changed: 109 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -471,11 +471,29 @@ pub type Transform = Box<
471471
pub struct Rule {
472472
query: QueryNode,
473473
transform: Transform,
474+
/// If true, after this rule fires on a node the engine will try to
475+
/// re-apply this same rule on the result root. Defaults to false:
476+
/// each rule fires at most once on a given node, which prevents
477+
/// accidental loops where a rule's output matches its own query.
478+
repeated: bool,
474479
}
475480

476481
impl Rule {
477482
pub fn new(query: QueryNode, transform: Transform) -> Self {
478-
Self { query, transform }
483+
Self {
484+
query,
485+
transform,
486+
repeated: false,
487+
}
488+
}
489+
490+
/// Mark this rule as allowed to fire multiple times on the same node.
491+
/// Use when the rule is intentionally iterative (its output may match
492+
/// its own query). Without this, a rule fires at most once per node;
493+
/// other rules can still fire on the result.
494+
pub fn repeated(mut self) -> Self {
495+
self.repeated = true;
496+
self
479497
}
480498

481499
fn try_rule(
@@ -537,7 +555,7 @@ fn apply_rules(
537555
fresh: &tree_builder::FreshScope,
538556
) -> Result<Vec<Id>, String> {
539557
let index = RuleIndex::new(rules);
540-
apply_rules_inner(&index, ast, id, fresh, 0)
558+
apply_rules_inner(&index, ast, id, fresh, 0, None)
541559
}
542560

543561
fn apply_rules_inner(
@@ -546,6 +564,7 @@ fn apply_rules_inner(
546564
id: Id,
547565
fresh: &tree_builder::FreshScope,
548566
rewrite_depth: usize,
567+
skip_rule: Option<*const Rule>,
549568
) -> Result<Vec<Id>, String> {
550569
if rewrite_depth > MAX_REWRITE_DEPTH {
551570
return Err(format!(
@@ -556,7 +575,16 @@ fn apply_rules_inner(
556575

557576
let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or("");
558577
for rule in index.rules_for_kind(node_kind) {
578+
let rule_ptr = *rule as *const Rule;
579+
if Some(rule_ptr) == skip_rule {
580+
continue;
581+
}
559582
if let Some(result_node) = rule.try_rule(ast, id, fresh)? {
583+
// For non-repeated rules, suppress further application of *this*
584+
// rule on the result root, so a rule whose output matches its own
585+
// query doesn't loop. Other rules and child traversal are
586+
// unaffected.
587+
let next_skip = if rule.repeated { None } else { Some(rule_ptr) };
560588
let mut results = Vec::new();
561589
for node in result_node {
562590
results.extend(apply_rules_inner(
@@ -565,6 +593,7 @@ fn apply_rules_inner(
565593
node,
566594
fresh,
567595
rewrite_depth + 1,
596+
next_skip,
568597
)?);
569598
}
570599
return Ok(results);
@@ -579,13 +608,14 @@ fn apply_rules_inner(
579608
.collect();
580609

581610
// recursively descend into all the fields
582-
// Child traversal does not increment rewrite depth
611+
// Child traversal does not increment rewrite depth and starts fresh
612+
// (no rule is skipped on child subtrees).
583613
let mut changed = false;
584614
let mut new_fields = BTreeMap::new();
585615
for (field_id, children) in field_entries {
586616
let mut new_children = Vec::new();
587617
for child_id in children {
588-
let result = apply_rules_inner(index, ast, child_id, fresh, rewrite_depth)?;
618+
let result = apply_rules_inner(index, ast, child_id, fresh, rewrite_depth, None)?;
589619
if result.len() != 1 || result[0] != child_id {
590620
changed = true;
591621
}
@@ -605,28 +635,64 @@ fn apply_rules_inner(
605635
Ok(vec![ast.nodes.len() - 1])
606636
}
607637

608-
/// Configuration for a desugaring pass: a set of rules and an optional
609-
/// output node-types schema (in YAML format).
638+
/// One phase of a desugaring pass: a named bundle of rules that runs to
639+
/// completion (a full traversal applying its rules) before the next phase
640+
/// starts. Rules within a phase compete for matches as usual; rules in
641+
/// different phases never compete because each traversal only considers the
642+
/// current phase's rules.
643+
pub struct Phase {
644+
/// Name used in error messages.
645+
pub name: String,
646+
pub rules: Vec<Rule>,
647+
}
648+
649+
impl Phase {
650+
pub fn new(name: impl Into<String>, rules: Vec<Rule>) -> Self {
651+
Self {
652+
name: name.into(),
653+
rules,
654+
}
655+
}
656+
}
657+
658+
/// Configuration for a desugaring pass: an ordered list of [`Phase`]s and
659+
/// an optional output node-types schema (in YAML format).
610660
///
611661
/// When attached to a `LanguageSpec` (in the shared tree-sitter extractor),
612662
/// enables yeast-based AST rewriting before TRAP extraction. The same YAML
613663
/// is used both to validate TRAP output (via JSON conversion) and to
614664
/// resolve output-only node kinds and fields at runtime.
665+
///
666+
/// Construct with `DesugaringConfig::new()` and add phases via
667+
/// `add_phase`:
668+
///
669+
/// ```ignore
670+
/// let config = yeast::DesugaringConfig::new()
671+
/// .add_phase("cleanup", cleanup_rules)
672+
/// .add_phase("desugar", desugar_rules)
673+
/// .with_output_node_types_yaml(yaml);
674+
/// ```
675+
#[derive(Default)]
615676
pub struct DesugaringConfig {
616-
/// Rules to apply during desugaring.
617-
pub rules: Vec<Rule>,
677+
/// Phases of rule application, applied in order.
678+
pub phases: Vec<Phase>,
618679
/// Output node-types in YAML format. If `None`, the input grammar's
619680
/// node types are used (i.e. the desugared AST has the same node types
620681
/// as the tree-sitter grammar).
621682
pub output_node_types_yaml: Option<&'static str>,
622683
}
623684

624685
impl DesugaringConfig {
625-
pub fn new(rules: Vec<Rule>) -> Self {
626-
Self {
627-
rules,
628-
output_node_types_yaml: None,
629-
}
686+
/// Create an empty configuration. Add phases via [`add_phase`] and an
687+
/// optional output schema via [`with_output_node_types_yaml`].
688+
pub fn new() -> Self {
689+
Self::default()
690+
}
691+
692+
/// Append a new phase with the given name and rules.
693+
pub fn add_phase(mut self, name: impl Into<String>, rules: Vec<Rule>) -> Self {
694+
self.phases.push(Phase::new(name, rules));
695+
self
630696
}
631697

632698
pub fn with_output_node_types_yaml(mut self, yaml: &'static str) -> Self {
@@ -648,30 +714,30 @@ impl DesugaringConfig {
648714
pub struct Runner<'a> {
649715
language: tree_sitter::Language,
650716
schema: schema::Schema,
651-
rules: &'a [Rule],
717+
phases: &'a [Phase],
652718
}
653719

654720
impl<'a> Runner<'a> {
655721
/// Create a runner using the input grammar's schema for output.
656-
pub fn new(language: tree_sitter::Language, rules: &'a [Rule]) -> Self {
722+
pub fn new(language: tree_sitter::Language, phases: &'a [Phase]) -> Self {
657723
let schema = schema::Schema::from_language(&language);
658724
Self {
659725
language,
660726
schema,
661-
rules,
727+
phases,
662728
}
663729
}
664730

665731
/// Create a runner with separate input language and output schema.
666732
pub fn with_schema(
667733
language: tree_sitter::Language,
668734
schema: &schema::Schema,
669-
rules: &'a [Rule],
735+
phases: &'a [Phase],
670736
) -> Self {
671737
Self {
672738
language,
673739
schema: schema.clone(),
674-
rules,
740+
phases,
675741
}
676742
}
677743

@@ -684,27 +750,17 @@ impl<'a> Runner<'a> {
684750
Ok(Self {
685751
language,
686752
schema,
687-
rules: &config.rules,
753+
phases: &config.phases,
688754
})
689755
}
690756

691757
pub fn run_from_tree(&self, tree: &tree_sitter::Tree) -> Result<Ast, String> {
692-
let fresh = tree_builder::FreshScope::new();
693758
let mut ast = Ast::from_tree_with_schema(self.schema.clone(), tree, &self.language);
694-
let root = ast.get_root();
695-
let res = apply_rules(self.rules, &mut ast, root, &fresh)?;
696-
if res.len() != 1 {
697-
return Err(format!(
698-
"Expected exactly one result node, got {}",
699-
res.len()
700-
));
701-
}
702-
ast.set_root(res[0]);
759+
self.run_phases(&mut ast)?;
703760
Ok(ast)
704761
}
705762

706763
pub fn run(&self, input: &str) -> Result<Ast, String> {
707-
let fresh = tree_builder::FreshScope::new();
708764
let mut parser = tree_sitter::Parser::new();
709765
parser
710766
.set_language(&self.language)
@@ -713,15 +769,29 @@ impl<'a> Runner<'a> {
713769
.parse(input, None)
714770
.ok_or_else(|| "Failed to parse input".to_string())?;
715771
let mut ast = Ast::from_tree_with_schema(self.schema.clone(), &tree, &self.language);
716-
let root = ast.get_root();
717-
let res = apply_rules(self.rules, &mut ast, root, &fresh)?;
718-
if res.len() != 1 {
719-
return Err(format!(
720-
"Expected exactly one result node, got {}",
721-
res.len()
722-
));
723-
}
724-
ast.set_root(res[0]);
772+
self.run_phases(&mut ast)?;
725773
Ok(ast)
726774
}
775+
776+
/// Apply each phase in turn to the AST, threading the root through.
777+
/// A single `FreshScope` is shared across phases so that fresh
778+
/// identifiers generated in different phases don't collide.
779+
fn run_phases(&self, ast: &mut Ast) -> Result<(), String> {
780+
let fresh = tree_builder::FreshScope::new();
781+
let mut root = ast.get_root();
782+
for phase in self.phases {
783+
let res = apply_rules(&phase.rules, ast, root, &fresh)
784+
.map_err(|e| format!("Phase `{}`: {e}", phase.name))?;
785+
if res.len() != 1 {
786+
return Err(format!(
787+
"Phase `{}`: expected exactly one result node, got {}",
788+
phase.name,
789+
res.len()
790+
));
791+
}
792+
root = res[0];
793+
}
794+
ast.set_root(root);
795+
Ok(())
796+
}
727797
}

0 commit comments

Comments
 (0)