Skip to content

Commit 957c89b

Browse files
committed
yeast: Support multi-phase desugaring via DesugaringConfig::add_phase
Extend the desugaring config from a single flat list of rules to an ordered sequence of named Phases. Each phase runs to completion (a full traversal applying its rules) before the next phase starts. Rules in different phases never compete for matches.

The config is built via the new chainable API:

    DesugaringConfig::new()
        .add_phase("cleanup", cleanup_rules)
        .add_phase("desugar", desugar_rules)
        .with_output_node_types_yaml(yaml);

Single-phase configs are just .add_phase(...) called once.

A single FreshScope is shared across phases so generated identifier names (e.g. $tmp-N) are unique throughout the run.

Phase names appear in error messages, e.g. "Phase `desugar`: exceeded maximum rewrite depth".

Add two regression tests: one verifying basic two-phase chained desugaring, and one verifying that errors include the failing phase name.
1 parent 9a94836 commit 957c89b

3 files changed

Lines changed: 158 additions & 41 deletions

File tree

shared/yeast/doc/yeast.md

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -319,11 +319,16 @@ capture name to a field of the same name on the output node.
319319
## Integration with the extractor
320320

321321
A YEAST desugaring pass is configured with a [`DesugaringConfig`], which
322-
carries the rules and an optional output node-types schema (in YAML
323-
format). Attach it to a language spec to enable rewriting:
322+
carries one or more named [`Phase`]s of rules and an optional output
323+
node-types schema (in YAML format). Each phase is a complete traversal
324+
that runs to completion before the next phase starts; rules in different
325+
phases never compete for matches. Attach the config to a language spec
326+
to enable rewriting:
324327

325328
```rust
326-
let desugar = yeast::DesugaringConfig::new(my_rules)
329+
let desugar = yeast::DesugaringConfig::new()
330+
.add_phase("cleanup", cleanup_rules())
331+
.add_phase("desugar", desugar_rules())
327332
.with_output_node_types_yaml(include_str!("output-node-types.yml"));
328333

329334
let lang = simple::LanguageSpec {
@@ -335,6 +340,9 @@ let lang = simple::LanguageSpec {
335340
};
336341
```
337342

343+
A single-phase config is just `.add_phase(...)` called once. Phase names
344+
appear in error messages so you can tell which phase failed.
345+
338346
The same node-types YAML is used for both the runtime yeast `Schema` (so
339347
rules can refer to output-only kinds and fields) and TRAP validation (it
340348
is converted to JSON internally).

shared/yeast/src/lib.rs

Lines changed: 74 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -635,28 +635,63 @@ fn apply_rules_inner(
635635
Ok(vec![ast.nodes.len() - 1])
636636
}
637637

638-
/// Configuration for a desugaring pass: a set of rules and an optional
639-
/// output node-types schema (in YAML format).
638+
/// One phase of a desugaring pass: a named bundle of rules that runs to
639+
/// completion (a full traversal applying its rules) before the next phase
640+
/// starts. Rules within a phase compete for matches as usual; rules in
641+
/// different phases never compete because each phase runs only on the tree produced by the phases before it.
642+
pub struct Phase {
643+
/// Name used in error messages.
644+
pub name: String,
645+
pub rules: Vec<Rule>,
646+
}
647+
648+
impl Phase {
649+
pub fn new(name: impl Into<String>, rules: Vec<Rule>) -> Self {
650+
Self {
651+
name: name.into(),
652+
rules,
653+
}
654+
}
655+
}
656+
657+
/// Configuration for a desugaring pass: an ordered list of [`Phase`]s and
658+
/// an optional output node-types schema (in YAML format).
640659
///
641660
/// When attached to a `LanguageSpec` (in the shared tree-sitter extractor),
642661
/// enables yeast-based AST rewriting before TRAP extraction. The same YAML
643662
/// is used both to validate TRAP output (via JSON conversion) and to
644663
/// resolve output-only node kinds and fields at runtime.
664+
///
665+
/// Construct with `DesugaringConfig::new()` and add phases via
666+
/// `add_phase`:
667+
///
668+
/// ```ignore
669+
/// let config = yeast::DesugaringConfig::new()
670+
/// .add_phase("cleanup", cleanup_rules)
671+
/// .add_phase("desugar", desugar_rules)
672+
/// .with_output_node_types_yaml(yaml);
673+
/// ```
674+
#[derive(Default)]
645675
pub struct DesugaringConfig {
646-
/// Rules to apply during desugaring.
647-
pub rules: Vec<Rule>,
676+
/// Phases of rule application, applied in order.
677+
pub phases: Vec<Phase>,
648678
/// Output node-types in YAML format. If `None`, the input grammar's
649679
/// node types are used (i.e. the desugared AST has the same node types
650680
/// as the tree-sitter grammar).
651681
pub output_node_types_yaml: Option<&'static str>,
652682
}
653683

654684
impl DesugaringConfig {
655-
pub fn new(rules: Vec<Rule>) -> Self {
656-
Self {
657-
rules,
658-
output_node_types_yaml: None,
659-
}
685+
/// Create an empty configuration. Add phases via [`Self::add_phase`] and an
686+
/// optional output schema via [`Self::with_output_node_types_yaml`].
687+
pub fn new() -> Self {
688+
Self::default()
689+
}
690+
691+
/// Append a new phase with the given name and rules.
692+
pub fn add_phase(mut self, name: impl Into<String>, rules: Vec<Rule>) -> Self {
693+
self.phases.push(Phase::new(name, rules));
694+
self
660695
}
661696

662697
pub fn with_output_node_types_yaml(mut self, yaml: &'static str) -> Self {
@@ -678,30 +713,30 @@ impl DesugaringConfig {
678713
pub struct Runner<'a> {
679714
language: tree_sitter::Language,
680715
schema: schema::Schema,
681-
rules: &'a [Rule],
716+
phases: &'a [Phase],
682717
}
683718

684719
impl<'a> Runner<'a> {
685720
/// Create a runner using the input grammar's schema for output.
686-
pub fn new(language: tree_sitter::Language, rules: &'a [Rule]) -> Self {
721+
pub fn new(language: tree_sitter::Language, phases: &'a [Phase]) -> Self {
687722
let schema = schema::Schema::from_language(&language);
688723
Self {
689724
language,
690725
schema,
691-
rules,
726+
phases,
692727
}
693728
}
694729

695730
/// Create a runner with separate input language and output schema.
696731
pub fn with_schema(
697732
language: tree_sitter::Language,
698733
schema: &schema::Schema,
699-
rules: &'a [Rule],
734+
phases: &'a [Phase],
700735
) -> Self {
701736
Self {
702737
language,
703738
schema: schema.clone(),
704-
rules,
739+
phases,
705740
}
706741
}
707742

@@ -714,27 +749,17 @@ impl<'a> Runner<'a> {
714749
Ok(Self {
715750
language,
716751
schema,
717-
rules: &config.rules,
752+
phases: &config.phases,
718753
})
719754
}
720755

721756
pub fn run_from_tree(&self, tree: &tree_sitter::Tree) -> Result<Ast, String> {
722-
let fresh = tree_builder::FreshScope::new();
723757
let mut ast = Ast::from_tree_with_schema(self.schema.clone(), tree, &self.language);
724-
let root = ast.get_root();
725-
let res = apply_rules(self.rules, &mut ast, root, &fresh)?;
726-
if res.len() != 1 {
727-
return Err(format!(
728-
"Expected exactly one result node, got {}",
729-
res.len()
730-
));
731-
}
732-
ast.set_root(res[0]);
758+
self.run_phases(&mut ast)?;
733759
Ok(ast)
734760
}
735761

736762
pub fn run(&self, input: &str) -> Result<Ast, String> {
737-
let fresh = tree_builder::FreshScope::new();
738763
let mut parser = tree_sitter::Parser::new();
739764
parser
740765
.set_language(&self.language)
@@ -743,15 +768,29 @@ impl<'a> Runner<'a> {
743768
.parse(input, None)
744769
.ok_or_else(|| "Failed to parse input".to_string())?;
745770
let mut ast = Ast::from_tree_with_schema(self.schema.clone(), &tree, &self.language);
746-
let root = ast.get_root();
747-
let res = apply_rules(self.rules, &mut ast, root, &fresh)?;
748-
if res.len() != 1 {
749-
return Err(format!(
750-
"Expected exactly one result node, got {}",
751-
res.len()
752-
));
753-
}
754-
ast.set_root(res[0]);
771+
self.run_phases(&mut ast)?;
755772
Ok(ast)
756773
}
774+
775+
/// Apply each phase in turn to the AST, threading the root through.
776+
/// A single `FreshScope` is shared across phases so that fresh
777+
/// identifiers generated in different phases don't collide.
778+
fn run_phases(&self, ast: &mut Ast) -> Result<(), String> {
779+
let fresh = tree_builder::FreshScope::new();
780+
let mut root = ast.get_root();
781+
for phase in self.phases {
782+
let res = apply_rules(&phase.rules, ast, root, &fresh)
783+
.map_err(|e| format!("Phase `{}`: {e}", phase.name))?;
784+
if res.len() != 1 {
785+
return Err(format!(
786+
"Phase `{}`: expected exactly one result node, got {}",
787+
phase.name,
788+
res.len()
789+
));
790+
}
791+
root = res[0];
792+
}
793+
ast.set_root(root);
794+
Ok(())
795+
}
757796
}

shared/yeast/tests/test.rs

Lines changed: 73 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,19 @@ fn parse_and_dump(input: &str) -> String {
1212
dump_ast(&ast, ast.get_root(), input)
1313
}
1414

15-
/// Helper: parse Ruby source with a custom output schema and rules, return dump.
15+
/// Helper: parse Ruby source with a custom output schema and a single
16+
/// phase of rules, return dump.
1617
fn run_and_dump(input: &str, rules: Vec<Rule>) -> String {
18+
run_phased_and_dump(input, vec![Phase::new("test", rules)])
19+
}
20+
21+
/// Helper: parse Ruby source with a custom output schema and multiple
22+
/// rule phases, return dump.
23+
fn run_phased_and_dump(input: &str, phases: Vec<Phase>) -> String {
1724
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
1825
let schema =
1926
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
20-
let runner = Runner::with_schema(lang, &schema, &rules);
27+
let runner = Runner::with_schema(lang, &schema, &phases);
2128
let ast = runner.run(input).unwrap();
2229
dump_ast(&ast, ast.get_root(), input)
2330
}
@@ -28,7 +35,8 @@ fn run_and_get_error(input: &str, rules: Vec<Rule>) -> String {
2835
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
2936
let schema =
3037
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
31-
let runner = Runner::with_schema(lang, &schema, &rules);
38+
let phases = vec![Phase::new("test", rules)];
39+
let runner = Runner::with_schema(lang, &schema, &phases);
3240
runner
3341
.run(input)
3442
.expect_err("expected runner to return an error")
@@ -439,6 +447,68 @@ fn test_default_rule_fires_at_most_once_per_node() {
439447
);
440448
}
441449

450+
// ---- Phase tests ----
451+
452+
#[test]
453+
fn test_phased_desugaring() {
454+
// Two phases that could equally have been a single one with chained
455+
// rules. Splitting them makes the intent (cleanup, then desugar)
456+
// explicit and provides per-phase error messages.
457+
let cleanup = vec![yeast::rule!(
458+
(assignment
459+
left: (_) @left
460+
right: (_) @right
461+
)
462+
=> first_node
463+
)];
464+
let desugar = vec![yeast::rule!(
465+
(first_node
466+
left: (_) @left
467+
right: (_) @right
468+
)
469+
=> second_node
470+
)];
471+
472+
let dump = run_phased_and_dump(
473+
"x = 1",
474+
vec![
475+
Phase::new("cleanup", cleanup),
476+
Phase::new("desugar", desugar),
477+
],
478+
);
479+
assert_dump_eq(
480+
&dump,
481+
r#"
482+
program
483+
second_node
484+
left: identifier "x"
485+
right: integer "1"
486+
"#,
487+
);
488+
}
489+
490+
#[test]
491+
fn test_phase_error_includes_phase_name() {
492+
// A repeated rule that loops; the error message should identify the
493+
// phase that tripped the depth limit.
494+
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
495+
let schema =
496+
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
497+
let phases = vec![Phase::new("buggy", vec![swap_assignment_rule().repeated()])];
498+
let runner = Runner::with_schema(lang, &schema, &phases);
499+
let err = runner
500+
.run("x = 1")
501+
.expect_err("expected runner to return an error");
502+
assert!(
503+
err.contains("Phase `buggy`"),
504+
"error should mention the failing phase, got: {err}"
505+
);
506+
assert!(
507+
err.contains("exceeded maximum rewrite depth"),
508+
"error should mention the depth limit, got: {err}"
509+
);
510+
}
511+
442512
// ---- Cursor tests ----
443513

444514
#[test]

0 commit comments

Comments
 (0)