Skip to content

Commit c6c2e12

Browse files
tausbn and Copilot committed
yeast: AST desugaring framework with proc-macro DSL
YEAST (YEAST Elaborates Abstract Syntax Trees) is a framework for transforming tree-sitter parse trees before CodeQL extraction.

Core components:
- shared/yeast/ — Ast, Node, Schema, query matching engine, captures, FreshScope, BuildCtx
- shared/yeast-macros/ — proc macros: query!, tree!, trees!, rule!

The query language is inspired by tree-sitter queries:

    (assignment left: (_) @lhs right: (_) @rhs)

Templates support embedded Rust ({expr}), splicing ({..expr}), computed literals (#{expr}), and fresh identifiers ($name).

The rule! macro combines query and transform:

    rule!((for pattern: (_) @pat ...) => (call receiver: {val} ...))

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 4c1461a commit c6c2e12

21 files changed

Lines changed: 2922 additions & 4 deletions

Cargo.lock

Lines changed: 36 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
resolver = "2"
55
members = [
66
"shared/tree-sitter-extractor",
7+
"shared/yeast",
8+
"shared/yeast-macros",
79
"ruby/extractor",
810
"rust/extractor",
911
"rust/extractor/macros",

shared/yeast-macros/Cargo.toml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[package]
2+
name = "yeast-macros"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
[lib]
7+
proc-macro = true
8+
9+
[dependencies]
10+
proc-macro2 = "1.0"
11+
quote = "1.0"
12+
syn = "2.0"

shared/yeast-macros/src/lib.rs

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
use proc_macro::TokenStream;
2+
use proc_macro2::TokenStream as TokenStream2;
3+
4+
mod parse;
5+
6+
/// Proc macro for constructing a `QueryNode` from a tree-sitter-inspired pattern.
7+
///
8+
/// # Syntax
9+
///
10+
/// ```text
11+
/// (_) - match any named node (skips unnamed tokens)
12+
/// (kind) - match a named node of the given kind
13+
/// ("literal") - match an unnamed token by its text
14+
/// (kind field: (pattern)) - match with named field
15+
/// (kind (pat) (pat)...) - match unnamed children (after all fields)
16+
/// (pattern) @capture - capture the matched node
17+
/// (pattern)* @capture - capture each repeated match
18+
/// (pattern)? - zero or one
19+
/// ```
20+
#[proc_macro]
21+
pub fn query(input: TokenStream) -> TokenStream {
22+
let input2: TokenStream2 = input.into();
23+
match parse::parse_query_top(input2) {
24+
Ok(output) => output.into(),
25+
Err(err) => err.to_compile_error().into(),
26+
}
27+
}
28+
29+
/// Build a single AST node from a template, returning its `Id`.
30+
///
31+
/// # Template syntax
32+
///
33+
/// ```text
34+
/// (kind "literal") - leaf with static content
35+
/// (kind #{expr}) - leaf with computed content (expr.to_string())
36+
/// (kind $fresh) - leaf with auto-generated unique name
37+
/// {expr} - embed a Rust expression returning Id
38+
/// {..expr} - splice an iterable of Id (in child/field position)
39+
/// field: {..expr} - splice into a named field
40+
/// ```
41+
///
42+
/// Can be called with an explicit context or using the implicit context
43+
/// from an enclosing `rule!`:
44+
///
45+
/// ```text
46+
/// tree!(ctx, (kind ...)) // explicit BuildCtx
47+
/// tree!((kind ...)) // implicit context from rule!
48+
/// ```
49+
#[proc_macro]
50+
pub fn tree(input: TokenStream) -> TokenStream {
51+
let input2: TokenStream2 = input.into();
52+
match parse::parse_tree_top(input2) {
53+
Ok(output) => output.into(),
54+
Err(err) => err.to_compile_error().into(),
55+
}
56+
}
57+
58+
/// Build a list of AST nodes from a template, returning `Vec<Id>`.
59+
///
60+
/// Like `tree!` but returns `Vec<Id>` and supports multiple top-level
61+
/// elements. All syntax from `tree!` is available.
62+
///
63+
/// Can be called with an explicit context or using the implicit context
64+
/// from an enclosing `rule!`:
65+
///
66+
/// ```text
67+
/// trees!(ctx, (node1 ...) (node2 ...)) // explicit BuildCtx
68+
/// trees!((node1 ...) (node2 ...)) // implicit context from rule!
69+
/// ```
70+
#[proc_macro]
71+
pub fn trees(input: TokenStream) -> TokenStream {
72+
let input2: TokenStream2 = input.into();
73+
match parse::parse_trees_top(input2) {
74+
Ok(output) => output.into(),
75+
Err(err) => err.to_compile_error().into(),
76+
}
77+
}
78+
79+
/// Define a desugaring rule with query and transform in one declaration.
80+
///
81+
/// ```text
82+
/// rule!(
83+
/// (query_pattern field: (_) @name (kind)* @repeated (_)? @optional)
84+
/// =>
85+
/// (output_template field: {name} {..repeated})
86+
/// )
87+
///
88+
/// // Shorthand: captures become fields on the output node
89+
/// rule!((query ...) => output_kind)
90+
/// ```
91+
///
92+
/// Captures become Rust variables automatically:
93+
/// - `@name` (no quantifier) → `name: Id`
94+
/// - `@name` (after `*`/`+`) → `name: Vec<Id>`
95+
/// - `@name` (after `?`) → `name: Option<Id>`
96+
///
97+
/// `tree!` and `trees!` can be used without explicit context inside `{...}`.
98+
#[proc_macro]
99+
pub fn rule(input: TokenStream) -> TokenStream {
100+
let input2: TokenStream2 = input.into();
101+
match parse::parse_rule_top(input2) {
102+
Ok(output) => output.into(),
103+
Err(err) => err.to_compile_error().into(),
104+
}
105+
}

0 commit comments

Comments
 (0)