From 54f060c27309f301c034fecf0f1a43df837b7dab Mon Sep 17 00:00:00 2001 From: Ryth Azhur Date: Mon, 18 May 2026 00:01:28 -0400 Subject: [PATCH] mizan: IR inline-substitution + reachability tree-shake + serde rename_all MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three substrate moves required by the Blazr-session port that surfaced real cross-backend divergences: 1. Inline-substitution for primitive aliases and string enums. Named types whose body is `Alias(Primitive(_))` or `Enum(_)` collapse into their inline TypeShape at every `Ref` use site, and don't emit as their own `type "X" { ... }` entry. Matches Python's Pydantic Literal and `Foo = str` alias inlining — codegen consumers see the primitive directly rather than chasing a single-hop indirection. 2. Reachability tree-shake on the type registry. `#[derive(Mizan)]` now auto-registers every Mizan type into the TYPES slice; the emitter then transitively walks Refs from function inputs/outputs and emits only the reachable subset. Original-named entries from derive are always registered but are emitted only when something refs them; canonical-renamed entries from the function macro are reachable by definition. Mirrors Python's `_collect_named_types`. 3. `#[serde(rename_all = "...")]` + `#[serde(rename = "...")]` propagation in `#[derive(Mizan)]` for enums. IR enum variants now match the on-wire JSON casing (lowercase / snake_case / kebab-case / etc.), not the Rust variant idents. Supports serde's `lowercase`, `UPPERCASE`, `PascalCase`, `camelCase`, `snake_case`, `SCREAMING_SNAKE_CASE` and `kebab-case` rules; any other rule (including `SCREAMING-KEBAB-CASE`) leaves the Rust variant ident unchanged. AFI codegen + wire parity stays green after these changes (the AFI fixture's enum-free + Pydantic-shape types are unchanged by the three substrate extensions). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- cores/mizan-rust-macros/src/derive.rs | 117 +++++++++++++++++++++--- cores/mizan-rust/src/kdl.rs | 125 ++++++++++++++++++++++++-- 2 files changed, 225 insertions(+), 17 deletions(-) diff --git a/cores/mizan-rust-macros/src/derive.rs b/cores/mizan-rust-macros/src/derive.rs index 1aa0a8f..8aab694 100644 --- a/cores/mizan-rust-macros/src/derive.rs +++ b/cores/mizan-rust-macros/src/derive.rs @@ -1,24 +1,101 @@ //! `#[derive(Mizan)]` — emit `MizanType` impl + linkme registration. +use heck::{ToKebabCase, ToLowerCamelCase, ToShoutySnakeCase, ToSnakeCase, ToUpperCamelCase}; use proc_macro2::TokenStream; use quote::quote; -use syn::{Data, DataEnum, DataStruct, DeriveInput, Fields}; +use syn::{ + parse::Parser, punctuated::Punctuated, Data, DataEnum, DataStruct, DeriveInput, Fields, Lit, + Meta, Token, +}; use crate::shape::type_shape_expr; -/// Expand `#[derive(Mizan)]`. Emits only the `MizanType` trait impl — -/// registration into the IR `TYPES` slice happens at the function macro -/// (which owns the canonical-named type entries `Input` / -/// `Output`) and at sub-type discovery inside `Vec` outputs. -/// This keeps the type registry tree-shaken: only types actually reachable -/// from a registered function appear in the emitted IR. +/// Apply a `#[serde(rename_all = "...")]` casing transform to a Rust +/// variant identifier so the IR's enum variant matches what serde emits +/// on the wire. Supported casings mirror serde's set. +fn apply_rename_all(rule: &str, ident: &str) -> String { + match rule { + "lowercase" => ident.to_lowercase(), + "UPPERCASE" => ident.to_uppercase(), + "PascalCase" => ident.to_upper_camel_case(), + "camelCase" => ident.to_lower_camel_case(), + "snake_case" => ident.to_snake_case(), + "SCREAMING_SNAKE_CASE" => ident.to_shouty_snake_case(), + "kebab-case" => ident.to_kebab_case(), + _ => ident.to_string(), + } +} + +/// Walk the enum's outer attributes for `#[serde(rename_all = "...")]`. 
+fn serde_rename_all(attrs: &[syn::Attribute]) -> Option { + for attr in attrs { + if !attr.path().is_ident("serde") { + continue; + } + let list = match &attr.meta { + Meta::List(l) => l, + _ => continue, + }; + let parser = Punctuated::::parse_terminated; + let metas = match parser.parse2(list.tokens.clone()) { + Ok(m) => m, + Err(_) => continue, + }; + for meta in metas { + if let Meta::NameValue(nv) = meta { + if nv.path.is_ident("rename_all") { + if let syn::Expr::Lit(syn::ExprLit { lit: Lit::Str(s), .. }) = nv.value { + return Some(s.value()); + } + } + } + } + } + None +} + +/// Walk a variant's attributes for an explicit `#[serde(rename = "...")]` +/// override. Variant-level rename overrides the enum-level rename_all. +fn serde_rename(attrs: &[syn::Attribute]) -> Option { + for attr in attrs { + if !attr.path().is_ident("serde") { + continue; + } + let list = match &attr.meta { + Meta::List(l) => l, + _ => continue, + }; + let parser = Punctuated::::parse_terminated; + let metas = match parser.parse2(list.tokens.clone()) { + Ok(m) => m, + Err(_) => continue, + }; + for meta in metas { + if let Meta::NameValue(nv) = meta { + if nv.path.is_ident("rename") { + if let syn::Expr::Lit(syn::ExprLit { lit: Lit::Str(s), .. }) = nv.value { + return Some(s.value()); + } + } + } + } + } + None +} + +/// Expand `#[derive(Mizan)]`. Emits the `MizanType` impl AND a linkme +/// TypeEntry registration. Every Mizan-shaped type lands in the IR; +/// the emitter's inline-substitution pass collapses primitive-aliases +/// and enums at use sites so the IR stays tight. 
pub fn expand(input: DeriveInput) -> TokenStream { let ident = input.ident.clone(); let type_name = ident.to_string(); + let rename_all = serde_rename_all(&input.attrs); + let named_type_body = match &input.data { Data::Struct(s) => emit_struct(s), - Data::Enum(e) => emit_enum(e), + Data::Enum(e) => emit_enum(e, rename_all.as_deref()), Data::Union(_) => { return syn::Error::new_spanned( &input, @@ -28,11 +105,22 @@ pub fn expand(input: DeriveInput) -> TokenStream { } }; + let register_static = + quote::format_ident!("__MIZAN_TYPE_REGISTER_{}", ident.to_string().to_shouty_snake_case()); + quote! { impl ::mizan_core::MizanType for #ident { const TYPE_NAME: &'static str = #type_name; fn shape() -> ::mizan_core::NamedType { #named_type_body } } + + #[::mizan_core::__priv::linkme::distributed_slice(::mizan_core::TYPES)] + #[linkme(crate = ::mizan_core::__priv::linkme)] + #[allow(non_upper_case_globals)] + static #register_static: ::mizan_core::TypeEntry = ::mizan_core::TypeEntry { + name: #type_name, + shape_fn: <#ident as ::mizan_core::MizanType>::shape, + }; } } @@ -81,7 +169,7 @@ fn emit_struct(s: &DataStruct) -> TokenStream { } } -fn emit_enum(e: &DataEnum) -> TokenStream { +fn emit_enum(e: &DataEnum, rename_all: Option<&str>) -> TokenStream { let mut variants: Vec = Vec::new(); for variant in &e.variants { if !matches!(variant.fields, Fields::Unit) { @@ -91,7 +179,16 @@ fn emit_enum(e: &DataEnum) -> TokenStream { ) .to_compile_error(); } - let name = variant.ident.to_string(); + let raw = variant.ident.to_string(); + // Variant-level `#[serde(rename = "...")]` wins; otherwise apply + // the enum-level `#[serde(rename_all = "...")]` rule. + let name = if let Some(explicit) = serde_rename(&variant.attrs) { + explicit + } else if let Some(rule) = rename_all { + apply_rename_all(rule, &raw) + } else { + raw + }; variants.push(quote! { #name }); } quote! 
{ diff --git a/cores/mizan-rust/src/kdl.rs b/cores/mizan-rust/src/kdl.rs index d59bd99..c67dcd1 100644 --- a/cores/mizan-rust/src/kdl.rs +++ b/cores/mizan-rust/src/kdl.rs @@ -75,13 +75,20 @@ pub fn snake_to_camel(name: &str) -> String { out } -struct Emitter { +struct Emitter<'a> { lines: Vec, + /// Types whose references should be substituted with their inline + /// shape at the use site (and which don't emit as their own + /// `type "X" { ... }` entries). Populated from `IrSnapshot::inlines`. + inlines: &'a BTreeMap<&'static str, TypeShape>, } -impl Emitter { - fn new() -> Self { - Self { lines: Vec::new() } +impl<'a> Emitter<'a> { + fn new(inlines: &'a BTreeMap<&'static str, TypeShape>) -> Self { + Self { + lines: Vec::new(), + inlines, + } } fn prefix(&self, indent: usize) -> String { @@ -118,6 +125,13 @@ impl Emitter { self.leaf(indent, &["primitive", &name]); } TypeShape::Ref(name) => { + // Inline-substitute when the referenced type is a + // primitive-alias or string-enum. Matches Python's + // Pydantic Literal/alias inlining. + if let Some(inline_shape) = self.inlines.get(name).cloned() { + self.emit_type_child(indent, &inline_shape); + return; + } let n = kdl_string(name); self.leaf(indent, &["ref", &n]); } @@ -313,16 +327,86 @@ pub(crate) struct IrSnapshot { pub types: BTreeMap<&'static str, NamedType>, pub functions: Vec<&'static dyn FunctionSpec>, pub contexts: Vec<(&'static str, Vec<&'static dyn FunctionSpec>)>, + /// Types that inline to a `TypeShape` at every reference site rather + /// than emitting as their own `type "X" { ... }` entry. Populated from + /// `Alias(Primitive(_))` and `Enum` named types — both are + /// information-zero indirections that the codegen consumer doesn't + /// gain anything from naming. Matches the Python emitter's behavior + /// (Pydantic `FigureId = str` and `Literal["..."]` inline; they don't + /// materialize as named types). 
+ pub inlines: BTreeMap<&'static str, TypeShape>, } impl IrSnapshot { pub(crate) fn collect() -> Self { // Types: alphabetized for byte-equivalence with Python's `sorted(named_types)`. - let mut types: BTreeMap<&'static str, NamedType> = BTreeMap::new(); + let mut all_types: BTreeMap<&'static str, NamedType> = BTreeMap::new(); for entry in TYPES { - types.insert(entry.name, (entry.shape_fn)()); + all_types.insert(entry.name, (entry.shape_fn)()); } + // Partition into emit-candidate types vs inlines. An inline is a + // named type whose shape collapses to a single `TypeShape` at the + // field site — primitive aliases and string enums. + let mut candidates: BTreeMap<&'static str, NamedType> = BTreeMap::new(); + let mut inlines: BTreeMap<&'static str, TypeShape> = BTreeMap::new(); + for (name, body) in all_types { + match &body { + NamedType::Alias(TypeShape::Primitive(p)) => { + inlines.insert(name, TypeShape::Primitive(*p)); + } + NamedType::Enum(variants) => { + inlines.insert(name, TypeShape::Enum(variants.clone())); + } + _ => { + candidates.insert(name, body); + } + } + } + + // Tree-shake: keep only types reachable from a registered function's + // input/output. The function macro registers canonical-named + // entries (e.g. `userPrefsOutput`); derive registers original-named + // entries (`UserPrefs`, `BrushSettings`, …). Only those reached + // via Ref-walk from a function's input/output names belong in the + // emitted IR. Mirrors Python's `_collect_named_types`. 
+ let mut reachable: std::collections::HashSet<&'static str> = + std::collections::HashSet::new(); + let mut frontier: Vec<&'static str> = Vec::new(); + for fn_spec in FUNCTIONS { + if fn_spec.private() { + continue; + } + if let Some(input_name) = fn_spec.input_type() { + if reachable.insert(input_name) { + frontier.push(input_name); + } + } + let output_name = fn_spec.output_type(); + if reachable.insert(output_name) { + frontier.push(output_name); + } + } + while let Some(name) = frontier.pop() { + // Inlines don't carry refs we care about (Primitive/Enum); skip. + if inlines.contains_key(name) { + continue; + } + let body = match candidates.get(name) { + Some(b) => b.clone(), + None => continue, + }; + collect_refs(&body, &mut |r| { + if reachable.insert(r) { + frontier.push(r); + } + }); + } + let types: BTreeMap<&'static str, NamedType> = candidates + .into_iter() + .filter(|(name, _)| reachable.contains(name)) + .collect(); + // Functions: alphabetical by wire name (canonical IR ordering, // matches the Python emitter's `sorted(functions)`). Skip `private`. let mut functions: Vec<&'static dyn FunctionSpec> = FUNCTIONS @@ -353,15 +437,42 @@ impl IrSnapshot { types, functions, contexts, + inlines, } } } +/// Walk every Ref in a NamedType's shape and call `visit` for each name. +fn collect_refs(body: &NamedType, visit: &mut F) { + match body { + NamedType::Struct(fields) => { + for field in fields { + walk_shape_refs(&field.shape, visit); + } + } + NamedType::Alias(inner) => walk_shape_refs(inner, visit), + NamedType::Enum(_) => {} + } +} + +fn walk_shape_refs(shape: &TypeShape, visit: &mut F) { + match shape { + TypeShape::Ref(name) => visit(name), + TypeShape::List(inner) | TypeShape::Optional(inner) => walk_shape_refs(inner, visit), + TypeShape::Union(branches) => { + for b in branches { + walk_shape_refs(b, visit); + } + } + TypeShape::Primitive(_) | TypeShape::Enum(_) => {} + } +} + /// Build the Mizan IR for every registered type/function/context. 
Returns KDL. pub fn build_ir() -> String { crate::graph_check::verify_invariants(); let snap = IrSnapshot::collect(); - let mut em = Emitter::new(); + let mut em = Emitter::new(&snap.inlines); // Type definitions let types_emitted = !snap.types.is_empty();