mizan: IR inline-substitution + reachability tree-shake + serde rename_all
Three substrate moves required by the Blazr-session port that surfaced
real cross-backend divergences:
1. Inline-substitution for primitive aliases and string enums. Named
types whose body is `Alias(Primitive(_))` or `Enum(_)` collapse into
their inline TypeShape at every `Ref` use site, and don't emit as
their own `type "X" { ... }` entry. Matches Python's Pydantic Literal
and `Foo = str` alias inlining — codegen consumers see the primitive
directly rather than chasing a single-hop indirection.
2. Reachability tree-shake on the type registry. `#[derive(Mizan)]` now
auto-registers every Mizan type into the TYPES slice; the emitter
then transitively walks Refs from function inputs/outputs and emits
only the reachable subset. Original-named entries from derive
are emitted only when something refs them; canonical-renamed entries
from the function macro are reachable by definition. Mirrors
Python's `_collect_named_types`.
3. `#[serde(rename_all = "...")]` + `#[serde(rename = "...")]`
propagation in `#[derive(Mizan)]` for enums. IR enum variants now
match the on-wire JSON casing (lowercase / snake_case / kebab-case /
etc.), not the Rust variant idents. Supports all serde casings.
AFI codegen + wire parity stays green after these changes (the AFI
fixture's enum-free + Pydantic-shape types are unchanged by the three
substrate extensions).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -75,13 +75,20 @@ pub fn snake_to_camel(name: &str) -> String {
|
||||
out
|
||||
}
|
||||
|
||||
struct Emitter {
|
||||
struct Emitter<'a> {
|
||||
lines: Vec<String>,
|
||||
/// Types whose references should be substituted with their inline
|
||||
/// shape at the use site (and which don't emit as their own
|
||||
/// `type "X" { ... }` entries). Populated from `IrSnapshot::inlines`.
|
||||
inlines: &'a BTreeMap<&'static str, TypeShape>,
|
||||
}
|
||||
|
||||
impl Emitter {
|
||||
fn new() -> Self {
|
||||
Self { lines: Vec::new() }
|
||||
impl<'a> Emitter<'a> {
|
||||
fn new(inlines: &'a BTreeMap<&'static str, TypeShape>) -> Self {
|
||||
Self {
|
||||
lines: Vec::new(),
|
||||
inlines,
|
||||
}
|
||||
}
|
||||
|
||||
fn prefix(&self, indent: usize) -> String {
|
||||
@@ -118,6 +125,13 @@ impl Emitter {
|
||||
self.leaf(indent, &["primitive", &name]);
|
||||
}
|
||||
TypeShape::Ref(name) => {
|
||||
// Inline-substitute when the referenced type is a
|
||||
// primitive-alias or string-enum. Matches Python's
|
||||
// Pydantic Literal/alias inlining.
|
||||
if let Some(inline_shape) = self.inlines.get(name).cloned() {
|
||||
self.emit_type_child(indent, &inline_shape);
|
||||
return;
|
||||
}
|
||||
let n = kdl_string(name);
|
||||
self.leaf(indent, &["ref", &n]);
|
||||
}
|
||||
@@ -313,16 +327,86 @@ pub(crate) struct IrSnapshot {
|
||||
pub types: BTreeMap<&'static str, NamedType>,
|
||||
pub functions: Vec<&'static dyn FunctionSpec>,
|
||||
pub contexts: Vec<(&'static str, Vec<&'static dyn FunctionSpec>)>,
|
||||
/// Types that inline to a `TypeShape` at every reference site rather
|
||||
/// than emitting as their own `type "X" { ... }` entry. Populated from
|
||||
/// `Alias(Primitive(_))` and `Enum` named types — both are
|
||||
/// information-zero indirections that the codegen consumer doesn't
|
||||
/// gain anything from naming. Matches the Python emitter's behavior
|
||||
/// (Pydantic `FigureId = str` and `Literal["..."]` inline; they don't
|
||||
/// materialize as named types).
|
||||
pub inlines: BTreeMap<&'static str, TypeShape>,
|
||||
}
|
||||
|
||||
impl IrSnapshot {
|
||||
pub(crate) fn collect() -> Self {
|
||||
// Types: alphabetized for byte-equivalence with Python's `sorted(named_types)`.
|
||||
let mut types: BTreeMap<&'static str, NamedType> = BTreeMap::new();
|
||||
let mut all_types: BTreeMap<&'static str, NamedType> = BTreeMap::new();
|
||||
for entry in TYPES {
|
||||
types.insert(entry.name, (entry.shape_fn)());
|
||||
all_types.insert(entry.name, (entry.shape_fn)());
|
||||
}
|
||||
|
||||
// Partition into emit-candidate types vs inlines. An inline is a
|
||||
// named type whose shape collapses to a single `TypeShape` at the
|
||||
// field site — primitive aliases and string enums.
|
||||
let mut candidates: BTreeMap<&'static str, NamedType> = BTreeMap::new();
|
||||
let mut inlines: BTreeMap<&'static str, TypeShape> = BTreeMap::new();
|
||||
for (name, body) in all_types {
|
||||
match &body {
|
||||
NamedType::Alias(TypeShape::Primitive(p)) => {
|
||||
inlines.insert(name, TypeShape::Primitive(*p));
|
||||
}
|
||||
NamedType::Enum(variants) => {
|
||||
inlines.insert(name, TypeShape::Enum(variants.clone()));
|
||||
}
|
||||
_ => {
|
||||
candidates.insert(name, body);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Tree-shake: keep only types reachable from a registered function's
|
||||
// input/output. The function macro registers canonical-named
|
||||
// entries (e.g. `userPrefsOutput`); derive registers original-named
|
||||
// entries (`UserPrefs`, `BrushSettings`, …). Only those reached
|
||||
// via Ref-walk from a function's input/output names belong in the
|
||||
// emitted IR. Mirrors Python's `_collect_named_types`.
|
||||
let mut reachable: std::collections::HashSet<&'static str> =
|
||||
std::collections::HashSet::new();
|
||||
let mut frontier: Vec<&'static str> = Vec::new();
|
||||
for fn_spec in FUNCTIONS {
|
||||
if fn_spec.private() {
|
||||
continue;
|
||||
}
|
||||
if let Some(input_name) = fn_spec.input_type() {
|
||||
if reachable.insert(input_name) {
|
||||
frontier.push(input_name);
|
||||
}
|
||||
}
|
||||
let output_name = fn_spec.output_type();
|
||||
if reachable.insert(output_name) {
|
||||
frontier.push(output_name);
|
||||
}
|
||||
}
|
||||
while let Some(name) = frontier.pop() {
|
||||
// Inlines don't carry refs we care about (Primitive/Enum); skip.
|
||||
if inlines.contains_key(name) {
|
||||
continue;
|
||||
}
|
||||
let body = match candidates.get(name) {
|
||||
Some(b) => b.clone(),
|
||||
None => continue,
|
||||
};
|
||||
collect_refs(&body, &mut |r| {
|
||||
if reachable.insert(r) {
|
||||
frontier.push(r);
|
||||
}
|
||||
});
|
||||
}
|
||||
let types: BTreeMap<&'static str, NamedType> = candidates
|
||||
.into_iter()
|
||||
.filter(|(name, _)| reachable.contains(name))
|
||||
.collect();
|
||||
|
||||
// Functions: alphabetical by wire name (canonical IR ordering,
|
||||
// matches the Python emitter's `sorted(functions)`). Skip `private`.
|
||||
let mut functions: Vec<&'static dyn FunctionSpec> = FUNCTIONS
|
||||
@@ -353,15 +437,42 @@ impl IrSnapshot {
|
||||
types,
|
||||
functions,
|
||||
contexts,
|
||||
inlines,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Walk every Ref in a NamedType's shape and call `visit` for each name.
|
||||
fn collect_refs<F: FnMut(&'static str)>(body: &NamedType, visit: &mut F) {
|
||||
match body {
|
||||
NamedType::Struct(fields) => {
|
||||
for field in fields {
|
||||
walk_shape_refs(&field.shape, visit);
|
||||
}
|
||||
}
|
||||
NamedType::Alias(inner) => walk_shape_refs(inner, visit),
|
||||
NamedType::Enum(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn walk_shape_refs<F: FnMut(&'static str)>(shape: &TypeShape, visit: &mut F) {
|
||||
match shape {
|
||||
TypeShape::Ref(name) => visit(name),
|
||||
TypeShape::List(inner) | TypeShape::Optional(inner) => walk_shape_refs(inner, visit),
|
||||
TypeShape::Union(branches) => {
|
||||
for b in branches {
|
||||
walk_shape_refs(b, visit);
|
||||
}
|
||||
}
|
||||
TypeShape::Primitive(_) | TypeShape::Enum(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the Mizan IR for every registered type/function/context. Returns KDL.
|
||||
pub fn build_ir() -> String {
|
||||
crate::graph_check::verify_invariants();
|
||||
let snap = IrSnapshot::collect();
|
||||
let mut em = Emitter::new();
|
||||
let mut em = Emitter::new(&snap.inlines);
|
||||
|
||||
// Type definitions
|
||||
let types_emitted = !snap.types.is_empty();
|
||||
|
||||
Reference in New Issue
Block a user