diff --git a/crates/buiy_core/src/render/atlas/mod.rs b/crates/buiy_core/src/render/atlas/mod.rs index 9e9fd57..ad4652c 100644 --- a/crates/buiy_core/src/render/atlas/mod.rs +++ b/crates/buiy_core/src/render/atlas/mod.rs @@ -72,7 +72,9 @@ mod atlas; pub use atlas::BuiyAtlas; mod primitive; -pub use primitive::{GLYPH_ALPHA_INSTANCE_STRIDE_BYTES, GlyphAlphaInstance, IconInstance}; +pub use primitive::{ + GLYPH_ALPHA_FLOAT_OFFSET, GLYPH_ALPHA_INSTANCE_STRIDE_BYTES, GlyphAlphaInstance, IconInstance, +}; mod warmup; pub use warmup::{AtlasWarmupQueue, AtlasWarmupRequest}; diff --git a/crates/buiy_core/src/render/atlas/primitive.rs b/crates/buiy_core/src/render/atlas/primitive.rs index c04f65e..cc6b126 100644 --- a/crates/buiy_core/src/render/atlas/primitive.rs +++ b/crates/buiy_core/src/render/atlas/primitive.rs @@ -10,6 +10,30 @@ use bytemuck::{Pod, Zeroable}; /// `coverage.wgsl`'s instance `@location`s read. `[f32;4]×4 + u32 = 68`. pub const GLYPH_ALPHA_INSTANCE_STRIDE_BYTES: usize = 68; +/// Float index of the per-glyph straight-alpha (`color[3]`) when a +/// [`GlyphAlphaInstance`] is viewed as a flat `[f32]` raw record. The fields +/// pack `rect[4] ++ uv[4] ++ color[4] ++ clip[4] ++ page` (contiguous `#[repr(C)]`, +/// no pad before `page`), so `color` is the **3rd** `[f32;4]` block and its +/// alpha lands at float index `8 + 3 = 11`. This is the GLYPH mirror of the +/// quad-tier `ALPHA_FLOAT_OFFSET` (= 7, a DIFFERENT offset on the `[f32;17]` +/// quad record): R2's degraded-group forward-composite re-tints glyph alpha, +/// and using the quad offset 7 on a glyph record would corrupt `uv[3]` (a +/// silent wrong-pixel bug). NAMED + compile-asserted so the offset is never a +/// literal `11` at the use site (R1's discipline). The fold itself writes the +/// typed `color[3]` field; this const documents the raw-view parity for the +/// spec and any byte-level reader. 
+/// +/// [`ALPHA_FLOAT_OFFSET`]: crate::render::instance::ALPHA_FLOAT_OFFSET +pub const GLYPH_ALPHA_FLOAT_OFFSET: usize = 11; + +// Tie `GLYPH_ALPHA_FLOAT_OFFSET` to the layout: `color` is the 3rd `[f32;4]` +// block (`rect`, `uv`, `color`), so its alpha (`color[3]`) is at float index +// `8 + 3`. A field reorder that moved `color` would fail this. +const _: () = assert!( + GLYPH_ALPHA_FLOAT_OFFSET == 8 + 3, + "GLYPH_ALPHA_FLOAT_OFFSET must index color[3] = the 3rd vec4 block's alpha" +); + /// One instance per visible glyph (or any single-channel coverage quad, e.g. /// a generated mask stamp). The **alpha-as-color** primitive: the atlas /// stores `R8` coverage and color is applied per-instance, so one resident diff --git a/crates/buiy_core/src/render/buckets.rs b/crates/buiy_core/src/render/buckets.rs index 591e406..afeae03 100644 --- a/crates/buiy_core/src/render/buckets.rs +++ b/crates/buiy_core/src/render/buckets.rs @@ -84,13 +84,13 @@ impl Ord for PrimitiveBatchKey { /// [`PackedInstance`]. #[derive(Default)] pub struct InstanceBuckets { - batches: BTreeMap>, + batches: BTreeMap>, } impl InstanceBuckets { - /// Push one packed instance (as raw `[f32; 13]` = - /// pos2+size2+color4+radius1+clip_min2+clip_max2) into its batch. - pub fn push(&mut self, key: PrimitiveBatchKey, instance: [f32; 13]) { + /// Push one packed instance (as raw `[f32; 17]` = + /// pos2+size2+color4+radius1+clip_min2+clip_max2+affine4) into its batch. + pub fn push(&mut self, key: PrimitiveBatchKey, instance: [f32; 17]) { self.batches.entry(key).or_default().push(instance); } @@ -110,20 +110,30 @@ impl InstanceBuckets { } /// Iterate batches in draw order (`(layer, primitive paint order)`). - pub fn batches(&self) -> impl Iterator)> { + pub fn batches(&self) -> impl Iterator)> { self.batches.iter() } } -/// Flatten a [`PackedInstance`] into the raw `[f32; 13]` the bucket store holds. +/// Flatten a [`PackedInstance`] into the raw `[f32; 17]` the bucket store holds. 
/// Keeps the bucket store decoupled from the concrete instance struct while /// the stride is asserted equal in tests. -pub fn packed_to_raw(p: &PackedInstance) -> [f32; 13] { +/// +/// LAYOUT INVARIANT (R1 / R2 dependency): indices `0..13` are byte-identical to +/// the pre-R1 layout — color is at [`COLOR_FLOAT_OFFSET`]`..+4` and alpha at +/// [`ALPHA_FLOAT_OFFSET`] (R2's degraded-group re-tint reads alpha there). The +/// 2D affine basis appends at `[13..17]` (`[m00, m10, m01, m11]`); identity +/// `[1, 0, 0, 1]` paints axis-aligned. +/// +/// [`COLOR_FLOAT_OFFSET`]: crate::render::instance::COLOR_FLOAT_OFFSET +/// [`ALPHA_FLOAT_OFFSET`]: crate::render::instance::ALPHA_FLOAT_OFFSET +pub fn packed_to_raw(p: &PackedInstance) -> [f32; 17] { [ p.rect_pos[0], p.rect_pos[1], p.rect_size[0], p.rect_size[1], + // color@COLOR_FLOAT_OFFSET (4..8); alpha@ALPHA_FLOAT_OFFSET (7). p.color[0], p.color[1], p.color[2], @@ -133,6 +143,11 @@ pub fn packed_to_raw(p: &PackedInstance) -> [f32; 13] { p.clip_min[1], p.clip_max[0], p.clip_max[1], + // The 2D affine basis APPENDED after index 13 (offsets 0..13 unchanged). + p.affine[0], + p.affine[1], + p.affine[2], + p.affine[3], ] } @@ -197,7 +212,7 @@ pub fn pack_view(nodes: &[ExtractedNode]) -> InstanceBuckets { pub struct PackedPartition { /// The full flat quad blob (every instance, in paint order) — identical to /// `pack_view`'s single `(Quad, 0)` batch flattened. - pub instances: Vec<[f32; 13]>, + pub instances: Vec<[f32; 17]>, /// `group_ranges[g]` = the `[start, end)` instance range of group `g`'s /// members (empty range if the group has no opaque member). pub group_ranges: Vec>, @@ -276,7 +291,7 @@ pub fn pack_view_partitioned( /// per-group contiguous ranges (with the contiguity tripwire) and the /// complement flat runs. 
struct Partitioner { - instances: Vec<[f32; 13]>, + instances: Vec<[f32; 17]>, ranges: RangePartitioner, } @@ -290,7 +305,7 @@ impl Partitioner { /// Append one instance under group `g` (already bounds-filtered by the /// caller), extending or starting the group/flat run it belongs to. - fn push(&mut self, instance: [f32; 13], g: Option) { + fn push(&mut self, instance: [f32; 17], g: Option) { self.instances.push(instance); self.ranges.push(g); } diff --git a/crates/buiy_core/src/render/compositor.rs b/crates/buiy_core/src/render/compositor.rs index cb00c5d..cc89018 100644 --- a/crates/buiy_core/src/render/compositor.rs +++ b/crates/buiy_core/src/render/compositor.rs @@ -17,7 +17,7 @@ use bevy::render::render_resource::{ CachedRenderPipelineId, Extent3d, PipelineCache, TextureDescriptor, TextureDimension, TextureFormat, TextureUsages, }; -use bevy::render::renderer::RenderDevice; +use bevy::render::renderer::{RenderDevice, RenderQueue}; use bevy::render::texture::{CachedTexture, TextureCache}; use bevy::render::view::{Msaa, ViewTarget}; @@ -178,6 +178,21 @@ pub fn post_order_indices(parents: &[Option]) -> Vec { /// with, the glyph atlas pool. pub const RT_POOL_BUDGET_BYTES: u64 = 64 * 1024 * 1024; +/// The live aggregate RT-pool budget (bytes) `prepare_effect_groups` feeds to +/// [`plan_allocation`], as a resource so it is overridable (defaults to +/// [`RT_POOL_BUDGET_BYTES`]). Production never changes it; a test inserts a small +/// budget to FORCE degradation deterministically (the root-cause-correct way to +/// exercise the forward-composite path without an unwieldy 64 MiB-breaching +/// fixture). No production behavior change — the default IS the const. +#[derive(Resource, Clone, Copy, Debug)] +pub struct RtPoolBudget(pub u64); + +impl Default for RtPoolBudget { + fn default() -> Self { + Self(RT_POOL_BUDGET_BYTES) + } +} + /// Bytes one pooled target of `extent` consumes. 
Group targets are pinned /// `Rgba16Float` (effect-compositor.md § 2.2) = 8 bytes/texel. pub fn target_bytes(extent: UVec2) -> u64 { @@ -233,6 +248,174 @@ pub fn plan_allocation(groups: &[(UVec2, EffectReason)], budget: u64) -> Vec, + /// The group's glyph-instance range (`glyph_group_ranges[i]`). + pub glyph_range: Range, + /// Group opacity to fold into each member instance's alpha. + pub opacity: f32, + /// Parent group index, or `None` for a ROOT group. Only ROOT groups are + /// forward-composited by this slice (§ 2.3 scope); a nested degraded group + /// is a documented follow-up (debug-asserted, release-skipped). + pub parent: Option, +} + +/// Coalesce a sorted-or-unsorted list of half-open `[start, end)` ranges into the +/// minimal set of disjoint runs (adjacent + overlapping runs join). Keeps the +/// node's flat-draw loop a clean complement with no fragmented draw calls. +fn merge_ranges(ranges: &mut Vec>) { + ranges.retain(|r| r.start < r.end); // drop empties + ranges.sort_by_key(|r| r.start); + let mut merged: Vec> = Vec::with_capacity(ranges.len()); + for r in ranges.drain(..) { + match merged.last_mut() { + // Adjacent OR overlapping → extend the last run. + Some(last) if r.start <= last.end => last.end = last.end.max(r.end), + _ => merged.push(r), + } + } + *ranges = merged; +} + +/// Forward-composite the ROOT degraded effect groups FLAT (effect-compositor.md +/// § 2.3): for every group `i` with `allocate[i] == false` and `parent == None`, +/// fold its `opacity` into the ALPHA slot of every member instance IN PLACE +/// (quad alpha at [`ALPHA_FLOAT_OFFSET`](crate::render::instance::ALPHA_FLOAT_OFFSET), +/// glyph alpha at `color[3]` = +/// [`GLYPH_ALPHA_FLOAT_OFFSET`](crate::render::atlas::GLYPH_ALPHA_FLOAT_OFFSET)) and merge its instance ranges into the flat +/// ranges so the node's flat WINDOW draw paints them — the group dims exactly +/// once and paints flat, instead of vanishing. 
+/// +/// **Two DIFFERENT gates per tier — alpha-fold vs range-merge (§ 2.3).** The +/// alpha-fold and the range-merge answer to DIFFERENT invariants, so each tier +/// passes two flags: +/// +/// - `fold_*` (the BUFFER-repack signal): the alpha-fold runs IFF that tier's +/// instance buffer was repacked from SOURCE this frame. A freshly repacked +/// buffer carries SOURCE alpha (fold it once); a RETAINED buffer already holds +/// last frame's fold (folding it again compounds to black). So the caller +/// gates `fold_quad`/`fold_glyph` on the buffer-repack signals +/// (`quad_dirty`/`glyph_dirty`), NOT a wider signal. +/// +/// - `merge_*` (the PARTITION-rebuild signal): the range-merge into `*_flat` runs +/// IFF the flat/group partition for that tier was REBUILT this frame, because the +/// rebuild OVERWRITES `*_flat` wholesale and re-EXCLUDES the degraded group's +/// range. The merge must re-add it every rebuild or the degraded group vanishes +/// that frame. The quad partition rebuilds under `quad_dirty` (= the quad fold +/// gate, so quad is symmetric), but the GLYPH partition rebuilds under the UNION +/// `quad_dirty || glyph_dirty` (prepare.rs `partition_glyph_ranges`) — wider than +/// the glyph buffer-repack gate. So on a quad-dirty-only frame with a live +/// degraded glyph group, the glyph partition is rebuilt (range re-excluded) while +/// the glyph buffer is RETAINED: `merge_glyph` must be true (re-add the range) +/// while `fold_glyph` is false (the retained buffer already carries the fold). +/// Conflating the two would drop the glyph range-merge on that frame and the +/// degraded glyphs would vanish. +/// +/// The merge operates on the (possibly retained, already-folded) buffer's range — +/// correct, because the retained buffer still carries last frame's fold; only the +/// PARTITION needs re-stitching, not the alpha. 
+/// +/// **Scope: ROOT degraded groups only.** `plan_allocation` can degrade a NESTED +/// child while its parent keeps a target; routing a nested child's ranges into +/// the WINDOW flat draw paints it in the wrong space/clip and leaves the parent's +/// composite sampling a target the child never reached. Forward-compositing a +/// nested child correctly is a node-side change (route into the PARENT's step-1 +/// target pass) tracked as a follow-up. Here a nested degraded group +/// `debug_assert!(false, …)`s (loud in dev/tests) and in release is left +/// untouched + un-merged — no worse than today's vanish. +#[allow(clippy::too_many_arguments)] +pub fn fold_root_degraded_into_flat( + allocate: &[bool], + groups: &[DegradedGroup], + fold_quad: bool, + merge_quad: bool, + fold_glyph: bool, + merge_glyph: bool, + quad_raw: &mut [[f32; 17]], + glyph_raw: &mut [crate::render::atlas::GlyphAlphaInstance], + quad_flat: &mut Vec>, + glyph_flat: &mut Vec>, +) { + use crate::render::instance::ALPHA_FLOAT_OFFSET; + + let mut quad_merged = false; + let mut glyph_merged = false; + + for (i, group) in groups.iter().enumerate() { + // Only degraded groups participate. + if allocate.get(i).copied().unwrap_or(true) { + continue; + } + // MAJOR-1: nested degraded groups are out of this slice's charter. + if group.parent.is_some() { + debug_assert!( + false, + "nested degraded effect-group forward-composite into the parent \ + target is not yet implemented (follow-up); group {i} parent {:?} \ + — root-degraded only this slice (effect-compositor.md § 2.3)", + group.parent + ); + continue; // release: leave the nested child untouched (vanishes — tracked). + } + + let opacity = group.opacity; + + // Quad tier. The alpha-fold gates on the quad BUFFER-repack signal + // (`fold_quad`); the range-merge on the quad PARTITION-rebuild signal + // (`merge_quad`). 
They coincide today (both `quad_dirty`), but stay + // distinct so the quad tier reads symmetrically with the glyph tier + // below — where they genuinely differ. + if fold_quad { + for idx in group.quad_range.clone() { + if let Some(inst) = quad_raw.get_mut(idx as usize) { + inst[ALPHA_FLOAT_OFFSET] *= opacity; + } + } + } + if merge_quad && group.quad_range.start < group.quad_range.end { + quad_flat.push(group.quad_range.clone()); + quad_merged = true; + } + + // Glyph tier. The alpha-fold gates on the glyph BUFFER-repack signal + // (`fold_glyph` = glyph_dirty); the range-merge on the glyph + // PARTITION-rebuild signal (`merge_glyph` = quad_dirty || glyph_dirty). + // On a quad-dirty-only frame the partition is rebuilt (the degraded + // glyph range re-excluded) while the glyph buffer is RETAINED, so + // `merge_glyph` re-adds the range while `fold_glyph` leaves the + // already-folded retained alpha alone — the MAJOR-2 vanish fix. + if fold_glyph { + for idx in group.glyph_range.clone() { + if let Some(inst) = glyph_raw.get_mut(idx as usize) { + // The glyph alpha is the typed `color[3]` (= GLYPH_ALPHA_FLOAT_OFFSET + // in the raw view) — NOT the quad ALPHA_FLOAT_OFFSET (7), which + // would corrupt `uv[3]`. + inst.color[3] *= opacity; + } + } + } + if merge_glyph && group.glyph_range.start < group.glyph_range.end { + glyph_flat.push(group.glyph_range.clone()); + glyph_merged = true; + } + } + + // Coalesce once per tier so the node's flat loop stays a clean complement. 
+ if quad_merged { + merge_ranges(quad_flat); + } + if glyph_merged { + merge_ranges(glyph_flat); + } +} + /// The pinned off-screen group-target descriptor (effect-compositor.md § 2.2): /// FIXED `Rgba16Float` (linear, NOT the view's SDR format) so group opacity + /// isolation composite in linear space; `RENDER_ATTACHMENT` (subtree renders @@ -323,8 +506,15 @@ pub struct PreparedEffectGroups { #[derive(Component, Default, Clone)] pub struct PreparedEffectTargets { /// Per-group off-screen `Rgba16Float` targets (extract order). `None` == the - /// group degraded under budget (`plan_allocation` == false) and has no target - /// — the node skips it (v1: degraded groups draw flat, no per-child approx). + /// group degraded under budget (`plan_allocation` == false) and has no target. + /// The node's step-1 group pass `continue`s on a `None` target — but a ROOT + /// degraded group is NOT lost: `prepare_effect_groups` folded its `opacity` + /// into its member instances' alpha in place and merged its ranges into + /// `flat_ranges`/`glyph_flat_ranges`, so the FLAT window draw paints it + /// (effect-compositor.md § 2.3 forward-composite). A NESTED degraded group + /// (parent == Some) is the one case still skipped here (its correct + /// forward-composite is into the PARENT target, a node-side follow-up; + /// `fold_root_degraded_into_flat` debug-asserts on it). 
pub targets: Vec>, /// Per-group placement: the logical→target view-uniform columns (to render /// the group's subtree INTO its target), the composite quad's logical bounds, @@ -403,10 +593,22 @@ pub struct RtPoolStats { pub(crate) fn prepare_effect_groups( mut commands: Commands, render_device: Res, + render_queue: Res, mut texture_cache: ResMut, extracted: Res, - buffers: Res, + // MAJOR-3: the glyph + text-quad carriers `prepare_buiy_instances` packs from, + // so this system can reconstruct the SAME per-tier buffer-repack signals it + // saw this frame (Bevy change-detection is per-system; reconstructing the + // `is_changed()` gates here is valid). `ExtractedNodesView` (`nodes` below) + // and `extracted` (= `ExtractedEffectGroups`, the `groups` term) cover the + // rest of the quad-dirty signal. + glyphs: Res, + text_quads: Res, + // ResMut so the degraded-group fold can re-tint the already-packed buffers + // in place + re-upload the touched ones (effect-compositor.md § 2.3). + mut buffers: ResMut, mut stats: ResMut, + budget: Res, pipeline_cache: Res, composite_pipeline: Res, mut group_pipelines: ResMut, @@ -421,6 +623,18 @@ pub(crate) fn prepare_effect_groups( views: Query<(Entity, &ViewTarget, &Msaa)>, nodes: Res, ) { + // MAJOR-2: reconstruct the SAME per-tier BUFFER-repack signals + // `prepare_buiy_instances` used this frame (prepare.rs § damage gate). The + // degraded-group fold re-tints a buffer IFF that buffer was repacked from + // SOURCE this frame; a retained buffer already carries last frame's fold and + // must NOT be re-folded (it would compound to black). These mirror + // `prepare.rs` `quad_dirty`/`glyph_dirty` exactly — quad on + // nodes|groups|text_quads, glyph on glyphs alone (the buffer-repack signal, + // which DIFFERS from the wider glyph-partition signal). Computed before the + // `extracted.0` shadow below so the `is_changed()` reads the `Res` wrappers. 
+ let quad_dirty = nodes.is_changed() || extracted.is_changed() || text_quads.is_changed(); + let glyph_dirty = glyphs.is_changed(); + let extracted = &extracted.0; // No live groups: clear the carriers off every view so a frame that drops @@ -520,7 +734,7 @@ pub(crate) fn prepare_effect_groups( (extent, g.reason) }) .collect(); - let allocate = plan_allocation(&alloc_inputs, RT_POOL_BUDGET_BYTES); + let allocate = plan_allocation(&alloc_inputs, budget.0); // Build the post-order composite sequence over the parent links. let parents: Vec> = extracted.iter().map(|g| g.parent).collect(); @@ -627,6 +841,66 @@ pub(crate) fn prepare_effect_groups( live_targets, }; + // Degraded-group forward composite (effect-compositor.md § 2.3). A ROOT group + // that did NOT get a pooled target (`allocate[i] == false`) under budget + // pressure must paint FLAT with its `opacity` folded per-instance, not vanish. + // Gate on ANY degradation so the no-degradation steady state stays a zero + // fold + zero re-upload (gate-#14 budget). Re-uploads only the touched buffer, + // and only when that buffer was repacked from SOURCE this frame (the per-tier + // idempotency discipline — a retained buffer already holds the fold). + if allocate.iter().any(|a| !a) { + let degraded: Vec = extracted + .iter() + .enumerate() + .map(|(i, g)| DegradedGroup { + quad_range: buffers.group_ranges.get(i).cloned().unwrap_or(0..0), + glyph_range: buffers.glyph_group_ranges.get(i).cloned().unwrap_or(0..0), + opacity: g.opacity, + parent: g.parent, + }) + .collect(); + + // Borrow-split: `values_mut()` on each RawBufferVec + the flat-range vecs. + // `BuiyInstanceBuffers` exposes the raw quad/glyph stores and the flat + // ranges as distinct fields, so split them through a single `&mut buffers`. + // Per-tier gates (MAJOR-2). The ALPHA-fold gates on the BUFFER-repack + // signal so a retained (already-folded) buffer is left untouched. 
The + // RANGE-merge gates on the PARTITION-rebuild signal, because + // `prepare_buiy_instances` rebuilds (and re-excludes the degraded range + // from) each flat partition under that signal — the merge must re-add the + // range every rebuild. Quad: both are `quad_dirty` (the quad partition + // rebuilds under the quad gate). GLYPH: the fold is `glyph_dirty` (buffer + // repack) but the MERGE is `quad_dirty || glyph_dirty` (the glyph + // partition's union rebuild gate, prepare.rs `partition_glyph_ranges`) — + // so a quad-dirty-only frame re-merges the retained degraded glyph range + // instead of letting it vanish. + let merge_glyph = quad_dirty || glyph_dirty; + let buffers = &mut *buffers; + fold_root_degraded_into_flat( + &allocate, + &degraded, + quad_dirty, + quad_dirty, + glyph_dirty, + merge_glyph, + buffers.quad.values_mut(), + buffers.glyph.values_mut(), + &mut buffers.flat_ranges, + &mut buffers.glyph_flat_ranges, + ); + + // Re-upload only the buffer(s) whose CPU bytes the fold touched. The fold + // runs per tier iff that tier was repacked this frame, so the re-upload + // mirrors the same per-tier gate (a retained buffer was neither folded nor + // needs re-upload). + if quad_dirty { + buffers.quad.write_buffer(&render_device, &render_queue); + } + if glyph_dirty { + buffers.glyph.write_buffer(&render_device, &render_queue); + } + } + let prepared = PreparedEffectGroups { groups, composite_order, @@ -654,7 +928,7 @@ pub(crate) fn prepare_effect_groups( /// edge — the `BuiyRenderLabel` node group and its edges are owned by /// architecture.md § 1.3; the compositor's passes run *inside* /// [`BuiyNode::run`](super::node). It registers the per-`EffectGroup` -/// [`prepare_effect_groups`] system, the [`RtPoolStats`] observable, and (via +/// `prepare_effect_groups` system, the [`RtPoolStats`] observable, and (via /// [`super::composite::register`]) the composite-pipeline specialization cache. 
/// The device-owning composite resources (`CompositePipeline`) init in /// `finish` (`composite::register_gpu`). @@ -667,6 +941,9 @@ pub(crate) fn register(render_app: &mut SubApp) { // the textured-quad composite pipeline) — device-free to init here; the // concrete pipeline ids materialize lazily through the `PipelineCache`. render_app.init_resource::(); + // The overridable RT-pool budget (defaults to `RT_POOL_BUDGET_BYTES`); a test + // inserts a small value to force the degradation path deterministically. + render_app.init_resource::(); super::composite::register(render_app); // The per-`EffectGroup` prepare pass (effect-compositor.md § 1.1) attaches // in `RenderSystems::Prepare`. It runs AFTER `prepare_buiy_instances` so the diff --git a/crates/buiy_core/src/render/extract.rs b/crates/buiy_core/src/render/extract.rs index 49a38a3..b39676e 100644 --- a/crates/buiy_core/src/render/extract.rs +++ b/crates/buiy_core/src/render/extract.rs @@ -72,6 +72,20 @@ pub struct ExtractedNode { pub position: Vec2, /// Box size in logical px, from `ResolvedLayout.size`. pub size: Vec2, + /// The 2D linear part of `GlobalTransform`'s affine — the box-local → + /// window-logical basis, as column vectors `[col0, col1]` where + /// `col0 = [m00, m10]` and `col1 = [m01, m11]`. Applied per-vertex in the + /// quad/shadow vertex stage about the box-local origin (the corner the + /// composed matrix maps `0` to), so a rotated/scaled element paints with the + /// right orientation and size. Identity `[[1,0],[0,1]]` == no rotation/scale + /// (the byte-identical fast path). Pillar 5: this reads the propagated + /// `GlobalTransform`, NOT `ResolvedTransform` — the bridge already folded + /// `ResolvedTransform.matrix` into `Transform` so render == picking by + /// construction. 
FIDELITY: faithful for rotation + (non-)uniform scale; + /// skew / general `TransformMatrix::Matrix` are bounded by the bridge's + /// TRS-only `Transform::from_matrix` decompose (a lossy shear) — a separate + /// residual (clip-and-transform.md § B.5). + pub affine: [[f32; 2]; 2], /// Resolved background fill (already theme-resolved; `Color::NONE` == /// transparent, extract emits no quad for it downstream). pub color: Color, @@ -111,6 +125,13 @@ pub fn extracted_node_for( theme: &Theme, ) -> ExtractedNode { let translation = global_transform.translation(); + // The 2D linear part of the composed affine (glam `Affine3A.matrix3`): xy of + // the x-axis is col0, xy of the y-axis is col1 (COLUMNS, not rows — a + // transpose would rotate the wrong way). For a pure rotation/scale the + // matrix maps box-local `0 -> 0`, so `translation.xy` stays the painted + // top-left and an identity transform yields the `[[1,0],[0,1]]` fast path. + let m = global_transform.affine().matrix3; + let affine = [[m.x_axis.x, m.x_axis.y], [m.y_axis.x, m.y_axis.y]]; let color = match background { Some(bg) => crate::render::color::resolve_token(&bg.color, theme), None => Color::NONE, @@ -122,6 +143,7 @@ pub fn extracted_node_for( color, clip: clip.copied(), group: None, + affine, } } diff --git a/crates/buiy_core/src/render/instance.rs b/crates/buiy_core/src/render/instance.rs index dfe0779..98e7719 100644 --- a/crates/buiy_core/src/render/instance.rs +++ b/crates/buiy_core/src/render/instance.rs @@ -1,6 +1,6 @@ //! Per-instance data layout for the rounded-rect pipeline (the view-uniform //! path). The struct stride must equal the per-instance `array_stride` declared -//! in `pipeline.rs::register` (52 B). Records stay in LOGICAL-pixel units: the +//! in `pipeline.rs::register` (68 B). Records stay in LOGICAL-pixel units: the //! per-view [`BuiyViewUniform`] does the logical → clip transform in the vertex //! stage, so the Phase-0 per-instance y-flip / `2/min(w,h)` radius hack is //! 
retired (`buiy-render-pipeline-design`, architecture.md § 3). @@ -10,6 +10,14 @@ //! per-batch scissor or re-sort (one order-safe draw). A node with no clip packs //! the full-view sentinel (`[±INFINITY]`) so the discard never fires. //! +//! It also carries the 2D affine basis (`affine`, R1 — the `[m00,m10,m01,m11]` +//! columns of `GlobalTransform`'s 2D linear part), APPENDED after the clip +//! fields so every prior field offset stays byte-stable (notably +//! [`COLOR_FLOAT_OFFSET`] / [`ALPHA_FLOAT_OFFSET`], which R2's degraded-group +//! re-tint reads). The vertex stage transforms each box-local corner by it, so +//! a rotated/scaled element paints with the right orientation/size. Identity +//! `[1,0,0,1]` == no transform (the byte-identical fast path). +//! //! [`BuiyViewUniform`]: crate::render::view_uniform::BuiyViewUniform use crate::render::DrawData; @@ -18,11 +26,30 @@ use bevy::prelude::*; use bytemuck::{Pod, Zeroable}; /// Stride of the logical-pixel [`PackedInstance`] in bytes. Must match the -/// per-instance `array_stride` declared in `pipeline.rs::register` (52 B). The +/// per-instance `array_stride` declared in `pipeline.rs::register` (68 B). The /// values are LOGICAL pixels — the GPU view uniform /// ([`crate::render::view_uniform::BuiyViewUniform`]) applies the logical->clip /// transform in the vertex stage. -pub const PACKED_INSTANCE_STRIDE_BYTES: usize = 52; +pub const PACKED_INSTANCE_STRIDE_BYTES: usize = 68; + +/// Float index of the per-instance color block (`color[0]`) in the raw +/// [`crate::render::buckets::packed_to_raw`] record. NAMED so the color/alpha +/// offset is referenced symbolically everywhere (R1 HARD CONSTRAINT): the +/// append-after-13 affine layout exists precisely to keep this offset stable so +/// R2's degraded-group re-tint can index it. +pub const COLOR_FLOAT_OFFSET: usize = 4; + +/// Float index of the per-instance alpha (`color[3]`) in the raw record — +/// `COLOR_FLOAT_OFFSET + 3`. 
R2's degraded-group forward-composite re-tints by +/// reading the alpha at this offset; it MUST stay `7` across any layout growth +/// (the affine basis appends after the clip fields, never before color). +pub const ALPHA_FLOAT_OFFSET: usize = COLOR_FLOAT_OFFSET + 3; + +/// The identity 2D affine basis `[m00, m10, m01, m11] = [1, 0, 0, 1]` — no +/// rotation/scale. Quads with no `GlobalTransform` linear part (the `DrawData` +/// and text-quad packers) carry this, so their packed bytes are unchanged by +/// R1's growth except for the four appended identity floats. +const IDENTITY_AFFINE: [f32; 4] = [1.0, 0.0, 0.0, 1.0]; /// Full-view clip sentinel for an unclipped instance (`ExtractedNode.clip == /// None`): `clip_min = [-INFINITY; 2]`, `clip_max = [+INFINITY; 2]`. For any @@ -55,6 +82,12 @@ pub struct PackedInstance { /// Clip AABB maximum in LOGICAL px. The fragment discards `frag_pos > clip_max`; /// `[+INFINITY; 2]` = no upper bound (the full-view sentinel). pub clip_max: [f32; 2], + /// The 2D affine basis `[m00, m10, m01, m11]` (the column vectors of + /// `GlobalTransform`'s 2D linear part) — R1. APPENDED after the clip fields + /// so every prior offset stays byte-stable (the R2 dependency). The vertex + /// stage maps each box-local corner `c` to `mat2(col0, col1) * c`. Identity + /// `[1, 0, 0, 1]` paints axis-aligned (no rotation/scale). + pub affine: [f32; 4], } /// Pack one [`DrawData`] into a logical-pixel [`PackedInstance`]. The clip @@ -70,6 +103,8 @@ pub fn pack_instance(draw: &DrawData) -> PackedInstance { radius: draw.radius, clip_min: CLIP_SENTINEL_MIN, clip_max: CLIP_SENTINEL_MAX, + // `DrawData` has no transform; paint axis-aligned (identity basis). + affine: IDENTITY_AFFINE, } } @@ -94,6 +129,15 @@ pub fn pack_extracted(node: &ExtractedNode) -> PackedInstance { radius: 0.0, clip_min, clip_max, + // The 2D affine basis, flattened to columns [m00, m10, m01, m11] (R1): + // col0 = node.affine[0], col1 = node.affine[1]. 
The vertex stage applies + // it about the box-local origin so rotation/scale paint correctly. + affine: [ + node.affine[0][0], + node.affine[0][1], + node.affine[1][0], + node.affine[1][1], + ], } } @@ -113,13 +157,15 @@ pub fn pack_text_quad(quad: &TextQuad) -> PackedInstance { radius: 0.0, clip_min, clip_max, + // Text quads carry no transform; paint axis-aligned (identity basis). + affine: IDENTITY_AFFINE, } } -/// `true` iff the raw `[f32; 13]` bucket layout is byte-equal to +/// `true` iff the raw `[f32; 17]` bucket layout is byte-equal to /// [`PackedInstance`]'s stride (the pipeline-descriptor invariant). Pins the /// agreement the instanced draw relies on. pub fn packed_raw_stride_agrees() -> bool { - std::mem::size_of::<PackedInstance>() == std::mem::size_of::<[f32; 13]>() - && PACKED_INSTANCE_STRIDE_BYTES == std::mem::size_of::<[f32; 13]>() + std::mem::size_of::<PackedInstance>() == std::mem::size_of::<[f32; 17]>() + && PACKED_INSTANCE_STRIDE_BYTES == std::mem::size_of::<[f32; 17]>() } diff --git a/crates/buiy_core/src/render/node.rs b/crates/buiy_core/src/render/node.rs index 199d3f5..de307d4 100644 --- a/crates/buiy_core/src/render/node.rs +++ b/crates/buiy_core/src/render/node.rs @@ -137,8 +137,14 @@ impl ViewNode for BuiyNode { .and_then(|a| a.coverage_bind_group()); for group in &prepared.groups { let Some(target) = targets.targets.get(group.index).and_then(|t| t.as_ref()) else { - // Degraded group (no target): members are SKIPPED, not drawn - // flat — see the follow-ups entry T8 files. + // Degraded group (no target): skip the off-screen pass here. A + // ROOT degraded group is NOT lost — `prepare_effect_groups` + // folded its `opacity` into its members' alpha and merged its + // ranges into the flat draw, so the flat WINDOW pass below + // paints it (effect-compositor.md § 2.3 forward-composite). A + // NESTED degraded group is still skipped (its correct + // forward-composite into the parent target is a node-side + // follow-up; the prepare fold debug-asserts on it). 
continue; }; let placement = &targets.placements[group.index]; diff --git a/crates/buiy_core/src/render/prepare.rs b/crates/buiy_core/src/render/prepare.rs index 518250d..de8462d 100644 --- a/crates/buiy_core/src/render/prepare.rs +++ b/crates/buiy_core/src/render/prepare.rs @@ -82,13 +82,13 @@ pub struct GlyphEntityRun { /// carriers to components together when the view-entity routing lands. /// /// The quad instance store is a [`RawBufferVec`] (not a `BufferVec`): the -/// instance record is a raw `[f32; 13]` POD vertex blob (the pipeline-descriptor +/// instance record is a raw `[f32; 17]` POD vertex blob (the pipeline-descriptor /// layout), which is `NoUninit` but **not** a `ShaderType`, so it rides the /// raw, CPU-readable vertex path rather than the std140/encase `BufferVec` path. #[derive(Resource)] pub struct BuiyInstanceBuffers { /// Quad-family instances (the v1 primitive set). Grows in place. - pub quad: RawBufferVec<[f32; 13]>, + pub quad: RawBufferVec<[f32; 17]>, /// Coverage-glyph instances (the alpha-as-color primitive, /// atlas-and-text-seam.md § 4.1). A `RawBufferVec` for /// the same reason as `quad`: `GlyphAlphaInstance` is a raw `#[repr(C)]` @@ -181,9 +181,9 @@ pub struct BufferUploadStats { /// R5's `ExtractedNodes.nodes` is fed to [`pack_view`] directly — no `DrawData` /// adapter — so the prepare phase consumes R5's component with no parallel /// carrier (the packing seam after Task 6's flip). 
-pub fn pack_extracted_nodes(nodes: &ExtractedNodes) -> (Vec<[f32; 13]>, [f32; 12]) { +pub fn pack_extracted_nodes(nodes: &ExtractedNodes) -> (Vec<[f32; 17]>, [f32; 12]) { let buckets = pack_view(&nodes.nodes); - let instances: Vec<[f32; 13]> = buckets + let instances: Vec<[f32; 17]> = buckets .batches() .flat_map(|(_key, batch)| batch.iter().copied()) .collect(); diff --git a/crates/buiy_core/src/render/primitive.rs b/crates/buiy_core/src/render/primitive.rs index de9011d..fbbd1d2 100644 --- a/crates/buiy_core/src/render/primitive.rs +++ b/crates/buiy_core/src/render/primitive.rs @@ -13,8 +13,10 @@ //! module imports it and adds only the `(kind, format)` specialization key. //! //! The per-instance vertex layout carries the R8b clip AABB -//! (`clip_min`/`clip_max`) at `@location(6)`/`(7)`, lifting the instance stride -//! to 52 B; the quad-family shaders discard fragments outside it. +//! (`clip_min`/`clip_max`) at `@location(6)`/`(7)` and the R1 2D affine basis +//! (`affine_col0`/`affine_col1`) at `@location(8)`/`(9)`, lifting the instance +//! stride to 68 B; the quad-family shaders discard fragments outside the clip +//! and transform each box-local corner by the affine. use bevy::mesh::VertexBufferLayout; use bevy::render::render_resource::{ @@ -62,10 +64,10 @@ pub struct BuiyPrimitives; impl BuiyPrimitives { /// The two interleaved vertex-buffer layouts shared by every quad-family - /// primitive (static unit quad, stride 16; per-instance record, stride 52). + /// primitive (static unit quad, stride 16; per-instance record, stride 68). /// The instance record carries the per-primitive clip AABB at - /// `@location(6)`/`(7)` (R8b); its `array_stride` tracks - /// [`PACKED_INSTANCE_STRIDE_BYTES`] (52 B). + /// `@location(6)`/`(7)` (R8b) and the 2D affine basis at `@location(8)`/`(9)` + /// (R1); its `array_stride` tracks [`PACKED_INSTANCE_STRIDE_BYTES`] (68 B). 
/// /// [`PACKED_INSTANCE_STRIDE_BYTES`]: crate::render::instance::PACKED_INSTANCE_STRIDE_BYTES fn quad_family_vertex_buffers() -> Vec { @@ -87,7 +89,7 @@ impl BuiyPrimitives { ], }, VertexBufferLayout { - array_stride: 52, + array_stride: 68, step_mode: VertexStepMode::Instance, attributes: vec![ VertexAttribute { @@ -123,6 +125,20 @@ impl BuiyPrimitives { offset: 44, shader_location: 7, }, + // R1 2D affine basis: `affine_col0` @ 52, `affine_col1` @ 60 + // — appended AFTER the clip fields so offsets 0..52 stay + // byte-stable (the R2 dependency). See `PackedInstance.affine` + // and both quad-family shaders' `Instance.affine_col0/1`. + VertexAttribute { + format: VertexFormat::Float32x2, + offset: 52, + shader_location: 8, + }, + VertexAttribute { + format: VertexFormat::Float32x2, + offset: 60, + shader_location: 9, + }, ], }, ] @@ -235,7 +251,9 @@ impl SpecializedRenderPipeline for BuiyPrimitives { vec![view_uniform_layout_descriptor()] }; // The glyph instance record is `GlyphAlphaInstance` (stride 68), a - // different layout from the quad family's `PackedInstance` (stride 52). + // DISTINCT layout from the quad family's `PackedInstance` — even though + // both strides are now 68 B (R1), the attr sets, raw types ([f32;17] vs + // GlyphAlphaInstance), and pipelines differ and must not be conflated. 
let buffers = if is_glyph { Self::glyph_vertex_buffers() } else { diff --git a/crates/buiy_core/src/render/shader.wgsl b/crates/buiy_core/src/render/shader.wgsl index 6b3b036..e316e24 100644 --- a/crates/buiy_core/src/render/shader.wgsl +++ b/crates/buiy_core/src/render/shader.wgsl @@ -24,15 +24,17 @@ struct Instance { @location(5) radius: f32, // logical px @location(6) clip_min: vec2, // logical px, clip AABB min (-inf = none) @location(7) clip_max: vec2, // logical px, clip AABB max (+inf = none) + @location(8) affine_col0: vec2, // 2D affine basis col0 = [m00, m10] + @location(9) affine_col1: vec2, // 2D affine basis col1 = [m01, m11] }; struct VertexOut { @builtin(position) clip_position: vec4, - @location(0) local_uv: vec2, // -1..+1 across the rect - @location(1) half_size: vec2, // logical px + @location(0) local_uv: vec2, // -1..+1 across the rect (box-local, rotation-invariant) + @location(1) half_size: vec2, // logical px (box-local SDF half-extent) @location(2) color: vec4, @location(3) radius: f32, // logical px - @location(4) rect_center: vec2, // logical px, window-relative + @location(4) frag_logical: vec2, // affine-transformed window-logical corner (slot 4, was the axis-aligned center) @location(5) clip_min: vec2, // logical px (clip AABB, ClipRect space) @location(6) clip_max: vec2, // logical px (clip AABB, ClipRect space) }; @@ -44,13 +46,20 @@ fn logical_to_clip(p: vec2) -> vec2 { @vertex fn vertex(v: Vertex, i: Instance) -> VertexOut { var out: VertexOut; - let logical = i.rect_pos + v.uv * i.rect_size; // logical-px corner + // R1: transform the box-local corner by the 2D affine BEFORE the + // logical->clip view map. The affine maps box-local 0 -> 0 for a pure + // rotation/scale, so an identity basis [1,0,0,1] yields rect_pos + local + // (byte-identical to the pre-R1 axis-aligned path). 
+ let local = v.uv * i.rect_size; // box-local corner (top-left at 0) + let logical = i.rect_pos + mat2x2(i.affine_col0, i.affine_col1) * local; out.clip_position = vec4(logical_to_clip(logical), 0.0, 1.0); out.local_uv = v.uv * 2.0 - 1.0; out.half_size = i.rect_size * 0.5; // positive — no abs needed out.color = i.color; out.radius = i.radius; - out.rect_center = i.rect_pos + out.half_size; // logical px, window-relative + // The affine is linear, so the interpolated frag_logical is the correct + // transformed window-space point for the clip-AABB discard. + out.frag_logical = logical; out.clip_min = i.clip_min; out.clip_max = i.clip_max; return out; @@ -67,7 +76,9 @@ fn fragment(in: VertexOut) -> @location(0) vec4 { // Per-primitive clip AABB (R8b): discard fragments outside [clip_min, // clip_max] in logical-px window space — the same space as ClipRect. The // full-view sentinel (±inf) makes this never fire (unclipped / top-layer). - let frag_pos = in.rect_center + in.local_uv * in.half_size; + // frag_logical is the affine-transformed window-logical corner (R1) — the + // correct post-transform point, not the old axis-aligned box center. + let frag_pos = in.frag_logical; if any(frag_pos < in.clip_min) || any(frag_pos > in.clip_max) { return vec4(0.0, 0.0, 0.0, 0.0); } diff --git a/crates/buiy_core/src/render/shadow.wgsl b/crates/buiy_core/src/render/shadow.wgsl index 56897c1..d8c0755 100644 --- a/crates/buiy_core/src/render/shadow.wgsl +++ b/crates/buiy_core/src/render/shadow.wgsl @@ -1,6 +1,6 @@ // Buiy box-shadow shader (octet ..02). Closed-form Gaussian-blurred // rounded-rect coverage — one draw per shadow, no convolution pass. -// Inputs match the quad instance layout (stride 52); the instance `blur` +// Inputs match the quad instance layout (stride 68); the instance `blur` // field carries the shadow's effective blur sigma in logical px for this // primitive (the sibling component-model phase maps `BoxShadow.blur` into it). 
// @@ -31,15 +31,17 @@ struct Instance { @location(5) blur: f32, // logical px, effective blur sigma @location(6) clip_min: vec2, // logical px, clip AABB min (-inf = none) @location(7) clip_max: vec2, // logical px, clip AABB max (+inf = none) + @location(8) affine_col0: vec2, // 2D affine basis col0 = [m00, m10] + @location(9) affine_col1: vec2, // 2D affine basis col1 = [m01, m11] }; struct VertexOut { @builtin(position) clip_position: vec4, - @location(0) local_uv: vec2, // -1..+1 across the rect - @location(1) half_size: vec2, // logical px + @location(0) local_uv: vec2, // -1..+1 across the rect (box-local, rotation-invariant) + @location(1) half_size: vec2, // logical px (box-local SDF half-extent) @location(2) color: vec4, @location(3) blur: f32, // logical px - @location(4) rect_center: vec2, // logical px, window-relative + @location(4) frag_logical: vec2, // affine-transformed window-logical corner (slot 4, was the axis-aligned center) @location(5) clip_min: vec2, // logical px (clip AABB, ClipRect space) @location(6) clip_max: vec2, // logical px (clip AABB, ClipRect space) }; @@ -51,13 +53,19 @@ fn logical_to_clip(p: vec2) -> vec2 { @vertex fn vertex(v: Vertex, i: Instance) -> VertexOut { var out: VertexOut; - let logical = i.rect_pos + v.uv * i.rect_size; // logical-px corner + // R1: transform the box-local corner by the 2D affine BEFORE the + // logical->clip view map (identity basis [1,0,0,1] -> rect_pos + local, + // byte-identical to the pre-R1 axis-aligned path). 
+ let local = v.uv * i.rect_size; // box-local corner (top-left at 0) + let logical = i.rect_pos + mat2x2(i.affine_col0, i.affine_col1) * local; out.clip_position = vec4(logical_to_clip(logical), 0.0, 1.0); out.local_uv = v.uv * 2.0 - 1.0; out.half_size = i.rect_size * 0.5; // positive — no abs needed out.color = i.color; out.blur = i.blur; - out.rect_center = i.rect_pos + out.half_size; // logical px, window-relative + // The affine is linear, so the interpolated frag_logical is the correct + // transformed window-space point for the clip-AABB discard. + out.frag_logical = logical; out.clip_min = i.clip_min; out.clip_max = i.clip_max; return out; @@ -85,7 +93,8 @@ fn fragment(in: VertexOut) -> @location(0) vec4 { // Per-primitive clip AABB (R8b): discard fragments outside [clip_min, // clip_max] in logical-px window space — the same space as ClipRect. The // full-view sentinel (±inf) makes this never fire (unclipped / top-layer). - let frag_pos = in.rect_center + in.local_uv * in.half_size; + // frag_logical is the affine-transformed window-logical corner (R1). 
+ let frag_pos = in.frag_logical; if any(frag_pos < in.clip_min) || any(frag_pos > in.clip_max) { return vec4(0.0, 0.0, 0.0, 0.0); } diff --git a/crates/buiy_core/tests/render_buckets.rs b/crates/buiy_core/tests/render_buckets.rs index eb54914..e892721 100644 --- a/crates/buiy_core/tests/render_buckets.rs +++ b/crates/buiy_core/tests/render_buckets.rs @@ -74,9 +74,9 @@ fn buckets_group_pushed_instances_by_key() { primitive: BuiyPrimitiveKind::Shadow, layer: 0, }; - b.push(q0, [0.0; 13]); - b.push(q0, [1.0; 13]); - b.push(s0, [2.0; 13]); + b.push(q0, [0.0; 17]); + b.push(q0, [1.0; 17]); + b.push(s0, [2.0; 17]); assert_eq!(b.len(q0), 2); assert_eq!(b.len(s0), 1); assert_eq!(b.total_instances(), 3); @@ -98,21 +98,21 @@ fn buckets_iterate_in_paint_order() { primitive: BuiyPrimitiveKind::Quad, layer: 0, }, - [0.0; 13], + [0.0; 17], ); b.push( PrimitiveBatchKey { primitive: BuiyPrimitiveKind::Shadow, layer: 0, }, - [0.0; 13], + [0.0; 17], ); b.push( PrimitiveBatchKey { primitive: BuiyPrimitiveKind::Quad, layer: 1, }, - [0.0; 13], + [0.0; 17], ); let order: Vec<_> = b.batches().map(|(k, _)| *k).collect(); // shadow@0, quad@0, then quad@1 — sorted ascending. @@ -140,16 +140,32 @@ fn node(entity: u32, position: Vec2, size: Vec2, color: Color) -> ExtractedNode color, clip: None, group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], } } #[test] fn raw_layout_stride_agrees_with_struct() { - // The [f32;13] the bucket holds must be byte-identical in size to the - // PackedInstance struct the pipeline descriptor declares (52 B). If this + // The [f32;17] the bucket holds must be byte-identical in size to the + // PackedInstance struct the pipeline descriptor declares (68 B). If this // ever drifts, the instanced draw reads garbage. 
assert!(packed_raw_stride_agrees()); - assert_eq!(std::mem::size_of::<[f32; 13]>(), 52); + assert_eq!(std::mem::size_of::<[f32; 17]>(), 68); +} + +#[test] +fn packed_to_raw_appends_affine_via_offset_consts() { + // packed_to_raw returns 17 floats: the affine basis at [13..17], and the + // alpha at ALPHA_FLOAT_OFFSET unchanged (the R2 re-tint invariant). + use buiy_core::render::buckets::packed_to_raw; + use buiy_core::render::instance::ALPHA_FLOAT_OFFSET; + let mut n = node(1, Vec2::ZERO, Vec2::splat(10.0), Color::WHITE); + n.affine = [[2.0, 0.0], [0.0, 3.0]]; + let p = pack_extracted(&n); + let raw = packed_to_raw(&p); + assert_eq!(raw.len(), 17); + assert_eq!(&raw[13..17], &[2.0, 0.0, 0.0, 3.0]); + assert_eq!(raw[ALPHA_FLOAT_OFFSET], p.color[3]); } #[test] @@ -234,6 +250,7 @@ fn grouped(entity: u32, color: Color, group: Option) -> ExtractedNode { color, clip: None, group, + affine: [[1.0, 0.0], [0.0, 1.0]], } } diff --git a/crates/buiy_core/tests/render_compositor.rs b/crates/buiy_core/tests/render_compositor.rs index 945c646..8d66ba9 100644 --- a/crates/buiy_core/tests/render_compositor.rs +++ b/crates/buiy_core/tests/render_compositor.rs @@ -368,3 +368,376 @@ fn churn_never_exceeds_rt_pool_budget() { ); assert!(plan.iter().any(|&a| !a), "some groups degraded under churn"); } + +// --------------------------------------------------------------------------- +// R2 — degraded effect groups forward-composite flat (effect-compositor.md § 2.3) +// --------------------------------------------------------------------------- + +use buiy_core::render::atlas::{GLYPH_ALPHA_FLOAT_OFFSET, GlyphAlphaInstance}; +use buiy_core::render::compositor::{DegradedGroup, fold_root_degraded_into_flat}; +use buiy_core::render::instance::ALPHA_FLOAT_OFFSET; +use std::ops::Range; + +/// A `[f32;17]` quad record with a known alpha at `ALPHA_FLOAT_OFFSET` and a +/// sentinel in the neighbouring slots so an off-by-one write is caught. 
+fn quad_with_alpha(alpha: f32) -> [f32; 17] { + let mut r = [0.0f32; 17]; + // Fill with a recognizable ramp so a stray write to the wrong index shows. + for (i, v) in r.iter_mut().enumerate() { + *v = i as f32; + } + r[ALPHA_FLOAT_OFFSET] = alpha; + r +} + +fn glyph_with_alpha(alpha: f32) -> GlyphAlphaInstance { + GlyphAlphaInstance { + rect: [1.0, 2.0, 3.0, 4.0], + uv: [5.0, 6.0, 7.0, 8.0], + color: [0.1, 0.2, 0.3, alpha], + clip: [9.0, 10.0, 11.0, 12.0], + page: 0, + } +} + +#[test] +fn degraded_fold_multiplies_quad_alpha_and_merges_flat_range() { + // Two ROOT groups: A degraded (opacity 0.5), B keeps its target. + // Quad layout: A's members [0,2), B's members [2,4), a non-group run [4,6). + let mut quad: Vec<[f32; 17]> = (0..6).map(|i| quad_with_alpha(0.8 + i as f32)).collect(); + let mut glyph: Vec = Vec::new(); + // The flat ranges as prepare's partition would hand them: the non-group + // tail [4,6) is the only flat run before the fold (group members A,B are + // excluded). `iter::once` sidesteps the `single_range_in_vec_init` + // lint, which fires on both `vec![4..6]` and `[4..6]` array initializers. + let mut quad_flat: Vec> = std::iter::once(4..6).collect(); + let mut glyph_flat: Vec> = vec![]; + + let original: Vec<[f32; 17]> = quad.clone(); + + let groups = [ + DegradedGroup { + quad_range: 0..2, + glyph_range: 0..0, + opacity: 0.5, + parent: None, + }, + DegradedGroup { + quad_range: 2..4, + glyph_range: 0..0, + opacity: 0.7, + parent: None, + }, + ]; + let allocate = [false, true]; // A degraded, B allocated. + + fold_root_degraded_into_flat( + &allocate, + &groups, + true, // fold_quad + true, // merge_quad + true, // fold_glyph + true, // merge_glyph + &mut quad, + &mut glyph, + &mut quad_flat, + &mut glyph_flat, + ); + + // (a) every instance in A's range dimmed by 0.5, read at ALPHA_FLOAT_OFFSET.
+ for i in 0..2 { + let want = original[i][ALPHA_FLOAT_OFFSET] * 0.5; + assert!( + (quad[i][ALPHA_FLOAT_OFFSET] - want).abs() < 1e-6, + "A instance {i} alpha folded by 0.5" + ); + // Neighbouring slots untouched (no off-by-one). + assert_eq!( + quad[i][ALPHA_FLOAT_OFFSET - 1], + original[i][ALPHA_FLOAT_OFFSET - 1] + ); + assert_eq!( + quad[i][ALPHA_FLOAT_OFFSET + 1], + original[i][ALPHA_FLOAT_OFFSET + 1] + ); + } + // (b) B's range (allocated, keeps a target) is unchanged. + for i in 2..4 { + assert_eq!( + quad[i], original[i], + "B instance {i} unchanged (not degraded)" + ); + } + // (c) A's range is merged into flat ranges; B's stays excluded. + assert!( + quad_flat.contains(&(0..2)), + "A's degraded range merged into flat: {quad_flat:?}" + ); + assert!( + !quad_flat.iter().any(|r| r.start == 2), + "B's range stays excluded from flat: {quad_flat:?}" + ); + // (d) coalescing + order: ranges sorted, no overlaps. + for w in quad_flat.windows(2) { + assert!( + w[0].end <= w[1].start, + "flat ranges sorted & disjoint: {quad_flat:?}" + ); + } +} + +#[test] +fn degraded_fold_coalesces_adjacent_flat_runs() { + // A degraded group [2,4) sits exactly between two existing flat runs + // [0,2) and [4,6): merging must coalesce all three into [0,6). + let mut quad: Vec<[f32; 17]> = (0..6).map(|_| quad_with_alpha(1.0)).collect(); + let mut glyph: Vec = Vec::new(); + let mut quad_flat: Vec> = vec![0..2, 4..6]; + let mut glyph_flat: Vec> = vec![]; + + let groups = [DegradedGroup { + quad_range: 2..4, + glyph_range: 0..0, + opacity: 0.25, + parent: None, + }]; + fold_root_degraded_into_flat( + &[false], + &groups, + true, + true, + true, + true, + &mut quad, + &mut glyph, + &mut quad_flat, + &mut glyph_flat, + ); + assert_eq!( + quad_flat, + vec![0..6], + "adjacent runs coalesce: {quad_flat:?}" + ); +} + +#[test] +fn degraded_fold_multiplies_glyph_alpha_at_offset_11() { + // The glyph tier folds color[3] (raw float index 11), NOT offset 7. 
+ let mut quad: Vec<[f32; 17]> = Vec::new(); + let mut glyph: Vec = (0..3) + .map(|i| glyph_with_alpha(0.4 + i as f32 * 0.1)) + .collect(); + let mut quad_flat: Vec> = vec![]; + let mut glyph_flat: Vec> = vec![]; + let original = glyph.clone(); + + let groups = [DegradedGroup { + quad_range: 0..0, + glyph_range: 0..3, + opacity: 0.5, + parent: None, + }]; + fold_root_degraded_into_flat( + &[false], + &groups, + true, + true, + true, + true, + &mut quad, + &mut glyph, + &mut quad_flat, + &mut glyph_flat, + ); + + for i in 0..3 { + // color[3] dimmed. + assert!( + (glyph[i].color[3] - original[i].color[3] * 0.5).abs() < 1e-6, + "glyph {i} alpha folded at color[3]" + ); + // Raw-view parity: the named const points at color[3] (= float idx 11). + assert_eq!(GLYPH_ALPHA_FLOAT_OFFSET, 11); + let raw: &[f32; 17] = bytemuck::cast_ref::(&glyph[i]); + assert!( + (raw[GLYPH_ALPHA_FLOAT_OFFSET] - glyph[i].color[3]).abs() < 1e-6, + "raw float index 11 == color[3]" + ); + // uv/clip/rect untouched — proves we did NOT write offset 7 (= uv[3]). + assert_eq!(glyph[i].uv, original[i].uv, "uv untouched"); + assert_eq!(glyph[i].clip, original[i].clip, "clip untouched"); + assert_eq!(glyph[i].rect, original[i].rect, "rect untouched"); + assert_eq!( + glyph[i].color[0], original[i].color[0], + "color.rgb untouched" + ); + } + assert!(glyph_flat.contains(&(0..3)), "glyph range merged into flat"); +} + +#[test] +fn degraded_fold_reads_source_alpha_not_accumulated() { + // The fn computes source*opacity over the value it READS once — it does not + // accumulate. (The once-per-pack contract is enforced by the system gate; + // here we pin that ONE call yields exactly source*opacity.) 
+ let mut quad: Vec<[f32; 17]> = vec![quad_with_alpha(0.8)]; + let mut glyph: Vec = Vec::new(); + let mut quad_flat: Vec> = vec![]; + let mut glyph_flat: Vec> = vec![]; + let groups = [DegradedGroup { + quad_range: 0..1, + glyph_range: 0..0, + opacity: 0.5, + parent: None, + }]; + fold_root_degraded_into_flat( + &[false], + &groups, + true, + true, + true, + true, + &mut quad, + &mut glyph, + &mut quad_flat, + &mut glyph_flat, + ); + assert!( + (quad[0][ALPHA_FLOAT_OFFSET] - 0.4).abs() < 1e-6, + "0.8 * 0.5 == 0.4" + ); +} + +#[test] +fn degraded_fold_per_tier_gate_skips_ungated_tier() { + // All glyph gates OFF (fold_glyph=false, merge_glyph=false): the quad + // buffer/ranges fold + merge; the glyph buffer AND glyph ranges are left + // wholly untouched. This pins the case where NEITHER the glyph buffer nor + // the glyph partition was rebuilt this frame. + let mut quad: Vec<[f32; 17]> = vec![quad_with_alpha(0.8)]; + let mut glyph: Vec = vec![glyph_with_alpha(0.8)]; + let mut quad_flat: Vec> = vec![]; + let mut glyph_flat: Vec> = vec![]; + let groups = [DegradedGroup { + quad_range: 0..1, + glyph_range: 0..1, + opacity: 0.5, + parent: None, + }]; + fold_root_degraded_into_flat( + &[false], + &groups, + true, // fold_quad + true, // merge_quad + false, // fold_glyph — skip + false, // merge_glyph — skip + &mut quad, + &mut glyph, + &mut quad_flat, + &mut glyph_flat, + ); + assert!( + (quad[0][ALPHA_FLOAT_OFFSET] - 0.4).abs() < 1e-6, + "quad folded" + ); + assert_eq!(quad_flat, vec![0..1], "quad range merged"); + assert!( + (glyph[0].color[3] - 0.8).abs() < 1e-6, + "glyph NOT folded (gate off)" + ); + assert!(glyph_flat.is_empty(), "glyph range NOT merged (gate off)"); +} + +#[test] +fn degraded_glyph_range_remerges_on_quad_dirty_only_frame() { + // MAJOR-2 (the vanish fix). 
On a quad-dirty-only frame with a live degraded + // glyph group, the glyph PARTITION is rebuilt (prepare re-EXCLUDES the + // degraded glyph range from `glyph_flat`) while the glyph BUFFER is RETAINED + // (already carries last frame's fold). The two glyph gates therefore SPLIT: + // fold_glyph = glyph_dirty = false (buffer retained) + // merge_glyph = quad_dirty || glyph_dirty = true (partition rebuilt) + // The range MUST be re-merged (else the degraded glyphs vanish that frame), + // and the already-folded retained alpha MUST NOT be re-folded (else it + // compounds toward black). This is exactly the frame the #[ignore] GPU test + // `degraded_glyph_fold_idempotent_under_quad_dirty_only_frame` exercises + // end-to-end; this pins the caller's gate choice headlessly. + // + // Model the retained buffer: its glyph already carries last frame's fold + // (0.8 * 0.5 == 0.4). `glyph_flat` starts EMPTY — prepare's fresh partition + // rebuild excluded the degraded range this frame. + let mut quad: Vec<[f32; 17]> = vec![quad_with_alpha(0.8)]; + let mut glyph: Vec = vec![glyph_with_alpha(0.4)]; + let mut quad_flat: Vec> = vec![]; + let mut glyph_flat: Vec> = vec![]; + let groups = [DegradedGroup { + quad_range: 0..1, + glyph_range: 0..1, + opacity: 0.5, + parent: None, + }]; + fold_root_degraded_into_flat( + &[false], + &groups, + true, // fold_quad (quad buffer repacked this frame) + true, // merge_quad (quad partition rebuilt) + false, // fold_glyph — glyph buffer RETAINED, do NOT re-fold + true, // merge_glyph — glyph partition rebuilt, re-add the range + &mut quad, + &mut glyph, + &mut quad_flat, + &mut glyph_flat, + ); + // The range is re-merged so the flat draw paints the degraded glyphs. + assert_eq!( + glyph_flat, + vec![0..1], + "degraded glyph range re-merged on a quad-dirty-only frame (not vanished)" + ); + // The retained, already-folded alpha is NOT re-folded (stays 0.4, not 0.2). 
+ assert!( + (glyph[0].color[3] - 0.4).abs() < 1e-6, + "retained glyph alpha left untouched (no double-fold to 0.2)" + ); +} + +#[test] +fn degraded_fold_skips_nested_group_in_release_path() { + // A degraded NESTED group (parent == Some): the slice scopes to root-degraded. + // In release, the nested group's ranges are NOT merged and its alpha is left + // untouched (no worse than today's vanish — tracked by a follow-up). Under + // debug the fn debug_asserts; this test must run release-only to assert the + // containment behavior. + if cfg!(debug_assertions) { + // Debug builds debug_assert!(false) on a nested degraded group — that is + // the loud-in-dev guard; the release containment is what we assert. + return; + } + let mut quad: Vec<[f32; 17]> = vec![quad_with_alpha(0.8)]; + let mut glyph: Vec = Vec::new(); + let mut quad_flat: Vec> = vec![]; + let mut glyph_flat: Vec> = vec![]; + let original = quad.clone(); + let groups = [DegradedGroup { + quad_range: 0..1, + glyph_range: 0..0, + opacity: 0.5, + parent: Some(7), // nested under group 7 + }]; + fold_root_degraded_into_flat( + &[false], + &groups, + true, + true, + true, + true, + &mut quad, + &mut glyph, + &mut quad_flat, + &mut glyph_flat, + ); + assert_eq!( + quad[0], original[0], + "nested degraded alpha untouched in release" + ); + assert!(quad_flat.is_empty(), "nested degraded range NOT merged"); +} diff --git a/crates/buiy_core/tests/render_degraded_group_gpu.rs b/crates/buiy_core/tests/render_degraded_group_gpu.rs new file mode 100644 index 0000000..35829af --- /dev/null +++ b/crates/buiy_core/tests/render_degraded_group_gpu.rs @@ -0,0 +1,438 @@ +//! GPU-path tests for the DEGRADED effect-group forward-composite (R2 / +//! effect-compositor.md § 2.3): a ROOT group that loses its pooled +//! `Rgba16Float` target under RT-pool budget pressure must paint FLAT with its +//! `opacity` folded per-instance, NOT vanish. These need a wgpu adapter (real +//! 
GPU or lavapipe), which CI / this host lack, so they are `#[ignore]` exactly +//! like tests/render_smoke.rs. Run locally with: +//! cargo test -p buiy_core --test render_degraded_group_gpu -- --ignored + +mod support; + +use bevy::prelude::*; + +/// Force the RT-pool degradation path: insert a tiny [`RtPoolBudget`] into the +/// render world so `plan_allocation` returns `false` for the lowest-cost groups. +/// The const default is 64 MiB (nothing degrades); a few hundred bytes degrades +/// almost everything. The render world persists across frames, so one insert +/// holds for the whole test. +fn force_tiny_rt_budget(app: &mut App, bytes: u64) { + use buiy_core::render::compositor::RtPoolBudget; + app.get_sub_app_mut(bevy::render::RenderApp) + .expect("RenderApp") + .world_mut() + .insert_resource(RtPoolBudget(bytes)); +} + +#[test] +#[ignore = "needs a wgpu adapter (real GPU or lavapipe); run with --ignored"] +fn render_degraded_group_gpu() { + // R2 (effect-compositor.md § 2.3): a ROOT `Opacity` group that DEGRADES under + // budget pressure must paint FLAT with its opacity folded per-instance — its + // pixels are PRESENT at folded opacity, NOT vanished. With a tiny RT budget, + // `plan_allocation` degrades the group, `prepare_effect_groups` folds + // `opacity` into its members' alpha and merges its range into `flat_ranges`, + // and the flat window draw paints it. + use buiy_core::Node; + use buiy_core::layout::{Inset, Length, Sizing, Style}; + use buiy_core::render::color::ColorToken; + use buiy_core::render::components::{Background, Opacity}; + use std::borrow::Cow; + + const W: u32 = 64; + const H: u32 = 64; + let red = Color::srgb(0.9, 0.05, 0.05); // OPAQUE red + + let mut app = support::gpu_render_app(W, H); + // Degrade everything: budget far below one group's target bytes. 
+ force_tiny_rt_budget(&mut app, 64); + { + let mut theme = app.world_mut().resource_mut::(); + theme.colors.insert("test.red".into(), red); + } + let target = support::render_to_image(&mut app, W, H); + support::spawn_capture_camera(&mut app, target.clone()); + + // One ROOT Opacity(0.6) group with a single opaque-red fill child. + let fill = app + .world_mut() + .spawn(( + Node, + Style::default() + .absolute() + .inset(Inset { + top: Sizing::Length(Length::px(16.0)), + left: Sizing::Length(Length::px(16.0)), + ..default() + }) + .width_px(32.0) + .height_px(32.0), + Background { + color: ColorToken::Token(Cow::Borrowed("test.red")), + }, + )) + .id(); + let parent = app + .world_mut() + .spawn((Node, Style::default().absolute(), Opacity(0.6))) + .id(); + app.world_mut().entity_mut(parent).add_children(&[fill]); + app.world_mut() + .spawn((Node, Style::default())) + .add_children(&[parent]); + + support::finish_and_run(&mut app, 4); + let pixels = support::readback_rgba(&mut app, target); + let px = |x: u32, y: u32| support::px(&pixels, W, x, y); + + // Folded-flat expectation: opaque red at alpha 0.6 over the opaque-black + // clear, encoded linear→sRGB8 (the Rgba8UnormSrgb target). The fold sets the + // instance alpha to 0.6, and the flat straight-alpha SrcOver blend produces + // red*0.6 over black. 
+ let red_lin = LinearRgba::from(red); + let folded = LinearRgba::new(red_lin.red, red_lin.green, red_lin.blue, 0.6); + let black = LinearRgba::new(0.0, 0.0, 0.0, 1.0); + let a = folded.alpha; + let over = LinearRgba::new( + folded.red * a + black.red * (1.0 - a), + folded.green * a + black.green * (1.0 - a), + folded.blue * a + black.blue * (1.0 - a), + a + black.alpha * (1.0 - a), + ); + let s = Srgba::from(over); + let expected = [ + (s.red * 255.0).round() as u8, + (s.green * 255.0).round() as u8, + (s.blue * 255.0).round() as u8, + 255u8, + ]; + + let inside = px(28, 28); // deep interior of the 32x32 fill + let clear = px(1, 1); + println!("degraded inside (28,28) = {inside:?} (expected {expected:?})"); + println!("clear (1,1) = {clear:?}"); + + // (a) the degraded group's pixels are PRESENT (not background) at folded 0.6. + assert_ne!(inside, clear, "degraded group must paint, not vanish"); + const TOL: i32 = 5; + for ch in 0..3 { + assert!( + (inside[ch] as i32 - expected[ch] as i32).abs() <= TOL, + "degraded channel {ch}: got {}, expected {} (±{TOL}); folded-flat at 0.6", + inside[ch], + expected[ch] + ); + } +} + +#[test] +#[ignore = "needs a wgpu adapter (real GPU or lavapipe); run with --ignored"] +fn degraded_fold_does_not_compound_over_two_frames() { + // Per-tier idempotency (effect-compositor.md § 2.3): on a STEADY-STATE frame + // the quad buffer is RETAINED (not repacked from source), so the fold must NOT + // re-run — the degraded pixel is identical frame-to-frame. A fold that ran + // every frame would compound to black. 
+ use buiy_core::Node; + use buiy_core::layout::{Inset, Length, Sizing, Style}; + use buiy_core::render::color::ColorToken; + use buiy_core::render::components::{Background, Opacity}; + use std::borrow::Cow; + + const W: u32 = 64; + const H: u32 = 64; + let red = Color::srgb(0.9, 0.05, 0.05); + + let mut app = support::gpu_render_app(W, H); + force_tiny_rt_budget(&mut app, 64); + { + let mut theme = app.world_mut().resource_mut::(); + theme.colors.insert("test.red".into(), red); + } + let target = support::render_to_image(&mut app, W, H); + support::spawn_capture_camera(&mut app, target.clone()); + + let fill = app + .world_mut() + .spawn(( + Node, + Style::default() + .absolute() + .inset(Inset { + top: Sizing::Length(Length::px(16.0)), + left: Sizing::Length(Length::px(16.0)), + ..default() + }) + .width_px(32.0) + .height_px(32.0), + Background { + color: ColorToken::Token(Cow::Borrowed("test.red")), + }, + )) + .id(); + let parent = app + .world_mut() + .spawn((Node, Style::default().absolute(), Opacity(0.6))) + .id(); + app.world_mut().entity_mut(parent).add_children(&[fill]); + app.world_mut() + .spawn((Node, Style::default())) + .add_children(&[parent]); + + support::finish_and_run(&mut app, 4); + let frame1 = support::readback_rgba(&mut app, target.clone()); + // Drive steady-state frames (no paint input changes → quad buffer retained). 
+ for _ in 0..3 { + app.update(); + } + let frame2 = support::readback_rgba(&mut app, target); + let p1 = support::px(&frame1, W, 28, 28); + let p2 = support::px(&frame2, W, 28, 28); + println!("frame1 (28,28) = {p1:?} frame2 = {p2:?}"); + const TOL: i32 = 2; + for ch in 0..4 { + assert!( + (p1[ch] as i32 - p2[ch] as i32).abs() <= TOL, + "degraded pixel must not compound across steady frames: ch {ch} \ + {} vs {} (the fold ran once, not per-frame)", + p1[ch], + p2[ch] + ); + } +} + +#[test] +#[ignore = "needs a wgpu adapter (real GPU or lavapipe); run with --ignored"] +fn degraded_glyph_fold_idempotent_under_quad_dirty_only_frame() { + // MAJOR-2 glyph idempotency: a degraded group with BOTH a quad bg and glyphs. + // Frame 2 mutates ONLY a quad input (the blue bg color → quad_dirty true, + // glyph_dirty false, so the glyph buffer is RETAINED). The degraded WHITE + // glyph ink must be unchanged (NOT re-dimmed) AND must still be PRESENT — + // proving the glyph ALPHA-fold gates on `glyph_dirty` (no re-fold on the + // retained buffer) while the glyph RANGE-MERGE gates on `quad_dirty || + // glyph_dirty` (the partition rebuild re-excludes the degraded glyph range, + // so the merge must re-add it or the glyphs VANISH that frame). Without a real + // GPU this is `#[ignore]`; it pins the split-gate the headless + // `degraded_glyph_range_remerges_on_quad_dirty_only_frame` proves at the + // pure-function tier, end-to-end. + // + // Channel discipline: we assert on the white ink's RED+GREEN, not blue. The + // ink sits over the blue bg we deliberately edit, so anti-aliased glyph edges + // legitimately blend the new bg into their BLUE channel (correct AA, not a + // regression). White ink dominates R+G, which are orthogonal to a blue-only + // bg edit, so R+G isolate the glyph fold. A double-fold dims the ink (R+G + // drop); a dropped range-merge reverts it to the ~8 bg (R+G collapse) — R+G + // stability rejects both. See the per-pixel assertion below. 
+ use buiy_core::Node; + use buiy_core::layout::{Inset, Length, Sizing, Style}; + use buiy_core::render::color::ColorToken; + use buiy_core::render::components::{Background, Opacity, TextColor}; + use buiy_core::text::{FontSize, Text}; + use std::borrow::Cow; + + const W: u32 = 96; + const H: u32 = 64; + let blue = Color::srgb(0.05, 0.05, 0.9); + + let mut app = support::gpu_render_app(W, H); + force_tiny_rt_budget(&mut app, 64); + { + let mut theme = app.world_mut().resource_mut::(); + theme.colors.insert("test.blue".into(), blue); + theme.colors.insert("test.white".into(), Color::WHITE); + } + let target = support::render_to_image(&mut app, W, H); + support::spawn_capture_camera(&mut app, target.clone()); + + // A degraded Opacity group holding a quad bg AND a glyph run. + let bg = app + .world_mut() + .spawn(( + Node, + Style::default() + .absolute() + .inset(Inset { + top: Sizing::Length(Length::px(8.0)), + left: Sizing::Length(Length::px(8.0)), + ..default() + }) + .width_px(64.0) + .height_px(40.0), + Background { + color: ColorToken::Token(Cow::Borrowed("test.blue")), + }, + )) + .id(); + let text = app + .world_mut() + .spawn(( + Node, + Style::default().absolute().inset(Inset { + top: Sizing::Length(Length::px(12.0)), + left: Sizing::Length(Length::px(12.0)), + ..default() + }), + Text(String::from("Hi")), + FontSize(24.0), + TextColor(ColorToken::Token(Cow::Borrowed("test.white"))), + )) + .id(); + let parent = app + .world_mut() + .spawn((Node, Style::default().absolute(), Opacity(0.6))) + .id(); + app.world_mut().entity_mut(parent).add_children(&[bg, text]); + app.world_mut() + .spawn((Node, Style::default())) + .add_children(&[parent]); + + support::finish_and_run(&mut app, 4); + support::wait_for_text_ready(&mut app, 60); + let frame1 = support::readback_rgba(&mut app, target.clone()); + + // Mutate ONLY a quad input (the bg color) → quad_dirty, glyph retained. 
+ { + let mut theme = app.world_mut().resource_mut::(); + theme + .colors + .insert("test.blue".into(), Color::srgb(0.05, 0.05, 0.7)); + } + support::finish_and_run(&mut app, 3); + let frame2 = support::readback_rgba(&mut app, target); + + // Sample the WHITE glyph ink of the "Hi" run and assert its red+green + // channels are byte-stable frame-to-frame. We test R+G specifically (NOT + // blue) because the ink is white over a BLUE background we *deliberately* + // mutated this frame: anti-aliased glyph-edge pixels legitimately blend + // `white·coverage + bg·(1-coverage)`, so their BLUE channel tracks the bg + // change by design — comparing blue would flag correct AA as a regression. + // White ink dominates R+G, which are orthogonal to the blue bg edit, so R+G + // isolate the glyph fold from the bg. The dominant failure this frame pins is + // VANISH: if the range-merge were (wrongly) gated on glyph_dirty instead of + // quad_dirty||glyph_dirty, the partition rebuild would drop the degraded glyph + // range on this quad-dirty-only frame and the ink would revert to the ~(8,8) + // blue background — R+G collapsing from ~150-203 down to ~8, a huge delta this + // assertion rejects. (The complementary double-fold/compounding hazard — the + // alpha-fold wrongly re-running on the retained buffer — is the charter of + // `degraded_fold_does_not_compound_over_two_frames`, which drives multiple + // glyph-dirty frames; on THIS single quad-dirty-only frame a CPU re-fold would + // not even re-upload, so the split-gate's no-re-fold half is pinned at the + // pure-function tier by the headless mirror named in the doc comment above.) + // Glyph ink is identified by high R+G (white over blue bg never reaches that: + // its R+G are ~8). 
We require a real population of ink pixels so a fixture + // drift that moved the glyphs out of the sampled box fails loudly instead of + // silently asserting over zero pixels (the original single-row band sampled + // pure background and "passed"/"failed" on the bg, not the ink). + let mut ink_pixels = 0usize; + for y in 14..34 { + for x in 10..40 { + let a = support::px(&frame1, W, x, y); + let b = support::px(&frame2, W, x, y); + // White ink in frame 1: both R and G well above the blue bg's ~8. + if a[0] > 150 && a[1] > 150 { + ink_pixels += 1; + let d_r = (a[0] as i32 - b[0] as i32).abs(); + let d_g = (a[1] as i32 - b[1] as i32).abs(); + assert!( + d_r <= 3 && d_g <= 3, + "degraded glyph ink (white) must be stable on a quad-dirty-only \ + frame at ({x},{y}): frame1={a:?} frame2={b:?} (R+G must not \ + move — the alpha-fold gates on glyph_dirty so the retained \ + buffer is not re-folded, while the range-merge gates on \ + quad_dirty||glyph_dirty so the glyphs are re-merged, not \ + vanished). A double-fold dims the ink; a dropped merge \ + reverts it to the ~8 blue background." + ); + } + } + } + assert!( + ink_pixels >= 8, + "expected the white \"Hi\" run to land in the sampled box (found only \ + {ink_pixels} ink pixels) — fixture drift moved the glyphs; the stability \ + assertion would otherwise vacuously pass over background" + ); +} + +#[test] +#[ignore = "needs a wgpu adapter (real GPU or lavapipe); run with --ignored"] +fn nested_degraded_group_does_not_corrupt_parent() { + // Scope guard (MAJOR-1): a NESTED group forced to degrade. The root-degraded + // slice does NOT handle nested forward-composite (that routes into the PARENT + // target, a node-side follow-up). `fold_root_degraded_into_flat` + // debug_asserts on a nested degraded group, so in a DEBUG build this fixture + // would panic in prepare — which is the intended loud guard. 
In a RELEASE + // build the nested child is left untouched (it vanishes, tracked) and must NOT + // mis-place at window level or corrupt the parent. We assert the parent's + // composited region is not corrupted (a plausible non-degraded sibling still + // paints). Under debug we skip the body (the debug_assert is the containment). + if cfg!(debug_assertions) { + // The prepare-side debug_assert is the containment in debug builds. + return; + } + use buiy_core::Node; + use buiy_core::layout::{Inset, Length, Sizing, Style}; + use buiy_core::render::color::ColorToken; + use buiy_core::render::components::{Background, Opacity}; + use std::borrow::Cow; + + const W: u32 = 96; + const H: u32 = 96; + let red = Color::srgb(0.9, 0.05, 0.05); + + // Budget that fits the OUTER group's target but degrades the smaller INNER + // (nested) one: plan_allocation degrades lowest-cost (smallest) first. + let mut app = support::gpu_render_app(W, H); + force_tiny_rt_budget(&mut app, 4096); + { + let mut theme = app.world_mut().resource_mut::(); + theme.colors.insert("test.red".into(), red); + } + let target = support::render_to_image(&mut app, W, H); + support::spawn_capture_camera(&mut app, target.clone()); + + // Outer Opacity group (large) containing an inner Opacity group (small) with + // a fill — the inner is the nested degrade candidate. 
+ let inner_fill = app + .world_mut() + .spawn(( + Node, + Style::default() + .absolute() + .inset(Inset { + top: Sizing::Length(Length::px(20.0)), + left: Sizing::Length(Length::px(20.0)), + ..default() + }) + .width_px(16.0) + .height_px(16.0), + Background { + color: ColorToken::Token(Cow::Borrowed("test.red")), + }, + )) + .id(); + let inner = app + .world_mut() + .spawn((Node, Style::default().absolute(), Opacity(0.5))) + .id(); + app.world_mut() + .entity_mut(inner) + .add_children(&[inner_fill]); + let outer = app + .world_mut() + .spawn((Node, Style::default().absolute(), Opacity(0.8))) + .id(); + app.world_mut().entity_mut(outer).add_children(&[inner]); + app.world_mut() + .spawn((Node, Style::default())) + .add_children(&[outer]); + + support::finish_and_run(&mut app, 4); + let pixels = support::readback_rgba(&mut app, target); + // The corner is untouched: the slice's flat-merge must NOT have mis-placed the + // nested child at window level (a wrong-space paint would smear it here). + let corner = support::px(&pixels, W, 1, 1); + assert_eq!( + corner, + [0, 0, 0, 255], + "nested degrade must not mis-place the child at window level (corner clean)" + ); +} diff --git a/crates/buiy_core/tests/render_extract.rs b/crates/buiy_core/tests/render_extract.rs index ee5a298..819e4df 100644 --- a/crates/buiy_core/tests/render_extract.rs +++ b/crates/buiy_core/tests/render_extract.rs @@ -166,6 +166,7 @@ fn assemble_preserves_clip_per_entity() { // Only entity 2 carries a clip; the others stay unclipped. 
clip: (x == e(2)).then_some(clip2), group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], }) }); let clips: Vec> = nodes.nodes.iter().map(|n| n.clip).collect(); @@ -266,6 +267,103 @@ fn extracted_node_position_follows_global_transform() { assert_eq!(node.position, Vec2::new(200.0, 300.0)); } +#[test] +fn extracted_node_carries_affine_basis_from_global_transform() { + // The 2D linear part of GlobalTransform's affine is carried onto the record + // so the GPU vertex stage can apply rotation/scale (not just the + // translation). A 90deg z-rotation is ASYMMETRIC, so it catches a transpose: + // R(90) maps x_axis -> (0,1) and y_axis -> (-1,0), so col0 = [0,1] and + // col1 = [-1,0]. The translation.xy must still be the painted top-left. + use std::f32::consts::FRAC_PI_2; + let theme = Theme::default(); + let layout = ResolvedLayout { + position: Vec2::ZERO, + size: Vec2::splat(50.0), + }; + let affine3 = bevy::math::Affine3A::from_rotation_translation( + Quat::from_rotation_z(FRAC_PI_2), + Vec3::new(11.0, 22.0, 0.0), + ); + let gt = GlobalTransform::from(affine3); + let node = extracted_node_for( + Entity::from_raw_u32(3).unwrap(), + >, + &layout, + None, + None, + &theme, + ); + // col0 = xy of x_axis, col1 = xy of y_axis (columns, NOT rows). + let eps = 1e-5; + assert!( + (node.affine[0][0] - 0.0).abs() < eps, + "m00 = {}", + node.affine[0][0] + ); + assert!( + (node.affine[0][1] - 1.0).abs() < eps, + "m10 = {}", + node.affine[0][1] + ); + assert!( + (node.affine[1][0] - -1.0).abs() < eps, + "m01 = {}", + node.affine[1][0] + ); + assert!( + (node.affine[1][1] - 0.0).abs() < eps, + "m11 = {}", + node.affine[1][1] + ); + assert_eq!(node.position, Vec2::new(11.0, 22.0)); +} + +#[test] +fn extracted_node_identity_affine_is_identity_basis() { + // An identity GlobalTransform yields the [[1,0],[0,1]] basis — the + // byte-identical fast path (every pre-affine pixel/test stays unchanged). 
+ let theme = Theme::default(); + let layout = ResolvedLayout { + position: Vec2::ZERO, + size: Vec2::splat(10.0), + }; + let node = extracted_node_for( + Entity::from_raw_u32(4).unwrap(), + &GlobalTransform::IDENTITY, + &layout, + None, + None, + &theme, + ); + assert_eq!(node.affine, [[1.0, 0.0], [0.0, 1.0]]); +} + +#[test] +fn extracted_node_nonuniform_scale_basis() { + // A (2,3) non-uniform scale yields the diagonal basis [[2,0],[0,3]] — + // faithful for non-uniform scale (within the bridge's TRS range). + let theme = Theme::default(); + let layout = ResolvedLayout { + position: Vec2::ZERO, + size: Vec2::splat(10.0), + }; + let affine3 = bevy::math::Affine3A::from_scale(Vec3::new(2.0, 3.0, 1.0)); + let gt = GlobalTransform::from(affine3); + let node = extracted_node_for( + Entity::from_raw_u32(5).unwrap(), + >, + &layout, + None, + None, + &theme, + ); + let eps = 1e-5; + assert!((node.affine[0][0] - 2.0).abs() < eps); + assert!((node.affine[0][1] - 0.0).abs() < eps); + assert!((node.affine[1][0] - 0.0).abs() < eps); + assert!((node.affine[1][1] - 3.0).abs() < eps); +} + use buiy_core::render::extract::{ ExtractedNode, ExtractedNodes, assemble_context_tree, assemble_in_paint_order, }; @@ -298,6 +396,7 @@ fn assemble_emits_in_painters_z_order() { color: Color::WHITE, clip: None, group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], }) }); let got: Vec = nodes.nodes.iter().map(|n| n.entity).collect(); @@ -325,6 +424,7 @@ fn assemble_drops_skipped_entities() { color: Color::WHITE, clip: None, group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], }) } }); @@ -360,6 +460,7 @@ fn hit_test_order_is_paint_order_reversed() { color: Color::WHITE, clip: None, group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], }) }); // Paint order is painters_z forward. 
@@ -420,6 +521,7 @@ fn nested_context_is_entered_atomically_at_its_parent_position() { color: Color::WHITE, clip: None, group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], }) }, &mut out, @@ -468,6 +570,7 @@ fn tree_assembly_skips_dropped_entities_across_the_boundary() { color: Color::WHITE, clip: None, group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], }) } }, diff --git a/crates/buiy_core/tests/render_instance.rs b/crates/buiy_core/tests/render_instance.rs index 90a7b5b..2639708 100644 --- a/crates/buiy_core/tests/render_instance.rs +++ b/crates/buiy_core/tests/render_instance.rs @@ -42,13 +42,14 @@ use buiy_core::render::instance::{PACKED_INSTANCE_STRIDE_BYTES, PackedInstance, #[test] fn packed_instance_stride_matches_logical_pipeline_descriptor() { // pos(2*4) + size(2*4) + color(4*4) + radius(1*4) + clip_min(2*4) + - // clip_max(2*4) = 52, in LOGICAL px (not clip). The clip AABB rides every - // instance (R8b fragment discard); the const must equal the struct stride. + // clip_max(2*4) + affine(4*4) = 68, in LOGICAL px (not clip). The clip AABB + // and the 2D affine basis ride every instance (R8b fragment discard + R1 + // transform paint); the const must equal the struct stride. assert_eq!( std::mem::size_of::(), PACKED_INSTANCE_STRIDE_BYTES ); - assert_eq!(PACKED_INSTANCE_STRIDE_BYTES, 52); + assert_eq!(PACKED_INSTANCE_STRIDE_BYTES, 68); } #[test] @@ -102,20 +103,68 @@ fn node_with_clip(clip: Option) -> ExtractedNode { color: Color::WHITE, clip, group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], } } #[test] -fn packed_instance_stride_is_52() { - // R8b: pos(2)+size(2)+color(4)+radius(1)+clip_min(2)+clip_max(2) = 13 f32 = 52 B. - // The struct stride, the const, and the raw [f32;13] must all agree (52 B); - // any drift makes the instanced draw read garbage. - assert_eq!(std::mem::size_of::(), 52); +fn packed_instance_stride_is_68() { + // R8b + R1: pos(2)+size(2)+color(4)+radius(1)+clip_min(2)+clip_max(2) + // +affine(4) = 17 f32 = 68 B. 
The struct stride, the const, and the raw + // [f32;17] must all agree (68 B); any drift makes the instanced draw read + // garbage. + assert_eq!(std::mem::size_of::(), 68); assert_eq!( std::mem::size_of::(), - std::mem::size_of::<[f32; 13]>() + std::mem::size_of::<[f32; 17]>() ); - assert_eq!(PACKED_INSTANCE_STRIDE_BYTES, 52); + assert_eq!(PACKED_INSTANCE_STRIDE_BYTES, 68); +} + +#[test] +fn packed_instance_appends_affine_after_existing_thirteen() { + // R1 HARD CONSTRAINT (campaign-review MAJOR — R2 depends on it): the 2x2 + // affine basis appends AFTER the existing 13 floats so every existing field + // offset is UNCHANGED (notably color@4 / alpha@7). The raw record carries + // the flattened basis [m00,m10,m01,m11] at [13..17], and raw[0..13] is + // byte-identical to the pre-R1 layout. + use buiy_core::render::buckets::packed_to_raw; + let mut node = node_with_clip(Some(ClipRect { + min: Vec2::new(5.0, 6.0), + max: Vec2::new(105.0, 206.0), + })); + node.affine = [[2.0, 3.0], [4.0, 5.0]]; // col0 = [m00,m10], col1 = [m01,m11] + let p = pack_extracted(&node); + let raw = packed_to_raw(&p); + assert_eq!( + &raw[13..17], + &[2.0, 3.0, 4.0, 5.0], + "affine appended at [13..17]" + ); + // The pre-R1 layout is byte-identical: pos/size/color/radius/clip unchanged. + assert_eq!(raw[0], 10.0); + assert_eq!(raw[1], 20.0); + assert_eq!(raw[2], 30.0); + assert_eq!(raw[3], 40.0); + let lin = LinearRgba::from(Color::WHITE); + assert_eq!(&raw[4..8], &[lin.red, lin.green, lin.blue, lin.alpha]); + assert_eq!(raw[8], 0.0); // radius + assert_eq!(&raw[9..13], &[5.0, 6.0, 105.0, 206.0]); // clip min/max +} + +#[test] +fn color_and_alpha_offset_consts_point_at_color() { + // R2 (degraded-group re-tint) reads alpha via ALPHA_FLOAT_OFFSET, so the + // named consts must point at the color block (color@4, alpha@7) — the + // invariant the append-after-13 layout exists to preserve. 
+ use buiy_core::render::buckets::packed_to_raw; + use buiy_core::render::instance::{ALPHA_FLOAT_OFFSET, COLOR_FLOAT_OFFSET}; + assert_eq!(COLOR_FLOAT_OFFSET, 4); + assert_eq!(ALPHA_FLOAT_OFFSET, 7); + let p = pack_extracted(&node_with_clip(None)); + let raw = packed_to_raw(&p); + assert_eq!(raw[ALPHA_FLOAT_OFFSET], p.color[3]); + assert_eq!(&raw[COLOR_FLOAT_OFFSET..COLOR_FLOAT_OFFSET + 4], &p.color); } #[test] @@ -143,8 +192,8 @@ fn pack_extracted_uses_full_view_sentinel_when_clip_absent() { } #[test] -fn packed_raw_stride_agrees_with_thirteen_floats() { - // The raw bucket layout is [f32;13] and byte-equal to PackedInstance's stride. +fn packed_raw_stride_agrees_with_seventeen_floats() { + // The raw bucket layout is [f32;17] and byte-equal to PackedInstance's stride. assert!(buiy_core::render::instance::packed_raw_stride_agrees()); } diff --git a/crates/buiy_core/tests/render_paint_order.rs b/crates/buiy_core/tests/render_paint_order.rs index 019961a..f4b3593 100644 --- a/crates/buiy_core/tests/render_paint_order.rs +++ b/crates/buiy_core/tests/render_paint_order.rs @@ -95,6 +95,7 @@ fn top_layer_tail_is_tier_ordered_fullscreen_to_modal() { color: Color::WHITE, clip: None, group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], }) .collect(), ..Default::default() @@ -149,6 +150,7 @@ fn modal_is_first_hit_candidate_over_popover() { color: Color::WHITE, clip: None, group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], }) }, &mut assembled, diff --git a/crates/buiy_core/tests/render_prepare.rs b/crates/buiy_core/tests/render_prepare.rs index 8cb4398..cd2d88d 100644 --- a/crates/buiy_core/tests/render_prepare.rs +++ b/crates/buiy_core/tests/render_prepare.rs @@ -51,6 +51,7 @@ fn pack_extracted_nodes_populated_carrier_yields_nonempty_quad_batch() { color: Color::srgb(1.0, 0.0, 0.0), clip: None, group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], }, ExtractedNode { entity: Entity::from_raw_u32(2).unwrap(), @@ -59,6 +60,7 @@ fn 
pack_extracted_nodes_populated_carrier_yields_nonempty_quad_batch() { color: Color::srgb(0.0, 1.0, 0.0), clip: None, group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], }, ], }; @@ -71,6 +73,12 @@ fn pack_extracted_nodes_populated_carrier_yields_nonempty_quad_batch() { 2, "populated carrier must yield one quad per node" ); + // R1: each raw instance is 17 floats (the affine basis appended at [13..17]). + assert_eq!( + instances[0].len(), + 17, + "raw instance carries 17 floats (incl. the appended affine basis)" + ); // The uniform is built from the carrier's logical_size + scale_factor: the // std140 array carries scale_factor at slot 8. assert!( @@ -84,6 +92,32 @@ fn pack_extracted_nodes_populated_carrier_yields_nonempty_quad_batch() { assert!(empty.is_empty(), "empty carrier yields no quads"); } +#[test] +fn pack_extracted_nodes_carries_non_identity_affine() { + // A node with a non-identity affine carries the flattened basis at [13..17] + // through the prepare pack (the GPU vertex stage reads it as the 2x2 mat). 
+ let nodes = ExtractedNodes { + logical_size: Vec2::new(800.0, 600.0), + scale_factor: 1.0, + nodes: vec![ExtractedNode { + entity: Entity::from_raw_u32(1).unwrap(), + position: Vec2::new(10.0, 20.0), + size: Vec2::new(100.0, 50.0), + color: Color::srgb(1.0, 0.0, 0.0), + clip: None, + group: None, + affine: [[0.0, 1.0], [-1.0, 0.0]], // 90deg rotation basis + }], + }; + let (instances, _) = pack_extracted_nodes(&nodes); + assert_eq!(instances.len(), 1); + assert_eq!( + &instances[0][13..17], + &[0.0, 1.0, -1.0, 0.0], + "the 2D affine basis rides the packed instance at [13..17]" + ); +} + #[test] fn extracted_nodes_pack_view_routes_records_to_quad_layer_0() { // R6 consumes R5's ExtractedNodes and packs its `nodes` via pack_view — no @@ -103,6 +137,7 @@ fn extracted_nodes_pack_view_routes_records_to_quad_layer_0() { color: Color::srgb(1.0, 0.0, 0.0), clip: None, group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], }); let buckets = pack_view(&view.nodes); let quad0 = PrimitiveBatchKey { diff --git a/crates/buiy_core/tests/render_primitive_descriptor.rs b/crates/buiy_core/tests/render_primitive_descriptor.rs index 1ae035a..930493f 100644 --- a/crates/buiy_core/tests/render_primitive_descriptor.rs +++ b/crates/buiy_core/tests/render_primitive_descriptor.rs @@ -67,9 +67,9 @@ fn quad_descriptor_keeps_alpha_blending_and_entry_points() { #[test] fn quad_descriptor_has_two_vertex_buffers_with_phase0_strides() { - // Static unit-quad VBO (stride 16) + per-instance buffer (stride 52 after - // R8b appends the clip AABB at @location(6)/(7)); the unit-quad VBO is - // untouched. + // Static unit-quad VBO (stride 16) + per-instance buffer (stride 68 after + // R1 appends the 2x2 affine basis at @location(8)/(9), on top of R8b's clip + // AABB at @location(6)/(7)); the unit-quad VBO is untouched. 
let d = BuiyPrimitives::default().specialize(BuiyPrimitiveKey { kind: BuiyPrimitiveKind::Quad, format: TextureFormat::Rgba8UnormSrgb, @@ -78,21 +78,55 @@ fn quad_descriptor_has_two_vertex_buffers_with_phase0_strides() { let buffers = &d.vertex.buffers; assert_eq!(buffers.len(), 2, "vertex + instance buffer layouts"); assert_eq!(buffers[0].array_stride, 16); - assert_eq!(buffers[1].array_stride, 52); + assert_eq!(buffers[1].array_stride, 68); } #[test] -fn instance_buffer_stride_is_52_with_clip_fields() { - // The per-instance record grew from 36 B (R7) to 52 B (R8b) when the clip - // AABB (`clip_min`/`clip_max`, two Float32x2) was appended; the vertex - // layout's `array_stride` must track `PackedInstance`'s 52-byte stride or - // wgpu mis-strides the instance buffer. +fn instance_buffer_stride_is_68_with_clip_and_affine_fields() { + // The per-instance record grew from 52 B (R8b) to 68 B (R1) when the 2x2 + // affine basis (two Float32x2 columns) was appended after the clip AABB; the + // vertex layout's `array_stride` must track `PackedInstance`'s 68-byte stride + // or wgpu mis-strides the instance buffer. + use buiy_core::render::instance::PACKED_INSTANCE_STRIDE_BYTES; let d = BuiyPrimitives::default().specialize(BuiyPrimitiveKey { kind: BuiyPrimitiveKind::Quad, format: TextureFormat::Rgba8UnormSrgb, samples: 1, }); - assert_eq!(d.vertex.buffers[1].array_stride, 52); + assert_eq!(d.vertex.buffers[1].array_stride, 68); + assert_eq!( + d.vertex.buffers[1].array_stride as usize, + PACKED_INSTANCE_STRIDE_BYTES + ); +} + +#[test] +fn instance_keeps_clip_attrs_byte_stable_and_appends_affine() { + // R1 HARD CONSTRAINT: the existing 6 instance attrs (locations 2..7, offsets + // 0..44) are UNCHANGED, and two NEW Float32x2 affine columns append at + // @location(8) offset 52 (col0 = [m00,m10]) and @location(9) offset 60 + // (col1 = [m01,m11]). 
+ let d = BuiyPrimitives::default().specialize(BuiyPrimitiveKey { + kind: BuiyPrimitiveKind::Quad, + format: TextureFormat::Rgba8UnormSrgb, + samples: 1, + }); + let attrs = &d.vertex.buffers[1].attributes; + let at = |loc: u32| attrs.iter().find(|a| a.shader_location == loc).copied(); + // Existing six attrs unchanged. + assert_eq!(at(2).unwrap().offset, 0); + assert_eq!(at(3).unwrap().offset, 8); + assert_eq!(at(4).unwrap().offset, 16); // color + assert_eq!(at(5).unwrap().offset, 32); // radius/blur + assert_eq!(at(6).unwrap().offset, 36); // clip_min + assert_eq!(at(7).unwrap().offset, 44); // clip_max + // New affine columns appended. + let col0 = at(8).expect("instance layout has @location(8) affine col0"); + assert_eq!(col0.format, VertexFormat::Float32x2); + assert_eq!(col0.offset, 52); + let col1 = at(9).expect("instance layout has @location(9) affine col1"); + assert_eq!(col1.format, VertexFormat::Float32x2); + assert_eq!(col1.offset, 60); } #[test] diff --git a/crates/buiy_core/tests/render_shader_wgsl.rs b/crates/buiy_core/tests/render_shader_wgsl.rs index 1f67f55..7dafe4b 100644 --- a/crates/buiy_core/tests/render_shader_wgsl.rs +++ b/crates/buiy_core/tests/render_shader_wgsl.rs @@ -78,3 +78,63 @@ fn shadow_shader_with_clip_parses() { "shadow clip inputs bound at @location(6)/(7) (matches the vertex layout)" ); } + +#[test] +fn quad_shader_applies_affine_via_mat2x2() { + // R1: the quad shader declares the 2D affine basis instance inputs at + // @location(8)/(9), builds the window-logical corner via a `mat2x2`, and + // interpolates `frag_logical` for the clip discard — `rect_center` is GONE + // (it was the axis-aligned corner, wrong under rotation). naga PARSES (not + // string-grep) so a malformed VertexOut/fragment-input mismatch is rejected. 
+ let m = parse_wgsl("quad", QUAD_WGSL); + assert!(has_entry_point(&m, "vertex")); + assert!(has_entry_point(&m, "fragment")); + assert!( + QUAD_WGSL.contains("@location(8)") && QUAD_WGSL.contains("@location(9)"), + "quad affine inputs bound at @location(8)/(9) (matches the vertex layout)" + ); + assert!( + QUAD_WGSL.contains("affine_col0") && QUAD_WGSL.contains("affine_col1"), + "quad shader declares the affine basis columns" + ); + assert!( + QUAD_WGSL.contains("mat2x2"), + "quad vertex builds the logical corner via a mat2x2 affine" + ); + assert!( + QUAD_WGSL.contains("frag_logical"), + "quad carries the affine-transformed window-logical corner for the clip discard" + ); + assert!( + !QUAD_WGSL.contains("rect_center"), + "rect_center (the axis-aligned corner) is dropped — replaced by frag_logical" + ); +} + +#[test] +fn shadow_shader_applies_affine_via_mat2x2() { + // The shadow shader mirrors the quad shader's affine path identically. + let m = parse_wgsl("shadow", SHADOW_WGSL); + assert!(has_entry_point(&m, "vertex")); + assert!(has_entry_point(&m, "fragment")); + assert!( + SHADOW_WGSL.contains("@location(8)") && SHADOW_WGSL.contains("@location(9)"), + "shadow affine inputs bound at @location(8)/(9)" + ); + assert!( + SHADOW_WGSL.contains("affine_col0") && SHADOW_WGSL.contains("affine_col1"), + "shadow shader declares the affine basis columns" + ); + assert!( + SHADOW_WGSL.contains("mat2x2"), + "shadow vertex builds the logical corner via a mat2x2 affine" + ); + assert!( + SHADOW_WGSL.contains("frag_logical"), + "shadow carries the affine-transformed window-logical corner" + ); + assert!( + !SHADOW_WGSL.contains("rect_center"), + "rect_center is dropped in the shadow shader too" + ); +} diff --git a/crates/buiy_core/tests/render_text_quads.rs b/crates/buiy_core/tests/render_text_quads.rs index 8f9add1..3de1d19 100644 --- a/crates/buiy_core/tests/render_text_quads.rs +++ b/crates/buiy_core/tests/render_text_quads.rs @@ -16,6 +16,7 @@ fn node(entity: Entity, x: 
f32, color: Color, group: Option) -> Extracted color, clip: None, group, + affine: [[1.0, 0.0], [0.0, 1.0]], } } diff --git a/crates/buiy_core/tests/render_transform_paint_gpu.rs b/crates/buiy_core/tests/render_transform_paint_gpu.rs new file mode 100644 index 0000000..abcc522 --- /dev/null +++ b/crates/buiy_core/tests/render_transform_paint_gpu.rs @@ -0,0 +1,167 @@ +//! GPU reftest (#[ignore]) for R1 transform paint: a UiTransform's 2D affine +//! (rotation / scale) is applied in the quad vertex stage so a transformed fill +//! paints OFF the axis-aligned box. Needs a real wgpu adapter; the headless gate +//! proves the byte layout + the WGSL naga-parse shape, the human runs the GPU +//! lane (`cargo test -p buiy_core -- --ignored --test-threads=1`). +//! +//! SCOPE: pure rotation / pure scale only — within the bridge's faithful TRS +//! range. Skew / general TransformMatrix::Matrix are bounded by the bridge's +//! TRS-only Transform::from_matrix decompose (a separate residual, +//! clip-and-transform.md § B.5), so this fixture deliberately avoids them. + +mod support; + +/// A pure 2x scale about the box-local top-left grows a 10×10 fill to 20×20, so +/// a pixel ~15px from the top-left — INSIDE the scaled fill but OUTSIDE the +/// unscaled 10×10 box — must be painted. If the affine were dropped (the R1 +/// bug), render would paint the original 10×10 axis-aligned box and that pixel +/// would read the clear color. The scaled-grown corner is direction-independent, +/// so this is the unambiguous transform-paint assertion. 
+#[test] +#[ignore = "needs a wgpu adapter (real GPU or lavapipe); run with --ignored"] +fn scaled_fill_paints_beyond_unscaled_box() { + use bevy::prelude::*; + use buiy_core::Node; + use buiy_core::layout::{Inset, Length, Sizing, Style}; + use buiy_core::render::color::ColorToken; + use buiy_core::render::components::Background; + use std::borrow::Cow; + + const W: u32 = 64; + const H: u32 = 64; + + let mut app = support::gpu_render_app(W, H); + { + let mut theme = app.world_mut().resource_mut::(); + theme.colors.insert("test.fill".into(), Color::WHITE); + } + + let target = support::render_to_image(&mut app, W, H); + support::spawn_capture_camera(&mut app, target.clone()); + + // A 10×10 fill at top-left (16,16), scaled 2x about its box-local top-left → + // occupies x∈[16,36), y∈[16,36). The unscaled box is x∈[16,26), y∈[16,26). + let child = ( + Node, + Style::default() + .absolute() + .inset(Inset { + top: Sizing::Length(Length::px(16.0)), + left: Sizing::Length(Length::px(16.0)), + ..default() + }) + .width_px(10.0) + .height_px(10.0) + .scale(2.0), + Background { + color: ColorToken::Token(Cow::Borrowed("test.fill")), + }, + ); + let c = app.world_mut().spawn(child).id(); + app.world_mut() + .spawn((Node, Style::default())) + .add_children(&[c]); + + support::finish_and_run(&mut app, 3); + let pixels = support::readback_rgba(&mut app, target); + assert_eq!(pixels.len(), (W * H * 4) as usize); + let px = |x: u32, y: u32| -> [u8; 4] { + let i = ((y * W + x) * 4) as usize; + [pixels[i], pixels[i + 1], pixels[i + 2], pixels[i + 3]] + }; + + let clear = px(1, 1); + assert_eq!( + clear, + [0, 0, 0, 255], + "untouched corner reads the clear color" + ); + + // Deep interior of the scaled-only region (x∈[26,36), y∈[26,36)) — OUTSIDE + // the unscaled 10×10 box, well clear of the SDF rim. The R1 bug (axis-aligned + // paint, scale dropped) leaves this at the clear color. 
+ let scaled_only = px(30, 30); + assert_ne!( + scaled_only, + [0, 0, 0, 255], + "the 2x scale must paint at (30,30), beyond the unscaled 10×10 box \ + (a dropped affine would leave this at the clear color)" + ); +} + +/// A pure 90° rotation about the box-local top-left sweeps a tall thin rect into +/// a horizontal extent the unrotated rect never reaches. The exact swept +/// quadrant depends on the rotation sign, so this asserts the rotated fill +/// paints SOME pixel off the unrotated rect's vertical column (a column the +/// axis-aligned rect would leave at the clear color), which holds for either +/// sign of a 90° turn about the top-left. +#[test] +#[ignore = "needs a wgpu adapter (real GPU or lavapipe); run with --ignored"] +fn rotated_fill_paints_off_axis() { + use bevy::prelude::*; + use buiy_core::Node; + use buiy_core::layout::{Inset, Length, Sizing, Style}; + use buiy_core::render::color::ColorToken; + use buiy_core::render::components::Background; + use std::borrow::Cow; + use std::f32::consts::FRAC_PI_2; + + const W: u32 = 64; + const H: u32 = 64; + + let mut app = support::gpu_render_app(W, H); + { + let mut theme = app.world_mut().resource_mut::(); + theme.colors.insert("test.fill".into(), Color::WHITE); + } + + let target = support::render_to_image(&mut app, W, H); + support::spawn_capture_camera(&mut app, target.clone()); + + // A 4px-wide × 30px-tall rect with its top-left at (32,32) (image center). + // Unrotated it occupies x∈[32,36), y∈[32,62). Rotated 90° about its top-left + // it sweeps a ~30px HORIZONTAL extent the unrotated thin column never covers. 
+ let child = ( + Node, + Style::default() + .absolute() + .inset(Inset { + top: Sizing::Length(Length::px(32.0)), + left: Sizing::Length(Length::px(32.0)), + ..default() + }) + .width_px(4.0) + .height_px(30.0) + .rotate_z(FRAC_PI_2), + Background { + color: ColorToken::Token(Cow::Borrowed("test.fill")), + }, + ); + let c = app.world_mut().spawn(child).id(); + app.world_mut() + .spawn((Node, Style::default())) + .add_children(&[c]); + + support::finish_and_run(&mut app, 3); + let pixels = support::readback_rgba(&mut app, target); + let px = |x: u32, y: u32| -> [u8; 4] { + let i = ((y * W + x) * 4) as usize; + [pixels[i], pixels[i + 1], pixels[i + 2], pixels[i + 3]] + }; + + // The unrotated thin column is x∈[32,36): any painted pixel with x far from + // that column (≥10px away horizontally) proves the rotation moved fill onto + // a horizontal extent the axis-aligned rect never reaches. Scan the rotated + // sweep band near the pivot row. + let off_axis_painted = (0..W).any(|x| { + (x + 10 < 32 || x > 36 + 10) && { + // sample a few rows around the pivot (y=32) where a 90° turn lands fill + (28..=36).any(|y| px(x, y) != [0, 0, 0, 255]) + } + }); + assert!( + off_axis_painted, + "the 90° rotation must paint fill off the unrotated thin column \ + (a dropped affine would paint only the axis-aligned x∈[32,36) column)" + ); +} diff --git a/crates/buiy_core/tests/snapshots/pack_extracted_finite_clip.snap b/crates/buiy_core/tests/snapshots/pack_extracted_finite_clip.snap index 949ce83..140c931 100644 --- a/crates/buiy_core/tests/snapshots/pack_extracted_finite_clip.snap +++ b/crates/buiy_core/tests/snapshots/pack_extracted_finite_clip.snap @@ -2,4 +2,4 @@ source: crates/buiy_core/tests/render_instance.rs expression: pack_extracted_finite_clip --- -000020410000a0410000f041000020420000803f0000803f0000803f0000803f000000000000a0400000c0400000d24200004e43 
+000020410000a0410000f041000020420000803f0000803f0000803f0000803f000000000000a0400000c0400000d24200004e430000803f00000000000000000000803f diff --git a/crates/buiy_core/tests/snapshots/pack_extracted_sentinel_clip.snap b/crates/buiy_core/tests/snapshots/pack_extracted_sentinel_clip.snap index 15858d1..308b1c1 100644 --- a/crates/buiy_core/tests/snapshots/pack_extracted_sentinel_clip.snap +++ b/crates/buiy_core/tests/snapshots/pack_extracted_sentinel_clip.snap @@ -2,4 +2,4 @@ source: crates/buiy_core/tests/render_instance.rs expression: pack_extracted_sentinel_clip --- -000020410000a0410000f041000020420000803f0000803f0000803f0000803f00000000000080ff000080ff0000807f0000807f +000020410000a0410000f041000020420000803f0000803f0000803f0000803f00000000000080ff000080ff0000807f0000807f0000803f00000000000000000000803f diff --git a/crates/buiy_core/tests/snapshots/pack_instance_logical_px.snap b/crates/buiy_core/tests/snapshots/pack_instance_logical_px.snap index bbdce65..0bffb8f 100644 --- a/crates/buiy_core/tests/snapshots/pack_instance_logical_px.snap +++ b/crates/buiy_core/tests/snapshots/pack_instance_logical_px.snap @@ -2,4 +2,4 @@ source: crates/buiy_core/tests/render_instance.rs expression: pack_instance_logical_px --- -0000c84200004842000048430000a0420000803f0000803f0000803f0000803f00004041000080ff000080ff0000807f0000807f +0000c84200004842000048430000a0420000803f0000803f0000803f0000803f00004041000080ff000080ff0000807f0000807f0000803f00000000000000000000803f diff --git a/crates/buiy_core/tests/snapshots/pack_view_node_payload.snap b/crates/buiy_core/tests/snapshots/pack_view_node_payload.snap index d98c5d0..633ed8b 100644 --- a/crates/buiy_core/tests/snapshots/pack_view_node_payload.snap +++ b/crates/buiy_core/tests/snapshots/pack_view_node_payload.snap @@ -2,4 +2,4 @@ source: crates/buiy_core/tests/render_buckets.rs expression: pack_view_node_payload --- -0000e0400000104100004040000080400000803f0000803f0000803f0000803f00000000000080ff000080ff0000807f0000807f 
+0000e0400000104100004040000080400000803f0000803f0000803f0000803f00000000000080ff000080ff0000807f0000807f0000803f00000000000000000000803f diff --git a/crates/buiy_verify/src/invariant/predicates.rs b/crates/buiy_verify/src/invariant/predicates.rs index 8d57ac1..427a56b 100644 --- a/crates/buiy_verify/src/invariant/predicates.rs +++ b/crates/buiy_verify/src/invariant/predicates.rs @@ -307,7 +307,7 @@ pub fn all_finite(nodes: &ExtractedNodes) -> Result<(), Violation> { /// clip" and are checked separately. pub fn all_finite_packed(packed: &[PackedInstance]) -> Result<(), Violation> { for (i, p) in packed.iter().enumerate() { - let finite_fields: [(&str, f32); 9] = [ + let finite_fields: [(&str, f32); 13] = [ ("rect_pos.x", p.rect_pos[0]), ("rect_pos.y", p.rect_pos[1]), ("rect_size.x", p.rect_size[0]), @@ -317,6 +317,12 @@ pub fn all_finite_packed(packed: &[PackedInstance]) -> Result<(), Violation> { ("color.b", p.color[2]), ("color.a", p.color[3]), ("radius", p.radius), + // The 2D affine basis (R1) — always finite for a valid + // GlobalTransform; a NaN/inf here is a real packing bug. + ("affine.m00", p.affine[0]), + ("affine.m10", p.affine[1]), + ("affine.m01", p.affine[2]), + ("affine.m11", p.affine[3]), ]; for (field, v) in finite_fields { if !v.is_finite() { diff --git a/crates/buiy_verify/src/invariant/scene.rs b/crates/buiy_verify/src/invariant/scene.rs index d2832fb..f6268bc 100644 --- a/crates/buiy_verify/src/invariant/scene.rs +++ b/crates/buiy_verify/src/invariant/scene.rs @@ -669,6 +669,8 @@ fn extracted_node(entity: Entity, n: &FlatNode) -> ExtractedNode { color, clip, group: None, + // The synthetic scene carries no UiTransform; paint axis-aligned. 
+ affine: [[1.0, 0.0], [0.0, 1.0]], } } diff --git a/crates/buiy_verify/src/snapshot.rs b/crates/buiy_verify/src/snapshot.rs index 4c7b96a..55e9bb6 100644 --- a/crates/buiy_verify/src/snapshot.rs +++ b/crates/buiy_verify/src/snapshot.rs @@ -431,7 +431,7 @@ pub fn display_list_dump(nodes: &ExtractedNodes, names: &NameLookup) -> String { // --------------------------------------------------------------------------- /// Hex-dump a [`PackedInstance`] as `bytemuck::bytes_of(p)` — a byte-exact -/// snapshot of the GPU upload payload (52 B → 104 hex chars), independent of +/// snapshot of the GPU upload payload (68 B → 136 hex chars), independent of /// the Display dump's format version. A packing arithmetic change (e.g. the /// half-size sign bug `render_instance.rs` regression-tests) flips the hex even /// when the rounded Display dump rounds it away (snapshots.md § byte-exact). diff --git a/crates/buiy_verify/tests/invariant_mutations.rs b/crates/buiy_verify/tests/invariant_mutations.rs index 5c83917..abbac84 100644 --- a/crates/buiy_verify/tests/invariant_mutations.rs +++ b/crates/buiy_verify/tests/invariant_mutations.rs @@ -38,6 +38,7 @@ fn node(entity: Entity, size: Vec2) -> ExtractedNode { max: size, }), group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], } } @@ -231,6 +232,7 @@ fn packed(rect_size: [f32; 2]) -> PackedInstance { radius: 0.0, clip_min: [f32::NEG_INFINITY, f32::NEG_INFINITY], clip_max: [f32::INFINITY, f32::INFINITY], + affine: [1.0, 0.0, 0.0, 1.0], } } diff --git a/crates/buiy_verify/tests/snapshot_display_list.rs b/crates/buiy_verify/tests/snapshot_display_list.rs index bed7509..858289c 100644 --- a/crates/buiy_verify/tests/snapshot_display_list.rs +++ b/crates/buiy_verify/tests/snapshot_display_list.rs @@ -32,6 +32,7 @@ fn two_node_scene(swap: bool) -> (ExtractedNodes, NameLookup) { color: Color::srgba(0.1, 0.2, 0.3, 1.0), clip: None, group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], }; let tooltip = ExtractedNode { entity: tooltip_e, @@ -43,6 +44,7 @@ 
fn two_node_scene(swap: bool) -> (ExtractedNodes, NameLookup) { max: Vec2::new(80.0, 24.0), }), group: Some(0), + affine: [[1.0, 0.0], [0.0, 1.0]], }; let nodes = ExtractedNodes { // Stored paint order is modal (bottom) then tooltip (top); the dump @@ -117,6 +119,7 @@ fn missing_token_surfaces_as_magenta() { color: MISSING_TOKEN_FALLBACK, clip: None, group: None, + affine: [[1.0, 0.0], [0.0, 1.0]], }; let nodes = ExtractedNodes { nodes: vec![node], diff --git a/crates/buiy_verify/tests/snapshot_instance_hex.rs b/crates/buiy_verify/tests/snapshot_instance_hex.rs index d1917da..943cfa9 100644 --- a/crates/buiy_verify/tests/snapshot_instance_hex.rs +++ b/crates/buiy_verify/tests/snapshot_instance_hex.rs @@ -9,7 +9,7 @@ use buiy_verify::snapshot::instance_hex; fn hex_round_trips_bytes() { // `instance_hex(p)` → parse hex → `bytemuck::pod_read_unaligned` must // reconstruct the ORIGINAL `PackedInstance` bit-for-bit, proving the hex is - // lossless and matches the GPU upload payload (52 B → 104 hex chars). + // lossless and matches the GPU upload payload (68 B → 136 hex chars). let p = PackedInstance { rect_pos: [10.0, 20.0], rect_size: [100.0, 40.0], @@ -17,10 +17,11 @@ fn hex_round_trips_bytes() { radius: 8.0, clip_min: [0.0, 0.0], clip_max: [200.0, 100.0], + affine: [1.0, 0.0, 0.0, 1.0], }; let hex = instance_hex(&p); - assert_eq!(hex.len(), 104, "52 bytes → 104 hex chars"); + assert_eq!(hex.len(), 136, "68 bytes → 136 hex chars"); // Parse the hex back into the 52 bytes. 
let bytes: Vec = (0..hex.len()) @@ -49,6 +50,7 @@ fn hex_flips_on_a_packing_change() { radius: 0.0, clip_min: [f32::NEG_INFINITY, f32::NEG_INFINITY], clip_max: [f32::INFINITY, f32::INFINITY], + affine: [1.0, 0.0, 0.0, 1.0], }; let mut flipped = base; // The half-size sign bug `render_instance.rs` regression-tests: a negated diff --git a/docs/plans/follow-ups.md b/docs/plans/follow-ups.md index 5e75857..963e85d 100644 --- a/docs/plans/follow-ups.md +++ b/docs/plans/follow-ups.md @@ -594,16 +594,53 @@ stays open until such a mechanism exists. **Originated:** Phase 8 (spec § 4 — render-side concerns stored only). -**Symptom:** `perspective`, `TransformStyle::Preserve3d`, and -`BackfaceVisibility::Hidden` are stored on `UiTransform` and the -`LAYOUT` / `PAINT` / `STYLE` contain flags are stored on `Containment`, -but render does not yet consume them. - -**Implementation sketch:** render consumes `ResolvedTransform` + the -containment flags — applies the composed matrix, the PAINT clip rect, and -honors perspective / backface / `transform-style`. - -**Spec touchpoint:** `transforms-and-containment.md § 4`, § 5.1. +**Status:** transform-paint (rotation + (non-)uniform scale) LANDED (R1); the +PAINT clip rect was ALREADY done; perspective / `Preserve3d` / +`BackfaceVisibility` remain C-tier deferred. Do NOT close — the residuals below +keep this entry open. + +**LANDED (R1, as landed):** the 2D affine paint. Extract consumes the +`GlobalTransform` 2D linear part (`global_transform.affine().matrix3` xy columns +— NOT a re-read of `ResolvedTransform`; pillar-5 contract, the bridge already +folded `ResolvedTransform.matrix` into `Transform`) and carries it as +`ExtractedNode.affine`. `PackedInstance` grew by APPENDING the 2x2 basis +(`[m00,m10,m01,m11]`) AFTER the existing 13 floats so every prior offset stays +byte-stable; the named `COLOR_FLOAT_OFFSET = 4` / `ALPHA_FLOAT_OFFSET = 7` +consts were added for R2's degraded-group re-tint. 
The quad + shadow vertex +stages apply the affine to each box-local corner (`mat2x2 * local`) before the +logical→clip view map, interpolating `frag_logical` for the clip-AABB discard +(stride 52 B → 68 B, vertex attrs `@location(8)/(9)`). Identity basis +`[1,0,0,1]` is byte-identical to the pre-R1 axis-aligned path. GPU rotate/scale +reftest: `tests/render_transform_paint_gpu.rs` (`#[ignore]`). + +**ALREADY done (pre-R1):** the `Containment` PAINT clip rect — the per-primitive +clip AABB via `clip::clip_for_primitive` + `write_clip_rects` (clip.rs ~196), +packed onto every instance (the R8b fragment discard). + +**Residual (still C-tier deferred):** `perspective`, `TransformStyle::Preserve3d`, +and `BackfaceVisibility::Hidden` are stored on `UiTransform` but render does not +consume them (render/mod.rs ~388). The 2D affine path does not carry a +projective channel. + +**Residual A (newly surfaced — layout side):** `transform-origin` is NOT honored +by layout sub-pass 6e (`compose_transform` ignores `ui.origin`), so the composed +matrix rotates/scales about the box-local TOP-LEFT, not the 50%/50% center. +Render transports the affine EXACTLY as `GlobalTransform` encodes it so render == +picking by construction; it must NOT independently re-apply an origin (a +double-apply would diverge from picking). Honoring `transform-origin` is a +layout-side follow-up (a 6e change + a picking re-verify). + +**Residual B (newly surfaced — bridge fidelity):** skew (`TransformMatrix::Skew`) +and general `TransformMatrix::Matrix` paint are BOUNDED by the bridge's lossy +TRS-only `Transform::from_matrix` decompose (bridge.rs; proven lossy by +`from_matrix_drops_projective_perspective_row_keeps_affine`). A Bevy `Transform` +is TRS-only and cannot represent a general shear, so the extracted 2D linear part +is FAITHFUL for rotation + non-uniform scale but skew/general-matrix do NOT paint +faithfully yet. 
Faithful skew needs the bridge to stop round-tripping through TRS +(or render to read a non-TRS source). + +**Spec touchpoint:** `transforms-and-containment.md § 4`, § 5.1; +`clip-and-transform.md § B.5`. ## Render — R11 forced-colors cross-phase seams (CatalogPaint + BoxShadow draw-skip) @@ -914,23 +951,72 @@ pass via a `Glyph@Rgba16Float` pipeline specialization (mirroring the ## Render — degraded effect groups vanish instead of drawing flat +**Status: Root-degraded LANDED; nested-degraded follow-up filed** (R2). + **Originated:** text campaign T8 implementation reading (the T8 plan's D9). -**Symptom:** a `plan_allocation == false` group gets no pooled target, -`BuiyNode::run` step 1 `continue`s, and its members are excluded from +**As-was symptom:** a `plan_allocation == false` group got no pooled target, +`BuiyNode::run` step 1 `continue`d, and its members were excluded from `flat_ranges` / `glyph_flat_ranges` — so under RT-pool budget pressure a -degraded group's quads AND glyphs paint nowhere, despite the "drawn flat -instead" comments (node.rs step 1; compositor.rs `PreparedEffectTargets`). -Latent under the 64 MiB budget (no fixture degrades today). T8 mirrored the -quad semantics for glyphs (a degraded group's glyph range is likewise -skipped) rather than silently widening scope. - -**Implementation sketch:** either re-route a degraded group's ranges into the -flat draw at prepare (forward compositing, accepting the double-dim -approximation v1 rejected for targets) or document skip-as-degradation; -decide with `buiy-verification-design`'s budget calibration. - -**Spec touchpoint:** `effect-compositor.md § 2.3`. +degraded group's quads AND glyphs painted nowhere, despite the "drawn flat +instead" comments. Latent under the 64 MiB budget (no fixture degraded). T8 +had mirrored the quad semantics for glyphs (a degraded group's glyph range +was likewise skipped) rather than silently widening scope. 
+ +**Resolution (R2):** the route-flat-vs-skip fork is RESOLVED in favor of +**forward-compositing**, per `effect-compositor.md § 2.3` (skip contradicted +the spec). A ROOT degraded group (`parent == None`) now folds `group.opacity` +into each member instance's alpha IN PLACE (quad alpha at +`ALPHA_FLOAT_OFFSET` = 7 on the `[f32;17]` record; glyph alpha at the parallel +`GLYPH_ALPHA_FLOAT_OFFSET` = 11 = `GlyphAlphaInstance.color[3]`) and merges +its instance ranges into `flat_ranges`/`glyph_flat_ranges` so the flat WINDOW +draw paints it — it dims exactly once and paints flat, never vanishes +(`compositor::fold_root_degraded_into_flat`, called from +`prepare_effect_groups`). Per-tier idempotency: the fold runs iff the +corresponding BUFFER was repacked this frame (quad on `quad_dirty`, glyph on +`glyph_dirty` — the buffer-repack signals, which DIFFER from the wider +glyph-partition signal), so a retained buffer never re-compounds. Gated on +`allocate.iter().any(|a| !a)` to preserve the gate-#14 zero-upload steady +state. The budget is overridable via the new `RtPoolBudget` resource so a test +forces degradation deterministically. + +**Out of scope (nested):** a NESTED degraded child (`parent == Some`) is NOT +handled by this slice — see the next section. The fold debug-asserts on it and +leaves it untouched in release (no worse than the prior vanish). + +**Spec touchpoint:** `effect-compositor.md § 2.3` (as-landed note added). + +## Render — nested degraded effect group must forward-composite into the parent target (not the window) + +**Originated:** R2 (degraded-group forward-composite), MAJOR-1 scope decision. + +**Problem:** `plan_allocation` (`compositor.rs`) ranks purely by (extent, +reason) and CAN degrade a NESTED child (`extracted[i].parent == Some`) while +its parent keeps a target. R2's fix routes a degraded group's instance ranges +into `flat_ranges`, which the node draws in the WINDOW pass (`buiy_pass`, +`node.rs`). 
That equals "the parent target" the spec § 2.3 mandates ONLY when +the degraded group is a ROOT group. For a nested degraded child, window-level +flat-merge would paint it in the wrong space/clip, and the parent's step-2a +composite (which already skips when either end lacks a target) would then +sample a parent target the child never reached — double-wrong. So R2 scoped to +root-degraded. + +**Fix (node-side):** route a degraded nested child's `group_ranges[i]` / +`glyph_group_ranges[i]` into the PARENT group's step-1 target draw (the +parent's `target_view_columns`, into the parent's `Rgba16Float` target), with +`group.opacity` folded per-instance, BEFORE the parent composites — instead of +the window flat draw. This is a different draw path from the root case (parent +off-screen target vs. window flat pass), which is why it was split out. + +**Current containment:** `compositor::fold_root_degraded_into_flat` +`debug_assert!(false, …)`s on a nested degraded group (loud in dev/tests) and +in release leaves it untouched (it vanishes — no worse than today). The GPU +test `nested_degraded_group_does_not_corrupt_parent` +(`tests/render_compositor_gpu.rs`) guards that the slice's flat-merge does NOT +mis-place a nested child at window level. + +**Spec touchpoint:** `effect-compositor.md § 2.3` ("directly into its parent +target" wording). ## Text — production ASCII pre-warm (rejected as unmeasured) diff --git a/docs/specs/2026-06-03-buiy-render-pipeline-design/clip-and-transform.md b/docs/specs/2026-06-03-buiy-render-pipeline-design/clip-and-transform.md index 737c354..7312015 100644 --- a/docs/specs/2026-06-03-buiy-render-pipeline-design/clip-and-transform.md +++ b/docs/specs/2026-06-03-buiy-render-pipeline-design/clip-and-transform.md @@ -497,10 +497,18 @@ Key points: steady-state frame, matching the layout pipeline's steady-state contract. This is one re-run trigger feeding one writer, not two competing filters. 
- **The transform origin** (`UiTransform.origin`, default `50% 50% 0`) is - *already baked into* `ResolvedTransform.matrix` by sub-pass 6e (it composes - `M = T·R·S·M_transform` around the resolved origin), so the bridge does a - flat `base * matrix` and never re-derives origin. Render and the bridge thus - agree with picking, which applies the inverse of the same matrix + *intended to be baked into* `ResolvedTransform.matrix` by sub-pass 6e (it would + compose `M = T·R·S·M_transform` around the resolved origin), so the bridge does + a flat `base * matrix` and never re-derives origin. **As of R1 (transform-paint + landed), this is the TARGET state, not current:** sub-pass 6e's + `compose_transform` does NOT yet read `ui.origin`, so the composed matrix + rotates/scales about the box-local TOP-LEFT, not the 50%/50% center (a + layout-side residual surfaced by R1 — see the + [follow-up](../../plans/follow-ups.md) residual A). The contract that matters + for render holds regardless: render applies the affine EXACTLY as + `GlobalTransform` encodes it (it does NOT independently re-apply an origin), so + render and the bridge cannot diverge from picking, which applies the inverse of + the same matrix ([transforms-and-containment.md § 1.2](../2026-05-08-buiy-layout-design/transforms-and-containment.md#12-layout-impact)). - **Buiy owns the whole `Transform`.** An author positions UI via Buiy's `Position` / `UiTransform`, never Bevy's `Transform`; the bridge owns the @@ -709,12 +717,31 @@ so it does not perturb the contract). Picking applies the inverse ### B.5 Perspective / `transform-style` / `backface-visibility` consumption +**Status (R1, transform-paint landed):** the **2D affine** half of the +transform-paint follow-up now LANDS via the GPU vertex stage. 
Extract reads the +`GlobalTransform` 2D linear part (`global_transform.affine().matrix3` xy columns +— NOT a re-read of `ResolvedTransform`, per the pillar-5 contract in § B.2) and +the quad + shadow shaders transform each box-local corner by it before the +logical→clip view map (`PackedInstance` grew 52 B → 68 B by appending the 2x2 +basis after the clip fields; vertex attrs `@location(8)/(9)`). The +**PAINT-clip half** was already done (§ A.3 rule 3 / `clip_for_primitive`). The +**perspective channel / `Preserve3d` / `backface-visibility`** stay C-tier +deferred (the bullets below). + +**Fidelity bound (R1):** render faithfully reproduces **rotation + non-uniform +scale**, but **skew (`TransformMatrix::Skew`) and general +`TransformMatrix::Matrix`** are BOUNDED by the bridge's TRS-only +`Transform::from_matrix` decompose (§ B.2 — a Bevy `Transform` cannot represent +a general shear, lossy by the same decompose that drops the projective row). +Faithful skew is a separate residual; it needs the bridge to stop round-tripping +through TRS (or render to read a non-TRS source). Not covered by R1. + Phase 8 *stored* three `UiTransform` fields with no consumer ([components.rs `UiTransform`](../../../crates/buiy_core/src/layout/components.rs); [transforms-and-containment.md § 4](../2026-05-08-buiy-layout-design/transforms-and-containment.md#4-perspective-and-3d)). -This bridge consumes them — resolving the +The remaining (C-tier) consumption — resolving the [*"`UiTransform` paint + `Containment` PAINT clip + perspective/backface"*](../../plans/follow-ups.md) -follow-up's transform half (the PAINT-clip half is § A.3): +follow-up's perspective/3D half: - **`perspective: Option`** — the 3D viewing distance for `Preserve3d` children. 
Resolved to logical px and folded into the **perspective matrix diff --git a/docs/specs/2026-06-03-buiy-render-pipeline-design/effect-compositor.md b/docs/specs/2026-06-03-buiy-render-pipeline-design/effect-compositor.md index 3fa7020..e94ef40 100644 --- a/docs/specs/2026-06-03-buiy-render-pipeline-design/effect-compositor.md +++ b/docs/specs/2026-06-03-buiy-render-pipeline-design/effect-compositor.md @@ -132,6 +132,13 @@ Gate #15 ([foundation verification.md gate #15](../2026-05-07-buiy-foundation/ve **The aggregate live-set budget — `rt_pool_budget` (committed here).** The per-target bucket cap (§ 2.2) bounds *each* target's size, and `frames_since_last_use < 3` eviction (§ 2.2) bounds how long a *transient* target lingers — but neither caps the **concurrent-groups axis**: N simultaneously-painting groups allocate N live targets, and nothing above forbids N from being unbounded. So this file commits an explicit aggregate cap, `rt_pool_budget` (**bytes**; v1 default **64 MiB**), parallel to the atlas `page_budget` ([atlas-and-text-seam.md § 2.4](atlas-and-text-seam.md)): the *shape* — a byte budget on the concurrent live target set, with a defined degradation under pressure — is fixed here; only the *tuned number* defers to `buiy-verification-design` (exactly like the atlas `page_budget`'s tuned page count, [README § 5 #4](README.md#5-open-questions)). 
**Degradation under pressure (forward-compositing fallback).** When acquiring the next group's target in the prepare pass would push live target bytes past `rt_pool_budget`, the lowest-cost effect groups **fall back to direct-to-parent forward compositing** instead of allocating a target: the group's subtree paints directly into its parent target and the group `opacity` is applied **per-instance** (each painter's alpha multiplied by `group.opacity` in the forward pass) as the **documented approximation** — exactly the per-child approximation § 4 rejects for the *correct* default, accepted here *only* under budget pressure as the graceful-degradation path rather than failing to render. "Lowest-cost" ranks groups by reuse cost — smallest painted-bounds area and `OPACITY`-only reason first (an `OPACITY`-only group degrades to a visible-but-approximate overlap; an `ISOLATION`/reserved group degrades last because its boundary is structural, not just an alpha multiply). This brings the RT pool to parity with the atlas page-budget + LRU-under-pressure story: a hard byte ceiling, plus a defined behavior when the working set would exceed it, rather than unbounded growth or an allocation failure. + + **As landed (R2 — ROOT degraded groups).** The forward-compositing fallback ships for **ROOT** degraded groups (`parent == None`). `prepare_effect_groups` (after `plan_allocation` marks the group degraded) folds `group.opacity` into the ALPHA slot of every member instance **in place** and merges the group's instance ranges into the flat draw, so the existing flat WINDOW pass paints them — the group dims exactly once and paints flat, never vanishes (the as-was bug: a degraded group's quads + glyphs painted nowhere). 
The alpha offset is **per-tier**: quad alpha is `ALPHA_FLOAT_OFFSET` (= 7) on the `[f32;17]` quad record; glyph alpha is `GLYPH_ALPHA_FLOAT_OFFSET` (= 11 = `GlyphAlphaInstance.color[3]`) on the glyph record — a DIFFERENT offset, because the glyph layout is `rect ++ uv ++ color ++ clip ++ page` (color is the 3rd `vec4`), and using the quad offset 7 on a glyph would corrupt `uv[3]`. **Two DIFFERENT gates per tier — alpha-fold vs range-merge.** The fold has two halves under two different invariants, so each tier carries two gates: + +- The **ALPHA-fold** runs per tier **iff that tier's instance BUFFER was repacked from source this frame** — quad fold on `quad_dirty` (nodes|groups|text_quads), glyph fold on `glyph_dirty` (glyphs), the buffer-repack signals. A retained (already-folded) buffer's alpha is left untouched so the fold never compounds to black. +- The **RANGE-merge** (re-adding the degraded group's range into `flat_ranges`/`glyph_flat_ranges`) runs per tier **iff that tier's flat/group PARTITION was rebuilt this frame**, because `prepare_buiy_instances` rebuilds the partition wholesale and re-EXCLUDES the degraded group's range — the merge must re-stitch it every rebuild or the degraded group vanishes that frame. The **quad** partition rebuilds under `quad_dirty` (so quad's two gates coincide), but the **glyph** partition rebuilds under the UNION `quad_dirty || glyph_dirty` (`partition_glyph_ranges`) — wider than the glyph buffer-repack gate. + +The asymmetry is glyph-only and load-bearing: on a **quad-dirty-only frame** with a live degraded glyph group (e.g. a background/decoration edit while the glyph buffer is retained), the glyph partition is rebuilt (range re-excluded) while the glyph buffer is retained. 
The glyph **range-merge re-runs** (`merge_glyph = quad_dirty || glyph_dirty` = true, re-adding the range over the already-folded retained buffer) while the glyph **alpha-fold does not** (`fold_glyph = glyph_dirty` = false, the retained alpha already carries last frame's fold). Conflating the two — gating the glyph range-merge on the narrow `glyph_dirty` — drops the merge on that frame and the degraded glyphs vanish (the R2-follow-up bug that the split gate fixes). The whole block is gated on "any group degraded" so the no-degradation steady state stays a zero-fold, zero-upload frame (the gate-#14 budget). For a ROOT group, "the flat window draw" **is** "the parent target" this section mandates. **A NESTED degraded group** (`parent == Some`) must instead forward-composite into its **parent's `Rgba16Float` target** ("directly into its parent target", above) — that node-side routing is **not yet implemented**: the impl debug-asserts on a nested degraded group and leaves it untouched in release (it still vanishes, no worse than the prior behavior) rather than mis-placing it at the window level; tracked as a follow-up (`docs/plans/follow-ups.md` — "Render — nested degraded effect group must forward-composite into the parent target"). 2. **Return-to-baseline.** When activity stops, no new `EffectGroup`s appear; within 3 frames every transiently-allocated target's `frames_since_last_use` reaches 3 and `update()` drops it. Bucket count returns to the steady-state working set, which *is* the baseline. The glyph atlas ([atlas-and-text-seam.md](atlas-and-text-seam.md)) gives gate #15 the *same return-to-baseline guarantee* but through a **distinct pool with a distinct mechanism** — these are two pools, not one shared eviction model: render targets ride Bevy's `TextureCache` (`frames_since_last_use < 3` retain), while the atlas uses `guillotiere` allocation plus a tunable-grace LRU (`config.eviction_grace`). 
Both satisfy "entries return within ε of baseline"; neither shares the other's allocator or policy. 3. **No unbounded growth path.** Because sizing is painted-bounds (§ 2.1) not viewport, and reuse is descriptor-keyed, an adversarial fixture that opens and closes a thousand opacity groups over ten minutes never accumulates a thousand live targets — it recycles a handful of buckets. This is the explicit counter to WebRender's documented cost ("GPU memory for atlases and intermediate targets … a real ceiling," [prior-art/servo-stylo/rendering.md](../../prior-art/servo-stylo/rendering.md)).