diff --git a/crates/buiy_core/src/render/atlas/mod.rs b/crates/buiy_core/src/render/atlas/mod.rs
index 9e9fd57..ad4652c 100644
--- a/crates/buiy_core/src/render/atlas/mod.rs
+++ b/crates/buiy_core/src/render/atlas/mod.rs
@@ -72,7 +72,9 @@ mod atlas;
 pub use atlas::BuiyAtlas;
 
 mod primitive;
-pub use primitive::{GLYPH_ALPHA_INSTANCE_STRIDE_BYTES, GlyphAlphaInstance, IconInstance};
+pub use primitive::{
+    GLYPH_ALPHA_FLOAT_OFFSET, GLYPH_ALPHA_INSTANCE_STRIDE_BYTES, GlyphAlphaInstance, IconInstance,
+};
 
 mod warmup;
 pub use warmup::{AtlasWarmupQueue, AtlasWarmupRequest};
diff --git a/crates/buiy_core/src/render/atlas/primitive.rs b/crates/buiy_core/src/render/atlas/primitive.rs
index c04f65e..cc6b126 100644
--- a/crates/buiy_core/src/render/atlas/primitive.rs
+++ b/crates/buiy_core/src/render/atlas/primitive.rs
@@ -10,6 +10,30 @@ use bytemuck::{Pod, Zeroable};
 /// `coverage.wgsl`'s instance `@location`s read. `[f32;4]×4 + u32 = 68`.
 pub const GLYPH_ALPHA_INSTANCE_STRIDE_BYTES: usize = 68;
 
+/// Float index of the per-glyph straight-alpha (`color[3]`) when a
+/// [`GlyphAlphaInstance`] is viewed as a flat `[f32]` raw record. The fields
+/// pack `rect[4] ++ uv[4] ++ color[4] ++ clip[4] ++ page` (contiguous `#[repr(C)]`,
+/// no pad before `page`), so `color` is the **3rd** `[f32;4]` block and its
+/// alpha lands at float index `8 + 3 = 11`. This is the GLYPH mirror of the
+/// quad-tier `ALPHA_FLOAT_OFFSET` (= 7, a DIFFERENT offset on the `[f32;17]`
+/// quad record): R2's degraded-group forward-composite re-tints glyph alpha,
+/// and using the quad offset 7 on a glyph record would corrupt `uv[3]` (a
+/// silent wrong-pixel bug). NAMED + compile-asserted so the offset is never a
+/// literal `11` at the use site (R1's discipline). The fold itself writes the
+/// typed `color[3]` field; this const documents the raw-view parity for the
+/// spec and any byte-level reader.
+///
+/// [`ALPHA_FLOAT_OFFSET`]: crate::render::instance::ALPHA_FLOAT_OFFSET
+pub const GLYPH_ALPHA_FLOAT_OFFSET: usize = 11;
+
+// Tie `GLYPH_ALPHA_FLOAT_OFFSET` to the REAL layout (f32 == 4 bytes): `color[3]`
+// lives 3 floats past `offset_of!(GlyphAlphaInstance, color)`, so a field
+// reorder that moves `color` now fails the build instead of slipping through.
+const _: () = assert!(
+    std::mem::offset_of!(GlyphAlphaInstance, color) + 3 * 4 == GLYPH_ALPHA_FLOAT_OFFSET * 4,
+    "GLYPH_ALPHA_FLOAT_OFFSET must index color[3] = the 3rd vec4 block's alpha"
+);
+
 /// One instance per visible glyph (or any single-channel coverage quad, e.g.
 /// a generated mask stamp). The **alpha-as-color** primitive: the atlas
 /// stores `R8` coverage and color is applied per-instance, so one resident
diff --git a/crates/buiy_core/src/render/buckets.rs b/crates/buiy_core/src/render/buckets.rs
index 591e406..afeae03 100644
--- a/crates/buiy_core/src/render/buckets.rs
+++ b/crates/buiy_core/src/render/buckets.rs
@@ -84,13 +84,13 @@ impl Ord for PrimitiveBatchKey {
 /// [`PackedInstance`].
 #[derive(Default)]
 pub struct InstanceBuckets {
-    batches: BTreeMap<PrimitiveBatchKey, Vec<[f32; 13]>>,
+    batches: BTreeMap<PrimitiveBatchKey, Vec<[f32; 17]>>,
 }
 
 impl InstanceBuckets {
-    /// Push one packed instance (as raw `[f32; 13]` =
-    /// pos2+size2+color4+radius1+clip_min2+clip_max2) into its batch.
-    pub fn push(&mut self, key: PrimitiveBatchKey, instance: [f32; 13]) {
+    /// Push one packed instance (as raw `[f32; 17]` =
+    /// pos2+size2+color4+radius1+clip_min2+clip_max2+affine4) into its batch.
+    pub fn push(&mut self, key: PrimitiveBatchKey, instance: [f32; 17]) {
         self.batches.entry(key).or_default().push(instance);
     }
 
@@ -110,20 +110,30 @@ impl InstanceBuckets {
     }
 
     /// Iterate batches in draw order (`(layer, primitive paint order)`).
-    pub fn batches(&self) -> impl Iterator<Item = (&PrimitiveBatchKey, &Vec<[f32; 13]>)> {
+    pub fn batches(&self) -> impl Iterator<Item = (&PrimitiveBatchKey, &Vec<[f32; 17]>)> {
         self.batches.iter()
     }
 }
 
-/// Flatten a [`PackedInstance`] into the raw `[f32; 13]` the bucket store holds.
+/// Flatten a [`PackedInstance`] into the raw `[f32; 17]` the bucket store holds.
 /// Keeps the bucket store decoupled from the concrete instance struct while
 /// the stride is asserted equal in tests.
-pub fn packed_to_raw(p: &PackedInstance) -> [f32; 13] {
+///
+/// LAYOUT INVARIANT (R1 / R2 dependency): indices `0..13` are byte-identical to
+/// the pre-R1 layout — color is at [`COLOR_FLOAT_OFFSET`]`..+4` and alpha at
+/// [`ALPHA_FLOAT_OFFSET`] (R2's degraded-group re-tint reads alpha there). The
+/// 2D affine basis appends at `[13..17]` (`[m00, m10, m01, m11]`); identity
+/// `[1, 0, 0, 1]` paints axis-aligned.
+///
+/// [`COLOR_FLOAT_OFFSET`]: crate::render::instance::COLOR_FLOAT_OFFSET
+/// [`ALPHA_FLOAT_OFFSET`]: crate::render::instance::ALPHA_FLOAT_OFFSET
+pub fn packed_to_raw(p: &PackedInstance) -> [f32; 17] {
     [
         p.rect_pos[0],
         p.rect_pos[1],
         p.rect_size[0],
         p.rect_size[1],
+        // color@COLOR_FLOAT_OFFSET (4..8); alpha@ALPHA_FLOAT_OFFSET (7).
         p.color[0],
         p.color[1],
         p.color[2],
@@ -133,6 +143,11 @@ pub fn packed_to_raw(p: &PackedInstance) -> [f32; 13] {
         p.clip_min[1],
         p.clip_max[0],
         p.clip_max[1],
+        // The 2D affine basis APPENDED at indices 13..17 (offsets 0..13 unchanged).
+        p.affine[0],
+        p.affine[1],
+        p.affine[2],
+        p.affine[3],
     ]
 }
 
@@ -197,7 +212,7 @@ pub fn pack_view(nodes: &[ExtractedNode]) -> InstanceBuckets {
 pub struct PackedPartition {
     /// The full flat quad blob (every instance, in paint order) — identical to
     /// `pack_view`'s single `(Quad, 0)` batch flattened.
-    pub instances: Vec<[f32; 13]>,
+    pub instances: Vec<[f32; 17]>,
     /// `group_ranges[g]` = the `[start, end)` instance range of group `g`'s
     /// members (empty range if the group has no opaque member).
     pub group_ranges: Vec<std::ops::Range<u32>>,
@@ -276,7 +291,7 @@ pub fn pack_view_partitioned(
 /// per-group contiguous ranges (with the contiguity tripwire) and the
 /// complement flat runs.
 struct Partitioner {
-    instances: Vec<[f32; 13]>,
+    instances: Vec<[f32; 17]>,
     ranges: RangePartitioner,
 }
 
@@ -290,7 +305,7 @@ impl Partitioner {
 
     /// Append one instance under group `g` (already bounds-filtered by the
     /// caller), extending or starting the group/flat run it belongs to.
-    fn push(&mut self, instance: [f32; 13], g: Option<usize>) {
+    fn push(&mut self, instance: [f32; 17], g: Option<usize>) {
         self.instances.push(instance);
         self.ranges.push(g);
     }
diff --git a/crates/buiy_core/src/render/compositor.rs b/crates/buiy_core/src/render/compositor.rs
index cb00c5d..cc89018 100644
--- a/crates/buiy_core/src/render/compositor.rs
+++ b/crates/buiy_core/src/render/compositor.rs
@@ -17,7 +17,7 @@ use bevy::render::render_resource::{
     CachedRenderPipelineId, Extent3d, PipelineCache, TextureDescriptor, TextureDimension,
     TextureFormat, TextureUsages,
 };
-use bevy::render::renderer::RenderDevice;
+use bevy::render::renderer::{RenderDevice, RenderQueue};
 use bevy::render::texture::{CachedTexture, TextureCache};
 use bevy::render::view::{Msaa, ViewTarget};
 
@@ -178,6 +178,21 @@ pub fn post_order_indices(parents: &[Option<usize>]) -> Vec<usize> {
 /// with, the glyph atlas pool.
 pub const RT_POOL_BUDGET_BYTES: u64 = 64 * 1024 * 1024;
 
+/// The live aggregate RT-pool budget in bytes, as an overridable resource.
+/// `prepare_effect_groups` feeds it to [`plan_allocation`]; it defaults to
+/// [`RT_POOL_BUDGET_BYTES`]. Production never changes it; a test inserts a small
+/// budget to FORCE degradation deterministically (exercising the
+/// forward-composite path without an unwieldy 64 MiB-breaching fixture).
+#[derive(Resource, Clone, Copy, Debug)]
+pub struct RtPoolBudget(pub u64);
+
+impl Default for RtPoolBudget {
+    /// The production budget: [`RT_POOL_BUDGET_BYTES`].
+    fn default() -> Self {
+        Self(RT_POOL_BUDGET_BYTES)
+    }
+}
+
 /// Bytes one pooled target of `extent` consumes. Group targets are pinned
 /// `Rgba16Float` (effect-compositor.md § 2.2) = 8 bytes/texel.
 pub fn target_bytes(extent: UVec2) -> u64 {
@@ -233,6 +248,174 @@ pub fn plan_allocation(groups: &[(UVec2, EffectReason)], budget: u64) -> Vec<boo
     allocate
 }
 
+/// Forward-composite inputs for one effect group (effect-compositor.md § 2.3).
+/// Built for EVERY extracted group, at the same index as the `allocate` plan; a
+/// group is actually degraded only where `allocate[i] == false`. Carries the
+/// already-packed quad + glyph instance ranges, the opacity to fold, and the
+/// parent link. Consumed by [`fold_root_degraded_into_flat`].
+#[derive(Clone, Debug, PartialEq)]
+pub struct DegradedGroup {
+    /// Half-open quad-instance range (`BuiyInstanceBuffers::group_ranges[i]`).
+    pub quad_range: Range<u32>,
+    /// Half-open glyph-instance range (`glyph_group_ranges[i]`).
+    pub glyph_range: Range<u32>,
+    /// Group opacity, multiplied into each member instance's alpha.
+    pub opacity: f32,
+    /// Parent group index, or `None` for a ROOT group. Only ROOT groups are
+    /// forward-composited by this slice (§ 2.3 scope); a nested degraded group
+    /// is a documented follow-up (debug-asserted, release-skipped).
+    pub parent: Option<usize>,
+}
+
+/// Normalize `ranges` in place into the minimal set of disjoint half-open
+/// `[start, end)` runs: empty spans are discarded, the rest ordered by `start`,
+/// and any span touching or overlapping its predecessor is fused into it.
+fn merge_ranges(ranges: &mut Vec<Range<u32>>) {
+    ranges.retain(|span| !span.is_empty());
+    ranges.sort_by_key(|span| span.start);
+    // `dedup_by` passes `(candidate, last kept)`; answering `true` removes the
+    // candidate, so widen the kept run first and the pair collapses into one.
+    ranges.dedup_by(|span, kept| {
+        let joins = span.start <= kept.end;
+        if joins {
+            kept.end = kept.end.max(span.end);
+        }
+        joins
+    });
+}
+
+/// Forward-composite the ROOT degraded effect groups FLAT (effect-compositor.md
+/// § 2.3): for every group `i` with `allocate[i] == false` and `parent == None`,
+/// fold its `opacity` into the ALPHA slot of every member instance IN PLACE
+/// (quad alpha at [`ALPHA_FLOAT_OFFSET`](crate::render::instance::ALPHA_FLOAT_OFFSET),
+/// glyph alpha at `color[3]` =
+/// [`GLYPH_ALPHA_FLOAT_OFFSET`](crate::render::atlas::GLYPH_ALPHA_FLOAT_OFFSET)) and merge its instance ranges into the flat
+/// ranges so the node's flat WINDOW draw paints them — the group dims exactly
+/// once and paints flat, instead of vanishing.
+///
+/// **Two DIFFERENT gates per tier — alpha-fold vs range-merge (§ 2.3).** The
+/// alpha-fold and the range-merge answer to DIFFERENT invariants, so each tier
+/// passes two flags:
+///
+/// - `fold_*` (the BUFFER-repack signal): the alpha-fold runs IFF that tier's
+///   instance buffer was repacked from SOURCE this frame. A freshly repacked
+///   buffer carries SOURCE alpha (fold it once); a RETAINED buffer already holds
+///   last frame's fold (folding it again compounds to black). So the caller
+///   gates `fold_quad`/`fold_glyph` on the buffer-repack signals
+///   (`quad_dirty`/`glyph_dirty`), NOT a wider signal.
+///
+/// - `merge_*` (the PARTITION-rebuild signal): the range-merge into `*_flat` runs
+///   IFF the flat/group partition for that tier was REBUILT this frame, because the
+///   rebuild OVERWRITES `*_flat` wholesale and re-EXCLUDES the degraded group's
+///   range. The merge must re-add it every rebuild or the degraded group vanishes
+///   that frame. The quad partition rebuilds under `quad_dirty` (= the quad fold
+///   gate, so quad is symmetric), but the GLYPH partition rebuilds under the UNION
+///   `quad_dirty || glyph_dirty` (prepare.rs `partition_glyph_ranges`) — wider than
+///   the glyph buffer-repack gate. So on a quad-dirty-only frame with a live
+///   degraded glyph group, the glyph partition is rebuilt (range re-excluded) while
+///   the glyph buffer is RETAINED: `merge_glyph` must be true (re-add the range)
+///   while `fold_glyph` is false (the retained buffer already carries the fold).
+///   Conflating the two would drop the glyph range-merge on that frame and the
+///   degraded glyphs would vanish.
+///
+/// The merge operates on the (possibly retained, already-folded) buffer's range —
+/// correct, because the retained buffer still carries last frame's fold; only the
+/// PARTITION needs re-stitching, not the alpha.
+///
+/// **Scope: ROOT degraded groups only.** `plan_allocation` can degrade a NESTED
+/// child while its parent keeps a target; routing a nested child's ranges into
+/// the WINDOW flat draw paints it in the wrong space/clip and leaves the parent's
+/// composite sampling a target the child never reached. Forward-compositing a
+/// nested child correctly is a node-side change (route into the PARENT's step-1
+/// target pass) tracked as a follow-up. Here a nested degraded group
+/// `debug_assert!(false, …)`s (loud in dev/tests) and in release is left
+/// untouched + un-merged — no worse than today's vanish.
+#[allow(clippy::too_many_arguments)]
+pub fn fold_root_degraded_into_flat(
+    allocate: &[bool],
+    groups: &[DegradedGroup],
+    fold_quad: bool,
+    merge_quad: bool,
+    fold_glyph: bool,
+    merge_glyph: bool,
+    quad_raw: &mut [[f32; 17]],
+    glyph_raw: &mut [crate::render::atlas::GlyphAlphaInstance],
+    quad_flat: &mut Vec<Range<u32>>,
+    glyph_flat: &mut Vec<Range<u32>>,
+) {
+    use crate::render::instance::ALPHA_FLOAT_OFFSET;
+
+    /// The members of `span` that actually exist in `store`: indices past the
+    /// end are skipped and an inverted span yields nothing.
+    fn members<'a, T>(store: &'a mut [T], span: &Range<u32>) -> impl Iterator<Item = &'a mut T> {
+        let count = span.end.saturating_sub(span.start) as usize;
+        store.iter_mut().skip(span.start as usize).take(count)
+    }
+
+    // Lengths before any append: a tier is re-coalesced only if it grew.
+    let quad_flat_before = quad_flat.len();
+    let glyph_flat_before = glyph_flat.len();
+
+    for (i, group) in groups.iter().enumerate() {
+        // Only degraded groups participate (no `allocate` entry == allocated).
+        if allocate.get(i) != Some(&false) {
+            continue;
+        }
+        // MAJOR-1: a nested degraded child belongs in its PARENT's target, which
+        // this slice does not do — loud in debug, skipped (vanishes) in release.
+        if group.parent.is_some() {
+            debug_assert!(
+                false,
+                "nested degraded effect-group forward-composite into the parent \
+                 target is not yet implemented (follow-up); group {i} parent {:?} \
+                 — root-degraded only this slice (effect-compositor.md § 2.3)",
+                group.parent
+            );
+            continue;
+        }
+
+        let (quads, glyphs) = (&group.quad_range, &group.glyph_range);
+        let opacity = group.opacity;
+
+        // Quad tier — re-tint only a buffer repacked from source this frame
+        // (`fold_quad`); a retained buffer already carries the fold.
+        if fold_quad {
+            for inst in members(quad_raw, quads) {
+                inst[ALPHA_FLOAT_OFFSET] *= opacity;
+            }
+        }
+        // Re-add the range whenever the quad partition was rebuilt this frame
+        // (`merge_quad`), since the rebuild excluded it from the flat list.
+        if merge_quad && !quads.is_empty() {
+            quad_flat.push(quads.clone());
+        }
+
+        // Glyph tier — same split, but here the gates genuinely differ: the
+        // caller passes `fold_glyph` = glyph_dirty, `merge_glyph` = quad_dirty ||
+        // glyph_dirty, so a quad-dirty-only frame re-merges the range while the
+        // retained, already-folded alpha is left alone (the MAJOR-2 vanish fix).
+        if fold_glyph {
+            for inst in members(glyph_raw, glyphs) {
+                // Typed `color[3]` (= GLYPH_ALPHA_FLOAT_OFFSET in the raw view) —
+                // NOT the quad ALPHA_FLOAT_OFFSET (7), which would corrupt `uv[3]`.
+                inst.color[3] *= opacity;
+            }
+        }
+        if merge_glyph && !glyphs.is_empty() {
+            glyph_flat.push(glyphs.clone());
+        }
+    }
+
+    // Coalesce once per tier, and only a tier that actually grew, so the node's
+    // flat loop stays a clean complement.
+    if quad_flat.len() > quad_flat_before {
+        merge_ranges(quad_flat);
+    }
+    if glyph_flat.len() > glyph_flat_before {
+        merge_ranges(glyph_flat);
+    }
+}
+
 /// The pinned off-screen group-target descriptor (effect-compositor.md § 2.2):
 /// FIXED `Rgba16Float` (linear, NOT the view's SDR format) so group opacity +
 /// isolation composite in linear space; `RENDER_ATTACHMENT` (subtree renders
@@ -323,8 +506,15 @@ pub struct PreparedEffectGroups {
 #[derive(Component, Default, Clone)]
 pub struct PreparedEffectTargets {
     /// Per-group off-screen `Rgba16Float` targets (extract order). `None` == the
-    /// group degraded under budget (`plan_allocation` == false) and has no target
-    /// — the node skips it (v1: degraded groups draw flat, no per-child approx).
+    /// group degraded under budget (`plan_allocation` == false) and has no target.
+    /// The node's step-1 group pass `continue`s on a `None` target — but a ROOT
+    /// degraded group is NOT lost: `prepare_effect_groups` folded its `opacity`
+    /// into its member instances' alpha in place and merged its ranges into
+    /// `flat_ranges`/`glyph_flat_ranges`, so the FLAT window draw paints it
+    /// (effect-compositor.md § 2.3 forward-composite). A NESTED degraded group
+    /// (parent == Some) is the one case still skipped here (its correct
+    /// forward-composite is into the PARENT target, a node-side follow-up;
+    /// `fold_root_degraded_into_flat` debug-asserts on it).
     pub targets: Vec<Option<CachedTexture>>,
     /// Per-group placement: the logical→target view-uniform columns (to render
     /// the group's subtree INTO its target), the composite quad's logical bounds,
@@ -403,10 +593,22 @@ pub struct RtPoolStats {
 pub(crate) fn prepare_effect_groups(
     mut commands: Commands,
     render_device: Res<RenderDevice>,
+    render_queue: Res<RenderQueue>,
     mut texture_cache: ResMut<TextureCache>,
     extracted: Res<ExtractedEffectGroups>,
-    buffers: Res<BuiyInstanceBuffers>,
+    // MAJOR-3: the glyph + text-quad carriers `prepare_buiy_instances` packs from,
+    // so this system can reconstruct the SAME per-tier buffer-repack signals it
+    // saw this frame (Bevy change-detection is per-system; reconstructing the
+    // `is_changed()` gates here is valid). `ExtractedNodesView` (`nodes` below)
+    // and `extracted` (= `ExtractedEffectGroups`, the `groups` term) cover the
+    // rest of the quad-dirty signal.
+    glyphs: Res<crate::render::prepare::ExtractedGlyphs>,
+    text_quads: Res<crate::render::extract::ExtractedTextQuads>,
+    // ResMut so the degraded-group fold can re-tint the already-packed buffers
+    // in place + re-upload the touched ones (effect-compositor.md § 2.3).
+    mut buffers: ResMut<BuiyInstanceBuffers>,
     mut stats: ResMut<RtPoolStats>,
+    budget: Res<RtPoolBudget>,
     pipeline_cache: Res<PipelineCache>,
     composite_pipeline: Res<CompositePipeline>,
     mut group_pipelines: ResMut<BuiySpecializedPipelines>,
@@ -421,6 +623,18 @@ pub(crate) fn prepare_effect_groups(
     views: Query<(Entity, &ViewTarget, &Msaa)>,
     nodes: Res<crate::render::extract::ExtractedNodesView>,
 ) {
+    // MAJOR-2: reconstruct the SAME per-tier BUFFER-repack signals
+    // `prepare_buiy_instances` used this frame (prepare.rs § damage gate). The
+    // degraded-group fold re-tints a buffer IFF that buffer was repacked from
+    // SOURCE this frame; a retained buffer already carries last frame's fold and
+    // must NOT be re-folded (it would compound to black). These mirror
+    // `prepare.rs` `quad_dirty`/`glyph_dirty` exactly — quad on
+    // nodes|groups|text_quads, glyph on glyphs alone (the buffer-repack signal,
+    // which DIFFERS from the wider glyph-partition signal). Computed before the
+    // `extracted.0` shadow below so the `is_changed()` reads the `Res` wrappers.
+    let quad_dirty = nodes.is_changed() || extracted.is_changed() || text_quads.is_changed();
+    let glyph_dirty = glyphs.is_changed();
+
     let extracted = &extracted.0;
 
     // No live groups: clear the carriers off every view so a frame that drops
@@ -520,7 +734,7 @@ pub(crate) fn prepare_effect_groups(
             (extent, g.reason)
         })
         .collect();
-    let allocate = plan_allocation(&alloc_inputs, RT_POOL_BUDGET_BYTES);
+    let allocate = plan_allocation(&alloc_inputs, budget.0);
 
     // Build the post-order composite sequence over the parent links.
     let parents: Vec<Option<usize>> = extracted.iter().map(|g| g.parent).collect();
@@ -627,6 +841,66 @@ pub(crate) fn prepare_effect_groups(
         live_targets,
     };
 
+    // Degraded-group forward composite (effect-compositor.md § 2.3). A ROOT group
+    // that did NOT get a pooled target (`allocate[i] == false`) under budget
+    // pressure must paint FLAT with its `opacity` folded per-instance, not vanish.
+    // Gate on ANY degradation so the no-degradation steady state stays a zero
+    // fold + zero re-upload (gate-#14 budget). Re-uploads only the touched buffer,
+    // and only when that buffer was repacked from SOURCE this frame (the per-tier
+    // idempotency discipline — a retained buffer already holds the fold).
+    if allocate.iter().any(|a| !a) {
+        let degraded: Vec<DegradedGroup> = extracted
+            .iter()
+            .enumerate()
+            .map(|(i, g)| DegradedGroup {
+                quad_range: buffers.group_ranges.get(i).cloned().unwrap_or(0..0),
+                glyph_range: buffers.glyph_group_ranges.get(i).cloned().unwrap_or(0..0),
+                opacity: g.opacity,
+                parent: g.parent,
+            })
+            .collect();
+
+        // Borrow-split: `values_mut()` on each RawBufferVec + the flat-range vecs.
+        // `BuiyInstanceBuffers` exposes the raw quad/glyph stores and the flat
+        // ranges as distinct fields, so split them through a single `&mut buffers`.
+        // Per-tier gates (MAJOR-2). The ALPHA-fold gates on the BUFFER-repack
+        // signal so a retained (already-folded) buffer is left untouched. The
+        // RANGE-merge gates on the PARTITION-rebuild signal, because
+        // `prepare_buiy_instances` rebuilds (and re-excludes the degraded range
+        // from) each flat partition under that signal — the merge must re-add the
+        // range every rebuild. Quad: both are `quad_dirty` (the quad partition
+        // rebuilds under the quad gate). GLYPH: the fold is `glyph_dirty` (buffer
+        // repack) but the MERGE is `quad_dirty || glyph_dirty` (the glyph
+        // partition's union rebuild gate, prepare.rs `partition_glyph_ranges`) —
+        // so a quad-dirty-only frame re-merges the retained degraded glyph range
+        // instead of letting it vanish.
+        let merge_glyph = quad_dirty || glyph_dirty;
+        let buffers = &mut *buffers;
+        fold_root_degraded_into_flat(
+            &allocate,
+            &degraded,
+            quad_dirty,
+            quad_dirty,
+            glyph_dirty,
+            merge_glyph,
+            buffers.quad.values_mut(),
+            buffers.glyph.values_mut(),
+            &mut buffers.flat_ranges,
+            &mut buffers.glyph_flat_ranges,
+        );
+
+        // Re-upload only the buffer(s) whose CPU bytes the fold touched. The fold
+        // runs per tier iff that tier was repacked this frame, so the re-upload
+        // mirrors the same per-tier gate (a retained buffer was neither folded nor
+        // needs re-upload).
+        if quad_dirty {
+            buffers.quad.write_buffer(&render_device, &render_queue);
+        }
+        if glyph_dirty {
+            buffers.glyph.write_buffer(&render_device, &render_queue);
+        }
+    }
+
     let prepared = PreparedEffectGroups {
         groups,
         composite_order,
@@ -654,7 +928,7 @@ pub(crate) fn prepare_effect_groups(
 /// edge — the `BuiyRenderLabel` node group and its edges are owned by
 /// architecture.md § 1.3; the compositor's passes run *inside*
 /// [`BuiyNode::run`](super::node). It registers the per-`EffectGroup`
-/// [`prepare_effect_groups`] system, the [`RtPoolStats`] observable, and (via
+/// `prepare_effect_groups` system, the [`RtPoolStats`] observable, and (via
 /// [`super::composite::register`]) the composite-pipeline specialization cache.
 /// The device-owning composite resources (`CompositePipeline`) init in
 /// `finish` (`composite::register_gpu`).
@@ -667,6 +941,9 @@ pub(crate) fn register(render_app: &mut SubApp) {
     // the textured-quad composite pipeline) — device-free to init here; the
     // concrete pipeline ids materialize lazily through the `PipelineCache`.
     render_app.init_resource::<RtPoolStats>();
+    // The overridable RT-pool budget (defaults to `RT_POOL_BUDGET_BYTES`); a test
+    // inserts a small value to force the degradation path deterministically.
+    render_app.init_resource::<RtPoolBudget>();
     super::composite::register(render_app);
     // The per-`EffectGroup` prepare pass (effect-compositor.md § 1.1) attaches
     // in `RenderSystems::Prepare`. It runs AFTER `prepare_buiy_instances` so the
diff --git a/crates/buiy_core/src/render/extract.rs b/crates/buiy_core/src/render/extract.rs
index 49a38a3..b39676e 100644
--- a/crates/buiy_core/src/render/extract.rs
+++ b/crates/buiy_core/src/render/extract.rs
@@ -72,6 +72,20 @@ pub struct ExtractedNode {
     pub position: Vec2,
     /// Box size in logical px, from `ResolvedLayout.size`.
     pub size: Vec2,
+    /// The 2D linear part of `GlobalTransform`'s affine — the box-local →
+    /// window-logical basis, as column vectors `[col0, col1]` where
+    /// `col0 = [m00, m10]` and `col1 = [m01, m11]`. Applied per-vertex in the
+    /// quad/shadow vertex stage about the box-local origin (the corner the
+    /// composed matrix maps `0` to), so a rotated/scaled element paints with the
+    /// right orientation and size. Identity `[[1,0],[0,1]]` == no rotation/scale
+    /// (the byte-identical fast path). Pillar 5: this reads the propagated
+    /// `GlobalTransform`, NOT `ResolvedTransform` — the bridge already folded
+    /// `ResolvedTransform.matrix` into `Transform` so render == picking by
+    /// construction. FIDELITY: faithful for rotation + (non-)uniform scale;
+    /// skew / general `TransformMatrix::Matrix` are bounded by the bridge's
+    /// TRS-only `Transform::from_matrix` decompose (a lossy shear) — a separate
+    /// residual (clip-and-transform.md § B.5).
+    pub affine: [[f32; 2]; 2],
     /// Resolved background fill (already theme-resolved; `Color::NONE` ==
     /// transparent, extract emits no quad for it downstream).
     pub color: Color,
@@ -111,6 +125,13 @@ pub fn extracted_node_for(
     theme: &Theme,
 ) -> ExtractedNode {
     let translation = global_transform.translation();
+    // The 2D linear part of the composed affine (glam `Affine3A.matrix3`): xy of
+    // the x-axis is col0, xy of the y-axis is col1 (COLUMNS, not rows — a
+    // transpose would rotate the wrong way). For a pure rotation/scale the
+    // matrix maps box-local `0 -> 0`, so `translation.xy` stays the painted
+    // top-left and an identity transform yields the `[[1,0],[0,1]]` fast path.
+    let m = global_transform.affine().matrix3;
+    let affine = [[m.x_axis.x, m.x_axis.y], [m.y_axis.x, m.y_axis.y]];
     let color = match background {
         Some(bg) => crate::render::color::resolve_token(&bg.color, theme),
         None => Color::NONE,
@@ -122,6 +143,7 @@ pub fn extracted_node_for(
         color,
         clip: clip.copied(),
         group: None,
+        affine,
     }
 }
 
diff --git a/crates/buiy_core/src/render/instance.rs b/crates/buiy_core/src/render/instance.rs
index dfe0779..98e7719 100644
--- a/crates/buiy_core/src/render/instance.rs
+++ b/crates/buiy_core/src/render/instance.rs
@@ -1,6 +1,6 @@
 //! Per-instance data layout for the rounded-rect pipeline (the view-uniform
 //! path). The struct stride must equal the per-instance `array_stride` declared
-//! in `pipeline.rs::register` (52 B). Records stay in LOGICAL-pixel units: the
+//! in `pipeline.rs::register` (68 B). Records stay in LOGICAL-pixel units: the
 //! per-view [`BuiyViewUniform`] does the logical → clip transform in the vertex
 //! stage, so the Phase-0 per-instance y-flip / `2/min(w,h)` radius hack is
 //! retired (`buiy-render-pipeline-design`, architecture.md § 3).
@@ -10,6 +10,14 @@
 //! per-batch scissor or re-sort (one order-safe draw). A node with no clip packs
 //! the full-view sentinel (`[±INFINITY]`) so the discard never fires.
 //!
+//! It also carries the 2D affine basis (`affine`, R1 — the `[m00,m10,m01,m11]`
+//! columns of `GlobalTransform`'s 2D linear part), APPENDED after the clip
+//! fields so every prior field offset stays byte-stable (notably
+//! [`COLOR_FLOAT_OFFSET`] / [`ALPHA_FLOAT_OFFSET`], which R2's degraded-group
+//! re-tint reads). The vertex stage transforms each box-local corner by it, so
+//! a rotated/scaled element paints with the right orientation/size. Identity
+//! `[1,0,0,1]` == no transform (the byte-identical fast path).
+//!
 //! [`BuiyViewUniform`]: crate::render::view_uniform::BuiyViewUniform
 
 use crate::render::DrawData;
@@ -18,11 +26,30 @@ use bevy::prelude::*;
 use bytemuck::{Pod, Zeroable};
 
 /// Stride of the logical-pixel [`PackedInstance`] in bytes. Must match the
-/// per-instance `array_stride` declared in `pipeline.rs::register` (52 B). The
+/// per-instance `array_stride` declared in `pipeline.rs::register` (68 B). The
 /// values are LOGICAL pixels — the GPU view uniform
 /// ([`crate::render::view_uniform::BuiyViewUniform`]) applies the logical->clip
 /// transform in the vertex stage.
-pub const PACKED_INSTANCE_STRIDE_BYTES: usize = 52;
+pub const PACKED_INSTANCE_STRIDE_BYTES: usize = 68;
+
+/// Float index of the per-instance color block (`color[0]`) in the raw
+/// [`crate::render::buckets::packed_to_raw`] record. NAMED so the color/alpha
+/// offset is referenced symbolically everywhere (R1 HARD CONSTRAINT): the
+/// append-after-13 affine layout exists precisely to keep this offset stable so
+/// R2's degraded-group re-tint can index it.
+pub const COLOR_FLOAT_OFFSET: usize = 4;
+
+/// Float index of the per-instance alpha (`color[3]`) in the raw record —
+/// `COLOR_FLOAT_OFFSET + 3`. R2's degraded-group forward-composite re-tints by
+/// reading the alpha at this offset; it MUST stay `7` across any layout growth
+/// (the affine basis appends after the clip fields, never before color).
+pub const ALPHA_FLOAT_OFFSET: usize = COLOR_FLOAT_OFFSET + 3;
+
+/// The identity 2D affine basis `[m00, m10, m01, m11] = [1, 0, 0, 1]` — no
+/// rotation/scale. Quads with no `GlobalTransform` linear part (the `DrawData`
+/// and text-quad packers) carry this, so their packed bytes are unchanged by
+/// R1's growth except for the four appended identity floats.
+const IDENTITY_AFFINE: [f32; 4] = [1.0, 0.0, 0.0, 1.0];
 
 /// Full-view clip sentinel for an unclipped instance (`ExtractedNode.clip ==
 /// None`): `clip_min = [-INFINITY; 2]`, `clip_max = [+INFINITY; 2]`. For any
@@ -55,6 +82,12 @@ pub struct PackedInstance {
     /// Clip AABB maximum in LOGICAL px. The fragment discards `frag_pos > clip_max`;
     /// `[+INFINITY; 2]` = no upper bound (the full-view sentinel).
     pub clip_max: [f32; 2],
+    /// The 2D affine basis `[m00, m10, m01, m11]` (the column vectors of
+    /// `GlobalTransform`'s 2D linear part) — R1. APPENDED after the clip fields
+    /// so every prior offset stays byte-stable (the R2 dependency). The vertex
+    /// stage maps each box-local corner `c` to `mat2(col0, col1) * c`. Identity
+    /// `[1, 0, 0, 1]` paints axis-aligned (no rotation/scale).
+    pub affine: [f32; 4],
 }
 
 /// Pack one [`DrawData`] into a logical-pixel [`PackedInstance`]. The clip
@@ -70,6 +103,8 @@ pub fn pack_instance(draw: &DrawData) -> PackedInstance {
         radius: draw.radius,
         clip_min: CLIP_SENTINEL_MIN,
         clip_max: CLIP_SENTINEL_MAX,
+        // `DrawData` has no transform; paint axis-aligned (identity basis).
+        affine: IDENTITY_AFFINE,
     }
 }
 
@@ -94,6 +129,15 @@ pub fn pack_extracted(node: &ExtractedNode) -> PackedInstance {
         radius: 0.0,
         clip_min,
         clip_max,
+        // The 2D affine basis, flattened to columns [m00, m10, m01, m11] (R1):
+        // col0 = node.affine[0], col1 = node.affine[1]. The vertex stage applies
+        // it about the box-local origin so rotation/scale paint correctly.
+        affine: [
+            node.affine[0][0],
+            node.affine[0][1],
+            node.affine[1][0],
+            node.affine[1][1],
+        ],
     }
 }
 
@@ -113,13 +157,15 @@ pub fn pack_text_quad(quad: &TextQuad) -> PackedInstance {
         radius: 0.0,
         clip_min,
         clip_max,
+        // Text quads carry no transform; paint axis-aligned (identity basis).
+        affine: IDENTITY_AFFINE,
     }
 }
 
-/// `true` iff the raw `[f32; 13]` bucket layout is byte-equal to
+/// `true` iff the raw `[f32; 17]` bucket layout is byte-equal to
 /// [`PackedInstance`]'s stride (the pipeline-descriptor invariant). Pins the
 /// agreement the instanced draw relies on.
 pub fn packed_raw_stride_agrees() -> bool {
-    std::mem::size_of::<PackedInstance>() == std::mem::size_of::<[f32; 13]>()
-        && PACKED_INSTANCE_STRIDE_BYTES == std::mem::size_of::<[f32; 13]>()
+    std::mem::size_of::<PackedInstance>() == std::mem::size_of::<[f32; 17]>()
+        && PACKED_INSTANCE_STRIDE_BYTES == std::mem::size_of::<[f32; 17]>()
 }
diff --git a/crates/buiy_core/src/render/node.rs b/crates/buiy_core/src/render/node.rs
index 199d3f5..de307d4 100644
--- a/crates/buiy_core/src/render/node.rs
+++ b/crates/buiy_core/src/render/node.rs
@@ -137,8 +137,14 @@ impl ViewNode for BuiyNode {
                 .and_then(|a| a.coverage_bind_group());
             for group in &prepared.groups {
                 let Some(target) = targets.targets.get(group.index).and_then(|t| t.as_ref()) else {
-                    // Degraded group (no target): members are SKIPPED, not drawn
-                    // flat — see the follow-ups entry T8 files.
+                    // Degraded group (no target): skip the off-screen pass here. A
+                    // ROOT degraded group is NOT lost — `prepare_effect_groups`
+                    // folded its `opacity` into its members' alpha and merged its
+                    // ranges into the flat draw, so the flat WINDOW pass below
+                    // paints it (effect-compositor.md § 2.3 forward-composite). A
+                    // NESTED degraded group is still skipped (its correct
+                    // forward-composite into the parent target is a node-side
+                    // follow-up; the prepare fold debug-asserts on it).
                     continue;
                 };
                 let placement = &targets.placements[group.index];
diff --git a/crates/buiy_core/src/render/prepare.rs b/crates/buiy_core/src/render/prepare.rs
index 518250d..de8462d 100644
--- a/crates/buiy_core/src/render/prepare.rs
+++ b/crates/buiy_core/src/render/prepare.rs
@@ -82,13 +82,13 @@ pub struct GlyphEntityRun {
 /// carriers to components together when the view-entity routing lands.
 ///
 /// The quad instance store is a [`RawBufferVec`] (not a `BufferVec`): the
-/// instance record is a raw `[f32; 13]` POD vertex blob (the pipeline-descriptor
+/// instance record is a raw `[f32; 17]` POD vertex blob (the pipeline-descriptor
 /// layout), which is `NoUninit` but **not** a `ShaderType`, so it rides the
 /// raw, CPU-readable vertex path rather than the std140/encase `BufferVec` path.
 #[derive(Resource)]
 pub struct BuiyInstanceBuffers {
     /// Quad-family instances (the v1 primitive set). Grows in place.
-    pub quad: RawBufferVec<[f32; 13]>,
+    pub quad: RawBufferVec<[f32; 17]>,
     /// Coverage-glyph instances (the alpha-as-color primitive,
     /// atlas-and-text-seam.md § 4.1). A `RawBufferVec<GlyphAlphaInstance>` for
     /// the same reason as `quad`: `GlyphAlphaInstance` is a raw `#[repr(C)]`
@@ -181,9 +181,9 @@ pub struct BufferUploadStats {
 /// R5's `ExtractedNodes.nodes` is fed to [`pack_view`] directly — no `DrawData`
 /// adapter — so the prepare phase consumes R5's component with no parallel
 /// carrier (the packing seam after Task 6's flip).
-pub fn pack_extracted_nodes(nodes: &ExtractedNodes) -> (Vec<[f32; 13]>, [f32; 12]) {
+pub fn pack_extracted_nodes(nodes: &ExtractedNodes) -> (Vec<[f32; 17]>, [f32; 12]) {
     let buckets = pack_view(&nodes.nodes);
-    let instances: Vec<[f32; 13]> = buckets
+    let instances: Vec<[f32; 17]> = buckets
         .batches()
         .flat_map(|(_key, batch)| batch.iter().copied())
         .collect();
diff --git a/crates/buiy_core/src/render/primitive.rs b/crates/buiy_core/src/render/primitive.rs
index de9011d..fbbd1d2 100644
--- a/crates/buiy_core/src/render/primitive.rs
+++ b/crates/buiy_core/src/render/primitive.rs
@@ -13,8 +13,10 @@
 //! module imports it and adds only the `(kind, format)` specialization key.
 //!
 //! The per-instance vertex layout carries the R8b clip AABB
-//! (`clip_min`/`clip_max`) at `@location(6)`/`(7)`, lifting the instance stride
-//! to 52 B; the quad-family shaders discard fragments outside it.
+//! (`clip_min`/`clip_max`) at `@location(6)`/`(7)` and the R1 2D affine basis
+//! (`affine_col0`/`affine_col1`) at `@location(8)`/`(9)`, lifting the instance
+//! stride to 68 B; the quad-family shaders discard fragments outside the clip
+//! and transform each box-local corner by the affine.
 
 use bevy::mesh::VertexBufferLayout;
 use bevy::render::render_resource::{
@@ -62,10 +64,10 @@ pub struct BuiyPrimitives;
 
 impl BuiyPrimitives {
     /// The two interleaved vertex-buffer layouts shared by every quad-family
-    /// primitive (static unit quad, stride 16; per-instance record, stride 52).
+    /// primitive (static unit quad, stride 16; per-instance record, stride 68).
     /// The instance record carries the per-primitive clip AABB at
-    /// `@location(6)`/`(7)` (R8b); its `array_stride` tracks
-    /// [`PACKED_INSTANCE_STRIDE_BYTES`] (52 B).
+    /// `@location(6)`/`(7)` (R8b) and the 2D affine basis at `@location(8)`/`(9)`
+    /// (R1); its `array_stride` tracks [`PACKED_INSTANCE_STRIDE_BYTES`] (68 B).
     ///
     /// [`PACKED_INSTANCE_STRIDE_BYTES`]: crate::render::instance::PACKED_INSTANCE_STRIDE_BYTES
     fn quad_family_vertex_buffers() -> Vec<VertexBufferLayout> {
@@ -87,7 +89,7 @@ impl BuiyPrimitives {
                 ],
             },
             VertexBufferLayout {
-                array_stride: 52,
+                array_stride: 68,
                 step_mode: VertexStepMode::Instance,
                 attributes: vec![
                     VertexAttribute {
@@ -123,6 +125,20 @@ impl BuiyPrimitives {
                         offset: 44,
                         shader_location: 7,
                     },
+                    // R1 2D affine basis: `affine_col0` @ 52, `affine_col1` @ 60
+                    // — appended AFTER the clip fields so offsets 0..52 stay
+                    // byte-stable (the R2 dependency). See `PackedInstance.affine`
+                    // and both quad-family shaders' `Instance.affine_col0/1`.
+                    VertexAttribute {
+                        format: VertexFormat::Float32x2,
+                        offset: 52,
+                        shader_location: 8,
+                    },
+                    VertexAttribute {
+                        format: VertexFormat::Float32x2,
+                        offset: 60,
+                        shader_location: 9,
+                    },
                 ],
             },
         ]
@@ -235,7 +251,9 @@ impl SpecializedRenderPipeline for BuiyPrimitives {
             vec![view_uniform_layout_descriptor()]
         };
         // The glyph instance record is `GlyphAlphaInstance` (stride 68), a
-        // different layout from the quad family's `PackedInstance` (stride 52).
+        // DISTINCT layout from the quad family's `PackedInstance` — even though
+        // both strides are now 68 B (R1), the attr sets, raw types ([f32;17] vs
+        // GlyphAlphaInstance), and pipelines differ and must not be conflated.
         let buffers = if is_glyph {
             Self::glyph_vertex_buffers()
         } else {
diff --git a/crates/buiy_core/src/render/shader.wgsl b/crates/buiy_core/src/render/shader.wgsl
index 6b3b036..e316e24 100644
--- a/crates/buiy_core/src/render/shader.wgsl
+++ b/crates/buiy_core/src/render/shader.wgsl
@@ -24,15 +24,17 @@ struct Instance {
     @location(5) radius: f32,            // logical px
     @location(6) clip_min: vec2<f32>,   // logical px, clip AABB min (-inf = none)
     @location(7) clip_max: vec2<f32>,   // logical px, clip AABB max (+inf = none)
+    @location(8) affine_col0: vec2<f32>, // 2D affine basis col0 = [m00, m10]
+    @location(9) affine_col1: vec2<f32>, // 2D affine basis col1 = [m01, m11]
 };
 
 struct VertexOut {
     @builtin(position) clip_position: vec4<f32>,
-    @location(0) local_uv: vec2<f32>,   // -1..+1 across the rect
-    @location(1) half_size: vec2<f32>,  // logical px
+    @location(0) local_uv: vec2<f32>,   // -1..+1 across the rect (box-local, rotation-invariant)
+    @location(1) half_size: vec2<f32>,  // logical px (box-local SDF half-extent)
     @location(2) color: vec4<f32>,
     @location(3) radius: f32,            // logical px
-    @location(4) rect_center: vec2<f32>, // logical px, window-relative
+    @location(4) frag_logical: vec2<f32>, // affine-transformed window-logical corner (slot 4, was the axis-aligned center)
     @location(5) clip_min: vec2<f32>,   // logical px (clip AABB, ClipRect space)
     @location(6) clip_max: vec2<f32>,   // logical px (clip AABB, ClipRect space)
 };
@@ -44,13 +46,20 @@ fn logical_to_clip(p: vec2<f32>) -> vec2<f32> {
 @vertex
 fn vertex(v: Vertex, i: Instance) -> VertexOut {
     var out: VertexOut;
-    let logical = i.rect_pos + v.uv * i.rect_size; // logical-px corner
+    // R1: transform the box-local corner by the 2D affine BEFORE the
+    // logical->clip view map. The basis is linear, so box-local 0 maps to 0
+    // (rotation/scale pivot about rect_pos), and an identity basis [1,0,0,1]
+    // yields rect_pos + local (byte-identical to the pre-R1 axis-aligned path).
+    let local = v.uv * i.rect_size;                // box-local corner (top-left at 0)
+    let logical = i.rect_pos + mat2x2<f32>(i.affine_col0, i.affine_col1) * local;
     out.clip_position = vec4<f32>(logical_to_clip(logical), 0.0, 1.0);
     out.local_uv = v.uv * 2.0 - 1.0;
     out.half_size = i.rect_size * 0.5;             // positive — no abs needed
     out.color = i.color;
     out.radius = i.radius;
-    out.rect_center = i.rect_pos + out.half_size;  // logical px, window-relative
+    // The affine is linear, so the interpolated frag_logical is the correct
+    // transformed window-space point for the clip-AABB discard.
+    out.frag_logical = logical;
     out.clip_min = i.clip_min;
     out.clip_max = i.clip_max;
     return out;
@@ -67,7 +76,9 @@ fn fragment(in: VertexOut) -> @location(0) vec4<f32> {
     // Per-primitive clip AABB (R8b): discard fragments outside [clip_min,
     // clip_max] in logical-px window space — the same space as ClipRect. The
     // full-view sentinel (±inf) makes this never fire (unclipped / top-layer).
-    let frag_pos = in.rect_center + in.local_uv * in.half_size;
+    // frag_logical is the interpolated affine-transformed window-logical point
+    // (R1) — not the old axis-aligned `rect_center + local_uv * half_size`.
+    let frag_pos = in.frag_logical;
     if any(frag_pos < in.clip_min) || any(frag_pos > in.clip_max) {
         return vec4<f32>(0.0, 0.0, 0.0, 0.0);
     }
diff --git a/crates/buiy_core/src/render/shadow.wgsl b/crates/buiy_core/src/render/shadow.wgsl
index 56897c1..d8c0755 100644
--- a/crates/buiy_core/src/render/shadow.wgsl
+++ b/crates/buiy_core/src/render/shadow.wgsl
@@ -1,6 +1,6 @@
 // Buiy box-shadow shader (octet ..02). Closed-form Gaussian-blurred
 // rounded-rect coverage — one draw per shadow, no convolution pass.
-// Inputs match the quad instance layout (stride 52); the instance `blur`
+// Inputs match the quad instance layout (stride 68); the instance `blur`
 // field carries the shadow's effective blur sigma in logical px for this
 // primitive (the sibling component-model phase maps `BoxShadow.blur` into it).
 //
@@ -31,15 +31,17 @@ struct Instance {
     @location(5) blur: f32,             // logical px, effective blur sigma
     @location(6) clip_min: vec2<f32>,   // logical px, clip AABB min (-inf = none)
     @location(7) clip_max: vec2<f32>,   // logical px, clip AABB max (+inf = none)
+    @location(8) affine_col0: vec2<f32>, // 2D affine basis col0 = [m00, m10]
+    @location(9) affine_col1: vec2<f32>, // 2D affine basis col1 = [m01, m11]
 };
 
 struct VertexOut {
     @builtin(position) clip_position: vec4<f32>,
-    @location(0) local_uv: vec2<f32>,   // -1..+1 across the rect
-    @location(1) half_size: vec2<f32>,  // logical px
+    @location(0) local_uv: vec2<f32>,   // -1..+1 across the rect (box-local, rotation-invariant)
+    @location(1) half_size: vec2<f32>,  // logical px (box-local SDF half-extent)
     @location(2) color: vec4<f32>,
     @location(3) blur: f32,             // logical px
-    @location(4) rect_center: vec2<f32>, // logical px, window-relative
+    @location(4) frag_logical: vec2<f32>, // affine-transformed window-logical corner (slot 4, was the axis-aligned center)
     @location(5) clip_min: vec2<f32>,   // logical px (clip AABB, ClipRect space)
     @location(6) clip_max: vec2<f32>,   // logical px (clip AABB, ClipRect space)
 };
@@ -51,13 +53,19 @@ fn logical_to_clip(p: vec2<f32>) -> vec2<f32> {
 @vertex
 fn vertex(v: Vertex, i: Instance) -> VertexOut {
     var out: VertexOut;
-    let logical = i.rect_pos + v.uv * i.rect_size; // logical-px corner
+    // R1: transform the box-local corner by the 2D affine BEFORE the
+    // logical->clip view map (identity basis [1,0,0,1] -> rect_pos + local,
+    // byte-identical to the pre-R1 axis-aligned path).
+    let local = v.uv * i.rect_size;                // box-local corner (top-left at 0)
+    let logical = i.rect_pos + mat2x2<f32>(i.affine_col0, i.affine_col1) * local;
     out.clip_position = vec4<f32>(logical_to_clip(logical), 0.0, 1.0);
     out.local_uv = v.uv * 2.0 - 1.0;
     out.half_size = i.rect_size * 0.5;             // positive — no abs needed
     out.color = i.color;
     out.blur = i.blur;
-    out.rect_center = i.rect_pos + out.half_size;  // logical px, window-relative
+    // The affine is linear, so the interpolated frag_logical is the correct
+    // transformed window-space point for the clip-AABB discard.
+    out.frag_logical = logical;
     out.clip_min = i.clip_min;
     out.clip_max = i.clip_max;
     return out;
@@ -85,7 +93,8 @@ fn fragment(in: VertexOut) -> @location(0) vec4<f32> {
     // Per-primitive clip AABB (R8b): discard fragments outside [clip_min,
     // clip_max] in logical-px window space — the same space as ClipRect. The
     // full-view sentinel (±inf) makes this never fire (unclipped / top-layer).
-    let frag_pos = in.rect_center + in.local_uv * in.half_size;
+    // frag_logical is the interpolated affine-transformed window-logical point (R1).
+    let frag_pos = in.frag_logical;
     if any(frag_pos < in.clip_min) || any(frag_pos > in.clip_max) {
         return vec4<f32>(0.0, 0.0, 0.0, 0.0);
     }
diff --git a/crates/buiy_core/tests/render_buckets.rs b/crates/buiy_core/tests/render_buckets.rs
index eb54914..e892721 100644
--- a/crates/buiy_core/tests/render_buckets.rs
+++ b/crates/buiy_core/tests/render_buckets.rs
@@ -74,9 +74,9 @@ fn buckets_group_pushed_instances_by_key() {
         primitive: BuiyPrimitiveKind::Shadow,
         layer: 0,
     };
-    b.push(q0, [0.0; 13]);
-    b.push(q0, [1.0; 13]);
-    b.push(s0, [2.0; 13]);
+    b.push(q0, [0.0; 17]);
+    b.push(q0, [1.0; 17]);
+    b.push(s0, [2.0; 17]);
     assert_eq!(b.len(q0), 2);
     assert_eq!(b.len(s0), 1);
     assert_eq!(b.total_instances(), 3);
@@ -98,21 +98,21 @@ fn buckets_iterate_in_paint_order() {
             primitive: BuiyPrimitiveKind::Quad,
             layer: 0,
         },
-        [0.0; 13],
+        [0.0; 17],
     );
     b.push(
         PrimitiveBatchKey {
             primitive: BuiyPrimitiveKind::Shadow,
             layer: 0,
         },
-        [0.0; 13],
+        [0.0; 17],
     );
     b.push(
         PrimitiveBatchKey {
             primitive: BuiyPrimitiveKind::Quad,
             layer: 1,
         },
-        [0.0; 13],
+        [0.0; 17],
     );
     let order: Vec<_> = b.batches().map(|(k, _)| *k).collect();
     // shadow@0, quad@0, then quad@1 — sorted ascending.
@@ -140,16 +140,32 @@ fn node(entity: u32, position: Vec2, size: Vec2, color: Color) -> ExtractedNode
         color,
         clip: None,
         group: None,
+        affine: [[1.0, 0.0], [0.0, 1.0]],
     }
 }
 
 #[test]
 fn raw_layout_stride_agrees_with_struct() {
-    // The [f32;13] the bucket holds must be byte-identical in size to the
-    // PackedInstance struct the pipeline descriptor declares (52 B). If this
+    // The [f32;17] the bucket holds must be byte-identical in size to the
+    // PackedInstance struct the pipeline descriptor declares (68 B). If this
     // ever drifts, the instanced draw reads garbage.
     assert!(packed_raw_stride_agrees());
-    assert_eq!(std::mem::size_of::<[f32; 13]>(), 52);
+    assert_eq!(std::mem::size_of::<[f32; 17]>(), 68);
+}
+
+#[test]
+fn packed_to_raw_appends_affine_via_offset_consts() {
+    // packed_to_raw returns 17 floats: the affine basis at [13..17], and the
+    // alpha at ALPHA_FLOAT_OFFSET unchanged (the R2 re-tint invariant).
+    use buiy_core::render::buckets::packed_to_raw;
+    use buiy_core::render::instance::ALPHA_FLOAT_OFFSET;
+    let mut n = node(1, Vec2::ZERO, Vec2::splat(10.0), Color::WHITE);
+    n.affine = [[2.0, 0.0], [0.0, 3.0]];
+    let p = pack_extracted(&n);
+    let raw = packed_to_raw(&p);
+    assert_eq!(raw.len(), 17);
+    assert_eq!(&raw[13..17], &[2.0, 0.0, 0.0, 3.0]);
+    assert_eq!(raw[ALPHA_FLOAT_OFFSET], p.color[3]);
 }
 
 #[test]
@@ -234,6 +250,7 @@ fn grouped(entity: u32, color: Color, group: Option<usize>) -> ExtractedNode {
         color,
         clip: None,
         group,
+        affine: [[1.0, 0.0], [0.0, 1.0]],
     }
 }
 
diff --git a/crates/buiy_core/tests/render_compositor.rs b/crates/buiy_core/tests/render_compositor.rs
index 945c646..8d66ba9 100644
--- a/crates/buiy_core/tests/render_compositor.rs
+++ b/crates/buiy_core/tests/render_compositor.rs
@@ -368,3 +368,376 @@ fn churn_never_exceeds_rt_pool_budget() {
     );
     assert!(plan.iter().any(|&a| !a), "some groups degraded under churn");
 }
+
+// ---------------------------------------------------------------------------
+// R2 — degraded effect groups forward-composite flat (effect-compositor.md § 2.3)
+// ---------------------------------------------------------------------------
+
+use buiy_core::render::atlas::{GLYPH_ALPHA_FLOAT_OFFSET, GlyphAlphaInstance};
+use buiy_core::render::compositor::{DegradedGroup, fold_root_degraded_into_flat};
+use buiy_core::render::instance::ALPHA_FLOAT_OFFSET;
+use std::ops::Range;
+
+/// A `[f32;17]` quad record with a known alpha at `ALPHA_FLOAT_OFFSET` and a
+/// sentinel in the neighbouring slots so an off-by-one write is caught.
+fn quad_with_alpha(alpha: f32) -> [f32; 17] {
+    let mut r = [0.0f32; 17];
+    // Fill with a recognizable ramp so a stray write to the wrong index shows.
+    for (i, v) in r.iter_mut().enumerate() {
+        *v = i as f32;
+    }
+    r[ALPHA_FLOAT_OFFSET] = alpha;
+    r
+}
+
+fn glyph_with_alpha(alpha: f32) -> GlyphAlphaInstance {
+    GlyphAlphaInstance {
+        rect: [1.0, 2.0, 3.0, 4.0],
+        uv: [5.0, 6.0, 7.0, 8.0],
+        color: [0.1, 0.2, 0.3, alpha],
+        clip: [9.0, 10.0, 11.0, 12.0],
+        page: 0,
+    }
+}
+
+#[test]
+fn degraded_fold_multiplies_quad_alpha_and_merges_flat_range() {
+    // Two ROOT groups: A degraded (opacity 0.5), B keeps its target.
+    // Quad layout: A's members [0,2), B's members [2,4), a non-group run [4,6).
+    let mut quad: Vec<[f32; 17]> = (0..6).map(|i| quad_with_alpha(0.8 + i as f32)).collect();
+    let mut glyph: Vec<GlyphAlphaInstance> = Vec::new();
+    // The flat ranges as prepare's partition would hand them: the non-group
+    // tail [4,6) is the only flat run before the fold (group members A and B
+    // are excluded). `iter::once` sidesteps the `single_range_in_vec_init`
+    // lint, which fires on both `vec![4..6]` and `[4..6]` array initializers.
+    let mut quad_flat: Vec<Range<u32>> = std::iter::once(4..6).collect();
+    let mut glyph_flat: Vec<Range<u32>> = vec![];
+
+    let original: Vec<[f32; 17]> = quad.clone();
+
+    let groups = [
+        DegradedGroup {
+            quad_range: 0..2,
+            glyph_range: 0..0,
+            opacity: 0.5,
+            parent: None,
+        },
+        DegradedGroup {
+            quad_range: 2..4,
+            glyph_range: 0..0,
+            opacity: 0.7,
+            parent: None,
+        },
+    ];
+    let allocate = [false, true]; // A degraded, B allocated.
+
+    fold_root_degraded_into_flat(
+        &allocate,
+        &groups,
+        true, // fold_quad
+        true, // merge_quad
+        true, // fold_glyph
+        true, // merge_glyph
+        &mut quad,
+        &mut glyph,
+        &mut quad_flat,
+        &mut glyph_flat,
+    );
+
+    // (a) every instance in A's range dimmed by 0.5, read at ALPHA_FLOAT_OFFSET.
+    for i in 0..2 {
+        let want = original[i][ALPHA_FLOAT_OFFSET] * 0.5;
+        assert!(
+            (quad[i][ALPHA_FLOAT_OFFSET] - want).abs() < 1e-6,
+            "A instance {i} alpha folded by 0.5"
+        );
+        // Neighbouring slots untouched (no off-by-one).
+        assert_eq!(
+            quad[i][ALPHA_FLOAT_OFFSET - 1],
+            original[i][ALPHA_FLOAT_OFFSET - 1]
+        );
+        assert_eq!(
+            quad[i][ALPHA_FLOAT_OFFSET + 1],
+            original[i][ALPHA_FLOAT_OFFSET + 1]
+        );
+    }
+    // (b) B's range (allocated, keeps a target) is unchanged.
+    for i in 2..4 {
+        assert_eq!(
+            quad[i], original[i],
+            "B instance {i} unchanged (not degraded)"
+        );
+    }
+    // (c) A's range is merged into flat ranges; B's stays excluded.
+    assert!(
+        quad_flat.contains(&(0..2)),
+        "A's degraded range merged into flat: {quad_flat:?}"
+    );
+    assert!(
+        !quad_flat.iter().any(|r| r.start == 2),
+        "B's range stays excluded from flat: {quad_flat:?}"
+    );
+    // (d) coalescing + order: ranges sorted, no overlaps.
+    for w in quad_flat.windows(2) {
+        assert!(
+            w[0].end <= w[1].start,
+            "flat ranges sorted & disjoint: {quad_flat:?}"
+        );
+    }
+}
+
+#[test]
+fn degraded_fold_coalesces_adjacent_flat_runs() {
+    // A degraded group [2,4) sits exactly between two existing flat runs
+    // [0,2) and [4,6): merging must coalesce all three into [0,6).
+    let mut quad: Vec<[f32; 17]> = (0..6).map(|_| quad_with_alpha(1.0)).collect();
+    let mut glyph: Vec<GlyphAlphaInstance> = Vec::new();
+    let mut quad_flat: Vec<Range<u32>> = vec![0..2, 4..6];
+    let mut glyph_flat: Vec<Range<u32>> = vec![];
+
+    let groups = [DegradedGroup {
+        quad_range: 2..4,
+        glyph_range: 0..0,
+        opacity: 0.25,
+        parent: None,
+    }];
+    fold_root_degraded_into_flat(
+        &[false],
+        &groups,
+        true,
+        true,
+        true,
+        true,
+        &mut quad,
+        &mut glyph,
+        &mut quad_flat,
+        &mut glyph_flat,
+    );
+    assert_eq!(
+        quad_flat,
+        vec![0..6],
+        "adjacent runs coalesce: {quad_flat:?}"
+    );
+}
+
+#[test]
+fn degraded_fold_multiplies_glyph_alpha_at_offset_11() {
+    // The glyph tier folds color[3] (raw float index 11), NOT offset 7.
+    let mut quad: Vec<[f32; 17]> = Vec::new();
+    let mut glyph: Vec<GlyphAlphaInstance> = (0..3)
+        .map(|i| glyph_with_alpha(0.4 + i as f32 * 0.1))
+        .collect();
+    let mut quad_flat: Vec<Range<u32>> = vec![];
+    let mut glyph_flat: Vec<Range<u32>> = vec![];
+    let original = glyph.clone();
+
+    let groups = [DegradedGroup {
+        quad_range: 0..0,
+        glyph_range: 0..3,
+        opacity: 0.5,
+        parent: None,
+    }];
+    fold_root_degraded_into_flat(
+        &[false],
+        &groups,
+        true,
+        true,
+        true,
+        true,
+        &mut quad,
+        &mut glyph,
+        &mut quad_flat,
+        &mut glyph_flat,
+    );
+
+    for i in 0..3 {
+        // color[3] dimmed.
+        assert!(
+            (glyph[i].color[3] - original[i].color[3] * 0.5).abs() < 1e-6,
+            "glyph {i} alpha folded at color[3]"
+        );
+        // Raw-view parity: the named const points at color[3] (= float idx 11).
+        assert_eq!(GLYPH_ALPHA_FLOAT_OFFSET, 11);
+        let raw: &[f32; 17] = bytemuck::cast_ref::<GlyphAlphaInstance, [f32; 17]>(&glyph[i]);
+        assert!(
+            (raw[GLYPH_ALPHA_FLOAT_OFFSET] - glyph[i].color[3]).abs() < 1e-6,
+            "raw float index 11 == color[3]"
+        );
+        // uv/clip/rect untouched — proves we did NOT write offset 7 (= uv[3]).
+        assert_eq!(glyph[i].uv, original[i].uv, "uv untouched");
+        assert_eq!(glyph[i].clip, original[i].clip, "clip untouched");
+        assert_eq!(glyph[i].rect, original[i].rect, "rect untouched");
+        assert_eq!(
+            glyph[i].color[0], original[i].color[0],
+            "color.rgb untouched"
+        );
+    }
+    assert!(glyph_flat.contains(&(0..3)), "glyph range merged into flat");
+}
+
+#[test]
+fn degraded_fold_reads_source_alpha_not_accumulated() {
+    // The fn computes source*opacity over the value it READS once — it does not
+    // accumulate. (The once-per-pack contract is enforced by the system gate;
+    // here we pin that ONE call yields exactly source*opacity.)
+    let mut quad: Vec<[f32; 17]> = vec![quad_with_alpha(0.8)];
+    let mut glyph: Vec<GlyphAlphaInstance> = Vec::new();
+    let mut quad_flat: Vec<Range<u32>> = vec![];
+    let mut glyph_flat: Vec<Range<u32>> = vec![];
+    let groups = [DegradedGroup {
+        quad_range: 0..1,
+        glyph_range: 0..0,
+        opacity: 0.5,
+        parent: None,
+    }];
+    fold_root_degraded_into_flat(
+        &[false],
+        &groups,
+        true,
+        true,
+        true,
+        true,
+        &mut quad,
+        &mut glyph,
+        &mut quad_flat,
+        &mut glyph_flat,
+    );
+    assert!(
+        (quad[0][ALPHA_FLOAT_OFFSET] - 0.4).abs() < 1e-6,
+        "0.8 * 0.5 == 0.4"
+    );
+}
+
+#[test]
+fn degraded_fold_per_tier_gate_skips_ungated_tier() {
+    // All glyph gates OFF (fold_glyph=false, merge_glyph=false): the quad
+    // buffer/ranges fold + merge; the glyph buffer AND glyph ranges are left
+    // wholly untouched. This pins the case where NEITHER the glyph buffer nor
+    // the glyph partition was rebuilt this frame.
+    let mut quad: Vec<[f32; 17]> = vec![quad_with_alpha(0.8)];
+    let mut glyph: Vec<GlyphAlphaInstance> = vec![glyph_with_alpha(0.8)];
+    let mut quad_flat: Vec<Range<u32>> = vec![];
+    let mut glyph_flat: Vec<Range<u32>> = vec![];
+    let groups = [DegradedGroup {
+        quad_range: 0..1,
+        glyph_range: 0..1,
+        opacity: 0.5,
+        parent: None,
+    }];
+    fold_root_degraded_into_flat(
+        &[false],
+        &groups,
+        true,  // fold_quad
+        true,  // merge_quad
+        false, // fold_glyph — skip
+        false, // merge_glyph — skip
+        &mut quad,
+        &mut glyph,
+        &mut quad_flat,
+        &mut glyph_flat,
+    );
+    assert!(
+        (quad[0][ALPHA_FLOAT_OFFSET] - 0.4).abs() < 1e-6,
+        "quad folded"
+    );
+    assert_eq!(quad_flat, vec![0..1], "quad range merged");
+    assert!(
+        (glyph[0].color[3] - 0.8).abs() < 1e-6,
+        "glyph NOT folded (gate off)"
+    );
+    assert!(glyph_flat.is_empty(), "glyph range NOT merged (gate off)");
+}
+
+#[test]
+fn degraded_glyph_range_remerges_on_quad_dirty_only_frame() {
+    // MAJOR-2 (the vanish fix). On a quad-dirty-only frame with a live degraded
+    // glyph group, the glyph PARTITION is rebuilt (prepare re-EXCLUDES the
+    // degraded glyph range from `glyph_flat`) while the glyph BUFFER is RETAINED
+    // (already carries last frame's fold). The two glyph gates therefore SPLIT:
+    //   fold_glyph  = glyph_dirty               = false (buffer retained)
+    //   merge_glyph = quad_dirty || glyph_dirty = true  (partition rebuilt)
+    // The range MUST be re-merged (else the degraded glyphs vanish that frame),
+    // and the already-folded retained alpha MUST NOT be re-folded (else it
+    // compounds toward black). This is exactly the frame the #[ignore] GPU test
+    // `degraded_glyph_fold_idempotent_under_quad_dirty_only_frame` exercises
+    // end-to-end; this pins the caller's gate choice headlessly.
+    //
+    // Model the retained buffer: its glyph already carries last frame's fold
+    // (0.8 * 0.5 == 0.4). `glyph_flat` starts EMPTY — prepare's fresh partition
+    // rebuild excluded the degraded range this frame.
+    let mut quad: Vec<[f32; 17]> = vec![quad_with_alpha(0.8)];
+    let mut glyph: Vec<GlyphAlphaInstance> = vec![glyph_with_alpha(0.4)];
+    let mut quad_flat: Vec<Range<u32>> = vec![];
+    let mut glyph_flat: Vec<Range<u32>> = vec![];
+    let groups = [DegradedGroup {
+        quad_range: 0..1,
+        glyph_range: 0..1,
+        opacity: 0.5,
+        parent: None,
+    }];
+    fold_root_degraded_into_flat(
+        &[false],
+        &groups,
+        true,  // fold_quad   (quad buffer repacked this frame)
+        true,  // merge_quad  (quad partition rebuilt)
+        false, // fold_glyph  — glyph buffer RETAINED, do NOT re-fold
+        true,  // merge_glyph — glyph partition rebuilt, re-add the range
+        &mut quad,
+        &mut glyph,
+        &mut quad_flat,
+        &mut glyph_flat,
+    );
+    // The range is re-merged so the flat draw paints the degraded glyphs.
+    assert_eq!(
+        glyph_flat,
+        vec![0..1],
+        "degraded glyph range re-merged on a quad-dirty-only frame (not vanished)"
+    );
+    // The retained, already-folded alpha is NOT re-folded (stays 0.4, not 0.2).
+    assert!(
+        (glyph[0].color[3] - 0.4).abs() < 1e-6,
+        "retained glyph alpha left untouched (no double-fold to 0.2)"
+    );
+}
+
+#[test]
+fn degraded_fold_skips_nested_group_in_release_path() {
+    // A degraded NESTED group (parent == Some): the fold covers ROOT groups only.
+    // In release, the nested group's ranges are NOT merged and its alpha is left
+    // untouched (no worse than today's vanish — tracked by a follow-up). Under
+    // debug the fn debug_asserts; this test must run release-only to assert the
+    // containment behavior.
+    if cfg!(debug_assertions) {
+        // Debug builds debug_assert!(false) on a nested degraded group — that is
+        // the loud-in-dev guard; the release containment is what we assert.
+        return;
+    }
+    let mut quad: Vec<[f32; 17]> = vec![quad_with_alpha(0.8)];
+    let mut glyph: Vec<GlyphAlphaInstance> = Vec::new();
+    let mut quad_flat: Vec<Range<u32>> = vec![];
+    let mut glyph_flat: Vec<Range<u32>> = vec![];
+    let original = quad.clone();
+    let groups = [DegradedGroup {
+        quad_range: 0..1,
+        glyph_range: 0..0,
+        opacity: 0.5,
+        parent: Some(7), // nested under group 7
+    }];
+    fold_root_degraded_into_flat(
+        &[false],
+        &groups,
+        true,
+        true,
+        true,
+        true,
+        &mut quad,
+        &mut glyph,
+        &mut quad_flat,
+        &mut glyph_flat,
+    );
+    assert_eq!(
+        quad[0], original[0],
+        "nested degraded alpha untouched in release"
+    );
+    assert!(quad_flat.is_empty(), "nested degraded range NOT merged");
+}
diff --git a/crates/buiy_core/tests/render_degraded_group_gpu.rs b/crates/buiy_core/tests/render_degraded_group_gpu.rs
new file mode 100644
index 0000000..35829af
--- /dev/null
+++ b/crates/buiy_core/tests/render_degraded_group_gpu.rs
@@ -0,0 +1,438 @@
+//! GPU-path tests for the DEGRADED effect-group forward-composite (R2 /
+//! effect-compositor.md § 2.3): a ROOT group that loses its pooled
+//! `Rgba16Float` target under RT-pool budget pressure must paint FLAT with its
+//! `opacity` folded per-instance, NOT vanish. These need a wgpu adapter (real
+//! GPU or lavapipe), which CI / this host lack, so they are `#[ignore]` exactly
+//! like tests/render_smoke.rs. Run locally with:
+//!   cargo test -p buiy_core --test render_degraded_group_gpu -- --ignored
+
+mod support;
+
+use bevy::prelude::*;
+
+/// Force the RT-pool degradation path: insert a tiny [`RtPoolBudget`] into the
+/// render world so `plan_allocation` returns `false` for the lowest-cost groups.
+/// The const default is 64 MiB (nothing degrades); a few hundred bytes degrades
+/// almost everything. The render world persists across frames, so one insert
+/// holds for the whole test. Panics if `app` has no `RenderApp` sub-app.
+fn force_tiny_rt_budget(app: &mut App, bytes: u64) {
+    use buiy_core::render::compositor::RtPoolBudget;
+    app.get_sub_app_mut(bevy::render::RenderApp)
+        .expect("RenderApp")
+        .world_mut()
+        .insert_resource(RtPoolBudget(bytes));
+}
+
+#[test]
+#[ignore = "needs a wgpu adapter (real GPU or lavapipe); run with --ignored"]
+fn render_degraded_group_gpu() {
+    // R2 (effect-compositor.md § 2.3): a ROOT `Opacity` group that DEGRADES under
+    // budget pressure must paint FLAT with its opacity folded per-instance — its
+    // pixels are PRESENT at folded opacity, NOT vanished. With a tiny RT budget,
+    // `plan_allocation` degrades the group, `prepare_effect_groups` folds
+    // `opacity` into its members' alpha and merges its range into `flat_ranges`,
+    // and the flat window draw paints it.
+    use buiy_core::Node;
+    use buiy_core::layout::{Inset, Length, Sizing, Style};
+    use buiy_core::render::color::ColorToken;
+    use buiy_core::render::components::{Background, Opacity};
+    use std::borrow::Cow;
+
+    const W: u32 = 64;
+    const H: u32 = 64;
+    let red = Color::srgb(0.9, 0.05, 0.05); // OPAQUE red
+
+    let mut app = support::gpu_render_app(W, H);
+    // Degrade everything: budget far below one group's target bytes.
+    force_tiny_rt_budget(&mut app, 64);
+    {
+        let mut theme = app.world_mut().resource_mut::<buiy_core::theme::Theme>();
+        theme.colors.insert("test.red".into(), red);
+    }
+    let target = support::render_to_image(&mut app, W, H);
+    support::spawn_capture_camera(&mut app, target.clone());
+
+    // One ROOT Opacity(0.6) group with a single opaque-red fill child.
+    let fill = app
+        .world_mut()
+        .spawn((
+            Node,
+            Style::default()
+                .absolute()
+                .inset(Inset {
+                    top: Sizing::Length(Length::px(16.0)),
+                    left: Sizing::Length(Length::px(16.0)),
+                    ..default()
+                })
+                .width_px(32.0)
+                .height_px(32.0),
+            Background {
+                color: ColorToken::Token(Cow::Borrowed("test.red")),
+            },
+        ))
+        .id();
+    let parent = app
+        .world_mut()
+        .spawn((Node, Style::default().absolute(), Opacity(0.6)))
+        .id();
+    app.world_mut().entity_mut(parent).add_children(&[fill]);
+    app.world_mut()
+        .spawn((Node, Style::default()))
+        .add_children(&[parent]);
+
+    support::finish_and_run(&mut app, 4);
+    let pixels = support::readback_rgba(&mut app, target);
+    let px = |x: u32, y: u32| support::px(&pixels, W, x, y);
+
+    // Folded-flat expectation: opaque red at alpha 0.6 over the opaque-black
+    // clear, encoded linear→sRGB8 (the Rgba8UnormSrgb target). The fold sets the
+    // instance alpha to 0.6, and the flat straight-alpha SrcOver blend produces
+    // red*0.6 over black.
+    let red_lin = LinearRgba::from(red);
+    let folded = LinearRgba::new(red_lin.red, red_lin.green, red_lin.blue, 0.6);
+    let black = LinearRgba::new(0.0, 0.0, 0.0, 1.0);
+    let a = folded.alpha;
+    let over = LinearRgba::new(
+        folded.red * a + black.red * (1.0 - a),
+        folded.green * a + black.green * (1.0 - a),
+        folded.blue * a + black.blue * (1.0 - a),
+        a + black.alpha * (1.0 - a),
+    );
+    let s = Srgba::from(over);
+    let expected = [
+        (s.red * 255.0).round() as u8,
+        (s.green * 255.0).round() as u8,
+        (s.blue * 255.0).round() as u8,
+        255u8,
+    ];
+
+    let inside = px(28, 28); // deep interior of the 32x32 fill
+    let clear = px(1, 1);
+    println!("degraded inside (28,28) = {inside:?} (expected {expected:?})");
+    println!("clear (1,1) = {clear:?}");
+
+    // The degraded group's pixels are PRESENT (not background) at folded 0.6 (RGB ±TOL).
+    assert_ne!(inside, clear, "degraded group must paint, not vanish");
+    const TOL: i32 = 5;
+    for ch in 0..3 {
+        assert!(
+            (inside[ch] as i32 - expected[ch] as i32).abs() <= TOL,
+            "degraded channel {ch}: got {}, expected {} (±{TOL}); folded-flat at 0.6",
+            inside[ch],
+            expected[ch]
+        );
+    }
+}
+
+#[test]
+#[ignore = "needs a wgpu adapter (real GPU or lavapipe); run with --ignored"]
+fn degraded_fold_does_not_compound_over_two_frames() {
+    // Per-tier idempotency (effect-compositor.md § 2.3): on a STEADY-STATE frame
+    // the quad buffer is RETAINED (not repacked from source), so the fold must NOT
+    // re-run — the degraded pixel is identical between two readbacks taken three
+    // updates apart. A fold that ran every frame would compound toward black.
+    use buiy_core::Node;
+    use buiy_core::layout::{Inset, Length, Sizing, Style};
+    use buiy_core::render::color::ColorToken;
+    use buiy_core::render::components::{Background, Opacity};
+    use std::borrow::Cow;
+
+    const W: u32 = 64;
+    const H: u32 = 64;
+    let red = Color::srgb(0.9, 0.05, 0.05);
+
+    let mut app = support::gpu_render_app(W, H);
+    force_tiny_rt_budget(&mut app, 64);
+    {
+        let mut theme = app.world_mut().resource_mut::<buiy_core::theme::Theme>();
+        theme.colors.insert("test.red".into(), red);
+    }
+    let target = support::render_to_image(&mut app, W, H);
+    support::spawn_capture_camera(&mut app, target.clone());
+
+    let fill = app
+        .world_mut()
+        .spawn((
+            Node,
+            Style::default()
+                .absolute()
+                .inset(Inset {
+                    top: Sizing::Length(Length::px(16.0)),
+                    left: Sizing::Length(Length::px(16.0)),
+                    ..default()
+                })
+                .width_px(32.0)
+                .height_px(32.0),
+            Background {
+                color: ColorToken::Token(Cow::Borrowed("test.red")),
+            },
+        ))
+        .id();
+    let parent = app
+        .world_mut()
+        .spawn((Node, Style::default().absolute(), Opacity(0.6)))
+        .id();
+    app.world_mut().entity_mut(parent).add_children(&[fill]);
+    app.world_mut()
+        .spawn((Node, Style::default()))
+        .add_children(&[parent]);
+
+    support::finish_and_run(&mut app, 4);
+    let frame1 = support::readback_rgba(&mut app, target.clone());
+    // Drive steady-state frames (no paint input changes → quad buffer retained).
+    for _ in 0..3 {
+        app.update();
+    }
+    let frame2 = support::readback_rgba(&mut app, target);
+    let p1 = support::px(&frame1, W, 28, 28);
+    let p2 = support::px(&frame2, W, 28, 28);
+    println!("frame1 (28,28) = {p1:?}  frame2 = {p2:?}");
+    const TOL: i32 = 2;
+    for ch in 0..4 {
+        assert!(
+            (p1[ch] as i32 - p2[ch] as i32).abs() <= TOL,
+            "degraded pixel must not compound across steady frames: ch {ch} \
+             {} vs {} (the fold ran once, not per-frame)",
+            p1[ch],
+            p2[ch]
+        );
+    }
+}
+
+#[test]
+#[ignore = "needs a wgpu adapter (real GPU or lavapipe); run with --ignored"]
+fn degraded_glyph_fold_idempotent_under_quad_dirty_only_frame() {
+    // MAJOR-2 glyph idempotency: a degraded group with BOTH a quad bg and glyphs.
+    // Frame 2 mutates ONLY a quad input (the blue bg color → quad_dirty true,
+    // glyph_dirty false, so the glyph buffer is RETAINED). The degraded WHITE
+    // glyph ink must be unchanged (NOT re-dimmed) AND must still be PRESENT —
+    // proving the glyph ALPHA-fold gates on `glyph_dirty` (no re-fold on the
+    // retained buffer) while the glyph RANGE-MERGE gates on `quad_dirty ||
+    // glyph_dirty` (the partition rebuild re-excludes the degraded glyph range,
+    // so the merge must re-add it or the glyphs VANISH that frame). Without a real
+    // GPU this is `#[ignore]`; it pins the split-gate the headless
+    // `degraded_glyph_range_remerges_on_quad_dirty_only_frame` proves at the
+    // pure-function tier, end-to-end.
+    //
+    // Channel discipline: we assert on the white ink's RED+GREEN, not blue. The
+    // ink sits over the blue bg we deliberately edit, so anti-aliased glyph edges
+    // legitimately blend the new bg into their BLUE channel (correct AA, not a
+    // regression). White ink dominates R+G, which are orthogonal to a blue-only
+    // bg edit, so R+G isolate the glyph fold. A double-fold dims the ink (R+G
+    // drop); a dropped range-merge reverts it to the ~8 bg (R+G collapse) — R+G
+    // stability rejects both. See the per-pixel assertion below.
+    use buiy_core::Node;
+    use buiy_core::layout::{Inset, Length, Sizing, Style};
+    use buiy_core::render::color::ColorToken;
+    use buiy_core::render::components::{Background, Opacity, TextColor};
+    use buiy_core::text::{FontSize, Text};
+    use std::borrow::Cow;
+
+    const W: u32 = 96;
+    const H: u32 = 64;
+    let blue = Color::srgb(0.05, 0.05, 0.9);
+
+    let mut app = support::gpu_render_app(W, H);
+    force_tiny_rt_budget(&mut app, 64);
+    {
+        let mut theme = app.world_mut().resource_mut::<buiy_core::theme::Theme>();
+        theme.colors.insert("test.blue".into(), blue);
+        theme.colors.insert("test.white".into(), Color::WHITE);
+    }
+    let target = support::render_to_image(&mut app, W, H);
+    support::spawn_capture_camera(&mut app, target.clone());
+
+    // A degraded Opacity group holding a quad bg AND a glyph run.
+    let bg = app
+        .world_mut()
+        .spawn((
+            Node,
+            Style::default()
+                .absolute()
+                .inset(Inset {
+                    top: Sizing::Length(Length::px(8.0)),
+                    left: Sizing::Length(Length::px(8.0)),
+                    ..default()
+                })
+                .width_px(64.0)
+                .height_px(40.0),
+            Background {
+                color: ColorToken::Token(Cow::Borrowed("test.blue")),
+            },
+        ))
+        .id();
+    let text = app
+        .world_mut()
+        .spawn((
+            Node,
+            Style::default().absolute().inset(Inset {
+                top: Sizing::Length(Length::px(12.0)),
+                left: Sizing::Length(Length::px(12.0)),
+                ..default()
+            }),
+            Text(String::from("Hi")),
+            FontSize(24.0),
+            TextColor(ColorToken::Token(Cow::Borrowed("test.white"))),
+        ))
+        .id();
+    let parent = app
+        .world_mut()
+        .spawn((Node, Style::default().absolute(), Opacity(0.6)))
+        .id();
+    app.world_mut().entity_mut(parent).add_children(&[bg, text]);
+    app.world_mut()
+        .spawn((Node, Style::default()))
+        .add_children(&[parent]);
+
+    support::finish_and_run(&mut app, 4);
+    support::wait_for_text_ready(&mut app, 60);
+    let frame1 = support::readback_rgba(&mut app, target.clone());
+
+    // Mutate ONLY a quad input (the bg color) → quad_dirty, glyph retained.
+    {
+        let mut theme = app.world_mut().resource_mut::<buiy_core::theme::Theme>();
+        theme
+            .colors
+            .insert("test.blue".into(), Color::srgb(0.05, 0.05, 0.7));
+    }
+    support::finish_and_run(&mut app, 3);
+    let frame2 = support::readback_rgba(&mut app, target);
+
+    // Sample the WHITE glyph ink of the "Hi" run and assert its red+green
+    // channels are stable (±3) frame-to-frame. We test R+G specifically (NOT
+    // blue) because the ink is white over a BLUE background we *deliberately*
+    // mutated this frame: anti-aliased glyph-edge pixels legitimately blend
+    // `white·coverage + bg·(1-coverage)`, so their BLUE channel tracks the bg
+    // change by design — comparing blue would flag correct AA as a regression.
+    // White ink dominates R+G, which are orthogonal to the blue bg edit, so R+G
+    // isolate the glyph fold from the bg. The dominant failure this frame pins is
+    // VANISH: if the range-merge were (wrongly) gated on glyph_dirty instead of
+    // quad_dirty||glyph_dirty, the partition rebuild would drop the degraded glyph
+    // range on this quad-dirty-only frame and the ink would revert to the ~(8,8)
+    // blue background — R+G collapsing from ~150-203 down to ~8, a huge delta this
+    // assertion rejects. (The complementary double-fold/compounding hazard is
+    // only covered for the QUAD tier by the steady-state, quad-only fixture in
+    // `degraded_fold_does_not_compound_over_two_frames`; per the rationale here, a
+    // CPU re-fold on THIS quad-dirty-only frame would not even re-upload, so the
+    // glyph no-re-fold half is pinned at the pure-function tier by the headless
+    // mirror named in the comment at the top of this test.)
+    // Glyph ink is identified by high R+G (> 150); the blue bg alone never reaches
+    // that, since its R+G are ~8. We require a real population of ink pixels so a
+    // fixture drift that moved the glyphs out of the sampled box fails loudly
+    // instead of silently asserting over zero pixels (the original single-row band
+    // sampled pure background and "passed"/"failed" on the bg, not the ink).
+    let mut ink_pixels = 0usize;
+    for y in 14..34 {
+        for x in 10..40 {
+            let a = support::px(&frame1, W, x, y);
+            let b = support::px(&frame2, W, x, y);
+            // White ink in frame 1: both R and G well above the blue bg's ~8.
+            if a[0] > 150 && a[1] > 150 {
+                ink_pixels += 1;
+                let d_r = (a[0] as i32 - b[0] as i32).abs();
+                let d_g = (a[1] as i32 - b[1] as i32).abs();
+                assert!(
+                    d_r <= 3 && d_g <= 3,
+                    "degraded glyph ink (white) must be stable on a quad-dirty-only \
+                     frame at ({x},{y}): frame1={a:?} frame2={b:?} (R+G must not \
+                     move — the alpha-fold gates on glyph_dirty so the retained \
+                     buffer is not re-folded, while the range-merge gates on \
+                     quad_dirty||glyph_dirty so the glyphs are re-merged, not \
+                     vanished). A double-fold dims the ink; a dropped merge \
+                     reverts it to the ~8 blue background."
+                );
+            }
+        }
+    }
+    assert!(
+        ink_pixels >= 8,
+        "expected the white \"Hi\" run to land in the sampled box (found only \
+         {ink_pixels} ink pixels) — fixture drift moved the glyphs; the stability \
+         assertion would otherwise vacuously pass over background"
+    );
+}
+
+#[test]
+#[ignore = "needs a wgpu adapter (real GPU or lavapipe); run with --ignored"]
+fn nested_degraded_group_does_not_corrupt_parent() {
+    // Scope guard (MAJOR-1): a NESTED group forced to degrade. The root-degraded
+    // slice does NOT handle nested forward-composite (that routes into the PARENT
+    // target, a node-side follow-up). `fold_root_degraded_into_flat`
+    // debug_asserts on a nested degraded group, so in a DEBUG build this fixture
+    // would panic in prepare — which is the intended loud guard. In a RELEASE
+    // build the nested child is left untouched (it vanishes, tracked) and must NOT
+    // mis-place at window level. The only check made here is that the (1,1)
+    // corner pixel is opaque black; no sibling is spawned and the parent's
+    // region is not sampled. Under debug the body is skipped (see guard below).
+    if cfg!(debug_assertions) {
+        // The prepare-side debug_assert is the containment in debug builds.
+        return;
+    }
+    use buiy_core::Node;
+    use buiy_core::layout::{Inset, Length, Sizing, Style};
+    use buiy_core::render::color::ColorToken;
+    use buiy_core::render::components::{Background, Opacity};
+    use std::borrow::Cow;
+
+    const W: u32 = 96;
+    const H: u32 = 96;
+    let red = Color::srgb(0.9, 0.05, 0.05);
+
+    // Budget meant to fit the OUTER group's target but degrade the INNER (nested)
+    // one (plan_allocation degrades lowest-cost first) — TODO confirm 4096 does so.
+    let mut app = support::gpu_render_app(W, H);
+    force_tiny_rt_budget(&mut app, 4096);
+    {
+        let mut theme = app.world_mut().resource_mut::<buiy_core::theme::Theme>();
+        theme.colors.insert("test.red".into(), red);
+    }
+    let target = support::render_to_image(&mut app, W, H);
+    support::spawn_capture_camera(&mut app, target.clone());
+
+    // Outer Opacity group containing an inner Opacity group that holds a 16x16
+    // fill — the inner is the nested degrade candidate (neither group is sized).
+    let inner_fill = app
+        .world_mut()
+        .spawn((
+            Node,
+            Style::default()
+                .absolute()
+                .inset(Inset {
+                    top: Sizing::Length(Length::px(20.0)),
+                    left: Sizing::Length(Length::px(20.0)),
+                    ..default()
+                })
+                .width_px(16.0)
+                .height_px(16.0),
+            Background {
+                color: ColorToken::Token(Cow::Borrowed("test.red")),
+            },
+        ))
+        .id();
+    let inner = app
+        .world_mut()
+        .spawn((Node, Style::default().absolute(), Opacity(0.5)))
+        .id();
+    app.world_mut()
+        .entity_mut(inner)
+        .add_children(&[inner_fill]);
+    let outer = app
+        .world_mut()
+        .spawn((Node, Style::default().absolute(), Opacity(0.8)))
+        .id();
+    app.world_mut().entity_mut(outer).add_children(&[inner]);
+    app.world_mut()
+        .spawn((Node, Style::default()))
+        .add_children(&[outer]);
+
+    support::finish_and_run(&mut app, 4);
+    let pixels = support::readback_rgba(&mut app, target);
+    // The corner is untouched: the slice's flat-merge must NOT have mis-placed the
+    // nested child at window level (a wrong-space paint would smear it here).
+    let corner = support::px(&pixels, W, 1, 1);
+    assert_eq!(
+        corner,
+        [0, 0, 0, 255],
+        "nested degrade must not mis-place the child at window level (corner clean)"
+    );
+}
diff --git a/crates/buiy_core/tests/render_extract.rs b/crates/buiy_core/tests/render_extract.rs
index ee5a298..819e4df 100644
--- a/crates/buiy_core/tests/render_extract.rs
+++ b/crates/buiy_core/tests/render_extract.rs
@@ -166,6 +166,7 @@ fn assemble_preserves_clip_per_entity() {
             // Only entity 2 carries a clip; the others stay unclipped.
             clip: (x == e(2)).then_some(clip2),
             group: None,
+            affine: [[1.0, 0.0], [0.0, 1.0]],
         })
     });
     let clips: Vec<Option<ClipRect>> = nodes.nodes.iter().map(|n| n.clip).collect();
@@ -266,6 +267,103 @@ fn extracted_node_position_follows_global_transform() {
     assert_eq!(node.position, Vec2::new(200.0, 300.0));
 }
 
+#[test]
+fn extracted_node_carries_affine_basis_from_global_transform() {
+    // The 2D linear part of GlobalTransform's affine is carried onto the record
+    // so the GPU vertex stage can apply rotation/scale (not just the
+    // translation). A 90deg z-rotation is ASYMMETRIC, so it catches a transpose:
+    // R(90) maps x_axis -> (0,1) and y_axis -> (-1,0), so col0 = [0,1] and
+    // col1 = [-1,0]. The translation.xy must still be the painted top-left.
+    use std::f32::consts::FRAC_PI_2;
+    let theme = Theme::default();
+    let layout = ResolvedLayout {
+        position: Vec2::ZERO,
+        size: Vec2::splat(50.0),
+    };
+    let affine3 = bevy::math::Affine3A::from_rotation_translation(
+        Quat::from_rotation_z(FRAC_PI_2),
+        Vec3::new(11.0, 22.0, 0.0),
+    );
+    let gt = GlobalTransform::from(affine3);
+    let node = extracted_node_for(
+        Entity::from_raw_u32(3).unwrap(),
+        &gt,
+        &layout,
+        None,
+        None,
+        &theme,
+    );
+    // affine[c] is COLUMN c: col0 = xy of x_axis, col1 = xy of y_axis (NOT rows).
+    let eps = 1e-5;
+    assert!(
+        (node.affine[0][0] - 0.0).abs() < eps,
+        "m00 = {}",
+        node.affine[0][0]
+    );
+    assert!(
+        (node.affine[0][1] - 1.0).abs() < eps,
+        "m10 = {}",
+        node.affine[0][1]
+    );
+    assert!(
+        (node.affine[1][0] - -1.0).abs() < eps,
+        "m01 = {}",
+        node.affine[1][0]
+    );
+    assert!(
+        (node.affine[1][1] - 0.0).abs() < eps,
+        "m11 = {}",
+        node.affine[1][1]
+    );
+    assert_eq!(node.position, Vec2::new(11.0, 22.0));
+}
+
+#[test]
+fn extracted_node_identity_affine_is_identity_basis() {
+    // An identity GlobalTransform yields exactly the [[1,0],[0,1]] basis (no
+    // epsilon) — the byte-identical fast path (pre-affine pixels/tests unchanged).
+    let theme = Theme::default();
+    let layout = ResolvedLayout {
+        position: Vec2::ZERO,
+        size: Vec2::splat(10.0),
+    };
+    let node = extracted_node_for(
+        Entity::from_raw_u32(4).unwrap(),
+        &GlobalTransform::IDENTITY,
+        &layout,
+        None,
+        None,
+        &theme,
+    );
+    assert_eq!(node.affine, [[1.0, 0.0], [0.0, 1.0]]);
+}
+
+#[test]
+fn extracted_node_nonuniform_scale_basis() {
+    // A (2,3) non-uniform scale yields the diagonal basis [[2,0],[0,3]]: x scale
+    // in col0, y scale in col1, zero off-diagonals (within the bridge's TRS range).
+    let theme = Theme::default();
+    let layout = ResolvedLayout {
+        position: Vec2::ZERO,
+        size: Vec2::splat(10.0),
+    };
+    let affine3 = bevy::math::Affine3A::from_scale(Vec3::new(2.0, 3.0, 1.0));
+    let gt = GlobalTransform::from(affine3);
+    let node = extracted_node_for(
+        Entity::from_raw_u32(5).unwrap(),
+        &gt,
+        &layout,
+        None,
+        None,
+        &theme,
+    );
+    let eps = 1e-5;
+    assert!((node.affine[0][0] - 2.0).abs() < eps);
+    assert!((node.affine[0][1] - 0.0).abs() < eps);
+    assert!((node.affine[1][0] - 0.0).abs() < eps);
+    assert!((node.affine[1][1] - 3.0).abs() < eps);
+}
+
 use buiy_core::render::extract::{
     ExtractedNode, ExtractedNodes, assemble_context_tree, assemble_in_paint_order,
 };
@@ -298,6 +396,7 @@ fn assemble_emits_in_painters_z_order() {
             color: Color::WHITE,
             clip: None,
             group: None,
+            affine: [[1.0, 0.0], [0.0, 1.0]],
         })
     });
     let got: Vec<Entity> = nodes.nodes.iter().map(|n| n.entity).collect();
@@ -325,6 +424,7 @@ fn assemble_drops_skipped_entities() {
                 color: Color::WHITE,
                 clip: None,
                 group: None,
+                affine: [[1.0, 0.0], [0.0, 1.0]],
             })
         }
     });
@@ -360,6 +460,7 @@ fn hit_test_order_is_paint_order_reversed() {
             color: Color::WHITE,
             clip: None,
             group: None,
+            affine: [[1.0, 0.0], [0.0, 1.0]],
         })
     });
     // Paint order is painters_z forward.
@@ -420,6 +521,7 @@ fn nested_context_is_entered_atomically_at_its_parent_position() {
                 color: Color::WHITE,
                 clip: None,
                 group: None,
+                affine: [[1.0, 0.0], [0.0, 1.0]],
             })
         },
         &mut out,
@@ -468,6 +570,7 @@ fn tree_assembly_skips_dropped_entities_across_the_boundary() {
                     color: Color::WHITE,
                     clip: None,
                     group: None,
+                    affine: [[1.0, 0.0], [0.0, 1.0]],
                 })
             }
         },
diff --git a/crates/buiy_core/tests/render_instance.rs b/crates/buiy_core/tests/render_instance.rs
index 90a7b5b..2639708 100644
--- a/crates/buiy_core/tests/render_instance.rs
+++ b/crates/buiy_core/tests/render_instance.rs
@@ -42,13 +42,14 @@ use buiy_core::render::instance::{PACKED_INSTANCE_STRIDE_BYTES, PackedInstance,
 #[test]
 fn packed_instance_stride_matches_logical_pipeline_descriptor() {
     // pos(2*4) + size(2*4) + color(4*4) + radius(1*4) + clip_min(2*4) +
-    // clip_max(2*4) = 52, in LOGICAL px (not clip). The clip AABB rides every
-    // instance (R8b fragment discard); the const must equal the struct stride.
+    // clip_max(2*4) + affine(4*4) = 68, in LOGICAL px (not clip). The clip AABB
+    // and the 2D affine basis ride every instance (R8b fragment discard + R1
+    // transform paint); the const must equal the struct stride.
     assert_eq!(
         std::mem::size_of::<PackedInstance>(),
         PACKED_INSTANCE_STRIDE_BYTES
     );
-    assert_eq!(PACKED_INSTANCE_STRIDE_BYTES, 52);
+    assert_eq!(PACKED_INSTANCE_STRIDE_BYTES, 68);
 }
 
 #[test]
@@ -102,20 +103,68 @@ fn node_with_clip(clip: Option<ClipRect>) -> ExtractedNode {
         color: Color::WHITE,
         clip,
         group: None,
+        affine: [[1.0, 0.0], [0.0, 1.0]],
     }
 }
 
 #[test]
-fn packed_instance_stride_is_52() {
-    // R8b: pos(2)+size(2)+color(4)+radius(1)+clip_min(2)+clip_max(2) = 13 f32 = 52 B.
-    // The struct stride, the const, and the raw [f32;13] must all agree (52 B);
-    // any drift makes the instanced draw read garbage.
-    assert_eq!(std::mem::size_of::<PackedInstance>(), 52);
+fn packed_instance_stride_is_68() {
+    // R8b + R1: pos(2)+size(2)+color(4)+radius(1)+clip_min(2)+clip_max(2)
+    // +affine(4) = 17 f32 = 68 B. The struct stride, the const, and the raw
+    // [f32;17] must all agree (68 B); any drift makes the instanced draw read
+    // garbage.
+    assert_eq!(std::mem::size_of::<PackedInstance>(), 68);
     assert_eq!(
         std::mem::size_of::<PackedInstance>(),
-        std::mem::size_of::<[f32; 13]>()
+        std::mem::size_of::<[f32; 17]>()
     );
-    assert_eq!(PACKED_INSTANCE_STRIDE_BYTES, 52);
+    assert_eq!(PACKED_INSTANCE_STRIDE_BYTES, 68);
+}
+
+#[test]
+fn packed_instance_appends_affine_after_existing_thirteen() {
+    // R1 HARD CONSTRAINT (campaign-review MAJOR — R2 depends on it): the 2x2
+    // affine basis appends AFTER the existing 13 floats so every existing field
+    // offset is UNCHANGED (notably color@4 / alpha@7). The raw record carries
+    // the flattened basis [m00,m10,m01,m11] at [13..17], and raw[0..13] is
+    // byte-identical to the pre-R1 layout.
+    use buiy_core::render::buckets::packed_to_raw;
+    let mut node = node_with_clip(Some(ClipRect {
+        min: Vec2::new(5.0, 6.0),
+        max: Vec2::new(105.0, 206.0),
+    }));
+    node.affine = [[2.0, 3.0], [4.0, 5.0]]; // col0 = [m00,m10], col1 = [m01,m11]
+    let p = pack_extracted(&node);
+    let raw = packed_to_raw(&p);
+    assert_eq!(
+        &raw[13..17],
+        &[2.0, 3.0, 4.0, 5.0],
+        "affine appended at [13..17]"
+    );
+    // Pre-R1 fields unchanged; 10/20/30/40 are presumably node_with_clip's pos/size.
+    assert_eq!(raw[0], 10.0);
+    assert_eq!(raw[1], 20.0);
+    assert_eq!(raw[2], 30.0);
+    assert_eq!(raw[3], 40.0);
+    let lin = LinearRgba::from(Color::WHITE);
+    assert_eq!(&raw[4..8], &[lin.red, lin.green, lin.blue, lin.alpha]);
+    assert_eq!(raw[8], 0.0); // radius
+    assert_eq!(&raw[9..13], &[5.0, 6.0, 105.0, 206.0]); // clip min/max
+}
+
+#[test]
+fn color_and_alpha_offset_consts_point_at_color() {
+    // R2 (degraded-group re-tint) reads alpha via ALPHA_FLOAT_OFFSET, so the named
+    // consts must point at the color block (color@4, alpha@7) — the invariant the
+    // append-after-13 layout preserves; checked as literals and on a packed record.
+    use buiy_core::render::buckets::packed_to_raw;
+    use buiy_core::render::instance::{ALPHA_FLOAT_OFFSET, COLOR_FLOAT_OFFSET};
+    assert_eq!(COLOR_FLOAT_OFFSET, 4);
+    assert_eq!(ALPHA_FLOAT_OFFSET, 7);
+    let p = pack_extracted(&node_with_clip(None));
+    let raw = packed_to_raw(&p);
+    assert_eq!(raw[ALPHA_FLOAT_OFFSET], p.color[3]);
+    assert_eq!(&raw[COLOR_FLOAT_OFFSET..COLOR_FLOAT_OFFSET + 4], &p.color);
+}
 
 #[test]
@@ -143,8 +192,8 @@ fn pack_extracted_uses_full_view_sentinel_when_clip_absent() {
 }
 
 #[test]
-fn packed_raw_stride_agrees_with_thirteen_floats() {
-    // The raw bucket layout is [f32;13] and byte-equal to PackedInstance's stride.
+fn packed_raw_stride_agrees_with_seventeen_floats() {
+    // The raw bucket layout is [f32;17] and byte-equal to PackedInstance's stride.
     assert!(buiy_core::render::instance::packed_raw_stride_agrees());
 }
 
diff --git a/crates/buiy_core/tests/render_paint_order.rs b/crates/buiy_core/tests/render_paint_order.rs
index 019961a..f4b3593 100644
--- a/crates/buiy_core/tests/render_paint_order.rs
+++ b/crates/buiy_core/tests/render_paint_order.rs
@@ -95,6 +95,7 @@ fn top_layer_tail_is_tier_ordered_fullscreen_to_modal() {
                 color: Color::WHITE,
                 clip: None,
                 group: None,
+                affine: [[1.0, 0.0], [0.0, 1.0]],
             })
             .collect(),
         ..Default::default()
@@ -149,6 +150,7 @@ fn modal_is_first_hit_candidate_over_popover() {
                 color: Color::WHITE,
                 clip: None,
                 group: None,
+                affine: [[1.0, 0.0], [0.0, 1.0]],
             })
         },
         &mut assembled,
diff --git a/crates/buiy_core/tests/render_prepare.rs b/crates/buiy_core/tests/render_prepare.rs
index 8cb4398..cd2d88d 100644
--- a/crates/buiy_core/tests/render_prepare.rs
+++ b/crates/buiy_core/tests/render_prepare.rs
@@ -51,6 +51,7 @@ fn pack_extracted_nodes_populated_carrier_yields_nonempty_quad_batch() {
                 color: Color::srgb(1.0, 0.0, 0.0),
                 clip: None,
                 group: None,
+                affine: [[1.0, 0.0], [0.0, 1.0]],
             },
             ExtractedNode {
                 entity: Entity::from_raw_u32(2).unwrap(),
@@ -59,6 +60,7 @@ fn pack_extracted_nodes_populated_carrier_yields_nonempty_quad_batch() {
                 color: Color::srgb(0.0, 1.0, 0.0),
                 clip: None,
                 group: None,
+                affine: [[1.0, 0.0], [0.0, 1.0]],
             },
         ],
     };
@@ -71,6 +73,12 @@ fn pack_extracted_nodes_populated_carrier_yields_nonempty_quad_batch() {
         2,
         "populated carrier must yield one quad per node"
     );
+    // R1: each raw instance is 17 floats (the affine basis appended at [13..17]).
+    assert_eq!(
+        instances[0].len(),
+        17,
+        "raw instance carries 17 floats (incl. the appended affine basis)"
+    );
     // The uniform is built from the carrier's logical_size + scale_factor: the
     // std140 array carries scale_factor at slot 8.
     assert!(
@@ -84,6 +92,32 @@ fn pack_extracted_nodes_populated_carrier_yields_nonempty_quad_batch() {
     assert!(empty.is_empty(), "empty carrier yields no quads");
 }
 
+#[test]
+fn pack_extracted_nodes_carries_non_identity_affine() {
+    // A node with a non-identity affine carries the flattened basis at [13..17]
+    // through the prepare pack (the GPU vertex stage reads it as the 2x2 mat).
+    let nodes = ExtractedNodes {
+        logical_size: Vec2::new(800.0, 600.0),
+        scale_factor: 1.0,
+        nodes: vec![ExtractedNode {
+            entity: Entity::from_raw_u32(1).unwrap(),
+            position: Vec2::new(10.0, 20.0),
+            size: Vec2::new(100.0, 50.0),
+            color: Color::srgb(1.0, 0.0, 0.0),
+            clip: None,
+            group: None,
+            affine: [[0.0, 1.0], [-1.0, 0.0]], // 90deg rotation basis
+        }],
+    };
+    let (instances, _) = pack_extracted_nodes(&nodes);
+    assert_eq!(instances.len(), 1);
+    assert_eq!(
+        &instances[0][13..17],
+        &[0.0, 1.0, -1.0, 0.0],
+        "the 2D affine basis rides the packed instance at [13..17]"
+    );
+}
+
 #[test]
 fn extracted_nodes_pack_view_routes_records_to_quad_layer_0() {
     // R6 consumes R5's ExtractedNodes and packs its `nodes` via pack_view — no
@@ -103,6 +137,7 @@ fn extracted_nodes_pack_view_routes_records_to_quad_layer_0() {
         color: Color::srgb(1.0, 0.0, 0.0),
         clip: None,
         group: None,
+        affine: [[1.0, 0.0], [0.0, 1.0]],
     });
     let buckets = pack_view(&view.nodes);
     let quad0 = PrimitiveBatchKey {
diff --git a/crates/buiy_core/tests/render_primitive_descriptor.rs b/crates/buiy_core/tests/render_primitive_descriptor.rs
index 1ae035a..930493f 100644
--- a/crates/buiy_core/tests/render_primitive_descriptor.rs
+++ b/crates/buiy_core/tests/render_primitive_descriptor.rs
@@ -67,9 +67,9 @@ fn quad_descriptor_keeps_alpha_blending_and_entry_points() {
 
 #[test]
 fn quad_descriptor_has_two_vertex_buffers_with_phase0_strides() {
-    // Static unit-quad VBO (stride 16) + per-instance buffer (stride 52 after
-    // R8b appends the clip AABB at @location(6)/(7)); the unit-quad VBO is
-    // untouched.
+    // Static unit-quad VBO (stride 16) + per-instance buffer (stride 68 after
+    // R1 appends the 2x2 affine basis at @location(8)/(9), on top of R8b's clip
+    // AABB at @location(6)/(7)); the unit-quad VBO is untouched.
     let d = BuiyPrimitives::default().specialize(BuiyPrimitiveKey {
         kind: BuiyPrimitiveKind::Quad,
         format: TextureFormat::Rgba8UnormSrgb,
@@ -78,21 +78,55 @@ fn quad_descriptor_has_two_vertex_buffers_with_phase0_strides() {
     let buffers = &d.vertex.buffers;
     assert_eq!(buffers.len(), 2, "vertex + instance buffer layouts");
     assert_eq!(buffers[0].array_stride, 16);
-    assert_eq!(buffers[1].array_stride, 52);
+    assert_eq!(buffers[1].array_stride, 68);
 }
 
 #[test]
-fn instance_buffer_stride_is_52_with_clip_fields() {
-    // The per-instance record grew from 36 B (R7) to 52 B (R8b) when the clip
-    // AABB (`clip_min`/`clip_max`, two Float32x2) was appended; the vertex
-    // layout's `array_stride` must track `PackedInstance`'s 52-byte stride or
-    // wgpu mis-strides the instance buffer.
+fn instance_buffer_stride_is_68_with_clip_and_affine_fields() {
+    // The per-instance record grew from 52 B (R8b) to 68 B (R1) when the 2x2
+    // affine basis (two Float32x2 columns) was appended after the clip AABB; the
+    // vertex layout's `array_stride` must track `PackedInstance`'s 68-byte stride
+    // or wgpu mis-strides the instance buffer.
+    use buiy_core::render::instance::PACKED_INSTANCE_STRIDE_BYTES;
     let d = BuiyPrimitives::default().specialize(BuiyPrimitiveKey {
         kind: BuiyPrimitiveKind::Quad,
         format: TextureFormat::Rgba8UnormSrgb,
         samples: 1,
     });
-    assert_eq!(d.vertex.buffers[1].array_stride, 52);
+    assert_eq!(d.vertex.buffers[1].array_stride, 68);
+    assert_eq!(
+        d.vertex.buffers[1].array_stride as usize,
+        PACKED_INSTANCE_STRIDE_BYTES
+    );
+}
+
+#[test]
+fn instance_keeps_clip_attrs_byte_stable_and_appends_affine() {
+    // R1 HARD CONSTRAINT: the existing 6 instance attrs (locations 2..7, offsets
+    // 0..44) are UNCHANGED, and two NEW Float32x2 affine columns append at
+    // @location(8) offset 52 (col0 = [m00,m10]) and @location(9) offset 60
+    // (col1 = [m01,m11]).
+    let d = BuiyPrimitives::default().specialize(BuiyPrimitiveKey {
+        kind: BuiyPrimitiveKind::Quad,
+        format: TextureFormat::Rgba8UnormSrgb,
+        samples: 1,
+    });
+    let attrs = &d.vertex.buffers[1].attributes;
+    let at = |loc: u32| attrs.iter().find(|a| a.shader_location == loc).copied();
+    // Existing six attrs unchanged.
+    assert_eq!(at(2).unwrap().offset, 0);
+    assert_eq!(at(3).unwrap().offset, 8);
+    assert_eq!(at(4).unwrap().offset, 16); // color
+    assert_eq!(at(5).unwrap().offset, 32); // radius/blur
+    assert_eq!(at(6).unwrap().offset, 36); // clip_min
+    assert_eq!(at(7).unwrap().offset, 44); // clip_max
+    // New affine columns appended.
+    let col0 = at(8).expect("instance layout has @location(8) affine col0");
+    assert_eq!(col0.format, VertexFormat::Float32x2);
+    assert_eq!(col0.offset, 52);
+    let col1 = at(9).expect("instance layout has @location(9) affine col1");
+    assert_eq!(col1.format, VertexFormat::Float32x2);
+    assert_eq!(col1.offset, 60);
 }
 
 #[test]
diff --git a/crates/buiy_core/tests/render_shader_wgsl.rs b/crates/buiy_core/tests/render_shader_wgsl.rs
index 1f67f55..7dafe4b 100644
--- a/crates/buiy_core/tests/render_shader_wgsl.rs
+++ b/crates/buiy_core/tests/render_shader_wgsl.rs
@@ -78,3 +78,63 @@ fn shadow_shader_with_clip_parses() {
         "shadow clip inputs bound at @location(6)/(7) (matches the vertex layout)"
     );
 }
+
+#[test]
+fn quad_shader_applies_affine_via_mat2x2() {
+    // R1: the quad shader declares the 2D affine basis instance inputs at
+    // @location(8)/(9), builds the window-logical corner via a `mat2x2`, and
+    // interpolates `frag_logical` for the clip discard — `rect_center` is GONE
+    // (it was the axis-aligned corner, wrong under rotation). naga PARSES (not
+    // string-grep) so a malformed VertexOut/fragment-input mismatch is rejected.
+    let m = parse_wgsl("quad", QUAD_WGSL);
+    assert!(has_entry_point(&m, "vertex"));
+    assert!(has_entry_point(&m, "fragment"));
+    assert!(
+        QUAD_WGSL.contains("@location(8)") && QUAD_WGSL.contains("@location(9)"),
+        "quad affine inputs bound at @location(8)/(9) (matches the vertex layout)"
+    );
+    assert!(
+        QUAD_WGSL.contains("affine_col0") && QUAD_WGSL.contains("affine_col1"),
+        "quad shader declares the affine basis columns"
+    );
+    assert!(
+        QUAD_WGSL.contains("mat2x2"),
+        "quad vertex builds the logical corner via a mat2x2 affine"
+    );
+    assert!(
+        QUAD_WGSL.contains("frag_logical"),
+        "quad carries the affine-transformed window-logical corner for the clip discard"
+    );
+    assert!(
+        !QUAD_WGSL.contains("rect_center"),
+        "rect_center (the axis-aligned corner) is dropped — replaced by frag_logical"
+    );
+}
+
+#[test]
+fn shadow_shader_applies_affine_via_mat2x2() {
+    // The shadow shader mirrors the quad shader's affine path identically.
+    let m = parse_wgsl("shadow", SHADOW_WGSL);
+    assert!(has_entry_point(&m, "vertex"));
+    assert!(has_entry_point(&m, "fragment"));
+    assert!(
+        SHADOW_WGSL.contains("@location(8)") && SHADOW_WGSL.contains("@location(9)"),
+        "shadow affine inputs bound at @location(8)/(9)"
+    );
+    assert!(
+        SHADOW_WGSL.contains("affine_col0") && SHADOW_WGSL.contains("affine_col1"),
+        "shadow shader declares the affine basis columns"
+    );
+    assert!(
+        SHADOW_WGSL.contains("mat2x2"),
+        "shadow vertex builds the logical corner via a mat2x2 affine"
+    );
+    assert!(
+        SHADOW_WGSL.contains("frag_logical"),
+        "shadow carries the affine-transformed window-logical corner"
+    );
+    assert!(
+        !SHADOW_WGSL.contains("rect_center"),
+        "rect_center is dropped in the shadow shader too"
+    );
+}
diff --git a/crates/buiy_core/tests/render_text_quads.rs b/crates/buiy_core/tests/render_text_quads.rs
index 8f9add1..3de1d19 100644
--- a/crates/buiy_core/tests/render_text_quads.rs
+++ b/crates/buiy_core/tests/render_text_quads.rs
@@ -16,6 +16,7 @@ fn node(entity: Entity, x: f32, color: Color, group: Option<usize>) -> Extracted
         color,
         clip: None,
         group,
+        affine: [[1.0, 0.0], [0.0, 1.0]],
     }
 }
 
diff --git a/crates/buiy_core/tests/render_transform_paint_gpu.rs b/crates/buiy_core/tests/render_transform_paint_gpu.rs
new file mode 100644
index 0000000..abcc522
--- /dev/null
+++ b/crates/buiy_core/tests/render_transform_paint_gpu.rs
@@ -0,0 +1,167 @@
+//! GPU reftest (#[ignore]) for R1 transform paint: a UiTransform's 2D affine
+//! (rotation / scale) is applied in the quad vertex stage so a transformed fill
+//! paints OFF the axis-aligned box. Needs a real wgpu adapter; the headless gate
+//! proves the byte layout + the WGSL naga-parse shape, the human runs the GPU
+//! lane (`cargo test -p buiy_core -- --ignored --test-threads=1`).
+//!
+//! SCOPE: pure rotation / pure scale only — within the bridge's faithful TRS
+//! range. Skew / general TransformMatrix::Matrix are bounded by the bridge's
+//! TRS-only Transform::from_matrix decompose (a separate residual,
+//! clip-and-transform.md § B.5), so this fixture deliberately avoids them.
+
+mod support;
+
+/// A pure 2x scale about the box-local top-left grows a 10×10 fill to 20×20, so
+/// a pixel ~15px from the top-left — INSIDE the scaled fill but OUTSIDE the
+/// unscaled 10×10 box — must be painted. If the affine were dropped (the R1
+/// bug), render would paint the original 10×10 axis-aligned box and that pixel
+/// would read the clear color. The scaled-grown corner is direction-independent,
+/// so this is the unambiguous transform-paint assertion.
+#[test]
+#[ignore = "needs a wgpu adapter (real GPU or lavapipe); run with --ignored"]
+fn scaled_fill_paints_beyond_unscaled_box() {
+    use bevy::prelude::*;
+    use buiy_core::Node;
+    use buiy_core::layout::{Inset, Length, Sizing, Style};
+    use buiy_core::render::color::ColorToken;
+    use buiy_core::render::components::Background;
+    use std::borrow::Cow;
+
+    const W: u32 = 64;
+    const H: u32 = 64;
+
+    let mut app = support::gpu_render_app(W, H);
+    {
+        let mut theme = app.world_mut().resource_mut::<buiy_core::theme::Theme>();
+        theme.colors.insert("test.fill".into(), Color::WHITE);
+    }
+
+    let target = support::render_to_image(&mut app, W, H);
+    support::spawn_capture_camera(&mut app, target.clone());
+
+    // A 10×10 fill at top-left (16,16), scaled 2x about its box-local top-left →
+    // occupies x∈[16,36), y∈[16,36). The unscaled box is x∈[16,26), y∈[16,26).
+    let child = (
+        Node,
+        Style::default()
+            .absolute()
+            .inset(Inset {
+                top: Sizing::Length(Length::px(16.0)),
+                left: Sizing::Length(Length::px(16.0)),
+                ..default()
+            })
+            .width_px(10.0)
+            .height_px(10.0)
+            .scale(2.0),
+        Background {
+            color: ColorToken::Token(Cow::Borrowed("test.fill")),
+        },
+    );
+    let c = app.world_mut().spawn(child).id();
+    app.world_mut()
+        .spawn((Node, Style::default()))
+        .add_children(&[c]);
+
+    support::finish_and_run(&mut app, 3);
+    let pixels = support::readback_rgba(&mut app, target);
+    assert_eq!(pixels.len(), (W * H * 4) as usize);
+    let px = |x: u32, y: u32| -> [u8; 4] {
+        let i = ((y * W + x) * 4) as usize;
+        [pixels[i], pixels[i + 1], pixels[i + 2], pixels[i + 3]]
+    };
+
+    let clear = px(1, 1);
+    assert_eq!(
+        clear,
+        [0, 0, 0, 255],
+        "untouched corner reads the clear color"
+    );
+
+    // Deep interior of the scaled-only region (x∈[26,36), y∈[26,36)) — OUTSIDE
+    // the unscaled 10×10 box, well clear of the SDF rim. The R1 bug (axis-aligned
+    // paint, scale dropped) leaves this at the clear color.
+    let scaled_only = px(30, 30);
+    assert_ne!(
+        scaled_only,
+        [0, 0, 0, 255],
+        "the 2x scale must paint at (30,30), beyond the unscaled 10×10 box \
+         (a dropped affine would leave this at the clear color)"
+    );
+}
+
+/// A pure 90° rotation about the box-local top-left sweeps a tall thin rect into
+/// a horizontal extent the unrotated rect never reaches. The exact swept
+/// quadrant depends on the rotation sign, so this asserts the rotated fill
+/// paints SOME pixel well outside the unrotated rect's vertical column (pixels
+/// the axis-aligned rect would leave at the clear color), which holds for
+/// either sign of a 90° turn about the top-left.
+#[test]
+#[ignore = "needs a wgpu adapter (real GPU or lavapipe); run with --ignored"]
+fn rotated_fill_paints_off_axis() {
+    use bevy::prelude::*;
+    use buiy_core::Node;
+    use buiy_core::layout::{Inset, Length, Sizing, Style};
+    use buiy_core::render::color::ColorToken;
+    use buiy_core::render::components::Background;
+    use std::borrow::Cow;
+    use std::f32::consts::FRAC_PI_2;
+
+    const W: u32 = 64;
+    const H: u32 = 64;
+
+    let mut app = support::gpu_render_app(W, H);
+    {
+        let mut theme = app.world_mut().resource_mut::<buiy_core::theme::Theme>();
+        theme.colors.insert("test.fill".into(), Color::WHITE);
+    }
+
+    let target = support::render_to_image(&mut app, W, H);
+    support::spawn_capture_camera(&mut app, target.clone());
+
+    // A 4px-wide × 30px-tall rect with its top-left at (32,32) (image center).
+    // Unrotated it occupies x∈[32,36), y∈[32,62). Rotated 90° about its top-left
+    // it sweeps a ~30px HORIZONTAL extent the unrotated thin column never covers.
+    let child = (
+        Node,
+        Style::default()
+            .absolute()
+            .inset(Inset {
+                top: Sizing::Length(Length::px(32.0)),
+                left: Sizing::Length(Length::px(32.0)),
+                ..default()
+            })
+            .width_px(4.0)
+            .height_px(30.0)
+            .rotate_z(FRAC_PI_2),
+        Background {
+            color: ColorToken::Token(Cow::Borrowed("test.fill")),
+        },
+    );
+    let c = app.world_mut().spawn(child).id();
+    app.world_mut()
+        .spawn((Node, Style::default()))
+        .add_children(&[c]);
+
+    support::finish_and_run(&mut app, 3);
+    let pixels = support::readback_rgba(&mut app, target);
+    let px = |x: u32, y: u32| -> [u8; 4] {
+        let i = ((y * W + x) * 4) as usize;
+        [pixels[i], pixels[i + 1], pixels[i + 2], pixels[i + 3]]
+    };
+
+    // The unrotated thin column is x∈[32,36): any painted pixel with x far from
+    // that column (>10px away horizontally) proves the rotation moved fill onto
+    // a horizontal extent the axis-aligned rect never reaches. Scan the rotated
+    // sweep band near the pivot row.
+    let off_axis_painted = (0..W).any(|x| {
+        (x + 10 < 32 || x > 36 + 10) && {
+            // sample a few rows around the pivot (y=32) where a 90° turn lands fill
+            (28..=36).any(|y| px(x, y) != [0, 0, 0, 255])
+        }
+    });
+    assert!(
+        off_axis_painted,
+        "the 90° rotation must paint fill off the unrotated thin column \
+         (a dropped affine would paint only the axis-aligned x∈[32,36) column)"
+    );
+}
diff --git a/crates/buiy_core/tests/snapshots/pack_extracted_finite_clip.snap b/crates/buiy_core/tests/snapshots/pack_extracted_finite_clip.snap
index 949ce83..140c931 100644
--- a/crates/buiy_core/tests/snapshots/pack_extracted_finite_clip.snap
+++ b/crates/buiy_core/tests/snapshots/pack_extracted_finite_clip.snap
@@ -2,4 +2,4 @@
 source: crates/buiy_core/tests/render_instance.rs
 expression: pack_extracted_finite_clip
 ---
-000020410000a0410000f041000020420000803f0000803f0000803f0000803f000000000000a0400000c0400000d24200004e43
+000020410000a0410000f041000020420000803f0000803f0000803f0000803f000000000000a0400000c0400000d24200004e430000803f00000000000000000000803f
diff --git a/crates/buiy_core/tests/snapshots/pack_extracted_sentinel_clip.snap b/crates/buiy_core/tests/snapshots/pack_extracted_sentinel_clip.snap
index 15858d1..308b1c1 100644
--- a/crates/buiy_core/tests/snapshots/pack_extracted_sentinel_clip.snap
+++ b/crates/buiy_core/tests/snapshots/pack_extracted_sentinel_clip.snap
@@ -2,4 +2,4 @@
 source: crates/buiy_core/tests/render_instance.rs
 expression: pack_extracted_sentinel_clip
 ---
-000020410000a0410000f041000020420000803f0000803f0000803f0000803f00000000000080ff000080ff0000807f0000807f
+000020410000a0410000f041000020420000803f0000803f0000803f0000803f00000000000080ff000080ff0000807f0000807f0000803f00000000000000000000803f
diff --git a/crates/buiy_core/tests/snapshots/pack_instance_logical_px.snap b/crates/buiy_core/tests/snapshots/pack_instance_logical_px.snap
index bbdce65..0bffb8f 100644
--- a/crates/buiy_core/tests/snapshots/pack_instance_logical_px.snap
+++ b/crates/buiy_core/tests/snapshots/pack_instance_logical_px.snap
@@ -2,4 +2,4 @@
 source: crates/buiy_core/tests/render_instance.rs
 expression: pack_instance_logical_px
 ---
-0000c84200004842000048430000a0420000803f0000803f0000803f0000803f00004041000080ff000080ff0000807f0000807f
+0000c84200004842000048430000a0420000803f0000803f0000803f0000803f00004041000080ff000080ff0000807f0000807f0000803f00000000000000000000803f
diff --git a/crates/buiy_core/tests/snapshots/pack_view_node_payload.snap b/crates/buiy_core/tests/snapshots/pack_view_node_payload.snap
index d98c5d0..633ed8b 100644
--- a/crates/buiy_core/tests/snapshots/pack_view_node_payload.snap
+++ b/crates/buiy_core/tests/snapshots/pack_view_node_payload.snap
@@ -2,4 +2,4 @@
 source: crates/buiy_core/tests/render_buckets.rs
 expression: pack_view_node_payload
 ---
-0000e0400000104100004040000080400000803f0000803f0000803f0000803f00000000000080ff000080ff0000807f0000807f
+0000e0400000104100004040000080400000803f0000803f0000803f0000803f00000000000080ff000080ff0000807f0000807f0000803f00000000000000000000803f
diff --git a/crates/buiy_verify/src/invariant/predicates.rs b/crates/buiy_verify/src/invariant/predicates.rs
index 8d57ac1..427a56b 100644
--- a/crates/buiy_verify/src/invariant/predicates.rs
+++ b/crates/buiy_verify/src/invariant/predicates.rs
@@ -307,7 +307,7 @@ pub fn all_finite(nodes: &ExtractedNodes) -> Result<(), Violation> {
 /// clip" and are checked separately.
 pub fn all_finite_packed(packed: &[PackedInstance]) -> Result<(), Violation> {
     for (i, p) in packed.iter().enumerate() {
-        let finite_fields: [(&str, f32); 9] = [
+        let finite_fields: [(&str, f32); 13] = [
             ("rect_pos.x", p.rect_pos[0]),
             ("rect_pos.y", p.rect_pos[1]),
             ("rect_size.x", p.rect_size[0]),
@@ -317,6 +317,12 @@ pub fn all_finite_packed(packed: &[PackedInstance]) -> Result<(), Violation> {
             ("color.b", p.color[2]),
             ("color.a", p.color[3]),
             ("radius", p.radius),
+            // The 2D affine basis (R1) — always finite for a valid
+            // GlobalTransform; a NaN/inf here is a real packing bug.
+            ("affine.m00", p.affine[0]),
+            ("affine.m10", p.affine[1]),
+            ("affine.m01", p.affine[2]),
+            ("affine.m11", p.affine[3]),
         ];
         for (field, v) in finite_fields {
             if !v.is_finite() {
diff --git a/crates/buiy_verify/src/invariant/scene.rs b/crates/buiy_verify/src/invariant/scene.rs
index d2832fb..f6268bc 100644
--- a/crates/buiy_verify/src/invariant/scene.rs
+++ b/crates/buiy_verify/src/invariant/scene.rs
@@ -669,6 +669,8 @@ fn extracted_node(entity: Entity, n: &FlatNode) -> ExtractedNode {
         color,
         clip,
         group: None,
+        // The synthetic scene carries no UiTransform; paint axis-aligned.
+        affine: [[1.0, 0.0], [0.0, 1.0]],
     }
 }
 
diff --git a/crates/buiy_verify/src/snapshot.rs b/crates/buiy_verify/src/snapshot.rs
index 4c7b96a..55e9bb6 100644
--- a/crates/buiy_verify/src/snapshot.rs
+++ b/crates/buiy_verify/src/snapshot.rs
@@ -431,7 +431,7 @@ pub fn display_list_dump(nodes: &ExtractedNodes, names: &NameLookup) -> String {
 // ---------------------------------------------------------------------------
 
 /// Hex-dump a [`PackedInstance`] as `bytemuck::bytes_of(p)` — a byte-exact
-/// snapshot of the GPU upload payload (52 B → 104 hex chars), independent of
+/// snapshot of the GPU upload payload (68 B → 136 hex chars), independent of
 /// the Display dump's format version. A packing arithmetic change (e.g. the
 /// half-size sign bug `render_instance.rs` regression-tests) flips the hex even
 /// when the rounded Display dump rounds it away (snapshots.md § byte-exact).
diff --git a/crates/buiy_verify/tests/invariant_mutations.rs b/crates/buiy_verify/tests/invariant_mutations.rs
index 5c83917..abbac84 100644
--- a/crates/buiy_verify/tests/invariant_mutations.rs
+++ b/crates/buiy_verify/tests/invariant_mutations.rs
@@ -38,6 +38,7 @@ fn node(entity: Entity, size: Vec2) -> ExtractedNode {
             max: size,
         }),
         group: None,
+        affine: [[1.0, 0.0], [0.0, 1.0]],
     }
 }
 
@@ -231,6 +232,7 @@ fn packed(rect_size: [f32; 2]) -> PackedInstance {
         radius: 0.0,
         clip_min: [f32::NEG_INFINITY, f32::NEG_INFINITY],
         clip_max: [f32::INFINITY, f32::INFINITY],
+        affine: [1.0, 0.0, 0.0, 1.0],
     }
 }
 
diff --git a/crates/buiy_verify/tests/snapshot_display_list.rs b/crates/buiy_verify/tests/snapshot_display_list.rs
index bed7509..858289c 100644
--- a/crates/buiy_verify/tests/snapshot_display_list.rs
+++ b/crates/buiy_verify/tests/snapshot_display_list.rs
@@ -32,6 +32,7 @@ fn two_node_scene(swap: bool) -> (ExtractedNodes, NameLookup) {
         color: Color::srgba(0.1, 0.2, 0.3, 1.0),
         clip: None,
         group: None,
+        affine: [[1.0, 0.0], [0.0, 1.0]],
     };
     let tooltip = ExtractedNode {
         entity: tooltip_e,
@@ -43,6 +44,7 @@ fn two_node_scene(swap: bool) -> (ExtractedNodes, NameLookup) {
             max: Vec2::new(80.0, 24.0),
         }),
         group: Some(0),
+        affine: [[1.0, 0.0], [0.0, 1.0]],
     };
     let nodes = ExtractedNodes {
         // Stored paint order is modal (bottom) then tooltip (top); the dump
@@ -117,6 +119,7 @@ fn missing_token_surfaces_as_magenta() {
         color: MISSING_TOKEN_FALLBACK,
         clip: None,
         group: None,
+        affine: [[1.0, 0.0], [0.0, 1.0]],
     };
     let nodes = ExtractedNodes {
         nodes: vec![node],
diff --git a/crates/buiy_verify/tests/snapshot_instance_hex.rs b/crates/buiy_verify/tests/snapshot_instance_hex.rs
index d1917da..943cfa9 100644
--- a/crates/buiy_verify/tests/snapshot_instance_hex.rs
+++ b/crates/buiy_verify/tests/snapshot_instance_hex.rs
@@ -9,7 +9,7 @@ use buiy_verify::snapshot::instance_hex;
 fn hex_round_trips_bytes() {
     // `instance_hex(p)` → parse hex → `bytemuck::pod_read_unaligned` must
     // reconstruct the ORIGINAL `PackedInstance` bit-for-bit, proving the hex is
-    // lossless and matches the GPU upload payload (52 B → 104 hex chars).
+    // lossless and matches the GPU upload payload (68 B → 136 hex chars).
     let p = PackedInstance {
         rect_pos: [10.0, 20.0],
         rect_size: [100.0, 40.0],
@@ -17,10 +17,11 @@ fn hex_round_trips_bytes() {
         radius: 8.0,
         clip_min: [0.0, 0.0],
         clip_max: [200.0, 100.0],
+        affine: [1.0, 0.0, 0.0, 1.0],
     };
 
     let hex = instance_hex(&p);
-    assert_eq!(hex.len(), 104, "52 bytes → 104 hex chars");
+    assert_eq!(hex.len(), 136, "68 bytes → 136 hex chars");
 
-    // Parse the hex back into the 52 bytes.
+    // Parse the hex back into the 68 bytes.
     let bytes: Vec<u8> = (0..hex.len())
@@ -49,6 +50,7 @@ fn hex_flips_on_a_packing_change() {
         radius: 0.0,
         clip_min: [f32::NEG_INFINITY, f32::NEG_INFINITY],
         clip_max: [f32::INFINITY, f32::INFINITY],
+        affine: [1.0, 0.0, 0.0, 1.0],
     };
     let mut flipped = base;
     // The half-size sign bug `render_instance.rs` regression-tests: a negated
diff --git a/docs/plans/follow-ups.md b/docs/plans/follow-ups.md
index 5e75857..963e85d 100644
--- a/docs/plans/follow-ups.md
+++ b/docs/plans/follow-ups.md
@@ -594,16 +594,53 @@ stays open until such a mechanism exists.
 
 **Originated:** Phase 8 (spec § 4 — render-side concerns stored only).
 
-**Symptom:** `perspective`, `TransformStyle::Preserve3d`, and
-`BackfaceVisibility::Hidden` are stored on `UiTransform` and the
-`LAYOUT` / `PAINT` / `STYLE` contain flags are stored on `Containment`,
-but render does not yet consume them.
-
-**Implementation sketch:** render consumes `ResolvedTransform` + the
-containment flags — applies the composed matrix, the PAINT clip rect, and
-honors perspective / backface / `transform-style`.
-
-**Spec touchpoint:** `transforms-and-containment.md § 4`, § 5.1.
+**Status:** transform-paint (rotation + (non-)uniform scale) LANDED (R1); the
+PAINT clip rect was ALREADY done; perspective / `Preserve3d` /
+`BackfaceVisibility` remain C-tier deferred. Do NOT close — the residuals below
+keep this entry open.
+
+**LANDED (R1):** the 2D affine paint. Extract consumes the
+`GlobalTransform` 2D linear part (`global_transform.affine().matrix3` xy columns
+— NOT a re-read of `ResolvedTransform`; pillar-5 contract, the bridge already
+folded `ResolvedTransform.matrix` into `Transform`) and carries it as
+`ExtractedNode.affine`. `PackedInstance` grew by APPENDING the 2x2 basis
+(`[m00,m10,m01,m11]`) AFTER the existing 13 floats so every prior offset stays
+byte-stable; the named `COLOR_FLOAT_OFFSET = 4` / `ALPHA_FLOAT_OFFSET = 7`
+consts were added for R2's degraded-group re-tint. The quad + shadow vertex
+stages apply the affine to each box-local corner (`mat2x2 * local`) before the
+logical→clip view map, interpolating `frag_logical` for the clip-AABB discard
+(stride 52 B → 68 B, vertex attrs `@location(8)/(9)`). Identity basis
+`[1,0,0,1]` is byte-identical to the pre-R1 axis-aligned path. GPU rotate/scale
+reftest: `tests/render_transform_paint_gpu.rs` (`#[ignore]`).
+
+**ALREADY done (pre-R1):** the `Containment` PAINT clip rect — the per-primitive
+clip AABB via `clip::clip_for_primitive` + `write_clip_rects` (clip.rs ~196),
+packed onto every instance (the R8b fragment discard).
+
+**Residual (still C-tier deferred):** `perspective`, `TransformStyle::Preserve3d`,
+and `BackfaceVisibility::Hidden` are stored on `UiTransform` but render does not
+consume them (render/mod.rs ~388). The 2D affine path does not carry a
+projective channel.
+
+**Residual A (newly surfaced — layout side):** `transform-origin` is NOT honored
+by layout sub-pass 6e (`compose_transform` ignores `ui.origin`), so the composed
+matrix rotates/scales about the box-local TOP-LEFT, not the 50%/50% center.
+Render transports the affine EXACTLY as `GlobalTransform` encodes it so render ==
+picking by construction; it must NOT independently re-apply an origin (a
+double-apply would diverge from picking). Honoring `transform-origin` is a
+layout-side follow-up (a 6e change + a picking re-verify).
+
+**Residual B (newly surfaced — bridge fidelity):** skew (`TransformMatrix::Skew`)
+and general `TransformMatrix::Matrix` paint are BOUNDED by the bridge's lossy
+TRS-only `Transform::from_matrix` decompose (bridge.rs; proven lossy by
+`from_matrix_drops_projective_perspective_row_keeps_affine`). A Bevy `Transform`
+is TRS-only and cannot represent a general shear, so the extracted 2D linear part
+is FAITHFUL for rotation + non-uniform scale but skew/general-matrix do NOT paint
+faithfully yet. Faithful skew needs the bridge to stop round-tripping through TRS
+(or render to read a non-TRS source).
+
+**Spec touchpoint:** `transforms-and-containment.md § 4`, § 5.1;
+`clip-and-transform.md § B.5`.
 
 ## Render — R11 forced-colors cross-phase seams (CatalogPaint + BoxShadow draw-skip)
 
@@ -914,23 +951,72 @@ pass via a `Glyph@Rgba16Float` pipeline specialization (mirroring the
 
 ## Render — degraded effect groups vanish instead of drawing flat
 
+**Status: Root-degraded LANDED; nested-degraded follow-up filed** (R2).
+
 **Originated:** text campaign T8 implementation reading (the T8 plan's D9).
 
-**Symptom:** a `plan_allocation == false` group gets no pooled target,
-`BuiyNode::run` step 1 `continue`s, and its members are excluded from
+**As-was symptom:** a `plan_allocation == false` group got no pooled target,
+`BuiyNode::run` step 1 `continue`d, and its members were excluded from
 `flat_ranges` / `glyph_flat_ranges` — so under RT-pool budget pressure a
-degraded group's quads AND glyphs paint nowhere, despite the "drawn flat
-instead" comments (node.rs step 1; compositor.rs `PreparedEffectTargets`).
-Latent under the 64 MiB budget (no fixture degrades today). T8 mirrored the
-quad semantics for glyphs (a degraded group's glyph range is likewise
-skipped) rather than silently widening scope.
-
-**Implementation sketch:** either re-route a degraded group's ranges into the
-flat draw at prepare (forward compositing, accepting the double-dim
-approximation v1 rejected for targets) or document skip-as-degradation;
-decide with `buiy-verification-design`'s budget calibration.
-
-**Spec touchpoint:** `effect-compositor.md § 2.3`.
+degraded group's quads AND glyphs painted nowhere, despite the "drawn flat
+instead" comments. Latent under the 64 MiB budget (no fixture degraded). T8
+had mirrored the quad semantics for glyphs (a degraded group's glyph range
+was likewise skipped) rather than silently widening scope.
+
+**Resolution (R2):** the route-flat-vs-skip fork is RESOLVED in favor of
+**forward-compositing**, per `effect-compositor.md § 2.3` (skip contradicted
+the spec). A ROOT degraded group (`parent == None`) now folds `group.opacity`
+into each member instance's alpha IN PLACE (quad alpha at
+`ALPHA_FLOAT_OFFSET` = 7 on the `[f32;17]` record; glyph alpha at the parallel
+`GLYPH_ALPHA_FLOAT_OFFSET` = 11 = `GlyphAlphaInstance.color[3]`) and merges
+its instance ranges into `flat_ranges`/`glyph_flat_ranges` so the flat WINDOW
+draw paints it — it dims exactly once and paints flat, never vanishes
+(`compositor::fold_root_degraded_into_flat`, called from
+`prepare_effect_groups`). Per-tier idempotency: the fold runs iff the
+corresponding BUFFER was repacked this frame (quad on `quad_dirty`, glyph on
+`glyph_dirty` — the buffer-repack signals, which DIFFER from the wider
+glyph-partition signal), so a retained buffer never re-compounds. Gated on
+`allocate.iter().any(|a| !a)` to preserve the gate-#14 zero-upload steady
+state. The budget is overridable via the new `RtPoolBudget` resource so a test
+forces degradation deterministically.
+
+**Out of scope (nested):** a NESTED degraded child (`parent == Some`) is NOT
+handled by this slice — see the next section. The fold debug-asserts on it and
+leaves it untouched in release (no worse than the prior vanish).
+
+**Spec touchpoint:** `effect-compositor.md § 2.3` (as-landed note added).
+
+## Render — nested degraded effect group must forward-composite into the parent target (not the window)
+
+**Originated:** R2 (degraded-group forward-composite), MAJOR-1 scope decision.
+
+**Problem:** `plan_allocation` (`compositor.rs`) ranks purely by (extent,
+reason) and CAN degrade a NESTED child (`extracted[i].parent == Some`) while
+its parent keeps a target. R2's fix routes a degraded group's instance ranges
+into `flat_ranges`, which the node draws in the WINDOW pass (`buiy_pass`,
+`node.rs`). That equals "the parent target" the spec § 2.3 mandates ONLY when
+the degraded group is a ROOT group. For a nested degraded child, window-level
+flat-merge would paint it in the wrong space/clip, and the parent's step-2a
+composite (which already skips when either end lacks a target) would then
+sample a parent target the child never reached — double-wrong. So R2 scoped to
+root-degraded.
+
+**Fix (node-side):** route a degraded nested child's `group_ranges[i]` /
+`glyph_group_ranges[i]` into the PARENT group's step-1 target draw (the
+parent's `target_view_columns`, into the parent's `Rgba16Float` target), with
+`group.opacity` folded per-instance, BEFORE the parent composites — instead of
+the window flat draw. This is a different draw path from the root case (parent
+off-screen target vs. window flat pass), which is why it was split out.
+
+**Current containment:** `compositor::fold_root_degraded_into_flat`
+`debug_assert!(false, …)`s on a nested degraded group (loud in dev/tests) and
+in release leaves it untouched (it vanishes — no worse than today). The GPU
+test `nested_degraded_group_does_not_corrupt_parent`
+(`tests/render_compositor_gpu.rs`) guards that the slice's flat-merge does NOT
+mis-place a nested child at window level.
+
+**Spec touchpoint:** `effect-compositor.md § 2.3` ("directly into its parent
+target" wording).
 
 ## Text — production ASCII pre-warm (rejected as unmeasured)
 
diff --git a/docs/specs/2026-06-03-buiy-render-pipeline-design/clip-and-transform.md b/docs/specs/2026-06-03-buiy-render-pipeline-design/clip-and-transform.md
index 737c354..7312015 100644
--- a/docs/specs/2026-06-03-buiy-render-pipeline-design/clip-and-transform.md
+++ b/docs/specs/2026-06-03-buiy-render-pipeline-design/clip-and-transform.md
@@ -497,10 +497,18 @@ Key points:
   steady-state frame, matching the layout pipeline's steady-state contract. This
   is one re-run trigger feeding one writer, not two competing filters.
 - **The transform origin** (`UiTransform.origin`, default `50% 50% 0`) is
-  *already baked into* `ResolvedTransform.matrix` by sub-pass 6e (it composes
-  `M = T·R·S·M_transform` around the resolved origin), so the bridge does a
-  flat `base * matrix` and never re-derives origin. Render and the bridge thus
-  agree with picking, which applies the inverse of the same matrix
+  *intended to be baked into* `ResolvedTransform.matrix` by sub-pass 6e (it would
+  compose `M = T·R·S·M_transform` around the resolved origin), so the bridge does
+  a flat `base * matrix` and never re-derives origin. **As of R1 (transform-paint
+  landed), this is the TARGET state, not current:** sub-pass 6e's
+  `compose_transform` does NOT yet read `ui.origin`, so the composed matrix
+  rotates/scales about the box-local TOP-LEFT, not the 50%/50% center (a
+  layout-side residual surfaced by R1 — see the
+  [follow-up](../../plans/follow-ups.md) residual A). The contract that matters
+  for render holds regardless: render applies the affine EXACTLY as
+  `GlobalTransform` encodes it (it does NOT independently re-apply an origin), so
+  render and the bridge cannot diverge from picking, which applies the inverse of
+  the same matrix
   ([transforms-and-containment.md § 1.2](../2026-05-08-buiy-layout-design/transforms-and-containment.md#12-layout-impact)).
 - **Buiy owns the whole `Transform`.** An author positions UI via Buiy's
   `Position` / `UiTransform`, never Bevy's `Transform`; the bridge owns the
@@ -709,12 +717,31 @@ so it does not perturb the contract). Picking applies the inverse
 
 ### B.5 Perspective / `transform-style` / `backface-visibility` consumption
 
+**Status (R1, transform-paint landed):** the **2D affine** half of the
+transform-paint follow-up now LANDS via the GPU vertex stage. Extract reads the
+`GlobalTransform` 2D linear part (`global_transform.affine().matrix3` xy columns
+— NOT a re-read of `ResolvedTransform`, per the pillar-5 contract in § B.2) and
+the quad + shadow shaders transform each box-local corner by it before the
+logical→clip view map (`PackedInstance` grew 52 B → 68 B by appending the 2x2
+basis after the clip fields; vertex attrs `@location(8)/(9)`). The
+**PAINT-clip half** was already done (§ A.3 rule 3 / `clip_for_primitive`). The
+**perspective channel / `Preserve3d` / `backface-visibility`** stay C-tier
+deferred (the bullets below).
+
+**Fidelity bound (R1):** render faithfully reproduces **rotation + non-uniform
+scale**, but **skew (`TransformMatrix::Skew`) and general
+`TransformMatrix::Matrix`** are BOUNDED by the bridge's TRS-only
+`Transform::from_matrix` decompose (§ B.2 — a Bevy `Transform` cannot represent
+a general shear, lossy by the same decompose that drops the projective row).
+Faithful skew is a separate residual; it needs the bridge to stop round-tripping
+through TRS (or render to read a non-TRS source). Not covered by R1.
+
 Phase 8 *stored* three `UiTransform` fields with no consumer
 ([components.rs `UiTransform`](../../../crates/buiy_core/src/layout/components.rs);
 [transforms-and-containment.md § 4](../2026-05-08-buiy-layout-design/transforms-and-containment.md#4-perspective-and-3d)).
-This bridge consumes them — resolving the
+The remaining (C-tier) consumption — resolving the
 [*"`UiTransform` paint + `Containment` PAINT clip + perspective/backface"*](../../plans/follow-ups.md)
-follow-up's transform half (the PAINT-clip half is § A.3):
+follow-up's perspective/3D half:
 
 - **`perspective: Option<Length>`** — the 3D viewing distance for `Preserve3d`
   children. Resolved to logical px and folded into the **perspective matrix
diff --git a/docs/specs/2026-06-03-buiy-render-pipeline-design/effect-compositor.md b/docs/specs/2026-06-03-buiy-render-pipeline-design/effect-compositor.md
index 3fa7020..e94ef40 100644
--- a/docs/specs/2026-06-03-buiy-render-pipeline-design/effect-compositor.md
+++ b/docs/specs/2026-06-03-buiy-render-pipeline-design/effect-compositor.md
@@ -132,6 +132,13 @@ Gate #15 ([foundation verification.md gate #15](../2026-05-07-buiy-foundation/ve
    **The aggregate live-set budget — `rt_pool_budget` (committed here).** The per-target bucket cap (§ 2.2) bounds *each* target's size, and `frames_since_last_use < 3` eviction (§ 2.2) bounds how long a *transient* target lingers — but neither caps the **concurrent-groups axis**: N simultaneously-painting groups allocate N live targets, and nothing above forbids N from being unbounded. So this file commits an explicit aggregate cap, `rt_pool_budget` (**bytes**; v1 default **64 MiB**), parallel to the atlas `page_budget` ([atlas-and-text-seam.md § 2.4](atlas-and-text-seam.md)): the *shape* — a byte budget on the concurrent live target set, with a defined degradation under pressure — is fixed here; only the *tuned number* defers to `buiy-verification-design` (exactly like the atlas `page_budget`'s tuned page count, [README § 5 #4](README.md#5-open-questions)).
 
    **Degradation under pressure (forward-compositing fallback).** When acquiring the next group's target in the prepare pass would push live target bytes past `rt_pool_budget`, the lowest-cost effect groups **fall back to direct-to-parent forward compositing** instead of allocating a target: the group's subtree paints directly into its parent target and the group `opacity` is applied **per-instance** (each painter's alpha multiplied by `group.opacity` in the forward pass) as the **documented approximation** — exactly the per-child approximation § 4 rejects for the *correct* default, accepted here *only* under budget pressure as the graceful-degradation path rather than failing to render. "Lowest-cost" ranks groups by reuse cost — smallest painted-bounds area and `OPACITY`-only reason first (an `OPACITY`-only group degrades to a visible-but-approximate overlap; an `ISOLATION`/reserved group degrades last because its boundary is structural, not just an alpha multiply). This brings the RT pool to parity with the atlas page-budget + LRU-under-pressure story: a hard byte ceiling, plus a defined behavior when the working set would exceed it, rather than unbounded growth or an allocation failure.
+
+   **As landed (R2 — ROOT degraded groups).** The forward-compositing fallback ships for **ROOT** degraded groups (`parent == None`). `prepare_effect_groups` (after `plan_allocation` marks the group degraded) folds `group.opacity` into the ALPHA slot of every member instance **in place** and merges the group's instance ranges into the flat draw, so the existing flat WINDOW pass paints them — the group dims exactly once and paints flat, never vanishes (the as-was bug: a degraded group's quads + glyphs painted nowhere). The alpha offset is **per-tier**: quad alpha is `ALPHA_FLOAT_OFFSET` (= 7) on the `[f32;17]` quad record; glyph alpha is `GLYPH_ALPHA_FLOAT_OFFSET` (= 11 = `GlyphAlphaInstance.color[3]`) on the glyph record — a DIFFERENT offset, because the glyph layout is `rect ++ uv ++ color ++ clip ++ page` (color is the 3rd `vec4`), and using the quad offset 7 on a glyph would corrupt `uv[3]`. **Two DIFFERENT gates per tier — alpha-fold vs range-merge.** The fold has two halves under two different invariants, so each tier carries two gates:
+
+- The **ALPHA-fold** runs per tier **iff that tier's instance BUFFER was repacked from source this frame** — quad fold on `quad_dirty` (nodes|groups|text_quads), glyph fold on `glyph_dirty` (glyphs), the buffer-repack signals. A retained (already-folded) buffer's alpha is left untouched so the fold never compounds to black.
+- The **RANGE-merge** (re-adding the degraded group's range into `flat_ranges`/`glyph_flat_ranges`) runs per tier **iff that tier's flat/group PARTITION was rebuilt this frame**, because `prepare_buiy_instances` rebuilds the partition wholesale and re-EXCLUDES the degraded group's range — the merge must re-stitch it every rebuild or the degraded group vanishes that frame. The **quad** partition rebuilds under `quad_dirty` (so quad's two gates coincide), but the **glyph** partition rebuilds under the UNION `quad_dirty || glyph_dirty` (`partition_glyph_ranges`) — wider than the glyph buffer-repack gate.
+
+The asymmetry is glyph-only and load-bearing: on a **quad-dirty-only frame** with a live degraded glyph group (e.g. a background/decoration edit while the glyph buffer is retained), the glyph partition is rebuilt (range re-excluded) while the glyph buffer is retained. The glyph **range-merge re-runs** (`merge_glyph = quad_dirty || glyph_dirty` = true, re-adding the range over the already-folded retained buffer) while the glyph **alpha-fold does not** (`fold_glyph = glyph_dirty` = false, the retained alpha already carries last frame's fold). Conflating the two — gating the glyph range-merge on the narrow `glyph_dirty` — drops the merge on that frame and the degraded glyphs vanish (the R2-follow-up bug that the split gate fixes). The whole block is gated on "any group degraded" so the no-degradation steady state stays a zero-fold, zero-upload frame (the gate-#14 budget). For a ROOT group, "the flat window draw" **is** "the parent target" this section mandates. **A NESTED degraded group** (`parent == Some`) must instead forward-composite into its **parent's `Rgba16Float` target** ("directly into its parent target", above) — that node-side routing is **not yet implemented**: the impl debug-asserts on a nested degraded group and leaves it untouched in release (it still vanishes, no worse than the prior behavior) rather than mis-placing it at the window level; tracked as a follow-up (`docs/plans/follow-ups.md` — "Render — nested degraded effect group must forward-composite into the parent target").
 2. **Return-to-baseline.** When activity stops, no new `EffectGroup`s appear; within 3 frames every transiently-allocated target's `frames_since_last_use` reaches 3 and `update()` drops it. Bucket count returns to the steady-state working set, which *is* the baseline. The glyph atlas ([atlas-and-text-seam.md](atlas-and-text-seam.md)) gives gate #15 the *same return-to-baseline guarantee* but through a **distinct pool with a distinct mechanism** — these are two pools, not one shared eviction model: render targets ride Bevy's `TextureCache` (`frames_since_last_use < 3` retain), while the atlas uses `guillotiere` allocation plus a tunable-grace LRU (`config.eviction_grace`). Both satisfy "entries return within ε of baseline"; neither shares the other's allocator or policy.
 3. **No unbounded growth path.** Because sizing is painted-bounds (§ 2.1) not viewport, and reuse is descriptor-keyed, an adversarial fixture that opens and closes a thousand opacity groups over ten minutes never accumulates a thousand live targets — it recycles a handful of buckets. This is the explicit counter to WebRender's documented cost ("GPU memory for atlases and intermediate targets … a real ceiling," [prior-art/servo-stylo/rendering.md](../../prior-art/servo-stylo/rendering.md)).