diff --git a/README.md b/README.md index 3e3baf2b..2e93bc09 100644 --- a/README.md +++ b/README.md @@ -136,17 +136,17 @@ fn main() -> @location(0) i32 { global_var GV0 in spv.StorageClass.Output: s32 func F0() -> spv.OpTypeVoid { - loop(v0: s32 <- 1s32, v1: s32 <- 1s32) { - v2 = spv.OpSLessThan(v1, 10s32): bool - (v3: s32, v4: s32) = if v2 { - v5 = spv.OpIMul(v0, v1): s32 - v6 = spv.OpIAdd(v1, 1s32): s32 - (v5, v6) + (_: s32, _: s32, v0: s32) = loop(v1: s32 <- 1s32, v2: s32 <- 1s32, _: s32 <- spv.OpUndef: s32) { + v3 = spv.OpSLessThan(v2, 10s32): bool + (v4: s32, v5: s32) = if v3 { + v6 = spv.OpIMul(v1, v2): s32 + v7 = spv.OpIAdd(v2, 1s32): s32 + (v6, v7) } else { (spv.OpUndef: s32, spv.OpUndef: s32) } - (v3, v4) -> (v0, v1) - } while v2 + (v4, v5, v1) -> (v1, v2, _) + } while v3 spv.OpStore(Pointer: &GV0, Object: v0) } ``` diff --git a/src/cf/structurize.rs b/src/cf/structurize.rs index 2d4de1fb..46a0d09d 100644 --- a/src/cf/structurize.rs +++ b/src/cf/structurize.rs @@ -236,18 +236,17 @@ enum LazyCond { False, True, - Merge(Rc), + Dyn(Rc), } -enum LazyCondMerge { - Select { - node: Node, - // FIXME(eddyb) the lowest level of `LazyCond` ends up containing only - // `LazyCond::{Undef,False,True}`, and that could more efficiently be - // expressed using e.g. bitsets, but the `Rc` in `LazyCond::Merge` - // means that this is more compact than it would otherwise be. - per_case_conds: SmallVec<[LazyCond; 4]>, - }, +struct LazyCondDyn { + node: Node, + + // FIXME(eddyb) the lowest level of `LazyCond` ends up containing only + // `LazyCond::{Undef,False,True}`, and that could more efficiently be + // expressed using e.g. bitsets, but the `Rc` in `LazyCond::Dyn` + // means that this is more compact than it would otherwise be. + per_child_region_conds: SmallVec<[LazyCond; 4]>, } /// A target for one of the edge bundles in a [`DeferredEdgeBundleSet`], mostly @@ -268,7 +267,7 @@ enum DeferredTarget { /// that exactly one [`DeferredEdgeBundle`] condition must be `true` at any /// given time (the only non-trivial case, [`DeferredEdgeBundleSet::Choice`], /// satisfies it because it's only used for merging `Select` cases, and so -/// all the conditions will end up using disjoint [`LazyCond::Merge`]s). +/// all the conditions will end up using disjoint [`LazyCond::Dyn`]s). enum DeferredEdgeBundleSet { Unreachable, @@ -798,6 +797,10 @@ impl<'a> Structurizer<'a> { // the loop body itself was originally. // NOTE(eddyb) both input declarations and the child `Loop` node are // added later down below, after the `Loop` node is created. + // + // TODO(eddyb) does the above comment even make sense? output-side + // hermetic loops are now implemented, but the wrapper is for the + // input side, instead. let wrapper_region = self.func_def_body.regions.define(self.cx, RegionDef::default()); // Any loop body region inputs, which must receive values from both @@ -808,6 +811,9 @@ impl<'a> Structurizer<'a> { // FIXME(eddyb) `Loop` `Node`s should be changed to be hermetic // and have the loop state be output from the whole node itself, // for any outside uses of values defined within the loop body. + // + // TODO(eddyb) update above comment (and other comments elsewhere!) + // for the actual implementation of hermetic loops. let body_def = &mut self.func_def_body.regions[body]; let original_body_input_vars = mem::take(&mut body_def.inputs); assert!(body_def.outputs.is_empty()); @@ -877,7 +883,10 @@ impl<'a> Structurizer<'a> { // FIXME(eddyb) could it be possible to synthesize attrs // from `ControlInst`s' attrs and/or `OpLoopMerge`'s? attrs: AttrSet::default(), - kind: NodeKind::Loop { repeat_condition }, + kind: NodeKind::Loop { + // TODO(eddyb) make this a regular body output (first one?). + repeat_condition, + }, inputs: initial_inputs, child_regions: [body].into_iter().collect(), outputs: [].into_iter().collect(), @@ -885,6 +894,97 @@ impl<'a> Structurizer<'a> { .into(), ); + // HACK(eddyb) create matching `loop_node` output `Var`s, for all the + // "loop state" (body inputs->outputs etc.). + // FIXME(eddyb) could these be decoupled somehow? + self.func_def_body.nodes[loop_node].outputs = self.func_def_body.regions[body] + .inputs + .iter() + .map(|&input_var| { + // FIXME(eddyb) this fully duplicates attributes, could be messy? + let input_var_decl = self.func_def_body.vars[input_var].clone(); + self.func_def_body.vars.define( + self.cx, + VarDecl { def_parent: Either::Right(loop_node), ..input_var_decl }, + ) + }) + .collect(); + + // TODO(eddyb) WIP hermetic loops (output-side). + match &mut deferred_edges { + DeferredEdgeBundleSet::Unreachable | DeferredEdgeBundleSet::Always { .. } => {} + DeferredEdgeBundleSet::Choice { target_to_deferred } => { + for deferred in target_to_deferred.values_mut() { + match deferred.condition { + LazyCond::Undef | LazyCond::False | LazyCond::True => {} + + LazyCond::Dyn(_) => { + deferred.condition = LazyCond::Dyn(Rc::new(LazyCondDyn { + node: loop_node, + per_child_region_conds: [deferred.condition.clone()] + .into_iter() + .collect(), + })); + } + } + } + } + } + let all_deferred_edge_inputs = deferred_edges + .iter_targets_with_edge_bundle_mut() + .flat_map(|(_, edge_bundle)| &mut edge_bundle.target_inputs); + for v in all_deferred_edge_inputs { + VarReplacer(&self.var_replacements).transform_value_use(v).apply_to(v); + + let var = match v { + Value::Const(_) => continue, + Value::Var(var) => var, + }; + + // FIXME(eddyb) this fully duplicates attributes, could be messy? + let var_decl = self.func_def_body.vars[*var].clone(); + + if var_decl.def_parent == Either::Left(wrapper_region) { + continue; + } + + // FIXME(eddyb) reuse loop outputs for repeats of the same `var`, + // instead of introducing new ones for every single occurence. + + let undef_of_var_ty = self.const_undef(var_decl.ty); + + let body_def = &mut self.func_def_body.regions[body]; + + let next_loop_var_idx = body_def.inputs.len(); + body_def.inputs.push(self.func_def_body.vars.define( + self.cx, + VarDecl { + def_parent: Either::Left(body), + def_idx: next_loop_var_idx.try_into().unwrap(), + ..var_decl.clone() + }, + )); + + assert_eq!(body_def.outputs.len(), next_loop_var_idx); + body_def.outputs.push(Value::Var(*var)); + + let loop_node_def = &mut self.func_def_body.nodes[loop_node]; + + assert_eq!(loop_node_def.inputs.len(), next_loop_var_idx); + loop_node_def.inputs.push(Value::Const(undef_of_var_ty)); + + assert_eq!(loop_node_def.outputs.len(), next_loop_var_idx); + *var = self.func_def_body.vars.define( + self.cx, + VarDecl { + def_parent: Either::Right(loop_node), + def_idx: next_loop_var_idx.try_into().unwrap(), + ..var_decl.clone() + }, + ); + loop_node_def.outputs.push(*var); + } + self.func_def_body.regions[wrapper_region] .children .insert_last(loop_node, &mut self.func_def_body.nodes); @@ -1539,9 +1639,9 @@ impl<'a> Structurizer<'a> { { LazyCond::True } else { - LazyCond::Merge(Rc::new(LazyCondMerge::Select { + LazyCond::Dyn(Rc::new(LazyCondDyn { node: get_or_define_select_node(self, &cases), - per_case_conds: per_case_conds.cloned().collect(), + per_child_region_conds: per_case_conds.cloned().collect(), })) }; @@ -1594,50 +1694,44 @@ impl<'a> Structurizer<'a> { LazyCond::False => Value::Const(self.const_false), LazyCond::True => Value::Const(self.const_true), - // `LazyCond::Merge` was only created in the first place if a merge + // `LazyCond::Dyn` was only created in the first place if a merge // was actually necessary, so there shouldn't be simplifications to // do here (i.e. the value provided is if `materialize_lazy_cond` // never gets called because the target has become unconditional). // // FIXME(eddyb) there is still an `if cond { true } else { false }` - // special-case (repalcing with just `cond`), that cannot be expressed + // special-case (replacing with just `cond`), that cannot be expressed // currently in `LazyCond` itself (but maybe it should be). - LazyCond::Merge(merge) => { - let LazyCondMerge::Select { node, ref per_case_conds } = **merge; + LazyCond::Dyn(merge) => { + let LazyCondDyn { node, ref per_child_region_conds } = **merge; // HACK(eddyb) this won't actually allocate most of the time, // and avoids complications later below, when mutating the cases. - let per_case_conds: SmallVec<[_; 8]> = per_case_conds + let per_child_region_conds: SmallVec<[_; 8]> = per_child_region_conds .into_iter() .map(|cond| self.materialize_lazy_cond(cond)) .collect(); let NodeDef { attrs: _, kind, inputs, child_regions, outputs: output_vars } = &mut *self.func_def_body.nodes[node]; - let cases = match kind { - NodeKind::Select(kind) => { - assert_eq!(child_regions.len(), per_case_conds.len()); - - if let SelectionKind::BoolCond = kind { - let cond = inputs[0]; - - let [val_false, val_true] = - [self.const_false, self.const_true].map(Value::Const); - if per_case_conds[..] == [val_true, val_false] { - return cond; - } else if per_case_conds[..] == [val_false, val_true] { - // FIXME(eddyb) this could also be special-cased, - // at least when called from the topmost level, - // where which side is `false`/`true` doesn't - // matter (or we could even generate `!cond`?). - let _not_cond = cond; - } - } - child_regions + assert_eq!(child_regions.len(), per_child_region_conds.len()); + + if let NodeKind::Select(SelectionKind::BoolCond) = kind { + let cond = inputs[0]; + + let [val_false, val_true] = + [self.const_false, self.const_true].map(Value::Const); + if per_child_region_conds[..] == [val_true, val_false] { + return cond; + } else if per_child_region_conds[..] == [val_false, val_true] { + // FIXME(eddyb) this could also be special-cased, + // at least when called from the topmost level, + // where which side is `false`/`true` doesn't + // matter (or we could even generate `!cond`?). + let _not_cond = cond; } - _ => unreachable!(), - }; + } let output_var = self.func_def_body.vars.define( self.cx, @@ -1650,12 +1744,32 @@ impl<'a> Structurizer<'a> { ); output_vars.push(output_var); - for (&case, cond) in cases.iter().zip_eq(per_case_conds) { + for (&case, cond) in child_regions.iter().zip_eq(per_child_region_conds) { let RegionDef { outputs, .. } = &mut self.func_def_body.regions[case]; outputs.push(cond); assert_eq!(outputs.len(), output_vars.len()); } + // `Loop`s outputs have to have matching loop body inputs + // (and also initial loop inputs). + if let NodeKind::Loop { .. } = kind { + let body = child_regions[0]; + let input_vars = &mut self.func_def_body.regions[body].inputs; + let input_var = self.func_def_body.vars.define( + self.cx, + VarDecl { + attrs: AttrSet::default(), + ty: self.type_bool, + def_parent: Either::Left(body), + def_idx: input_vars.len().try_into().unwrap(), + }, + ); + input_vars.push(input_var); + + let initial_input = Value::Const(self.const_undef(self.type_bool)); + self.func_def_body.nodes[node].inputs.push(initial_input); + } + Value::Var(output_var) } } diff --git a/src/lib.rs b/src/lib.rs index e92afb90..7c4a1771 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -783,15 +783,9 @@ pub struct FuncDefBody { /// (i.e. in all possible execution paths, the definition precedes all uses) /// /// But unlike SPIR-V, SPIR-T's structured control-flow has implications for SSA: -/// * dominance is simpler, so values defined in a [`Region`](crate::Region) can be used: -/// * later in that region, including in the region's `outputs` -/// (which allows "exporting" values out to the rest of the function) -/// * outside that region, but *only* if the parent [`Node`](crate::Node) -/// is a `Loop` (that is, when the region is a loop's body) -/// * this is an "emergent" property, stemming from the region having to -/// execute (at least once) before the parent [`Node`](crate::Node) -/// can complete, but is not is not ideal and should eventually be replaced -/// with passing all such values through loop (body) `outputs` +/// * dominance is simpler, so values defined in a [`Region`](crate::Region) can +/// *only* be used later in that region, including in the region's `outputs` +/// (which allows "exporting" values out to the rest of the function) /// * instead of φ ("phi") nodes, SPIR-T uses region `outputs` to merge values /// coming from separate control-flow paths (i.e. the cases of a `Select`), /// and region `inputs` for passing values back along loop backedges @@ -823,15 +817,10 @@ pub struct RegionDef { pub children: EntityList, /// Output values from this [`Region`], provided to the parent: - /// * when this is the function body: these are the structured return values - /// * when this is a `Select` case: these are the values for the parent - /// [`Node`]'s outputs (accessed using [`VarKind::NodeOutput`]) - /// * when this is a `Loop` body: these are the values to be used for the - /// next loop iteration's body `inputs` - /// * **not** accessible through [`VarKind::NodeOutput`] on the `Loop`, - /// as it's both confusing regarding [`VarKind::RegionInput`], and - /// also there's nothing stopping body-defined values from directly being - /// used outside the loop (once that changes, this aspect can be flipped) + /// * when exiting a [`Node`]: these are the values for its outputs + /// (the caller's `FuncCall` [`Node`], in the case of a function body) + /// * when continuing a `Loop`: these are the values fed back into + /// the next loop iteration's body `inputs` pub outputs: SmallVec<[Value; 2]>, } @@ -861,6 +850,7 @@ pub struct NodeDef { /// * values provided by `region.outputs`, where `region` is the executed /// child [`Region`]: /// * when this is a `Select`: the case that was chosen + /// * when this is a `Loop`: the last iteration of the body // TODO(eddyb) include former `DataInst`s in above docs. pub outputs: SmallVec<[Var; 2]>, } @@ -974,6 +964,7 @@ pub enum VarKind { /// * value provided by `region.outputs[output_idx]`, where `region` is the /// executed child [`Region`] (of `node`): /// * when `node` is a `Select`: the case that was chosen + /// * when `node` is a `Loop`: the last iteration of the body // TODO(eddyb) include former `DataInst`s in above docs. NodeOutput { node: Node, output_idx: u32 }, } diff --git a/src/print/mod.rs b/src/print/mod.rs index c18e6c6c..3da90cb9 100644 --- a/src/print/mod.rs +++ b/src/print/mod.rs @@ -3755,9 +3755,8 @@ impl Print for FuncAt<'_, Node> { inputs[0], child_regions.iter().map(|&case| self.at(case).print(printer)), ), + // TODO(eddyb) rethink loop printing with (output-side) hermeticity. NodeKind::Loop { repeat_condition } => { - assert!(outputs.is_empty()); - let initial_inputs = inputs; let body = child_regions[0]; let inputs = &self.at(body).def().inputs; diff --git a/src/spv/lift.rs b/src/spv/lift.rs index 38a73c17..e928fdaf 100644 --- a/src/spv/lift.rs +++ b/src/spv/lift.rs @@ -620,8 +620,8 @@ impl<'a> FuncLifting<'a> { } CfgPoint::NodeExit(node) => { let node_def = func_def_body.at(node).def(); - match &node_def.kind { - NodeKind::Select(_) => node_def + if !node_def.child_regions.is_empty() { + node_def .outputs .iter() .map(|&output_var| { @@ -636,8 +636,9 @@ impl<'a> FuncLifting<'a> { default_value: None, }) }) - .collect::>()?, - _ => SmallVec::new(), + .collect::>()? + } else { + SmallVec::new() } } }; @@ -790,7 +791,7 @@ impl<'a> FuncLifting<'a> { NodeKind::Loop { repeat_condition } => { let backedge = CfgPoint::NodeEntry(parent_node); - let target_phi_values = region_outputs + let mut target_phi_values = region_outputs .map(|outputs| (backedge, outputs)) .into_iter() .collect(); @@ -816,6 +817,14 @@ impl<'a> FuncLifting<'a> { merge: None, } } else { + // FIXME(eddyb) this will cause redundant `OpPhi`s + // (in that SSA dominance rules do allow directly + // referencing values defined inside the loop body), + // they should be soundly optimized out somehow, + // maybe even reuse `cf::cfgssa` infrastructure? + if let Some(outputs) = region_outputs { + target_phi_values.insert(parent_exit, outputs); + } Terminator { attrs: AttrSet::default(), kind: Cow::Owned(