Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,17 +136,17 @@ fn main() -> @location(0) i32 {
global_var GV0 in spv.StorageClass.Output: s32

func F0() -> spv.OpTypeVoid {
loop(v0: s32 <- 1s32, v1: s32 <- 1s32) {
v2 = spv.OpSLessThan(v1, 10s32): bool
(v3: s32, v4: s32) = if v2 {
v5 = spv.OpIMul(v0, v1): s32
v6 = spv.OpIAdd(v1, 1s32): s32
(v5, v6)
(_: s32, _: s32, v0: s32) = loop(v1: s32 <- 1s32, v2: s32 <- 1s32, _: s32 <- spv.OpUndef: s32) {
v3 = spv.OpSLessThan(v2, 10s32): bool
(v4: s32, v5: s32) = if v3 {
v6 = spv.OpIMul(v1, v2): s32
v7 = spv.OpIAdd(v2, 1s32): s32
(v6, v7)
} else {
(spv.OpUndef: s32, spv.OpUndef: s32)
}
(v3, v4) -> (v0, v1)
} while v2
(v4, v5, v1) -> (v1, v2, _)
} while v3
spv.OpStore(Pointer: &GV0, Object: v0)
}
```
Expand Down
198 changes: 156 additions & 42 deletions src/cf/structurize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,18 +236,17 @@ enum LazyCond {
False,
True,

Merge(Rc<LazyCondMerge>),
Dyn(Rc<LazyCondDyn>),
}

enum LazyCondMerge {
Select {
node: Node,
// FIXME(eddyb) the lowest level of `LazyCond` ends up containing only
// `LazyCond::{Undef,False,True}`, and that could more efficiently be
// expressed using e.g. bitsets, but the `Rc` in `LazyCond::Merge`
// means that this is more compact than it would otherwise be.
per_case_conds: SmallVec<[LazyCond; 4]>,
},
struct LazyCondDyn {
node: Node,

// FIXME(eddyb) the lowest level of `LazyCond` ends up containing only
// `LazyCond::{Undef,False,True}`, and that could more efficiently be
// expressed using e.g. bitsets, but the `Rc` in `LazyCond::Dyn`
// means that this is more compact than it would otherwise be.
per_child_region_conds: SmallVec<[LazyCond; 4]>,
}

/// A target for one of the edge bundles in a [`DeferredEdgeBundleSet`], mostly
Expand All @@ -268,7 +267,7 @@ enum DeferredTarget {
/// that exactly one [`DeferredEdgeBundle`] condition must be `true` at any
/// given time (the only non-trivial case, [`DeferredEdgeBundleSet::Choice`],
/// satisfies it because it's only used for merging `Select` cases, and so
/// all the conditions will end up using disjoint [`LazyCond::Merge`]s).
/// all the conditions will end up using disjoint [`LazyCond::Dyn`]s).
enum DeferredEdgeBundleSet {
Unreachable,

Expand Down Expand Up @@ -798,6 +797,10 @@ impl<'a> Structurizer<'a> {
// the loop body itself was originally.
// NOTE(eddyb) both input declarations and the child `Loop` node are
// added later down below, after the `Loop` node is created.
//
// TODO(eddyb) does the above comment even make sense? output-side
// hermetic loops are now implemented, but the wrapper is for the
// input side, instead.
let wrapper_region = self.func_def_body.regions.define(self.cx, RegionDef::default());

// Any loop body region inputs, which must receive values from both
Expand All @@ -808,6 +811,9 @@ impl<'a> Structurizer<'a> {
// FIXME(eddyb) `Loop` `Node`s should be changed to be hermetic
// and have the loop state be output from the whole node itself,
// for any outside uses of values defined within the loop body.
//
// TODO(eddyb) update above comment (and other comments elsewhere!)
// for the actual implementation of hermetic loops.
let body_def = &mut self.func_def_body.regions[body];
let original_body_input_vars = mem::take(&mut body_def.inputs);
assert!(body_def.outputs.is_empty());
Expand Down Expand Up @@ -877,14 +883,108 @@ impl<'a> Structurizer<'a> {
// FIXME(eddyb) could it be possible to synthesize attrs
// from `ControlInst`s' attrs and/or `OpLoopMerge`'s?
attrs: AttrSet::default(),
kind: NodeKind::Loop { repeat_condition },
kind: NodeKind::Loop {
// TODO(eddyb) make this a regular body output (first one?).
repeat_condition,
},
inputs: initial_inputs,
child_regions: [body].into_iter().collect(),
outputs: [].into_iter().collect(),
}
.into(),
);

// HACK(eddyb) create matching `loop_node` output `Var`s, for all the
// "loop state" (body inputs->outputs etc.).
// FIXME(eddyb) could these be decoupled somehow?
self.func_def_body.nodes[loop_node].outputs = self.func_def_body.regions[body]
.inputs
.iter()
.map(|&input_var| {
// FIXME(eddyb) this fully duplicates attributes, could be messy?
let input_var_decl = self.func_def_body.vars[input_var].clone();
self.func_def_body.vars.define(
self.cx,
VarDecl { def_parent: Either::Right(loop_node), ..input_var_decl },
)
})
.collect();

// TODO(eddyb) WIP hermetic loops (output-side).
match &mut deferred_edges {
DeferredEdgeBundleSet::Unreachable | DeferredEdgeBundleSet::Always { .. } => {}
DeferredEdgeBundleSet::Choice { target_to_deferred } => {
for deferred in target_to_deferred.values_mut() {
match deferred.condition {
LazyCond::Undef | LazyCond::False | LazyCond::True => {}

LazyCond::Dyn(_) => {
deferred.condition = LazyCond::Dyn(Rc::new(LazyCondDyn {
node: loop_node,
per_child_region_conds: [deferred.condition.clone()]
.into_iter()
.collect(),
}));
}
}
}
}
}
let all_deferred_edge_inputs = deferred_edges
.iter_targets_with_edge_bundle_mut()
.flat_map(|(_, edge_bundle)| &mut edge_bundle.target_inputs);
for v in all_deferred_edge_inputs {
VarReplacer(&self.var_replacements).transform_value_use(v).apply_to(v);

let var = match v {
Value::Const(_) => continue,
Value::Var(var) => var,
};

// FIXME(eddyb) this fully duplicates attributes, could be messy?
let var_decl = self.func_def_body.vars[*var].clone();

if var_decl.def_parent == Either::Left(wrapper_region) {
continue;
}

// FIXME(eddyb) reuse loop outputs for repeats of the same `var`,
// instead of introducing new ones for every single occurence.

let undef_of_var_ty = self.const_undef(var_decl.ty);

let body_def = &mut self.func_def_body.regions[body];

let next_loop_var_idx = body_def.inputs.len();
body_def.inputs.push(self.func_def_body.vars.define(
self.cx,
VarDecl {
def_parent: Either::Left(body),
def_idx: next_loop_var_idx.try_into().unwrap(),
..var_decl.clone()
},
));

assert_eq!(body_def.outputs.len(), next_loop_var_idx);
body_def.outputs.push(Value::Var(*var));

let loop_node_def = &mut self.func_def_body.nodes[loop_node];

assert_eq!(loop_node_def.inputs.len(), next_loop_var_idx);
loop_node_def.inputs.push(Value::Const(undef_of_var_ty));

assert_eq!(loop_node_def.outputs.len(), next_loop_var_idx);
*var = self.func_def_body.vars.define(
self.cx,
VarDecl {
def_parent: Either::Right(loop_node),
def_idx: next_loop_var_idx.try_into().unwrap(),
..var_decl.clone()
},
);
loop_node_def.outputs.push(*var);
}

self.func_def_body.regions[wrapper_region]
.children
.insert_last(loop_node, &mut self.func_def_body.nodes);
Expand Down Expand Up @@ -1539,9 +1639,9 @@ impl<'a> Structurizer<'a> {
{
LazyCond::True
} else {
LazyCond::Merge(Rc::new(LazyCondMerge::Select {
LazyCond::Dyn(Rc::new(LazyCondDyn {
node: get_or_define_select_node(self, &cases),
per_case_conds: per_case_conds.cloned().collect(),
per_child_region_conds: per_case_conds.cloned().collect(),
}))
};

Expand Down Expand Up @@ -1594,50 +1694,44 @@ impl<'a> Structurizer<'a> {
LazyCond::False => Value::Const(self.const_false),
LazyCond::True => Value::Const(self.const_true),

// `LazyCond::Merge` was only created in the first place if a merge
// `LazyCond::Dyn` was only created in the first place if a merge
// was actually necessary, so there shouldn't be simplifications to
// do here (i.e. the value provided is if `materialize_lazy_cond`
// never gets called because the target has become unconditional).
//
// FIXME(eddyb) there is still an `if cond { true } else { false }`
// special-case (repalcing with just `cond`), that cannot be expressed
// special-case (replacing with just `cond`), that cannot be expressed
// currently in `LazyCond` itself (but maybe it should be).
LazyCond::Merge(merge) => {
let LazyCondMerge::Select { node, ref per_case_conds } = **merge;
LazyCond::Dyn(merge) => {
let LazyCondDyn { node, ref per_child_region_conds } = **merge;

// HACK(eddyb) this won't actually allocate most of the time,
// and avoids complications later below, when mutating the cases.
let per_case_conds: SmallVec<[_; 8]> = per_case_conds
let per_child_region_conds: SmallVec<[_; 8]> = per_child_region_conds
.into_iter()
.map(|cond| self.materialize_lazy_cond(cond))
.collect();

let NodeDef { attrs: _, kind, inputs, child_regions, outputs: output_vars } =
&mut *self.func_def_body.nodes[node];
let cases = match kind {
NodeKind::Select(kind) => {
assert_eq!(child_regions.len(), per_case_conds.len());

if let SelectionKind::BoolCond = kind {
let cond = inputs[0];

let [val_false, val_true] =
[self.const_false, self.const_true].map(Value::Const);
if per_case_conds[..] == [val_true, val_false] {
return cond;
} else if per_case_conds[..] == [val_false, val_true] {
// FIXME(eddyb) this could also be special-cased,
// at least when called from the topmost level,
// where which side is `false`/`true` doesn't
// matter (or we could even generate `!cond`?).
let _not_cond = cond;
}
}

child_regions
assert_eq!(child_regions.len(), per_child_region_conds.len());

if let NodeKind::Select(SelectionKind::BoolCond) = kind {
let cond = inputs[0];

let [val_false, val_true] =
[self.const_false, self.const_true].map(Value::Const);
if per_child_region_conds[..] == [val_true, val_false] {
return cond;
} else if per_child_region_conds[..] == [val_false, val_true] {
// FIXME(eddyb) this could also be special-cased,
// at least when called from the topmost level,
// where which side is `false`/`true` doesn't
// matter (or we could even generate `!cond`?).
let _not_cond = cond;
}
_ => unreachable!(),
};
}

let output_var = self.func_def_body.vars.define(
self.cx,
Expand All @@ -1650,12 +1744,32 @@ impl<'a> Structurizer<'a> {
);
output_vars.push(output_var);

for (&case, cond) in cases.iter().zip_eq(per_case_conds) {
for (&case, cond) in child_regions.iter().zip_eq(per_child_region_conds) {
let RegionDef { outputs, .. } = &mut self.func_def_body.regions[case];
outputs.push(cond);
assert_eq!(outputs.len(), output_vars.len());
}

// `Loop`s outputs have to have matching loop body inputs
// (and also initial loop inputs).
if let NodeKind::Loop { .. } = kind {
let body = child_regions[0];
let input_vars = &mut self.func_def_body.regions[body].inputs;
let input_var = self.func_def_body.vars.define(
self.cx,
VarDecl {
attrs: AttrSet::default(),
ty: self.type_bool,
def_parent: Either::Left(body),
def_idx: input_vars.len().try_into().unwrap(),
},
);
input_vars.push(input_var);

let initial_input = Value::Const(self.const_undef(self.type_bool));
self.func_def_body.nodes[node].inputs.push(initial_input);
}

Value::Var(output_var)
}
}
Expand Down
27 changes: 9 additions & 18 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -783,15 +783,9 @@ pub struct FuncDefBody {
/// (i.e. in all possible execution paths, the definition precedes all uses)
///
/// But unlike SPIR-V, SPIR-T's structured control-flow has implications for SSA:
/// * dominance is simpler, so values defined in a [`Region`](crate::Region) can be used:
/// * later in that region, including in the region's `outputs`
/// (which allows "exporting" values out to the rest of the function)
/// * outside that region, but *only* if the parent [`Node`](crate::Node)
/// is a `Loop` (that is, when the region is a loop's body)
/// * this is an "emergent" property, stemming from the region having to
/// execute (at least once) before the parent [`Node`](crate::Node)
/// can complete, but is not is not ideal and should eventually be replaced
/// with passing all such values through loop (body) `outputs`
/// * dominance is simpler, so values defined in a [`Region`](crate::Region) can
/// *only* be used later in that region, including in the region's `outputs`
/// (which allows "exporting" values out to the rest of the function)
/// * instead of φ ("phi") nodes, SPIR-T uses region `outputs` to merge values
/// coming from separate control-flow paths (i.e. the cases of a `Select`),
/// and region `inputs` for passing values back along loop backedges
Expand Down Expand Up @@ -823,15 +817,10 @@ pub struct RegionDef {
pub children: EntityList<Node>,

/// Output values from this [`Region`], provided to the parent:
/// * when this is the function body: these are the structured return values
/// * when this is a `Select` case: these are the values for the parent
/// [`Node`]'s outputs (accessed using [`VarKind::NodeOutput`])
/// * when this is a `Loop` body: these are the values to be used for the
/// next loop iteration's body `inputs`
/// * **not** accessible through [`VarKind::NodeOutput`] on the `Loop`,
/// as it's both confusing regarding [`VarKind::RegionInput`], and
/// also there's nothing stopping body-defined values from directly being
/// used outside the loop (once that changes, this aspect can be flipped)
/// * when exiting a [`Node`]: these are the values for its outputs
/// (the caller's `FuncCall` [`Node`], in the case of a function body)
/// * when continuing a `Loop`: these are the values fed back into
/// the next loop iteration's body `inputs`
pub outputs: SmallVec<[Value; 2]>,
}

Expand Down Expand Up @@ -861,6 +850,7 @@ pub struct NodeDef {
/// * values provided by `region.outputs`, where `region` is the executed
/// child [`Region`]:
/// * when this is a `Select`: the case that was chosen
/// * when this is a `Loop`: the last iteration of the body
// TODO(eddyb) include former `DataInst`s in above docs.
pub outputs: SmallVec<[Var; 2]>,
}
Expand Down Expand Up @@ -974,6 +964,7 @@ pub enum VarKind {
/// * value provided by `region.outputs[output_idx]`, where `region` is the
/// executed child [`Region`] (of `node`):
/// * when `node` is a `Select`: the case that was chosen
/// * when `node` is a `Loop`: the last iteration of the body
// TODO(eddyb) include former `DataInst`s in above docs.
NodeOutput { node: Node, output_idx: u32 },
}
Expand Down
3 changes: 1 addition & 2 deletions src/print/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3755,9 +3755,8 @@ impl Print for FuncAt<'_, Node> {
inputs[0],
child_regions.iter().map(|&case| self.at(case).print(printer)),
),
// TODO(eddyb) rethink loop printing with (output-side) hermeticity.
NodeKind::Loop { repeat_condition } => {
assert!(outputs.is_empty());

let initial_inputs = inputs;
let body = child_regions[0];
let inputs = &self.at(body).def().inputs;
Expand Down
Loading
Loading