diff --git a/packages/preview/slr8/0.0.1/LICENSE b/packages/preview/slr8/0.0.1/LICENSE new file mode 100644 index 0000000000..624cc3aa8f --- /dev/null +++ b/packages/preview/slr8/0.0.1/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 sjfhsjfh + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/packages/preview/slr8/0.0.1/README.md b/packages/preview/slr8/0.0.1/README.md new file mode 100644 index 0000000000..eec26251b6 --- /dev/null +++ b/packages/preview/slr8/0.0.1/README.md @@ -0,0 +1,85 @@ +# slr8 + +Takes a grammar and a sentence and walks through SLR(1) parsing: augmented +grammar, FIRST/FOLLOW, canonical LR(0) items, the DFA, the ACTION/GOTO +table, a shift-reduce trace, the parse tree. All computed in Typst, no +external script generating a table beforehand. + +It's SLR(1) — LR(0) item sets, but reduce actions only go in when the +symbol's in FOLLOW(LHS). That's the whole difference from plain LR(0). 
+ +See [`example.pdf`](docs/example.pdf) for what it looks like end to end on: + +``` +C → id ( A ) +A → A , E | ε | E +E → E + T | T +T → id | num | C +``` +parsing `id ( num + id , id ( num + id ) )`. + +## Using it + +```typst +#import "@preview/slr8:0.0.1": * + +#let my-grammar = ( + ("C", ("id", "(", "A", ")")), + ("A", ("A", ",", "E")), + ("A", ("\\epsilon",)), + ("A", ("E",)), + ("E", ("E", "+", "T")), + ("E", ("T",)), + ("T", ("id",)), + ("T", ("num",)), + ("T", ("C",)), +) + +#let my-sentence = ("id", "(", "num", "+", "id", ",", "id", "(", "num", "+", "id", ")", ")") + +#show-grammar(my-grammar) +#show-parse-table(my-grammar) +#show-parse-trace(my-grammar, my-sentence) +#show-parse-tree(my-grammar, my-sentence) +``` + +A grammar is just a list of `(LHS, RHS)` pairs, RHS being a tuple of +symbols. First production's LHS = start symbol. Anything that's not a LHS +anywhere is a terminal. `"\\epsilon"` for empty productions. Don't use `"."` +as a symbol — it's the LR item dot internally and things will break in +confusing ways if it collides. + +A sentence is just the terminals, no `$` at the end, that gets added for +you. + +## What each function gives you + +`show-grammar` / `show-aug-grammar` — the production list, plain or with the +`S' → S` row added. + +`show-first-follow` — FIRST/FOLLOW per non-terminal. + +`show-canonical-items` — every I_n state, with where it came from. + +`show-automaton(grammar, width: 100%)` — the LR(0) DFA, rendered via +diagraph/Graphviz. + +`show-parse-table` — ACTION/GOTO, conflicts called out in red if there are +any. + +`show-parse-trace` / `show-parse-tree` — take `(grammar, sentence)`, give +you the stack-by-stack trace and the resulting tree. + +All of them take the grammar *before* augmentation — that part's handled +for you. 
+ +## Known gaps + +- SLR(1) conflict resolution, not LALR/LR(1) — if your grammar genuinely + needs per-state lookahead to be unambiguous, you'll see conflicts here + even though a stronger parser wouldn't have any. +- FIRST-set computation only skips direct left recursion (`A → A α`). + Indirect left recursion through another non-terminal isn't handled. +- When there's a real shift/reduce or reduce/reduce conflict, whichever + action got inserted first wins — there's no precedence/associativity + table backing this, it's reported and left at that. diff --git a/packages/preview/slr8/0.0.1/docs/example.pdf b/packages/preview/slr8/0.0.1/docs/example.pdf new file mode 100644 index 0000000000..56968917b1 Binary files /dev/null and b/packages/preview/slr8/0.0.1/docs/example.pdf differ diff --git a/packages/preview/slr8/0.0.1/docs/example.typ b/packages/preview/slr8/0.0.1/docs/example.typ new file mode 100644 index 0000000000..fc9473c9a3 --- /dev/null +++ b/packages/preview/slr8/0.0.1/docs/example.typ @@ -0,0 +1,103 @@ +#import "@preview/slr8:0.0.1": * + +// Use "\\epsilon" for empty productions +// The first production's LHS is treated as the start symbol +// The dot character "." 
is reserved, do not use it as a symbol + +#let my-grammar = (("C", ("id", "(", "A", ")")) + ,("A", ("A", ",", "E")) + ,("A", ("\\epsilon",)) + ,("A", ("E",)) + ,("E", ("E", "+", "T")) + ,("E", ("T",)) + ,("T", ("id",)) + ,("T", ("num",)) + ,("T", ("C",))) + +#let my-sentence = ("id", "(", "num", "+", "id", "," , "id", "(" , "num", "+" , "id" , ")" , ")") + +// DOCUMENT (comment out any section you don't need) + +#set page(margin: 2cm) +#set text(font: "New Computer Modern", size: 10pt) + +#align(center)[ + #text()[Your Little Name] + + #text(size: 16pt, weight: "bold")[SLR Parser Visualization] + + #v(4pt) + + #text(weight: "bold",size: 10pt)[ + Input: #raw(my-sentence.join(" ")) + ] +] + +#v(16pt) + + +#grid( + columns: (1fr, 1fr), + align: center, + [ + == Original grammar + + #v(5pt) + + #show-grammar(my-grammar) + ], + [ + == Augmented grammar + + #v(5pt) + + #show-aug-grammar(my-grammar) + ] +) + + +== First and Follow sets + +#v(5pt) + +#show-first-follow(my-grammar) + +#v(16pt) + +== Set of items + +#v(5pt) + +#show-canonical-items(my-grammar) + +#v(16pt) + +== DFA + +#v(12pt) + +#show-automaton(my-grammar) + +#v(18pt) + +== States table + +#v(16pt) + +#align(center, show-parse-table(my-grammar)) + +#pagebreak() + +== Stack for #raw(my-sentence.join(" ")) + +#v(16pt) + +#show-parse-trace(my-grammar, my-sentence) + +#pagebreak() + +== Parse Tree for #raw(my-sentence.join(" ")) + +#v(16pt) + +#show-parse-tree(my-grammar, my-sentence) diff --git a/packages/preview/slr8/0.0.1/src/lib.typ b/packages/preview/slr8/0.0.1/src/lib.typ new file mode 100644 index 0000000000..a7978d505a --- /dev/null +++ b/packages/preview/slr8/0.0.1/src/lib.typ @@ -0,0 +1,397 @@ +#let action-type = (SHIFT: 0, REDUCE: 1, ACCEPT: 2, ERROR: 3) // tag stored as the first element of every ACTION entry + + +// Grammar helpers +// Prepends the production S' → S, where S is the LHS of the first production and S' is S with "'" appended. +#let augment-grammar(grammar) = { + let first-variable = grammar.at(0).at(0) + let new-first-prod = (first-variable + "'", (first-variable,)) + let augmented-grammar = (new-first-prod,) + grammar + return augmented-grammar
+} +// Returns the distinct LHS symbols of the grammar, i.e. its non-terminals. +#let get-variables(grammar) = { + let variables = () + for item in grammar { + variables.push(item.at(0)) + } + return variables.dedup() +} +// Returns the distinct RHS symbols that are not a LHS, the item dot "." or "\\epsilon". +#let get-terminals(grammar) = { + let vars = get-variables(grammar) + let terminals = () + for item in grammar { + for t in item.at(1) { + if t not in vars and t != "." and t != "\\epsilon" { + terminals.push(t) + } + } + } + return terminals.dedup() +} +// Returns the non-terminals followed by the terminals, without duplicates. +#let get-all-symbols(grammar) = { + let symbols = get-variables(grammar) + get-terminals(grammar) + return symbols.dedup() +} + + +// LR item manipulation +// Turns a production into the item with the dot in front of its RHS. An empty production ("\\epsilon") becomes the complete item A → . so that it can be reduced. +#let append-dot(item) = { + let (lhs, rhs) = item + return (lhs, if rhs == ("\\epsilon",) { (".",) } else { (".",) + rhs }) +} +// Returns the grammar with only the production at index `target` turned into an item. +#let append-dot-in-grammar(grammar, target) = { + grammar.enumerate().map(((i, item)) => { + if i == target { + append-dot(item) + } else { + item + } + }) +} +// Moves the dot one symbol to the right; an item with no dot or with the dot at the end is returned unchanged. +#let advance-dot(item) = { + let (lhs, rhs) = item + let dot-index = rhs.position(x => x == ".") + + if dot-index != none and (dot-index + 1) < rhs.len() { + let next-symbol = rhs.at(dot-index + 1) + let new-rhs = rhs.slice(0, dot-index) + (next-symbol, ".") + rhs.slice(dot-index + 2) + return (lhs, new-rhs) + } + return item +} + + +// Closure of an item set: keeps adding the dotted productions of every symbol found right after a dot until nothing new appears. +#let closure(state, grammar) = { + let J = state + let changed = true + + while changed { + changed = false + + for item in J { + let rhs = item.at(1) + let dot-index = rhs.position(x => x == ".") + + if dot-index != none and (dot-index + 1) < rhs.len() { + let target = rhs.at(dot-index + 1) + let target-prods = grammar.filter(x => x.at(0) == target) + + for prod in target-prods { + let new-item = append-dot(prod) + + if new-item not in J { + J.push(new-item) + changed = true + } + } + } + } + } + + return J +} +// GOTO(state, symbol): advances the dot over `symbol` in every item that allows it and returns the closure of the result. +#let goto(state, symbol, grammar) = { + let moved-items = () + + for item in state { + let rhs = item.at(1) + let dot-index = rhs.position(x => x == ".") + + if dot-index != none and (dot-index + 1) < rhs.len() { + let next-symbol = rhs.at(dot-index + 1) + + if next-symbol == symbol { + moved-items.push(advance-dot(item)) + } +
} + } + + return closure(moved-items, grammar) +} + + +// Builds the canonical collection of LR(0) item sets: starts from the closure of the first production's item and follows goto until no new state appears. +#let canonical-items(augmented-grammar) = { + let I0 = closure((append-dot(augmented-grammar.at(0)),), augmented-grammar) + let C = (I0,) + let changed = true + let symbols = get-all-symbols(augmented-grammar).filter(x => x != "\\epsilon") + + while changed { + changed = false + + for state in C { + for symbol in symbols { + let next-state = goto(state, symbol, augmented-grammar) + + if (next-state.len() != 0) and (next-state not in C) { + C.push(next-state) + changed = true + } + } + } + } + + return C +} + + +// FIRST of a symbol sequence given the FIRST sets known so far; contains "\\epsilon" only when the sequence is empty or every symbol in it can derive "\\epsilon". +#let get-first-of-sequence(seq, first-sets) = { + if seq.len() == 0 or seq == ("\\epsilon",) { return ("\\epsilon",) } + let result = () + let all-epsilon = true + for sym in seq { + let sym-first = first-sets.at(sym, default: ()) + for f in sym-first { + if f != "\\epsilon" and f not in result { result.push(f) } + } + if "\\epsilon" not in sym-first { + all-epsilon = false + break + } + } + if all-epsilon and "\\epsilon" not in result { result.push("\\epsilon") } + return result +} +// Computes FIRST for every terminal and non-terminal by iterating over the productions until no set grows. +#let compute-first(grammar) = { + let first-sets = (:) + for sym in get-terminals(grammar) { first-sets.insert(sym, (sym,)) } + for sym in get-variables(grammar) { first-sets.insert(sym, ()) } + + let changed = true + while changed { + changed = false + for prod in grammar { + let lhs = prod.at(0) + let rhs = prod.at(1) + + // Left-recursive productions are processed like any other: for A → A α with a + // nullable A, FIRST(α) belongs to FIRST(A), and the fixed point still terminates. + + let rhs-first = get-first-of-sequence(rhs, first-sets) + for f in rhs-first { + let current-first = first-sets.at(lhs) + if f not in current-first { + current-first.push(f) + first-sets.insert(lhs, current-first) + changed = true + } + } + } + } + return first-sets +} +// Computes FOLLOW for every non-terminal; reads the start symbol from the RHS of the first production, so it expects the augmented grammar. +#let compute-follow(grammar, first-sets) = { + let follow-sets = (:) + let non-terminals = get-variables(grammar) + for nt in non-terminals { follow-sets.insert(nt, ()) } + + let original-start = grammar.at(0).at(1).at(0)
+ follow-sets.insert(original-start, ("$",)) + // Propagate until no FOLLOW set grows. + let changed = true + while changed { + changed = false + for prod in grammar { + let lhs = prod.at(0) + let rhs = prod.at(1) + if rhs == ("\\epsilon",) { continue } + // For each non-terminal B in A → α B β: FOLLOW(B) gets FIRST(β) without ε, plus FOLLOW(A) when β is empty or can derive ε. + for i in range(rhs.len()) { + let symbol = rhs.at(i) + if symbol in non-terminals { + let beta = rhs.slice(i + 1) + let first-beta = get-first-of-sequence(beta, first-sets) + + for f in first-beta { + if f != "\\epsilon" and f not in follow-sets.at(symbol) { + let sym-follow = follow-sets.at(symbol) + sym-follow.push(f) + follow-sets.insert(symbol, sym-follow) + changed = true + } + } + if "\\epsilon" in first-beta or beta.len() == 0 { + for f in follow-sets.at(lhs) { + if f not in follow-sets.at(symbol) { + let sym-follow = follow-sets.at(symbol) + sym-follow.push(f) + follow-sets.insert(symbol, sym-follow) + changed = true + } + } + } + } + } + } + } + return follow-sets +} + + +// SLR(1) table construction + +// Returns (ACTION: array of dicts, GOTO: array of dicts, conflicts: array) +// Each conflict is a dict: (state: int, symbol: str, existing: action, incoming: action) +// Conflicts are reported, but the entry written first wins: shift and accept entries are inserted before reduces, so shift is preferred on shift/reduce. The caller decides what to do with the conflict list. + +#let build-tables(C, augmented-grammar) = { + let ACTION = () + let GOTO = () + let conflicts = () + + let terminals = get-terminals(augmented-grammar).filter(x => x != "\\epsilon") + let variables = get-variables(augmented-grammar) + + let first-sets = compute-first(augmented-grammar) + let follow-sets = compute-follow(augmented-grammar, first-sets) + + // S' → S . + let start-item = append-dot(augmented-grammar.at(0)) // S' → . S + let accept-item = advance-dot(start-item) // S' → S .
+ + for (state-idx, state) in C.enumerate() { + let current-action = (:) + let current-goto = (:) + // Shift: every terminal with a non-empty goto from this state. + for symbol in terminals { + let next-state = goto(state, symbol, augmented-grammar) + + if next-state.len() > 0 { + let j = C.position(x => x == next-state) + current-action.insert(symbol, (action-type.SHIFT, j)) + } + } + // GOTO: every non-terminal with a non-empty goto from this state. + for symbol in variables { + let next-state = goto(state, symbol, augmented-grammar) + + if next-state.len() > 0 { + let j = C.position(x => x == next-state) + current-goto.insert(symbol, j) + } + } + // Accept on "$" when the state holds S' → S . + if accept-item in state { + current-action.insert("$", (action-type.ACCEPT, none)) + } + // Reduce: for each complete item, on every symbol in FOLLOW of its LHS. + for item in state { + let lhs = item.at(0) + let rhs = item.at(1) + let dot-index = rhs.position(x => x == ".") + + // item is complete (dot at end) and is not the accept item + if dot-index == (rhs.len() - 1) and item != accept-item { + let pure-rhs = rhs.slice(0, dot-index) + if pure-rhs.len() == 0 { pure-rhs = ("\\epsilon",) } + + let rule-index = augmented-grammar.position( + x => x.at(0) == lhs and x.at(1) == pure-rhs + ) + + let follow-A = follow-sets.at(lhs) + for f in follow-A { + let reduce-action = (action-type.REDUCE, rule-index) + + if f in current-action { + // conflict detected and existing entry wins + // TODO: precedence/associativity rules + conflicts.push(( + state: state-idx, + symbol: f, + existing: current-action.at(f), + incoming: reduce-action, + )) + } else { + current-action.insert(f, reduce-action) + } + } + } + } + + ACTION.push(current-action) + GOTO.push(current-goto) + } + + return (ACTION: ACTION, GOTO: GOTO, conflicts: conflicts) +} + + + + + +// Shift-reduce driver. Returns (success, log, ast): `log` holds one entry per step (step number, stack, remaining input, action) and `ast` is the parse tree on accept, `none` otherwise. +#let parse-input(input, ACTION, GOTO, augmented-grammar) = { + let stack = (0,) + let tree-stack = () + let current-input = input + let step = 0 + let history = () + + while current-input.len() > 0 { + step += 1 + // TODO: proper infinite loop detection + if step > 1000 { break } + + let state = stack.last() + let token = current-input.at(0) + let action-dict = ACTION.at(state) + +
let act = action-dict.at(token, default: (action-type.ERROR, none)) + // Log the configuration before the action is applied. + history.push((step: step, stack: stack, input: current-input, action: act)) + + if act.at(0) == action-type.SHIFT { + let next-state = act.at(1) + stack.push(token) + stack.push(next-state) + current-input.remove(0) + tree-stack.push((label: token, children: ())) + + } else if act.at(0) == action-type.REDUCE { + let rule-index = act.at(1) + let rule = augmented-grammar.at(rule-index) + let lhs = rule.at(0) + let rhs = rule.at(1) + // An empty production pops nothing and receives a single "\\epsilon" leaf as its child. + let rhs-len = if rhs == ("\\epsilon",) { 0 } else { rhs.len() } + let children = () + // The stack interleaves symbols and states, so each RHS symbol accounts for two entries. + if rhs-len > 0 { + let pop-count = rhs-len * 2 + stack = stack.slice(0, stack.len() - pop-count) + children = tree-stack.slice(tree-stack.len() - rhs-len) + tree-stack = tree-stack.slice(0, tree-stack.len() - rhs-len) + } else { + children = ((label: "\\epsilon", children: ()),) + } + + let top-state = stack.last() + let next-state = GOTO.at(top-state).at(lhs) + + stack.push(lhs) + stack.push(next-state) + tree-stack.push((label: lhs, children: children)) + + } else if act.at(0) == action-type.ACCEPT { + return (success: true, log: history, ast: tree-stack.last()) + + } else { + return (success: false, log: history, ast: none) + } + } + // Reached when the loop ends without accept or error (step limit hit or input exhausted). + return (success: false, log: history, ast: none) +} diff --git a/packages/preview/slr8/0.0.1/src/vis.typ b/packages/preview/slr8/0.0.1/src/vis.typ new file mode 100644 index 0000000000..71a2629528 --- /dev/null +++ b/packages/preview/slr8/0.0.1/src/vis.typ @@ -0,0 +1,376 @@ +#import "lib.typ": * +#import "@preview/diagraph:0.3.7": raw-render + + +// Renders one LR item as "lhs → rhs", drawing the dot as a red bullet and "\\epsilon" as ε. +#let render-item(item) = { + let (lhs, rhs) = item + let rhs-str = rhs.map(s => if s == "."
{ text(fill: red.darken(20%))[$bullet.op$] } else if s == "\\epsilon" { $epsilon$ } else { $#s$ }) + box[#$#lhs arrow.r$ #rhs-str.join(h(2pt))] +} +// Formats one grammar symbol: ε and non-terminals in math mode, "$" and terminals in a monospace font. +#let fmt-sym(s, variables) = { + if s == "\\epsilon" { $epsilon$ } + else if s == "$" { text(font: "DejaVu Sans Mono", size: 8pt)[\$] } + else if s in variables { $#s$ } + else { text(font: "DejaVu Sans Mono", size: 8pt)[#s] } +} +// Backslash-escapes backslashes, double quotes, newlines and single quotes so the string can be placed inside a quoted DOT label. +#let dot-escape(s) = { + s.replace("\\", "\\\\") + .replace("\"", "\\\"") + .replace("\n", "\\n") + .replace("'", "\\'") +} +// Plain-text form of an LR item ("lhs → rhs"), with the dot written as "•" and "\\epsilon" as "ε". +#let item-to-str(item) = { + let (lhs, rhs) = item + let rhs-s = rhs.map(s => if s == "." { "•" } else if s == "\\epsilon" { "ε" } else { s }).join(" ") + lhs + " → " + rhs-s +} + + + +// Renders the productions as a borderless table: index, LHS, arrow, RHS. +#let show-grammar(grammar) = { + block(width: 100%)[ + #table( + columns: (auto, auto, auto, auto), + stroke: none, + align: (center, center, center, left), + inset: (x: 6pt, y: 3pt), + [*\#*], [*Var*], [], [*Production*], + ..grammar.enumerate().map(((i, prod)) => { + let (lhs, rhs) = prod + ( + text(fill: purple.darken(20%))[$#i$], + $#lhs$, + $arrow.r$, + rhs.map(s => if s == "\\epsilon" { $epsilon$ } else { $#s$ }).join($space$), + ) + }).flatten() + ) + ] +} + +// Same table as show-grammar, built from the augmented grammar (the S' → S production comes first, as index 0). +#let show-aug-grammar(grammar) = { + let aug = augment-grammar(grammar) + block(width: 100%)[ + #table( + columns: (auto, auto, auto, auto), + stroke: none, + align: (center, center, center, left), + inset: (x: 6pt, y: 3pt), + [*\#*], [*Var*], [], [*Production*], + ..aug.enumerate().map(((i, prod)) => { + let (lhs, rhs) = prod + ( + text(fill: purple.darken(20%))[$#i$], + $#lhs$, + $arrow.r$, + rhs.map(s => if s == "\\epsilon" { $epsilon$ } else { $#s$ }).join($space$), + ) + }).flatten() + ) + ] +} + + +// Renders a table with the FIRST and FOLLOW sets of every non-terminal except the added start symbol S'. +#let show-first-follow(grammar) = { + let aug = augment-grammar(grammar) + let vars = get-variables(aug).filter(v => v != aug.at(0).at(0)) + let first = compute-first(aug) + let follow = compute-follow(aug, first) + + block(width: 100%)[ + #table( + columns: (auto, 1fr, 1fr), + inset: (x: 8pt, y: 5pt), + fill: (col,
row) => { + if row == 0 { luma(220) } + else if calc.odd(row) { luma(248) } + else { white } + }, [*Variable*], [*FIRST*], [*FOLLOW*], + ..vars.map(v => { + let fi = first.at(v, default: ()).map(s => + if s == "\\epsilon" { strong($epsilon$) } else if s == "$" { strong([\$]) } else { strong($#s$) } + ).join([, ]) + let fo = follow.at(v, default: ()).map(s => + if s == "$" { strong([\$]) } else { strong($#s$) } + ).join([, ]) + ($#v$, fi, fo) + }).flatten() + ) + ] +} + + +// Renders every state I_n of the canonical collection as a box listing its items, labelled with the first (parent state, symbol) transition found that leads to it. +#let show-canonical-items(grammar) = { + let aug = augment-grammar(grammar) + let C = canonical-items(aug) + let symbols = get-all-symbols(aug).filter(x => x != "\\epsilon") + + // Build origin map: state-index -> (parent-index, symbol) + let origins = (:) + for (i, state) in C.enumerate() { + for sym in symbols { + let next = goto(state, sym, aug) + if next.len() > 0 { + let j = C.position(x => x == next) + if j != none and str(j) not in origins { + origins.insert(str(j), (i, sym)) + } + } + } + } + + block(width: 100%)[ + #grid( + columns: (1fr, 1fr, 1fr, 1fr, 1fr), + gutter: 10pt, + ..C.enumerate().map(((i, state)) => { + let origin-label = if str(i) in origins { + let (par, sym) = origins.at(str(i)) + text(size: 10pt, fill: purple.darken(20%))[(I#sub[#par], #sym)] + } else { + text(size: 10pt, fill: purple.darken(20%))[#emph[inicial]] + } + block( + stroke: 1pt + luma(100), + inset: 8pt, + width: 100%, + )[ + #text(size: 14pt, weight: "bold", fill: purple.darken(20%))[I#sub[#i]] + #h(4pt) + #origin-label + #stack( + dir: ttb, + spacing: 2pt, + ..state.map(item => render-item(item)) + ) + ] + }) + ) + ] +} + + +// Renders the LR(0) automaton: one node per state and one labelled edge per non-empty goto, drawn through diagraph's raw-render. +#let show-automaton(grammar, width: 100%) = { + let aug = augment-grammar(grammar) + let C = canonical-items(aug) + let symbols = get-all-symbols(aug).filter(x => x != "\\epsilon") + + let edges = () + for (i, state) in C.enumerate() { + for sym in symbols { + let next = goto(state, sym, aug) + if next.len() > 0 { + let j = C.position(x => x == next) + if j != none { + edges.push((i,
j, sym)) + } + } + } + } + + // One DOT edge per transition, labelled with the escaped grammar symbol. + let edge-lines = edges.map(e => { + let (a, b, sym) = e + let lbl = dot-escape(if sym == "\\epsilon" { "ε" } else { sym }) + " I" + str(a) + " -> I" + str(b) + " [label=\"" + lbl + "\"];" + }) + + let dot-src = ( + "digraph LR0 {\n" + + " rankdir=LR;\n" + + " node [margin=\"0.05,0.17\", fontsize=18, fontcolor=darkviolet];\n" + + " edge [fontsize=15];\n" + + edge-lines.join("\n") + "\n" + + "}" + ) + + block(width: 100%)[ + #raw-render(raw(dot-src, lang: "dot"), width: width) + ] +} + + +// Renders the SLR(1) ACTION/GOTO table for the grammar, preceded by a red box listing the conflicts when build-tables reports any. +#let show-parse-table(grammar) = { + let aug = augment-grammar(grammar) + let C = canonical-items(aug) + let result = build-tables(C, aug) + let ACTION = result.ACTION + let GOTO = result.GOTO + let conflicts = result.conflicts + + let terminals = get-terminals(aug).filter(x => x != "\\epsilon") + ("$",) + let variables = get-variables(aug).filter(x => x != aug.at(0).at(0)) // drop S' + + let n-cols = 1 + terminals.len() + variables.len() + // Formats one ACTION entry as s<state>, r<rule> or acc; missing and error entries render as an empty cell. + let render-cell(entry) = { + if entry == none { return [] } + let (kind, val) = entry + if kind == action-type.SHIFT { text(weight: "bold", fill: blue.darken(20%))[s#val] } + else if kind == action-type.REDUCE { text(weight: "bold", fill: green.darken(20%))[r#val] } + else if kind == action-type.ACCEPT { text(fill: purple, weight: "bold")[acc] } + else { [] } + } + + block(width: 100%)[ + #if conflicts.len() > 0 { + block( + fill: red.lighten(80%), + stroke: 0.5pt + red, + inset: 6pt, + radius: 3pt, + )[ + #text(fill: red.darken(20%), weight: "bold")[⚠ Conflicts (#conflicts.len())] + #for c in conflicts [ + - State #c.state, symbol #raw(c.symbol): existing #repr(c.existing) vs incoming #repr(c.incoming) + ] + ] + v(6pt) + } + + #table( + columns: range(n-cols).map(_ => auto), + inset: (x: 10pt, y: 6pt), + fill: (col, row) => { + if row == 0 or row == 1 { luma(220) } + else if calc.odd(row) { luma(248) } + else { white } + }, + align: center, + + table.cell(colspan: 1)[], + table.cell(colspan: terminals.len(),
stroke: (bottom: 0.5pt + black))[*ACTION*], + table.cell(colspan: variables.len(), stroke: (bottom: 0.5pt + black))[*GOTO*], + // Second header row: STATE, then one column per terminal and per non-terminal. + [*STATE*], + ..terminals.map(t => + text(font: "DejaVu Sans Mono", size: 8pt)[*#t*] + ), + ..variables.map(v => $bold(#v)$), + + // Data rows + ..C.enumerate().map(((i, _)) => { + let act-row = ACTION.at(i) + let got-row = GOTO.at(i) + ( + [*#i*], + ..terminals.map(t => render-cell(act-row.at(t, default: none))), + ..variables.map(v => { + let g = got-row.at(v, default: none) + if g != none { [#g] } else { [] } + }), + ) + }).flatten() + ) + ] +} + + +// Parses `sentence` (with "$" appended) and renders the trace as a table: step, stack, remaining input and action taken. +#let show-parse-trace(grammar, sentence) = { + let aug = augment-grammar(grammar) + let C = canonical-items(aug) + let tables = build-tables(C, aug) + let input = sentence + ("$",) + let result = parse-input(input, tables.ACTION, tables.GOTO, aug) + // Formats one logged action; a reduce shows the production it applies. + let action-label(act) = { + let (kind, val) = act + if kind == action-type.SHIFT { [shift #val] } + else if kind == action-type.REDUCE { + let rule = aug.at(val) + let rhs-s = rule.at(1).map(s => if s == "\\epsilon" { $epsilon$ } else { $#s$ }).join($space$) + [$"reduce" #rule.at(0) arrow.r #rhs-s$] + } + else if kind == action-type.ACCEPT { text(fill: purple, weight: "bold")[accept] } + else { text(fill: red)[error] } + } + + block(width: 100%)[ + #table( + columns: (auto, auto, auto, auto), + inset: (x: 7pt, y: 6pt), + fill: (_, row) => if row == 0 { luma(220) } else if calc.odd(row) { luma(248) } else { white }, + [*Step*], [*Stack*], [*Entry*], [*Action*], + ..result.log.map(entry => { + let stk = entry.stack.map(s => str(s)).join(" ") + let inp = entry.input.map(s => if s == "$" { [\$] } else { text(font: "DejaVu Sans Mono")[#s] }).join(h(3pt)) + ( + [#entry.step], + text(font: "DejaVu Sans Mono", size: 8pt)[#stk], + inp, + action-label(entry.action), + ) + }).flatten() + ) + ] +} + + +// Serialises a parse tree of (label, children) nodes into DOT source, numbering the nodes in breadth-first order. +#let _ast-to-dot(root) = { + let nodes = () // (id, label, is-leaf) + let edges = () // (parent-id, child-id) + let queue = ((0, root),) + let next-id =
1 + + while queue.len() > 0 { + let (my-id, node) = queue.remove(0) + let is-leaf = node.children.len() == 0 + nodes.push((my-id, node.label, is-leaf)) + + for child in node.children { + edges.push((my-id, next-id)) + queue.push((next-id, child)) + next-id = next-id + 1 + } + } + // Leaves and inner nodes get different shapes and fill colours. + let node-lines = nodes.map(n => { + let (id, lbl-raw, leaf) = n + let lbl = dot-escape(if lbl-raw == "\\epsilon" { "ε" } else { lbl-raw }) + let shape = if leaf { "ellipse" } else { "rectangle" } + let fill = if leaf { "#d6eaf8" } else { "#d5f5e3" } + let line = " n" + str(id) + " [label=\"" + lbl + "\", shape=" + shape + ", style=filled, fillcolor=\"" + fill + "\", fontname=\"Courier\", fontsize=10];" + line + }) + + let edge-lines = edges.map(e => { + " n" + str(e.at(0)) + " -> n" + str(e.at(1)) + ";" + }) + + ( + "digraph ParseTree {\n" + + " rankdir=TB;\n" + + node-lines.join("\n") + "\n" + + edge-lines.join("\n") + "\n" + + "}" + ) +} +// Parses `sentence` (with "$" appended) and renders the resulting parse tree, or a red message when parse-input returns no tree. +#let show-parse-tree(grammar, sentence) = { + let aug = augment-grammar(grammar) + let C = canonical-items(aug) + let tables = build-tables(C, aug) + let input = sentence + ("$",) + let result = parse-input(input, tables.ACTION, tables.GOTO, aug) + + block(width: 100%)[ + #if result.ast == none { + text(fill: red)[No parse tree — input was rejected.] + } else { + let dot-src = _ast-to-dot(result.ast) + raw-render(raw(dot-src, lang: "dot")) + } + ] +} diff --git a/packages/preview/slr8/0.0.1/typst.toml b/packages/preview/slr8/0.0.1/typst.toml new file mode 100644 index 0000000000..f0cb71f543 --- /dev/null +++ b/packages/preview/slr8/0.0.1/typst.toml @@ -0,0 +1,13 @@ +[package] +name = "slr8" +version = "0.0.1" +entrypoint = "src/vis.typ" +authors = ["Lucas Ramos"] +license = "MIT" +description = "A package to visualize SLR parser steps."
+repository = "https://github.com/Lukinhasram/slr8" +compiler = "0.15.0" +categories = ["components", "visualization"] +disciplines = ["computer-science", "mathematics", "education", "linguistics"] +keywords = ["parser", "slr", "grammar", "compiler", "AST"] +exclude = ["docs", "example.typ", "example.pdf"] \ No newline at end of file