From c71583a895c34d69c20d90faa3a4d4b26cc6f837 Mon Sep 17 00:00:00 2001 From: David Chanin Date: Tue, 2 Jan 2024 17:41:01 -0500 Subject: [PATCH] adding to/from Penman/Graph/Tree convenience methods --- README.md | 31 +++++--- docs/docs/quick-start.md | 35 ++++---- docs/docs/trees-graphs-epigraphs.md | 10 +++ src/lib/codec.ts | 4 +- src/lib/graph.spec.ts | 73 +++++++++++++++++ src/lib/graph.ts | 119 ++++++++++++++++++++++++++++ src/lib/layout.ts | 10 +-- src/lib/tree.spec.ts | 40 ++++++++++ src/lib/tree.ts | 111 +++++++++++++++++++++++++- 9 files changed, 393 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 9fbde2c..1adba72 100644 --- a/README.md +++ b/README.md @@ -23,36 +23,41 @@ npm install penman-js ## Basic usage -The most faithful representation of AMR text in the library is the `Tree` class. The `parse` function turns an AMR text string into a `Tree`, and `format` does the reverse, turning a `Tree` back into a string. +The most faithful representation of AMR text in the library is the `Tree` class. The `Tree.fromPenman()` method turns an AMR text string into a `Tree`, and `tree.toPenman()` does the reverse, turning a `Tree` back into a string. ```js -import { parse, format } from 'penman-js'; +import { Tree } from 'penman-js'; -const t = penman.parse('(w / want-01 :ARG0 (b / boy) :ARG1 (g / go :ARG0 b))'); -const [variable, branches] = t.node; +const tree = Tree.fromPenman( + '(w / want-01 :ARG0 (b / boy) :ARG1 (g / go :ARG0 b))', +); +const [variable, branches] = tree.node; console.log(variable); // ouput: 'w' console.log(branches.length); // output: 3 const [role, target] = branches[2]; console.log(role); // output: ':ARG1' -console.log(format(target)); +const subtree = new Tree(target); +console.log(subtree.toPenman()); // (g / go // :ARG0 b) ``` -Users wanting to interact with graphs might find the `decode` and -`encode` functions a good place to start. 
+Users wanting to interact with graphs might find the `Graph.fromPenman()` and +`graph.toPenman()` methods a good place to start. ```js -import { encode, decode } from 'penman-js'; -const g = penman.decode('(w / want-01 :ARG0 (b / boy) :ARG1 (g / go :ARG0 b))'); -console.log(g.top); +import { Graph } from 'penman-js'; +const graph = Graph.fromPenman( + '(w / want-01 :ARG0 (b / boy) :ARG1 (g / go :ARG0 b))', +); +console.log(graph.top); // 'w' -console.log(g.triples.length); +console.log(graph.triples.length); // 6 -console.log(g.instances().map((instance) => instance[2])); +console.log(graph.instances().map((instance) => instance[2])); // ['want-01', 'boy', 'go'] -console.log(encode(g, { top: 'b' })); +console.log(graph.toPenman({ top: 'b' })); // (b / boy // :ARG0-of (w / want-01 // :ARG1 (g / go diff --git a/docs/docs/quick-start.md b/docs/docs/quick-start.md index 4e31381..62c2462 100644 --- a/docs/docs/quick-start.md +++ b/docs/docs/quick-start.md @@ -10,42 +10,43 @@ npm install penman-js ## Basic usage -The most faithful representation of AMR text in the library is the `Tree` class. The `parse` function turns an AMR text string into a `Tree`, and `format` does the reverse, turning a `Tree` back into a string. +The most faithful representation of AMR text in the library is the `Tree` class. The `Tree.fromPenman()` method turns an AMR text string into a `Tree`, and `tree.toPenman()` does the reverse, turning a `Tree` back into a string. 
```js -import { parse, format } from 'penman-js'; +import { Tree } from 'penman-js'; -const t = penman.parse('(w / want-01 :ARG0 (b / boy) :ARG1 (g / go :ARG0 b))'); -const [variable, branches] = t.node; +const tree = Tree.fromPenman( + '(w / want-01 :ARG0 (b / boy) :ARG1 (g / go :ARG0 b))', +); +const [variable, branches] = tree.node; console.log(variable); // ouput: 'w' console.log(branches.length); // output: 3 const [role, target] = branches[2]; console.log(role); // output: ':ARG1' -console.log(format(target)); +const subtree = new Tree(target); +console.log(subtree.toPenman()); // (g / go // :ARG0 b) ``` -Users wanting to interact with graphs might find the `decode` and -`encode` functions a good place to start. +Users wanting to interact with graphs might find the `Graph.fromPenman()` and +`graph.toPenman()` methods a good place to start. ```js -import { encode, decode } from 'penman-js'; -const g = penman.decode('(w / want-01 :ARG0 (b / boy) :ARG1 (g / go :ARG0 b))'); -console.log(g.top); +import { Graph } from 'penman-js'; +const graph = Graph.fromPenman( + '(w / want-01 :ARG0 (b / boy) :ARG1 (g / go :ARG0 b))', +); +console.log(graph.top); // 'w' -console.log(g.triples.length); +console.log(graph.triples.length); // 6 -console.log(g.instances().map((instance) => instance[2])); +console.log(graph.instances().map((instance) => instance[2])); // ['want-01', 'boy', 'go'] -console.log(encode(g, { top: 'b' })); +console.log(graph.toPenman({ top: 'b' })); // (b / boy // :ARG0-of (w / want-01 // :ARG1 (g / go // :ARG0 b))) ``` - -The `decode` and `encode` functions work with one PENMAN -graph. The `load` and `dump` functions work with -collections of graphs. 
diff --git a/docs/docs/trees-graphs-epigraphs.md b/docs/docs/trees-graphs-epigraphs.md index b1243d1..e6b0528 100644 --- a/docs/docs/trees-graphs-epigraphs.md +++ b/docs/docs/trees-graphs-epigraphs.md @@ -18,6 +18,16 @@ string is **formatting**, while the whole process is called ![The three stages of PENMAN structure](/img/representations.png) +These functions for moving between Penman notation, trees, and graphs are also +available in Penman JS as methods on `Graph` and `Tree` objects, as below: + +- `Graph.fromPenman()`: equivalent of `decode()` above +- `graph.toPenman()`: equivalent of `encode()` above +- `graph.toTree()`: equivalent of `configure()` above +- `Tree.fromPenman()`: equivalent of `parse()` above +- `tree.toPenman()`: equivalent of `format()` above +- `tree.toGraph()`: equivalent of `interpret()` above + Conversion from a PENMAN string to a `Tree`, and vice versa, is straightforward and lossless. Conversion to a `Graph`, however, is potentially lossy as the diff --git a/src/lib/codec.ts b/src/lib/codec.ts index bd01323..e7cbcfd 100644 --- a/src/lib/codec.ts +++ b/src/lib/codec.ts @@ -226,7 +226,7 @@ export class PENMANCodec { * * @param s - A string containing a single PENMAN-serialized graph. * @param options - Optional arguments. - * - `model` - The model used for interpreting the graph. + * @param options.model - The model used for interpreting the graph. * @returns The Graph object described by `s`. * @example * import { decode } from 'penman-js'; @@ -247,7 +247,7 @@ export function decode(s: string, options: DecodeOptions = {}): Graph { * * @param lines - A string or open file containing PENMAN-serialized graphs. * @param options - Optional arguments. - * - `model` - The model used for interpreting the graph. + * @param options.model - The model used for interpreting the graph. * @returns An iterator yielding `Graph` objects described in `lines`. 
* @example * import { iterdecode } from 'penman-js'; diff --git a/src/lib/graph.spec.ts b/src/lib/graph.spec.ts index a170c27..0d43594 100644 --- a/src/lib/graph.spec.ts +++ b/src/lib/graph.spec.ts @@ -31,6 +31,79 @@ test('init', (t) => { t.is(g3.top, 'b'); }); +test('Graph.fromPenman', (t) => { + // unlabeled single node + let g = Graph.fromPenman('(a)'); + t.is(g.top, 'a'); + t.deepEqual(g.triples, [['a', ':instance', null]]); + + // labeled node + g = Graph.fromPenman('(a / alpha)'); + t.is(g.top, 'a'); + t.deepEqual(g.triples, [['a', ':instance', 'alpha']]); + + // unlabeled edge to unlabeled node + g = Graph.fromPenman('(a : (b))'); + t.is(g.top, 'a'); + t.deepEqual(g.triples, [ + ['a', ':instance', null], + ['a', ':', 'b'], + ['b', ':instance', null], + ]); + + // inverted unlabeled edge + g = Graph.fromPenman('(b :-of (a))'); + t.is(g.top, 'b'); + t.deepEqual(g.triples, [ + ['b', ':instance', null], + ['a', ':', 'b'], + ['a', ':instance', null], + ]); + + // labeled edge to unlabeled node + g = Graph.fromPenman('(a :ARG (b))'); + t.is(g.top, 'a'); + t.deepEqual(g.triples, [ + ['a', ':instance', null], + ['a', ':ARG', 'b'], + ['b', ':instance', null], + ]); + + // inverted edge + g = Graph.fromPenman('(b :ARG-of (a))'); + t.is(g.top, 'b'); + t.deepEqual(g.triples, [ + ['b', ':instance', null], + ['a', ':ARG', 'b'], + ['a', ':instance', null], + ]); + + // fuller examples + t.deepEqual(Graph.fromPenman(x1()[0]).triples, x1()[1]); +}); + +test('toTree', (t) => { + const g = new Graph([ + ['b', ':instance', 'bark-01'], + ['b', ':ARG0', 'd'], + ['d', ':instance', 'dog'], + ]); + + const tree = g.toTree(); + t.deepEqual(tree.node, [ + 'b', + [ + ['/', 'bark-01'], + [':ARG0', ['d', [['/', 'dog']]]], + ], + ]); +}); + +test('toPenman', (t) => { + const g = new Graph([['h', 'instance', 'hi']]); + t.deepEqual(g.toPenman(), '(h / hi)'); +}); + test('__or__', (t) => { const p = new Graph(); const g = p.__or__(p); diff --git a/src/lib/graph.ts b/src/lib/graph.ts index 
9d1955c..b569b28 100644 --- a/src/lib/graph.ts +++ b/src/lib/graph.ts @@ -6,8 +6,12 @@ import cloneDeep from 'lodash.clonedeep'; import differenceWith from 'lodash.differencewith'; import isEqual from 'lodash.isequal'; +import { decode, encode } from './codec'; import { EpidataMap } from './epigraph'; import { GraphError } from './exceptions'; +import { configure } from './layout'; +import { Model } from './model'; +import { Tree } from './tree'; import type { Attribute, Constant, @@ -47,6 +51,28 @@ export interface GraphEdgesOptions { target?: Constant; } +export interface GraphFromPenmanOptions { + /** The model used for interpreting the graph. */ + model?: Model; +} + +export interface GraphToTreeOptions { + /** If given, the node to use as the top in serialization. */ + top?: Variable; + /** The model used for interpreting the graph. */ + model?: Model; +} +export interface GraphToPenmanOptions { + /** The model used for interpreting the graph. */ + model?: Model; + /** If given, the node to use as the top in serialization. */ + top?: Variable; + /** How to indent formatted strings. */ + indent?: number | null; + /** If `true`, put initial attributes on the first line. */ + compact?: boolean; +} + /** * Represents a basic class for modeling a rooted, directed acyclic graph. * @@ -105,6 +131,99 @@ export class Graph { this._id = graphIdCounter++; } + /** + * Deserialize PENMAN-serialized string `s` into its Graph object. + * + * This is equivalent to `decode()` in the Python library + * + * `options` consists of the following: + * - `model` - The model used for interpreting the graph. + * + * @param penmanString - A string containing a single PENMAN-serialized graph. + * @param options - Optional arguments. + * @param options.model - The model used for interpreting the graph. + * @returns The Graph object described by `penmanString`. 
+ * @example + * import { Graph } from 'penman-js'; + * + * const graph = Graph.fromPenman('(b / bark-01 :ARG0 (d / dog))'); + */ + static fromPenman( + penmanString: string, + options: GraphFromPenmanOptions = {}, + ): Graph { + return decode(penmanString, options); + } + + /** + * Create a tree from the graph by making as few decisions as possible. + * + * A graph created from a valid tree will + * contain epigraphical markers that describe how the triples of a + * graph are to be expressed in a tree, and thus configuring this + * tree requires only a single pass through the list of triples. If + * the markers are missing or out of order, or if the graph has been + * modified, then the process of creating the tree will have to make + * decisions about where to insert tree branches. These decisions are + * deterministic, but may result in a tree different than the one + * expected. + * + * This is equivalent to `configure()` in the Python library. + * + * `options` consists of the following: + * - `top` is the variable to use as the top of the graph; if omitted, the top of this graph will be used. + * - `model` is the `Model` used to configure the tree. + * + * @param options - Optional arguments. + * @param options.top - The variable to use as the top of the graph; if omitted, the top of this graph will be used. + * @param options.model - The `Model` used to configure the tree. + * @returns The `Tree` object. + * @example + * import { Graph } from 'penman-js'; + * + * const g = new Graph([ + * ['b', ':instance', 'bark-01'], + * ['b', ':ARG0', 'd'], + * ['d', ':instance', 'dog'] + * ]); + * + * const t = g.toTree(); + * console.log(t); + * // Tree(['b', [['/', 'bark-01'], [':ARG0', ['d', [['/', 'dog']]]]]]) + */ + toTree(options: GraphToTreeOptions = {}): Tree { + return configure(this, options); + } + + /** + * Serialize the graph from `top` to PENMAN notation. + * + * This is equivalent to `encode()` in the Python library. 
+ * + * `options` consists of the following: + * - `top` - If given, the node to use as the top in serialization. + * - `indent` - How to indent formatted strings. + * - `compact` - If `true`, put initial attributes on the first line. + * - `model` - The model used for interpreting the graph. + * + * @param options - Optional arguments. + * @param options.top - If given, the node to use as the top in serialization. + * @param options.indent - How to indent formatted strings. + * @param options.compact - If `true`, put initial attributes on the first line. + * @param options.model - The model used for interpreting the graph. + * @returns The PENMAN-serialized string of the graph. + * @example + * import { Graph } from 'penman-js'; + * + * const g = new Graph([['h', 'instance', 'hi']]); + * + * console.log(g.toPenman()); + * // '(h / hi)' + */ + toPenman(options: GraphToPenmanOptions = {}): string { + return encode(this, options); + } + /** @ignore */ __repr__() { const name = this.constructor.name; diff --git a/src/lib/layout.ts b/src/lib/layout.ts index 6fe64e9..5094bb6 100644 --- a/src/lib/layout.ts +++ b/src/lib/layout.ts @@ -123,12 +123,12 @@ export interface InterpretOptions { * @example * import { Tree, interpret } from 'penman-js'; * - * const t = new Tree('b', [ + * const t = new Tree(['b', [ * ['/', 'bark-01'], - * ['ARG0', new Tree('d', [ + * ['ARG0', ['d', [ * ['/', 'dog'] - * ])] - * ]); + * ]]] + * ]]); * * const g = interpret(t); * for (const triple of g.triples) { @@ -286,7 +286,7 @@ export interface ConfigureOptions { * * const t = configure(g); * console.log(t); - * // Tree('b', [['/', 'bark-01'], [':ARG0', new Tree('d', [['/', 'dog']])]]) + * // Tree(['b', [['/', 'bark-01'], [':ARG0', ['d', [['/', 'dog']]]]]]) */ export function configure(g: Graph, options: ConfigureOptions = {}): Tree { const { top = g.top, model = _default_model } = options; diff --git a/src/lib/tree.spec.ts b/src/lib/tree.spec.ts index 3c55b52..1973736 100644 --- 
a/src/lib/tree.spec.ts +++ b/src/lib/tree.spec.ts @@ -53,6 +53,46 @@ test('_init__', (t) => { t.deepEqual(t2.metadata, { snt: 'Alpha.' }); }); +test('Tree.fromPenman', (t) => { + const tree = Tree.fromPenman('(b / bark-01 :ARG0 (d / dog))'); + t.deepEqual(tree.node, [ + 'b', + [ + ['/', 'bark-01'], + [':ARG0', ['d', [['/', 'dog']]]], + ], + ]); +}); + +test('toGraph', (t) => { + const tree = new Tree([ + 'b', + [ + ['/', 'bark-01'], + ['ARG0', ['d', [['/', 'dog']]]], + ], + ]); + + const g = tree.toGraph(); + t.deepEqual(g.triples, [ + ['b', ':instance', 'bark-01'], + ['b', ':ARG0', 'd'], + ['d', ':instance', 'dog'], + ]); +}); + +test('toPenman', (t) => { + const tree = new Tree([ + 'b', + [ + ['/', 'bark-01'], + [':ARG0', ['d', [['/', 'dog']]]], + ], + ]); + + t.deepEqual(tree.toPenman(), '(b / bark-01\n :ARG0 (d / dog))'); +}); + // def test_nodes(self, one_arg_node, reentrant): // t = tree.Tree(one_arg_node) // assert t.nodes() == [one_arg_node, ('b', [('/', 'beta')])] diff --git a/src/lib/tree.ts b/src/lib/tree.ts index 5086bc5..6bd8520 100644 --- a/src/lib/tree.ts +++ b/src/lib/tree.ts @@ -1,8 +1,13 @@ /** Definitions of tree structures. */ import isEqual from 'lodash.isequal'; -import format from 'string-format'; +import formatString from 'string-format'; +import { format } from './format'; +import { Graph } from './graph'; +import { interpret } from './layout'; +import { Model } from './model'; +import { parse } from './parse'; import type { Branch, Node, Variable } from './types'; type _Step = [number[], Branch]; // see Tree.walk() @@ -13,6 +18,17 @@ export interface TreeOptions { metadata?: { [key: string]: string }; } +export interface TreeToPenmanOptions { + /** How to indent formatted strings. */ + indent?: number | null; + /** If `true`, put initial attributes on the first line. */ + compact?: boolean; +} + +export interface TreeToGraphOptions { + model?: Model; +} + /** * A tree structure. 
* @@ -43,6 +59,95 @@ export class Tree { } return isEqual(this.node, other); } + + /** + * Parse a penman-formatted string into a Tree. + * + * This is equivalent to the `parse()` function in the Python library + * + * @param penmanString - A string containing a single PENMAN-serialized tree. + * @example + * import { Tree } from 'penman-js'; + * + * const tree = Tree.fromPenman(` + * (t / try-01 + * :ARG0 (d / dog) + * :ARG1 (b / bark-01 + * :ARG0 d)) + * `) + */ + static fromPenman(penmanString: string): Tree { + return parse(penmanString); + } + + /** + * Return this `Tree` as a PENMAN string. + * + * This is equivalent to `format()` in the Python library + * + * `options` consists of the following: + * - `indent`: How to indent formatted strings. + * - `compact`: If `true`, put initial attributes on the first line. + * + * @param options - Optional arguments. + * @param options.indent - How to indent formatted strings. + * @param options.compact - If `true`, put initial attributes on the first line. + * @returns The PENMAN-serialized string of the `Tree` object. + * @example + * import { Tree } from 'penman-js'; + * + * const tree = new Tree( + * ['b', [['/', 'bark-01'], + * [':ARG0', ['d', [['/', 'dog']]]]] + * ] + * ); + * + * console.log(tree.toPenman()); + * // (b / bark-01 + * // :ARG0 (d / dog)) + */ + toPenman(options: TreeToPenmanOptions = {}): string { + return format(this, options); + } + + /** + * Return this tree as a graph using `model`. + * + * Tree interpretation is the process of transforming the nodes and + * edges of a tree into a directed graph. A semantic model determines + * which edges are inverted and how to deinvert them. If `model` is + * not provided, the default model will be used. + * + * This is equivalent to `interpret()` in the Python library. + * + * `options` consists of the following: + * - `model`: a model to use for the transformation + * + * @param options - Optional arguments. 
+ * @param options.model - a model to use for the transformation + * @returns The interpreted `Graph` object. + * @example + * import { Tree } from 'penman-js'; + * + * const t = new Tree(['b', [ + * ['/', 'bark-01'], + * ['ARG0', ['d', [ + * ['/', 'dog'] + * ]]] + * ]]); + * + * const g = t.toGraph(); + * for (const triple of g.triples) { + * console.log(triple); + * } + * // ['b', ':instance', 'bark-01'] + * // ['b', ':ARG0', 'd'] + * // ['d', ':instance', 'dog'] + */ + toGraph(options: TreeToGraphOptions = {}): Graph { + return interpret(this, options); + } + /** * Return `true` if this tree is equal to other tree * @@ -119,7 +224,7 @@ export class Tree { let i = 0; let newvar: string | null = null; while (newvar == null || used.has(newvar)) { - newvar = format(fmt, { + newvar = formatString(fmt, { prefix: pre, i: i, j: i === 0 ? '' : `${i + 1}`, @@ -141,7 +246,7 @@ const _format = (node: Node, level: number): string => { const branch_strings = branches.map((branch) => _formatBranch(branch, next_level), ); - return format( + return formatString( '({}, [{}{}])', variable, indent,