Full-parity Python port of the canonical TypeScript implementation.
For motivation, language-neutral concepts, and the cross-language parity matrix, see the top-level README and REPORT.md.
cd python
pip install -e .
Package: voxgig_struct (single module
voxgig_struct/voxgig_struct.py).
Or, without installing, add the source directory to sys.path:
import sys
sys.path.insert(0, '/path/to/struct/python')
from voxgig_struct import getpath, transform, validate
from voxgig_struct import (
getpath, setpath, merge, walk,
inject, transform, validate, select,
)
getpath({'db': {'host': 'localhost'}}, 'db.host')
# 'localhost'
transform(
{'user': {'first': 'Ada', 'last': 'Lovelace'}, 'age': 36},
{'name': '`user.first`', 'surname': '`user.last`', 'years': '`age`'},
)
# {'name': 'Ada', 'surname': 'Lovelace', 'years': 36}
validate(
{'name': 'Ada', 'age': 36},
{'name': '`$STRING`', 'age': '`$INTEGER`'},
)
# {'name': 'Ada', 'age': 36} (raises on mismatch)
from voxgig_struct import (
# core canonical functions (the full canonical API is 48 names,
# including the builders, re_* helpers, and injection helpers below)
clone, delprop, escre, escurl, filter, flatten,
getdef, getelem, getpath, getprop, haskey,
inject, isempty, isfunc, iskey, islist, ismap, isnode,
items, join, jsonify, keysof, merge,
pad, pathify, select, setpath, setprop,
size, slice, strkey, stringify, transform,
typename, typify, validate, walk,
# builders
jm, jt,
# extras (Python-specific convenience)
replace, joinurl,
# injection helpers
Injection, StructUtility,
checkPlacement, injectorArgs, injectChild,
# sentinels and type constants
SKIP, DELETE,
T_any, T_noval, T_boolean, T_decimal, T_integer, T_number,
T_string, T_function, T_symbol, T_null,
T_list, T_map, T_instance, T_scalar, T_node,
M_KEYPRE, M_KEYPOST, M_VAL,
)
Source: voxgig_struct/voxgig_struct.py.
def isnode(val) # bool — map or list
def ismap(val) # bool — dict
def islist(val) # bool — list
def iskey(key) # bool — non-empty str or int
def isempty(val) # bool — None/''/{}/[]
def isfunc(val) # bool — callable
isnode({'a': 1}) # True
ismap({'a': 1}) # True
ismap([]) # False
islist([1, 2]) # True
islist({'a': 1}) # False
iskey('name') # True
iskey(0) # True
iskey('') # False
isempty([]) # True
isempty(None) # True
isempty('') # True
isempty({}) # True
isempty(0) # False
isfunc(lambda: 1) # True
def typify(value) -> int # bit-field
def typename(t: int) -> str # human name
typify returns a bit-field combining a "kind" flag (T_scalar or
T_node) with a specific type flag. typename looks up a
human-friendly name.
typify(1) # T_scalar | T_number | T_integer (201326720)
typify(42) # T_scalar | T_number | T_integer
typify('hi') # T_scalar | T_string
typify(None) # T_scalar | T_null
typify({}) # T_node | T_map
typename(8192) # 'map' (8192 == T_map)
typename(typify('hi')) # 'string'
def size(val) -> int
def slice(val, start=UNDEF, end=UNDEF, mutate=False) -> Any
def pad(s, padding=UNDEF, padchar=UNDEF) -> str
size([1,2,3]) # 3
size({'a':1,'b':2}) # 2
size('abc') # 3
slice keeps the first N; a negative start drops the last |start|
items, and end is exclusive:
slice([1,2,3,4,5], 1, 4) # [2, 3, 4]
slice('abcdef', -3) # 'abc' (drops the last 3)
pad('a', 3) # 'a  '
pad('hi', 5) # 'hi   '
pad('hi', -5, '*') # '***hi'
def getprop(val, key, alt=UNDEF) -> Any
def setprop(parent, key, val) -> parent
def delprop(parent, key) -> parent
def getelem(val, key, alt=UNDEF) -> Any
def getdef(val, alt) -> Any
def haskey(val, key) -> bool
def keysof(val) -> list[str]
def items(val, apply=None) -> list
def strkey(key) -> str
getprop({'x': 1}, 'x') # 1
getprop({'a': 1}, 'b', 'def') # 'def'
getprop([10, 20, 30], 1) # 20
setprop({'a': 1}, 'b', 2) # {'a': 1, 'b': 2}
delprop({'a': 1, 'b': 2}, 'a') # {'b': 2}
getelem([10, 20, 30], -1) # 30
getdef(None, 'fallback') # 'fallback'
haskey({'a': 1}, 'a') # True
items({'a': 1, 'b': 2}) # [['a', 1], ['b', 2]]
strkey(2.2) # '2'
strkey(1) # '1'
strkey('foo') # 'foo'
keysof({'b': 4, 'a': 5}) # ['a', 'b'] (sorted)
def getpath(store, path, injdef=UNDEF) -> Any
def setpath(store, path, val, injdef=UNDEF) -> store
def pathify(val, startin=UNDEF, endin=UNDEF) -> str
getpath({'a': {'b': {'c': 42}}}, 'a.b.c') # 42
getpath({'a': [10, 20]}, 'a.1') # 20
store = {}
setpath(store, 'db.host', 'localhost')
# store == {'db': {'host': 'localhost'}}
setpath({'a': 1, 'b': 2}, 'b', 22) # {'a': 1, 'b': 22}
pathify(['a', 'b', 'c']) # 'a.b.c'
def walk(val, apply=None, key=UNDEF, parent=UNDEF, path=UNDEF,
*, before=None, after=None, maxdepth=None, pool=UNDEF) -> Any
def merge(objs, maxdepth=None) -> Any
def clone(val) -> Any
def flatten(lst, depth=None) -> list
def filter(val, check) -> list
def visit(key, val, parent, path):
    return 'DEFAULT' if val is None else val
walk(tree, after=visit)
Last input wins; maps deep-merge; lists merge by index:
merge([
{'a': 1, 'b': 2, 'k': [10, 20], 'x': {'y': 5, 'z': 6}},
{'b': 3, 'd': 4, 'e': 8, 'k': [11], 'x': {'y': 7}},
])
# {'a': 1, 'b': 3, 'd': 4, 'e': 8, 'k': [11, 20], 'x': {'y': 7, 'z': 6}}
clone({'a': {'b': [1, 2]}}) # {'a': {'b': [1, 2]}} (a deep copy)
flatten([1, [2, [3]]]) # [1, 2, [3]] (one level by default)
flatten([1, [2, [3, [4]]]]) # [1, 2, [3, [4]]]
filter passes each (key, value) pair to the check and returns the
matching values (not the pairs):
filter([1, 2, 3, 4, 5], lambda kv: kv[1] > 3)
# [4, 5]
def escre(s) -> str
def escurl(s) -> str
def join(arr, sep=UNDEF, url=UNDEF) -> str
def joinurl(parts) -> str # convenience: join(parts, '/', True)
def jsonify(val, flags=None) -> str
def stringify(val, maxlen=UNDEF, pretty=None) -> str
def replace(s, from_pat, to_str) -> str
escre('a.b+c') # 'a\\.b\\+c'
escurl('hello world?') # 'hello%20world%3F'
join(['a', 'b', 'c'], '/') # 'a/b/c'
joinurl(['http:', '/foo/', '/bar']) # 'http:/foo/bar'
jsonify pretty-prints by default (indent 2); pass {'indent': 0} for the
compact form:
jsonify({'a': 1})
# {
# "a": 1
# }
jsonify({'a': 1, 'b': 2}, {'indent': 0}) # '{"a":1,"b":2}'
stringify is the compact, quote-light form — keys are sorted and object
braces are kept; the second argument caps the length (the ... counts):
stringify({'a': 1, 'b': [2, 3]}) # '{a:1,b:[2,3]}'
stringify('verylongstring', 5) # 've...'
def inject(val, store, injdef=UNDEF) -> Any
def transform(data, spec, injdef=UNDEF) -> Any
def validate(data, spec, injdef=UNDEF) -> Any
def select(children, query) -> list
# Backtick refs in strings are replaced by store values.
inject({'x': '`a`', 'y': 2}, {'a': 1}) # {'x': 1, 'y': 2}
inject(
{'greeting': 'hello `name`'},
{'name': 'Ada'}
)
# {'greeting': 'hello Ada'}
# Build a result by example.
transform(
{'hold': {'x': 1}, 'top': 99},
{'a': '`hold.x`', 'b': '`top`'}
)
# {'a': 1, 'b': 99}
# Validate against a shape (raises on mismatch).
validate(
{'name': 'Ada', 'age': 36},
{'name': '`$STRING`', 'age': '`$INTEGER`'}
)
# {'name': 'Ada', 'age': 36}
# Find children matching a query.
select(
{'a': {'name': 'Alice', 'age': 30}, 'b': {'name': 'Bob', 'age': 25}},
{'age': 30}
)
# [{'name': 'Alice', 'age': 30, '$KEY': 'a'}]
Transform commands drive structural ops. A command like $EACH appears in
value position — as the first element of a list ['$EACH', path, subspec]
— mapping the sub-spec over every entry at path:
transform(
{'v': 1, 'a': [{'q': 13}, {'q': 23}]},
{'x': {'y': ['`$EACH`', 'a', {'q': '`$COPY`', 'r': '`.q`', 'p': '`...v`'}]}}
)
# {'x': {'y': [{'q': 13, 'r': 13, 'p': 1}, {'q': 23, 'r': 23, 'p': 1}]}}
Putting a command in key position (or, for $APPLY, directly under a map)
is an error — commands must be list values:
transform({}, {'x': '`$APPLY`'})
# raises: $APPLY: invalid placement in parent map, expected: list.
jm('a', 1, 'b', 2) # {'a': 1, 'b': 2}
jt(1, 2, 3) # [1, 2, 3]
def checkPlacement(modes, ijname, parentTypes, inj) -> bool
def injectorArgs(argTypes, args) -> Any
def injectChild(child, store, inj) -> Injection
SKIP # emit nothing for this key
DELETE # remove this key from the parent
T_any T_noval T_boolean T_decimal T_integer T_number T_string
T_function T_symbol T_null T_list T_map T_instance T_scalar T_node
M_KEYPRE M_KEYPOST M_VAL
$DELETE $COPY $KEY $META $ANNO
$MERGE $EACH $PACK $REF $FORMAT $APPLY
$MAP $LIST $STRING $NUMBER $INTEGER $DECIMAL $BOOLEAN
$NULL $NIL $FUNCTION $INSTANCE $ANY $CHILD $ONE $EXACT
Python has only None. Internally the port uses an UNDEF sentinel
(= None for ergonomics) to mean "absent". JSON null and "absent"
both map to None at the user-facing API.
typify(None) returns T_scalar | T_null. Where the test corpus
needs to disambiguate, the runner uses string sentinels __NULL__
and __UNDEF__.
Where canonical TypeScript has positional optional parameters, the Python port uses keyword arguments. For example:
walk(tree, before=None, after=visit, maxdepth=10)
Function names match canonical TypeScript exactly: getpath,
setpath, getprop, etc. PEP 8 would suggest get_path, but
parity with other ports beats style here.
94 tests pass against the shared corpus (3 skipped).
Uniform six-function regex API (see /design/REGEX_API.md). The Python port
wraps the stdlib re module.
| Function | Maps to |
|---|---|
| `re_compile(pattern, flags=0)` | `re.compile(pattern, flags)` |
| `re_test(pattern, input)` | `bool(re.search(pattern, input))` |
| `re_find(pattern, input)` | first match as `[whole, group1, ...]` or `None` |
| `re_find_all(pattern, input)` | all matches, one row per match |
| `re_replace(pattern, input, repl)` | `re.sub(pattern, repl, input)` |
| `re_escape(s)` | `re.escape(s)` |
Patterns must stay inside the RE2 subset documented in /design/REGEX.md.
Python's re supports backreferences and lookaround; using them will
not be portable to the Go / Rust / C / Lua / Zig ports.
- Catastrophic backtracking. Python's `re` (the default C engine) is backtracking. `^(a+)+$` against 22 a's plus `!` runs ~190 ms here; RE2-style ports finish the same case in <0.1 ms. Use flat patterns.
- Zero-width replace. `re_replace("a*", "abc", "X")` returns `"XXbXcX"` — the ECMA convention shared by all PCRE/ECMA/.NET/Java/Onigmo engines plus the in-tree Thompson ports. Go (RE2) returns `"XbXcX"` instead; see `/design/REGEX_PATHOLOGICAL.md`.
See /design/REGEX_PATHOLOGICAL.md for the cross-port pathological-input panel.
cd python
make test
Tests live in tests/ and read fixtures from
../build/test/.