diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1f51abe..cc229a2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,6 +1,3 @@ -# This workflow will do a clean install of node dependencies, build the source code and run tests across different versions of node -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions - name: build on: @@ -16,10 +13,10 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - node-version: [24.x] + node-version: [24.x, latest] + + runs-on: ${{ matrix.os }} - runs-on: ${{ matrix.os }} - steps: - uses: actions/checkout@v4 - name: Use Node.js ${{ matrix.node-version }} @@ -30,9 +27,24 @@ jobs: - run: npm run build --if-present - run: npm test - - name: Coveralls - uses: coverallsapp/github-action@master - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - path-to-lcov: ./coverage/lcov.info + build-go: + + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.24' + - name: Build + working-directory: go + run: go build ./... + - name: Test + working-directory: go + run: go test -v ./... diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..41401b7 --- /dev/null +++ b/Makefile @@ -0,0 +1,50 @@ +.PHONY: all build test clean build-ts build-go test-ts test-go clean-ts clean-go publish-go tags-go tidy-go reset + +all: build test + +build: build-ts build-go + +test: test-ts test-go + +clean: clean-ts clean-go + +# TypeScript +build-ts: + npm run build + +test-ts: + npm test + +clean-ts: + rm -rf dist dist-test + +# Go +build-go: + cd go && go build ./... + +test-go: + cd go && go test ./... 
+ +clean-go: + cd go && go clean -cache + +# Publish Go module: make publish-go V=0.1.7 +publish-go: test-go + @test -n "$(V)" || (echo "Usage: make publish-go V=x.y.z" && exit 1) + git add go/plugin.go + git commit -m "go: v$(V)" + git tag go/v$(V) + git push origin main go/v$(V) + if command -v gh >/dev/null 2>&1; then gh release create go/v$(V) --title "go/v$(V)" --notes "Go module release v$(V)"; fi + +tidy-go: + cd go && go mod tidy + +tags-go: + git tag -l 'go/v*' --sort=-version:refname + +reset: + npm run reset + cd go && go clean -cache + cd go && go build ./... + cd go && go test -v ./... diff --git a/README.md b/README.md index bbbae33..928ccdb 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ -# @jsonic/csv (JSONIC syntax plugin) +# @jsonic/csv -This plugin allows the [Jsonic](https://jsonic.senecajs.org) JSON -parser to support csv syntax. +A [Jsonic](https://jsonic.senecajs.org) syntax plugin that parses +CSV text into objects or arrays, with support for headers, quoted +fields, custom delimiters, streaming, and strict/non-strict modes. +Available for TypeScript and Go. [![npm version](https://img.shields.io/npm/v/@jsonic/csv.svg)](https://npmjs.com/package/@jsonic/csv) @@ -15,33 +17,43 @@ parser to support csv syntax. 
| ---------------------------------------------------- | --------------------------------------------------------------------------------------- | +## Quick example - -## Options -* _comment_: `null` (default: null) - comment -* _field_ - * _empty_: `string` (default: ) - empty - * _exact_: `boolean` (default: false) - exact - * _names_: `any` (default: undefined) - names - * _nonameprefix_: `string` (default: field~) - nonameprefix - * _separation_: `null` (default: null) - separation -* _header_: `boolean` (default: true) - header -* _number_: `null` (default: null) - number -* _object_: `boolean` (default: true) - object -* _record_ - * _empty_: `boolean` (default: false) - empty - * _separators_: `null` (default: null) - separators -* _stream_: `null` (default: null) - stream -* _strict_: `boolean` (default: true) - strict -* _string_ - * _csv_: `null` (default: null) - csv - * _quote_: `string` (default: ") - quote -* _trim_: `null` (default: null) - trim -* _value_: `null` (default: null) - value - +**TypeScript** +```typescript +import { Jsonic } from 'jsonic' +import { Csv } from '@jsonic/csv' +const parse = Jsonic.make().use(Csv) +parse("name,age\nAlice,30\nBob,25") +// [{ name: 'Alice', age: '30' }, { name: 'Bob', age: '25' }] +parse('a,b\n1,"hello, world"') +// [{ a: '1', b: 'hello, world' }] +``` +**Go** +```go +import csv "github.com/jsonicjs/csv/go" + +result, _ := csv.Parse("name,age\nAlice,30\nBob,25") +// [{name:Alice age:30} {name:Bob age:25}] +``` + + +## Documentation + +Full documentation following the [Diataxis](https://diataxis.fr) +framework (tutorials, how-to guides, explanation, reference): + +- [TypeScript documentation](doc/csv-ts.md) +- [Go documentation](doc/csv-go.md) + + +## License + +Copyright (c) 2021-2025 Richard Rodger and other contributors, +[MIT License](LICENSE). 
diff --git a/csv-grammar.jsonic b/csv-grammar.jsonic new file mode 100644 index 0000000..b7c599b --- /dev/null +++ b/csv-grammar.jsonic @@ -0,0 +1,52 @@ +# CSV Grammar Definition +# Parsed by a standard Jsonic instance and passed to jsonic.grammar() +# Function references (@ prefixed) are resolved against the refs map +# +# Token naming: +# #LN - line ending (removed from per-instance IGNORE set) +# #SP - whitespace (removed from per-instance IGNORE set in strict mode) +# #CA - comma / field separator +# #ZZ - end of input +# #VAL - token set: text, string, number, value literals +# +# Rules csv, newline, record, text are fully defined here. +# Rules list, elem, val are modified in code (strict mode defines from scratch; +# non-strict prepends to existing defaults to preserve JSON parsing). + +{ + rule: csv: open: [ + { s: '#ZZ' } + { s: '#LN' p: newline c: '@not-record-empty' } + { p: record } + ] + + rule: newline: open: [ + { s: '#LN #LN' r: newline } + { s: '#LN' r: newline } + { s: '#ZZ' } + { r: record } + ] + rule: newline: close: [ + { s: '#LN #LN' r: newline } + { s: '#LN' r: newline } + { s: '#ZZ' } + { r: record } + ] + + rule: record: open: [ + { p: list } + ] + rule: record: close: [ + { s: '#ZZ' } + { s: '#LN #ZZ' b: 1 } + { s: '#LN' r: '@record-close-next' } + ] + + rule: text: open: [ + { s: ['#VAL' '#SP'] b: 1 r: text n: { text: 1 } g: 'csv,space,follows' a: '@text-follows' } + { s: ['#SP' '#VAL'] r: text n: { text: 1 } g: 'csv,space,leads' a: '@text-leads' } + { s: ['#SP' '#CA #LN #ZZ'] b: 1 n: { text: 1 } g: 'csv,end' a: '@text-end' } + { s: '#SP' n: { text: 1 } g: 'csv,space' a: '@text-space' p: '@text-space-push' } + {} + ] +} diff --git a/doc/csv-go.md b/doc/csv-go.md new file mode 100644 index 0000000..9c7322f --- /dev/null +++ b/doc/csv-go.md @@ -0,0 +1,264 @@ +# CSV plugin for Jsonic (Go) + +A Jsonic syntax plugin that parses CSV text into Go slices of maps +or slices, with support for headers, quoted fields, custom +delimiters, streaming, 
and strict/non-strict modes. + +```bash +go get github.com/jsonicjs/csv/go@latest +``` + + +## Tutorials + +### Parse a basic CSV file + +Parse CSV text with a header row into a slice of ordered maps: + +```go +package main + +import ( + "fmt" + csv "github.com/jsonicjs/csv/go" +) + +func main() { + result, _ := csv.Parse("name,age\nAlice,30\nBob,25") + fmt.Println(result) + // [{name:Alice age:30} {name:Bob age:25}] +} +``` + +### Parse CSV without headers + +Return rows as slices instead of maps, with no header row. The boolean option fields are `*bool`, so the examples use a small caller-defined helper, `func boolPtr(b bool) *bool { return &b }`: + +```go +result, _ := csv.Parse("a,b,c\n1,2,3", csv.CsvOptions{ + Header: boolPtr(false), + Object: boolPtr(false), +}) +// [[a b c] [1 2 3]] +``` + +### Parse CSV with quoted fields + +Double-quoted fields handle commas, newlines, and escaped quotes: + +```go +result, _ := csv.Parse(`name,bio +Alice,"Likes ""cats"" and dogs" +Bob,"Line1 +Line2"`) +// [{name:Alice bio:Likes "cats" and dogs} {name:Bob bio:Line1\nLine2}] +``` + + +## How-to guides + +### Use a custom field delimiter + +Set `Field.Separation` to use a delimiter other than comma: + +```go +result, _ := csv.Parse("name\tage\nAlice\t30", csv.CsvOptions{ + Field: &csv.FieldOptions{Separation: "\t"}, +}) +// [{name:Alice age:30}] +``` + +### Enable number and value parsing + +By default in strict mode, all values are strings. 
Enable `Number` +and `Value` to parse numeric and boolean values: + +```go +result, _ := csv.Parse("a,b,c\n1,true,null", csv.CsvOptions{ + Number: boolPtr(true), + Value: boolPtr(true), +}) +// [{a:1 b:true c:}] +``` + +### Trim whitespace from fields + +Enable `Trim` to remove leading and trailing whitespace from field +values: + +```go +result, _ := csv.Parse("a , b \n 1 , 2 ", csv.CsvOptions{ + Trim: boolPtr(true), +}) +// [{a:1 b:2}] +``` + +### Stream records as they are parsed + +Use the `Stream` callback to receive records one at a time: + +```go +var records []any + +result, _ := csv.Parse("a,b\n1,2\n3,4", csv.CsvOptions{ + Stream: func(what string, record any) { + if what == "record" { + records = append(records, record) + } + }, +}) +// result is [] (empty, records were streamed) +// records contains [{a:1 b:2}, {a:3 b:4}] +``` + +### Provide explicit field names + +Set `Field.Names` when the CSV has no header row but you want +map output with named fields: + +```go +result, _ := csv.Parse("1,2,3\n4,5,6", csv.CsvOptions{ + Header: boolPtr(false), + Field: &csv.FieldOptions{Names: []string{"x", "y", "z"}}, +}) +// [{x:1 y:2 z:3} {x:4 y:5 z:6}] +``` + +### Enforce exact field counts + +Set `Field.Exact` to error when a row has more or fewer fields +than the header: + +```go +_, err := csv.Parse("a,b\n1,2,3", csv.CsvOptions{ + Field: &csv.FieldOptions{Exact: true}, +}) +// err: unexpected extra field value +``` + +### Create a reusable parser + +Use `MakeJsonic` to create a configured Jsonic instance you can +call repeatedly: + +```go +j := csv.MakeJsonic(csv.CsvOptions{ + Number: boolPtr(true), +}) + +r1, _ := j.Parse("a,b\n1,2") +r2, _ := j.Parse("x,y\n3,4") +``` + +### Enable comment lines + +Enable `Comment` to skip lines starting with `#`: + +```go +result, _ := csv.Parse("a,b\n# skip\n1,2", csv.CsvOptions{ + Comment: boolPtr(true), +}) +// [{a:1 b:2}] +``` + + +## Explanation + +### Strict vs non-strict mode + +In **strict mode** (default), the CSV 
plugin disables Jsonic's +built-in JSON parsing. All field values are treated as raw strings +unless `Number` or `Value` options are enabled. This matches the +behaviour of standard CSV parsers. + +In **non-strict mode** (`Strict: boolPtr(false)`), the plugin +preserves Jsonic's ability to parse JSON values. Fields can contain +objects, arrays, booleans, numbers, and quoted strings using Jsonic +syntax. Non-strict mode enables `Trim`, `Comment`, and `Number` by +default. + +### How quoted fields work + +The plugin includes a custom CSV string matcher that handles the +RFC 4180 double-quote escaping convention: + +- A field wrapped in double quotes can contain commas, newlines, + and quotes. +- A literal quote inside a quoted field is represented as `""`. +- For example: `"a""b"` parses to `a"b`. + + +## Reference + +### `Parse` (Function) + +```go +func Parse(src string, opts ...CsvOptions) ([]any, error) +``` + +Parse CSV text with the given options. Returns a slice of records. + +### `MakeJsonic` (Function) + +```go +func MakeJsonic(opts ...CsvOptions) *jsonic.Jsonic +``` + +Create a reusable Jsonic instance configured for CSV parsing. + +### `CsvOptions` + +```go +type CsvOptions struct { + Object *bool // Return maps (true) or slices (false). Default: true + Header *bool // First row is header. Default: true + Trim *bool // Trim whitespace. Default: nil (false strict, true non-strict) + Comment *bool // Enable # comments. Default: nil (false strict, true non-strict) + Number *bool // Parse numbers. Default: nil (false strict, true non-strict) + Value *bool // Parse true/false/null. Default: nil + Strict *bool // Strict CSV mode. Default: true + Field *FieldOptions + Record *RecordOptions + String *StringOptions + Stream StreamFunc +} +``` + +### `FieldOptions` + +```go +type FieldOptions struct { + Separation string // Field separator. Default: "," + NonamePrefix string // Prefix for unnamed extra fields. 
Default: "field~" + Empty string // Value for empty fields. Default: "" + Names []string // Explicit field names. + Exact bool // Error on field count mismatch. Default: false +} +``` + +### `RecordOptions` + +```go +type RecordOptions struct { + Separators string // Custom record separator characters. + Empty bool // Preserve empty lines as records. Default: false +} +``` + +### `StringOptions` + +```go +type StringOptions struct { + Quote string // Quote character. Default: `"` + Csv *bool // Force CSV string mode (nil=auto). +} +``` + +### `StreamFunc` + +```go +type StreamFunc func(what string, record any) +``` + +Callback for streaming CSV parsing. Called with `"start"`, `"record"`, +`"end"`, or `"error"`. diff --git a/doc/csv-ts.md b/doc/csv-ts.md new file mode 100644 index 0000000..2e8f9b5 --- /dev/null +++ b/doc/csv-ts.md @@ -0,0 +1,286 @@ +# CSV plugin for Jsonic (TypeScript) + +A Jsonic syntax plugin that parses CSV text into JavaScript arrays +of objects or arrays, with support for headers, quoted fields, +custom delimiters, streaming, and strict/non-strict modes. + +```bash +npm install @jsonic/csv +``` + +Requires `jsonic` >= 2 as a peer dependency. 
+ + +## Tutorials + +### Parse a basic CSV file + +Parse CSV text with a header row into an array of objects: + +```typescript +import { Jsonic } from 'jsonic' +import { Csv } from '@jsonic/csv' + +const j = Jsonic.make().use(Csv) + +j("name,age\nAlice,30\nBob,25") +// [{ name: 'Alice', age: '30' }, { name: 'Bob', age: '25' }] +``` + +### Parse CSV without headers + +Return rows as arrays instead of objects, with no header row: + +```typescript +import { Jsonic } from 'jsonic' +import { Csv } from '@jsonic/csv' + +const j = Jsonic.make().use(Csv, { header: false, object: false }) + +j("a,b,c\n1,2,3") +// [['a', 'b', 'c'], ['1', '2', '3']] +``` + +### Parse CSV with quoted fields + +Double-quoted fields handle commas, newlines, and escaped quotes: + +```typescript +import { Jsonic } from 'jsonic' +import { Csv } from '@jsonic/csv' + +const j = Jsonic.make().use(Csv) + +j('name,bio\nAlice,"Likes ""cats"" and dogs"\nBob,"Line1\nLine2"') +// [ +// { name: 'Alice', bio: 'Likes "cats" and dogs' }, +// { name: 'Bob', bio: 'Line1\nLine2' } +// ] +``` + + +## How-to guides + +### Use a custom field delimiter + +Set `field.separation` to use a delimiter other than comma: + +```typescript +const j = Jsonic.make().use(Csv, { + field: { separation: '\t' } +}) + +j("name\tage\nAlice\t30") +// [{ name: 'Alice', age: '30' }] +``` + +### Enable number and value parsing + +By default in strict mode, all values are strings. 
Enable `number` +and `value` to parse numeric and boolean values: + +```typescript +const j = Jsonic.make().use(Csv, { + number: true, + value: true, +}) + +j("a,b,c\n1,true,null") +// [{ a: 1, b: true, c: null }] +``` + +### Trim whitespace from fields + +Enable `trim` to remove leading and trailing whitespace from field +values: + +```typescript +const j = Jsonic.make().use(Csv, { trim: true }) + +j("a , b \n 1 , 2 ") +// [{ a: '1', b: '2' }] +``` + +### Stream records as they are parsed + +Use the `stream` callback to receive records one at a time without +storing them all in memory: + +```typescript +const records: any[] = [] + +const j = Jsonic.make().use(Csv, { + stream: (what, record) => { + if (what === 'record') records.push(record) + }, +}) + +j("a,b\n1,2\n3,4") +// returns [] (empty, records were streamed) +// records === [{ a: '1', b: '2' }, { a: '3', b: '4' }] +``` + +### Provide explicit field names + +Set `field.names` when the CSV has no header row but you want +object output with named fields: + +```typescript +const j = Jsonic.make().use(Csv, { + header: false, + field: { names: ['x', 'y', 'z'] }, +}) + +j("1,2,3\n4,5,6") +// [{ x: '1', y: '2', z: '3' }, { x: '4', y: '5', z: '6' }] +``` + +### Enforce exact field counts + +Set `field.exact` to error when a row has more or fewer fields +than the header: + +```typescript +const j = Jsonic.make().use(Csv, { + field: { exact: true }, +}) + +// j("a,b\n1,2,3") // throws: unexpected extra field value +// j("a,b\n1") // throws: missing field +``` + +### Use non-strict mode for embedded JSON + +Disable `strict` to allow Jsonic syntax inside CSV fields, +including JSON objects, arrays, and expressions: + +```typescript +const j = Jsonic.make().use(Csv, { strict: false }) + +j("a,b\ntrue,[1,2]") +// [{ a: true, b: [1, 2] }] +``` + +### Enable comment lines + +Enable `comment` to skip lines starting with `#`: + +```typescript +const j = Jsonic.make().use(Csv, { comment: true }) + +j("a,b\n# skip this\n1,2") 
+// [{ a: '1', b: '2' }] +``` + +### Preserve empty records + +By default, blank lines are skipped. Set `record.empty` to +preserve them as empty-field records: + +```typescript +const j = Jsonic.make().use(Csv, { record: { empty: true } }) + +j("a\n1\n\n2") +// [{ a: '1' }, { a: '' }, { a: '2' }] +``` + + +## Explanation + +### Strict vs non-strict mode + +In **strict mode** (default), the CSV plugin disables Jsonic's +built-in JSON parsing. All field values are treated as raw strings +unless `number` or `value` options are enabled. This matches the +behaviour of standard CSV parsers. + +In **non-strict mode** (`strict: false`), the plugin preserves +Jsonic's ability to parse JSON values. Fields can contain objects +(`{x:1}`), arrays (`[1,2]`), booleans, numbers, and quoted strings +using Jsonic syntax. Non-strict mode enables `trim`, `comment`, and +`number` by default. + +### How quoted fields work + +The plugin includes a custom CSV string matcher that handles the +RFC 4180 double-quote escaping convention: + +- A field wrapped in double quotes can contain commas, newlines, + and quotes. +- A literal quote inside a quoted field is represented as `""`. +- For example: `"a""b"` parses to `a"b`. + + +## Reference + +### `Csv` (Plugin) + +The plugin function. Register with `Jsonic.make().use(Csv, options)`. + +### `CsvOptions` + +```typescript +type CsvOptions = { + // Trim surrounding whitespace. Default: null (false in strict, true in non-strict) + trim: boolean | null + + // Enable # line comments. Default: null (false in strict, true in non-strict) + comment: boolean | null + + // Parse numeric values. Default: null (false in strict, true in non-strict) + number: boolean | null + + // Parse value keywords (true/false/null). Default: null (false in strict, false in non-strict) + value: boolean | null + + // First row is a header row. Default: true + header: boolean + + // Return records as objects (true) or arrays (false). 
Default: true + object: boolean + + // Stream callback. Default: null + stream: null | ((what: string, record?: Record | Error) => void) + + // Strict CSV mode (disables Jsonic syntax). Default: true + strict: boolean + + field: { + // Field separator string. Default: null (uses comma) + separation: null | string + + // Prefix for unnamed extra fields. Default: 'field~' + nonameprefix: string + + // Value for empty fields. Default: '' + empty: any + + // Explicit field names (overrides header). Default: undefined + names: undefined | string[] + + // Error on field count mismatch. Default: false + exact: boolean + } + + record: { + // Custom record separator characters. Default: null + separators: null | string + + // Preserve empty lines as records. Default: false + empty: boolean + } + + string: { + // Quote character. Default: '"' + quote: string + + // Force CSV string mode (null=auto). Default: null + csv: null | boolean + } +} +``` + +### `buildCsvStringMatcher` (Function) + +Exported for advanced use. Creates the custom CSV double-quote +string matcher used internally by the plugin. diff --git a/embed-grammar.js b/embed-grammar.js new file mode 100644 index 0000000..308ff58 --- /dev/null +++ b/embed-grammar.js @@ -0,0 +1,74 @@ +#!/usr/bin/env node + +// Embed csv-grammar.jsonic into TypeScript and Go source files. 
+// Run via: npm run embed (or: node embed-grammar.js) + +const fs = require('fs') +const path = require('path') + +const GRAMMAR_FILE = path.join(__dirname, 'csv-grammar.jsonic') +const TS_FILE = path.join(__dirname, 'src', 'csv.ts') +const GO_FILE = path.join(__dirname, 'go', 'plugin.go') + +const BEGIN = '// --- BEGIN EMBEDDED csv-grammar.jsonic ---' +const END = '// --- END EMBEDDED csv-grammar.jsonic ---' + +const grammar = fs.readFileSync(GRAMMAR_FILE, 'utf8') + +// --- TypeScript embedding --- +function embedTS() { + let src = fs.readFileSync(TS_FILE, 'utf8') + const startIdx = src.indexOf(BEGIN) + const endIdx = src.indexOf(END) + if (startIdx === -1 || endIdx === -1) { + console.error('TS markers not found in', TS_FILE) + process.exit(1) + } + + // Escape backticks and template expressions for a JS template literal. + const escaped = grammar + .replace(/\\/g, '\\\\') + .replace(/`/g, '\\`') + .replace(/\$\{/g, '\\${') + + const replacement = + BEGIN + + '\nconst grammarText = `\n' + + escaped + + '`\n' + + END + + src = src.substring(0, startIdx) + replacement + src.substring(endIdx + END.length) + fs.writeFileSync(TS_FILE, src) + console.log('Embedded grammar into', TS_FILE) +} + +// --- Go embedding --- +function embedGo() { + let src = fs.readFileSync(GO_FILE, 'utf8') + const startIdx = src.indexOf(BEGIN) + const endIdx = src.indexOf(END) + if (startIdx === -1 || endIdx === -1) { + console.error('Go markers not found in', GO_FILE) + process.exit(1) + } + + if (grammar.includes('`')) { + console.error('Grammar contains backticks, incompatible with Go raw strings') + process.exit(1) + } + + const replacement = + BEGIN + + '\nconst grammarText = `\n' + + grammar + + '`\n' + + END + + src = src.substring(0, startIdx) + replacement + src.substring(endIdx + END.length) + fs.writeFileSync(GO_FILE, src) + console.log('Embedded grammar into', GO_FILE) +} + +embedTS() +embedGo() diff --git a/go/csv.go b/go/csv.go index c142573..a97ab1f 100644 --- a/go/csv.go +++ 
b/go/csv.go @@ -67,6 +67,172 @@ func boolOpt(p *bool, def bool) bool { return def } +// optionsToMap converts CsvOptions to a map[string]any for the plugin interface. Unset values (nil pointers, empty strings, false flags, nil names) are omitted; field.empty is always written when Field is set, and the stream callback is stored under "_stream". +func optionsToMap(o *CsvOptions) map[string]any { + m := make(map[string]any) + if o.Object != nil { + m["object"] = *o.Object + } + if o.Header != nil { + m["header"] = *o.Header + } + if o.Trim != nil { + m["trim"] = *o.Trim + } + if o.Comment != nil { + m["comment"] = *o.Comment + } + if o.Number != nil { + m["number"] = *o.Number + } + if o.Value != nil { + m["value"] = *o.Value + } + if o.Strict != nil { + m["strict"] = *o.Strict + } + + if o.Field != nil { + fm := make(map[string]any) + if o.Field.Separation != "" { + fm["separation"] = o.Field.Separation + } + if o.Field.NonamePrefix != "" { + fm["nonameprefix"] = o.Field.NonamePrefix + } + fm["empty"] = o.Field.Empty // written unconditionally, so an empty string is forwarded too + if o.Field.Exact { + fm["exact"] = true + } + if o.Field.Names != nil { + fm["names"] = o.Field.Names + } + m["field"] = fm + } + if o.Record != nil { + rm := make(map[string]any) + if o.Record.Separators != "" { + rm["separators"] = o.Record.Separators + } + if o.Record.Empty { + rm["empty"] = true + } + m["record"] = rm + } + if o.String != nil { + sm := make(map[string]any) + if o.String.Quote != "" { + sm["quote"] = o.String.Quote + } + if o.String.Csv != nil { + sm["csv"] = *o.String.Csv + } + m["string"] = sm + } + if o.Stream != nil { + m["_stream"] = o.Stream // read back by mapToOptions under the same key + } + return m +} + +// mapToOptions converts a map[string]any (plugin options) to CsvOptions.
+func mapToOptions(m map[string]any) CsvOptions { + var o CsvOptions + if m == nil { + return o + } + + if v, ok := m["object"]; ok { + b := toBool(v) + o.Object = &b + } + if v, ok := m["header"]; ok { + b := toBool(v) + o.Header = &b + } + if v, ok := m["trim"]; ok { + b := toBool(v) + o.Trim = &b + } + if v, ok := m["comment"]; ok { + b := toBool(v) + o.Comment = &b + } + if v, ok := m["number"]; ok { + b := toBool(v) + o.Number = &b + } + if v, ok := m["value"]; ok { + b := toBool(v) + o.Value = &b + } + if v, ok := m["strict"]; ok { + b := toBool(v) + o.Strict = &b + } + + if fm, ok := m["field"].(map[string]any); ok { + o.Field = &FieldOptions{} + if v, ok := fm["separation"].(string); ok { + o.Field.Separation = v + } + if v, ok := fm["nonameprefix"].(string); ok { + o.Field.NonamePrefix = v + } + if v, ok := fm["empty"].(string); ok { + o.Field.Empty = v + } + if v, ok := fm["exact"].(bool); ok { + o.Field.Exact = v + } + if v, ok := fm["names"].([]any); ok { + for _, n := range v { + if s, ok := n.(string); ok { + o.Field.Names = append(o.Field.Names, s) + } + } + } + if v, ok := fm["names"].([]string); ok { + o.Field.Names = v + } + } + + if rm, ok := m["record"].(map[string]any); ok { + o.Record = &RecordOptions{} + if v, ok := rm["separators"].(string); ok { + o.Record.Separators = v + } + if v, ok := rm["empty"].(bool); ok { + o.Record.Empty = v + } + } + + if sm, ok := m["string"].(map[string]any); ok { + o.String = &StringOptions{} + if v, ok := sm["quote"].(string); ok { + o.String.Quote = v + } + if v, ok := sm["csv"].(bool); ok { + o.String.Csv = &v + } + } + + if v, ok := m["_stream"].(StreamFunc); ok { + o.Stream = v + } + + return o +} + +func toBool(v any) bool { + switch b := v.(type) { + case bool: + return b + default: + return false + } +} + func resolve(o *CsvOptions) *resolved { strict := boolOpt(o.Strict, true) r := &resolved{ diff --git a/go/go.mod b/go/go.mod index 23040f5..dd02190 100644 --- a/go/go.mod +++ b/go/go.mod @@ -2,4 +2,4 
@@ module github.com/jsonicjs/csv/go go 1.24.7 -require github.com/jsonicjs/jsonic/go v0.1.6 // indirect +require github.com/jsonicjs/jsonic/go v0.1.16-0.20260413211036-3ede30eae13d diff --git a/go/go.sum b/go/go.sum index 2f8eb56..531277f 100644 --- a/go/go.sum +++ b/go/go.sum @@ -1,4 +1,2 @@ -github.com/jsonicjs/jsonic/go v0.1.4 h1:V1KEzmg/jIwk25+JYj8ig1+B7190rHmH8WqZbT7XlgA= -github.com/jsonicjs/jsonic/go v0.1.4/go.mod h1:ObNKlCG7esWoi4AHCpdgkILvPINV8bpvkbCd4llGGUg= -github.com/jsonicjs/jsonic/go v0.1.6 h1:oUw4vxCK6tqa7SGN87vjCtx3sCpeHXdqfl25hx5LKP0= -github.com/jsonicjs/jsonic/go v0.1.6/go.mod h1:ObNKlCG7esWoi4AHCpdgkILvPINV8bpvkbCd4llGGUg= +github.com/jsonicjs/jsonic/go v0.1.16-0.20260413211036-3ede30eae13d h1:xPVFzEJuLnlC2ikww4blr+73TcLCjpIwN8SJ5pml8/E= +github.com/jsonicjs/jsonic/go v0.1.16-0.20260413211036-3ede30eae13d/go.mod h1:ObNKlCG7esWoi4AHCpdgkILvPINV8bpvkbCd4llGGUg= diff --git a/go/plugin.go b/go/plugin.go index 81b03b2..e3acff5 100644 --- a/go/plugin.go +++ b/go/plugin.go @@ -7,8 +7,65 @@ import ( jsonic "github.com/jsonicjs/jsonic/go" ) +// --- BEGIN EMBEDDED csv-grammar.jsonic --- +const grammarText = ` +# CSV Grammar Definition +# Parsed by a standard Jsonic instance and passed to jsonic.grammar() +# Function references (@ prefixed) are resolved against the refs map +# +# Token naming: +# #LN - line ending (removed from per-instance IGNORE set) +# #SP - whitespace (removed from per-instance IGNORE set in strict mode) +# #CA - comma / field separator +# #ZZ - end of input +# #VAL - token set: text, string, number, value literals +# +# Rules csv, newline, record, text are fully defined here. +# Rules list, elem, val are modified in code (strict mode defines from scratch; +# non-strict prepends to existing defaults to preserve JSON parsing). 
+ +{ + rule: csv: open: [ + { s: '#ZZ' } + { s: '#LN' p: newline c: '@not-record-empty' } + { p: record } + ] + + rule: newline: open: [ + { s: '#LN #LN' r: newline } + { s: '#LN' r: newline } + { s: '#ZZ' } + { r: record } + ] + rule: newline: close: [ + { s: '#LN #LN' r: newline } + { s: '#LN' r: newline } + { s: '#ZZ' } + { r: record } + ] + + rule: record: open: [ + { p: list } + ] + rule: record: close: [ + { s: '#ZZ' } + { s: '#LN #ZZ' b: 1 } + { s: '#LN' r: '@record-close-next' } + ] + + rule: text: open: [ + { s: ['#VAL' '#SP'] b: 1 r: text n: { text: 1 } g: 'csv,space,follows' a: '@text-follows' } + { s: ['#SP' '#VAL'] r: text n: { text: 1 } g: 'csv,space,leads' a: '@text-leads' } + { s: ['#SP' '#CA #LN #ZZ'] b: 1 n: { text: 1 } g: 'csv,end' a: '@text-end' } + { s: '#SP' n: { text: 1 } g: 'csv,space' a: '@text-space' p: '@text-space-push' } + {} + ] +} +` +// --- END EMBEDDED csv-grammar.jsonic --- + // Csv is a jsonic plugin that adds CSV parsing support. -// It mirrors the TypeScript Csv plugin, defining grammar rules +// It mirrors the TypeScript Csv plugin, using a shared grammar definition // (csv, newline, record, list, elem, val, text) and a custom // CSV string matcher. func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { @@ -43,10 +100,6 @@ func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { delete(cfg.FixedTokens, ":") cfg.SortFixedTokens() - // When the CSV string matcher is active, disable the built-in string - // matcher and remove quote chars from string chars. This way: - // - The CSV string matcher (custom, priority 100000) handles proper quoted fields - // - Mid-field quotes (B" in A,B",C) are treated as literal text by the text matcher if useCsvString { cfg.StringLex = false for ch := range cfg.StringChars { @@ -54,7 +107,6 @@ func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { } } - // Exclude jsonic and imp rule groups. 
j.Exclude("jsonic", "imp") } else { useCsvString := false @@ -79,9 +131,7 @@ func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { // Custom "comma" (field separator) if opts.fieldSep != "," { cfg := j.Config() - // Remove old comma mapping delete(cfg.FixedTokens, ",") - // Add custom separator j.Token("#CA", opts.fieldSep) cfg.SortFixedTokens() } @@ -93,14 +143,11 @@ func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { cfg.ValueLex = optValue cfg.CommentLex = comment - // When comments are disabled, clear comment line starters so the text matcher - // doesn't stop at '#' or '//'. Otherwise '#' becomes unmatchable. if !comment { cfg.CommentLine = nil cfg.CommentBlock = nil } - // Set start rule cfg.RuleStart = "csv" if opts.recordSep != "" { @@ -112,149 +159,172 @@ func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { } } - // Register custom token types that are NOT in jsonic's global IGNORE set. - // In the TS version, the IGNORE set is configurable per-instance. - // In Go, TinSetIGNORE is a global map, so we use custom tokens instead. - RL := j.Token("#RL") // Record Line (non-ignored LN equivalent) - RS := j.Token("#RS") // Record Space (non-ignored SP equivalent) + // Modify the per-instance IGNORE set so that #LN and #SP tokens are + // visible to the parser (matching the TS approach exactly). + // In strict mode, both #SP and #LN are non-ignored. + // In non-strict mode, only #LN is non-ignored (#SP stays ignored). + delete(cfg.IgnoreSet, jsonic.TinLN) + if strict { + delete(cfg.IgnoreSet, jsonic.TinSP) + } - // Intercept the line matcher: emit #RL instead of #LN so it's not ignored. - // Each line ending (\n or \r\n) is emitted as a separate token so the grammar - // can distinguish multiple newlines (important for empty record handling). 
- cfg.LineCheck = func(lex *jsonic.Lex) *jsonic.LexCheckResult { - pnt := lex.Cursor() - src := lex.Src - sI := pnt.SI - rI := pnt.RI - if sI >= pnt.Len { - return nil - } - if !cfg.LineChars[rune(src[sI])] { - return nil - } - startI := sI - // Consume one line ending: \r\n or \r or \n - if src[sI] == '\r' { - sI++ - if sI < pnt.Len && src[sI] == '\n' { - sI++ - } - rI++ - } else if cfg.LineChars[rune(src[sI])] { - if cfg.RowChars[rune(src[sI])] { - rI++ - } - sI++ - } - tkn := lex.Token("#RL", RL, nil, src[startI:sI]) - pnt.SI = sI - pnt.RI = rI - pnt.CI = 1 - return &jsonic.LexCheckResult{Done: true, Token: tkn} + // Enable single-newline token emission so each \n or \r\n is a + // separate #LN token (required by the grammar's #LN #LN patterns). + cfg.LineSingle = true + + // Parse the grammar text using a fresh jsonic instance. + parser := jsonic.Make() + parsed, err := parser.Parse(grammarText) + if err != nil { + panic("failed to parse csv grammar: " + err.Error()) + } + parsedMap, ok := parsed.(map[string]any) + if !ok { + panic("csv grammar did not parse to a map") } - // In strict mode, also intercept space to emit #RS. - // In non-strict mode, spaces are handled by the grammar too. - cfg.SpaceCheck = func(lex *jsonic.Lex) *jsonic.LexCheckResult { - pnt := lex.Cursor() - src := lex.Src - sI := pnt.SI - cI := pnt.CI - if sI >= pnt.Len { - return nil - } - if !cfg.SpaceChars[rune(src[sI])] { - return nil - } - startI := sI - for sI < pnt.Len && cfg.SpaceChars[rune(src[sI])] { - sI++ - cI++ - } - tkn := lex.Token("#RS", RS, nil, src[startI:sI]) - pnt.SI = sI - pnt.CI = cI - return &jsonic.LexCheckResult{Done: true, Token: tkn} + // Build refs map. + refs := buildRefs(opts, strict, objres, header, trim, recordEmpty, stream) + + // Convert parsed grammar to GrammarSpec and apply. 
+ gs := mapToGrammarSpec(parsedMap, refs) + if err := j.Grammar(gs); err != nil { + panic("failed to apply csv grammar: " + err.Error()) } - // Get token Tins - use our custom non-ignored tokens - LN := RL // Use RL (non-ignored) instead of LN (ignored) + // Token Tins for the code-based rule definitions below. + LN := j.Token("#LN") CA := j.Token("#CA") - SP := RS // Use RS (non-ignored) instead of SP (ignored) + SP := j.Token("#SP") ZZ := j.Token("#ZZ") - VAL := j.TokenSet("VAL") // [TX, NR, ST, VL] + VAL := j.TokenSet("VAL") - // ======= csv rule (starting rule) ======= - j.Rule("csv", func(rs *jsonic.RuleSpec) { - rs.Clear() + // Rules list, elem, val are defined in code (not grammar) because + // in non-strict mode the default jsonic alternatives must be preserved. + // In Go, these are always defined from scratch (strict mode is default). + // ======= list rule ======= + j.Rule("list", func(rs *jsonic.RuleSpec) { + rs.Clear() rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { - if ctx.Meta == nil { - ctx.Meta = make(map[string]any) - } - ctx.Meta["recordI"] = 0 - if stream != nil { - stream("start", nil) - } r.Node = make([]any, 0) }) - - openAlts := []*jsonic.AltSpec{ - // End immediately if EOF - {S: [][]jsonic.Tin{{ZZ}}}, + rs.Open = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{{LN}}, B: 1}, + {P: "elem"}, } - // Ignore empty lines from the start (if not preserving empty records) - if !recordEmpty { - openAlts = append(openAlts, &jsonic.AltSpec{S: [][]jsonic.Tin{{LN}}, P: "newline"}) + rs.Close = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{{LN}}, B: 1}, + {S: [][]jsonic.Tin{{ZZ}}}, } - // Look for the first record - openAlts = append(openAlts, &jsonic.AltSpec{P: "record"}) - rs.Open = openAlts - - rs.AddAC(func(r *jsonic.Rule, ctx *jsonic.Context) { - if stream != nil { - stream("end", nil) - } - }) }) - // ======= newline rule ======= - j.Rule("newline", func(rs *jsonic.RuleSpec) { + // ======= elem rule ======= + j.Rule("elem", func(rs *jsonic.RuleSpec) { 
rs.Clear() + rs.Open = []*jsonic.AltSpec{ - {S: [][]jsonic.Tin{{LN}, {LN}}, R: "newline"}, - {S: [][]jsonic.Tin{{LN}}, R: "newline"}, - {S: [][]jsonic.Tin{{ZZ}}}, - {R: "record"}, + {S: [][]jsonic.Tin{{CA}}, B: 1, + A: jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + if arr, ok := r.Node.([]any); ok { + r.Node = append(arr, opts.emptyField) + if r.Parent != jsonic.NoRule && r.Parent != nil { + r.Parent.Node = r.Node + } + } + r.U["done"] = true + })}, + {P: "val"}, } + rs.Close = []*jsonic.AltSpec{ - {S: [][]jsonic.Tin{{LN}, {LN}}, R: "newline"}, - {S: [][]jsonic.Tin{{LN}}, R: "newline"}, + {S: [][]jsonic.Tin{{CA}, {LN, ZZ}}, B: 1, + A: jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + if arr, ok := r.Node.([]any); ok { + r.Node = append(arr, opts.emptyField) + if r.Parent != jsonic.NoRule && r.Parent != nil { + r.Parent.Node = r.Node + } + } + })}, + {S: [][]jsonic.Tin{{CA}}, R: "elem"}, + {S: [][]jsonic.Tin{{LN}}, B: 1}, {S: [][]jsonic.Tin{{ZZ}}}, - {R: "record"}, } + + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + done, _ := r.U["done"].(bool) + if !done && !jsonic.IsUndefined(r.Child.Node) { + if arr, ok := r.Node.([]any); ok { + r.Node = append(arr, r.Child.Node) + if r.Parent != jsonic.NoRule && r.Parent != nil { + r.Parent.Node = r.Node + } + } + } + }) }) - // ======= record rule ======= - j.Rule("record", func(rs *jsonic.RuleSpec) { + // ======= val rule ======= + j.Rule("val", func(rs *jsonic.RuleSpec) { rs.Clear() - rs.Open = []*jsonic.AltSpec{ - {P: "list"}, - } + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + r.Node = jsonic.Undefined + }) - closeAlts := []*jsonic.AltSpec{ - {S: [][]jsonic.Tin{{ZZ}}}, - {S: [][]jsonic.Tin{{LN}, {ZZ}}, B: 1}, - } - if recordEmpty { - closeAlts = append(closeAlts, &jsonic.AltSpec{S: [][]jsonic.Tin{{LN}}, R: "record"}) - } else { - closeAlts = append(closeAlts, &jsonic.AltSpec{S: [][]jsonic.Tin{{LN}}, R: "newline"}) + rs.Open = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{VAL, {SP}}, 
B: 2, P: "text"}, + {S: [][]jsonic.Tin{{SP}}, B: 1, P: "text"}, + {S: [][]jsonic.Tin{VAL}}, + {S: [][]jsonic.Tin{{LN}}, B: 1}, } - rs.Close = closeAlts rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if jsonic.IsUndefined(r.Node) { + if jsonic.IsUndefined(r.Child.Node) { + if r.OS == 0 { + r.Node = jsonic.Undefined + } else { + r.Node = r.O0.ResolveVal() + } + } else { + r.Node = r.Child.Node + } + } + }) + }) +} + +// buildRefs creates the refs map for the grammar. +func buildRefs( + opts *resolved, + strict, objres, header bool, + trim, recordEmpty bool, + stream StreamFunc, +) map[jsonic.FuncRef]any { + refs := map[jsonic.FuncRef]any{ + + // === State actions (auto-wired by @rulename-{bo,ao,bc,ac}) === + + "@csv-bo": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + if ctx.Meta == nil { + ctx.Meta = make(map[string]any) + } + ctx.Meta["recordI"] = 0 + if stream != nil { + stream("start", nil) + } + r.Node = make([]any, 0) + }), + + "@csv-ac": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + if stream != nil { + stream("end", nil) + } + }), + + "@record-bc": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { recordI, _ := ctx.Meta["recordI"].(int) fields := ctx.Meta["fields"] fieldNames := opts.fieldNames @@ -269,9 +339,7 @@ func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { fieldSlice = fieldNames } - // First line is fields if header=true if recordI == 0 && header { - // Extract header names from child node if childArr, ok := r.Child.Node.([]any); ok { names := make([]string, len(childArr)) for i, v := range childArr { @@ -286,7 +354,6 @@ func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { ctx.Meta["fields"] = []string{} } } else { - // A normal record line var rawRecord []any if childArr, ok := r.Child.Node.([]any); ok { rawRecord = childArr @@ -294,7 +361,6 @@ func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { rawRecord = []any{} } - // Validate field count if exact mode enabled if 
opts.fieldExact && fieldSlice != nil { if len(rawRecord) != len(fieldSlice) { errCode := "csv_missing_field" @@ -329,7 +395,6 @@ func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { } i = len(fieldSlice) } - // Handle extra unnamed fields for ; i < len(rawRecord); i++ { fieldName := opts.noNamePrefix + strconv.Itoa(i) val := rawRecord[i] @@ -343,15 +408,12 @@ func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { } else { if arr, ok := r.Node.([]any); ok { r.Node = append(arr, record) - // Propagate updated slice up through parent chain - // (Go slices may reallocate on append) if r.Parent != jsonic.NoRule && r.Parent != nil { r.Parent.Node = r.Node } } } } else { - // Return records as arrays for i := 0; i < len(rawRecord); i++ { if rawRecord[i] == nil { rawRecord[i] = opts.emptyField @@ -370,242 +432,298 @@ func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { } } ctx.Meta["recordI"] = recordI + 1 - }) - }) + }), - // ======= list rule ======= - j.Rule("list", func(rs *jsonic.RuleSpec) { - rs.Clear() - rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { - r.Node = make([]any, 0) - }) - rs.Open = []*jsonic.AltSpec{ - // If at end of line, backtrack (empty record) - {S: [][]jsonic.Tin{{LN}}, B: 1}, - // Otherwise, start parsing elements - {P: "elem"}, - } - rs.Close = []*jsonic.AltSpec{ - // LN ends record - {S: [][]jsonic.Tin{{LN}}, B: 1}, - {S: [][]jsonic.Tin{{ZZ}}}, - } - }) + "@text-bc": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + if !jsonic.IsUndefined(r.Child.Node) { + r.Parent.Node = r.Child.Node + } else { + r.Parent.Node = r.Node + } + }), - // ======= elem rule ======= - j.Rule("elem", func(rs *jsonic.RuleSpec) { - rs.Clear() + // === Alt actions === - rs.Open = []*jsonic.AltSpec{ - // An empty element (comma without value before it) - {S: [][]jsonic.Tin{{CA}}, B: 1, - A: func(r *jsonic.Rule, ctx *jsonic.Context) { - if arr, ok := r.Node.([]any); ok { - r.Node = append(arr, opts.emptyField) - if r.Parent != jsonic.NoRule && 
r.Parent != nil { - r.Parent.Node = r.Node - } + "@elem-open-empty": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + if arr, ok := r.Node.([]any); ok { + r.Node = append(arr, opts.emptyField) + if r.Parent != jsonic.NoRule && r.Parent != nil { + r.Parent.Node = r.Node + } + } + r.U["done"] = true + }), + + "@elem-close-trailing": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + if arr, ok := r.Node.([]any); ok { + r.Node = append(arr, opts.emptyField) + if r.Parent != jsonic.NoRule && r.Parent != nil { + r.Parent.Node = r.Node + } + } + }), + + "@text-follows": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + textN := r.N["text"] + var val string + if textN == 1 { + val = "" + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + if s, ok := r.Prev.Node.(string); ok { + val = s + } + } + result := val + tokenStr(r.O0) + r.Node = result + if textN == 1 { + // first text rule + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + r.Prev.Node = result + } + }), + + "@text-leads": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + textN := r.N["text"] + var val string + if textN == 1 { + val = "" + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + if s, ok := r.Prev.Node.(string); ok { + val = s + } + } + spaceStr := "" + if textN >= 2 || !trim { + spaceStr = r.O0.Src + } + result := val + spaceStr + r.O1.Src + r.Node = result + if textN == 1 { + // first + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + r.Prev.Node = result + } + }), + + "@text-end": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + textN := r.N["text"] + var val string + if textN == 1 { + val = "" + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + if s, ok := r.Prev.Node.(string); ok { + val = s + } + } + spaceStr := "" + if !trim { + spaceStr = r.O0.Src + } + result := val + spaceStr + r.Node = result + if textN == 1 { + // first + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + r.Prev.Node = result + } + 
}), + + "@text-space": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + if strict { + textN := r.N["text"] + var val string + if textN == 1 { + val = "" + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + if s, ok := r.Prev.Node.(string); ok { + val = s } - r.U["done"] = true - }}, - // Normal element - delegate to val - {P: "val"}, - } + } + spaceStr := "" + if !trim { + spaceStr = r.O0.Src + } + result := val + spaceStr + r.Node = result + if textN == 1 { + // first + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + r.Prev.Node = result + } + } + }), - rs.Close = []*jsonic.AltSpec{ - // An empty element at the end of the line: CA followed by LN or ZZ - {S: [][]jsonic.Tin{{CA}, {LN, ZZ}}, B: 1, - A: func(r *jsonic.Rule, ctx *jsonic.Context) { - if arr, ok := r.Node.([]any); ok { - r.Node = append(arr, opts.emptyField) - if r.Parent != jsonic.NoRule && r.Parent != nil { - r.Parent.Node = r.Node - } - } - }}, - // Comma means next element - {S: [][]jsonic.Tin{{CA}}, R: "elem"}, - // LN ends record - {S: [][]jsonic.Tin{{LN}}, B: 1}, - // EOF ends record - {S: [][]jsonic.Tin{{ZZ}}}, - } + // === Conditions === - rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { - done, _ := r.U["done"].(bool) - if !done && !jsonic.IsUndefined(r.Child.Node) { - if arr, ok := r.Node.([]any); ok { - r.Node = append(arr, r.Child.Node) - if r.Parent != jsonic.NoRule && r.Parent != nil { - r.Parent.Node = r.Node - } - } + "@not-record-empty": jsonic.AltCond(func(r *jsonic.Rule, ctx *jsonic.Context) bool { + return !recordEmpty + }), + + // === FuncRef for dynamic rule names === + + "@record-close-next": func(r *jsonic.Rule, ctx *jsonic.Context) string { + if recordEmpty { + return "record" } - }) - }) + return "newline" + }, - // ======= val rule ======= - j.Rule("val", func(rs *jsonic.RuleSpec) { - rs.Clear() + "@text-space-push": func(r *jsonic.Rule, ctx *jsonic.Context) string { + if strict { + return "" + } + return "val" + }, + } - rs.AddBO(func(r 
*jsonic.Rule, ctx *jsonic.Context) { - r.Node = jsonic.Undefined - }) + return refs +} - rs.Open = []*jsonic.AltSpec{ - // Handle text and space concatenation - {S: [][]jsonic.Tin{VAL, {SP}}, B: 2, P: "text"}, - {S: [][]jsonic.Tin{{SP}}, B: 1, P: "text"}, - // Plain value (no trailing space) - {S: [][]jsonic.Tin{VAL}}, - // LN ends record - {S: [][]jsonic.Tin{{LN}}, B: 1}, +// mapToGrammarSpec converts a parsed grammar map to a GrammarSpec. +func mapToGrammarSpec(parsed map[string]any, ref map[jsonic.FuncRef]any) *jsonic.GrammarSpec { + gs := &jsonic.GrammarSpec{ + Ref: ref, + } + + ruleMap, _ := parsed["rule"].(map[string]any) + if ruleMap == nil { + return gs + } + + gs.Rule = make(map[string]*jsonic.GrammarRuleSpec, len(ruleMap)) + for name, rDef := range ruleMap { + rd, ok := rDef.(map[string]any) + if !ok { + continue } + grs := &jsonic.GrammarRuleSpec{} + if openDef, ok := rd["open"]; ok { + grs.Open = convertAlts(openDef) + } + if closeDef, ok := rd["close"]; ok { + grs.Close = convertAlts(closeDef) + } + gs.Rule[name] = grs + } - rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { - if jsonic.IsUndefined(r.Node) { - if jsonic.IsUndefined(r.Child.Node) { - if r.OS == 0 { - r.Node = jsonic.Undefined - } else { - r.Node = r.O0.ResolveVal() - } - } else { - r.Node = r.Child.Node + return gs +} + +// convertAlts converts a parsed grammar alt definition to []*GrammarAltSpec +// or *GrammarAltListSpec. 
+func convertAlts(def any) any { + switch v := def.(type) { + case []any: + alts := make([]*jsonic.GrammarAltSpec, 0, len(v)) + for _, a := range v { + alt := convertAlt(a) + if alt != nil { + alts = append(alts, alt) + } + } + return alts + case map[string]any: + // May be an AltListSpec with "alts" and "inject" + als := &jsonic.GrammarAltListSpec{} + if altsArr, ok := v["alts"].([]any); ok { + for _, a := range altsArr { + alt := convertAlt(a) + if alt != nil { + als.Alts = append(als.Alts, alt) } } - }) - }) + } + if inj, ok := v["inject"].(map[string]any); ok { + als.Inject = &jsonic.GrammarInjectSpec{} + if app, ok := inj["append"].(bool); ok { + als.Inject.Append = app + } + } + return als + } + return nil +} - // ======= text rule ======= - j.Rule("text", func(rs *jsonic.RuleSpec) { - rs.Clear() +// convertAlt converts a single parsed alt map to a GrammarAltSpec. +func convertAlt(def any) *jsonic.GrammarAltSpec { + m, ok := def.(map[string]any) + if !ok { + // Empty alt spec {} + return &jsonic.GrammarAltSpec{} + } - rs.Open = []*jsonic.AltSpec{ - // Space within non-space is preserved as part of text value - {S: [][]jsonic.Tin{VAL, {SP}}, B: 1, R: "text", - N: map[string]int{"text": 1}, - G: "csv,space,follows", - A: func(r *jsonic.Rule, ctx *jsonic.Context) { - textN := r.N["text"] - var val string - if textN == 1 { - val = "" - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - if s, ok := r.Prev.Node.(string); ok { - val = s - } - } - result := val + tokenStr(r.O0) - r.Node = result - if textN == 1 { - // first text rule - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - r.Prev.Node = result - } - }}, - - // SP VAL - {S: [][]jsonic.Tin{{SP}, VAL}, R: "text", - N: map[string]int{"text": 1}, - G: "csv,space,leads", - A: func(r *jsonic.Rule, ctx *jsonic.Context) { - textN := r.N["text"] - var val string - if textN == 1 { - val = "" - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - if s, ok := r.Prev.Node.(string); ok { - val = s - } - } - 
spaceStr := "" - if textN >= 2 || !trim { - spaceStr = r.O0.Src - } - result := val + spaceStr + r.O1.Src - r.Node = result - if textN == 1 { - // first - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - r.Prev.Node = result - } - }}, - - // SP [CA, LN, ZZ] - trailing space - {S: [][]jsonic.Tin{{SP}, {CA, LN, ZZ}}, B: 1, - N: map[string]int{"text": 1}, - G: "csv,end", - A: func(r *jsonic.Rule, ctx *jsonic.Context) { - textN := r.N["text"] - var val string - if textN == 1 { - val = "" - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - if s, ok := r.Prev.Node.(string); ok { - val = s - } - } - spaceStr := "" - if !trim { - spaceStr = r.O0.Src - } - result := val + spaceStr - r.Node = result - if textN == 1 { - // first - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - r.Prev.Node = result - } - }}, - - // SP only - {S: [][]jsonic.Tin{{SP}}, - N: map[string]int{"text": 1}, - G: "csv,space", - A: func(r *jsonic.Rule, ctx *jsonic.Context) { - if strict { - textN := r.N["text"] - var val string - if textN == 1 { - val = "" - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - if s, ok := r.Prev.Node.(string); ok { - val = s - } - } - spaceStr := "" - if !trim { - spaceStr = r.O0.Src - } - result := val + spaceStr - r.Node = result - if textN == 1 { - // first - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - r.Prev.Node = result - } - } - }, - P: func() string { - if strict { - return "" - } - return "val" - }()}, + ga := &jsonic.GrammarAltSpec{} - // Accept anything after text - {}, + if s, ok := m["s"]; ok { + switch sv := s.(type) { + case string: + ga.S = sv + case []any: + strs := make([]string, len(sv)) + for i, item := range sv { + strs[i], _ = item.(string) + } + ga.S = strs } + } - rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { - if !jsonic.IsUndefined(r.Child.Node) { - r.Parent.Node = r.Child.Node - } else { - r.Parent.Node = r.Node + if b, ok := m["b"]; ok { + switch bv := b.(type) { + case float64: + ga.B = int(bv) + case 
int: + ga.B = bv + case string: + ga.B = bv + } + } + + if p, ok := m["p"].(string); ok { + ga.P = p + } + + if r, ok := m["r"].(string); ok { + ga.R = r + } + + if a, ok := m["a"].(string); ok { + ga.A = a + } + + if e, ok := m["e"].(string); ok { + ga.E = e + } + + if c, ok := m["c"]; ok { + switch cv := c.(type) { + case string: + ga.C = cv + case map[string]any: + ga.C = cv + } + } + + if n, ok := m["n"].(map[string]any); ok { + ga.N = make(map[string]int, len(n)) + for k, v := range n { + switch nv := v.(type) { + case float64: + ga.N[k] = int(nv) + case int: + ga.N[k] = nv } - }) - }) + } + } + + if g, ok := m["g"].(string); ok { + ga.G = g + } + + if u, ok := m["u"].(map[string]any); ok { + ga.U = u + } + + return ga } // tokenStr gets the string value from a token (Val for ST, Src otherwise). @@ -641,13 +759,11 @@ func buildCsvStringMatcher(opts *resolved, j *jsonic.Jsonic) jsonic.LexMatcher { return nil } - // Only match when quote is at the start of a field: - // beginning of input, after a comma/separator, after a newline, or after whitespace. 
+ // Only match when quote is at the start of a field if sI > 0 { prev := rune(src[sI-1]) _, isFixed := cfg.FixedTokens[string(prev)] if !isFixed && !cfg.LineChars[prev] && !cfg.SpaceChars[prev] { - // Mid-field quote - don't match, let text matcher handle it return nil } } @@ -657,7 +773,7 @@ func buildCsvStringMatcher(opts *resolved, j *jsonic.Jsonic) jsonic.LexMatcher { rI := pnt.RI cI := pnt.CI - sI += qLen // skip opening quote + sI += qLen cI += qLen var s strings.Builder @@ -665,12 +781,10 @@ func buildCsvStringMatcher(opts *resolved, j *jsonic.Jsonic) jsonic.LexMatcher { for sI < srclen { cI++ - // Check for quote character if strings.HasPrefix(src[sI:], q) { sI += qLen cI += qLen - 1 - // Check for escaped quote (double quote) if sI < srclen && strings.HasPrefix(src[sI:], q) { s.WriteString(q) sI += qLen @@ -678,7 +792,6 @@ func buildCsvStringMatcher(opts *resolved, j *jsonic.Jsonic) jsonic.LexMatcher { continue } - // String finished val := s.String() ssrc := src[pnt.SI:sI] tkn := lex.Token("#ST", jsonic.TinST, val, ssrc) @@ -690,7 +803,6 @@ func buildCsvStringMatcher(opts *resolved, j *jsonic.Jsonic) jsonic.LexMatcher { ch := src[sI] - // Check for line characters (newlines in quoted fields) cfg := j.Config() if cfg.LineChars[rune(ch)] { if cfg.RowChars[rune(ch)] { @@ -703,13 +815,10 @@ func buildCsvStringMatcher(opts *resolved, j *jsonic.Jsonic) jsonic.LexMatcher { continue } - // Check for unprintable characters if ch < 32 { - // Bad token return nil } - // Body part of string - fast scan bI := sI qFirst := q[0] for sI < srclen && src[sI] >= 32 && src[sI] != qFirst { @@ -723,7 +832,6 @@ func buildCsvStringMatcher(opts *resolved, j *jsonic.Jsonic) jsonic.LexMatcher { s.WriteString(src[bI:sI]) } - // Unterminated string - return a bad token badSrc := src[pnt.SI:sI] tkn := lex.Token("#BD", jsonic.TinBD, nil, badSrc) tkn.Why = "unterminated_string" @@ -800,7 +908,6 @@ func MakeJsonic(opts ...CsvOptions) *jsonic.Jsonic { j := jsonic.Make(jopts) - // Convert 
CsvOptions to map for plugin pluginMap := optionsToMap(&o) j.Use(Csv, pluginMap) @@ -810,168 +917,3 @@ func MakeJsonic(opts ...CsvOptions) *jsonic.Jsonic { func boolPtr(b bool) *bool { return &b } - -// optionsToMap converts CsvOptions to a map[string]any for the plugin interface. -func optionsToMap(o *CsvOptions) map[string]any { - m := make(map[string]any) - if o.Object != nil { - m["object"] = *o.Object - } - if o.Header != nil { - m["header"] = *o.Header - } - if o.Trim != nil { - m["trim"] = *o.Trim - } - if o.Comment != nil { - m["comment"] = *o.Comment - } - if o.Number != nil { - m["number"] = *o.Number - } - if o.Value != nil { - m["value"] = *o.Value - } - if o.Strict != nil { - m["strict"] = *o.Strict - } - if o.Field != nil { - fm := make(map[string]any) - if o.Field.Separation != "" { - fm["separation"] = o.Field.Separation - } - if o.Field.NonamePrefix != "" { - fm["nonameprefix"] = o.Field.NonamePrefix - } - fm["empty"] = o.Field.Empty - if o.Field.Exact { - fm["exact"] = true - } - if o.Field.Names != nil { - fm["names"] = o.Field.Names - } - m["field"] = fm - } - if o.Record != nil { - rm := make(map[string]any) - if o.Record.Separators != "" { - rm["separators"] = o.Record.Separators - } - if o.Record.Empty { - rm["empty"] = true - } - m["record"] = rm - } - if o.String != nil { - sm := make(map[string]any) - if o.String.Quote != "" { - sm["quote"] = o.String.Quote - } - if o.String.Csv != nil { - sm["csv"] = *o.String.Csv - } - m["string"] = sm - } - if o.Stream != nil { - m["_stream"] = o.Stream - } - return m -} - -// mapToOptions converts a map[string]any (plugin options) to CsvOptions. 
-func mapToOptions(m map[string]any) CsvOptions { - var o CsvOptions - if m == nil { - return o - } - - if v, ok := m["object"]; ok { - b := toBool(v) - o.Object = &b - } - if v, ok := m["header"]; ok { - b := toBool(v) - o.Header = &b - } - if v, ok := m["trim"]; ok { - b := toBool(v) - o.Trim = &b - } - if v, ok := m["comment"]; ok { - b := toBool(v) - o.Comment = &b - } - if v, ok := m["number"]; ok { - b := toBool(v) - o.Number = &b - } - if v, ok := m["value"]; ok { - b := toBool(v) - o.Value = &b - } - if v, ok := m["strict"]; ok { - b := toBool(v) - o.Strict = &b - } - - if fm, ok := m["field"].(map[string]any); ok { - o.Field = &FieldOptions{} - if v, ok := fm["separation"].(string); ok { - o.Field.Separation = v - } - if v, ok := fm["nonameprefix"].(string); ok { - o.Field.NonamePrefix = v - } - if v, ok := fm["empty"].(string); ok { - o.Field.Empty = v - } - if v, ok := fm["exact"].(bool); ok { - o.Field.Exact = v - } - if v, ok := fm["names"].([]any); ok { - for _, n := range v { - if s, ok := n.(string); ok { - o.Field.Names = append(o.Field.Names, s) - } - } - } - if v, ok := fm["names"].([]string); ok { - o.Field.Names = v - } - } - - if rm, ok := m["record"].(map[string]any); ok { - o.Record = &RecordOptions{} - if v, ok := rm["separators"].(string); ok { - o.Record.Separators = v - } - if v, ok := rm["empty"].(bool); ok { - o.Record.Empty = v - } - } - - if sm, ok := m["string"].(map[string]any); ok { - o.String = &StringOptions{} - if v, ok := sm["quote"].(string); ok { - o.String.Quote = v - } - if v, ok := sm["csv"].(bool); ok { - o.String.Csv = &v - } - } - - if v, ok := m["_stream"].(StreamFunc); ok { - o.Stream = v - } - - return o -} - -func toBool(v any) bool { - switch b := v.(type) { - case bool: - return b - default: - return false - } -} diff --git a/package.json b/package.json index 8166ebe..73a709c 100644 --- a/package.json +++ b/package.json @@ -20,33 +20,30 @@ "url": "git://github.com/jsonicjs/csv.git" }, "scripts": { - "test": "node 
--test dist-test/csv.test.js", - "test-some": "node --test --test-name-pattern", + "test": "node --enable-source-maps --test \"dist-test/*.test.js\"", + "test-some": "node --enable-source-maps --test-name-pattern=\"$npm_config_pattern\" --test \"dist-test/*.test.js\"", "test-watch": "node --test --watch dist-test/csv.test.js", + "embed": "node embed-grammar.js", "watch": "tsc --build src test -w", - "doc": "jsonic-doc", - "build": "tsc --build src test", - "prettier": "prettier --write --no-semi --single-quote src/*.ts test/*.ts", + "build": "node embed-grammar.js && tsc --build src test", "clean": "rm -rf node_modules dist dist-test yarn.lock package-lock.json", "reset": "npm run clean && npm i && npm run build && npm test", "repo-tag": "REPO_VERSION=`node -e \"console.log(require('./package').version)\"` && echo TAG: v$REPO_VERSION && git commit -a -m v$REPO_VERSION && git push && git tag v$REPO_VERSION && git push --tags;", "repo-publish": "npm run clean && npm i && npm run repo-publish-quick", - "repo-publish-quick": "npm run prettier && npm run build && npm run test && npm run doc && npm run repo-tag && npm publish --access public --registry https://registry.npmjs.org " + "repo-publish-quick": "npm run build && npm run test && npm run repo-tag && npm publish --access public --registry https://registry.npmjs.org " }, "license": "MIT", "files": [ + "src", "dist", "LICENSE" ], "devDependencies": { - "@jsonic/doc": "^0.0.9", "@types/node": "^25.3.3", "csv-spectrum": "^2.0.0", - "esbuild": "^0.27.3", - "prettier": "^3.8.1", "typescript": "^5.9.3" }, "peerDependencies": { - "jsonic": ">=2" + "jsonic": "^2.22.2" } } diff --git a/src/csv.ts b/src/csv.ts index d6326ca..429b266 100644 --- a/src/csv.ts +++ b/src/csv.ts @@ -1,7 +1,4 @@ -/* Copyright (c) 2021-2024 Richard Rodger, MIT License */ - -// NOTE: Good example of use case for `r` control in open rule, where -// close state only gets called on last rule. 
+/* Copyright (c) 2021-2025 Richard Rodger, MIT License */ // Import Jsonic types used by plugins. import { @@ -13,7 +10,6 @@ import { Config, Options, Lex, - AltSpec, } from 'jsonic' // See defaults below for commentary. @@ -43,6 +39,63 @@ type CsvOptions = { } } +// --- BEGIN EMBEDDED csv-grammar.jsonic --- +const grammarText = ` +# CSV Grammar Definition +# Parsed by a standard Jsonic instance and passed to jsonic.grammar() +# Function references (@ prefixed) are resolved against the refs map +# +# Token naming: +# #LN - line ending (removed from per-instance IGNORE set) +# #SP - whitespace (removed from per-instance IGNORE set in strict mode) +# #CA - comma / field separator +# #ZZ - end of input +# #VAL - token set: text, string, number, value literals +# +# Rules csv, newline, record, text are fully defined here. +# Rules list, elem, val are modified in code (strict mode defines from scratch; +# non-strict prepends to existing defaults to preserve JSON parsing). + +{ + rule: csv: open: [ + { s: '#ZZ' } + { s: '#LN' p: newline c: '@not-record-empty' } + { p: record } + ] + + rule: newline: open: [ + { s: '#LN #LN' r: newline } + { s: '#LN' r: newline } + { s: '#ZZ' } + { r: record } + ] + rule: newline: close: [ + { s: '#LN #LN' r: newline } + { s: '#LN' r: newline } + { s: '#ZZ' } + { r: record } + ] + + rule: record: open: [ + { p: list } + ] + rule: record: close: [ + { s: '#ZZ' } + { s: '#LN #ZZ' b: 1 } + { s: '#LN' r: '@record-close-next' } + ] + + rule: text: open: [ + { s: ['#VAL' '#SP'] b: 1 r: text n: { text: 1 } g: 'csv,space,follows' a: '@text-follows' } + { s: ['#SP' '#VAL'] r: text n: { text: 1 } g: 'csv,space,leads' a: '@text-leads' } + { s: ['#SP' '#CA #LN #ZZ'] b: 1 n: { text: 1 } g: 'csv,end' a: '@text-end' } + { s: '#SP' n: { text: 1 } g: 'csv,space' a: '@text-space' p: '@text-space-push' } + {} + ] +} +` +// --- END EMBEDDED csv-grammar.jsonic --- + // Plugin implementation. 
const Csv: Plugin = (jsonic: Jsonic, options: CsvOptions) => { // Normalize boolean options. @@ -62,7 +115,6 @@ const Csv: Plugin = (jsonic: Jsonic, options: CsvOptions) => { // In strict mode, Jsonic field content is not parsed. if (strict) { if (false !== options.string.csv) { - // jsonic.lex(buildCsvStringMatcher(options)) jsonic.options({ lex: { match: { @@ -79,7 +131,6 @@ const Csv: Plugin = (jsonic: Jsonic, options: CsvOptions) => { // Fields may contain Jsonic content. else { if (true === options.string.csv) { - // jsonic.lex(buildCsvStringMatcher(options)) jsonic.options({ lex: { match: { @@ -127,9 +178,6 @@ const Csv: Plugin = (jsonic: Jsonic, options: CsvOptions) => { token['#CA'] = options.field.separation } - // Usually [#TX, #SP, #NR, #VL] - let VAL = jsonic.tokenSet.VAL - // Jsonic option overrides. let jsonicOptions: any = { rule: { @@ -139,8 +187,6 @@ const Csv: Plugin = (jsonic: Jsonic, options: CsvOptions) => { token, }, tokenSet: { - // See jsonic/src/defaults.ts; and util.deep merging - // ignore: [ IGNORE: [ strict ? null : undefined, // Handle #SP space null, // Handle #LN newlines @@ -183,133 +229,148 @@ fields per row are expected.`, jsonic.options(jsonicOptions) - let { LN, CA, SP, ZZ } = jsonic.token - // Starting rule. - jsonic.rule('csv', (rs: RuleSpec): RuleSpec => { - rs.bo((r: Rule, ctx: Context) => { - ctx.u.recordI = 0 // Record counter. - stream && stream('start') // If streaming, send 'start' event. - r.node = [] // Top level list of records - the result! - }) - .open([ - // End immediately if EOF - { s: [ZZ] }, + // Named function references for declarative grammar definition. + const refs: Record = { - // Ignore empty lines from the start. - // !record_empty && { s: [LN], p: 'newline' }, - !record_empty - ? { s: [LN], p: 'newline' } - : (null as unknown as AltSpec), + // === State actions (auto-wired by @rulename-{bo,ao,bc,ac} convention) === - // Look for the first record. 
- { p: 'record' }, - ]) - .ac(() => { - stream && stream('end') - }) + '@csv-bo': (r: Rule, ctx: Context) => { + ctx.u.recordI = 0 + stream && stream('start') + r.node = [] + }, - return rs - }) + '@csv-ac': (_r: Rule) => { + stream && stream('end') + }, - // Ignore empty lines. Keep consuming LN until there's a record or EOF. - jsonic.rule('newline', (rs: RuleSpec) => { - rs.open([ - // NOTE: r in open means no close except final - { s: [LN, LN], r: 'newline' }, - { s: [LN], r: 'newline' }, - { s: [ZZ] }, - { r: 'record' }, - ]).close([ - { s: [LN, LN], r: 'newline' }, - { s: [LN], r: 'newline' }, - { s: [ZZ] }, - { r: 'record' }, - ]) - }) + '@record-bc': (r: Rule, ctx: Context) => { + let fields: string[] = ctx.u.fields || options.field.names + + if (0 === ctx.u.recordI && header) { + ctx.u.fields = undefined === r.child.node ? [] : r.child.node + } else { + let record: any = r.child.node || [] + + if (objres) { + let obj: Record = {} + let i = 0 + + if (fields) { + if (options.field.exact) { + if (record.length !== fields.length) { + return ctx.t0.bad( + record.length > fields.length + ? 'csv_extra_field' + : 'csv_missing_field', + ) + } + } - // A CSV record line. - jsonic.rule('record', (rs: RuleSpec) => { - rs.open([ - // Reuse Jsonic list rule - { p: 'list' }, - ]) - .close([ - // EOF also ends CSV - { s: [ZZ] }, + let fI = 0 + for (; fI < fields.length; fI++) { + obj[fields[fI]] = + undefined === record[fI] ? options.field.empty : record[fI] + } + i = fI + } - // Last LN is not a record. - { s: [LN, ZZ] }, + for (; i < record.length; i++) { + let field_name = options.field.nonameprefix + i + obj[field_name] = + undefined === record[i] ? options.field.empty : record[i] + } - // Ignore (or not) empty lines. - { s: [LN], r: record_empty ? 
'record' : 'newline' }, - ]) - .bc((rule: Rule, ctx: Context) => { - // Record field names - let fields: string[] = ctx.u.fields || options.field.names + record = obj + } else { + for (let i = 0; i < record.length; i++) { + record[i] = + undefined === record[i] ? options.field.empty : record[i] + } + } - // First line is fields if options.header=true - if (0 === ctx.u.recordI && header) { - ctx.u.fields = undefined === rule.child.node ? [] : rule.child.node + if (stream) { + stream('record', record) + } else { + r.node.push(record) } + } - // A normal record line. - else { - let record: any = rule.child.node || [] - - // Return records as objects with names fields - if (objres) { - let obj: Record = {} - let i = 0 - - if (fields) { - if (options.field.exact) { - if (record.length !== fields.length) { - return ctx.t0.bad( - record.length > fields.length - ? 'csv_extra_field' - : 'csv_missing_field', - ) - } - } + ctx.u.recordI++ + }, - let fI = 0 - for (; fI < fields.length; fI++) { - obj[fields[fI]] = - undefined === record[fI] ? options.field.empty : record[fI] - } - i = fI - } + '@text-bc': (r: Rule) => { + r.parent.node = undefined === r.child.node ? r.node : r.child.node + }, - // Handle extra unnamed fields. - for (; i < record.length; i++) { - let field_name = options.field.nonameprefix + i - obj[field_name] = - undefined === record[i] ? options.field.empty : record[i] - } - record = obj - } + // === Alt actions === - // Return records as arrays. - else { - for (let i = 0; i < record.length; i++) { - record[i] = - undefined === record[i] ? options.field.empty : record[i] - } - } + '@elem-open-empty': (r: Rule) => { + r.node.push(options.field.empty) + r.u.done = true + }, - if (stream) { - stream('record', record) - } else { - rule.node.push(record) - } - } + '@elem-close-trailing': (r: Rule) => { + r.node.push(options.field.empty) + }, - ctx.u.recordI++ - }) - return rs - }) + '@text-follows': (r: Rule) => { + let v = 1 === r.n.text ? 
r : r.prev + r.node = v.node = (1 === r.n.text ? '' : r.prev.node) + r.o0.val + }, + + '@text-leads': (r: Rule) => { + let v = 1 === r.n.text ? r : r.prev + r.node = v.node = + (1 === r.n.text ? '' : r.prev.node) + + (2 <= r.n.text || !trim ? r.o0.src : '') + + r.o1.src + }, + + '@text-end': (r: Rule) => { + let v = 1 === r.n.text ? r : r.prev + r.node = v.node = + (1 === r.n.text ? '' : r.prev.node) + (!trim ? r.o0.src : '') + }, + + '@text-space': (r: Rule) => { + if (strict) { + let v = 1 === r.n.text ? r : r.prev + r.node = v.node = + (1 === r.n.text ? '' : r.prev.node) + (!trim ? r.o0.src : '') + } + }, + + + // === Condition refs === + + '@not-record-empty': () => !record_empty, + + + // === FuncRef for dynamic rule names === + + '@record-close-next': () => record_empty ? 'record' : 'newline', + + '@text-space-push': () => strict ? '' : 'val', + } + + + // Usually [#TX, #ST, #NR, #VL] + let VAL = jsonic.tokenSet.VAL + + let { LN, CA, SP, ZZ } = jsonic.token + + // Parse embedded grammar definition using a separate standard Jsonic instance. + const grammarDef = Jsonic.make()(grammarText) + grammarDef.ref = refs + jsonic.grammar(grammarDef) + + + // Rules list, elem, val are modified in code rather than the grammar file, + // because in non-strict mode the default jsonic alternatives must be preserved + // to support embedded JSON values like [1,2] and {x:1}. jsonic.rule('list', (rs: RuleSpec) => { return rs @@ -339,7 +400,6 @@ fields per row are expected.`, }, }, ], - // { append: false } ) .close( @@ -354,7 +414,6 @@ fields per row are expected.`, // LN ends record { s: [LN], b: 1 }, ], - // { append: false } ) }) @@ -368,80 +427,14 @@ fields per row are expected.`, // LN ends record { s: [LN], b: 1 }, ], - // { append: false } ) }) - // Handle text and space concatentation - // NOTE: trim and string are complications. 
+ // Close is called on final rule - set parent val node jsonic.rule('text', (rs: RuleSpec) => { - return ( - rs - - // Space within non-space is preserved as part of text value. - .open([ - { - // NOTE: r in open means no close except final - s: [VAL, SP], - b: 1, - r: 'text', - n: { text: 1 }, - g: 'csv,space,follows', - a: (r: Rule) => { - // Keep appending space to prev node - let v = 1 === r.n.text ? r : r.prev - r.node = v.node = (1 === r.n.text ? '' : r.prev.node) + r.o0.val - }, - }, - { - s: [SP, VAL], - r: 'text', - n: { text: 1 }, - g: 'csv,space,leads', - a: (r: Rule) => { - // Inner space - let v = 1 === r.n.text ? r : r.prev - r.node = v.node = - (1 === r.n.text ? '' : r.prev.node) + - (2 <= r.n.text || !trim ? r.o0.src : '') + - r.o1.src - }, - }, - { - s: [SP, [CA, LN, ZZ]], - b: 1, - n: { text: 1 }, - g: 'csv,end', - a: (r: Rule) => { - // Final space - let v = 1 === r.n.text ? r : r.prev - r.node = v.node = - (1 === r.n.text ? '' : r.prev.node) + (!trim ? r.o0.src : '') - }, - }, - { - s: [SP], - n: { text: 1 }, - g: 'csv,space', - a: (r: Rule) => { - if (strict) { - let v = 1 === r.n.text ? r : r.prev - r.node = v.node = - (1 === r.n.text ? '' : r.prev.node) + (!trim ? r.o0.src : '') - } - }, - p: strict ? undefined : 'val', - }, - - // Accept anything after text. - {}, - ]) - - // Close is called on final rule - set parent val node - .bc((r: Rule) => { - r.parent.node = undefined === r.child.node ? r.node : r.child.node - }) - ) + rs.bc((r: Rule) => { + r.parent.node = undefined === r.child.node ? 
r.node : r.child.node + }) }) } @@ -465,7 +458,6 @@ function buildCsvStringMatcher(csvopts: CsvOptions) { ++cI let s: string[] = [] - // let rs: string | undefined for (sI; sI < srclen; sI++) { cI++ @@ -487,7 +479,6 @@ function buildCsvStringMatcher(csvopts: CsvOptions) { else { let bI = sI - // TODO: move to cfgx let qc = q.charCodeAt(0) let cc = src.charCodeAt(sI) @@ -522,7 +513,6 @@ function buildCsvStringMatcher(csvopts: CsvOptions) { const tkn = lex.token( '#ST', - // s.join(EMPTY), s.join(''), src.substring(pnt.sI, sI), pnt, @@ -539,65 +529,27 @@ function buildCsvStringMatcher(csvopts: CsvOptions) { // Default option values. Csv.defaults = { - // Trim surrounding space. Default: false (!strict=>true) - // trim$: 'Trim surrounding space', trim: null, - - // Support comments. Default: false (!strict=>true) comment: null, - - // Support numbers. Default: false (!strict=>true) number: null, - - // Support exact values (such as booleans). Default: false (!strict=>true) value: null, - - // First row is headers. header: true, - - // Records are returned as objects. If false, as arrays. object: true, - - // Stream records. stream: null, - - // Parse standard CSV, ignoring embedded JSON. Default: false. - // When true, changes some defaults, e.g. trim=>true strict: true, - - // Control field handling field: { - // Separator string (only one!) separation: null, - - // Create numbered names for extra fields found in a record. nonameprefix: 'field~', - - // Value to insert for empty fields. empty: '', - - // Predefined field names (string[]). names: undefined, - - // Require each row to have an exact number of fields (same number as headers). exact: false, }, - - // Control record handling. record: { - // Separator characters (not string!) separators: null, - - // Allow empty lines to generate records. empty: false, }, - - // Control string handling. string: { - // Quote character for CSV-style strings. quote: '"', - - // If false, use Jsonic-style strings. 
csv: null, }, } as CsvOptions