diff --git a/go/csv_test.go b/go/csv_test.go index 4206304..835c8c8 100644 --- a/go/csv_test.go +++ b/go/csv_test.go @@ -13,10 +13,11 @@ import ( // fixtureEntry represents one entry in the test manifest. type fixtureEntry struct { - Name string `json:"name"` - CsvFile string `json:"csvFile,omitempty"` - Opt map[string]any `json:"opt,omitempty"` - Err string `json:"err,omitempty"` + Name string `json:"name"` + CsvFile string `json:"csvFile,omitempty"` + Opt map[string]any `json:"opt,omitempty"` + JsonicOpt map[string]any `json:"jsonicOpt,omitempty"` + Err string `json:"err,omitempty"` } func fixturesDir() string { @@ -50,7 +51,7 @@ func TestFixtures(t *testing.T) { } opts := mapToOptions(entry.Opt) - result, err := Parse(string(csvData), opts) + result, err := parseWithJsonicOpt(string(csvData), opts, entry.JsonicOpt) if err != nil { if entry.Err != "" { return // expected error @@ -379,6 +380,106 @@ func TestRecordSeparators(t *testing.T) { }) } +// parseWithJsonicOpt parses CSV with optional jsonic-level options (custom comment defs, value defs, etc.) +func parseWithJsonicOpt(src string, opts CsvOptions, jsonicOpt map[string]any) ([]any, error) { + if len(jsonicOpt) == 0 { + return Parse(src, opts) + } + + r := resolve(&opts) + + jopts := jsonic.Options{ + Rule: &jsonic.RuleOptions{ + Start: "csv", + }, + Number: &jsonic.NumberOptions{ + Lex: boolPtr(r.number), + }, + Value: &jsonic.ValueOptions{ + Lex: boolPtr(r.value), + }, + Comment: &jsonic.CommentOptions{ + Lex: boolPtr(r.comment), + }, + Lex: &jsonic.LexOptions{ + EmptyResult: []any{}, + }, + } + + // Apply jsonicOpt: value.def + // Start with defaults and merge custom defs. A null value removes the def. + if valOpt, ok := jsonicOpt["value"].(map[string]any); ok { + if defMap, ok := valOpt["def"].(map[string]any); ok { + if jopts.Value == nil { + jopts.Value = &jsonic.ValueOptions{} + } + // Start with defaults + jopts.Value.Def = map[string]*jsonic.ValueDef{ + "true": {Val: true}, + "false": {Val: false}, + "null": {Val: nil}, + } + // Merge custom defs + for k, v := range defMap { + if v == nil { + // null means remove this def + delete(jopts.Value.Def, k) + } else if vm, ok := v.(map[string]any); ok { + jopts.Value.Def[k] = &jsonic.ValueDef{Val: vm["val"]} + } + } + } + } + + // Apply jsonicOpt: comment.def + if cmtOpt, ok := jsonicOpt["comment"].(map[string]any); ok { + if defMap, ok := cmtOpt["def"].(map[string]any); ok { + if jopts.Comment == nil { + jopts.Comment = &jsonic.CommentOptions{} + } + jopts.Comment.Def = make(map[string]*jsonic.CommentDef) + for name, v := range defMap { + if cm, ok := v.(map[string]any); ok { + def := &jsonic.CommentDef{} + if start, ok := cm["start"].(string); ok { + def.Start = start + } + if end, ok := cm["end"].(string); ok { + def.End = end + } else { + // No end marker means line comment + def.Line = true + } + jopts.Comment.Def[name] = def + } + } + } + } + + if r.recordSep != "" { + jopts.Line = &jsonic.LineOptions{ + Chars: r.recordSep, + RowChars: r.recordSep, + } + } + + j := jsonic.Make(jopts) + pluginMap := optionsToMap(&opts) + j.Use(Csv, pluginMap) + + result, err := j.Parse(src) + if err != nil { + return nil, err + } + if result == nil { + return []any{}, nil + } + if arr, ok := result.([]any); ok { + return arr, nil + } + return []any{}, nil +} + // Helper functions func assertRecords(t *testing.T, name string, result []any, expected []map[string]any) { diff --git a/go/go.mod b/go/go.mod index dd82828..23040f5 100644 --- a/go/go.mod +++ b/go/go.mod @@ -2,4 +2,4 @@ module github.com/jsonicjs/csv/go go 1.24.7 -require github.com/jsonicjs/jsonic/go v0.1.4 // indirect +require github.com/jsonicjs/jsonic/go v0.1.6 // indirect diff --git a/go/go.sum b/go/go.sum index dc99d17..2f8eb56 100644 --- a/go/go.sum +++ b/go/go.sum @@ -1,2 +1,4 @@ github.com/jsonicjs/jsonic/go v0.1.4 h1:V1KEzmg/jIwk25+JYj8ig1+B7190rHmH8WqZbT7XlgA= github.com/jsonicjs/jsonic/go v0.1.4/go.mod h1:ObNKlCG7esWoi4AHCpdgkILvPINV8bpvkbCd4llGGUg= +github.com/jsonicjs/jsonic/go v0.1.6 h1:oUw4vxCK6tqa7SGN87vjCtx3sCpeHXdqfl25hx5LKP0= +github.com/jsonicjs/jsonic/go v0.1.6/go.mod h1:ObNKlCG7esWoi4AHCpdgkILvPINV8bpvkbCd4llGGUg= diff --git a/go/plugin.go b/go/plugin.go index c0de518..81b03b2 100644 --- a/go/plugin.go +++ b/go/plugin.go @@ -43,6 +43,17 @@ func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { delete(cfg.FixedTokens, ":") cfg.SortFixedTokens() + // When the CSV string matcher is active, disable the built-in string + // matcher and remove quote chars from string chars. This way: + // - The CSV string matcher (custom, priority 100000) handles proper quoted fields + // - Mid-field quotes (B" in A,B",C) are treated as literal text by the text matcher + if useCsvString { + cfg.StringLex = false + for ch := range cfg.StringChars { + delete(cfg.StringChars, ch) + } + } + // Exclude jsonic and imp rule groups. j.Exclude("jsonic", "imp") } else { @@ -283,6 +294,24 @@ func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { rawRecord = []any{} } + // Validate field count if exact mode enabled + if opts.fieldExact && fieldSlice != nil { + if len(rawRecord) != len(fieldSlice) { + errCode := "csv_missing_field" + if len(rawRecord) > len(fieldSlice) { + errCode = "csv_extra_field" + } + errTkn := &jsonic.Token{ + Name: "#BD", + Tin: jsonic.TinBD, + Why: errCode, + Src: errCode, + } + ctx.ParseErr = errTkn + return + } + } + if objres { obj := make(map[string]any) var keys []string @@ -596,7 +625,8 @@ func tokenStr(t *jsonic.Token) string { // double-quote escaping: "a""b" → a"b func buildCsvStringMatcher(opts *resolved, j *jsonic.Jsonic) jsonic.LexMatcher { quoteChar := opts.quote - return func(lex *jsonic.Lex) *jsonic.Token { + cfg := j.Config() + return func(lex *jsonic.Lex, rule *jsonic.Rule) *jsonic.Token { pnt := lex.Cursor() src := lex.Src sI := pnt.SI @@ -611,6 +641,17 @@ func buildCsvStringMatcher(opts *resolved, j *jsonic.Jsonic) jsonic.LexMatcher { return nil } + // Only match when quote is at the start of a field: + // beginning of input, after a comma/separator, after a newline, or after whitespace. + if sI > 0 { + prev := rune(src[sI-1]) + _, isFixed := cfg.FixedTokens[string(prev)] + if !isFixed && !cfg.LineChars[prev] && !cfg.SpaceChars[prev] { + // Mid-field quote - don't match, let text matcher handle it + return nil + } + } + q := quoteChar qLen := len(q) rI := pnt.RI @@ -682,8 +723,14 @@ func buildCsvStringMatcher(opts *resolved, j *jsonic.Jsonic) jsonic.LexMatcher { s.WriteString(src[bI:sI]) } - // Unterminated string - return nil + // Unterminated string - return a bad token + badSrc := src[pnt.SI:sI] + tkn := lex.Token("#BD", jsonic.TinBD, nil, badSrc) + tkn.Why = "unterminated_string" + pnt.SI = sI + pnt.RI = rI + pnt.CI = cI + return tkn } }