diff --git a/CHANGES.md b/CHANGES.md index 14c77060..0a123ab9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -16,6 +16,7 @@ * Fix `JSON.generate` `strict: true` mode to also restrict hash keys. * Fix `JSON::Coder` to also invoke block for hash keys that aren't strings nor symbols. * Fix `JSON.unsafe_load` usage with proc +* Fix the parser to more consistently reject invalid UTF-16 surrogate pairs. ### 2025-07-28 (2.13.2) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index e34f1999..297031dc 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -713,11 +713,16 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c } if (pe[0] == '\\' && pe[1] == 'u') { uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2); + + if ((sur & 0xFC00) != 0xDC00) { + raise_parse_error_at("invalid surrogate pair at %s", state, p); + } + ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF)); pe += 5; } else { - unescape = (char *) "?"; + raise_parse_error_at("incomplete surrogate pair at %s", state, p); break; } } diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index a9b0624f..9d387cb8 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -319,6 +319,12 @@ def test_invalid_unicode_escape assert_raise(JSON::ParserError) { parse('"\u111___"') } end + def test_invalid_surogates + assert_raise(JSON::ParserError) { parse('"\\uD800"') } + assert_raise(JSON::ParserError) { parse('"\\uD800_________________"') } + assert_raise(JSON::ParserError) { parse('"\\uD800\\u0041"') } + end + def test_parse_big_integers json1 = JSON(orig = (1 << 31) - 1) assert_equal orig, parse(json1)