From 797aaa72f92b6eef082e2bcf3ee78a524c6a3a2e Mon Sep 17 00:00:00 2001 From: Scott Myron Date: Sat, 1 Nov 2025 22:00:22 -0500 Subject: [PATCH 1/2] Various small optimizations and cleanups in the parser. --- /* Review notes (free-form area between the `---` separator and the diffstat). This patch rewrites rstring_cache_cmp so that, when the test on the new #if line passes, inputs of equal length are compared 8 bytes at a time by memcpy-ing each chunk into a uint64_t; on the first differing pair both words go through __builtin_bswap64 before the unsigned comparison, and the remaining tail is compared byte by byte as unsigned char. Inputs of different length return the narrowed length difference, as before. The #else branch keeps the previous memcmp-based body. rstring_cache_cmp and rstring_cache_fetch are also switched from `static inline` / `static` to `static ALWAYS_INLINE()`. NOTE(review): the new #if line evaluates `defined(__has_builtin) && __has_builtin(__builtin_bswap64)` in one controlling expression. The preprocessor has to parse the whole expression, so a compiler that does not provide __has_builtin is expected to reject the `__has_builtin(` call rather than fall through to the #else branch; the GCC and Clang manuals recommend testing `defined(__has_builtin)` in an outer #if (or supplying a function-like fallback macro) and calling `__has_builtin(...)` only in a nested #if. Suggest restructuring the guard that way. NOTE(review): `(int)(length - rstring_length)` narrows a long difference to int; presumably the compared strings are short enough for that to be safe - confirm against the callers. */ ext/json/ext/parser/parser.c | 38 ++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 20754249..50f6e168 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -84,17 +84,51 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring cache->entries[index] = rstring; } -static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) +static ALWAYS_INLINE() int rstring_cache_cmp(const char *str, const long length, VALUE rstring) { +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && defined(__has_builtin) && __has_builtin(__builtin_bswap64) + const char *rptr; + long rstring_length; + + RSTRING_GETMEM(rstring, rptr, rstring_length); + + if (length != rstring_length) { + return (int)(length - rstring_length); + } + + long i = 0; + + for (; i+8 <= length; i += 8) { + uint64_t a, b; + memcpy(&a, str + i, 8); + memcpy(&b, rptr + i, 8); + if (a != b) { + a = __builtin_bswap64(a); + b = __builtin_bswap64(b); + return (a < b) ? -1 : 1; + } + } + + for (; i < length; i++) { + unsigned char ca = (unsigned char)str[i]; + unsigned char cb = (unsigned char)rptr[i]; + if (ca != cb) { + return (ca < cb) ? 
-1 : 1; + } + } + + return 0; +#else long rstring_length = RSTRING_LEN(rstring); if (length == rstring_length) { return memcmp(str, RSTRING_PTR(rstring), length); } else { return (int)(length - rstring_length); } +#endif } -static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +static ALWAYS_INLINE() VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) { int low = 0; int high = cache->length - 1; From cb6391ed85a2fb99a3946a8a506778fab88e4761 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 3 Nov 2025 11:19:56 +0100 Subject: [PATCH 2/2] parser.c: Extract `rstring_memcmp` --- /* Review notes. This patch moves the word-wise comparison loop out of rstring_cache_cmp into a new helper, rstring_memcmp(str, rptr, length), defined under the same #if guard; in the #else case rstring_memcmp is #defined to memcmp. rstring_cache_cmp now reads the string pointer and length with RSTRING_GETMEM, returns the narrowed length difference when the lengths differ, and otherwise returns rstring_memcmp(str, rptr, length). NOTE(review): the `defined(__has_builtin) && __has_builtin(__builtin_bswap64)` guard appears here as an unchanged context line, so a compiler without __has_builtin is still expected to fail on that line after this patch; splitting it into an outer `defined(__has_builtin)` test with a nested `__has_builtin(...)` test would let such compilers reach the memcmp fallback. */ ext/json/ext/parser/parser.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 50f6e168..fba204e7 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -84,18 +84,9 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring cache->entries[index] = rstring; } -static ALWAYS_INLINE() int rstring_cache_cmp(const char *str, const long length, VALUE rstring) -{ #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && defined(__has_builtin) && __has_builtin(__builtin_bswap64) - const char *rptr; - long rstring_length; - - RSTRING_GETMEM(rstring, rptr, rstring_length); - - if (length != rstring_length) { - return (int)(length - rstring_length); - } - +static ALWAYS_INLINE() int rstring_memcmp(const char *str, const char *rptr, const long length) +{ long i = 0; for (; i+8 <= length; i += 8) { @@ -118,14 +109,23 @@ static ALWAYS_INLINE() int rstring_cache_cmp(const char *str, const long length, } return 0; +} #else - long rstring_length = RSTRING_LEN(rstring); - if (length == rstring_length) { - return memcmp(str, RSTRING_PTR(rstring), length); - } else { +#define rstring_memcmp memcmp +#endif + +static ALWAYS_INLINE() int rstring_cache_cmp(const 
char *str, const long length, VALUE rstring) +{ + const char *rptr; + long rstring_length; + + RSTRING_GETMEM(rstring, rptr, rstring_length); + + if (length != rstring_length) { return (int)(length - rstring_length); } -#endif + + return rstring_memcmp(str, rptr, length); } static ALWAYS_INLINE() VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)