55 * Created:
66 * April 12, 1961 at 09:07:34 PM GMT+3
77 * Modified:
8- * February 16, 2026 at 4:01:29 PM GMT+3
8+ * February 16, 2026 at 4:40:34 PM GMT+3
99 *
1010 */
1111/*
@@ -46,7 +46,7 @@ extern const signed char hex_lookup[256];
4646#define JSON_FALSE_LEN 5
4747#define HEX_OFFSET 10
4848#define MIN_PRINTABLE_ASCII 0x20 /* inclusive lower bound: parse_string rejects bytes below this value */
49- #define MAX_PRINTABLE_ASCII 0x7E
49+ #define MAX_PRINTABLE_ASCII 0x80 /* exclusive upper bound: the scalar scan rejects bytes >= this value */
5050#ifndef _WIN32
5151#define HIGH_SURROGATE_START 0xD800
5252#define HIGH_SURROGATE_END 0xDBFF
@@ -228,15 +228,19 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end
228228 while (true) {
229229 const size_t len = end - p ;
230230 size_t span = 0 ;
231+ const char * s_chk = p - span ;
232+ size_t len_chk = span ;
233+ size_t j = 0 ;
231234#if defined(__AVX2__ )
232- const size_t offset = 32 ;
235+ const size_t offset_2 = 32 ;
236+ const size_t offset_4 = 64 ;
233237 const __m256i quote_vec = _mm256_set1_epi8 ('\"' );
234238 const __m256i escape_vec = _mm256_set1_epi8 ('\\' );
235239 size_t i = 0 ;
236240 /* Unrolled: process 64 bytes per iteration (2 AVX2 registers) */
237- for (; i + 64 <= len ; i += 64 ) {
241+ for (; i + offset_4 <= len ; i += offset_4 ) {
238242 __m256i chunk1 = _mm256_loadu_si256 ((const __m256i * )(p + i ));
239- __m256i chunk2 = _mm256_loadu_si256 ((const __m256i * )(p + i + 32 ));
243+ __m256i chunk2 = _mm256_loadu_si256 ((const __m256i * )(p + i + offset_2 ));
240244 __m256i match1 = _mm256_or_si256 (_mm256_cmpeq_epi8 (chunk1 , quote_vec ), _mm256_cmpeq_epi8 (chunk1 , escape_vec ));
241245 __m256i match2 = _mm256_or_si256 (_mm256_cmpeq_epi8 (chunk2 , quote_vec ), _mm256_cmpeq_epi8 (chunk2 , escape_vec ));
242246 int mask1 = _mm256_movemask_epi8 (match1 );
@@ -246,12 +250,12 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end
246250 goto found ;
247251 }
248252 if (mask2 != 0 ) {
249- span = i + 32 + __builtin_ctz (mask2 );
253+ span = i + offset_2 + __builtin_ctz (mask2 );
250254 goto found ;
251255 }
252256 }
253257 /* Process remaining chunks of 32 bytes */
254- for (; i + offset <= len ; i += offset ) {
258+ for (; i + offset_2 <= len ; i += offset_2 ) {
255259 __m256i chunk = _mm256_loadu_si256 ((const __m256i * )(p + i ));
256260 __m256i quote_match = _mm256_cmpeq_epi8 (chunk , quote_vec );
257261 __m256i escape_match = _mm256_cmpeq_epi8 (chunk , escape_vec );
@@ -268,18 +272,46 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end
268272 goto found ;
269273 }
270274 }
275+ const __m256i limit2 = _mm256_set1_epi8 (0x20 );
276+ const __m256i high_bit2 = _mm256_set1_epi8 (0x80 );
277+ const __m256i limit_shifted2 = _mm256_xor_si256 (limit2 , high_bit2 );
278+ for (; j + 64 <= len_chk ; j += 64 ) {
279+ __m256i chunk1 = _mm256_loadu_si256 ((const __m256i * )(s_chk + j ));
280+ __m256i chunk2 = _mm256_loadu_si256 ((const __m256i * )(s_chk + j + offset_2 ));
281+ __m256i chunk1_shifted = _mm256_xor_si256 (chunk1 , high_bit2 );
282+ __m256i chunk2_shifted = _mm256_xor_si256 (chunk2 , high_bit2 );
283+ __m256i result_mask1 = _mm256_cmpgt_epi8 (limit_shifted2 , chunk1_shifted );
284+ __m256i result_mask2 = _mm256_cmpgt_epi8 (limit_shifted2 , chunk2_shifted );
285+ if (_mm256_movemask_epi8 (result_mask1 ) != 0 || _mm256_movemask_epi8 (result_mask2 ) != 0 ) {
286+ return false;
287+ }
288+ }
289+ for (; j + offset_2 <= len_chk ; j += offset_2 ) {
290+ __m256i chunk = _mm256_loadu_si256 ((const __m256i * )(s_chk + j ));
291+ __m256i chunk_shifted = _mm256_xor_si256 (chunk , high_bit2 );
292+ __m256i result_mask = _mm256_cmpgt_epi8 (limit_shifted2 , chunk_shifted );
293+ if (_mm256_movemask_epi8 (result_mask ) != 0 ) {
294+ return false;
295+ }
296+ }
271297 span = len ;
272298#elif defined(__SSE2__ )
273- const size_t offset = 16 ;
299+ const size_t offset_1 = 16 ;
300+ const size_t offset_2 = 32 ;
301+ const size_t offset_3 = 48 ;
302+ const size_t offset_4 = 64 ;
274303 const __m128i quote_vec = _mm_set1_epi8 ('\"' );
275304 const __m128i escape_vec = _mm_set1_epi8 ('\\' );
305+ const __m128i limit2 = _mm_set1_epi8 (0x20 );
306+ const __m128i high_bit2 = _mm_set1_epi8 (0x80 );
307+ const __m128i limit_shifted2 = _mm_xor_si128 (limit2 , high_bit2 );
276308 size_t i = 0 ;
277309 /* Unrolled: process 64 bytes per iteration (4 SSE2 registers) */
278- for (; i + 64 <= len ; i += 64 ) {
310+ for (; i + offset_4 <= len ; i += offset_4 ) {
279311 __m128i chunk1 = _mm_loadu_si128 ((const __m128i * )(p + i ));
280- __m128i chunk2 = _mm_loadu_si128 ((const __m128i * )(p + i + 16 ));
281- __m128i chunk3 = _mm_loadu_si128 ((const __m128i * )(p + i + 32 ));
282- __m128i chunk4 = _mm_loadu_si128 ((const __m128i * )(p + i + 48 ));
312+ __m128i chunk2 = _mm_loadu_si128 ((const __m128i * )(p + i + offset_1 ));
313+ __m128i chunk3 = _mm_loadu_si128 ((const __m128i * )(p + i + offset_2 ));
314+ __m128i chunk4 = _mm_loadu_si128 ((const __m128i * )(p + i + offset_3 ));
283315 __m128i match1 = _mm_or_si128 (_mm_cmpeq_epi8 (chunk1 , quote_vec ), _mm_cmpeq_epi8 (chunk1 , escape_vec ));
284316 __m128i match2 = _mm_or_si128 (_mm_cmpeq_epi8 (chunk2 , quote_vec ), _mm_cmpeq_epi8 (chunk2 , escape_vec ));
285317 __m128i match3 = _mm_or_si128 (_mm_cmpeq_epi8 (chunk3 , quote_vec ), _mm_cmpeq_epi8 (chunk3 , escape_vec ));
@@ -293,20 +325,20 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end
293325 goto found ;
294326 }
295327 if (mask2 != 0 ) {
296- span = i + 16 + __builtin_ctz (mask2 );
328+ span = i + offset_1 + __builtin_ctz (mask2 );
297329 goto found ;
298330 }
299331 if (mask3 != 0 ) {
300- span = i + 32 + __builtin_ctz (mask3 );
332+ span = i + offset_2 + __builtin_ctz (mask3 );
301333 goto found ;
302334 }
303335 if (mask4 != 0 ) {
304- span = i + 48 + __builtin_ctz (mask4 );
336+ span = i + offset_3 + __builtin_ctz (mask4 );
305337 goto found ;
306338 }
307339 }
308340 /* Process remaining chunks of 16 bytes */
309- for (; i + offset <= len ; i += offset ) {
341+ for (; i + offset_1 <= len ; i += offset_1 ) {
310342 __m128i chunk = _mm_loadu_si128 ((const __m128i * )(p + i ));
311343 __m128i quote_match = _mm_cmpeq_epi8 (chunk , quote_vec );
312344 __m128i escape_match = _mm_cmpeq_epi8 (chunk , escape_vec );
@@ -323,6 +355,32 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end
323355 goto found ;
324356 }
325357 }
358+ for (; j + offset_4 <= len_chk ; j += offset_4 ) {
359+ __m128i chunk1 = _mm_loadu_si128 ((const __m128i * )(s_chk + j ));
360+ __m128i chunk2 = _mm_loadu_si128 ((const __m128i * )(s_chk + j + offset_1 ));
361+ __m128i chunk3 = _mm_loadu_si128 ((const __m128i * )(s_chk + j + offset_2 ));
362+ __m128i chunk4 = _mm_loadu_si128 ((const __m128i * )(s_chk + j + offset_3 ));
363+ __m128i chunk1_shifted = _mm_xor_si128 (chunk1 , high_bit2 );
364+ __m128i chunk2_shifted = _mm_xor_si128 (chunk2 , high_bit2 );
365+ __m128i chunk3_shifted = _mm_xor_si128 (chunk3 , high_bit2 );
366+ __m128i chunk4_shifted = _mm_xor_si128 (chunk4 , high_bit2 );
367+ __m128i result_mask1 = _mm_cmplt_epi8 (chunk1_shifted , limit_shifted2 );
368+ __m128i result_mask2 = _mm_cmplt_epi8 (chunk2_shifted , limit_shifted2 );
369+ __m128i result_mask3 = _mm_cmplt_epi8 (chunk3_shifted , limit_shifted2 );
370+ __m128i result_mask4 = _mm_cmplt_epi8 (chunk4_shifted , limit_shifted2 );
371+ if (_mm_movemask_epi8 (result_mask1 ) != 0 || _mm_movemask_epi8 (result_mask2 ) != 0 ||
372+ _mm_movemask_epi8 (result_mask3 ) != 0 || _mm_movemask_epi8 (result_mask4 ) != 0 ) {
373+ return false;
374+ }
375+ }
376+ for (; j + offset_1 <= len_chk ; j += offset_1 ) {
377+ __m128i chunk = _mm_loadu_si128 ((const __m128i * )(s_chk + j ));
378+ __m128i chunk_shifted = _mm_xor_si128 (chunk , high_bit2 );
379+ __m128i result_mask = _mm_cmplt_epi8 (chunk_shifted , limit_shifted2 );
380+ if (_mm_movemask_epi8 (result_mask ) != 0 ) {
381+ return false;
382+ }
383+ }
326384 span = len ;
327385#else
328386 /* non-SSE2 fallback: simple byte scan */
@@ -336,9 +394,15 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end
336394 span = len ;
337395#endif
338396
397+ for (; j < len_chk ; j ++ ) {
398+ if ((unsigned char )s_chk [j ] < MIN_PRINTABLE_ASCII ) {
399+ return false;
400+ }
401+ }
402+
339403 /* scalar tail */
340404 for (; i < len ; i ++ ) {
341- if ((unsigned char )p [i ] < 0x20 || (unsigned char )p [i ] >= 0x80 ) {
405+ if ((unsigned char )p [i ] < MIN_PRINTABLE_ASCII || (unsigned char )p [i ] >= MAX_PRINTABLE_ASCII ) {
342406 return false;
343407 }
344408 if (p [i ] == '"' || p [i ] == '\\' ) {
@@ -352,77 +416,14 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end
352416 p += span ;
353417 if (p == end )
354418 return false;
355- #ifdef STRING_VALIDATION
356- {
357- const char * s_chk = p - span ;
358- size_t len_chk = span ;
359- size_t j = 0 ;
360- #if defined(__AVX2__ )
361- const size_t offset2 = 32 ;
362- const __m256i limit2 = _mm256_set1_epi8 (0x20 );
363- const __m256i high_bit2 = _mm256_set1_epi8 (0x80 );
364- const __m256i limit_shifted2 = _mm256_xor_si256 (limit2 , high_bit2 );
365- for (; j + 64 <= len_chk ; j += 64 ) {
366- __m256i chunk1 = _mm256_loadu_si256 ((const __m256i * )(s_chk + j ));
367- __m256i chunk2 = _mm256_loadu_si256 ((const __m256i * )(s_chk + j + 32 ));
368- __m256i chunk1_shifted = _mm256_xor_si256 (chunk1 , high_bit2 );
369- __m256i chunk2_shifted = _mm256_xor_si256 (chunk2 , high_bit2 );
370- __m256i result_mask1 = _mm256_cmpgt_epi8 (limit_shifted2 , chunk1_shifted );
371- __m256i result_mask2 = _mm256_cmpgt_epi8 (limit_shifted2 , chunk2_shifted );
372- if (_mm256_movemask_epi8 (result_mask1 ) != 0 || _mm256_movemask_epi8 (result_mask2 ) != 0 ) {
373- return false;
374- }
375- }
376- for (; j + offset2 <= len_chk ; j += offset2 ) {
377- __m256i chunk = _mm256_loadu_si256 ((const __m256i * )(s_chk + j ));
378- __m256i chunk_shifted = _mm256_xor_si256 (chunk , high_bit2 );
379- __m256i result_mask = _mm256_cmpgt_epi8 (limit_shifted2 , chunk_shifted );
380- if (_mm256_movemask_epi8 (result_mask ) != 0 ) {
381- return false;
382- }
383- }
384- #elif defined(__SSE2__ )
385- const size_t offset2 = 16 ;
386- const __m128i limit2 = _mm_set1_epi8 (0x20 );
387- const __m128i high_bit2 = _mm_set1_epi8 (0x80 );
388- const __m128i limit_shifted2 = _mm_xor_si128 (limit2 , high_bit2 );
389- for (; j + 64 <= len_chk ; j += 64 ) {
390- __m128i chunk1 = _mm_loadu_si128 ((const __m128i * )(s_chk + j ));
391- __m128i chunk2 = _mm_loadu_si128 ((const __m128i * )(s_chk + j + 16 ));
392- __m128i chunk3 = _mm_loadu_si128 ((const __m128i * )(s_chk + j + 32 ));
393- __m128i chunk4 = _mm_loadu_si128 ((const __m128i * )(s_chk + j + 48 ));
394- __m128i chunk1_shifted = _mm_xor_si128 (chunk1 , high_bit2 );
395- __m128i chunk2_shifted = _mm_xor_si128 (chunk2 , high_bit2 );
396- __m128i chunk3_shifted = _mm_xor_si128 (chunk3 , high_bit2 );
397- __m128i chunk4_shifted = _mm_xor_si128 (chunk4 , high_bit2 );
398- __m128i result_mask1 = _mm_cmplt_epi8 (chunk1_shifted , limit_shifted2 );
399- __m128i result_mask2 = _mm_cmplt_epi8 (chunk2_shifted , limit_shifted2 );
400- __m128i result_mask3 = _mm_cmplt_epi8 (chunk3_shifted , limit_shifted2 );
401- __m128i result_mask4 = _mm_cmplt_epi8 (chunk4_shifted , limit_shifted2 );
402- if (_mm_movemask_epi8 (result_mask1 ) != 0 || _mm_movemask_epi8 (result_mask2 ) != 0 ||
403- _mm_movemask_epi8 (result_mask3 ) != 0 || _mm_movemask_epi8 (result_mask4 ) != 0 ) {
404- return false;
405- }
406- }
407- for (; j + offset2 <= len_chk ; j += offset2 ) {
408- __m128i chunk = _mm_loadu_si128 ((const __m128i * )(s_chk + j ));
409- __m128i chunk_shifted = _mm_xor_si128 (chunk , high_bit2 );
410- __m128i result_mask = _mm_cmplt_epi8 (chunk_shifted , limit_shifted2 );
411- if (_mm_movemask_epi8 (result_mask ) != 0 ) {
412- return false;
413- }
414- }
415- #else
416- const size_t offset2 = 1 ;
417- #endif
418- for (; j < len_chk ; j ++ ) {
419- if ((unsigned char )s_chk [j ] < 0x20 ) {
419+ if (* p == '\"' ) {
420+ /* Validate the parsed string span for control characters */
421+ const char * chk = v -> u .string .ptr ;
422+ while (chk < p ) {
423+ if ((unsigned char )* chk < MIN_PRINTABLE_ASCII )
420424 return false;
421- }
425+ chk ++ ;
422426 }
423- }
424- #endif
425- if (* p == '\"' ) {
426427 v -> u .string .len = p - * s - 1 ;
427428 * s = p + 1 ;
428429 return true;
0 commit comments