Skip to content

Commit b284365

Browse files
updates tests
1 parent 6f3d363 commit b284365

1 file changed

Lines changed: 87 additions & 86 deletions

File tree

src/json.c

Lines changed: 87 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* Created:
66
* April 12, 1961 at 09:07:34 PM GMT+3
77
* Modified:
8-
* February 16, 2026 at 4:01:29 PM GMT+3
8+
* February 16, 2026 at 4:40:34 PM GMT+3
99
*
1010
*/
1111
/*
@@ -46,7 +46,7 @@ extern const signed char hex_lookup[256];
4646
#define JSON_FALSE_LEN 5
4747
#define HEX_OFFSET 10
4848
#define MIN_PRINTABLE_ASCII 0x20
49-
#define MAX_PRINTABLE_ASCII 0x7E
49+
#define MAX_PRINTABLE_ASCII 0x80
5050
#ifndef _WIN32
5151
#define HIGH_SURROGATE_START 0xD800
5252
#define HIGH_SURROGATE_END 0xDBFF
@@ -228,15 +228,19 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end
228228
while (true) {
229229
const size_t len = end - p;
230230
size_t span = 0;
231+
const char *s_chk = p - span;
232+
size_t len_chk = span;
233+
size_t j = 0;
231234
#if defined(__AVX2__)
232-
const size_t offset = 32;
235+
const size_t offset_2 = 32;
236+
const size_t offset_4 = 64;
233237
const __m256i quote_vec = _mm256_set1_epi8('\"');
234238
const __m256i escape_vec = _mm256_set1_epi8('\\');
235239
size_t i = 0;
236240
/* Unrolled: process 64 bytes per iteration (2 AVX2 registers) */
237-
for (; i + 64 <= len; i += 64) {
241+
for (; i + offset_4 <= len; i += offset_4) {
238242
__m256i chunk1 = _mm256_loadu_si256((const __m256i *)(p + i));
239-
__m256i chunk2 = _mm256_loadu_si256((const __m256i *)(p + i + 32));
243+
__m256i chunk2 = _mm256_loadu_si256((const __m256i *)(p + i + offset_2));
240244
__m256i match1 = _mm256_or_si256(_mm256_cmpeq_epi8(chunk1, quote_vec), _mm256_cmpeq_epi8(chunk1, escape_vec));
241245
__m256i match2 = _mm256_or_si256(_mm256_cmpeq_epi8(chunk2, quote_vec), _mm256_cmpeq_epi8(chunk2, escape_vec));
242246
int mask1 = _mm256_movemask_epi8(match1);
@@ -246,12 +250,12 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end
246250
goto found;
247251
}
248252
if (mask2 != 0) {
249-
span = i + 32 + __builtin_ctz(mask2);
253+
span = i + offset_2 + __builtin_ctz(mask2);
250254
goto found;
251255
}
252256
}
253257
/* Process remaining chunks of 32 bytes */
254-
for (; i + offset <= len; i += offset) {
258+
for (; i + offset_2 <= len; i += offset_2) {
255259
__m256i chunk = _mm256_loadu_si256((const __m256i *)(p + i));
256260
__m256i quote_match = _mm256_cmpeq_epi8(chunk, quote_vec);
257261
__m256i escape_match = _mm256_cmpeq_epi8(chunk, escape_vec);
@@ -268,18 +272,46 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end
268272
goto found;
269273
}
270274
}
275+
const __m256i limit2 = _mm256_set1_epi8(0x20);
276+
const __m256i high_bit2 = _mm256_set1_epi8(0x80);
277+
const __m256i limit_shifted2 = _mm256_xor_si256(limit2, high_bit2);
278+
for (; j + 64 <= len_chk; j += 64) {
279+
__m256i chunk1 = _mm256_loadu_si256((const __m256i *)(s_chk + j));
280+
__m256i chunk2 = _mm256_loadu_si256((const __m256i *)(s_chk + j + offset_2));
281+
__m256i chunk1_shifted = _mm256_xor_si256(chunk1, high_bit2);
282+
__m256i chunk2_shifted = _mm256_xor_si256(chunk2, high_bit2);
283+
__m256i result_mask1 = _mm256_cmpgt_epi8(limit_shifted2, chunk1_shifted);
284+
__m256i result_mask2 = _mm256_cmpgt_epi8(limit_shifted2, chunk2_shifted);
285+
if (_mm256_movemask_epi8(result_mask1) != 0 || _mm256_movemask_epi8(result_mask2) != 0) {
286+
return false;
287+
}
288+
}
289+
for (; j + offset_2 <= len_chk; j += offset_2) {
290+
__m256i chunk = _mm256_loadu_si256((const __m256i *)(s_chk + j));
291+
__m256i chunk_shifted = _mm256_xor_si256(chunk, high_bit2);
292+
__m256i result_mask = _mm256_cmpgt_epi8(limit_shifted2, chunk_shifted);
293+
if (_mm256_movemask_epi8(result_mask) != 0) {
294+
return false;
295+
}
296+
}
271297
span = len;
272298
#elif defined(__SSE2__)
273-
const size_t offset = 16;
299+
const size_t offset_1 = 16;
300+
const size_t offset_2 = 32;
301+
const size_t offset_3 = 48;
302+
const size_t offset_4 = 64;
274303
const __m128i quote_vec = _mm_set1_epi8('\"');
275304
const __m128i escape_vec = _mm_set1_epi8('\\');
305+
const __m128i limit2 = _mm_set1_epi8(0x20);
306+
const __m128i high_bit2 = _mm_set1_epi8(0x80);
307+
const __m128i limit_shifted2 = _mm_xor_si128(limit2, high_bit2);
276308
size_t i = 0;
277309
/* Unrolled: process 64 bytes per iteration (4 SSE2 registers) */
278-
for (; i + 64 <= len; i += 64) {
310+
for (; i + offset_4 <= len; i += offset_4) {
279311
__m128i chunk1 = _mm_loadu_si128((const __m128i *)(p + i));
280-
__m128i chunk2 = _mm_loadu_si128((const __m128i *)(p + i + 16));
281-
__m128i chunk3 = _mm_loadu_si128((const __m128i *)(p + i + 32));
282-
__m128i chunk4 = _mm_loadu_si128((const __m128i *)(p + i + 48));
312+
__m128i chunk2 = _mm_loadu_si128((const __m128i *)(p + i + offset_1));
313+
__m128i chunk3 = _mm_loadu_si128((const __m128i *)(p + i + offset_2));
314+
__m128i chunk4 = _mm_loadu_si128((const __m128i *)(p + i + offset_3));
283315
__m128i match1 = _mm_or_si128(_mm_cmpeq_epi8(chunk1, quote_vec), _mm_cmpeq_epi8(chunk1, escape_vec));
284316
__m128i match2 = _mm_or_si128(_mm_cmpeq_epi8(chunk2, quote_vec), _mm_cmpeq_epi8(chunk2, escape_vec));
285317
__m128i match3 = _mm_or_si128(_mm_cmpeq_epi8(chunk3, quote_vec), _mm_cmpeq_epi8(chunk3, escape_vec));
@@ -293,20 +325,20 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end
293325
goto found;
294326
}
295327
if (mask2 != 0) {
296-
span = i + 16 + __builtin_ctz(mask2);
328+
span = i + offset_1 + __builtin_ctz(mask2);
297329
goto found;
298330
}
299331
if (mask3 != 0) {
300-
span = i + 32 + __builtin_ctz(mask3);
332+
span = i + offset_2 + __builtin_ctz(mask3);
301333
goto found;
302334
}
303335
if (mask4 != 0) {
304-
span = i + 48 + __builtin_ctz(mask4);
336+
span = i + offset_3 + __builtin_ctz(mask4);
305337
goto found;
306338
}
307339
}
308340
/* Process remaining chunks of 16 bytes */
309-
for (; i + offset <= len; i += offset) {
341+
for (; i + offset_1 <= len; i += offset_1) {
310342
__m128i chunk = _mm_loadu_si128((const __m128i *)(p + i));
311343
__m128i quote_match = _mm_cmpeq_epi8(chunk, quote_vec);
312344
__m128i escape_match = _mm_cmpeq_epi8(chunk, escape_vec);
@@ -323,6 +355,32 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end
323355
goto found;
324356
}
325357
}
358+
for (; j + offset_4 <= len_chk; j += offset_4) {
359+
__m128i chunk1 = _mm_loadu_si128((const __m128i *)(s_chk + j));
360+
__m128i chunk2 = _mm_loadu_si128((const __m128i *)(s_chk + j + offset_1));
361+
__m128i chunk3 = _mm_loadu_si128((const __m128i *)(s_chk + j + offset_2));
362+
__m128i chunk4 = _mm_loadu_si128((const __m128i *)(s_chk + j + offset_3));
363+
__m128i chunk1_shifted = _mm_xor_si128(chunk1, high_bit2);
364+
__m128i chunk2_shifted = _mm_xor_si128(chunk2, high_bit2);
365+
__m128i chunk3_shifted = _mm_xor_si128(chunk3, high_bit2);
366+
__m128i chunk4_shifted = _mm_xor_si128(chunk4, high_bit2);
367+
__m128i result_mask1 = _mm_cmplt_epi8(chunk1_shifted, limit_shifted2);
368+
__m128i result_mask2 = _mm_cmplt_epi8(chunk2_shifted, limit_shifted2);
369+
__m128i result_mask3 = _mm_cmplt_epi8(chunk3_shifted, limit_shifted2);
370+
__m128i result_mask4 = _mm_cmplt_epi8(chunk4_shifted, limit_shifted2);
371+
if (_mm_movemask_epi8(result_mask1) != 0 || _mm_movemask_epi8(result_mask2) != 0 ||
372+
_mm_movemask_epi8(result_mask3) != 0 || _mm_movemask_epi8(result_mask4) != 0) {
373+
return false;
374+
}
375+
}
376+
for (; j + offset_1 <= len_chk; j += offset_1) {
377+
__m128i chunk = _mm_loadu_si128((const __m128i *)(s_chk + j));
378+
__m128i chunk_shifted = _mm_xor_si128(chunk, high_bit2);
379+
__m128i result_mask = _mm_cmplt_epi8(chunk_shifted, limit_shifted2);
380+
if (_mm_movemask_epi8(result_mask) != 0) {
381+
return false;
382+
}
383+
}
326384
span = len;
327385
#else
328386
/* non-SSE2 fallback: simple byte scan */
@@ -336,9 +394,15 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end
336394
span = len;
337395
#endif
338396

397+
for (; j < len_chk; j++) {
398+
if ((unsigned char)s_chk[j] < MIN_PRINTABLE_ASCII) {
399+
return false;
400+
}
401+
}
402+
339403
/* scalar tail */
340404
for (; i < len; i++) {
341-
if ((unsigned char)p[i] < 0x20 || (unsigned char)p[i] >= 0x80) {
405+
if ((unsigned char)p[i] < MIN_PRINTABLE_ASCII || (unsigned char)p[i] >= MAX_PRINTABLE_ASCII) {
342406
return false;
343407
}
344408
if (p[i] == '"' || p[i] == '\\') {
@@ -352,77 +416,14 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end
352416
p += span;
353417
if (p == end)
354418
return false;
355-
#ifdef STRING_VALIDATION
356-
{
357-
const char *s_chk = p - span;
358-
size_t len_chk = span;
359-
size_t j = 0;
360-
#if defined(__AVX2__)
361-
const size_t offset2 = 32;
362-
const __m256i limit2 = _mm256_set1_epi8(0x20);
363-
const __m256i high_bit2 = _mm256_set1_epi8(0x80);
364-
const __m256i limit_shifted2 = _mm256_xor_si256(limit2, high_bit2);
365-
for (; j + 64 <= len_chk; j += 64) {
366-
__m256i chunk1 = _mm256_loadu_si256((const __m256i *)(s_chk + j));
367-
__m256i chunk2 = _mm256_loadu_si256((const __m256i *)(s_chk + j + 32));
368-
__m256i chunk1_shifted = _mm256_xor_si256(chunk1, high_bit2);
369-
__m256i chunk2_shifted = _mm256_xor_si256(chunk2, high_bit2);
370-
__m256i result_mask1 = _mm256_cmpgt_epi8(limit_shifted2, chunk1_shifted);
371-
__m256i result_mask2 = _mm256_cmpgt_epi8(limit_shifted2, chunk2_shifted);
372-
if (_mm256_movemask_epi8(result_mask1) != 0 || _mm256_movemask_epi8(result_mask2) != 0) {
373-
return false;
374-
}
375-
}
376-
for (; j + offset2 <= len_chk; j += offset2) {
377-
__m256i chunk = _mm256_loadu_si256((const __m256i *)(s_chk + j));
378-
__m256i chunk_shifted = _mm256_xor_si256(chunk, high_bit2);
379-
__m256i result_mask = _mm256_cmpgt_epi8(limit_shifted2, chunk_shifted);
380-
if (_mm256_movemask_epi8(result_mask) != 0) {
381-
return false;
382-
}
383-
}
384-
#elif defined(__SSE2__)
385-
const size_t offset2 = 16;
386-
const __m128i limit2 = _mm_set1_epi8(0x20);
387-
const __m128i high_bit2 = _mm_set1_epi8(0x80);
388-
const __m128i limit_shifted2 = _mm_xor_si128(limit2, high_bit2);
389-
for (; j + 64 <= len_chk; j += 64) {
390-
__m128i chunk1 = _mm_loadu_si128((const __m128i *)(s_chk + j));
391-
__m128i chunk2 = _mm_loadu_si128((const __m128i *)(s_chk + j + 16));
392-
__m128i chunk3 = _mm_loadu_si128((const __m128i *)(s_chk + j + 32));
393-
__m128i chunk4 = _mm_loadu_si128((const __m128i *)(s_chk + j + 48));
394-
__m128i chunk1_shifted = _mm_xor_si128(chunk1, high_bit2);
395-
__m128i chunk2_shifted = _mm_xor_si128(chunk2, high_bit2);
396-
__m128i chunk3_shifted = _mm_xor_si128(chunk3, high_bit2);
397-
__m128i chunk4_shifted = _mm_xor_si128(chunk4, high_bit2);
398-
__m128i result_mask1 = _mm_cmplt_epi8(chunk1_shifted, limit_shifted2);
399-
__m128i result_mask2 = _mm_cmplt_epi8(chunk2_shifted, limit_shifted2);
400-
__m128i result_mask3 = _mm_cmplt_epi8(chunk3_shifted, limit_shifted2);
401-
__m128i result_mask4 = _mm_cmplt_epi8(chunk4_shifted, limit_shifted2);
402-
if (_mm_movemask_epi8(result_mask1) != 0 || _mm_movemask_epi8(result_mask2) != 0 ||
403-
_mm_movemask_epi8(result_mask3) != 0 || _mm_movemask_epi8(result_mask4) != 0) {
404-
return false;
405-
}
406-
}
407-
for (; j + offset2 <= len_chk; j += offset2) {
408-
__m128i chunk = _mm_loadu_si128((const __m128i *)(s_chk + j));
409-
__m128i chunk_shifted = _mm_xor_si128(chunk, high_bit2);
410-
__m128i result_mask = _mm_cmplt_epi8(chunk_shifted, limit_shifted2);
411-
if (_mm_movemask_epi8(result_mask) != 0) {
412-
return false;
413-
}
414-
}
415-
#else
416-
const size_t offset2 = 1;
417-
#endif
418-
for (; j < len_chk; j++) {
419-
if ((unsigned char)s_chk[j] < 0x20) {
419+
if (*p == '\"') {
420+
/* Validate the parsed string span for control characters */
421+
const char *chk = v->u.string.ptr;
422+
while (chk < p) {
423+
if ((unsigned char)*chk < MIN_PRINTABLE_ASCII)
420424
return false;
421-
}
425+
chk++;
422426
}
423-
}
424-
#endif
425-
if (*p == '\"') {
426427
v->u.string.len = p - *s - 1;
427428
*s = p + 1;
428429
return true;

0 commit comments

Comments
 (0)