From 44e6d7242b836cb68de478b5e6dafffa68e74274 Mon Sep 17 00:00:00 2001 From: emcastillo Date: Sat, 10 May 2025 22:42:20 -0700 Subject: [PATCH 1/5] sve2 histcnt support for ondemand parsing --- include/sonic/internal/arch/sve2-128/skip.h | 131 ++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/include/sonic/internal/arch/sve2-128/skip.h b/include/sonic/internal/arch/sve2-128/skip.h index 8ad6891..205595d 100644 --- a/include/sonic/internal/arch/sve2-128/skip.h +++ b/include/sonic/internal/arch/sve2-128/skip.h @@ -34,12 +34,143 @@ using sonic_json::internal::common::SkipLiteral; #include "../common/arm_common/skip.inc.h" + +// Requires clang vx or GCC>=14 +#if (defined(__clang__) && (__clang_major__ >= 14)) || \ +(defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 14)) + +#define USE_SVE_HIST 1 + +#include + +template +sonic_force_inline uint64_t GetStringBits(const T& v, + uint64_t &prev_instring, + uint64_t &prev_escaped, int backslash_count, int quotes_count) { + //const T v(data); + uint64_t escaped = 0; + uint64_t bs_bits = 0; + if (backslash_count) { + uint64_t bs_bits = v.eq('\\'); + escaped = common::GetEscaped<64>(prev_escaped, bs_bits); + } else { + escaped = prev_escaped; + prev_escaped = 0; + } + uint64_t in_string = prev_instring; + if (quotes_count) { + uint64_t quote_bits = v.eq('"') & ~escaped; + in_string = PrefixXor(quote_bits) ^ prev_instring; + prev_instring = uint64_t(static_cast(in_string) >> 63); + } + return in_string; +} + +sonic_force_inline uint32_t count_chars(const uint8x16_t& data, svuint8_t& tokens, uint8_t left, uint8_t right) { + svuint8_t v = svundef_u8(); + v = svset_neonq_u8(v, data); + svuint32_t vec32 = svreinterpret_u32(svhistseg_u8(tokens, v)); + return vgetq_lane_u32(svget_neonq_u32(vec32),0); +} + +template +sonic_force_inline bool skip_container_sve(const uint8_t *data, size_t &pos, + size_t len, uint8_t left, + uint8_t right) { + uint64_t prev_instring = 0, prev_escaped = 0, instring = 0; + int rbrace_num = 0, lbrace_num = 0, last_lbrace_num; + const uint8_t *p; + svuint8_t tokens = svreinterpret_u8_u32(svdup_n_u32(0x5C22 | (left << 24) | (right<<16))); + + while (pos + 64 <= len) { + p = data + pos; + + T v(p); + uint32_t counts = count_chars(v.chunks[0], tokens, left, right); + // We know they don't overflow, max is 16*4, so we can directly accomulate + counts += count_chars(v.chunks[1], tokens, left, right); + counts += count_chars(v.chunks[2], tokens, left, right); + counts += count_chars(v.chunks[3], tokens, left, right); + + +#define SKIP_LOOP() \ + { \ + int q_c = (counts) & 0xff; \ + int b_c = (counts >> 8) & 0xff; \ + int r_c = (counts >> 16) & 0xff; \ + int l_c = (counts >> 24) & 0xff; \ + last_lbrace_num = lbrace_num; \ + instring = GetStringBits(v, prev_instring, prev_escaped, b_c, q_c);\ + uint64_t lbrace = 0; \ + if(l_c) { \ + lbrace = v.eq(left) & ~instring; \ + } \ + if(r_c) { \ + uint64_t rbrace = v.eq(right) & ~instring; \ + /* traverse each '}' */ \ + while (rbrace > 0) { \ + rbrace_num++; \ + /* counts the number of {{ that happens before } */ \ + lbrace_num = last_lbrace_num + CountOnes((rbrace - 1) & lbrace); \ + bool is_closed = lbrace_num < rbrace_num; \ + if (is_closed) { \ + sonic_assert(rbrace_num == lbrace_num + 1); \ + pos += TrailingZeroes(rbrace) + 1; \ + return true; \ + } \ + rbrace &= (rbrace - 1); \ + } \ + } \ + lbrace_num = last_lbrace_num + CountOnes(lbrace); \ + } + if (!counts) { + // Skip, no interesting characters here + prev_escaped = 0; + } else if((counts < 256) && prev_escaped == 0) { + // counts < 256 means that all the values besides the last byte + // (quotes) are 0. + // Only quotes, other vals are 0, need to check the number to see + // if string is open or not + // last byte of counts is the string number + prev_instring ^= (0 - (int) (counts & 1)); + prev_escaped = 0; + } else if(!(counts &0xff) && prev_instring) { + // only backslahes and no quotes, the whole 64 bytes are inside a string + // so we dont care about left & right + // just check if the last character is a backslash + prev_escaped = (p[63] == '\\'); + } else { + SKIP_LOOP(); + } + pos += 64; + } + uint8_t buf[64] = {0}; + std::memcpy(buf, data + pos, len - pos); + p = buf; + T v(p); + uint32_t counts = count_chars(v.chunks[0], tokens, left, right); + // We know they don't overflow, max is 16*4, so we can directly accomulate + counts += count_chars(v.chunks[1], tokens, left, right); + counts += count_chars(v.chunks[2], tokens, left, right); + counts += count_chars(v.chunks[3], tokens, left, right); + SKIP_LOOP(); +#undef SKIP_LOOP + return false; +} + +#endif + sonic_force_inline bool SkipContainer(const uint8_t *data, size_t &pos, size_t len, uint8_t left, uint8_t right) { // We use neon for the on demand parser since it is currently faster for // comparisons than sve +#ifdef USE_SVE_HIST + return skip_container_sve>( + data, pos, len, left, right); +#else return skip_container>( data, pos, len, left, right); +#endif } // TODO: optimize by removing bound checking. From f44c9f83c01f4329f8384d31599114f62d636709 Mon Sep 17 00:00:00 2001 From: emcastillo Date: Sun, 11 May 2025 07:46:22 +0200 Subject: [PATCH 2/5] Fixes --- include/sonic/internal/arch/sve2-128/skip.h | 128 ++++++++++---------- 1 file changed, 65 insertions(+), 63 deletions(-) diff --git a/include/sonic/internal/arch/sve2-128/skip.h b/include/sonic/internal/arch/sve2-128/skip.h index 205595d..6bb53a3 100644 --- a/include/sonic/internal/arch/sve2-128/skip.h +++ b/include/sonic/internal/arch/sve2-128/skip.h @@ -34,20 +34,20 @@ using sonic_json::internal::common::SkipLiteral; #include "../common/arm_common/skip.inc.h" - // Requires clang vx or GCC>=14 #if (defined(__clang__) && (__clang_major__ >= 14)) || \ -(defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 14)) + (defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 14)) #define USE_SVE_HIST 1 #include template -sonic_force_inline uint64_t GetStringBits(const T& v, - uint64_t &prev_instring, - uint64_t &prev_escaped, int backslash_count, int quotes_count) { - //const T v(data); +sonic_force_inline uint64_t GetStringBits(const T &v, uint64_t &prev_instring, + uint64_t &prev_escaped, + int backslash_count, + int quotes_count) { + // const T v(data); uint64_t escaped = 0; uint64_t bs_bits = 0; if (backslash_count) { @@ -59,28 +59,31 @@ sonic_force_inline uint64_t GetStringBits(const T& v, } uint64_t in_string = prev_instring; if (quotes_count) { - uint64_t quote_bits = v.eq('"') & ~escaped; - in_string = PrefixXor(quote_bits) ^ prev_instring; - prev_instring = uint64_t(static_cast(in_string) >> 63); + uint64_t quote_bits = v.eq('"') & ~escaped; + in_string = PrefixXor(quote_bits) ^ prev_instring; + prev_instring = uint64_t(static_cast(in_string) >> 63); } return in_string; } -sonic_force_inline uint32_t count_chars(const uint8x16_t& data, svuint8_t& tokens, uint8_t left, uint8_t right) { - svuint8_t v = svundef_u8(); - v = svset_neonq_u8(v, data); - svuint32_t vec32 = svreinterpret_u32(svhistseg_u8(tokens, v)); - return vgetq_lane_u32(svget_neonq_u32(vec32),0); +sonic_force_inline uint32_t count_chars(const uint8x16_t &data, + svuint8_t &tokens, uint8_t left, + uint8_t right) { + svuint8_t v = svundef_u8(); + v = svset_neonq_u8(v, data); + svuint32_t vec32 = svreinterpret_u32(svhistseg_u8(tokens, v)); + return vgetq_lane_u32(svget_neonq_u32(vec32), 0); } template sonic_force_inline bool skip_container_sve(const uint8_t *data, size_t &pos, - size_t len, uint8_t left, - uint8_t right) { + size_t len, uint8_t left, + uint8_t right) { uint64_t prev_instring = 0, prev_escaped = 0, instring = 0; int rbrace_num = 0, lbrace_num = 0, last_lbrace_num; const uint8_t *p; - svuint8_t tokens = svreinterpret_u8_u32(svdup_n_u32(0x5C22 | (left << 24) | (right<<16))); + svuint8_t tokens = + svreinterpret_u8_u32(svdup_n_u32(0x5C22 | (left << 24) | (right << 16))); while (pos + 64 <= len) { p = data + pos; @@ -92,55 +95,54 @@ sonic_force_inline bool skip_container_sve(const uint8_t *data, size_t &pos, counts += count_chars(v.chunks[2], tokens, left, right); counts += count_chars(v.chunks[3], tokens, left, right); - -#define SKIP_LOOP() \ - { \ - int q_c = (counts) & 0xff; \ - int b_c = (counts >> 8) & 0xff; \ - int r_c = (counts >> 16) & 0xff; \ - int l_c = (counts >> 24) & 0xff; \ - last_lbrace_num = lbrace_num; \ - instring = GetStringBits(v, prev_instring, prev_escaped, b_c, q_c);\ - uint64_t lbrace = 0; \ - if(l_c) { \ - lbrace = v.eq(left) & ~instring; \ - } \ - if(r_c) { \ - uint64_t rbrace = v.eq(right) & ~instring; \ - /* traverse each '}' */ \ - while (rbrace > 0) { \ - rbrace_num++; \ - /* counts the number of {{ that happens before } */ \ - lbrace_num = last_lbrace_num + CountOnes((rbrace - 1) & lbrace); \ - bool is_closed = lbrace_num < rbrace_num; \ - if (is_closed) { \ - sonic_assert(rbrace_num == lbrace_num + 1); \ - pos += TrailingZeroes(rbrace) + 1; \ - return true; \ - } \ - rbrace &= (rbrace - 1); \ - } \ - } \ - lbrace_num = last_lbrace_num + CountOnes(lbrace); \ +#define SKIP_LOOP() \ + { \ + int q_c = (counts) & 0xff; \ + int b_c = (counts >> 8) & 0xff; \ + int r_c = (counts >> 16) & 0xff; \ + int l_c = (counts >> 24) & 0xff; \ + last_lbrace_num = lbrace_num; \ + instring = GetStringBits(v, prev_instring, prev_escaped, b_c, q_c); \ + uint64_t lbrace = 0; \ + if (l_c) { \ + lbrace = v.eq(left) & ~instring; \ + } \ + if (r_c) { \ + uint64_t rbrace = v.eq(right) & ~instring; \ + /* traverse each '}' */ \ + while (rbrace > 0) { \ + rbrace_num++; \ + /* counts the number of {{ that happens before } */ \ + lbrace_num = last_lbrace_num + CountOnes((rbrace - 1) & lbrace); \ + bool is_closed = lbrace_num < rbrace_num; \ + if (is_closed) { \ + sonic_assert(rbrace_num == lbrace_num + 1); \ + pos += TrailingZeroes(rbrace) + 1; \ + return true; \ + } \ + rbrace &= (rbrace - 1); \ + } \ + } \ + lbrace_num = last_lbrace_num + CountOnes(lbrace); \ } if (!counts) { - // Skip, no interesting characters here - prev_escaped = 0; - } else if((counts < 256) && prev_escaped == 0) { - // counts < 256 means that all the values besides the last byte - // (quotes) are 0. - // Only quotes, other vals are 0, need to check the number to see - // if string is open or not - // last byte of counts is the string number - prev_instring ^= (0 - (int) (counts & 1)); - prev_escaped = 0; - } else if(!(counts &0xff) && prev_instring) { - // only backslahes and no quotes, the whole 64 bytes are inside a string - // so we dont care about left & right - // just check if the last character is a backslash - prev_escaped = (p[63] == '\\'); + // Skip, no interesting characters here + prev_escaped = 0; + } else if ((counts < 256) && prev_escaped == 0) { + // counts < 256 means that all the values besides the last byte + // (quotes) are 0. + // Only quotes, other vals are 0, need to check the number to see + // if string is open or not + // last byte of counts is the string number + prev_instring ^= (0 - (int)(counts & 1)); + prev_escaped = 0; + } else if (!(counts & 0xff) && prev_instring) { + // only backslahes and no quotes, the whole 64 bytes are inside a string + // so we dont care about left & right + // just check if the last character is a backslash + prev_escaped = (p[63] == '\\'); } else { - SKIP_LOOP(); + SKIP_LOOP(); } pos += 64; } From 36b675bcfe36ae15a1c5aec80fc84ec6102a50a7 Mon Sep 17 00:00:00 2001 From: emcastillo Date: Mon, 12 May 2025 10:09:54 +0200 Subject: [PATCH 3/5] clang-format --- include/sonic/internal/arch/sve2-128/skip.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sonic/internal/arch/sve2-128/skip.h b/include/sonic/internal/arch/sve2-128/skip.h index 6bb53a3..dcb4269 100644 --- a/include/sonic/internal/arch/sve2-128/skip.h +++ b/include/sonic/internal/arch/sve2-128/skip.h @@ -97,7 +97,7 @@ sonic_force_inline bool skip_container_sve(const uint8_t *data, size_t &pos, #define SKIP_LOOP() \ { \ - int q_c = (counts) & 0xff; \ + int q_c = counts & 0xff; \ int b_c = (counts >> 8) & 0xff; \ int r_c = (counts >> 16) & 0xff; \ int l_c = (counts >> 24) & 0xff; \ From 15ed14ab2ad32bd83338c091e242217dd0e5c2f9 Mon Sep 17 00:00:00 2001 From: emcastillo Date: Sun, 17 Aug 2025 06:40:00 -0700 Subject: [PATCH 4/5] Review comments --- include/sonic/internal/arch/sve2-128/skip.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/include/sonic/internal/arch/sve2-128/skip.h b/include/sonic/internal/arch/sve2-128/skip.h index dcb4269..673c740 100644 --- a/include/sonic/internal/arch/sve2-128/skip.h +++ b/include/sonic/internal/arch/sve2-128/skip.h @@ -34,14 +34,16 @@ using sonic_json::internal::common::SkipLiteral; #include "../common/arm_common/skip.inc.h" -// Requires clang vx or GCC>=14 -#if (defined(__clang__) && (__clang_major__ >= 14)) || \ - (defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 14)) - -#define USE_SVE_HIST 1 - -#include +#ifdef __has_include +# if __has_include() +# include +# ifndef USE_SVE_HIST +# define USE_SVE_HIST 1 +# endif +# endif +#endif +#ifdef USE_SVE_HIST template sonic_force_inline uint64_t GetStringBits(const T &v, uint64_t &prev_instring, uint64_t &prev_escaped, From dcf7c82a45296a3b56dc1732c04c5d51430fbf54 Mon Sep 17 00:00:00 2001 From: ecastillo Date: Mon, 18 Aug 2025 00:23:52 +0900 Subject: [PATCH 5/5] Clang format --- include/sonic/internal/arch/sve2-128/skip.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/sonic/internal/arch/sve2-128/skip.h b/include/sonic/internal/arch/sve2-128/skip.h index 673c740..65a372b 100644 --- a/include/sonic/internal/arch/sve2-128/skip.h +++ b/include/sonic/internal/arch/sve2-128/skip.h @@ -35,12 +35,12 @@ using sonic_json::internal::common::SkipLiteral; #include "../common/arm_common/skip.inc.h" #ifdef __has_include -# if __has_include() -# include -# ifndef USE_SVE_HIST -# define USE_SVE_HIST 1 -# endif -# endif +#if __has_include() +#include +#ifndef USE_SVE_HIST +#define USE_SVE_HIST 1 +#endif +#endif #endif #ifdef USE_SVE_HIST