diff --git a/demjson.py b/demjson.py index 226e268..00a69bd 100644 --- a/demjson.py +++ b/demjson.py @@ -2534,7 +2534,7 @@ def update_string_stats(self, s, **kwargs): st.max_codepoint = max( st.max_codepoint, maxcp ) if maxcp > 0xffff and not self._have_warned_nonbmp: self._have_warned_nonbmp = True - self.push_cond( self.options.non_portable, + self.push_cond( self.options.non_bmp, "Strings containing non-BMP characters (U+%04X) may not be portable" % maxcp, **kwargs ) @@ -2872,6 +2872,8 @@ class json_options(object): "A JSON document may start with a Unicode BOM (Byte Order Mark)"), ("non_portable", "Anything technically valid but likely to cause data portablibity issues"), + ("non_bmp", + "Characters out of the BMP may cause portability issues"), ) # end behavior list def reset_to_defaults(self): @@ -3339,6 +3341,7 @@ def strictness(self, strict): self.warn_zero_byte() self.warn_bom() self.warn_non_portable() + self.warn_non_bmp() elif strict == STRICTNESS_TOLERANT or strict is False: self._strictness = STRICTNESS_TOLERANT self.set_all_allow() @@ -3348,6 +3351,7 @@ def strictness(self, strict): self.leading_zero_radix = 8 self.warn_bom() self.allow_non_portable() + self.allow_non_bmp() else: raise ValueError("Unknown strictness options %r" % strict) self.allow_any_type_at_start() diff --git a/docs/demjson.txt b/docs/demjson.txt index ce2dc07..0da7c8d 100644 --- a/docs/demjson.txt +++ b/docs/demjson.txt @@ -1922,6 +1922,9 @@ CLASSES | | allow_non_portable(self, _name='non_portable', _value='allow') | Set behavior non_portable to allow. + | + | allow_non_bmp(self, _name='non_bmp', _value='allow') + | Set behavior non_bmp to allow. | | allow_nonescape_characters(self, _name='nonescape_characters', _value='allow') | Set behavior nonescape_characters to allow. @@ -2008,6 +2011,9 @@ CLASSES | forbid_non_portable(self, _name='non_portable', _value='forbid') | Set behavior non_portable to forbid. 
| + | forbid_non_bmp(self, _name='non_bmp', _value='forbid') + | Set behavior non_bmp to forbid. + | | forbid_nonescape_characters(self, _name='nonescape_characters', _value='forbid') | Set behavior nonescape_characters to forbid. | @@ -2143,6 +2149,9 @@ CLASSES | warn_non_portable(self, _name='non_portable', _value='warn') | Set behavior non_portable to warn. | + | warn_non_bmp(self, _name='non_bmp', _value='warn') + | Set behavior non_bmp to warn. + | | warn_nonescape_characters(self, _name='nonescape_characters', _value='warn') | Set behavior nonescape_characters to warn. | diff --git a/docs/jsonlint.txt b/docs/jsonlint.txt index 3fad882..beb44d7 100644 --- a/docs/jsonlint.txt +++ b/docs/jsonlint.txt @@ -117,6 +117,7 @@ forbid js-string-escapes All JavaScript character \-escape sequences ma forbid leading-zeros Numbers may have extra leading zeros (see --leading-zero-radix option) forbid non-numbers Non-numbers may be used, such as NaN or Infinity warn non-portable Anything technically valid but likely to cause data portablibity issues +warn non-bmp Characters out of the BMP may cause portability issues forbid nonescape-characters Unknown character \-escape sequences stand for that character (\Q -> 'Q') forbid nonstring-keys Value types other than strings (or identifiers) may be used as object keys forbid octal-numbers New-style octal numbers, e.g., 0o731 (see leading-zeros for legacy octals)