Treat characters outside the Basic Multilingual Plane (U+10000..U+10FFFF)
as printable in the reader and the emitter, for both the Python 2 (lib/)
and Python 3 (lib3/) code bases.

Notes on this revision of the patch:

  1. lib/yaml/emitter.py guards the supplementary-plane range test with
     "has_ucs4 and ...".  The earlier "(not has_ucs4) or ..." form was true
     for every character on narrow (UCS-2) builds, so on those builds
     control characters stopped being flagged as special whenever
     allow_unicode was set.
  2. Both emitters use an inclusive upper bound (<= U+10FFFF) so that they
     agree with the NON_PRINTABLE character ranges used by the readers.
  3. The "index" lines are carried over unchanged from the original patch;
     they do not describe the post-image of the two emitter files as
     modified by notes 1 and 2.

diff --git a/lib/yaml/emitter.py b/lib/yaml/emitter.py
index e5bcdccc..5fb4179a 100644
--- a/lib/yaml/emitter.py
+++ b/lib/yaml/emitter.py
@@ -8,9 +8,13 @@
 
 __all__ = ['Emitter', 'EmitterError']
 
+import sys
+
 from error import YAMLError
 from events import *
 
+has_ucs4 = sys.maxunicode > 0xffff
+
 class EmitterError(YAMLError):
     pass
 
@@ -674,7 +678,7 @@ def analyze_scalar(self, scalar):
             # Check for indicators.
             if index == 0:
                 # Leading indicators are special characters.
-                if ch in u'#,[]{}&*!|>\'\"%@`': 
+                if ch in u'#,[]{}&*!|>\'\"%@`':
                     flow_indicators = True
                     block_indicators = True
                 if ch in u'?:':
@@ -701,7 +705,8 @@ def analyze_scalar(self, scalar):
                 line_breaks = True
             if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
                 if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
-                        or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF':
+                        or u'\uE000' <= ch <= u'\uFFFD'
+                        or (has_ucs4 and u'\U00010000' <= ch <= u'\U0010ffff')) and ch != u'\uFEFF':
                     unicode_characters = True
                     if not self.allow_unicode:
                         special_characters = True
@@ -1137,4 +1142,3 @@ def write_plain(self, text, split=True):
                     spaces = (ch == u' ')
                     breaks = (ch in u'\n\x85\u2028\u2029')
             end += 1
-
diff --git a/lib/yaml/reader.py b/lib/yaml/reader.py
index 3249e6b9..56a12f48 100644
--- a/lib/yaml/reader.py
+++ b/lib/yaml/reader.py
@@ -19,7 +19,9 @@
 
 from error import YAMLError, Mark
 
-import codecs, re
+import codecs, re, sys
+
+has_ucs4 = sys.maxunicode > 0xffff
 
 class ReaderError(YAMLError):
 
@@ -134,7 +136,10 @@ def determine_encoding(self):
                 self.encoding = 'utf-8'
         self.update(1)
 
-    NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
+    if has_ucs4:
+        NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]')
+    else:
+        NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
     def check_printable(self, data):
         match = self.NON_PRINTABLE.search(data)
         if match:
@@ -187,4 +192,3 @@ def update_raw(self, size=1024):
 #    psyco.bind(Reader)
 #except ImportError:
 #    pass
-
diff --git a/lib3/yaml/emitter.py b/lib3/yaml/emitter.py
index 34cb145a..34798834 100644
--- a/lib3/yaml/emitter.py
+++ b/lib3/yaml/emitter.py
@@ -671,7 +671,7 @@ def analyze_scalar(self, scalar):
             # Check for indicators.
             if index == 0:
                 # Leading indicators are special characters.
-                if ch in '#,[]{}&*!|>\'\"%@`': 
+                if ch in '#,[]{}&*!|>\'\"%@`':
                     flow_indicators = True
                     block_indicators = True
                 if ch in '?:':
@@ -698,7 +698,8 @@ def analyze_scalar(self, scalar):
                 line_breaks = True
             if not (ch == '\n' or '\x20' <= ch <= '\x7E'):
                 if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF'
-                        or '\uE000' <= ch <= '\uFFFD') and ch != '\uFEFF':
+                        or '\uE000' <= ch <= '\uFFFD'
+                        or '\U00010000' <= ch <= '\U0010ffff') and ch != '\uFEFF':
                     unicode_characters = True
                     if not self.allow_unicode:
                         special_characters = True
@@ -1134,4 +1135,3 @@ def write_plain(self, text, split=True):
                     spaces = (ch == ' ')
                     breaks = (ch in '\n\x85\u2028\u2029')
             end += 1
-
diff --git a/lib3/yaml/reader.py b/lib3/yaml/reader.py
index f70e920f..376b9a36 100644
--- a/lib3/yaml/reader.py
+++ b/lib3/yaml/reader.py
@@ -134,7 +134,7 @@ def determine_encoding(self):
                 self.encoding = 'utf-8'
         self.update(1)
 
-    NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
+    NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]')
     def check_printable(self, data):
         match = self.NON_PRINTABLE.search(data)
         if match:
@@ -189,4 +189,3 @@ def update_raw(self, size=4096):
 #    psyco.bind(Reader)
 #except ImportError:
 #    pass
-