Skip to content

Commit

Permalink
First attack at "pyyaml does not support literals in unicode over codepoint 0xffff" (yaml#25)
Browse files Browse the repository at this point in the history
  • Loading branch information
peterkmurphy committed May 8, 2017
1 parent a06c1f6 commit cf1c86c
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 11 deletions.
10 changes: 7 additions & 3 deletions lib/yaml/emitter.py
Expand Up @@ -8,9 +8,13 @@

__all__ = ['Emitter', 'EmitterError']

import sys

from error import YAMLError
from events import *

has_ucs4 = sys.maxunicode > 0xffff

class EmitterError(YAMLError):
pass

Expand Down Expand Up @@ -674,7 +678,7 @@ def analyze_scalar(self, scalar):
# Check for indicators.
if index == 0:
# Leading indicators are special characters.
if ch in u'#,[]{}&*!|>\'\"%@`':
if ch in u'#,[]{}&*!|>\'\"%@`':
flow_indicators = True
block_indicators = True
if ch in u'?:':
Expand All @@ -701,7 +705,8 @@ def analyze_scalar(self, scalar):
line_breaks = True
if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
    # ch is outside printable ASCII: decide whether it is a printable
    # non-ASCII character or must be treated as special.
    #
    # The astral range U+10000..U+10FFFF is only consulted when has_ucs4
    # is true.  On a narrow build the clause has to contribute False; the
    # earlier "(not has_ucs4) or ..." form contributed True there, which
    # made every character reaching this test (control characters
    # included) count as a printable unicode character.
    #
    # The upper bound is inclusive so that it agrees with the
    # \U00010000-\U0010ffff range accepted by the reader's NON_PRINTABLE
    # pattern.
    if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
            or u'\uE000' <= ch <= u'\uFFFD'
            or (has_ucs4 and u'\U00010000' <= ch <= u'\U0010ffff')
            ) and ch != u'\uFEFF':
        unicode_characters = True
        if not self.allow_unicode:
            special_characters = True
Expand Down Expand Up @@ -1137,4 +1142,3 @@ def write_plain(self, text, split=True):
spaces = (ch == u' ')
breaks = (ch in u'\n\x85\u2028\u2029')
end += 1

10 changes: 7 additions & 3 deletions lib/yaml/reader.py
Expand Up @@ -19,7 +19,9 @@

from error import YAMLError, Mark

import codecs, re
import codecs, re, sys

has_ucs4 = sys.maxunicode > 0xffff

class ReaderError(YAMLError):

Expand Down Expand Up @@ -134,7 +136,10 @@ def determine_encoding(self):
self.encoding = 'utf-8'
self.update(1)

NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
if has_ucs4:
NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]')
else:
NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
def check_printable(self, data):
match = self.NON_PRINTABLE.search(data)
if match:
Expand Down Expand Up @@ -187,4 +192,3 @@ def update_raw(self, size=1024):
# psyco.bind(Reader)
#except ImportError:
# pass

6 changes: 3 additions & 3 deletions lib3/yaml/emitter.py
Expand Up @@ -671,7 +671,7 @@ def analyze_scalar(self, scalar):
# Check for indicators.
if index == 0:
# Leading indicators are special characters.
if ch in '#,[]{}&*!|>\'\"%@`':
if ch in '#,[]{}&*!|>\'\"%@`':
flow_indicators = True
block_indicators = True
if ch in '?:':
Expand All @@ -698,7 +698,8 @@ def analyze_scalar(self, scalar):
line_breaks = True
if not (ch == '\n' or '\x20' <= ch <= '\x7E'):
    # ch is outside printable ASCII: decide whether it is a printable
    # non-ASCII character or must be treated as special.
    #
    # The astral range is closed at both ends (U+10000..U+10FFFF) so that
    # it agrees with the \U00010000-\U0010ffff range accepted by the
    # reader's NON_PRINTABLE pattern; the previous "<" excluded U+10FFFF.
    if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF'
            or '\uE000' <= ch <= '\uFFFD'
            or '\U00010000' <= ch <= '\U0010ffff') and ch != '\uFEFF':
        unicode_characters = True
        if not self.allow_unicode:
            special_characters = True
Expand Down Expand Up @@ -1134,4 +1135,3 @@ def write_plain(self, text, split=True):
spaces = (ch == ' ')
breaks = (ch in '\n\x85\u2028\u2029')
end += 1

3 changes: 1 addition & 2 deletions lib3/yaml/reader.py
Expand Up @@ -134,7 +134,7 @@ def determine_encoding(self):
self.encoding = 'utf-8'
self.update(1)

NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]')
def check_printable(self, data):
match = self.NON_PRINTABLE.search(data)
if match:
Expand Down Expand Up @@ -189,4 +189,3 @@ def update_raw(self, size=4096):
# psyco.bind(Reader)
#except ImportError:
# pass

0 comments on commit cf1c86c

Please sign in to comment.