-
Notifications
You must be signed in to change notification settings - Fork 441
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
project_loader: handle invalid unicode chars (#1941)
Note that the exception is currently raised with valid and invalid unicode characters due to the upstream bug pyyaml#25. But we'll want to handle the error cleanly even if the upstream issue is fixed. This branch adds a patch to the PyYAML used by the Snapcraft snap to handle the unicode code points erroneously flagged as invalid, such as the hankey emoji. A Snapcraft snap built from this PR will successfully validate a summary or description making use of the hankey emoji. New test cases: tests.unit.project_loader.test_config.test_invalid_yaml_invalid_unicode_chars tests.integration.general.test_global_properties Note: The integration level test for the PyYAML work-around is skipped unless testing with a snap or Debian package (i.e. SNAPCRAFT_FROM_SNAP=1 is set or SNAPCRAFT_FROM_DEB=1 is set), both of which are patched. I verified with a snap from the branch, and running the tests in a virtual environment respectively. LP: #1737571
- Loading branch information
1 parent
32fbaf3
commit e9775f2
Showing
6 changed files
with
101 additions
and
3 deletions.
There are no files selected for viewing
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
diff --git a/yaml/emitter.py b/yaml/emitter.py | ||
index 34cb145..1f8ed92 100644 | ||
--- a/yaml/emitter.py | ||
+++ b/yaml/emitter.py | ||
@@ -698,7 +698,8 @@ class Emitter: | ||
line_breaks = True | ||
if not (ch == '\n' or '\x20' <= ch <= '\x7E'): | ||
if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF' | ||
- or '\uE000' <= ch <= '\uFFFD') and ch != '\uFEFF': | ||
+ or '\uE000' <= ch <= '\uFFFD' | ||
+ or '\U00010000' <= ch < '\U0010ffff') and ch != '\uFEFF': | ||
unicode_characters = True | ||
if not self.allow_unicode: | ||
special_characters = True | ||
diff --git a/yaml/reader.py b/yaml/reader.py | ||
index f70e920..5764f2d 100644 | ||
--- a/yaml/reader.py | ||
+++ b/yaml/reader.py | ||
@@ -134,7 +134,7 @@ class Reader(object): | ||
self.encoding = 'utf-8' | ||
self.update(1) | ||
|
||
- NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') | ||
+ NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]') | ||
def check_printable(self, data): | ||
match = self.NON_PRINTABLE.search(data) | ||
if match: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*- | ||
# | ||
# Copyright (C) 2018 Canonical Ltd | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License version 3 as | ||
# published by the Free Software Foundation. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
import os | ||
|
||
import testscenarios | ||
|
||
from tests import integration, fixture_setup | ||
|
||
|
||
class UnicodePropertyTestCase(testscenarios.WithScenarios,
                              integration.TestCase):
    """Integration test for emoji in snapcraft.yaml metadata fields.

    Each scenario puts the emoji into a different field (summary or
    description) while keeping the remaining fields plain ASCII.
    """

    scenarios = [
        ('summary', {
            'name': 'foo',
            'summary': 'bar💩',
            'description': 'baz',
        }),
        ('description', {
            'name': 'foo',
            'summary': 'bar',
            'description': 'baz💩',
        }),
    ]

    def test_invalid_unicode_workaround(self):
        """Run ``snapcraft pull`` on a project whose metadata has an emoji.

        The test is skipped unless SNAPCRAFT_FROM_SNAP or
        SNAPCRAFT_FROM_DEB is set to a non-empty value in the environment.
        """
        # Only the snap and the Debian package carry the PyYAML patch, so
        # there is nothing meaningful to check against other installs.
        running_patched_build = (
            os.getenv('SNAPCRAFT_FROM_SNAP', False) or
            os.getenv('SNAPCRAFT_FROM_DEB', False))
        if not running_patched_build:
            self.skipTest('The yaml unicode patch is applied to the snap '
                          'and python3-yaml package, but not PyYAML in PyPI')

        yaml_fixture = fixture_setup.SnapcraftYaml(
            self.path,
            name=self.name,
            summary=self.summary,
            description=self.description)
        # A single no-op part is enough to drive the 'pull' step.
        yaml_fixture.update_part('my-part', {'plugin': 'nil'})
        self.useFixture(yaml_fixture)

        self.run_snapcraft('pull')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters