diff --git a/.travis.yml b/.travis.yml index 864189f0..74b429f3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,19 +1,133 @@ language: python +os: linux python: - - "2.7" - - "3.4" - - "3.5" + - "3.8" + - "3.7" - "3.6" - - "pypy-5.3.1" + - "3.5" + - "2.7" + - "pypy3" + - "pypy3.5-6.0" + - "pypy3.5-7.0" + - "pypy3.6-7.0.0" + - "pypy" + - "pypy2.7-6.0" + - "pypy2.7-7.0.0" +jobs: + include: + - os: osx + python: "3.7" + osx_image: xcode11.2 # Python 3.7.4 running on macOS 10.14.4 + language: shell # 'language: python' is an error on Travis CI macOS + before_install: + - brew install pkg-config + - brew install icu4c + - export PATH="$PATH:/usr/local/opt/icu4c/bin" + - export PKG_CONFIG_PATH="$PKG_CONFIG_PATH:/usr/local/opt/icu4c/lib/pkgconfig" + - which uconv + - uconv -V + - export ICU_VERSION="$(uconv -V | sed -e 's,.*\ + if [[ "$TRAVIS_PYTHON_VERSION" == "2"* ]] || [[ "$TRAVIS_PYTHON_VERSION" == "pypy"* ]] && [[ "$TRAVIS_PYTHON_VERSION" != "pypy3"* ]]; then + pip install -r requirements-py2.txt; + else + pip3 install -r requirements-py3.txt; + fi # command to run tests -script: nosetests tests -sudo: false +script: + # pypy2 and pypy3 segfault on Travis CI if running all tests in the same process + - > + if [[ "$TRAVIS_PYTHON_VERSION" == "pypy" ]]; then + nosetests --collect-only -v tests 2>&1 \ + | grep -e 'ok$' \ + | while read func class etc; do + class="${class//[()]/}"; + class="${class%.*}:${class##*.}"; + nosetests -v "$class.$func"; + done || ( echo "$s" >> "script-failures.log" ); + if [ -e "script-failures.log" ]; then + exit 1; + fi; + elif [[ "$TRAVIS_PYTHON_VERSION" == "pypy3" ]]; then + find tests -type f -name "*.py" | while read s; do + ( [ ! -x "$s" ] && nosetests --no-byte-compile -s -v "$s" ) || ( echo "$s" >> "script-failures.log" ); + done; + if [ -e "script-failures.log" ]; then + exit 1; + fi; + else + nosetests --no-byte-compile --with-coverage tests; + fi +after_failure: + - > + if [ -e "script-failures.log" ]; then + echo $(cat "script-failures.log"); + fi addons: apt: packages: - language-pack-fr - language-pack-de - language-pack-ko + - pkg-config diff --git a/agate/data_types/number.py b/agate/data_types/number.py index 1b62fe87..a92b95ec 100644 --- a/agate/data_types/number.py +++ b/agate/data_types/number.py @@ -98,7 +98,9 @@ def cast(self, d): try: return Decimal(d) * sign - except InvalidOperation: + # The Decimal class will return an InvalidOperation exception on most Python implementations, + # but PyPy3 may return a ValueError if the string is not translatable to ASCII + except (InvalidOperation, ValueError): pass raise CastError('Can not parse value "%s" as Decimal.' % d) diff --git a/agate/table/from_csv.py b/agate/table/from_csv.py index 6c2f8037..1e962a97 100644 --- a/agate/table/from_csv.py +++ b/agate/table/from_csv.py @@ -1,7 +1,6 @@ #!/usr/bin/env python import io - import six @@ -45,44 +44,46 @@ def from_csv(cls, path, column_names=None, column_types=None, row_names=None, sk close = False - if hasattr(path, 'read'): - f = path - else: - if six.PY2: - f = open(path, 'Urb') + try: + if hasattr(path, 'read'): + f = path else: - f = io.open(path, encoding=encoding) + if six.PY2: + f = open(path, 'Urb') + else: + f = io.open(path, encoding=encoding) - close = True + close = True - if isinstance(skip_lines, int): - while skip_lines > 0: - f.readline() - skip_lines -= 1 - else: - raise ValueError('skip_lines argument must be an int') + if isinstance(skip_lines, int): + while skip_lines > 0: + f.readline() + skip_lines -= 1 + else: + raise ValueError('skip_lines argument must be an int') - contents = six.StringIO(f.read()) + contents = six.StringIO(f.read()) - if sniff_limit is None: - kwargs['dialect'] = csv.Sniffer().sniff(contents.getvalue()) - elif sniff_limit > 0: - kwargs['dialect'] = csv.Sniffer().sniff(contents.getvalue()[:sniff_limit]) + if sniff_limit is None: + kwargs['dialect'] = csv.Sniffer().sniff(contents.getvalue()) + elif sniff_limit > 0: + kwargs['dialect'] = csv.Sniffer().sniff(contents.getvalue()[:sniff_limit]) - if six.PY2: - kwargs['encoding'] = encoding + if six.PY2: + kwargs['encoding'] = encoding - reader = csv.reader(contents, header=header, **kwargs) + reader = csv.reader(contents, header=header, **kwargs) - if header: - if column_names is None: - column_names = next(reader) - else: - next(reader) + if header: + if column_names is None: + column_names = next(reader) + else: + next(reader) - rows = tuple(reader) + rows = tuple(reader) - if close: - f.close() + finally: + if close: + f.close() return Table(rows, column_names, column_types, row_names=row_names) diff --git a/agate/table/from_fixed.py b/agate/table/from_fixed.py index 7a05bd78..f73fee85 100644 --- a/agate/table/from_fixed.py +++ b/agate/table/from_fixed.py @@ -38,28 +38,30 @@ def from_fixed(cls, path, schema_path, column_names=utils.default, column_types= close_f = False - if not hasattr(path, 'read'): - f = io.open(path, encoding=encoding) - close_f = True - else: - f = path - close_schema_f = False - if not hasattr(schema_path, 'read'): - schema_f = io.open(schema_path, encoding=schema_encoding) - close_schema_f = True - else: - schema_f = path + try: + if not hasattr(path, 'read'): + f = io.open(path, encoding=encoding) + close_f = True + else: + f = path + + if not hasattr(schema_path, 'read'): + schema_f = io.open(schema_path, encoding=schema_encoding) + close_schema_f = True + else: + schema_f = path - reader = fixed.reader(f, schema_f) - rows = list(reader) + reader = fixed.reader(f, schema_f) + rows = list(reader) - if close_f: - f.close() + finally: + if close_f: + f.close() - if close_schema_f: - schema_f.close() + if close_schema_f: + schema_f.close() if column_names == utils.default: column_names = reader.fieldnames diff --git a/agate/table/from_json.py b/agate/table/from_json.py index 41956728..8516702a 100644 --- a/agate/table/from_json.py +++ b/agate/table/from_json.py @@ -2,11 +2,13 @@ from collections import OrderedDict from decimal import Decimal +import io import json +import six @classmethod -def from_json(cls, path, row_names=None, key=None, newline=False, column_types=None, **kwargs): +def from_json(cls, path, row_names=None, key=None, newline=False, column_types=None, encoding='utf-8', **kwargs): """ Create a new table from a JSON file. @@ -29,33 +31,49 @@ def from_json(cls, path, row_names=None, key=None, newline=False, column_types=N If `True` then the file will be parsed as "newline-delimited JSON". :param column_types: See :meth:`.Table.__init__`. + :param encoding: + According to RFC4627, JSON text shall be encoded in Unicode; the default encoding is + UTF-8. You can override this by using any encoding supported by your Python's open() function + if :code:`path` is a filepath. If passing in a file handle, it is assumed you have already opened it with the correct + encoding specified. """ from agate.table import Table if key is not None and newline: raise ValueError('key and newline may not be specified together.') - if newline: - js = [] + close = False + + try: + if newline: + js = [] + + if hasattr(path, 'read'): + for line in path: + js.append(json.loads(line, object_pairs_hook=OrderedDict, parse_float=Decimal, **kwargs)) + else: + f = io.open(path, encoding=encoding) + close = True - if hasattr(path, 'read'): - for line in path: - js.append(json.loads(line, object_pairs_hook=OrderedDict, parse_float=Decimal, **kwargs)) - else: - with open(path, 'r') as f: for line in f: js.append(json.loads(line, object_pairs_hook=OrderedDict, parse_float=Decimal, **kwargs)) - else: - if hasattr(path, 'read'): - js = json.load(path, object_pairs_hook=OrderedDict, parse_float=Decimal, **kwargs) else: - with open(path, 'r') as f: + if hasattr(path, 'read'): + js = json.load(path, object_pairs_hook=OrderedDict, parse_float=Decimal, **kwargs) + else: + f = io.open(path, encoding=encoding) + close = True + js = json.load(f, object_pairs_hook=OrderedDict, parse_float=Decimal, **kwargs) - if isinstance(js, dict): - if not key: - raise TypeError('When converting a JSON document with a top-level dictionary element, a key must be specified.') + if isinstance(js, dict): + if not key: + raise TypeError('When converting a JSON document with a top-level dictionary element, a key must be specified.') + + js = js[key] - js = js[key] + finally: + if close: + f.close() return Table.from_object(js, row_names=row_names, column_types=column_types) diff --git a/docs/install.rst b/docs/install.rst index 961976d8..c9491b07 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -45,7 +45,7 @@ Supported platforms agate supports the following versions of Python: * Python 2.7 -* Python 3.4+ +* Python 3.5+ * `PyPy `_ versions >= 4.0.0 It is tested primarily on OSX, but due to its minimal dependencies it should work perfectly on both Linux and Windows. diff --git a/setup.py b/setup.py index 6c52e5d4..f797dbba 100644 --- a/setup.py +++ b/setup.py @@ -34,9 +34,10 @@ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', 'Topic :: Scientific/Engineering :: Information Analysis', diff --git a/tests/test_data_types.py b/tests/test_data_types.py index 3d05033c..4476631d 100644 --- a/tests/test_data_types.py +++ b/tests/test_data_types.py @@ -381,15 +381,31 @@ def test_cast_format(self): def test_cast_format_locale(self): date_type = DateTime(datetime_format='%Y-%m-%d %I:%M %p', locale='ko_KR') - values = ('1994-03-01 12:30 오후', '2011-02-17 06:30 오전', None, '1984-01-05 06:30 오후', 'n/a') - casted = tuple(date_type.cast(v) for v in values) - self.assertSequenceEqual(casted, ( - datetime.datetime(1994, 3, 1, 12, 30, 0), - datetime.datetime(2011, 2, 17, 6, 30, 0), - None, - datetime.datetime(1984, 1, 5, 18, 30, 0), - None - )) + # Date formats depend on the platform's strftime/strptime implementation; + # some platforms like macOS always return AM/PM for day periods (%p), + # so we will catch any CastError that may arise from the conversion + possible_values = ( + ('1994-03-01 12:30 오후', '2011-02-17 06:30 오전', None, '1984-01-05 06:30 오후', 'n/a'), + ('1994-03-01 12:30 PM', '2011-02-17 06:30 AM', None, '1984-01-05 06:30 PM', 'n/a'), + ) + valid = False + exceptions = [] + for values in possible_values: + try: + casted = tuple(date_type.cast(v) for v in values) + except CastError as e: + exceptions.append(repr(e)) + continue + self.assertSequenceEqual(casted, ( + datetime.datetime(1994, 3, 1, 12, 30, 0), + datetime.datetime(2011, 2, 17, 6, 30, 0), + None, + datetime.datetime(1984, 1, 5, 18, 30, 0), + None + )) + valid = True + if not valid: + raise AssertionError('\n\n'.join(exceptions)) def test_cast_locale(self): date_type = DateTime(locale='fr_FR') diff --git a/tests/test_from_json.py b/tests/test_from_json.py index 0f8aa5b6..3674711d 100644 --- a/tests/test_from_json.py +++ b/tests/test_from_json.py @@ -5,6 +5,7 @@ from agate.testcase import AgateTestCase from agate.data_types import * from agate.type_tester import TypeTester +import six class TestJSON(AgateTestCase): @@ -34,8 +35,12 @@ def test_from_json(self): def test_from_json_file_like_object(self): table1 = Table(self.rows, self.column_names, self.column_types) - with open('examples/test.json') as f: - table2 = Table.from_json(f) + if six.PY2: + with open('examples/test.json') as f: + table2 = Table.from_json(f) + else: + with open('examples/test.json', encoding='utf-8') as f: + table2 = Table.from_json(f) self.assertColumnNames(table2, self.column_names) self.assertColumnTypes(table2, [Number, Text, Boolean, Date, DateTime, TimeDelta]) diff --git a/tests/test_py3.py b/tests/test_py3.py index be3e6066..1a85d9c0 100644 --- a/tests/test_py3.py +++ b/tests/test_py3.py @@ -50,7 +50,7 @@ def test_properties(self): self.assertEqual(reader.line_num, 1) def test_line_numbers(self): - with open('examples/test.csv') as f: + with open('examples/test.csv', encoding='utf-8') as f: rows = list(csv_py3.Reader(f, line_numbers=True)) sample_rows = [ @@ -69,7 +69,7 @@ class TestFieldSizeLimit(unittest.TestCase): def setUp(self): self.lim = csv.field_size_limit() - with open('.test.csv', 'w') as f: + with open('.test.csv', 'w', encoding='utf-8') as f: f.write('a' * 10) def tearDown(self): @@ -79,7 +79,7 @@ def tearDown(self): def test_field_size_limit(self): # Testing field_size_limit for failure. Creating data using str * int. - with open('.test.csv', 'r') as f: + with open('.test.csv', 'r', encoding='utf-8') as f: c = csv_py3.Reader(f, field_size_limit=9) try: c.__next__() @@ -89,7 +89,7 @@ def test_field_size_limit(self): raise AssertionError('Expected FieldSizeLimitError') # Now testing higher field_size_limit. - with open('.test.csv', 'r') as f: + with open('.test.csv', 'r', encoding='utf-8') as f: c = csv_py3.Reader(f, field_size_limit=11) self.assertEqual(['a' * 10], c.__next__()) @@ -165,7 +165,7 @@ def setUp(self): ['', 'b', '', '', '', ''] ] - self.f = open('examples/test.csv') + self.f = open('examples/test.csv', encoding='utf-8') def tearDown(self): self.f.close() @@ -248,6 +248,6 @@ def setUp(self): pass def test_sniffer(self): - with open('examples/test.csv') as f: + with open('examples/test.csv', encoding='utf-8') as f: contents = f.read() self.assertEqual(csv_py3.Sniffer().sniff(contents).__dict__, csv.Sniffer().sniff(contents).__dict__) diff --git a/tests/test_table/test_bins.py b/tests/test_table/test_bins.py index cabff86a..4245d3ee 100644 --- a/tests/test_table/test_bins.py +++ b/tests/test_table/test_bins.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf8 -*- +from babel.numbers import get_decimal_symbol try: from cdecimal import Decimal except ImportError: # pragma: no cover @@ -110,9 +111,9 @@ def test_bins_decimals(self): self.assertColumnNames(new_table, ['number', 'Count']) self.assertColumnTypes(new_table, [Text, Number]) - self.assertSequenceEqual(new_table.rows[0], ['[0.0 - 0.1)', 10]) - self.assertSequenceEqual(new_table.rows[3], ['[0.3 - 0.4)', 10]) - self.assertSequenceEqual(new_table.rows[9], ['[0.9 - 1.0]', 10]) + self.assertSequenceEqual(new_table.rows[0], [u'[0' + get_decimal_symbol() + u'0 - 0' + get_decimal_symbol() + u'1)', 10]) + self.assertSequenceEqual(new_table.rows[3], [u'[0' + get_decimal_symbol() + u'3 - 0' + get_decimal_symbol() + u'4)', 10]) + self.assertSequenceEqual(new_table.rows[9], [u'[0' + get_decimal_symbol() + u'9 - 1' + get_decimal_symbol() + u'0]', 10]) def test_bins_nulls(self): rows = [] @@ -127,7 +128,7 @@ def test_bins_nulls(self): self.assertColumnNames(new_table, ['number', 'Count']) self.assertColumnTypes(new_table, [Text, Number]) - self.assertSequenceEqual(new_table.rows[0], ['[0.0 - 0.1)', 10]) - self.assertSequenceEqual(new_table.rows[3], ['[0.3 - 0.4)', 10]) - self.assertSequenceEqual(new_table.rows[9], ['[0.9 - 1.0]', 10]) + self.assertSequenceEqual(new_table.rows[0], [u'[0' + get_decimal_symbol() + u'0 - 0' + get_decimal_symbol() + u'1)', 10]) + self.assertSequenceEqual(new_table.rows[3], [u'[0' + get_decimal_symbol() + u'3 - 0' + get_decimal_symbol() + u'4)', 10]) + self.assertSequenceEqual(new_table.rows[9], [u'[0' + get_decimal_symbol() + u'9 - 1' + get_decimal_symbol() + u'0]', 10]) self.assertSequenceEqual(new_table.rows[10], [None, 1]) diff --git a/tests/test_table/test_print_bars.py b/tests/test_table/test_print_bars.py index 847227a1..5026280f 100644 --- a/tests/test_table/test_print_bars.py +++ b/tests/test_table/test_print_bars.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf8 -*- +from babel.numbers import format_decimal import six from agate import Table @@ -98,8 +99,8 @@ def test_print_bars_with_nulls(self): output=output) self.assertEqual(output.getvalue(), "three two\n" - "a 2,000 |:::::::\n" + "a " + format_decimal(2000, format=u'#,##0') + " |:::::::\n" "None - | \n" "c 1 | \n" " +------+\n" - " 0 2,000\n") + " 0 " + format_decimal(2000, format=u'#,##0') + "\n") diff --git a/tests/test_table/test_print_table.py b/tests/test_table/test_print_table.py index e15197f9..7efc3079 100644 --- a/tests/test_table/test_print_table.py +++ b/tests/test_table/test_print_table.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf8 -*- +from babel.numbers import get_decimal_symbol import six from agate import Table @@ -11,19 +12,21 @@ class TestPrintTable(AgateTestCase): def setUp(self): self.rows = ( - ('1.7', 2000, 'a'), - ('11.18', None, None), - ('0', 1, 'c') + ('1.7', 2000, 2000, 'a'), + ('11.18', None, None, None), + ('0', 1, 1, 'c') ) self.number_type = Number() - self.international_number_type = Number(locale='de_DE') + self.american_number_type = Number(locale='en_US') + self.german_number_type = Number(locale='de_DE') self.text_type = Text() - self.column_names = ['one', 'two', 'three'] + self.column_names = ['one', 'two', 'three', 'four'] self.column_types = [ self.number_type, - self.international_number_type, + self.american_number_type, + self.german_number_type, self.text_type ] @@ -35,7 +38,7 @@ def test_print_table(self): lines = output.getvalue().split('\n') self.assertEqual(len(lines), 6) - self.assertEqual(len(lines[0]), 25) + self.assertEqual(len(lines[0]), 32) def test_print_table_max_rows(self): table = Table(self.rows, self.column_names, self.column_types) @@ -45,7 +48,7 @@ def test_print_table_max_rows(self): lines = output.getvalue().split('\n') self.assertEqual(len(lines), 6) - self.assertEqual(len(lines[0]), 25) + self.assertEqual(len(lines[0]), 32) def test_print_table_max_columns(self): table = Table(self.rows, self.column_names, self.column_types) @@ -81,22 +84,22 @@ def test_print_table_max_precision(self): self.assertIn(u' 11.123456 ', lines[3]) self.assertIn(u' 0 ', lines[4]) # Test real precision above max - self.assertIn(u' 1.74… ', lines[2]) - self.assertIn(u' 11.12… ', lines[3]) - self.assertIn(u' 0.00… ', lines[4]) + self.assertIn(u' 1' + get_decimal_symbol() + u'74… ', lines[2]) + self.assertIn(u' 11' + get_decimal_symbol() + u'12… ', lines[3]) + self.assertIn(u' 0' + get_decimal_symbol() + u'00… ', lines[4]) # Test real precision below max - self.assertIn(u' 1.72 ', lines[2]) - self.assertIn(u' 5.10 ', lines[3]) - self.assertIn(u' 0.10 ', lines[4]) + self.assertIn(u' 1' + get_decimal_symbol() + u'72 ', lines[2]) + self.assertIn(u' 5' + get_decimal_symbol() + u'10 ', lines[3]) + self.assertIn(u' 0' + get_decimal_symbol() + u'10 ', lines[4]) def test_print_table_max_column_width(self): rows = ( - ('1.7', 2, 'this is long'), - ('11.18', None, None), - ('0', 1, 'nope') + ('1.7', 2, 2, 'this is long'), + ('11.18', None, None, None), + ('0', 1, 1, 'nope') ) - column_names = ['one', 'two', 'also, this is long'] + column_names = ['one', 'two', 'three', 'also, this is long'] table = Table(rows, column_names, self.column_types) output = six.StringIO() @@ -107,9 +110,21 @@ def test_print_table_max_column_width(self): self.assertIn(' this... ', lines[2]) self.assertIn(' nope ', lines[4]) - def test_print_table_locale(self): + def test_print_table_locale_american(self): """ - Verify that the locale of the international number is correctly + Verify that the locale of the german number is correctly + controlling the format of how it is printed. + """ + table = Table(self.rows, self.column_names, self.column_types) + + output = six.StringIO() + table.print_table(max_columns=2, output=output, locale='en_US') + # If it's working, 2000 should appear as the english '2,000' + self.assertTrue("2,000" in output.getvalue()) + + def test_print_table_locale_german(self): + """ + Verify that the locale of the german number is correctly controlling the format of how it is printed. """ table = Table(self.rows, self.column_names, self.column_types) diff --git a/tox.ini b/tox.ini index 3cde685d..1fc719e9 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py27,py33,py34,py35,py36,pypy +envlist = py27,py35,py36,py37,py38,pypy2,pypy3 [testenv] commands=nosetests tests @@ -7,21 +7,24 @@ commands=nosetests tests [testenv:py27] deps = -rrequirements-py2.txt -[testenv:py33] +[testenv:py35] deps = -rrequirements-py3.txt -[testenv:py34] -deps = {[testenv:py33]deps} +[testenv:py36] +deps = {[testenv:py35]deps} -[testenv:py35] -deps = {[testenv:py33]deps} +[testenv:py37] +deps = {[testenv:py35]deps} -[testenv:py36] -deps = {[testenv:py33]deps} +[testenv:py38] +deps = {[testenv:py35]deps} -[testenv:pypy] +[testenv:pypy2] deps = {[testenv:py27]deps} +[testenv:pypy3] +deps = {[testenv:py35]deps} + [flake8] ignore=E128,E402,E501,F403 # E128 continuation line under-indented for visual indent