From 3ee43efa78169bb0f3ccc621ad6e856d0432280e Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen <3275109+hukkin@users.noreply.github.com> Date: Fri, 23 Jul 2021 00:56:30 +0300 Subject: [PATCH 1/3] Made `load` support binary file objects --- CHANGELOG.md | 5 +++++ README.md | 2 +- tests/test_misc.py | 14 +++++++++++--- tomli/_parser.py | 6 ++++-- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78d349a..21158e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 1.1.0 + +- Added + - `load` can now take a binary file object + ## 1.0.4 - Performance diff --git a/README.md b/README.md index a66f8a1..9d3073e 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ assert toml_dict == {"gretzky": 99, "kurri": {"jari": 17}} ```python import tomli -with open("path_to_file/conf.toml", encoding="utf-8") as f: +with open("path_to_file/conf.toml", "rb") as f: toml_dict = tomli.load(f) ``` diff --git a/tests/test_misc.py b/tests/test_misc.py index fe38f18..4a23d7b 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -8,11 +8,19 @@ def test_load(tmp_path): content = "one=1 \n two='two' \n arr=[]" + expected = {"one": 1, "two": "two", "arr": []} file_path = tmp_path / "test.toml" file_path.write_text(content) - with open(file_path, encoding="utf-8") as f: + + # Test text mode + with open(file_path, encoding="utf-8", newline="") as f: actual = tomli.load(f) - assert actual == {"one": 1, "two": "two", "arr": []} + assert actual == expected + + # Test binary mode + with open(file_path, "rb") as bin_f: + actual = tomli.load(bin_f) + assert actual == expected def test_parse_float(): @@ -75,6 +83,6 @@ def test_deepcopy(): def test_own_pyproject(): pyproject_path = Path(__file__).parent.parent / "pyproject.toml" - with open(pyproject_path, encoding="utf-8") as f: + with open(pyproject_path, "rb") as f: pyproject = tomli.load(f) assert pyproject["project"]["version"] == tomli.__version__ diff --git a/tomli/_parser.py b/tomli/_parser.py index 12d4f63..3e46c04 100644 --- a/tomli/_parser.py +++ b/tomli/_parser.py @@ -1,6 +1,7 @@ import string from types import MappingProxyType from typing import ( + IO, Any, Callable, Dict, @@ -8,7 +9,6 @@ Iterable, NamedTuple, Optional, - TextIO, Tuple, ) @@ -61,9 +61,11 @@ class TOMLDecodeError(ValueError): """An error raised if a document is not valid TOML.""" -def load(fp: TextIO, *, parse_float: ParseFloat = float) -> Dict[str, Any]: +def load(fp: IO, *, parse_float: ParseFloat = float) -> Dict[str, Any]: """Parse TOML from a file object.""" s = fp.read() + if isinstance(s, bytes): + s = s.decode() return loads(s, parse_float=parse_float) From 5cda8dd09fe35987b4470f602e0502fec624ce53 Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen <3275109+hukkin@users.noreply.github.com> Date: Fri, 23 Jul 2021 01:52:15 +0300 Subject: [PATCH 2/3] Dont convert newlines in tests --- benchmark/run.py | 2 +- tests/test_extras.py | 6 +++--- tests/test_for_profiler.py | 2 +- tests/test_toml_compliance.py | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/benchmark/run.py b/benchmark/run.py index 82dd8ce..6ef958f 100644 --- a/benchmark/run.py +++ b/benchmark/run.py @@ -37,7 +37,7 @@ def benchmark( def run(run_count: int) -> None: data_path = Path(__file__).parent / "data.toml" - test_data = data_path.read_text(encoding="utf-8") + test_data = data_path.read_bytes().decode() col_width = (10, 10, 28) col_head = ("parser", "exec time", "performance (more is better)") print(f"Parsing data.toml {run_count} times:") diff --git a/tests/test_extras.py b/tests/test_extras.py index 5b1f89f..b1bc401 100644 --- a/tests/test_extras.py +++ b/tests/test_extras.py @@ -10,7 +10,7 @@ VALID_FILES = tuple((DATA_DIR / "valid").glob("**/*.toml")) VALID_FILES_EXPECTED = tuple( - json.loads(p.with_suffix(".json").read_text("utf-8")) for p in VALID_FILES + json.loads(p.with_suffix(".json").read_bytes().decode()) for p in VALID_FILES ) INVALID_FILES = tuple((DATA_DIR / "invalid").glob("**/*.toml")) @@ -22,7 +22,7 @@ ids=[p.stem for p in INVALID_FILES], ) def test_invalid(invalid): - toml_str = invalid.read_text(encoding="utf-8") + toml_str = invalid.read_bytes().decode() with pytest.raises(tomli.TOMLDecodeError): tomli.loads(toml_str) @@ -33,7 +33,7 @@ def test_invalid(invalid): ids=[p.stem for p in VALID_FILES], ) def test_valid(valid, expected): - toml_str = valid.read_text(encoding="utf-8") + toml_str = valid.read_bytes().decode() actual = tomli.loads(toml_str) actual = burntsushi.convert(actual) expected = burntsushi.normalize(expected) diff --git a/tests/test_for_profiler.py b/tests/test_for_profiler.py index 03f8618..df519b7 100644 --- a/tests/test_for_profiler.py +++ b/tests/test_for_profiler.py @@ -14,7 +14,7 @@ def test_for_profiler(): path = Path(__file__).parent.parent / "benchmark" / "data.toml" - benchmark_toml = path.read_text("utf-8") + benchmark_toml = path.read_bytes().decode() # increase the count here to reduce the impact of # setting up pytest execution environment. Let's keep # the count low by default because this is part of the diff --git a/tests/test_toml_compliance.py b/tests/test_toml_compliance.py index 58792bd..323ead9 100644 --- a/tests/test_toml_compliance.py +++ b/tests/test_toml_compliance.py @@ -16,13 +16,13 @@ def __init__(self, path: Path): VALID_FILES = tuple((DATA_DIR / "valid").glob("**/*.toml")) # VALID_FILES_EXPECTED = tuple( -# json.loads(p.with_suffix(".json").read_text("utf-8")) for p in VALID_FILES +# json.loads(p.with_suffix(".json").read_bytes().decode()) for p in VALID_FILES # ) _expected_files = [] for p in VALID_FILES: json_path = p.with_suffix(".json") try: - text = json.loads(json_path.read_text("utf-8")) + text = json.loads(json_path.read_bytes().decode()) except FileNotFoundError: text = MissingFile(json_path) _expected_files.append(text) @@ -37,7 +37,7 @@ def __init__(self, path: Path): ids=[p.stem for p in INVALID_FILES], ) def test_invalid(invalid): - toml_str = invalid.read_text(encoding="utf-8") + toml_str = invalid.read_bytes().decode() with pytest.raises(tomli.TOMLDecodeError): tomli.loads(toml_str) @@ -50,7 +50,7 @@ def test_invalid(invalid): def test_valid(valid, expected): if isinstance(expected, MissingFile): pytest.xfail(f"Missing a .json file corresponding the .toml: {expected.path}") - toml_str = valid.read_text(encoding="utf-8") + toml_str = valid.read_bytes().decode() actual = tomli.loads(toml_str) actual = burntsushi.convert(actual) expected = burntsushi.normalize(expected) From bd7c7491c136fc404bb899063839369bee8557e6 Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen <3275109+hukkin@users.noreply.github.com> Date: Fri, 23 Jul 2021 11:27:46 +0300 Subject: [PATCH 3/3] Improve docs --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 9d3073e..f138555 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,11 @@ with open("path_to_file/conf.toml", "rb") as f: toml_dict = tomli.load(f) ``` +Opening the file in binary mode (with the `"rb"` flag) is highly encouraged. +Binary mode will enforce decoding the file as UTF-8 with universal newlines disabled, +both of which are required to correctly parse TOML. +Support for text file objects may be deprecated for removal in a future release. + ### Handle invalid TOML ```python