Add binary file object support to load (#103)

* Made `load` support binary file objects * Don't convert newlines in tests * Improve docs
hukkin · Jul 23, 2021 · b9cbbe2 · b9cbbe2
1 parent e58284a
commit b9cbbe2
Show file tree

Hide file tree

Showing 8 changed files with 35 additions and 15 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changelog
 
+## 1.1.0
+
+- Added
+  - `load` can now take a binary file object
+
 ## 1.0.4
 
 - Performance

diff --git a/README.md b/README.md
@@ -60,10 +60,15 @@ assert toml_dict == {"gretzky": 99, "kurri": {"jari": 17}}
 ```python
 import tomli
 
-with open("path_to_file/conf.toml", encoding="utf-8") as f:
+with open("path_to_file/conf.toml", "rb") as f:
     toml_dict = tomli.load(f)
 ```
 
+Opening the file in binary mode (with the `"rb"` flag) is highly encouraged.
+Binary mode will enforce decoding the file as UTF-8 with universal newlines disabled,
+both of which are required to correctly parse TOML.
+Support for text file objects may be deprecated for removal in a future release.
+
 ### Handle invalid TOML<a name="handle-invalid-toml"></a>
 
 ```python

diff --git a/benchmark/run.py b/benchmark/run.py
@@ -37,7 +37,7 @@ def benchmark(
 
 def run(run_count: int) -> None:
     data_path = Path(__file__).parent / "data.toml"
-    test_data = data_path.read_text(encoding="utf-8")
+    test_data = data_path.read_bytes().decode()
     col_width = (10, 10, 28)
     col_head = ("parser", "exec time", "performance (more is better)")
     print(f"Parsing data.toml {run_count} times:")

diff --git a/tests/test_extras.py b/tests/test_extras.py
@@ -10,7 +10,7 @@
 
 VALID_FILES = tuple((DATA_DIR / "valid").glob("**/*.toml"))
 VALID_FILES_EXPECTED = tuple(
-    json.loads(p.with_suffix(".json").read_text("utf-8")) for p in VALID_FILES
+    json.loads(p.with_suffix(".json").read_bytes().decode()) for p in VALID_FILES
 )
 
 INVALID_FILES = tuple((DATA_DIR / "invalid").glob("**/*.toml"))
@@ -22,7 +22,7 @@
     ids=[p.stem for p in INVALID_FILES],
 )
 def test_invalid(invalid):
-    toml_str = invalid.read_text(encoding="utf-8")
+    toml_str = invalid.read_bytes().decode()
     with pytest.raises(tomli.TOMLDecodeError):
         tomli.loads(toml_str)
 
@@ -33,7 +33,7 @@ def test_invalid(invalid):
     ids=[p.stem for p in VALID_FILES],
 )
 def test_valid(valid, expected):
-    toml_str = valid.read_text(encoding="utf-8")
+    toml_str = valid.read_bytes().decode()
     actual = tomli.loads(toml_str)
     actual = burntsushi.convert(actual)
     expected = burntsushi.normalize(expected)

diff --git a/tests/test_for_profiler.py b/tests/test_for_profiler.py
@@ -14,7 +14,7 @@
 
 def test_for_profiler():
     path = Path(__file__).parent.parent / "benchmark" / "data.toml"
-    benchmark_toml = path.read_text("utf-8")
+    benchmark_toml = path.read_bytes().decode()
     # increase the count here to reduce the impact of
     # setting up pytest execution environment. Let's keep
     # the count low by default because this is part of the

diff --git a/tests/test_misc.py b/tests/test_misc.py
@@ -8,11 +8,19 @@
 
 def test_load(tmp_path):
     content = "one=1 \n two='two' \n arr=[]"
+    expected = {"one": 1, "two": "two", "arr": []}
     file_path = tmp_path / "test.toml"
     file_path.write_text(content)
-    with open(file_path, encoding="utf-8") as f:
+
+    # Test text mode
+    with open(file_path, encoding="utf-8", newline="") as f:
         actual = tomli.load(f)
-    assert actual == {"one": 1, "two": "two", "arr": []}
+    assert actual == expected
+
+    # Test binary mode
+    with open(file_path, "rb") as bin_f:
+        actual = tomli.load(bin_f)
+    assert actual == expected
 
 
 def test_parse_float():
@@ -75,6 +83,6 @@ def test_deepcopy():
 
 def test_own_pyproject():
     pyproject_path = Path(__file__).parent.parent / "pyproject.toml"
-    with open(pyproject_path, encoding="utf-8") as f:
+    with open(pyproject_path, "rb") as f:
         pyproject = tomli.load(f)
     assert pyproject["project"]["version"] == tomli.__version__
diff --git a/tests/test_toml_compliance.py b/tests/test_toml_compliance.py
@@ -16,13 +16,13 @@ def __init__(self, path: Path):
 
 VALID_FILES = tuple((DATA_DIR / "valid").glob("**/*.toml"))
 # VALID_FILES_EXPECTED = tuple(
-#     json.loads(p.with_suffix(".json").read_text("utf-8")) for p in VALID_FILES
+#     json.loads(p.with_suffix(".json").read_bytes().decode()) for p in VALID_FILES
 # )
 _expected_files = []
 for p in VALID_FILES:
     json_path = p.with_suffix(".json")
     try:
-        text = json.loads(json_path.read_text("utf-8"))
+        text = json.loads(json_path.read_bytes().decode())
     except FileNotFoundError:
         text = MissingFile(json_path)
     _expected_files.append(text)
@@ -37,7 +37,7 @@ def __init__(self, path: Path):
     ids=[p.stem for p in INVALID_FILES],
 )
 def test_invalid(invalid):
-    toml_str = invalid.read_text(encoding="utf-8")
+    toml_str = invalid.read_bytes().decode()
     with pytest.raises(tomli.TOMLDecodeError):
         tomli.loads(toml_str)
 
@@ -50,7 +50,7 @@ def test_invalid(invalid):
 def test_valid(valid, expected):
     if isinstance(expected, MissingFile):
         pytest.xfail(f"Missing a .json file corresponding the .toml: {expected.path}")
-    toml_str = valid.read_text(encoding="utf-8")
+    toml_str = valid.read_bytes().decode()
     actual = tomli.loads(toml_str)
     actual = burntsushi.convert(actual)
     expected = burntsushi.normalize(expected)

diff --git a/tomli/_parser.py b/tomli/_parser.py
@@ -1,14 +1,14 @@
 import string
 from types import MappingProxyType
 from typing import (
+    IO,
     Any,
     Callable,
     Dict,
     FrozenSet,
     Iterable,
     NamedTuple,
     Optional,
-    TextIO,
     Tuple,
 )
 
@@ -61,9 +61,11 @@ class TOMLDecodeError(ValueError):
     """An error raised if a document is not valid TOML."""
 
 
-def load(fp: TextIO, *, parse_float: ParseFloat = float) -> Dict[str, Any]:
+def load(fp: IO, *, parse_float: ParseFloat = float) -> Dict[str, Any]:
     """Parse TOML from a file object."""
     s = fp.read()
+    if isinstance(s, bytes):
+        s = s.decode()
     return loads(s, parse_float=parse_float)