From 4c067b74d84e04f3cdf246eda6dce5e02567172b Mon Sep 17 00:00:00 2001 From: Douglas Myers-Turnbull Date: Sun, 21 Mar 2021 16:36:15 -0700 Subject: [PATCH] feat: general read/write methods --- CHANGELOG.md | 3 + README.md | 12 +- poetry.lock | 246 ++++++++++++---------- pyproject.toml | 6 +- tests/__init__.py | 12 +- tests/test_fancy_read_write.py | 63 ++++++ tests/test_read_write.py | 4 +- typeddfs/__init__.py | 20 +- typeddfs/base_dfs.py | 362 +++++++++++++++++++++++++++++---- typeddfs/typed_dfs.py | 16 +- typeddfs/untyped_dfs.py | 1 + 11 files changed, 577 insertions(+), 168 deletions(-) create mode 100644 tests/test_fancy_read_write.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ed52b9d..d56ec08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,11 @@ Adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and ## Added - Read/write wrappers for Feather, Parquet, and JSON +- Added general functions `read_file` and `write_file` +- `TypeDfs.wrap` and `FinalDf` ### Fixed +- `to_csv` was not passing along `args` and `kwargs` - Slightly better build config ## [0.5.0] - 2021-01-19 diff --git a/README.md b/README.md index 5f9307e..e478727 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ [![Documentation status](https://readthedocs.org/projects/typed-dfs/badge)](https://typed-dfs.readthedocs.io/en/stable/) [![Coverage (coveralls)](https://coveralls.io/repos/github/dmyersturnbull/typed-dfs/badge.svg?branch=main&service=github)](https://coveralls.io/github/dmyersturnbull/typed-dfs?branch=main) [![Maintainability](https://api.codeclimate.com/v1/badges/6b804351b6ba5e7694af/maintainability)](https://codeclimate.com/github/dmyersturnbull/typed-dfs/maintainability) -[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/dmyersturnbull/typed-dfs/badges/quality-score.png?b=main)](https://scrutinizer-ci.com/g/dmyersturnbull/typed-dfs/?branch=main) +[![Scrutinizer Code 
Quality](https://scrutinizer-ci.com/g/dmyersturnbull/typed-dfs/badges/quality-score.png?b=main)](https://scrutinizer-ci.com/g/dmyersturnbull/typed-dfs/?branch=main) [![Created with Tyrannosaurus](https://img.shields.io/badge/Created_with-Tyrannosaurus-0000ff.svg)](https://github.com/dmyersturnbull/tyrannosaurus) @@ -35,7 +35,7 @@ MyDfType = ( so **`read_csv` and `to_csv` are inverses**. `MyDf.read_csv(mydf.to_csv())` is `mydf`. - DataFrames display elegantly in Jupyter notebooks. -- Extra methods such as `sort_natural` and `drop_cols`. +- Extra methods such as `sort_natural` and `write_file`. ### 🎨 Example @@ -77,6 +77,14 @@ Use `.untyped()` or `.vanilla()` to make a detyped copy that doesn’t enforce r ### 🔌 Serialization support +Like Pandas, TypedDfs can read and write to various formats. +It provides the methods `read_file` and `write_file`, which guess the format from the +filename extension. For example, `df.write_file("myfile.snappy")` writes Parquet files, +and `df.write_file("myfile.tab.gz")` writes a gzipped, tab-delimited file. +The `read_file` method works the same way: `MyDf.read_file("myfile.feather")` will +read an Apache Arrow Feather file, and `MyDf.read_file("myfile.json.gzip")` reads +a gzipped JSON file. You can pass keyword arguments to those functions. + Serialization is provided through Pandas, and some formats require additional packages. Pandas does not specify compatible versions, so typed-dfs specifies [extras](https://python-poetry.org/docs/pyproject/#extras) are provided in typed-dfs diff --git a/poetry.lock b/poetry.lock index cd61ddd..ba62a1a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -16,7 +16,7 @@ python-versions = "*" [[package]] name = "astroid" -version = "2.5.1" +version = "2.5.2" description = "An abstract syntax tree for Python with inference support."
category = "dev" optional = false @@ -146,20 +146,20 @@ python-versions = "*" [[package]] name = "flake8" -version = "3.8.4" +version = "3.9.0" description = "the modular source code checker: pep8 pyflakes and co" category = "dev" optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" [package.dependencies] mccabe = ">=0.6.0,<0.7.0" -pycodestyle = ">=2.6.0a1,<2.7.0" -pyflakes = ">=2.2.0,<2.3.0" +pycodestyle = ">=2.7.0,<2.8.0" +pyflakes = ">=2.3.0,<2.4.0" [[package]] name = "flake8-bugbear" -version = "20.11.1" +version = "21.3.2" description = "A plugin for flake8 finding likely bugs and design problems in your program. Contains warnings that don't belong in pyflakes and pycodestyle." category = "dev" optional = false @@ -174,7 +174,7 @@ dev = ["coverage", "black", "hypothesis", "hypothesmith"] [[package]] name = "flake8-docstrings" -version = "1.5.0" +version = "1.6.0" description = "Extension for flake8 which uses pydocstyle to check docstrings" category = "dev" optional = false @@ -186,14 +186,14 @@ pydocstyle = ">=2.1" [[package]] name = "gitdb" -version = "4.0.5" +version = "4.0.7" description = "Git Object Database" category = "dev" optional = false python-versions = ">=3.4" [package.dependencies] -smmap = ">=3.0.1,<4" +smmap = ">=3.0.1,<5" [[package]] name = "gitpython" @@ -208,14 +208,14 @@ gitdb = ">=4.0.1,<5" [[package]] name = "identify" -version = "2.0.0" +version = "2.2.2" description = "File identification library for Python" category = "dev" optional = false python-versions = ">=3.6.1" [package.extras] -license = ["editdistance"] +license = ["editdistance-s"] [[package]] name = "idna" @@ -257,11 +257,11 @@ i18n = ["Babel (>=0.8)"] [[package]] name = "lazy-object-proxy" -version = "1.5.2" +version = "1.6.0" description = "A fast and thorough lazy object proxy." 
category = "dev" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" [[package]] name = "markupsafe" @@ -301,7 +301,7 @@ python-versions = "*" [[package]] name = "numpy" -version = "1.20.1" +version = "1.20.2" description = "NumPy is the fundamental package for array computing with Python." category = "main" optional = false @@ -355,7 +355,7 @@ dev = ["pre-commit", "tox"] [[package]] name = "pre-commit" -version = "2.10.1" +version = "2.11.1" description = "A framework for managing and maintaining multi-language pre-commit hooks." category = "dev" optional = false @@ -402,7 +402,7 @@ numpy = ">=1.16.6" [[package]] name = "pycodestyle" -version = "2.6.0" +version = "2.7.0" description = "Python style guide checker" category = "dev" optional = false @@ -410,18 +410,18 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [[package]] name = "pydocstyle" -version = "5.1.1" +version = "6.0.0" description = "Python docstring style checker" category = "dev" optional = false -python-versions = ">=3.5" +python-versions = ">=3.6" [package.dependencies] snowballstemmer = "*" [[package]] name = "pyflakes" -version = "2.2.0" +version = "2.3.1" description = "passive checker of Python programs" category = "dev" optional = false @@ -429,7 +429,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [[package]] name = "pygments" -version = "2.8.0" +version = "2.8.1" description = "Pygments is a syntax highlighting package written in Python." 
category = "dev" optional = false @@ -526,14 +526,14 @@ socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] [[package]] name = "ruamel.yaml" -version = "0.16.12" +version = "0.17.2" description = "ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order" category = "dev" optional = false -python-versions = "*" +python-versions = ">=3" [package.dependencies] -"ruamel.yaml.clib" = {version = ">=0.1.2", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.9\""} +"ruamel.yaml.clib" = {version = ">=0.1.2", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.10\""} [package.extras] docs = ["ryd"] @@ -557,11 +557,11 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" [[package]] name = "smmap" -version = "3.0.5" +version = "4.0.0" description = "A pure Python implementation of a sliding window memory map manager" category = "dev" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.5" [[package]] name = "snowballstemmer" @@ -573,7 +573,7 @@ python-versions = "*" [[package]] name = "sphinx" -version = "3.5.1" +version = "3.5.3" description = "Python documentation generator" category = "dev" optional = false @@ -744,20 +744,20 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [[package]] name = "urllib3" -version = "1.26.3" +version = "1.26.4" description = "HTTP library with thread-safe connection pooling, file post, and more." 
category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" [package.extras] -brotli = ["brotlipy (>=0.6.0)"] secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] +brotli = ["brotlipy (>=0.6.0)"] [[package]] name = "virtualenv" -version = "20.4.2" +version = "20.4.3" description = "Virtual Python Environment builder" category = "dev" optional = false @@ -787,7 +787,7 @@ feather = ["pyarrow"] [metadata] lock-version = "1.1" python-versions = ">=3.8, <4" -content-hash = "39eef7b855e469d18b328c4e5daf3dda5230f9b5522cfdcb3488c23d54c5f8ad" +content-hash = "de3e3d8cf22ed5953ca93b6d8a28cdb193fb1020ef52bce0d980d297679ec2a3" [metadata.files] alabaster = [ @@ -799,8 +799,8 @@ appdirs = [ {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"}, ] astroid = [ - {file = "astroid-2.5.1-py3-none-any.whl", hash = "sha256:21d735aab248253531bb0f1e1e6d068f0ee23533e18ae8a6171ff892b98297cf"}, - {file = "astroid-2.5.1.tar.gz", hash = "sha256:cfc35498ee64017be059ceffab0a25bedf7548ab76f2bea691c5565896e7128d"}, + {file = "astroid-2.5.2-py3-none-any.whl", hash = "sha256:cd80bf957c49765dce6d92c43163ff9d2abc43132ce64d4b1b47717c6d2522df"}, + {file = "astroid-2.5.2.tar.gz", hash = "sha256:6b0ed1af831570e500e2437625979eaa3b36011f66ddfc4ce930128610258ca9"}, ] atomicwrites = [ {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, @@ -901,28 +901,28 @@ filelock = [ {file = "filelock-3.0.12.tar.gz", hash = "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59"}, ] flake8 = [ - {file = "flake8-3.8.4-py2.py3-none-any.whl", hash = "sha256:749dbbd6bfd0cf1318af27bf97a14e28e5ff548ef8e5b1566ccfb25a11e7c839"}, - {file = "flake8-3.8.4.tar.gz", hash = 
"sha256:aadae8761ec651813c24be05c6f7b4680857ef6afaae4651a4eccaef97ce6c3b"}, + {file = "flake8-3.9.0-py2.py3-none-any.whl", hash = "sha256:12d05ab02614b6aee8df7c36b97d1a3b2372761222b19b58621355e82acddcff"}, + {file = "flake8-3.9.0.tar.gz", hash = "sha256:78873e372b12b093da7b5e5ed302e8ad9e988b38b063b61ad937f26ca58fc5f0"}, ] flake8-bugbear = [ - {file = "flake8-bugbear-20.11.1.tar.gz", hash = "sha256:528020129fea2dea33a466b9d64ab650aa3e5f9ffc788b70ea4bc6cf18283538"}, - {file = "flake8_bugbear-20.11.1-py36.py37.py38-none-any.whl", hash = "sha256:f35b8135ece7a014bc0aee5b5d485334ac30a6da48494998cc1fabf7ec70d703"}, + {file = "flake8-bugbear-21.3.2.tar.gz", hash = "sha256:cadce434ceef96463b45a7c3000f23527c04ea4b531d16c7ac8886051f516ca0"}, + {file = "flake8_bugbear-21.3.2-py36.py37.py38-none-any.whl", hash = "sha256:5d6ccb0c0676c738a6e066b4d50589c408dcc1c5bf1d73b464b18b73cd6c05c2"}, ] flake8-docstrings = [ - {file = "flake8-docstrings-1.5.0.tar.gz", hash = "sha256:3d5a31c7ec6b7367ea6506a87ec293b94a0a46c0bce2bb4975b7f1d09b6f3717"}, - {file = "flake8_docstrings-1.5.0-py2.py3-none-any.whl", hash = "sha256:a256ba91bc52307bef1de59e2a009c3cf61c3d0952dbe035d6ff7208940c2edc"}, + {file = "flake8-docstrings-1.6.0.tar.gz", hash = "sha256:9fe7c6a306064af8e62a055c2f61e9eb1da55f84bb39caef2b84ce53708ac34b"}, + {file = "flake8_docstrings-1.6.0-py2.py3-none-any.whl", hash = "sha256:99cac583d6c7e32dd28bbfbef120a7c0d1b6dde4adb5a9fd441c4227a6534bde"}, ] gitdb = [ - {file = "gitdb-4.0.5-py3-none-any.whl", hash = "sha256:91f36bfb1ab7949b3b40e23736db18231bf7593edada2ba5c3a174a7b23657ac"}, - {file = "gitdb-4.0.5.tar.gz", hash = "sha256:c9e1f2d0db7ddb9a704c2a0217be31214e91a4fe1dea1efad19ae42ba0c285c9"}, + {file = "gitdb-4.0.7-py3-none-any.whl", hash = "sha256:6c4cc71933456991da20917998acbe6cf4fb41eeaab7d6d67fbc05ecd4c865b0"}, + {file = "gitdb-4.0.7.tar.gz", hash = "sha256:96bf5c08b157a666fec41129e6d327235284cca4c81e92109260f353ba138005"}, ] gitpython = [ {file = "GitPython-3.1.14-py3-none-any.whl", 
hash = "sha256:3283ae2fba31c913d857e12e5ba5f9a7772bbc064ae2bb09efafa71b0dd4939b"}, {file = "GitPython-3.1.14.tar.gz", hash = "sha256:be27633e7509e58391f10207cd32b2a6cf5b908f92d9cd30da2e514e1137af61"}, ] identify = [ - {file = "identify-2.0.0-py2.py3-none-any.whl", hash = "sha256:9cdd81e5d2b6e76c3006d5226316dd947bd6324fbeebb881bec489202fa09d3a"}, - {file = "identify-2.0.0.tar.gz", hash = "sha256:b99aa309329c4fea679463eb35d169f3fbe13e66e9dd6162ad1856cbeb03dcbd"}, + {file = "identify-2.2.2-py2.py3-none-any.whl", hash = "sha256:c7c0f590526008911ccc5ceee6ed7b085cbc92f7b6591d0ee5913a130ad64034"}, + {file = "identify-2.2.2.tar.gz", hash = "sha256:43cb1965e84cdd247e875dec6d13332ef5be355ddc16776396d98089b9053d87"}, ] idna = [ {file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"}, @@ -941,30 +941,28 @@ jinja2 = [ {file = "Jinja2-2.11.3.tar.gz", hash = "sha256:a6d58433de0ae800347cab1fa3043cebbabe8baa9d29e668f1c768cb87a333c6"}, ] lazy-object-proxy = [ - {file = "lazy-object-proxy-1.5.2.tar.gz", hash = "sha256:5944a9b95e97de1980c65f03b79b356f30a43de48682b8bdd90aa5089f0ec1f4"}, - {file = "lazy_object_proxy-1.5.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:e960e8be509e8d6d618300a6c189555c24efde63e85acaf0b14b2cd1ac743315"}, - {file = "lazy_object_proxy-1.5.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:522b7c94b524389f4a4094c4bf04c2b02228454ddd17c1a9b2801fac1d754871"}, - {file = "lazy_object_proxy-1.5.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:3782931963dc89e0e9a0ae4348b44762e868ea280e4f8c233b537852a8996ab9"}, - {file = "lazy_object_proxy-1.5.2-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:429c4d1862f3fc37cd56304d880f2eae5bd0da83bdef889f3bd66458aac49128"}, - {file = "lazy_object_proxy-1.5.2-cp35-cp35m-win32.whl", hash = "sha256:cd1bdace1a8762534e9a36c073cd54e97d517a17d69a17985961265be6d22847"}, - {file = "lazy_object_proxy-1.5.2-cp35-cp35m-win_amd64.whl", hash = 
"sha256:ddbdcd10eb999d7ab292677f588b658372aadb9a52790f82484a37127a390108"}, - {file = "lazy_object_proxy-1.5.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:ecb5dd5990cec6e7f5c9c1124a37cb2c710c6d69b0c1a5c4aa4b35eba0ada068"}, - {file = "lazy_object_proxy-1.5.2-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:b6577f15d5516d7d209c1a8cde23062c0f10625f19e8dc9fb59268859778d7d7"}, - {file = "lazy_object_proxy-1.5.2-cp36-cp36m-win32.whl", hash = "sha256:c8fe2d6ff0ff583784039d0255ea7da076efd08507f2be6f68583b0da32e3afb"}, - {file = "lazy_object_proxy-1.5.2-cp36-cp36m-win_amd64.whl", hash = "sha256:fa5b2dee0e231fa4ad117be114251bdfe6afe39213bd629d43deb117b6a6c40a"}, - {file = "lazy_object_proxy-1.5.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:1d33d6f789697f401b75ce08e73b1de567b947740f768376631079290118ad39"}, - {file = "lazy_object_proxy-1.5.2-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:57fb5c5504ddd45ed420b5b6461a78f58cbb0c1b0cbd9cd5a43ad30a4a3ee4d0"}, - {file = "lazy_object_proxy-1.5.2-cp37-cp37m-win32.whl", hash = "sha256:e7273c64bccfd9310e9601b8f4511d84730239516bada26a0c9846c9697617ef"}, - {file = "lazy_object_proxy-1.5.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6f4e5e68b7af950ed7fdb594b3f19a0014a3ace0fedb86acb896e140ffb24302"}, - {file = "lazy_object_proxy-1.5.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cadfa2c2cf54d35d13dc8d231253b7985b97d629ab9ca6e7d672c35539d38163"}, - {file = "lazy_object_proxy-1.5.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:e7428977763150b4cf83255625a80a23dfdc94d43be7791ce90799d446b4e26f"}, - {file = "lazy_object_proxy-1.5.2-cp38-cp38-win32.whl", hash = "sha256:2f2de8f8ac0be3e40d17730e0600619d35c78c13a099ea91ef7fb4ad944ce694"}, - {file = "lazy_object_proxy-1.5.2-cp38-cp38-win_amd64.whl", hash = "sha256:38c3865bd220bd983fcaa9aa11462619e84a71233bafd9c880f7b1cb753ca7fa"}, - {file = "lazy_object_proxy-1.5.2-cp39-cp39-macosx_10_14_x86_64.whl", hash = 
"sha256:8a44e9901c0555f95ac401377032f6e6af66d8fc1fbfad77a7a8b1a826e0b93c"}, - {file = "lazy_object_proxy-1.5.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:fa7fb7973c622b9e725bee1db569d2c2ee64d2f9a089201c5e8185d482c7352d"}, - {file = "lazy_object_proxy-1.5.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:71a1ef23f22fa8437974b2d60fedb947c99a957ad625f83f43fd3de70f77f458"}, - {file = "lazy_object_proxy-1.5.2-cp39-cp39-win32.whl", hash = "sha256:ef3f5e288aa57b73b034ce9c1f1ac753d968f9069cd0742d1d69c698a0167166"}, - {file = "lazy_object_proxy-1.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:37d9c34b96cca6787fe014aeb651217944a967a5b165e2cacb6b858d2997ab84"}, + {file = "lazy-object-proxy-1.6.0.tar.gz", hash = "sha256:489000d368377571c6f982fba6497f2aa13c6d1facc40660963da62f5c379726"}, + {file = "lazy_object_proxy-1.6.0-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:c6938967f8528b3668622a9ed3b31d145fab161a32f5891ea7b84f6b790be05b"}, + {file = "lazy_object_proxy-1.6.0-cp27-cp27m-win32.whl", hash = "sha256:ebfd274dcd5133e0afae738e6d9da4323c3eb021b3e13052d8cbd0e457b1256e"}, + {file = "lazy_object_proxy-1.6.0-cp27-cp27m-win_amd64.whl", hash = "sha256:ed361bb83436f117f9917d282a456f9e5009ea12fd6de8742d1a4752c3017e93"}, + {file = "lazy_object_proxy-1.6.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:d900d949b707778696fdf01036f58c9876a0d8bfe116e8d220cfd4b15f14e741"}, + {file = "lazy_object_proxy-1.6.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:5743a5ab42ae40caa8421b320ebf3a998f89c85cdc8376d6b2e00bd12bd1b587"}, + {file = "lazy_object_proxy-1.6.0-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:bf34e368e8dd976423396555078def5cfc3039ebc6fc06d1ae2c5a65eebbcde4"}, + {file = "lazy_object_proxy-1.6.0-cp36-cp36m-win32.whl", hash = "sha256:b579f8acbf2bdd9ea200b1d5dea36abd93cabf56cf626ab9c744a432e15c815f"}, + {file = "lazy_object_proxy-1.6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:4f60460e9f1eb632584c9685bccea152f4ac2130e299784dbaf9fae9f49891b3"}, + {file = 
"lazy_object_proxy-1.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:d7124f52f3bd259f510651450e18e0fd081ed82f3c08541dffc7b94b883aa981"}, + {file = "lazy_object_proxy-1.6.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:22ddd618cefe54305df49e4c069fa65715be4ad0e78e8d252a33debf00f6ede2"}, + {file = "lazy_object_proxy-1.6.0-cp37-cp37m-win32.whl", hash = "sha256:9d397bf41caad3f489e10774667310d73cb9c4258e9aed94b9ec734b34b495fd"}, + {file = "lazy_object_proxy-1.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:24a5045889cc2729033b3e604d496c2b6f588c754f7a62027ad4437a7ecc4837"}, + {file = "lazy_object_proxy-1.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:17e0967ba374fc24141738c69736da90e94419338fd4c7c7bef01ee26b339653"}, + {file = "lazy_object_proxy-1.6.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:410283732af311b51b837894fa2f24f2c0039aa7f220135192b38fcc42bd43d3"}, + {file = "lazy_object_proxy-1.6.0-cp38-cp38-win32.whl", hash = "sha256:85fb7608121fd5621cc4377a8961d0b32ccf84a7285b4f1d21988b2eae2868e8"}, + {file = "lazy_object_proxy-1.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:d1c2676e3d840852a2de7c7d5d76407c772927addff8d742b9808fe0afccebdf"}, + {file = "lazy_object_proxy-1.6.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:b865b01a2e7f96db0c5d12cfea590f98d8c5ba64ad222300d93ce6ff9138bcad"}, + {file = "lazy_object_proxy-1.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:4732c765372bd78a2d6b2150a6e99d00a78ec963375f236979c0626b97ed8e43"}, + {file = "lazy_object_proxy-1.6.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:9698110e36e2df951c7c36b6729e96429c9c32b3331989ef19976592c5f3c77a"}, + {file = "lazy_object_proxy-1.6.0-cp39-cp39-win32.whl", hash = "sha256:1fee665d2638491f4d6e55bd483e15ef21f6c8c2095f235fef72601021e64f61"}, + {file = "lazy_object_proxy-1.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:f5144c75445ae3ca2057faac03fda5a902eff196702b0a24daf1d6ce0650514b"}, ] markupsafe = [ {file = 
"MarkupSafe-1.1.1-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161"}, @@ -985,20 +983,39 @@ markupsafe = [ {file = "MarkupSafe-1.1.1-cp35-cp35m-win32.whl", hash = "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1"}, {file = "MarkupSafe-1.1.1-cp35-cp35m-win_amd64.whl", hash = "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d"}, {file = "MarkupSafe-1.1.1-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff"}, + {file = "MarkupSafe-1.1.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d53bc011414228441014aa71dbec320c66468c1030aae3a6e29778a3382d96e5"}, {file = "MarkupSafe-1.1.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473"}, {file = "MarkupSafe-1.1.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e"}, + {file = "MarkupSafe-1.1.1-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:3b8a6499709d29c2e2399569d96719a1b21dcd94410a586a18526b143ec8470f"}, + {file = "MarkupSafe-1.1.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:84dee80c15f1b560d55bcfe6d47b27d070b4681c699c572af2e3c7cc90a3b8e0"}, + {file = "MarkupSafe-1.1.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:b1dba4527182c95a0db8b6060cc98ac49b9e2f5e64320e2b56e47cb2831978c7"}, {file = "MarkupSafe-1.1.1-cp36-cp36m-win32.whl", hash = "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66"}, {file = "MarkupSafe-1.1.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5"}, {file = "MarkupSafe-1.1.1-cp37-cp37m-macosx_10_6_intel.whl", hash = "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d"}, + {file = "MarkupSafe-1.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = 
"sha256:bf5aa3cbcfdf57fa2ee9cd1822c862ef23037f5c832ad09cfea57fa846dec193"}, {file = "MarkupSafe-1.1.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e"}, {file = "MarkupSafe-1.1.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6"}, + {file = "MarkupSafe-1.1.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:6fffc775d90dcc9aed1b89219549b329a9250d918fd0b8fa8d93d154918422e1"}, + {file = "MarkupSafe-1.1.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:a6a744282b7718a2a62d2ed9d993cad6f5f585605ad352c11de459f4108df0a1"}, + {file = "MarkupSafe-1.1.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:195d7d2c4fbb0ee8139a6cf67194f3973a6b3042d742ebe0a9ed36d8b6f0c07f"}, {file = "MarkupSafe-1.1.1-cp37-cp37m-win32.whl", hash = "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2"}, {file = "MarkupSafe-1.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c"}, {file = "MarkupSafe-1.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15"}, {file = "MarkupSafe-1.1.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2"}, {file = "MarkupSafe-1.1.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42"}, + {file = "MarkupSafe-1.1.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:acf08ac40292838b3cbbb06cfe9b2cb9ec78fce8baca31ddb87aaac2e2dc3bc2"}, + {file = "MarkupSafe-1.1.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:d9be0ba6c527163cbed5e0857c451fcd092ce83947944d6c14bc95441203f032"}, + {file = "MarkupSafe-1.1.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:caabedc8323f1e93231b52fc32bdcde6db817623d33e100708d9a68e1f53b26b"}, {file = "MarkupSafe-1.1.1-cp38-cp38-win32.whl", hash 
= "sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b"}, {file = "MarkupSafe-1.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be"}, + {file = "MarkupSafe-1.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d73a845f227b0bfe8a7455ee623525ee656a9e2e749e4742706d80a6065d5e2c"}, + {file = "MarkupSafe-1.1.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:98bae9582248d6cf62321dcb52aaf5d9adf0bad3b40582925ef7c7f0ed85fceb"}, + {file = "MarkupSafe-1.1.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:2beec1e0de6924ea551859edb9e7679da6e4870d32cb766240ce17e0a0ba2014"}, + {file = "MarkupSafe-1.1.1-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:7fed13866cf14bba33e7176717346713881f56d9d2bcebab207f7a036f41b850"}, + {file = "MarkupSafe-1.1.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:6f1e273a344928347c1290119b493a1f0303c52f5a5eae5f16d74f48c15d4a85"}, + {file = "MarkupSafe-1.1.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:feb7b34d6325451ef96bc0e36e1a6c0c1c64bc1fbec4b854f4529e51887b1621"}, + {file = "MarkupSafe-1.1.1-cp39-cp39-win32.whl", hash = "sha256:22c178a091fc6630d0d045bdb5992d2dfe14e3259760e713c490da5323866c39"}, + {file = "MarkupSafe-1.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:b7d644ddb4dbd407d31ffb699f1d140bc35478da613b441c582aeb7c43838dd8"}, {file = "MarkupSafe-1.1.1.tar.gz", hash = "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b"}, ] mccabe = [ @@ -1014,30 +1031,30 @@ nodeenv = [ {file = "nodeenv-1.5.0.tar.gz", hash = "sha256:ab45090ae383b716c4ef89e690c41ff8c2b257b85b309f01f3654df3d084bd7c"}, ] numpy = [ - {file = "numpy-1.20.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ae61f02b84a0211abb56462a3b6cd1e7ec39d466d3160eb4e1da8bf6717cdbeb"}, - {file = "numpy-1.20.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:65410c7f4398a0047eea5cca9b74009ea61178efd78d1be9847fac1d6716ec1e"}, - {file = "numpy-1.20.1-cp37-cp37m-manylinux1_x86_64.whl", 
hash = "sha256:2d7e27442599104ee08f4faed56bb87c55f8b10a5494ac2ead5c98a4b289e61f"}, - {file = "numpy-1.20.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:4ed8e96dc146e12c1c5cdd6fb9fd0757f2ba66048bf94c5126b7efebd12d0090"}, - {file = "numpy-1.20.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:ecb5b74c702358cdc21268ff4c37f7466357871f53a30e6f84c686952bef16a9"}, - {file = "numpy-1.20.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b9410c0b6fed4a22554f072a86c361e417f0258838957b78bd063bde2c7f841f"}, - {file = "numpy-1.20.1-cp37-cp37m-win32.whl", hash = "sha256:3d3087e24e354c18fb35c454026af3ed8997cfd4997765266897c68d724e4845"}, - {file = "numpy-1.20.1-cp37-cp37m-win_amd64.whl", hash = "sha256:89f937b13b8dd17b0099c7c2e22066883c86ca1575a975f754babc8fbf8d69a9"}, - {file = "numpy-1.20.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a1d7995d1023335e67fb070b2fae6f5968f5be3802b15ad6d79d81ecaa014fe0"}, - {file = "numpy-1.20.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:60759ab15c94dd0e1ed88241fd4fa3312db4e91d2c8f5a2d4cf3863fad83d65b"}, - {file = "numpy-1.20.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:125a0e10ddd99a874fd357bfa1b636cd58deb78ba4a30b5ddb09f645c3512e04"}, - {file = "numpy-1.20.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:c26287dfc888cf1e65181f39ea75e11f42ffc4f4529e5bd19add57ad458996e2"}, - {file = "numpy-1.20.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:7199109fa46277be503393be9250b983f325880766f847885607d9b13848f257"}, - {file = "numpy-1.20.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:72251e43ac426ff98ea802a931922c79b8d7596480300eb9f1b1e45e0543571e"}, - {file = "numpy-1.20.1-cp38-cp38-win32.whl", hash = "sha256:c91ec9569facd4757ade0888371eced2ecf49e7982ce5634cc2cf4e7331a4b14"}, - {file = "numpy-1.20.1-cp38-cp38-win_amd64.whl", hash = "sha256:13adf545732bb23a796914fe5f891a12bd74cf3d2986eed7b7eba2941eea1590"}, - {file = "numpy-1.20.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:104f5e90b143dbf298361a99ac1af4cf59131218a045ebf4ee5990b83cff5fab"}, - {file = "numpy-1.20.1-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:89e5336f2bec0c726ac7e7cdae181b325a9c0ee24e604704ed830d241c5e47ff"}, - {file = "numpy-1.20.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:032be656d89bbf786d743fee11d01ef318b0781281241997558fa7950028dd29"}, - {file = "numpy-1.20.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:66b467adfcf628f66ea4ac6430ded0614f5cc06ba530d09571ea404789064adc"}, - {file = "numpy-1.20.1-cp39-cp39-win32.whl", hash = "sha256:12e4ba5c6420917571f1a5becc9338abbde71dd811ce40b37ba62dec7b39af6d"}, - {file = "numpy-1.20.1-cp39-cp39-win_amd64.whl", hash = "sha256:9c94cab5054bad82a70b2e77741271790304651d584e2cdfe2041488e753863b"}, - {file = "numpy-1.20.1-pp37-pypy37_pp73-manylinux2010_x86_64.whl", hash = "sha256:9eb551d122fadca7774b97db8a112b77231dcccda8e91a5bc99e79890797175e"}, - {file = "numpy-1.20.1.zip", hash = "sha256:3bc63486a870294683980d76ec1e3efc786295ae00128f9ea38e2c6e74d5a60a"}, + {file = "numpy-1.20.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e9459f40244bb02b2f14f6af0cd0732791d72232bbb0dc4bab57ef88e75f6935"}, + {file = "numpy-1.20.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:a8e6859913ec8eeef3dbe9aed3bf475347642d1cdd6217c30f28dee8903528e6"}, + {file = "numpy-1.20.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:9cab23439eb1ebfed1aaec9cd42b7dc50fc96d5cd3147da348d9161f0501ada5"}, + {file = "numpy-1.20.2-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:9c0fab855ae790ca74b27e55240fe4f2a36a364a3f1ebcfd1fb5ac4088f1cec3"}, + {file = "numpy-1.20.2-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:61d5b4cf73622e4d0c6b83408a16631b670fc045afd6540679aa35591a17fe6d"}, + {file = "numpy-1.20.2-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:d15007f857d6995db15195217afdbddfcd203dfaa0ba6878a2f580eaf810ecd6"}, + {file = "numpy-1.20.2-cp37-cp37m-win32.whl", hash = 
"sha256:d76061ae5cab49b83a8cf3feacefc2053fac672728802ac137dd8c4123397677"}, + {file = "numpy-1.20.2-cp37-cp37m-win_amd64.whl", hash = "sha256:bad70051de2c50b1a6259a6df1daaafe8c480ca98132da98976d8591c412e737"}, + {file = "numpy-1.20.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:719656636c48be22c23641859ff2419b27b6bdf844b36a2447cb39caceb00935"}, + {file = "numpy-1.20.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:aa046527c04688af680217fffac61eec2350ef3f3d7320c07fd33f5c6e7b4d5f"}, + {file = "numpy-1.20.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:2428b109306075d89d21135bdd6b785f132a1f5a3260c371cee1fae427e12727"}, + {file = "numpy-1.20.2-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:e8e4fbbb7e7634f263c5b0150a629342cc19b47c5eba8d1cd4363ab3455ab576"}, + {file = "numpy-1.20.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:edb1f041a9146dcf02cd7df7187db46ab524b9af2515f392f337c7cbbf5b52cd"}, + {file = "numpy-1.20.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:c73a7975d77f15f7f68dacfb2bca3d3f479f158313642e8ea9058eea06637931"}, + {file = "numpy-1.20.2-cp38-cp38-win32.whl", hash = "sha256:6c915ee7dba1071554e70a3664a839fbc033e1d6528199d4621eeaaa5487ccd2"}, + {file = "numpy-1.20.2-cp38-cp38-win_amd64.whl", hash = "sha256:471c0571d0895c68da309dacee4e95a0811d0a9f9f532a48dc1bea5f3b7ad2b7"}, + {file = "numpy-1.20.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4703b9e937df83f5b6b7447ca5912b5f5f297aba45f91dbbbc63ff9278c7aa98"}, + {file = "numpy-1.20.2-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:abc81829c4039e7e4c30f7897938fa5d4916a09c2c7eb9b244b7a35ddc9656f4"}, + {file = "numpy-1.20.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:377751954da04d4a6950191b20539066b4e19e3b559d4695399c5e8e3e683bf6"}, + {file = "numpy-1.20.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:6e51e417d9ae2e7848314994e6fc3832c9d426abce9328cf7571eefceb43e6c9"}, + {file = "numpy-1.20.2-cp39-cp39-win32.whl", hash = 
"sha256:780ae5284cb770ade51d4b4a7dce4faa554eb1d88a56d0e8b9f35fca9b0270ff"}, + {file = "numpy-1.20.2-cp39-cp39-win_amd64.whl", hash = "sha256:924dc3f83de20437de95a73516f36e09918e9c9c18d5eac520062c49191025fb"}, + {file = "numpy-1.20.2-pp37-pypy37_pp73-manylinux2010_x86_64.whl", hash = "sha256:97ce8b8ace7d3b9288d88177e66ee75480fb79b9cf745e91ecfe65d91a856042"}, + {file = "numpy-1.20.2.zip", hash = "sha256:878922bf5ad7550aa044aa9301d417e2d3ae50f0f577de92051d739ac6096cee"}, ] packaging = [ {file = "packaging-20.9-py2.py3-none-any.whl", hash = "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a"}, @@ -1070,8 +1087,8 @@ pluggy = [ {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, ] pre-commit = [ - {file = "pre_commit-2.10.1-py2.py3-none-any.whl", hash = "sha256:16212d1fde2bed88159287da88ff03796863854b04dc9f838a55979325a3d20e"}, - {file = "pre_commit-2.10.1.tar.gz", hash = "sha256:399baf78f13f4de82a29b649afd74bef2c4e28eb4f021661fc7f29246e8c7a3a"}, + {file = "pre_commit-2.11.1-py2.py3-none-any.whl", hash = "sha256:94c82f1bf5899d56edb1d926732f4e75a7df29a0c8c092559c77420c9d62428b"}, + {file = "pre_commit-2.11.1.tar.gz", hash = "sha256:de55c5c72ce80d79106e48beb1b54104d16495ce7f95b0c7b13d4784193a00af"}, ] pre-commit-hooks = [ {file = "pre_commit_hooks-3.4.0-py2.py3-none-any.whl", hash = "sha256:b1d329fc712f53f56af7c4a0ac08c414a7fcfd634dbd829c3a03f39cfb9c3574"}, @@ -1105,20 +1122,20 @@ pyarrow = [ {file = "pyarrow-3.0.0.tar.gz", hash = "sha256:4bf8cc43e1db1e0517466209ee8e8f459d9b5e1b4074863317f2a965cf59889e"}, ] pycodestyle = [ - {file = "pycodestyle-2.6.0-py2.py3-none-any.whl", hash = "sha256:2295e7b2f6b5bd100585ebcb1f616591b652db8a741695b3d8f5d28bdc934367"}, - {file = "pycodestyle-2.6.0.tar.gz", hash = "sha256:c58a7d2815e0e8d7972bf1803331fb0152f867bd89adf8a01dfd55085434192e"}, + {file = "pycodestyle-2.7.0-py2.py3-none-any.whl", hash = 
"sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068"}, + {file = "pycodestyle-2.7.0.tar.gz", hash = "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"}, ] pydocstyle = [ - {file = "pydocstyle-5.1.1-py3-none-any.whl", hash = "sha256:aca749e190a01726a4fb472dd4ef23b5c9da7b9205c0a7857c06533de13fd678"}, - {file = "pydocstyle-5.1.1.tar.gz", hash = "sha256:19b86fa8617ed916776a11cd8bc0197e5b9856d5433b777f51a3defe13075325"}, + {file = "pydocstyle-6.0.0-py3-none-any.whl", hash = "sha256:d4449cf16d7e6709f63192146706933c7a334af7c0f083904799ccb851c50f6d"}, + {file = "pydocstyle-6.0.0.tar.gz", hash = "sha256:164befb520d851dbcf0e029681b91f4f599c62c5cd8933fd54b1bfbd50e89e1f"}, ] pyflakes = [ - {file = "pyflakes-2.2.0-py2.py3-none-any.whl", hash = "sha256:0d94e0e05a19e57a99444b6ddcf9a6eb2e5c68d3ca1e98e90707af8152c90a92"}, - {file = "pyflakes-2.2.0.tar.gz", hash = "sha256:35b2d75ee967ea93b55750aa9edbbf72813e06a66ba54438df2cfac9e3c27fc8"}, + {file = "pyflakes-2.3.1-py2.py3-none-any.whl", hash = "sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3"}, + {file = "pyflakes-2.3.1.tar.gz", hash = "sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db"}, ] pygments = [ - {file = "Pygments-2.8.0-py3-none-any.whl", hash = "sha256:b21b072d0ccdf29297a82a2363359d99623597b8a265b8081760e4d0f7153c88"}, - {file = "Pygments-2.8.0.tar.gz", hash = "sha256:37a13ba168a02ac54cc5891a42b1caec333e59b66addb7fa633ea8a6d73445c0"}, + {file = "Pygments-2.8.1-py3-none-any.whl", hash = "sha256:534ef71d539ae97d4c3a4cf7d6f110f214b0e687e92f9cb9d2a3b0d3101289c8"}, + {file = "Pygments-2.8.1.tar.gz", hash = "sha256:2656e1a6edcdabf4275f9a3640db59fd5de107d88e8663c5d4e9a0fa62f77f94"}, ] pyparsing = [ {file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"}, @@ -1168,8 +1185,8 @@ requests = [ {file = "requests-2.25.1.tar.gz", hash = 
"sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804"}, ] "ruamel.yaml" = [ - {file = "ruamel.yaml-0.16.12-py2.py3-none-any.whl", hash = "sha256:012b9470a0ea06e4e44e99e7920277edf6b46eee0232a04487ea73a7386340a5"}, - {file = "ruamel.yaml-0.16.12.tar.gz", hash = "sha256:076cc0bc34f1966d920a49f18b52b6ad559fbe656a0748e3535cf7b3f29ebf9e"}, + {file = "ruamel.yaml-0.17.2-py3-none-any.whl", hash = "sha256:0850def9ebca23b3a8c64c4b4115ebb6b364a10d49f89d289a26ee965e1e7d9d"}, + {file = "ruamel.yaml-0.17.2.tar.gz", hash = "sha256:8f1e15421668b9edf30ed02899f5f81aff9808a4271935776f61a99a569a13da"}, ] "ruamel.yaml.clib" = [ {file = "ruamel.yaml.clib-0.2.2-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:28116f204103cb3a108dfd37668f20abe6e3cafd0d3fd40dba126c732457b3cc"}, @@ -1179,20 +1196,29 @@ requests = [ {file = "ruamel.yaml.clib-0.2.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:73b3d43e04cc4b228fa6fa5d796409ece6fcb53a6c270eb2048109cbcbc3b9c2"}, {file = "ruamel.yaml.clib-0.2.2-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:53b9dd1abd70e257a6e32f934ebc482dac5edb8c93e23deb663eac724c30b026"}, {file = "ruamel.yaml.clib-0.2.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:839dd72545ef7ba78fd2aa1a5dd07b33696adf3e68fae7f31327161c1093001b"}, + {file = "ruamel.yaml.clib-0.2.2-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:1236df55e0f73cd138c0eca074ee086136c3f16a97c2ac719032c050f7e0622f"}, {file = "ruamel.yaml.clib-0.2.2-cp35-cp35m-win32.whl", hash = "sha256:b1e981fe1aff1fd11627f531524826a4dcc1f26c726235a52fcb62ded27d150f"}, {file = "ruamel.yaml.clib-0.2.2-cp35-cp35m-win_amd64.whl", hash = "sha256:4e52c96ca66de04be42ea2278012a2342d89f5e82b4512fb6fb7134e377e2e62"}, {file = "ruamel.yaml.clib-0.2.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a873e4d4954f865dcb60bdc4914af7eaae48fb56b60ed6daa1d6251c72f5337c"}, {file = "ruamel.yaml.clib-0.2.2-cp36-cp36m-manylinux1_x86_64.whl", hash = 
"sha256:ab845f1f51f7eb750a78937be9f79baea4a42c7960f5a94dde34e69f3cce1988"}, + {file = "ruamel.yaml.clib-0.2.2-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:2fd336a5c6415c82e2deb40d08c222087febe0aebe520f4d21910629018ab0f3"}, {file = "ruamel.yaml.clib-0.2.2-cp36-cp36m-win32.whl", hash = "sha256:e9f7d1d8c26a6a12c23421061f9022bb62704e38211fe375c645485f38df34a2"}, {file = "ruamel.yaml.clib-0.2.2-cp36-cp36m-win_amd64.whl", hash = "sha256:2602e91bd5c1b874d6f93d3086f9830f3e907c543c7672cf293a97c3fabdcd91"}, {file = "ruamel.yaml.clib-0.2.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:44c7b0498c39f27795224438f1a6be6c5352f82cb887bc33d962c3a3acc00df6"}, {file = "ruamel.yaml.clib-0.2.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:8e8fd0a22c9d92af3a34f91e8a2594eeb35cba90ab643c5e0e643567dc8be43e"}, + {file = "ruamel.yaml.clib-0.2.2-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:75f0ee6839532e52a3a53f80ce64925ed4aed697dd3fa890c4c918f3304bd4f4"}, {file = "ruamel.yaml.clib-0.2.2-cp37-cp37m-win32.whl", hash = "sha256:464e66a04e740d754170be5e740657a3b3b6d2bcc567f0c3437879a6e6087ff6"}, {file = "ruamel.yaml.clib-0.2.2-cp37-cp37m-win_amd64.whl", hash = "sha256:52ae5739e4b5d6317b52f5b040b1b6639e8af68a5b8fd606a8b08658fbd0cab5"}, {file = "ruamel.yaml.clib-0.2.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4df5019e7783d14b79217ad9c56edf1ba7485d614ad5a385d1b3c768635c81c0"}, {file = "ruamel.yaml.clib-0.2.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5254af7d8bdf4d5484c089f929cb7f5bafa59b4f01d4f48adda4be41e6d29f99"}, + {file = "ruamel.yaml.clib-0.2.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:8be05be57dc5c7b4a0b24edcaa2f7275866d9c907725226cdde46da09367d923"}, {file = "ruamel.yaml.clib-0.2.2-cp38-cp38-win32.whl", hash = "sha256:74161d827407f4db9072011adcfb825b5258a5ccb3d2cd518dd6c9edea9e30f1"}, {file = "ruamel.yaml.clib-0.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:058a1cc3df2a8aecc12f983a48bda99315cebf55a3b3a5463e37bb599b05727b"}, + {file = 
"ruamel.yaml.clib-0.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c6ac7e45367b1317e56f1461719c853fd6825226f45b835df7436bb04031fd8a"}, + {file = "ruamel.yaml.clib-0.2.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:b4b0d31f2052b3f9f9b5327024dc629a253a83d8649d4734ca7f35b60ec3e9e5"}, + {file = "ruamel.yaml.clib-0.2.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:1f8c0a4577c0e6c99d208de5c4d3fd8aceed9574bb154d7a2b21c16bb924154c"}, + {file = "ruamel.yaml.clib-0.2.2-cp39-cp39-win32.whl", hash = "sha256:46d6d20815064e8bb023ea8628cfb7402c0f0e83de2c2227a88097e239a7dffd"}, + {file = "ruamel.yaml.clib-0.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:6c0a5dc52fc74eb87c67374a4e554d4761fd42a4d01390b7e868b30d21f4b8bb"}, {file = "ruamel.yaml.clib-0.2.2.tar.gz", hash = "sha256:2d24bd98af676f4990c4d715bcdc2a60b19c56a3fb3a763164d2d8ca0e806ba7"}, ] six = [ @@ -1200,16 +1226,16 @@ six = [ {file = "six-1.15.0.tar.gz", hash = "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259"}, ] smmap = [ - {file = "smmap-3.0.5-py2.py3-none-any.whl", hash = "sha256:7bfcf367828031dc893530a29cb35eb8c8f2d7c8f2d0989354d75d24c8573714"}, - {file = "smmap-3.0.5.tar.gz", hash = "sha256:84c2751ef3072d4f6b2785ec7ee40244c6f45eb934d9e543e2c51f1bd3d54c50"}, + {file = "smmap-4.0.0-py2.py3-none-any.whl", hash = "sha256:a9a7479e4c572e2e775c404dcd3080c8dc49f39918c2cf74913d30c4c478e3c2"}, + {file = "smmap-4.0.0.tar.gz", hash = "sha256:7e65386bd122d45405ddf795637b7f7d2b532e7e401d46bbe3fb49b9986d5182"}, ] snowballstemmer = [ {file = "snowballstemmer-2.1.0-py2.py3-none-any.whl", hash = "sha256:b51b447bea85f9968c13b650126a888aabd4cb4463fca868ec596826325dedc2"}, {file = "snowballstemmer-2.1.0.tar.gz", hash = "sha256:e997baa4f2e9139951b6f4c631bad912dfd3c792467e2f03d7239464af90e914"}, ] sphinx = [ - {file = "Sphinx-3.5.1-py3-none-any.whl", hash = "sha256:e90161222e4d80ce5fc811ace7c6787a226b4f5951545f7f42acf97277bfc35c"}, - {file = "Sphinx-3.5.1.tar.gz", hash = 
"sha256:11d521e787d9372c289472513d807277caafb1684b33eb4f08f7574c405893a9"}, + {file = "Sphinx-3.5.3-py3-none-any.whl", hash = "sha256:3f01732296465648da43dec8fb40dc451ba79eb3e2cc5c6d79005fd98197107d"}, + {file = "Sphinx-3.5.3.tar.gz", hash = "sha256:ce9c228456131bab09a3d7d10ae58474de562a6f79abb3dc811ae401cf8c1abc"}, ] sphinx-autoapi = [ {file = "sphinx-autoapi-1.7.0.tar.gz", hash = "sha256:48caa054a99c21156e9a1d26559281dc27f86ab8ef8bb6ef160f8cd9f4a0053d"}, @@ -1260,12 +1286,12 @@ unidecode = [ {file = "Unidecode-1.2.0.tar.gz", hash = "sha256:8d73a97d387a956922344f6b74243c2c6771594659778744b2dbdaad8f6b727d"}, ] urllib3 = [ - {file = "urllib3-1.26.3-py2.py3-none-any.whl", hash = "sha256:1b465e494e3e0d8939b50680403e3aedaa2bc434b7d5af64dfd3c958d7f5ae80"}, - {file = "urllib3-1.26.3.tar.gz", hash = "sha256:de3eedaad74a2683334e282005cd8d7f22f4d55fa690a2a1020a416cb0a47e73"}, + {file = "urllib3-1.26.4-py2.py3-none-any.whl", hash = "sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df"}, + {file = "urllib3-1.26.4.tar.gz", hash = "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937"}, ] virtualenv = [ - {file = "virtualenv-20.4.2-py2.py3-none-any.whl", hash = "sha256:2be72df684b74df0ea47679a7df93fd0e04e72520022c57b479d8f881485dbe3"}, - {file = "virtualenv-20.4.2.tar.gz", hash = "sha256:147b43894e51dd6bba882cf9c282447f780e2251cd35172403745fc381a0a80d"}, + {file = "virtualenv-20.4.3-py2.py3-none-any.whl", hash = "sha256:83f95875d382c7abafe06bd2a4cdd1b363e1bb77e02f155ebe8ac082a916b37c"}, + {file = "virtualenv-20.4.3.tar.gz", hash = "sha256:49ec4eb4c224c6f7dd81bb6d0a28a09ecae5894f4e593c89b0db0885f565a107"}, ] wrapt = [ {file = "wrapt-1.12.1.tar.gz", hash = "sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7"}, diff --git a/pyproject.toml b/pyproject.toml index d50ffa0..175d056 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,7 +104,7 @@ pyarrow = {version=">=3.0, <4.0", optional=true} #tables = {version=">=3.6, 
<4.0", optional=true} [tool.poetry.dev-dependencies] -pre-commit = ">=2.10, <3.0" +pre-commit = ">=2.11, <3.0" pre-commit-hooks = ">=3.4, <4.0" bandit = ">=1.7, <2.0" pytest = ">=6.2, <7.0" @@ -113,9 +113,9 @@ pytest-cov = ">=2.11, <3.0" sphinx = ">=3.5, <4.0" sphinx-autoapi = ">=1.7, <2.0" sphinx-rtd-theme = ">=0.5, <1.0" -flake8 = ">=3.8, <4.0" +flake8 = ">=3.9, <4.0" flake8-docstrings = ">=1.5, <2.0" -flake8-bugbear = ">=20" +flake8-bugbear = ">=21" tomlkit = ">=0.7, <1.0" diff --git a/tests/__init__.py b/tests/__init__.py index 83e6138..217f7ee 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,5 +1,6 @@ import contextlib import inspect +import random from pathlib import Path from typing import Sequence @@ -10,8 +11,9 @@ @contextlib.contextmanager def tmpfile(ext: str) -> Path: - caller = inspect.stack()[1][3] - path = Path(__file__).parent.parent.parent / "resources" / "tmp" / (str(caller) + ext) + # caller = inspect.stack()[1][3] + caller = str(random.randint(1, 100000)) # nosec + path = Path(__file__).parent / "resources" / "tmp" / (str(caller) + ext) path.parent.mkdir(parents=True, exist_ok=True) yield path if path.exists(): @@ -52,6 +54,12 @@ class TypedTrivial(TypedDf): pass +class TypedOneColumn(TypedDf): + @classmethod + def required_columns(cls) -> Sequence[str]: + return ["abc"] + + class TypedSingleIndex(TypedDf): @classmethod def required_index_names(cls) -> Sequence[str]: diff --git a/tests/test_fancy_read_write.py b/tests/test_fancy_read_write.py new file mode 100644 index 0000000..86e58e5 --- /dev/null +++ b/tests/test_fancy_read_write.py @@ -0,0 +1,63 @@ +import pytest + +from . 
import TypedMultiIndex, sample_data, tmpfile, TypedOneColumn, TypedSingleIndex + +# h5, snappy, and parquet work too -- but can't run in CI yet +known_compressions = {"", ".gz", ".zip", ".bz2", ".xz"} + + +def _get_known_extensions(): + ne = {".feather"} + for e in {".csv", ".tsv", ".tab"}: + for c in known_compressions: + ne.add(e + c) + return ne + + +known_extensions = _get_known_extensions() + + +class TestReadWrite: + def test_read_write_file_multi_index(self): + for ext in known_extensions: + with tmpfile(ext) as path: + df = TypedMultiIndex.convert(TypedMultiIndex(sample_data())) + df.write_file(path) + df2 = TypedMultiIndex.read_file(path) + assert df2.index_names() == ["abc", "xyz"] + assert df2.column_names() == ["123"] + + def test_read_write_one_single_index(self): + for ext in known_extensions: + with tmpfile(ext) as path: + df = TypedSingleIndex.convert(TypedSingleIndex(sample_data())) + df.write_file(path) + df2 = TypedSingleIndex.read_file(path) + assert df2.index_names() == ["abc"] + assert df2.column_names() == ["123", "xyz"] + + # noinspection DuplicatedCode + def test_read_write_one_col(self): + for ext in known_extensions: + with tmpfile(ext) as path: + df = TypedOneColumn(["a", "puppy", "and", "a", "parrot"], columns=["abc"]) + df = TypedOneColumn.convert(df) + df.write_file(path) + df2 = TypedOneColumn.read_file(path) + assert df2.index_names() == [] + assert df2.column_names() == ["abc"] + + # noinspection DuplicatedCode + def test_read_write_txt(self): + for c in known_compressions: + with tmpfile(".txt" + c) as path: + df = TypedOneColumn(["a", "puppy", "and", "a", "parrot"], columns=["abc"]) + df = TypedOneColumn.convert(df) + df.write_file(path) + df2 = TypedOneColumn.read_file(path) + assert df2.index_names() == [] + assert df2.column_names() == ["abc"] + + +if __name__ == "__main__": + pytest.main() diff --git a/tests/test_read_write.py b/tests/test_read_write.py index 6a7a650..0fd1dea 100644 --- a/tests/test_read_write.py +++ 
b/tests/test_read_write.py @@ -10,7 +10,7 @@ def test_feather_lz4(self): with tmpfile(".feather") as path: df = TypedMultiIndex.convert(TypedMultiIndex(sample_data())) df.to_feather(path, compression="lz4") - df2 = UntypedDf.read_feather(path) + df2 = TypedMultiIndex.read_feather(path) assert df2.index_names() == ["abc", "xyz"] assert df2.column_names() == ["123"] @@ -18,7 +18,7 @@ def test_feather_zstd(self): with tmpfile(".feather") as path: df = TypedMultiIndex.convert(TypedMultiIndex(sample_data())) df.to_feather(path, compression="zstd") - df2 = UntypedDf.read_feather(path) + df2 = TypedMultiIndex.read_feather(path) assert df2.index_names() == ["abc", "xyz"] assert df2.column_names() == ["123"] diff --git a/typeddfs/__init__.py b/typeddfs/__init__.py index 0f90811..8aefe92 100644 --- a/typeddfs/__init__.py +++ b/typeddfs/__init__.py @@ -9,6 +9,8 @@ from pathlib import Path from typing import Optional, Type +import pandas as pd + from typeddfs.base_dfs import AsymmetricDfError as _AsymmetricDfError from typeddfs.base_dfs import BaseDf from typeddfs.base_dfs import ExtraConditionFailedError as _ExtraConditionFailedError @@ -42,6 +44,10 @@ logger.error(f"Could not load package metadata for {pkg}. Is it installed?") +class FinalDf(UntypedDf): + """An untyped DataFrame meant for general use.""" + + class TypedDfs: """ The only thing you need to import from ``typeddfs``. @@ -83,6 +89,18 @@ def example(cls) -> Type[TypedDf]: ).build() return KeyValue + @classmethod + def wrap(cls, df: pd.DataFrame) -> FinalDf: + """ + Just wraps a DataFrame into a simple untyped DataFrame. + Useful to quickly access a function only defined on typeddfs DataFrames. 
+ + Example: + + TypedDfs.wrap(df).write_file("abc.feather") + """ + return FinalDf(df) + @classmethod def typed(cls, name: str, doc: Optional[str] = None) -> TypedDfBuilder: """ @@ -127,4 +145,4 @@ class New(UntypedDf): return New -__all__ = ["BaseDf", "UntypedDf", "TypedDf", "TypedDfs"] +__all__ = ["BaseDf", "UntypedDf", "TypedDf", "TypedDfs", "FinalDf"] diff --git a/typeddfs/base_dfs.py b/typeddfs/base_dfs.py index d4dedc0..d7a7a7b 100644 --- a/typeddfs/base_dfs.py +++ b/typeddfs/base_dfs.py @@ -3,7 +3,9 @@ """ from __future__ import annotations +import csv import abc +import os from pathlib import Path, PurePath from typing import Any, Iterable, List, Mapping, Optional, Sequence, Union from warnings import warn @@ -12,6 +14,13 @@ from natsort import natsorted, ns from pandas.core.frame import DataFrame as _InternalDataFrame + +class _Sentinal: + pass + + +_SENTINAL = _Sentinal() +_FAKE_SEP = "\u2008" # 6-em space; very unlikely to occur PathLike = Union[str, PurePath] @@ -70,10 +79,7 @@ def index_names(self) -> List[str]: A Python list """ lst = list(self.index.names) - if lst == [None]: - return [] - else: - return lst + return [] if lst == [None] else lst def is_multindex(self) -> bool: """ @@ -231,60 +237,290 @@ def drop_cols(self, cols: Union[str, Iterable[str]]) -> __qualname__: df = df.drop(c, axis=1) return self.__class__._check_and_change(df) + def write_file(self, path: Union[Path, str], *args, **kwargs): + """ + Writes to a file (or possibly URL), guessing the format from the filename extension. + Delegates to the ``to_*`` functions of this class (e.g. ``to_csv``). + Only includes file formats that can be read back in with corresponding ``to`` methods, + and excludes pickle. 
+ + Supports: + - .csv, .tsv, or .tab (optionally with .gz, .zip, .bz2, or .xz) + - .json (optionally with .gz, .zip, .bz2, or .xz) + - .feather + - .parquet or .snappy + - .h5 or .hdf + - .xlsx or .xls + - .txt, .lines, or .list (optionally with .gz, .zip, .bz2, or .xz); + see ``to_lines()`` + + Args: + path: Only path-like strings or pathlib objects are supported, not buffers + (because we need a filename). + args: Positional args passed to the read_ function + kwargs: Keyword args passed to the function + + Returns: + Whatever the corresponding method on ``pd.to_*`` returns. + This is usually either str or None + """ + cls = self.__class__ + return cls._guess_io(self, "to", path, _SENTINAL, _SENTINAL, _SENTINAL, "", *args, **kwargs) + + @classmethod + def read_file( + cls, + path: Union[Path, str], + *args, + nl: Optional[str] = _SENTINAL, + header: Optional[str] = _SENTINAL, + skip_blank_lines: bool = _SENTINAL, + comment: str = "", + **kwargs, + ) -> __qualname__: + """ + Reads from a file (or possibly URL), guessing the format from the filename extension. + Delegates to the ``read_*`` functions of this class. + + You can always write and then read back to get the same dataframe:: + + # df is any DataFrame from typeddfs + # path can use any suffix + df.write_file(path)) + df.read_file(path) + + Supports: + - .csv, .tsv, or .tab (optionally with .gz, .zip, .bz2, or .xz) + - .json (optionally with .gz, .zip, .bz2, or .xz) + - .feather + - .parquet or .snappy + - .h5 or .hdf + - .xlsx or .xls + - .fxf (fixed-width; read_fwf) + - .txt, .lines, or .list (optionally with .gz, .zip, .bz2, or .xz); + see ``read_lines()`` + + Args: + path: Only path-like strings or pathlib objects are supported, not buffers + (because we need a filename). + nl: Passes ``line_terminator=nl`` to ``.read_csv`` if the output is a CSV/TSV variant. + This can usually be inferred and is more important with ``.write_file``. 
+ header: Same as ``header`` in ``to_csv`` but not passed for non-CSV/TSV. + Just allows passing header without worrying about whether it applies. + skip_blank_lines: Same idea as for ``header`` + comment: Prefix indicating comments to ignore; only applies to ``to_lines`` + args: Positional args passed to the read_ function + kwargs: Keyword args passed to the function + + Returns: + An instance of this class + """ + return cls._guess_io( + cls, "read", path, nl, header, skip_blank_lines, comment, *args, **kwargs + ) + + @classmethod + def _guess_io( + cls, + clazz, + prefix: str, + path: Union[Path, str], + nl: Optional[str], + header: Optional[str], + skip_blank_lines: Optional[bool], + comment: str, + *args, + **kwargs, + ) -> str: + nl = {} if nl == _SENTINAL else dict(line_terminator="\n") + header = {} if header == _SENTINAL else dict(header=header) + skip_blank_lines = ( + {} if skip_blank_lines == _SENTINAL else dict(skip_blank_lines=skip_blank_lines) + ) + dct = { + ".feather": ("feather", {}), + ".parquet": ("parquet", {}), + ".snappy": ("parquet", {}), + ".h5": ("hdf", {}), + ".hdf": ("hdf", {}), + ".xlsx": ("excel", {}), + ".xls": ("excel", {}), + } + if prefix == "read": + dct.update( + { + ".fwf": ("fwf", {}), + } + ) + for compression in {".gz", ".zip", ".bz2", ".xz", ""}: + dct[".lines" + compression] = ("lines", dict(comment=comment)) + dct[".txt" + compression] = ("lines", dict(comment=comment)) + dct[".list" + compression] = ("lines", dict(comment=comment)) + dct[".csv" + compression] = ("csv", nl) + dct[".json" + compression] = ("json", {}) + dct[".tab" + compression] = ("csv", dict(sep="\t", **nl, **header, **skip_blank_lines)) + dct[".tsv" + compression] = ("csv", dict(sep="\t", **nl, **header, **skip_blank_lines)) + # `path` could be a URL, so don't use Path.suffix + for suffix, (fn, params) in dct.items(): + if isinstance(path, (str, PurePath)) and str(path).endswith(suffix): + fn_name = prefix + "_" + fn + # Note the order! 
kwargs overwrites params + # clazz.to_csv(path, sep="\t") + my_kwargs = {**params, **kwargs} + return getattr(clazz, fn_name)(path, *args, **my_kwargs) + raise ValueError(f"Suffix for {path} not recognized") + + def to_lines( + self, + path_or_buff, + comment: str = "", + nl: Optional[str] = _SENTINAL, + ) -> Optional[str]: + """ + Writes a file that contains one row per line and 1 column per line. + Associated with ``.lines`` or ``.txt``. + + .. caution:: + + For technical reasons, values cannot contain a 6-em space (U+2008). + Their presence will result in undefined behavior. + + Args: + path_or_buff: Path or buffer + comment: Add a comment at the top line, such as ``'# list of fruits'``; + No first line is added if empty + nl: Forces using \n as the line separator + + Returns: + The string data if ``path_or_buff`` is a buffer; None if it is a file + """ + nl = {} if nl == _SENTINAL else dict(line_terminator="\n") + if len(self.columns) != 1 or len(self.index_names()) != 0: + raise ValueError(f"Cannot write {len(self.columns)} columns to lines") + df = self.reset_index(drop=True) + data = [*([] if len(comment) == 0 else [comment]), *self[self.columns[0]].values.tolist()] + return pd.DataFrame(data).to_csv( + path_or_buff, index=False, sep=_FAKE_SEP, header=False, quoting=csv.QUOTE_NONE, **nl + ) + + @classmethod + def read_lines( + cls, + path_or_buff, + comment: str = "", + nl: Optional[str] = _SENTINAL, + ) -> __qualname__: + """ + Reads a file that contains 1 row and 1 column per line. + Skips lines that are blank after trimming whitespace. + Also skips comments if ``comment`` is set. + + .. caution:: + + For technical reasons, values cannot contain a 6-em space (U+2008). + Their presence will result in undefined behavior. 
+ + Args: + path_or_buff: Path or buffer + comment: Any line starting with this substring (excluding spaces) is ignored; + no comment is used if empty + nl: Forces using \n as the line separator (can almost always be inferred) + """ + nl = {} if nl == _SENTINAL else dict(line_terminator="\n") + df = pd.read_csv( + path_or_buff, + sep=_FAKE_SEP, + header=None, + quoting=csv.QUOTE_NONE, + skip_blank_lines=True, + **nl, + engine="python", + ) + values = [ + s.strip() + for s in df[df.columns[0]] + if s is not None + and len(s.strip()) > 0 + and len(comment) == 0 + or not s.strip().startswith(comment) + ] + df = pd.DataFrame(values) + if len(df.columns) != 1: + raise ValueError(f"Read multiple columns on {path_or_buff}") + if hasattr(cls, "required_columns"): + df.columns = cls.required_columns() + return cls._convert(df) + + @classmethod + def read_json(cls, *args, **kwargs) -> __qualname__: # pragma: no cover + # feather does not support MultiIndex, so reset index and use convert() + return cls._convert(pd.read_json(*args, **kwargs)) + + def to_json(self, path_or_buf, *args, **kwargs) -> Optional[str]: + df = self.vanilla().reset_index() + return df.to_json(path_or_buf, *args, **kwargs) + @classmethod def read_feather(cls, *args, **kwargs) -> __qualname__: # pragma: no cover # feather does not support MultiIndex, so reset index and use convert() - return cls.convert(pd.read_feather(*args, **kwargs)) + return cls._convert(pd.read_feather(*args, **kwargs)) - # noinspection PyMethodOverriding + # noinspection PyMethodOverriding,PyBroadException,DuplicatedCode def to_feather(self, path_or_buf, *args, **kwargs) -> Optional[str]: # pragma: no cover # feather does not support MultiIndex, so reset index and use convert() # if an error occurs you end up with a 0-byte file - # so, let's delete it if that happens - # but don't delete it if it already exists! 
- existed = isinstance(path_or_buf, (PurePath, str)) and Path(path_or_buf).exists() + # this is fixed with exactly the same logic as for to_hdf -- see that method + try: + old_size = os.path.getsize(path_or_buf) + except BaseException: + old_size = None try: return self.vanilla().reset_index().to_feather(path_or_buf, *args, **kwargs) - except: - if not existed: - Path(path_or_buf).unlink(missing_ok=True) + except BaseException: + try: + size = os.path.getsize(path_or_buf) + except BaseException: + size = None + if size is not None and size == 0 and (old_size is None or old_size > 0): + try: + Path(path_or_buf).unlink() + except BaseException: + pass raise @classmethod def read_parquet(cls, *args, **kwargs) -> __qualname__: # pragma: no cover # parquet does not support MultiIndex, so reset index and use convert() - return cls.convert(pd.read_parquet(*args, **kwargs)) + return cls._convert(pd.read_parquet(*args, **kwargs)) - # noinspection PyMethodOverriding + # noinspection PyMethodOverriding,PyBroadException,DuplicatedCode def to_parquet(self, path_or_buf, *args, **kwargs) -> Optional[str]: # pragma: no cover # parquet does not support MultiIndex, so reset index and use convert() # if an error occurs you end up with a 0-byte file - # so, let's delete it if that happens - # but don't delete it if it already exists! 
- existed = isinstance(path_or_buf, (PurePath, str)) and Path(path_or_buf).exists() + # this is fixed with exactly the same logic as for to_hdf -- see that method + try: + old_size = os.path.getsize(path_or_buf) + except BaseException: + old_size = None + reset = self.vanilla().reset_index() try: - return self.vanilla().reset_index().to_parquet(path_or_buf, *args, **kwargs) - except: - if not existed: - Path(path_or_buf).unlink(missing_ok=True) + return reset.to_parquet(path_or_buf, *args, **kwargs) + except BaseException: + try: + size = os.path.getsize(path_or_buf) + except BaseException: + size = None + if size is not None and size == 0 and (old_size is None or old_size > 0): + try: + Path(path_or_buf).unlink() + except BaseException: + pass raise @classmethod def read_csv(cls, *args, **kwargs) -> __qualname__: # pragma: no cover return cls._check_and_change(pd.read_csv(*args, **kwargs)) - # noinspection PyMethodOverriding - def to_csv(self, path_or_buf, *args, **kwargs) -> Optional[str]: # pragma: no cover - return self.vanilla().to_csv(path_or_buf, *args, **kwargs) - - @classmethod - def read_json(cls, *args, **kwargs) -> __qualname__: # pragma: no cover - return cls._check_and_change(pd.read_json(*args, **kwargs)) - - # noinspection PyMethodOverriding - def to_json(self, path_or_buf, *args, **kwargs) -> Optional[str]: # pragma: no cover - return self.vanilla().to_json(path_or_buf, *args, **kwargs) - @classmethod def read_hdf(cls, *args, key: str = "df", **kwargs) -> __qualname__: """ @@ -306,10 +542,16 @@ def read_hdf(cls, *args, key: str = "df", **kwargs) -> __qualname__: df: pd.DataFrame = pd.read_hdf(*args, key=key, **kwargs) return cls._check_and_change(df) + # noinspection PyBroadException,PyFinal,DuplicatedCode def to_hdf(self, path: PathLike, key: str = "df", **kwargs) -> None: """ Writes to HDF with ``key`` as the default. Calling pd.to_hdf on this would error. 
+ Note: + This handles an edge case in vanilla ``pd.DataFrame.to_hdf`` + that results in 0-byte files being written on error. + Those empty files are deleted if they're created and didn't already exist. + Args: path: A ``pathlib.Path`` or str value key: The HDF store key @@ -320,16 +562,34 @@ def to_hdf(self, path: PathLike, key: str = "df", **kwargs) -> None: OSError: Likely for some HDF5 configurations """ path = Path(path) - df = self.vanilla() # if an error occurs you end up with a 0-byte file - # so, let's delete it if that happens - # but don't delete it if it already exists! - existed = path.exists() + # delete it if and only if we CREATED an empty file -- + # subtle, but: we shouldn't delete the 0-byte file if it + # already existed and was 0 bytes + # + # just wrap in try-except -- it might not be a file and might not exist + # technically there's an edge case: what if it was just not readable? + # if it isn't readable now but becomes readable (and writable) by the time + # we try to write, then we delete it anyway + # that's a super unlikely bug and shouldn't matter anyway + try: + old_size = os.path.getsize(path) + except BaseException: + old_size = None + df = self.vanilla() try: df.to_hdf(str(path), key, **kwargs) - except: - if not existed: - path.unlink(missing_ok=True) + except BaseException: + # noinspection PyBroadException + try: + size = os.path.getsize(path) + except BaseException: + size = None + if size is not None and size == 0 and (old_size is None or old_size > 0): + try: + Path(path).unlink() + except BaseException: + pass raise def vanilla(self) -> pd.DataFrame: @@ -381,7 +641,7 @@ def reset_index( self, level=None, drop=False, inplace=False, col_level=0, col_fill="" ) -> __qualname__: if inplace: # pragma: no cover - warn("inplace not supported") + warn("inplace not supported. 
Use vanilla() if needed.") return self.__class__._check_and_change( super().reset_index( level=level, @@ -396,7 +656,7 @@ def set_index( self, keys, drop=True, append=False, inplace=False, verify_integrity=False ) -> __qualname__: if inplace: # pragma: no cover - warn("inplace not supported") + warn("inplace not supported. Use vanilla() if needed.") if len(keys) == 0 and append: return self elif len(keys) == 0: @@ -443,6 +703,7 @@ def fillna( ) ) + # noinspection PyFinal def copy(self, deep: bool = False) -> __qualname__: return self.__class__._check_and_change(super().copy(deep=deep)) @@ -453,6 +714,7 @@ def append(self, other, ignore_index=False, verify_integrity=False, sort=False) ) ) + # noinspection PyFinal def ffill(self, axis=None, inplace=False, limit=None, downcast=None) -> __qualname__: if inplace: # pragma: no cover warn("inplace not supported") @@ -460,6 +722,7 @@ def ffill(self, axis=None, inplace=False, limit=None, downcast=None) -> __qualna super().ffill(axis=axis, inplace=inplace, limit=limit, downcast=downcast) ) + # noinspection PyFinal def bfill(self, axis=None, inplace=False, limit=None, downcast=None) -> __qualname__: if inplace: # pragma: no cover warn("inplace not supported") @@ -467,6 +730,7 @@ def bfill(self, axis=None, inplace=False, limit=None, downcast=None) -> __qualna super().bfill(axis=axis, inplace=inplace, limit=limit, downcast=downcast) ) + # noinspection PyFinal def abs(self) -> __qualname__: return self.__class__._check_and_change(super().abs()) @@ -497,8 +761,8 @@ def replace( ) ) - def applymap(self, func) -> __qualname__: - return self.__class__._check_and_change(super().applymap(func)) + def applymap(self, func, na_action: Optional[str] = None) -> __qualname__: + return self.__class__._check_and_change(super().applymap(func, na_action=na_action)) def astype(self, dtype, copy=True, errors="raise") -> __qualname__: return self.__class__._check_and_change( @@ -549,6 +813,16 @@ def st( df = df.loc[df[key] == value] return 
self.__class__._check_and_change(df) + @classmethod + def _convert(cls, df: pd.DataFrame): + # not great, but works ok + # if this is a BaseDf, use convert + # otherwise, just use check_and_change + if hasattr(cls, "convert"): + return cls.convert(df) + else: + return cls._check_and_change(df) + @classmethod def _check_and_change(cls, df) -> __qualname__: df.__class__ = cls diff --git a/typeddfs/typed_dfs.py b/typeddfs/typed_dfs.py index b1acc82..f86ab58 100644 --- a/typeddfs/typed_dfs.py +++ b/typeddfs/typed_dfs.py @@ -163,8 +163,12 @@ def read_csv(cls, path: PathLike, *args, **kwargs) -> __qualname__: return cls.convert(df) def to_csv(self, path: PathLike, *args, **kwargs) -> Optional[str]: - df = self.vanilla().reset_index() - return df.to_csv(path, index=False) + # TODO not checking for index in the args + if "index" in kwargs: + return super().to_csv(path, *args, **kwargs) + else: + df = self.vanilla().reset_index() + return df.to_csv(path, *args, index=False, **kwargs) @classmethod def is_valid(cls, df: pd.DataFrame) -> bool: @@ -261,10 +265,14 @@ def _check(cls, df) -> None: def _check_has_required(cls, df: pd.DataFrame) -> None: for c in set(cls.required_index_names()): if c not in set(df.index.names): - raise MissingColumnError(f"Missing index name {c}") + raise MissingColumnError( + f"Missing index name {c} (indices are: {set(df.index.names)}; columns are: {set(df.columns.names)}))" + ) for c in set(cls.required_columns()): if c not in set(df.columns): - raise MissingColumnError(f"Missing column {c}") + raise MissingColumnError( + f"Missing column {c} (columns are: {set(df.columns.names)}; indices are: {set(df.index.names)})" + ) @classmethod def _check_has_unexpected(cls, df: pd.DataFrame) -> None: diff --git a/typeddfs/untyped_dfs.py b/typeddfs/untyped_dfs.py index 09318b0..9a46550 100644 --- a/typeddfs/untyped_dfs.py +++ b/typeddfs/untyped_dfs.py @@ -41,6 +41,7 @@ def to_csv(self, path: PathLike, *args, **kwargs) -> Optional[str]: df.to_csv(path) 
df.__class__.read_csv(path) == df """ + # TODO not checking for index in the args if "index" in kwargs: return super().to_csv(path, *args, **kwargs) else: