Skip to content

Commit

Permalink
Fixed import and added python-dateutil to dependencies (#1397)
Browse files Browse the repository at this point in the history
* Fixed import and added python-dateutil to dependencies

* Update release notes

* added python-dateutil to latest requirements
  • Loading branch information
ParthivNaresh committed Apr 25, 2022
1 parent b12e4b0 commit 4170c70
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 10 deletions.
8 changes: 5 additions & 3 deletions docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@
Release Notes
-------------

.. Future Release
==============
Future Release
==============
* Enhancements
* Fixes
* Fixed import issues regarding ``pyarrow`` and made ``python-dateutil>=2.8.1`` a required dependency (:pr:`1397`)
* Changes
* Documentation Changes
* Testing Changes

.. Thanks to the following people for contributing to this release:
Thanks to the following people for contributing to this release:
:user:`ParthivNaresh`

v0.16.1 Apr 25, 2022
====================
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ include_package_data = True
install_requires =
pandas >= 1.3.0, <1.4.2
scikit-learn >= 0.22
python-dateutil >= 2.8.1

python_requires = >=3.7, <4

Expand Down
8 changes: 8 additions & 0 deletions woodwork/deserializers/deserializer_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@
from woodwork.serializers.serializer_base import SCHEMA_VERSION
from woodwork.utils import _is_s3, _is_url, import_or_raise

# Error message raised via import_or_raise() when pyarrow is not installed
# at deserialization time.
#
# BUG FIX: the original defined this as an f-string containing ``{format}``.
# At module scope there is no local ``format`` variable, so Python
# interpolated the *builtin* ``format`` function, and users saw:
#   "The pyarrow library is required to deserialize from <built-in function format>."
# A plain string with no interpolation fixes the message.
PYARROW_IMPORT_ERROR_MESSAGE_DESERIALIZE = (
    "The pyarrow library is required to deserialize from this file format.\n"
    "Install via pip:\n"
    "    pip install pyarrow\n"
    "Install via conda:\n"
    "    conda install pyarrow -c conda-forge"
)


class Deserializer:
def __init__(self, path, filename, data_subdirectory, typing_info):
Expand Down
13 changes: 9 additions & 4 deletions woodwork/deserializers/parquet_deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
import tempfile
from pathlib import Path

import pyarrow as pa

from woodwork.deserializers.deserializer_base import Deserializer, _check_schema_version
from woodwork.deserializers.deserializer_base import (
PYARROW_IMPORT_ERROR_MESSAGE_DESERIALIZE,
Deserializer,
_check_schema_version,
)
from woodwork.s3_utils import get_transport_params, use_smartopen
from woodwork.utils import _is_s3, _is_url
from woodwork.utils import _is_s3, _is_url, import_or_raise


class ParquetDeserializer(Deserializer):
Expand All @@ -17,6 +19,7 @@ class ParquetDeserializer(Deserializer):
format = "parquet"

def deserialize(self, profile_name, validate):
import_or_raise("pyarrow", PYARROW_IMPORT_ERROR_MESSAGE_DESERIALIZE)
if _is_url(self.path) or _is_s3(self.path):
dataframe = self.read_from_s3(profile_name)
else:
Expand All @@ -31,6 +34,8 @@ def deserialize(self, profile_name, validate):
return dataframe

def configure_deserializer(self):
import pyarrow as pa

self._set_metadata_path()
file_metadata = pa.parquet.read_metadata(self.metadata_path)
self.typing_info = json.loads(file_metadata.metadata[b"ww_meta"])
Expand Down
8 changes: 5 additions & 3 deletions woodwork/serializers/parquet_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
from pathlib import Path

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

from woodwork.accessor_utils import _is_dask_dataframe, _is_spark_dataframe
from woodwork.exceptions import ParametersIgnoredWarning, WoodworkFileExistsError
Expand All @@ -23,6 +21,7 @@ class ParquetSerializer(Serializer):
format = "parquet"

def __init__(self, path, filename, data_subdirectory, typing_info_filename):
import_or_raise("pyarrow", PYARROW_IMPORT_ERROR_MESSAGE)
super().__init__(path, filename, data_subdirectory, typing_info_filename)
if typing_info_filename and typing_info_filename != "woodwork_typing_info.json":
warnings.warn(
Expand All @@ -32,7 +31,6 @@ def __init__(self, path, filename, data_subdirectory, typing_info_filename):
self.typing_info_filename = None

def serialize(self, dataframe, profile_name, **kwargs):
import_or_raise("pyarrow", PYARROW_IMPORT_ERROR_MESSAGE)
if self.filename is not None and _is_dask_dataframe(dataframe):
raise ValueError(
"Writing a Dask dataframe to parquet with a filename specified is not supported"
Expand Down Expand Up @@ -60,6 +58,8 @@ def _create_pyarrow_table(self):
"""Create a pyarrow table for pandas. This table will get updated to included
Woodwork typing info before saving. Skip for Dask/Spark because for those formats
typing information has to be added after files are saved to disk."""
import pyarrow as pa

if isinstance(self.dataframe, pd.DataFrame):
dataframe = clean_latlong(self.dataframe)
self.table = pa.Table.from_pandas(dataframe)
Expand Down Expand Up @@ -90,6 +90,8 @@ def _generate_parquet_metadata(self):

def _save_parquet_table_to_disk(self):
"""Writes data to disk with the updated metadata including WW typing info."""
import pyarrow.parquet as pq

if _is_dask_dataframe(self.dataframe):
path, dataframe = self._setup_for_dask_and_spark()
dataframe.to_parquet(path, custom_metadata=self.metadata)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ numpy==1.22.3
pandas==1.4.1
pyarrow==7.0.0
scikit-learn==1.0.2
python-dateutil==2.8.2
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pandas==1.3.0
pyarrow==5.0.0
scikit-learn==0.22
python-dateutil==2.8.1

0 comments on commit 4170c70

Please sign in to comment.