Skip to content

Commit

Permalink
Fixed import and added python-dateutil to dependencies (#1397)
Browse files Browse the repository at this point in the history
* Fixed import and added python-dateutil to dependencies

* Update release notes

* added python-dateutil to latest requirements
  • Loading branch information
ParthivNaresh committed Apr 25, 2022
1 parent b12e4b0 commit 4170c70
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 10 deletions.
8 changes: 5 additions & 3 deletions docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@
Release Notes
-------------

.. Future Release
==============
Future Release
==============
* Enhancements
* Fixes
* Fixed import issues regarding ``pyarrow`` and made ``python-dateutil>=2.8.1`` a required dependency (:pr:`1397`)
* Changes
* Documentation Changes
* Testing Changes

.. Thanks to the following people for contributing to this release:
Thanks to the following people for contributing to this release:
:user:`ParthivNaresh`

v0.16.1 Apr 25, 2022
====================
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ include_package_data = True
install_requires =
pandas >= 1.3.0, <1.4.2
scikit-learn >= 0.22
python-dateutil >= 2.8.1

python_requires = >=3.7, <4

Expand Down
8 changes: 8 additions & 0 deletions woodwork/deserializers/deserializer_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@
from woodwork.serializers.serializer_base import SCHEMA_VERSION
from woodwork.utils import _is_s3, _is_url, import_or_raise

# Error message raised via import_or_raise() when pyarrow is not installed
# at deserialization time.
#
# BUG FIX: the original defined this as an f-string containing ``{format}``.
# At module scope there is no local ``format`` variable, so Python
# interpolated the *builtin* ``format`` function, and users saw:
#   "The pyarrow library is required to deserialize from <built-in function format>."
# A plain string with no interpolation fixes the message.
PYARROW_IMPORT_ERROR_MESSAGE_DESERIALIZE = (
    "The pyarrow library is required to deserialize from this file format.\n"
    "Install via pip:\n"
    "    pip install pyarrow\n"
    "Install via conda:\n"
    "    conda install pyarrow -c conda-forge"
)


class Deserializer:
def __init__(self, path, filename, data_subdirectory, typing_info):
Expand Down
13 changes: 9 additions & 4 deletions woodwork/deserializers/parquet_deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
import tempfile
from pathlib import Path

import pyarrow as pa

from woodwork.deserializers.deserializer_base import Deserializer, _check_schema_version
from woodwork.deserializers.deserializer_base import (
PYARROW_IMPORT_ERROR_MESSAGE_DESERIALIZE,
Deserializer,
_check_schema_version,
)
from woodwork.s3_utils import get_transport_params, use_smartopen
from woodwork.utils import _is_s3, _is_url
from woodwork.utils import _is_s3, _is_url, import_or_raise


class ParquetDeserializer(Deserializer):
Expand All @@ -17,6 +19,7 @@ class ParquetDeserializer(Deserializer):
format = "parquet"

def deserialize(self, profile_name, validate):
import_or_raise("pyarrow", PYARROW_IMPORT_ERROR_MESSAGE_DESERIALIZE)
if _is_url(self.path) or _is_s3(self.path):
dataframe = self.read_from_s3(profile_name)
else:
Expand All @@ -31,6 +34,8 @@ def deserialize(self, profile_name, validate):
return dataframe

def configure_deserializer(self):
import pyarrow as pa

self._set_metadata_path()
file_metadata = pa.parquet.read_metadata(self.metadata_path)
self.typing_info = json.loads(file_metadata.metadata[b"ww_meta"])
Expand Down
8 changes: 5 additions & 3 deletions woodwork/serializers/parquet_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
from pathlib import Path

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

from woodwork.accessor_utils import _is_dask_dataframe, _is_spark_dataframe
from woodwork.exceptions import ParametersIgnoredWarning, WoodworkFileExistsError
Expand All @@ -23,6 +21,7 @@ class ParquetSerializer(Serializer):
format = "parquet"

def __init__(self, path, filename, data_subdirectory, typing_info_filename):
import_or_raise("pyarrow", PYARROW_IMPORT_ERROR_MESSAGE)
super().__init__(path, filename, data_subdirectory, typing_info_filename)
if typing_info_filename and typing_info_filename != "woodwork_typing_info.json":
warnings.warn(
Expand All @@ -32,7 +31,6 @@ def __init__(self, path, filename, data_subdirectory, typing_info_filename):
self.typing_info_filename = None

def serialize(self, dataframe, profile_name, **kwargs):
import_or_raise("pyarrow", PYARROW_IMPORT_ERROR_MESSAGE)
if self.filename is not None and _is_dask_dataframe(dataframe):
raise ValueError(
"Writing a Dask dataframe to parquet with a filename specified is not supported"
Expand Down Expand Up @@ -60,6 +58,8 @@ def _create_pyarrow_table(self):
"""Create a pyarrow table for pandas. This table will get updated to included
Woodwork typing info before saving. Skip for Dask/Spark because for those formats
typing information has to be added after files are saved to disk."""
import pyarrow as pa

if isinstance(self.dataframe, pd.DataFrame):
dataframe = clean_latlong(self.dataframe)
self.table = pa.Table.from_pandas(dataframe)
Expand Down Expand Up @@ -90,6 +90,8 @@ def _generate_parquet_metadata(self):

def _save_parquet_table_to_disk(self):
"""Writes data to disk with the updated metadata including WW typing info."""
import pyarrow.parquet as pq

if _is_dask_dataframe(self.dataframe):
path, dataframe = self._setup_for_dask_and_spark()
dataframe.to_parquet(path, custom_metadata=self.metadata)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ numpy==1.22.3
pandas==1.4.1
pyarrow==7.0.0
scikit-learn==1.0.2
python-dateutil==2.8.2
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pandas==1.3.0
pyarrow==5.0.0
scikit-learn==0.22
python-dateutil==2.8.1

0 comments on commit 4170c70

Please sign in to comment.