From 2e5af4eb2600c5840ef7fecab8fa6c77d445cae9 Mon Sep 17 00:00:00 2001 From: Solomon Yu <797346+sqr00t@users.noreply.github.com> Date: Mon, 5 Feb 2024 16:44:35 +0000 Subject: [PATCH] fix!(imports & deps): add feature_enabled dictionary to handle excel engine requirement, set Selenium>=4.17.2 and PyYAML>=6.0.1 (#96) * chore(deps): update Selenium and PyYAML minimum supported versions * refactor(altair saving): update type hints in function signature for path * fix(openpyxl): handle import bug when excel backend openpyxl not installed * chore: add __init__.py to modules * refactor(capabilities): add top level feature_enabled dict for evaluating enabled features --- nesta_ds_utils/__init__.py | 6 ++ nesta_ds_utils/cleaning/__init__.py | 0 nesta_ds_utils/loading_saving/S3.py | 30 ++++---- nesta_ds_utils/loading_saving/__init__.py | 9 +++ .../loading_saving/gis_interface.py | 72 ++++++++++--------- nesta_ds_utils/networks/__init__.py | 0 nesta_ds_utils/viz/__init__.py | 0 nesta_ds_utils/viz/altair/__init__.py | 0 nesta_ds_utils/viz/altair/saving.py | 22 ++++-- setup.cfg | 4 +- 10 files changed, 88 insertions(+), 55 deletions(-) create mode 100644 nesta_ds_utils/cleaning/__init__.py create mode 100644 nesta_ds_utils/loading_saving/__init__.py create mode 100644 nesta_ds_utils/networks/__init__.py create mode 100644 nesta_ds_utils/viz/__init__.py create mode 100644 nesta_ds_utils/viz/altair/__init__.py diff --git a/nesta_ds_utils/__init__.py b/nesta_ds_utils/__init__.py index e69de29..14ed982 100644 --- a/nesta_ds_utils/__init__.py +++ b/nesta_ds_utils/__init__.py @@ -0,0 +1,6 @@ +from nesta_ds_utils.loading_saving import _gis_enabled, _excel_backend_available + +feature_enabled = { + "gis": _gis_enabled, + "excel": _excel_backend_available, +} diff --git a/nesta_ds_utils/cleaning/__init__.py b/nesta_ds_utils/cleaning/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nesta_ds_utils/loading_saving/S3.py b/nesta_ds_utils/loading_saving/S3.py index 
2da24d4..9ef399c 100644 --- a/nesta_ds_utils/loading_saving/S3.py +++ b/nesta_ds_utils/loading_saving/S3.py @@ -11,9 +11,9 @@ import warnings from nesta_ds_utils.loading_saving import file_ops -from nesta_ds_utils.loading_saving.gis_interface import _gis_enabled +from nesta_ds_utils import feature_enabled -if _gis_enabled: +if feature_enabled["gis"]: from nesta_ds_utils.loading_saving.gis_interface import ( _gdf_to_fileobj, _fileobj_to_gdf, @@ -53,10 +53,13 @@ def _df_to_fileobj(df_data: pd.DataFrame, path_to: str, **kwargs) -> io.BytesIO: df_data.to_csv(buffer, **kwargs) elif fnmatch(path_to, "*.parquet"): df_data.to_parquet(buffer, **kwargs) - elif fnmatch(path_to, "*.xlsx"): - df_data.to_excel(buffer, **kwargs) - elif fnmatch(path_to, "*.xlsm"): - df_data.to_excel(buffer, **kwargs) + elif fnmatch(path_to, "*.xlsx") or fnmatch(path_to, "*.xlsm"): + if feature_enabled["excel"]: + df_data.to_excel(buffer, **kwargs) + else: + raise ModuleNotFoundError( + "Please install 'io_extras' extra from nesta_ds_utils or 'openpyxl' to upload excel files." + ) else: raise NotImplementedError( "Uploading dataframe currently supported only for 'csv', 'parquet', 'xlsx' and xlsm'." 
@@ -217,7 +220,7 @@ def upload_obj( """ if isinstance(obj, pd.DataFrame): if type(obj).__name__ == "GeoDataFrame": - if _gis_enabled: + if feature_enabled["gis"]: obj = _gdf_to_fileobj(obj, path_to, **kwargs_writing) else: raise ModuleNotFoundError( @@ -258,10 +261,13 @@ def _fileobj_to_df(fileobj: io.BytesIO, path_from: str, **kwargs) -> pd.DataFram return pd.read_csv(fileobj, **kwargs) elif fnmatch(path_from, "*.parquet"): return pd.read_parquet(fileobj, **kwargs) - elif fnmatch(path_from, "*.xlsx"): - return pd.read_excel(fileobj, **kwargs) - elif fnmatch(path_from, "*.xlsm"): - return pd.read_excel(fileobj, **kwargs) + elif fnmatch(path_from, "*.xlsx") or fnmatch(path_from, "*.xlsm"): + if feature_enabled["excel"]: + return pd.read_excel(fileobj, **kwargs) + else: + raise ModuleNotFoundError( + "Please install 'io_extras' extra from nesta_ds_utils or 'openpyxl' to download excel files." + ) def _fileobj_to_dict(fileobj: io.BytesIO, path_from: str, **kwargs) -> dict: @@ -375,7 +381,7 @@ def download_obj( ) elif download_as == "geodf": if path_from.endswith(tuple([".geojson"])): - if _gis_enabled: + if feature_enabled["gis"]: return _fileobj_to_gdf(fileobj, path_from, **kwargs_reading) else: raise ModuleNotFoundError( diff --git a/nesta_ds_utils/loading_saving/__init__.py b/nesta_ds_utils/loading_saving/__init__.py new file mode 100644 index 0000000..17bee7b --- /dev/null +++ b/nesta_ds_utils/loading_saving/__init__.py @@ -0,0 +1,9 @@ +from nesta_ds_utils.loading_saving.gis_interface import _gis_enabled + +try: + import openpyxl + + _excel_backend_available = True + +except ImportError: + _excel_backend_available = False diff --git a/nesta_ds_utils/loading_saving/gis_interface.py b/nesta_ds_utils/loading_saving/gis_interface.py index 746adab..bcd0134 100644 --- a/nesta_ds_utils/loading_saving/gis_interface.py +++ b/nesta_ds_utils/loading_saving/gis_interface.py @@ -6,40 +6,42 @@ _gis_enabled = True - def _gdf_to_fileobj(df_data: GeoDataFrame, path_to: str, 
**kwargs) -> BytesIO: - """Convert GeoDataFrame into bytes file object. - - Args: - df_data (gpd.DataFrame): Dataframe to convert. - path_to (str): Saving file name. - - Returns: - io.BytesIO: Bytes file object. - """ - buffer = BytesIO() - if fnmatch(path_to, "*.geojson"): - df_data.to_file(buffer, driver="GeoJSON", **kwargs) - else: - raise NotImplementedError( - "Uploading geodataframe currently supported only for 'geojson'." - ) - buffer.seek(0) - return buffer - - def _fileobj_to_gdf(fileobj: BytesIO, path_from: str, **kwargs) -> GeoDataFrame: - """Convert bytes file object into geodataframe. - - Args: - fileobj (io.BytesIO): Bytes file object. - path_from (str): Path of loaded data. - - Returns: - gpd.DataFrame: Data as geodataframe. - """ - if fnmatch(path_from, "*.geojson"): - return GeoDataFrame.from_features( - load_json(fileobj.getvalue().decode())["features"] - ) - except ImportError: _gis_enabled = False + + +def _gdf_to_fileobj(df_data: GeoDataFrame, path_to: str, **kwargs) -> BytesIO: + """Convert GeoDataFrame into bytes file object. + + Args: + df_data (gpd.DataFrame): Dataframe to convert. + path_to (str): Saving file name. + + Returns: + io.BytesIO: Bytes file object. + """ + buffer = BytesIO() + if fnmatch(path_to, "*.geojson"): + df_data.to_file(buffer, driver="GeoJSON", **kwargs) + else: + raise NotImplementedError( + "Uploading geodataframe currently supported only for 'geojson'." + ) + buffer.seek(0) + return buffer + + +def _fileobj_to_gdf(fileobj: BytesIO, path_from: str, **kwargs) -> GeoDataFrame: + """Convert bytes file object into geodataframe. + + Args: + fileobj (io.BytesIO): Bytes file object. + path_from (str): Path of loaded data. + + Returns: + gpd.DataFrame: Data as geodataframe. 
+ """ + if fnmatch(path_from, "*.geojson"): + return GeoDataFrame.from_features( + load_json(fileobj.getvalue().decode())["features"] + ) diff --git a/nesta_ds_utils/networks/__init__.py b/nesta_ds_utils/networks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nesta_ds_utils/viz/__init__.py b/nesta_ds_utils/viz/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nesta_ds_utils/viz/altair/__init__.py b/nesta_ds_utils/viz/altair/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nesta_ds_utils/viz/altair/saving.py b/nesta_ds_utils/viz/altair/saving.py index 6a932c0..0352f10 100644 --- a/nesta_ds_utils/viz/altair/saving.py +++ b/nesta_ds_utils/viz/altair/saving.py @@ -51,13 +51,17 @@ def webdriver_context(driver: WebDriver = None): def _save_png( - fig: Chart, path: os.PathLike, name: str, scale_factor: int, driver: WebDriver + fig: Chart, + path: Union[os.PathLike, Path, str], + name: str, + scale_factor: int, + driver: WebDriver, ): """Save altair chart as a raster png file. Args: fig: Altair chart. - path (os.PathLike): Path where to save the figure. + path (Union[os.PathLike, Path, str]): Path where to save the figure. name (str): Name of figure. scale_factor (int): Saving scale factor. driver (WebDriver): webdriver to use for saving. @@ -70,12 +74,14 @@ def _save_png( ) -def _save_html(fig: Chart, path: os.PathLike, name: str, scale_factor: int): +def _save_html( + fig: Chart, path: Union[os.PathLike, Path, str], name: str, scale_factor: int +): """Save altair chart as a html file. Args: fig: Altair chart. - path (os.PathLike): Path where to save the figure. + path (Union[os.PathLike, Path, str]): Path where to save the figure. name (str): Name of figure. scale_factor (int): Saving scale factor. 
""" @@ -83,13 +89,17 @@ def _save_html(fig: Chart, path: os.PathLike, name: str, scale_factor: int): def _save_svg( - fig: Chart, path: os.PathLike, name: str, scale_factor: int, driver: WebDriver + fig: Chart, + path: Union[os.PathLike, Path, str], + name: str, + scale_factor: int, + driver: WebDriver, ): """Save altair chart as vector svg file. Args: fig: Altair chart. - path (os.PathLike): Path where to save the figure. + path (Union[os.PathLike, Path, str]): Path where to save the figure. name (str): Name of figure. scale_factor (int): Saving scale factor. driver (WebDriver): webdriver to use for saving. diff --git a/setup.cfg b/setup.cfg index c6d5c43..2c9d17c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -14,7 +14,7 @@ python_requires = >=3.8 install_requires = numpy>=1.23.4 pandas>=1.5.1 - pyyaml<5.4.0 + pyyaml>=6.0.1 scipy>=1.9.3 pyarrow>=10.0.0 [options.extras_require] @@ -28,7 +28,7 @@ viz = altair>=4.2.0 vl-convert-python>=1.2.0 matplotlib>=3.6.2 - selenium>=4.2.0 + selenium>=4.17.2 webdriver_manager>=4.0.0 networks = networkx==2.8.8