fix!(imports & deps): add feature_enabled dictionary to handle excel engine requirement, set Selenium>=4.17.2 and PyYAML>=6.0.1 (#96)

* chore(deps): update Selenium and PyYAML minimum supported versions

* refactor(altair saving): update type hints in function signature for path

* fix(openpyxl): handle import bug when excel backend openpyxl not installed

* chore: add __init__.py to modules

* refactor(capabilities): add top level feature_enabled dict for evaluating enabled features
sqr00t committed Feb 5, 2024
1 parent 15a8ec3 commit 2e5af4e
Showing 10 changed files with 88 additions and 55 deletions.
6 changes: 6 additions & 0 deletions nesta_ds_utils/__init__.py
@@ -0,0 +1,6 @@
from nesta_ds_utils.loading_saving import _gis_enabled, _excel_backend_available

feature_enabled = {
"gis": _gis_enabled,
"excel": _excel_backend_available,
}
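
A minimal usage sketch, not part of the diff: downstream code can consult the new top-level dictionary before taking an optional-dependency code path. The keys are the two defined above; the file paths are illustrative.

import pandas as pd

from nesta_ds_utils import feature_enabled

if feature_enabled["excel"]:
    # openpyxl imported successfully, so Excel reads and writes are available.
    df = pd.read_excel("report.xlsx")  # illustrative path
else:
    # Fall back to a format that needs no optional backend.
    df = pd.read_csv("report.csv")  # illustrative path

if feature_enabled["gis"]:
    print("GIS helpers (geopandas-backed) are available.")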
Empty file.
30 changes: 18 additions & 12 deletions nesta_ds_utils/loading_saving/S3.py
@@ -11,9 +11,9 @@
import warnings
from nesta_ds_utils.loading_saving import file_ops

from nesta_ds_utils.loading_saving.gis_interface import _gis_enabled
from nesta_ds_utils import feature_enabled

if _gis_enabled:
if feature_enabled["gis"]:
from nesta_ds_utils.loading_saving.gis_interface import (
_gdf_to_fileobj,
_fileobj_to_gdf,
@@ -53,10 +53,13 @@ def _df_to_fileobj(df_data: pd.DataFrame, path_to: str, **kwargs) -> io.BytesIO:
df_data.to_csv(buffer, **kwargs)
elif fnmatch(path_to, "*.parquet"):
df_data.to_parquet(buffer, **kwargs)
elif fnmatch(path_to, "*.xlsx"):
df_data.to_excel(buffer, **kwargs)
elif fnmatch(path_to, "*.xlsm"):
df_data.to_excel(buffer, **kwargs)
elif fnmatch(path_to, "*.xlsx") or fnmatch(path_to, "*.xlsm"):
if feature_enabled["excel"]:
df_data.to_excel(buffer, **kwargs)
else:
raise ModuleNotFoundError(
"Please install 'io_extras' extra from nesta_ds_utils or 'openpyxl' to upload excel files."
)
else:
raise NotImplementedError(
"Uploading dataframe currently supported only for 'csv', 'parquet', 'xlsx' and xlsm'."
@@ -217,7 +220,7 @@ def upload_obj(
"""
if isinstance(obj, pd.DataFrame):
if type(obj).__name__ == "GeoDataFrame":
if _gis_enabled:
if feature_enabled["gis"]:
obj = _gdf_to_fileobj(obj, path_to, **kwargs_writing)
else:
raise ModuleNotFoundError(
@@ -258,10 +261,13 @@ def _fileobj_to_df(fileobj: io.BytesIO, path_from: str, **kwargs) -> pd.DataFrame:
return pd.read_csv(fileobj, **kwargs)
elif fnmatch(path_from, "*.parquet"):
return pd.read_parquet(fileobj, **kwargs)
elif fnmatch(path_from, "*.xlsx"):
return pd.read_excel(fileobj, **kwargs)
elif fnmatch(path_from, "*.xlsm"):
return pd.read_excel(fileobj, **kwargs)
elif fnmatch(path_from, "*.xlsx") or fnmatch(path_from, "*.xlsm"):
if feature_enabled["excel"]:
return pd.read_excel(fileobj, **kwargs)
else:
raise ModuleNotFoundError(
"Please install 'io_extras' extra from nesta_ds_utils or 'openpyxl' to download excel files."
)


def _fileobj_to_dict(fileobj: io.BytesIO, path_from: str, **kwargs) -> dict:
@@ -375,7 +381,7 @@ def download_obj(
)
elif download_as == "geodf":
if path_from.endswith(tuple([".geojson"])):
if _gis_enabled:
if feature_enabled["gis"]:
return _fileobj_to_gdf(fileobj, path_from, **kwargs_reading)
else:
raise ModuleNotFoundError(
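For orientation, a small sketch, not part of the diff, of how the new guard surfaces to a caller. It uses the private _df_to_fileobj helper whose signature appears above; in an environment without openpyxl, the Excel branch now raises ModuleNotFoundError with an actionable message instead of failing deep inside pandas.

import io

import pandas as pd

from nesta_ds_utils.loading_saving.S3 import _df_to_fileobj

df = pd.DataFrame({"a": [1, 2, 3]})

try:
    # Routed through df.to_excel only when feature_enabled["excel"] is True,
    # i.e. when openpyxl was importable at package import time.
    buffer: io.BytesIO = _df_to_fileobj(df, "data.xlsx")  # illustrative path
except ModuleNotFoundError as err:
    # Raised by the new guard when openpyxl (the 'io_extras' extra) is missing.
    print(err)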
9 changes: 9 additions & 0 deletions nesta_ds_utils/loading_saving/__init__.py
@@ -0,0 +1,9 @@
from nesta_ds_utils.loading_saving.gis_interface import _gis_enabled

try:
import openpyxl

_excel_backend_available = True

except ImportError:
_excel_backend_available = False
72 changes: 37 additions & 35 deletions nesta_ds_utils/loading_saving/gis_interface.py
@@ -6,40 +6,42 @@

_gis_enabled = True

def _gdf_to_fileobj(df_data: GeoDataFrame, path_to: str, **kwargs) -> BytesIO:
"""Convert GeoDataFrame into bytes file object.
Args:
df_data (gpd.DataFrame): Dataframe to convert.
path_to (str): Saving file name.
Returns:
io.BytesIO: Bytes file object.
"""
buffer = BytesIO()
if fnmatch(path_to, "*.geojson"):
df_data.to_file(buffer, driver="GeoJSON", **kwargs)
else:
raise NotImplementedError(
"Uploading geodataframe currently supported only for 'geojson'."
)
buffer.seek(0)
return buffer

def _fileobj_to_gdf(fileobj: BytesIO, path_from: str, **kwargs) -> GeoDataFrame:
"""Convert bytes file object into geodataframe.
Args:
fileobj (io.BytesIO): Bytes file object.
path_from (str): Path of loaded data.
Returns:
gpd.DataFrame: Data as geodataframe.
"""
if fnmatch(path_from, "*.geojson"):
return GeoDataFrame.from_features(
load_json(fileobj.getvalue().decode())["features"]
)

except ImportError:
_gis_enabled = False


def _gdf_to_fileobj(df_data: GeoDataFrame, path_to: str, **kwargs) -> BytesIO:
"""Convert GeoDataFrame into bytes file object.
Args:
df_data (gpd.DataFrame): Dataframe to convert.
path_to (str): Saving file name.
Returns:
io.BytesIO: Bytes file object.
"""
buffer = BytesIO()
if fnmatch(path_to, "*.geojson"):
df_data.to_file(buffer, driver="GeoJSON", **kwargs)
else:
raise NotImplementedError(
"Uploading geodataframe currently supported only for 'geojson'."
)
buffer.seek(0)
return buffer


def _fileobj_to_gdf(fileobj: BytesIO, path_from: str, **kwargs) -> GeoDataFrame:
"""Convert bytes file object into geodataframe.
Args:
fileobj (io.BytesIO): Bytes file object.
path_from (str): Path of loaded data.
Returns:
gpd.DataFrame: Data as geodataframe.
"""
if fnmatch(path_from, "*.geojson"):
return GeoDataFrame.from_features(
load_json(fileobj.getvalue().decode())["features"]
)
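
To make the move easier to read, a sketch of the resulting module layout: the two converters now sit at module level rather than inside the try block, and the try block is reduced to setting the _gis_enabled flag. The import lines sit above this hunk and are not shown, so the geopandas and json imports below are assumptions, and the string annotations are an assumption made here so the sketch stays importable without geopandas.

# Sketch of nesta_ds_utils/loading_saving/gis_interface.py after this change.
from fnmatch import fnmatch
from io import BytesIO
from json import loads as load_json  # assumed alias, based on usage above

try:
    from geopandas import GeoDataFrame  # assumed guarded import

    _gis_enabled = True
except ImportError:
    _gis_enabled = False


def _gdf_to_fileobj(df_data: "GeoDataFrame", path_to: str, **kwargs) -> BytesIO:
    """Convert GeoDataFrame into a bytes file object (GeoJSON only)."""
    ...


def _fileobj_to_gdf(fileobj: BytesIO, path_from: str, **kwargs) -> "GeoDataFrame":
    """Convert a bytes file object into a GeoDataFrame (GeoJSON only)."""
    ...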
Empty file.
Empty file added nesta_ds_utils/viz/__init__.py
Empty file.
Empty file.
22 changes: 16 additions & 6 deletions nesta_ds_utils/viz/altair/saving.py
@@ -51,13 +51,17 @@ def webdriver_context(driver: WebDriver = None):


def _save_png(
fig: Chart, path: os.PathLike, name: str, scale_factor: int, driver: WebDriver
fig: Chart,
path: Union[os.PathLike, Path, str],
name: str,
scale_factor: int,
driver: WebDriver,
):
"""Save altair chart as a raster png file.
Args:
fig: Altair chart.
path (os.PathLike): Path where to save the figure.
path (Union[os.PathLike, Path, str]): Path where to save the figure.
name (str): Name of figure.
scale_factor (int): Saving scale factor.
driver (WebDriver): webdriver to use for saving.
@@ -70,26 +74,32 @@ def _save_png(
)


def _save_html(fig: Chart, path: os.PathLike, name: str, scale_factor: int):
def _save_html(
fig: Chart, path: Union[os.PathLike, Path, str], name: str, scale_factor: int
):
"""Save altair chart as a html file.
Args:
fig: Altair chart.
path (os.PathLike): Path where to save the figure.
path (Union[os.PathLike, Path, str]): Path where to save the figure.
name (str): Name of figure.
scale_factor (int): Saving scale factor.
"""
fig.save(f"{path}/{name}.html", scale_factor=scale_factor)


def _save_svg(
fig: Chart, path: os.PathLike, name: str, scale_factor: int, driver: WebDriver
fig: Chart,
path: Union[os.PathLike, Path, str],
name: str,
scale_factor: int,
driver: WebDriver,
):
"""Save altair chart as vector svg file.
Args:
fig: Altair chart.
path (os.PathLike): Path where to save the figure.
path (Union[os.PathLike, Path, str]): Path where to save the figure.
name (str): Name of figure.
scale_factor (int): Saving scale factor.
driver (WebDriver): webdriver to use for saving.
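A short sketch, not part of the diff, exercising the widened path hint: both str and pathlib.Path satisfy Union[os.PathLike, Path, str]. The chart and output directory are illustrative, and _save_html is a private helper, so in practice it is likely reached through the module's public saving entry point.

from pathlib import Path

import altair as alt
import pandas as pd

from nesta_ds_utils.viz.altair.saving import _save_html

out_dir = Path("outputs/figures")  # illustrative location
out_dir.mkdir(parents=True, exist_ok=True)

fig = (
    alt.Chart(pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}))
    .mark_point()
    .encode(x="x", y="y")
)

# Both spellings of the directory are now covered by the type hint.
_save_html(fig, out_dir, "scatter", scale_factor=1)
_save_html(fig, str(out_dir), "scatter", scale_factor=1)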
4 changes: 2 additions & 2 deletions setup.cfg
@@ -14,7 +14,7 @@ python_requires = >=3.8
install_requires =
numpy>=1.23.4
pandas>=1.5.1
pyyaml<5.4.0
pyyaml>=6.0.1
scipy>=1.9.3
pyarrow>=10.0.0
[options.extras_require]
@@ -28,7 +28,7 @@ viz =
altair>=4.2.0
vl-convert-python>=1.2.0
matplotlib>=3.6.2
selenium>=4.2.0
selenium>=4.17.2
webdriver_manager>=4.0.0
networks =
networkx==2.8.8
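As a quick sanity check, a sketch, not part of the diff, that verifies an installed environment meets the new minimums. It assumes the packaging library is available (it usually ships alongside pip/setuptools) and that selenium, which is only pulled in by the viz extra, may be absent.

from importlib.metadata import PackageNotFoundError, version

from packaging.version import Version  # assumed available in the environment

# Minimum versions set in setup.cfg by this commit.
minimums = {"PyYAML": "6.0.1", "selenium": "4.17.2"}

for dist, floor in minimums.items():
    try:
        installed = Version(version(dist))
    except PackageNotFoundError:
        print(f"{dist}: not installed (optional unless its extra is used)")
        continue
    status = "OK" if installed >= Version(floor) else "too old"
    print(f"{dist}: {installed} (requires >= {floor}) -> {status}")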
