diff --git a/openeo_driver/delayed_vector.py b/openeo_driver/delayed_vector.py
index a566fa7a..8b918ce2 100644
--- a/openeo_driver/delayed_vector.py
+++ b/openeo_driver/delayed_vector.py
@@ -1,19 +1,23 @@
+import json
+import logging
+import os
 import tempfile
+from datetime import datetime, timedelta
+from typing import Iterable, List, Dict
+from urllib.parse import urlparse
 
 import fiona
 import geopandas as gpd
 import pyproj
+import requests
 from shapely.geometry import shape
 from shapely.geometry.base import BaseGeometry
-from urllib.parse import urlparse
-import requests
-from datetime import datetime, timedelta
-import os
-import json
-from typing import Iterable, List, Dict
 
+from openeo_driver.errors import OpenEOApiException
 from openeo_driver.utils import reproject_bounding_box
 
+_log = logging.getLogger(__name__)
+
 
 class DelayedVector:
     """
@@ -43,6 +47,33 @@ def __str__(self):
     def __eq__(self, other):
         return isinstance(other, type(self)) and self.path == other.path
 
+    def _load_geojson_url(self, url: str) -> dict:
+        """
+        Download the given URL and parse the response body as (Geo)JSON.
+
+        :param url: URL of the GeoJSON document
+        :return: the decoded JSON payload
+        :raises requests.HTTPError: on a 4xx/5xx response (via `raise_for_status`)
+        :raises OpenEOApiException: (status 400) when the body is not valid JSON
+        """
+        _log.info(f"Loading GeoJSON from {url!r}")
+        resp = requests.get(url)
+        content_type = resp.headers.get("content-type")
+        content_length = resp.headers.get("content-length")
+        _log.info(
+            f"GeoJSON response: status:{resp.status_code!r}"
+            f" content-type:{content_type!r} content-length:{content_length!r}"
+        )
+        resp.raise_for_status()
+        try:
+            return resp.json()
+        except requests.exceptions.JSONDecodeError as e:
+            # Note: requests raises its own JSONDecodeError wrapper, which is not a
+            # `json.JSONDecodeError` subclass when `simplejson` is installed.
+            message = f"Failed to parse GeoJSON from URL {url!r} (content-type={content_type!r}, content-length={content_length!r}): {e!r}"
+            # TODO: use generic client error? https://github.com/Open-EO/openeo-api/issues/456
+            raise OpenEOApiException(status_code=400, message=message) from e
+
     @property
     def crs(self) -> pyproj.CRS:
         if self._crs is None:
@@ -51,7 +82,7 @@ def crs(self) -> pyproj.CRS:
                     local_shp_file = self._download_shapefile(self.path)
                     self._crs = DelayedVector._read_shapefile_crs(local_shp_file)
                 else:  # it's GeoJSON
-                    geojson = requests.get(self.path).json()
+                    geojson = self._load_geojson_url(url=self.path)
                     # FIXME: can be cached
                     self._crs = DelayedVector._read_geojson_crs(geojson)
             else:  # it's a file on disk
@@ -70,7 +101,7 @@ def geometries(self) -> Iterable[BaseGeometry]:
                     local_shp_file = self._download_shapefile(self.path)
                     geometries = DelayedVector._read_shapefile_geometries(local_shp_file)
                 else:  # it's GeoJSON
-                    geojson = requests.get(self.path).json()
+                    geojson = self._load_geojson_url(url=self.path)
                     geometries = DelayedVector._read_geojson_geometries(geojson)
             else:  # it's a file on disk
                 if self.path.endswith(".shp"):
@@ -111,7 +142,7 @@ def bounds(self) -> (float, float, float, float):
                     local_shp_file = self._download_shapefile(self.path)
                     bounds = DelayedVector._read_shapefile_bounds(local_shp_file)
                 else:  # it's GeoJSON
-                    geojson = requests.get(self.path).json()
+                    geojson = self._load_geojson_url(url=self.path)
                     # FIXME: can be cached
                     bounds = DelayedVector._read_geojson_bounds(geojson)
             else:  # it's a file on disk
diff --git a/setup.py b/setup.py
index 890d972a..07e1a477 100644
--- a/setup.py
+++ b/setup.py
@@ -39,7 +39,7 @@
     install_requires=[
         'flask',
         'werkzeug>=1.0.1',
-        'requests',
+        "requests>=2.28.0",
         'openeo>=0.9.2.a1.dev',
         'openeo_processes==0.0.4',
         'gunicorn>=20.0.1',
diff --git a/tests/test_delayed_vector.py b/tests/test_delayed_vector.py
index eac8f592..cdf5ad43 100644
--- a/tests/test_delayed_vector.py
+++ b/tests/test_delayed_vector.py
@@ -1,4 +1,7 @@
+import pytest
+
 from openeo_driver.delayed_vector import DelayedVector
+from openeo_driver.errors import OpenEOApiException
 from .data import get_path
 from pyproj import CRS
 
@@ -12,14 +15,25 @@ def test_geometry_collection_bounds():
     dv = DelayedVector(str(get_path("geojson/GeometryCollection01.json")))
     assert dv.bounds == (5.05, 51.21, 5.15, 51.3)
 
+
 def test_geojson_crs_unspecified():
     dv = DelayedVector(str(get_path("geojson/test_geojson_crs_unspecified.geojson")))
     assert dv.crs == CRS.from_user_input("+init=epsg:4326")
 
+
 def test_geojson_crs_from_epsg():
     dv = DelayedVector(str(get_path("geojson/test_geojson_crs_from_epsg.geojson")))
     assert dv.crs == CRS.from_user_input("+init=epsg:4326")
 
+
 def test_geojson_crs_from_ogc_urn():
     dv = DelayedVector(str(get_path("geojson/test_geojson_crs_from_ogc_urn.geojson")))
     assert dv.crs == CRS.from_user_input("+init=epsg:4326")
+
+
+def test_geojson_url_invalid(requests_mock):
+    requests_mock.get("https://dl.test/features.json", text="\n\n\n\nnot json\n\n", headers={"Content-Type": "text/html"})
+    dv = DelayedVector("https://dl.test/features.json")
+
+    with pytest.raises(OpenEOApiException, match="Failed to parse GeoJSON from URL"):
+        _ = dv.bounds