Skip to content

Commit

Permalink
Issue #130: improve error handling/logging in read_vector
Browse files Browse the repository at this point in the history
Also require "requests>=2.28.0" to avoid excessive JSONDecodeError message size (psf/requests#6036)
  • Loading branch information
soxofaan committed Jun 14, 2022
1 parent b85e4b3 commit aab5c20
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 10 deletions.
39 changes: 30 additions & 9 deletions openeo_driver/delayed_vector.py
@@ -1,19 +1,23 @@
import json
import logging
import os
import tempfile
from datetime import datetime, timedelta
from typing import Iterable, List, Dict
from urllib.parse import urlparse

import fiona
import geopandas as gpd
import pyproj
import requests
from shapely.geometry import shape
from shapely.geometry.base import BaseGeometry
from urllib.parse import urlparse
import requests
from datetime import datetime, timedelta
import os
import json
from typing import Iterable, List, Dict

from openeo_driver.errors import OpenEOApiException
from openeo_driver.utils import reproject_bounding_box

_log = logging.getLogger(__name__)


class DelayedVector:
"""
Expand Down Expand Up @@ -43,6 +47,23 @@ def __str__(self):
def __eq__(self, other):
    """Return True when `other` is an instance of this object's type with an equal `path`."""
    # Objects that are not instances of our own type can never compare equal.
    if not isinstance(other, type(self)):
        return False
    return self.path == other.path

def _load_geojson_url(self, url: str) -> dict:
    """
    Download the document at `url` and return its decoded JSON payload.

    The URL and the response's status code, content-type and content-length
    are logged at info level to make failed loads easier to diagnose.

    :param url: location to fetch with an HTTP GET request
    :return: the result of decoding the response body as JSON
    :raises requests.HTTPError: when the response has an error status
        (raised by `raise_for_status()`)
    :raises OpenEOApiException: (status code 400) when the response body
        can not be decoded as JSON
    """
    _log.info(f"Loading GeoJSON from {url!r}")
    resp = requests.get(url)
    content_type = resp.headers.get("content-type")
    content_length = resp.headers.get("content-length")
    _log.info(
        f"GeoJSON response: status:{resp.status_code!r}"
        f" content-type:{content_type!r} content-length:{content_length!r}"
    )
    resp.raise_for_status()
    try:
        return resp.json()
    except ValueError as e:
        # Catch `ValueError` instead of `json.JSONDecodeError`: when `simplejson` is installed,
        # `requests` raises a decode error derived from `simplejson.JSONDecodeError`,
        # which is not a `json.JSONDecodeError`, but both derive from `ValueError`.
        message = f"Failed to parse GeoJSON from URL {url!r} (content-type={content_type!r}, content-length={content_length!r}): {e!r}"
        # TODO: use generic client error? https://github.com/Open-EO/openeo-api/issues/456
        # Chain the original decode error so it stays visible in tracebacks.
        raise OpenEOApiException(status_code=400, message=message) from e

@property
def crs(self) -> pyproj.CRS:
if self._crs is None:
Expand All @@ -51,7 +72,7 @@ def crs(self) -> pyproj.CRS:
local_shp_file = self._download_shapefile(self.path)
self._crs = DelayedVector._read_shapefile_crs(local_shp_file)
else: # it's GeoJSON
geojson = requests.get(self.path).json()
geojson = self._load_geojson_url(url=self.path)
# FIXME: can be cached
self._crs = DelayedVector._read_geojson_crs(geojson)
else: # it's a file on disk
Expand All @@ -70,7 +91,7 @@ def geometries(self) -> Iterable[BaseGeometry]:
local_shp_file = self._download_shapefile(self.path)
geometries = DelayedVector._read_shapefile_geometries(local_shp_file)
else: # it's GeoJSON
geojson = requests.get(self.path).json()
geojson = self._load_geojson_url(url=self.path)
geometries = DelayedVector._read_geojson_geometries(geojson)
else: # it's a file on disk
if self.path.endswith(".shp"):
Expand Down Expand Up @@ -111,7 +132,7 @@ def bounds(self) -> (float, float, float, float):
local_shp_file = self._download_shapefile(self.path)
bounds = DelayedVector._read_shapefile_bounds(local_shp_file)
else: # it's GeoJSON
geojson = requests.get(self.path).json()
geojson = self._load_geojson_url(url=self.path)
# FIXME: can be cached
bounds = DelayedVector._read_geojson_bounds(geojson)
else: # it's a file on disk
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -39,7 +39,7 @@
install_requires=[
'flask',
'werkzeug>=1.0.1',
'requests',
"requests>=2.28.0",
'openeo>=0.9.2.a1.dev',
'openeo_processes==0.0.4',
'gunicorn>=20.0.1',
Expand Down
14 changes: 14 additions & 0 deletions tests/test_delayed_vector.py
@@ -1,4 +1,7 @@
import pytest

from openeo_driver.delayed_vector import DelayedVector
from openeo_driver.errors import OpenEOApiException
from .data import get_path
from pyproj import CRS

Expand All @@ -12,14 +15,25 @@ def test_geometry_collection_bounds():
dv = DelayedVector(str(get_path("geojson/GeometryCollection01.json")))
assert dv.bounds == (5.05, 51.21, 5.15, 51.3)


def test_geojson_crs_unspecified():
    """The CRS read from the "crs unspecified" GeoJSON fixture must equal EPSG:4326."""
    geojson_path = str(get_path("geojson/test_geojson_crs_unspecified.geojson"))
    vector = DelayedVector(geojson_path)
    actual_crs = vector.crs
    expected_crs = CRS.from_user_input("+init=epsg:4326")
    assert actual_crs == expected_crs


def test_geojson_crs_from_epsg():
    """The CRS read from the "crs from epsg" GeoJSON fixture must equal EPSG:4326."""
    geojson_path = str(get_path("geojson/test_geojson_crs_from_epsg.geojson"))
    vector = DelayedVector(geojson_path)
    actual_crs = vector.crs
    expected_crs = CRS.from_user_input("+init=epsg:4326")
    assert actual_crs == expected_crs


def test_geojson_crs_from_ogc_urn():
    """The CRS read from the "crs from ogc urn" GeoJSON fixture must equal EPSG:4326."""
    geojson_path = str(get_path("geojson/test_geojson_crs_from_ogc_urn.geojson"))
    vector = DelayedVector(geojson_path)
    actual_crs = vector.crs
    expected_crs = CRS.from_user_input("+init=epsg:4326")
    assert actual_crs == expected_crs


def test_geojson_url_invalid(requests_mock):
    """Reading bounds from a URL that serves HTML instead of JSON must raise OpenEOApiException."""
    url = "https://dl.test/features.json"
    # Serve a non-JSON body for the URL that the vector is going to fetch.
    requests_mock.get(
        url,
        text="\n\n<p>not json<p>",
        headers={"Content-Type": "text/html"},
    )
    vector = DelayedVector(url)

    with pytest.raises(OpenEOApiException, match="Failed to parse GeoJSON from URL"):
        _ = vector.bounds

0 comments on commit aab5c20

Please sign in to comment.