From 31c21d3e3555b9f478b26e5c422d73e4ad152b9b Mon Sep 17 00:00:00 2001
From: avinukon
Date: Wed, 24 Apr 2024 19:06:18 -0400
Subject: [PATCH] avinukon solution to issue #58049

---
Adds a ``skiprows`` option to ``read_json`` / ``JsonReader`` for chunked,
line-delimited input. ``skiprows`` may be:

  * an int           - skip that many leading lines,
  * a list of ints   - skip the lines with those 0-based row numbers,
  * a callable       - skip every line whose 0-based row number makes the
                       callable return True.

Row numbers are counted from the start of the input, not from the start of
each chunk, so the result does not depend on ``chunksize``.

NOTE(review): the hunks below add ``skiprows`` to the ``read_json`` signature
but do not show it being forwarded to ``JsonReader(...)``; confirm it is
passed through, otherwise the option has no effect via ``read_json``.
NOTE(review): ``skiprows`` is only applied in ``JsonReader.__next__`` here;
confirm whether the non-chunked read path should honour it as well.
NOTE(review): the ``read_json`` docstring is not updated by this patch.
NOTE(review): the post-image blob id on the ``index`` line was not
regenerated after the added lines changed.

 pandas/io/json/_json.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 13d74e935f786..826c25ad0dc7d 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -511,6 +511,7 @@ def read_json(
     storage_options: StorageOptions | None = None,
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
     engine: JSONEngine = "ujson",
+    skiprows: int | list[int] | Callable[[int], bool] | None = None,
 ) -> DataFrame | Series | JsonReader:
     """
     Convert a JSON string to pandas object.
@@ -829,6 +830,7 @@ def __init__(
         encoding_errors: str | None = "strict",
         dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
         engine: JSONEngine = "ujson",
+        skiprows: int | list[int] | Callable[[int], bool] | None = None,
     ) -> None:
         self.orient = orient
         self.typ = typ
@@ -849,6 +851,10 @@ def __init__(
         self.encoding_errors = encoding_errors
         self.handles: IOHandles[str] | None = None
         self.dtype_backend = dtype_backend
+        self.skiprows = skiprows
+        # Raw input lines consumed so far by __next__; lets list/callable
+        # ``skiprows`` be evaluated against file-global row numbers.
+        self._lines_seen = 0
 
         if self.engine not in {"pyarrow", "ujson"}:
             raise ValueError(
@@ -1021,11 +1027,32 @@ def __next__(self) -> DataFrame | Series:
             self.close()
             raise StopIteration
 
+        if isinstance(self.skiprows, int):
+            # Drain with islice instead of next(): if the input has fewer
+            # lines than ``skiprows`` we fall through to the empty-chunk
+            # branch below, which closes the handles before stopping.
+            for _ in islice(self.data, self.skiprows):
+                pass
+            self.skiprows = None
+
         lines = list(islice(self.data, self.chunksize))
         if not lines:
             self.close()
             raise StopIteration
 
+        if self.skiprows is not None:
+            # Number rows from the start of the input, not of this chunk, so
+            # list and callable ``skiprows`` behave the same for any chunksize.
+            first = self._lines_seen
+            self._lines_seen += len(lines)
+            if callable(self.skiprows):
+                skip = self.skiprows
+            else:
+                skip = frozenset(self.skiprows).__contains__
+            lines = [
+                line for i, line in enumerate(lines, start=first) if not skip(i)
+            ]
+
         try:
             lines_json = self._combine_lines(lines)
             obj = self._get_object_parser(lines_json)