From 5fd78e27d2f69dd31375780825550a6ce24689ff Mon Sep 17 00:00:00 2001 From: Mischa Untaga <99098079+MischaU8@users.noreply.github.com> Date: Thu, 8 Sep 2022 16:56:52 +0200 Subject: [PATCH 1/3] progressbar for inserts/upserts of all fileformats, closes #485 --- sqlite_utils/cli.py | 52 +++++++++++++++++++++---------------------- sqlite_utils/utils.py | 5 +++++ 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/sqlite_utils/cli.py b/sqlite_utils/cli.py index c51b10178..35d4ed50b 100644 --- a/sqlite_utils/cli.py +++ b/sqlite_utils/cli.py @@ -953,14 +953,14 @@ def insert_upsert_implementation( decoded = io.TextIOWrapper(file, encoding=encoding) tracker = None - if csv or tsv: - if sniff: - # Read first 2048 bytes and use that to detect - first_bytes = sniff_buffer.peek(2048) - dialect = csv_std.Sniffer().sniff(first_bytes.decode(encoding, "ignore")) - else: - dialect = "excel-tab" if tsv else "excel" - with file_progress(decoded, silent=silent) as decoded: + with file_progress(decoded, silent=silent) as decoded: + if csv or tsv: + if sniff: + # Read first 2048 bytes and use that to detect + first_bytes = sniff_buffer.peek(2048) + dialect = csv_std.Sniffer().sniff(first_bytes.decode(encoding, "ignore")) + else: + dialect = "excel-tab" if tsv else "excel" csv_reader_args = {"dialect": dialect} if delimiter: csv_reader_args["delimiter"] = delimiter @@ -977,24 +977,24 @@ def insert_upsert_implementation( if detect_types: tracker = TypeTracker() docs = tracker.wrap(docs) - elif lines: - docs = ({"line": line.strip()} for line in decoded) - elif text: - docs = ({"text": decoded.read()},) - else: - try: - if nl: - docs = (json.loads(line) for line in decoded if line.strip()) - else: - docs = json.load(decoded) - if isinstance(docs, dict): - docs = [docs] - except json.decoder.JSONDecodeError: - raise click.ClickException( - "Invalid JSON - use --csv for CSV or --tsv for TSV files" - ) - if flatten: - docs = (dict(_flatten(doc)) for doc in docs) + elif lines: + docs = ({"line": line.strip()} for line in decoded) + elif text: + docs = ({"text": decoded.read()},) + else: + try: + if nl: + docs = (json.loads(line) for line in decoded if line.strip()) + else: + docs = json.load(decoded) + if isinstance(docs, dict): + docs = [docs] + except json.decoder.JSONDecodeError: + raise click.ClickException( + "Invalid JSON - use --csv for CSV or --tsv for TSV files" + ) + if flatten: + docs = (dict(_flatten(doc)) for doc in docs) if convert: variable = "row" diff --git a/sqlite_utils/utils.py b/sqlite_utils/utils.py index c0b7bf18d..875455420 100644 --- a/sqlite_utils/utils.py +++ b/sqlite_utils/utils.py @@ -155,6 +155,11 @@ def __iter__(self): self._update(len(line)) yield line + def read(self, size=-1): + data = self._wrapped.read(size) + self._update(len(data)) + return data + @contextlib.contextmanager def file_progress(file, silent=False, **kwargs): From 0acbc68545fca6b95365b66280a5865cfd49633c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 15 Sep 2022 12:33:04 -0700 Subject: [PATCH 2/3] Applied Black --- sqlite_utils/cli.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sqlite_utils/cli.py b/sqlite_utils/cli.py index 35d4ed50b..767b170ec 100644 --- a/sqlite_utils/cli.py +++ b/sqlite_utils/cli.py @@ -958,7 +958,9 @@ def insert_upsert_implementation( if sniff: # Read first 2048 bytes and use that to detect first_bytes = sniff_buffer.peek(2048) - dialect = csv_std.Sniffer().sniff(first_bytes.decode(encoding, "ignore")) + dialect = csv_std.Sniffer().sniff( + first_bytes.decode(encoding, "ignore") + ) else: dialect = "excel-tab" if tsv else "excel" csv_reader_args = {"dialect": dialect} From d5db749480aaf8518e611ff55da186cadf6c63bc Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 15 Sep 2022 13:22:35 -0700 Subject: [PATCH 3/3] Pin to Python 3.10.6 for the moment Refs #486 Refs https://github.com/python/mypy/issues/13627 --- .github/workflows/publish.yml | 4 ++-- .github/workflows/test.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 59c8a2323..355f27147 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -9,12 +9,12 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"] + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10.6"] os: [ubuntu-latest, windows-latest, macos-latest] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - uses: actions/cache@v2 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 930396564..788df25c9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,13 +10,13 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"] + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10.6"] numpy: [0, 1] os: [ubuntu-latest, macos-latest, windows-latest] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - uses: actions/cache@v2