Merge branch 'main' of https://github.com/pandas-dev/pandas into bug_…

…empty_contructor_dtype
rhshadrach · May 11, 2024 · 87d4d20 · 87d4d20
2 parents 99613b4 + 4d9ffcf
commit 87d4d20
Show file tree

Hide file tree

Showing 116 changed files with 1,872 additions and 3,165 deletions.
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
@@ -314,7 +314,7 @@ jobs:
     timeout-minutes: 90
 
     concurrency:
-      #https://github.community/t/concurrecy-not-work-for-push/183068/7
+      # https://github.community/t/concurrecy-not-work-for-push/183068/7
       group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.os }}-${{ matrix.pytest_target }}-dev
       cancel-in-progress: true
 
@@ -346,3 +346,62 @@ jobs:
 
       - name: Run Tests
         uses: ./.github/actions/run-tests
+
+  emscripten:
+    # Note: the Python and Emscripten toolchain versions are both determined
+    # by the Pyodide version. The appropriate versions can be found in the
+    # Pyodide repodata.json "info" field, or in the Makefile.envs file:
+    # https://github.com/pyodide/pyodide/blob/stable/Makefile.envs#L2
+    # The Node.js version can be determined via Pyodide:
+    # https://pyodide.org/en/stable/usage/index.html#node-js
+    name: Pyodide build
+    runs-on: ubuntu-22.04
+    concurrency:
+      # https://github.community/t/concurrecy-not-work-for-push/183068/7
+      group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-wasm
+      cancel-in-progress: true
+    steps:
+      - name: Checkout pandas Repo
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python for Pyodide
+        id: setup-python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11.3'
+
+      - name: Set up Emscripten toolchain
+        uses: mymindstorm/setup-emsdk@v14
+        with:
+          version: '3.1.46'
+          actions-cache-folder: emsdk-cache
+
+      - name: Install pyodide-build
+        run: pip install "pyodide-build==0.25.1"
+
+      - name: Build pandas for Pyodide
+        run: |
+          pyodide build
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '18'
+
+      - name: Set up Pyodide virtual environment
+        run: |
+          pyodide venv .venv-pyodide
+          source .venv-pyodide/bin/activate
+          pip install dist/*.whl
+
+      - name: Test pandas for Pyodide
+        env:
+          PANDAS_CI: 1
+        run: |
+          source .venv-pyodide/bin/activate
+          pip install pytest hypothesis
+          # do not import pandas from the checked out repo
+          cd ..
+          python -c 'import pandas as pd; pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db"])'
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -19,7 +19,7 @@ ci:
     skip: [pyright, mypy]
 repos:
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.3.4
+    rev: v0.4.3
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
@@ -46,12 +46,12 @@ repos:
         types_or: [python, rst, markdown, cython, c]
         additional_dependencies: [tomli]
 -   repo: https://github.com/MarcoGorelli/cython-lint
-    rev: v0.16.0
+    rev: v0.16.2
     hooks:
     -   id: cython-lint
     -   id: double-quote-cython-strings
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v4.6.0
     hooks:
     -   id: check-case-conflict
     -   id: check-toml
@@ -91,7 +91,7 @@ repos:
     hooks:
     - id: sphinx-lint
 -   repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v18.1.2
+    rev: v18.1.4
     hooks:
     - id: clang-format
       files: ^pandas/_libs/src|^pandas/_libs/include

diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py
@@ -445,16 +445,6 @@ def setup(self, engine):
         data = data.format(*two_cols)
         self.StringIO_input = StringIO(data)
 
-    def time_multiple_date(self, engine):
-        read_csv(
-            self.data(self.StringIO_input),
-            engine=engine,
-            sep=",",
-            header=None,
-            names=list(string.digits[:9]),
-            parse_dates=[[1, 2], [1, 3]],
-        )
-
     def time_baseline(self, engine):
         read_csv(
             self.data(self.StringIO_input),

diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py
@@ -1,10 +1,5 @@
-import numpy as np
-
 try:
-    from pandas._libs.tslibs.parsing import (
-        _does_string_look_like_datetime,
-        concat_date_cols,
-    )
+    from pandas._libs.tslibs.parsing import _does_string_look_like_datetime
 except ImportError:
     # Avoid whole benchmark suite import failure on asv (currently 0.4)
     pass
@@ -20,21 +15,3 @@ def setup(self, value):
     def time_check_datetimes(self, value):
         for obj in self.objects:
             _does_string_look_like_datetime(obj)
-
-
-class ConcatDateCols:
-    params = ([1234567890, "AAAA"], [1, 2])
-    param_names = ["value", "dim"]
-
-    def setup(self, value, dim):
-        count_elem = 10000
-        if dim == 1:
-            self.object = (np.array([value] * count_elem),)
-        if dim == 2:
-            self.object = (
-                np.array([value] * count_elem),
-                np.array([value] * count_elem),
-            )
-
-    def time_check_concat(self, value, dim):
-        concat_date_cols(self.object)
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
@@ -148,10 +148,14 @@ def time_searchsorted(self, dtype):
 
 
 class Map:
-    params = (["dict", "Series", "lambda"], ["object", "category", "int"])
-    param_names = "mapper"
-
-    def setup(self, mapper, dtype):
+    params = (
+        ["dict", "Series", "lambda"],
+        ["object", "category", "int"],
+        [None, "ignore"],
+    )
+    param_names = ["mapper", "dtype", "na_action"]
+
+    def setup(self, mapper, dtype, na_action):
         map_size = 1000
         map_data = Series(map_size - np.arange(map_size), dtype=dtype)
 
@@ -168,8 +172,8 @@ def setup(self, mapper, dtype):
 
         self.s = Series(np.random.randint(0, map_size, 10000), dtype=dtype)
 
-    def time_map(self, mapper, *args, **kwargs):
-        self.s.map(self.map_data)
+    def time_map(self, mapper, dtype, na_action):
+        self.s.map(self.map_data, na_action=na_action)
 
 
 class Clip: