pandas-dev · MarcoGorelli · Mar 30, 2022 · Mar 30, 2022
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -9,7 +9,7 @@ repos:
     -   id: absolufy-imports
         files: ^pandas/
 -   repo: https://github.com/python/black
-    rev: 21.12b0
+    rev: 22.3.0
     hooks:
     -   id: black
 -   repo: https://github.com/codespell-project/codespell

diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py
@@ -34,7 +34,7 @@ class Factorize:
     param_names = ["unique", "sort", "dtype"]
 
     def setup(self, unique, sort, dtype):
-        N = 10 ** 5
+        N = 10**5
         string_index = tm.makeStringIndex(N)
         string_arrow = None
         if dtype == "string[pyarrow]":
@@ -74,7 +74,7 @@ class Duplicated:
     param_names = ["unique", "keep", "dtype"]
 
     def setup(self, unique, keep, dtype):
-        N = 10 ** 5
+        N = 10**5
         data = {
             "int": pd.Index(np.arange(N), dtype="int64"),
             "uint": pd.Index(np.arange(N), dtype="uint64"),
@@ -97,7 +97,7 @@ def time_duplicated(self, unique, keep, dtype):
 
 class Hashing:
     def setup_cache(self):
-        N = 10 ** 5
+        N = 10**5
 
         df = pd.DataFrame(
             {
@@ -145,7 +145,7 @@ class Quantile:
     param_names = ["quantile", "interpolation", "dtype"]
 
     def setup(self, quantile, interpolation, dtype):
-        N = 10 ** 5
+        N = 10**5
         data = {
             "int": np.arange(N),
             "uint": np.arange(N).astype(np.uint64),
@@ -158,7 +158,7 @@ def time_quantile(self, quantile, interpolation, dtype):
 
 
 class SortIntegerArray:
-    params = [10 ** 3, 10 ** 5]
+    params = [10**3, 10**5]
 
     def setup(self, N):
         data = np.arange(N, dtype=float)

diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py
@@ -49,7 +49,7 @@ def setup(self, dtype):
 
         elif dtype in ["category[object]", "category[int]"]:
             # Note: sizes are different in this case than others
-            n = 5 * 10 ** 5
+            n = 5 * 10**5
             sample_size = 100
 
             arr = list(np.random.randint(0, n // 10, size=n))
@@ -174,7 +174,7 @@ class IsinWithArange:
 
     def setup(self, dtype, M, offset_factor):
         offset = int(M * offset_factor)
-        tmp = Series(np.random.randint(offset, M + offset, 10 ** 6))
+        tmp = Series(np.random.randint(offset, M + offset, 10**6))
         self.series = tmp.astype(dtype)
         self.values = np.arange(M).astype(dtype)
 
@@ -191,8 +191,8 @@ class IsInFloat64:
     param_names = ["dtype", "title"]
 
     def setup(self, dtype, title):
-        N_many = 10 ** 5
-        N_few = 10 ** 6
+        N_many = 10**5
+        N_few = 10**6
         self.series = Series([1, 2], dtype=dtype)
 
         if title == "many_different_values":
@@ -240,10 +240,10 @@ class IsInForObjects:
     param_names = ["series_type", "vals_type"]
 
     def setup(self, series_type, vals_type):
-        N_many = 10 ** 5
+        N_many = 10**5
 
         if series_type == "nans":
-            ser_vals = np.full(10 ** 4, np.nan)
+            ser_vals = np.full(10**4, np.nan)
         elif series_type == "short":
             ser_vals = np.arange(2)
         elif series_type == "long":
@@ -254,7 +254,7 @@ def setup(self, series_type, vals_type):
         self.series = Series(ser_vals).astype(object)
 
         if vals_type == "nans":
-            values = np.full(10 ** 4, np.nan)
+            values = np.full(10**4, np.nan)
         elif vals_type == "short":
             values = np.arange(2)
         elif vals_type == "long":
@@ -277,7 +277,7 @@ class IsInLongSeriesLookUpDominates:
     param_names = ["dtype", "MaxNumber", "series_type"]
 
     def setup(self, dtype, MaxNumber, series_type):
-        N = 10 ** 7
+        N = 10**7
 
         if series_type == "random_hits":
             array = np.random.randint(0, MaxNumber, N)
@@ -304,15 +304,15 @@ class IsInLongSeriesValuesDominate:
     param_names = ["dtype", "series_type"]
 
     def setup(self, dtype, series_type):
-        N = 10 ** 7
+        N = 10**7
 
         if series_type == "random":
             vals = np.random.randint(0, 10 * N, N)
         if series_type == "monotone":
             vals = np.arange(N)
 
         self.values = vals.astype(dtype.lower())
-        M = 10 ** 6 + 1
+        M = 10**6 + 1
         self.series = Series(np.arange(M)).astype(dtype)
 
     def time_isin(self, dtypes, series_type):

diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py
@@ -59,7 +59,7 @@ def time_frame_op_with_scalar(self, dtype, scalar, op):
 class OpWithFillValue:
     def setup(self):
         # GH#31300
-        arr = np.arange(10 ** 6)
+        arr = np.arange(10**6)
         df = DataFrame({"A": arr})
         ser = df["A"]
 
@@ -93,7 +93,7 @@ class MixedFrameWithSeriesAxis:
     param_names = ["opname"]
 
     def setup(self, opname):
-        arr = np.arange(10 ** 6).reshape(1000, -1)
+        arr = np.arange(10**6).reshape(1000, -1)
         df = DataFrame(arr)
         df["C"] = 1.0
         self.df = df
@@ -201,7 +201,7 @@ def teardown(self, use_numexpr, threads):
 
 class Ops2:
     def setup(self):
-        N = 10 ** 3
+        N = 10**3
         self.df = DataFrame(np.random.randn(N, N))
         self.df2 = DataFrame(np.random.randn(N, N))
 
@@ -258,7 +258,7 @@ class Timeseries:
     param_names = ["tz"]
 
     def setup(self, tz):
-        N = 10 ** 6
+        N = 10**6
         halfway = (N // 2) - 1
         self.s = Series(date_range("20010101", periods=N, freq="T", tz=tz))
         self.ts = self.s[halfway]
@@ -280,7 +280,7 @@ def time_timestamp_ops_diff_with_shift(self, tz):
 
 class IrregularOps:
     def setup(self):
-        N = 10 ** 5
+        N = 10**5
         idx = date_range(start="1/1/2000", periods=N, freq="s")
         s = Series(np.random.randn(N), index=idx)
         self.left = s.sample(frac=1)
@@ -304,7 +304,7 @@ class CategoricalComparisons:
     param_names = ["op"]
 
     def setup(self, op):
-        N = 10 ** 5
+        N = 10**5
         self.cat = pd.Categorical(list("aabbcd") * N, ordered=True)
 
     def time_categorical_op(self, op):
@@ -317,7 +317,7 @@ class IndexArithmetic:
     param_names = ["dtype"]
 
     def setup(self, dtype):
-        N = 10 ** 6
+        N = 10**6
         indexes = {"int": "makeIntIndex", "float": "makeFloatIndex"}
         self.index = getattr(tm, indexes[dtype])(N)
 
@@ -343,7 +343,7 @@ class NumericInferOps:
     param_names = ["dtype"]
 
     def setup(self, dtype):
-        N = 5 * 10 ** 5
+        N = 5 * 10**5
         self.df = DataFrame(
             {"A": np.arange(N).astype(dtype), "B": np.arange(N).astype(dtype)}
         )
@@ -367,7 +367,7 @@ def time_modulo(self, dtype):
 class DateInferOps:
     # from GH 7332
     def setup_cache(self):
-        N = 5 * 10 ** 5
+        N = 5 * 10**5
         df = DataFrame({"datetime64": np.arange(N).astype("datetime64[ms]")})
         df["timedelta"] = df["datetime64"] - df["datetime64"]
         return df
@@ -388,7 +388,7 @@ class AddOverflowScalar:
     param_names = ["scalar"]
 
     def setup(self, scalar):
-        N = 10 ** 6
+        N = 10**6
         self.arr = np.arange(N)
 
     def time_add_overflow_scalar(self, scalar):
@@ -397,7 +397,7 @@ def time_add_overflow_scalar(self, scalar):
 
 class AddOverflowArray:
     def setup(self):
-        N = 10 ** 6
+        N = 10**6
         self.arr = np.arange(N)
         self.arr_rev = np.arange(-N, 0)
         self.arr_mixed = np.array([1, -1]).repeat(N / 2)

diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
@@ -19,7 +19,7 @@
 
 class Constructor:
     def setup(self):
-        N = 10 ** 5
+        N = 10**5
         self.categories = list("abcde")
         self.cat_idx = pd.Index(self.categories)
         self.values = np.tile(self.categories, N)
@@ -71,16 +71,16 @@ def time_existing_series(self):
 
 class AsType:
     def setup(self):
-        N = 10 ** 5
+        N = 10**5
 
         random_pick = np.random.default_rng().choice
 
         categories = {
             "str": list(string.ascii_letters),
-            "int": np.random.randint(2 ** 16, size=154),
+            "int": np.random.randint(2**16, size=154),
             "float": sys.maxsize * np.random.random((38,)),
             "timestamp": [
-                pd.Timestamp(x, unit="s") for x in np.random.randint(2 ** 18, size=578)
+                pd.Timestamp(x, unit="s") for x in np.random.randint(2**18, size=578)
             ],
         }
 
@@ -112,7 +112,7 @@ def astype_datetime(self):
 
 class Concat:
     def setup(self):
-        N = 10 ** 5
+        N = 10**5
         self.s = pd.Series(list("aabbcd") * N).astype("category")
 
         self.a = pd.Categorical(list("aabbcd") * N)
@@ -148,7 +148,7 @@ class ValueCounts:
     param_names = ["dropna"]
 
     def setup(self, dropna):
-        n = 5 * 10 ** 5
+        n = 5 * 10**5
         arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)]
         self.ts = pd.Series(arr).astype("category")
 
@@ -166,7 +166,7 @@ def time_rendering(self):
 
 class SetCategories:
     def setup(self):
-        n = 5 * 10 ** 5
+        n = 5 * 10**5
         arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)]
         self.ts = pd.Series(arr).astype("category")
 
@@ -176,7 +176,7 @@ def time_set_categories(self):
 
 class RemoveCategories:
     def setup(self):
-        n = 5 * 10 ** 5
+        n = 5 * 10**5
         arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)]
         self.ts = pd.Series(arr).astype("category")
 
@@ -186,7 +186,7 @@ def time_remove_categories(self):
 
 class Rank:
     def setup(self):
-        N = 10 ** 5
+        N = 10**5
         ncats = 100
 
         self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
@@ -241,7 +241,7 @@ def time_categorical_series_is_monotonic_decreasing(self):
 
 class Contains:
     def setup(self):
-        N = 10 ** 5
+        N = 10**5
         self.ci = tm.makeCategoricalIndex(N)
         self.c = self.ci.values
         self.key = self.ci.categories[0]
@@ -259,7 +259,7 @@ class CategoricalSlicing:
     param_names = ["index"]
 
     def setup(self, index):
-        N = 10 ** 6
+        N = 10**6
         categories = ["a", "b", "c"]
         values = [0] * N + [1] * N + [2] * N
         if index == "monotonic_incr":
@@ -295,7 +295,7 @@ def time_getitem_bool_array(self, index):
 
 class Indexing:
     def setup(self):
-        N = 10 ** 5
+        N = 10**5
         self.index = pd.CategoricalIndex(range(N), range(N))
         self.series = pd.Series(range(N), index=self.index).sort_index()
         self.category = self.index[500]
@@ -327,7 +327,7 @@ def time_sort_values(self):
 
 class SearchSorted:
     def setup(self):
-        N = 10 ** 5
+        N = 10**5
         self.ci = tm.makeCategoricalIndex(N).sort_values()
         self.c = self.ci.values
         self.key = self.ci.categories[1]

diff --git a/asv_bench/benchmarks/ctors.py b/asv_bench/benchmarks/ctors.py
@@ -76,7 +76,7 @@ def setup(self, data_fmt, with_index, dtype):
             raise NotImplementedError(
                 "Series constructors do not support using generators with indexes"
             )
-        N = 10 ** 4
+        N = 10**4
         if dtype == "float":
             arr = np.random.randn(N)
         else:
@@ -90,7 +90,7 @@ def time_series_constructor(self, data_fmt, with_index, dtype):
 
 class SeriesDtypesConstructors:
     def setup(self):
-        N = 10 ** 4
+        N = 10**4
         self.arr = np.random.randn(N)
         self.arr_str = np.array(["foo", "bar", "baz"], dtype=object)
         self.s = Series(
@@ -114,7 +114,7 @@ def time_dtindex_from_index_with_series(self):
 
 class MultiIndexConstructor:
     def setup(self):
-        N = 10 ** 4
+        N = 10**4
         self.iterables = [tm.makeStringIndex(N), range(20)]
 
     def time_multiindex_from_iterables(self):

diff --git a/asv_bench/benchmarks/eval.py b/asv_bench/benchmarks/eval.py
@@ -43,7 +43,7 @@ def teardown(self, engine, threads):
 
 class Query:
     def setup(self):
-        N = 10 ** 6
+        N = 10**6
         halfway = (N // 2) - 1
         index = pd.date_range("20010101", periods=N, freq="T")
         s = pd.Series(index)

diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py
@@ -77,7 +77,7 @@ class FromDictwithTimestamp:
     param_names = ["offset"]
 
     def setup(self, offset):
-        N = 10 ** 3
+        N = 10**3
         idx = date_range(Timestamp("1/1/1900"), freq=offset, periods=N)
         df = DataFrame(np.random.randn(N, 10), index=idx)
         self.d = df.to_dict()