pandas-dev · jreback · Jul 14, 2020 · Jun 15, 2020 · Jun 15, 2020 · Jul 7, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -668,8 +668,6 @@ apply and applymap on ``DataFrame`` evaluates first row/column only once
 Other API changes
 ^^^^^^^^^^^^^^^^^
 
-- :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last``
-  will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`)
 - Added :meth:`DataFrame.value_counts` (:issue:`5377`)
 - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`)
 - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`)
@@ -783,6 +781,7 @@ Deprecations
 
 - Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version.  Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`)
 
+- :meth:`Series.describe` and :meth:`DataFrame.describe` treating datetime dtypes as categorical rather than numeric is deprecated. Specify ``datetime_is_numeric=True`` to show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last`` will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`, :issue:`33903`)
 - :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)
 - Setting values with ``.loc`` using a positional slice is deprecated and will raise in a future version.  Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`)
 - :meth:`DataFrame.to_dict` has deprecated accepting short names for ``orient`` in future versions (:issue:`32515`)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -9711,7 +9711,11 @@ def abs(self: FrameOrSeries) -> FrameOrSeries:
         return np.abs(self)
 
     def describe(
-        self: FrameOrSeries, percentiles=None, include=None, exclude=None
+        self: FrameOrSeries,
+        percentiles=None,
+        include=None,
+        exclude=None,
+        datetime_is_numeric=False,
     ) -> FrameOrSeries:
         """
         Generate descriptive statistics.
@@ -9757,6 +9761,10 @@ def describe(
               ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To
               exclude pandas categorical columns, use ``'category'``
             - None (default) : The result will exclude nothing.
+        datetime_is_numeric : bool, default False
+            Whether to treat datetime dtypes as numeric.
+
+            .. versionadded:: 1.1.0
 
         Returns
         -------
@@ -9834,7 +9842,7 @@ def describe(
         ...   np.datetime64("2010-01-01"),
         ...   np.datetime64("2010-01-01")
         ... ])
-        >>> s.describe()
+        >>> s.describe(datetime_is_numeric=True)
         count                      3
         mean     2006-09-01 08:00:00
         min      2000-01-01 00:00:00
@@ -9992,8 +10000,37 @@ def describe_categorical_1d(data):
             dtype = None
             if result[1] > 0:
                 top, freq = objcounts.index[0], objcounts.iloc[0]
-                names += ["top", "freq"]
-                result += [top, freq]
+                if is_datetime64_any_dtype(data.dtype):
+                    if self.ndim == 1:
+                        stacklevel = 4
+                    else:
+                        stacklevel = 5
+                    warnings.warn(
+                        "Treating datetime data as categorical rather than numeric in "
+                        "`.describe` is deprecated and will be removed in a future "
+                        "version of pandas. Specify `datetime_is_numeric=True` to "
+                        "silence this warning and adopt the future behavior now.",
+                        FutureWarning,
+                        stacklevel=stacklevel,
+                    )
+                    tz = data.dt.tz
+                    asint = data.dropna().values.view("i8")
+                    top = Timestamp(top)
+                    if top.tzinfo is not None and tz is not None:
+                        # Don't tz_localize(None) if key is already tz-aware
+                        top = top.tz_convert(tz)
+                    else:
+                        top = top.tz_localize(tz)
+                    names += ["top", "freq", "first", "last"]
+                    result += [
+                        top,
+                        freq,
+                        Timestamp(asint.min(), tz=tz),
+                        Timestamp(asint.max(), tz=tz),
+                    ]
+                else:
+                    names += ["top", "freq"]
+                    result += [top, freq]
 
             # If the DataFrame is empty, set 'top' and 'freq' to None
             # to maintain output shape consistency
@@ -10019,7 +10056,7 @@ def describe_1d(data):
                 return describe_categorical_1d(data)
             elif is_numeric_dtype(data):
                 return describe_numeric_1d(data)
-            elif is_datetime64_any_dtype(data.dtype):
+            elif is_datetime64_any_dtype(data.dtype) and datetime_is_numeric:
                 return describe_timestamp_1d(data)
             elif is_timedelta64_dtype(data.dtype):
                 return describe_numeric_1d(data)

diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py
@@ -267,7 +267,48 @@ def test_describe_tz_values(self, tz_naive_fixture):
             },
             index=["count", "mean", "min", "25%", "50%", "75%", "max", "std"],
         )
-        result = df.describe(include="all")
+        result = df.describe(include="all", datetime_is_numeric=True)
+        tm.assert_frame_equal(result, expected)
+
+    def test_describe_tz_values2(self):
+        tz = "CET"
+        s1 = Series(range(5))
+        start = Timestamp(2018, 1, 1)
+        end = Timestamp(2018, 1, 5)
+        s2 = Series(date_range(start, end, tz=tz))
+        df = pd.DataFrame({"s1": s1, "s2": s2})
+
+        s1_ = s1.describe()
+        s2_ = pd.Series(
+            [
+                5,
+                5,
+                s2.value_counts().index[0],
+                1,
+                start.tz_localize(tz),
+                end.tz_localize(tz),
+            ],
+            index=["count", "unique", "top", "freq", "first", "last"],
+        )
+        idx = [
+            "count",
+            "unique",
+            "top",
+            "freq",
+            "first",
+            "last",
+            "mean",
+            "std",
+            "min",
+            "25%",
+            "50%",
+            "75%",
+            "max",
+        ]
+        expected = pd.concat([s1_, s2_], axis=1, keys=["s1", "s2"]).loc[idx]
+
+        with tm.assert_produces_warning(FutureWarning):
+            result = df.describe(include="all")
         tm.assert_frame_equal(result, expected)
 
     def test_describe_percentiles_integer_idx(self):

diff --git a/pandas/tests/series/methods/test_describe.py b/pandas/tests/series/methods/test_describe.py
@@ -83,7 +83,7 @@ def test_describe_with_tz(self, tz_naive_fixture):
         start = Timestamp(2018, 1, 1)
         end = Timestamp(2018, 1, 5)
         s = Series(date_range(start, end, tz=tz), name=name)
-        result = s.describe()
+        result = s.describe(datetime_is_numeric=True)
         expected = Series(
             [
                 5,
@@ -98,3 +98,26 @@ def test_describe_with_tz(self, tz_naive_fixture):
             index=["count", "mean", "min", "25%", "50%", "75%", "max"],
         )
         tm.assert_series_equal(result, expected)
+
+    def test_describe_with_tz_warns(self):
+        name = tz = "CET"
+        start = Timestamp(2018, 1, 1)
+        end = Timestamp(2018, 1, 5)
+        s = Series(date_range(start, end, tz=tz), name=name)
+
+        with tm.assert_produces_warning(FutureWarning):
+            result = s.describe()
+
+        expected = Series(
+            [
+                5,
+                5,
+                s.value_counts().index[0],
+                1,
+                start.tz_localize(tz),
+                end.tz_localize(tz),
+            ],
+            name=name,
+            index=["count", "unique", "top", "freq", "first", "last"],
+        )
+        tm.assert_series_equal(result, expected)