Merge branch 'main' of https://github.com/pandas-dev/pandas into bug_…

…empty_contructor_dtype
rhshadrach · May 12, 2024 · 41c96a2 · 41c96a2
2 parents ad0260c + 34177d6
commit 41c96a2
Show file tree

Hide file tree

Showing 10 changed files with 216 additions and 50 deletions.
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -77,11 +77,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.plot PR02,SA01" \
         -i "pandas.Grouper PR02" \
         -i "pandas.Index PR07" \
-        -i "pandas.IntervalIndex.is_non_overlapping_monotonic SA01" \
         -i "pandas.IntervalIndex.left GL08" \
-        -i "pandas.IntervalIndex.length GL08" \
         -i "pandas.IntervalIndex.set_closed RT03,SA01" \
-        -i "pandas.IntervalIndex.to_tuples RT03,SA01" \
         -i "pandas.MultiIndex PR01" \
         -i "pandas.MultiIndex.append PR07,SA01" \
         -i "pandas.MultiIndex.copy PR07,RT03,SA01" \
@@ -144,9 +141,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.RangeIndex.start SA01" \
         -i "pandas.RangeIndex.step SA01" \
         -i "pandas.RangeIndex.stop SA01" \
-        -i "pandas.Series.add PR07" \
         -i "pandas.Series.case_when RT03" \
-        -i "pandas.Series.cat PR07" \
         -i "pandas.Series.cat.add_categories PR01,PR02" \
         -i "pandas.Series.cat.as_ordered PR01" \
         -i "pandas.Series.cat.as_unordered PR01" \
@@ -274,7 +269,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.TimedeltaIndex.nanoseconds SA01" \
         -i "pandas.TimedeltaIndex.seconds SA01" \
         -i "pandas.TimedeltaIndex.to_pytimedelta RT03,SA01" \
-        -i "pandas.Timestamp.asm8 SA01" \
         -i "pandas.Timestamp.astimezone SA01" \
         -i "pandas.Timestamp.ceil SA01" \
         -i "pandas.Timestamp.combine PR01,SA01" \
@@ -289,7 +283,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Timestamp.hour GL08" \
         -i "pandas.Timestamp.is_leap_year SA01" \
         -i "pandas.Timestamp.isocalendar SA01" \
-        -i "pandas.Timestamp.isoformat SA01" \
         -i "pandas.Timestamp.isoweekday SA01" \
         -i "pandas.Timestamp.max PR02" \
         -i "pandas.Timestamp.microsecond GL08" \
@@ -305,7 +298,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Timestamp.resolution PR02,PR07,SA01" \
         -i "pandas.Timestamp.round SA01" \
         -i "pandas.Timestamp.second GL08" \
-        -i "pandas.Timestamp.strftime SA01" \
         -i "pandas.Timestamp.strptime PR01,SA01" \
         -i "pandas.Timestamp.time SA01" \
         -i "pandas.Timestamp.timestamp SA01" \
@@ -328,9 +320,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Timestamp.utcoffset SA01" \
         -i "pandas.Timestamp.utctimetuple SA01" \
         -i "pandas.Timestamp.value GL08" \
-        -i "pandas.Timestamp.week SA01" \
         -i "pandas.Timestamp.weekday SA01" \
-        -i "pandas.Timestamp.weekofyear SA01" \
         -i "pandas.Timestamp.year GL08" \
         -i "pandas.api.extensions.ExtensionArray._from_sequence_of_strings SA01" \
         -i "pandas.api.extensions.ExtensionArray._hash_pandas_object RT03,SA01" \
@@ -402,13 +392,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.arrays.DatetimeArray SA01" \
         -i "pandas.arrays.FloatingArray SA01" \
         -i "pandas.arrays.IntegerArray SA01" \
-        -i "pandas.arrays.IntervalArray.is_non_overlapping_monotonic SA01" \
         -i "pandas.arrays.IntervalArray.left SA01" \
         -i "pandas.arrays.IntervalArray.length SA01" \
         -i "pandas.arrays.IntervalArray.mid SA01" \
         -i "pandas.arrays.IntervalArray.right SA01" \
         -i "pandas.arrays.IntervalArray.set_closed RT03,SA01" \
-        -i "pandas.arrays.IntervalArray.to_tuples RT03,SA01" \
         -i "pandas.arrays.NumpyExtensionArray SA01" \
         -i "pandas.arrays.SparseArray PR07,SA01" \
         -i "pandas.arrays.TimedeltaArray PR07,SA01" \

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -384,6 +384,8 @@ Datetimelike
 - Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
 - Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`)
 - Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
+- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` does not raise on Custom business days frequencies bigger then "1C" (:issue:`58664`)
+- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`)
 - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
 
 Timedelta
@@ -421,7 +423,6 @@ Interval
 Indexing
 ^^^^^^^^
 - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
-- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`)
 -
 
 Missing

diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx
@@ -253,7 +253,7 @@ def get_start_end_field(
         # month of year. Other offsets use month, startingMonth as ending
         # month of year.
 
-        if freq_name.lstrip("B")[0:2]  in ["MS", "QS", "YS"]:
+        if freq_name.lstrip("B")[0:2] in ["MS", "QS", "YS"]:
             end_month = 12 if month_kw == 1 else month_kw - 1
             start_month = month_kw
         else:

diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx
@@ -692,6 +692,12 @@ class NaTType(_NaT):
             See strftime documentation for more information on the format string:
             https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior.
 
+        See Also
+        --------
+        Timestamp.isoformat : Return the time formatted according to ISO 8601.
+        pd.to_datetime : Convert argument to datetime.
+        Period.strftime : Format a single Period.
+
         Examples
         --------
         >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651')

diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
@@ -902,6 +902,11 @@ cdef class _Timestamp(ABCTimestamp):
         -------
         int
 
+        See Also
+        --------
+        Timestamp.weekday : Return the day of the week.
+        Timestamp.quarter : Return the quarter of the year.
+
         Examples
         --------
         >>> ts = pd.Timestamp(2020, 3, 14)
@@ -1008,6 +1013,12 @@ cdef class _Timestamp(ABCTimestamp):
         -------
         str
 
+        See Also
+        --------
+        Timestamp.strftime : Return a formatted string.
+        Timestamp.isocalendar : Return a tuple containing ISO year, week number and
+            weekday.
+
         Examples
         --------
         >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651')
@@ -1150,6 +1161,12 @@ cdef class _Timestamp(ABCTimestamp):
         """
         Return numpy datetime64 format in nanoseconds.
 
+        See Also
+        --------
+        numpy.datetime64 : Numpy datatype for dates and times with high precision.
+        Timestamp.to_numpy : Convert the Timestamp to a NumPy datetime64.
+        to_datetime : Convert argument to datetime.
+
         Examples
         --------
         >>> ts = pd.Timestamp(2020, 3, 14, 15)
@@ -1543,6 +1560,12 @@ class Timestamp(_Timestamp):
             See strftime documentation for more information on the format string:
             https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior.
 
+        See Also
+        --------
+        Timestamp.isoformat : Return the time formatted according to ISO 8601.
+        pd.to_datetime : Convert argument to datetime.
+        Period.strftime : Format a single Period.
+
         Examples
         --------
         >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651')

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -2847,6 +2847,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
     Parameters
     ----------
     data : Series or CategoricalIndex
+        The object to which the categorical accessor is attached.
 
     See Also
     --------

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -1509,10 +1509,54 @@ def set_closed(self, closed: IntervalClosedType) -> Self:
         """
 
     @property
-    @Appender(
-        _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs
-    )
     def is_non_overlapping_monotonic(self) -> bool:
+        """
+        Return a boolean whether the IntervalArray/IntervalIndex\
+        is non-overlapping and monotonic.
+
+        Non-overlapping means (no Intervals share points), and monotonic means
+        either monotonic increasing or monotonic decreasing.
+
+        See Also
+        --------
+        overlaps : Check if two IntervalIndex objects overlap.
+
+        Examples
+        --------
+        For arrays:
+
+        >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
+        >>> interv_arr
+        <IntervalArray>
+        [(0, 1], (1, 5]]
+        Length: 2, dtype: interval[int64, right]
+        >>> interv_arr.is_non_overlapping_monotonic
+        True
+
+        >>> interv_arr = pd.arrays.IntervalArray(
+        ...     [pd.Interval(0, 1), pd.Interval(-1, 0.1)]
+        ... )
+        >>> interv_arr
+        <IntervalArray>
+        [(0.0, 1.0], (-1.0, 0.1]]
+        Length: 2, dtype: interval[float64, right]
+        >>> interv_arr.is_non_overlapping_monotonic
+        False
+
+        For Interval Index:
+
+        >>> interv_idx = pd.interval_range(start=0, end=2)
+        >>> interv_idx
+        IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')
+        >>> interv_idx.is_non_overlapping_monotonic
+        True
+
+        >>> interv_idx = pd.interval_range(start=0, end=2, closed="both")
+        >>> interv_idx
+        IntervalIndex([[0, 1], [1, 2]], dtype='interval[int64, both]')
+        >>> interv_idx.is_non_overlapping_monotonic
+        False
+        """
         # must be increasing  (e.g., [0, 1), [1, 2), [2, 3), ... )
         # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...)
         # we already require left <= right
@@ -1624,39 +1668,51 @@ def __arrow_array__(self, type=None):
         """
     )
 
-    @Appender(
-        _interval_shared_docs["to_tuples"]
-        % {
-            "return_type": (
-                "ndarray (if self is IntervalArray) or Index (if self is IntervalIndex)"
-            ),
-            "examples": textwrap.dedent(
-                """\
-
-         Examples
-         --------
-         For :class:`pandas.IntervalArray`:
-
-         >>> idx = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
-         >>> idx
-         <IntervalArray>
-         [(0, 1], (1, 2]]
-         Length: 2, dtype: interval[int64, right]
-         >>> idx.to_tuples()
-         array([(0, 1), (1, 2)], dtype=object)
-
-         For :class:`pandas.IntervalIndex`:
-
-         >>> idx = pd.interval_range(start=0, end=2)
-         >>> idx
-         IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')
-         >>> idx.to_tuples()
-         Index([(0, 1), (1, 2)], dtype='object')
-         """
-            ),
-        }
-    )
     def to_tuples(self, na_tuple: bool = True) -> np.ndarray:
+        """
+        Return an ndarray (if self is IntervalArray) or Index \
+        (if self is IntervalIndex) of tuples of the form (left, right).
+
+        Parameters
+        ----------
+        na_tuple : bool, default True
+            If ``True``, return ``NA`` as a tuple ``(nan, nan)``. If ``False``,
+            just return ``NA`` as ``nan``.
+
+        Returns
+        -------
+        ndarray or Index
+            An ndarray of tuples representing the intervals
+                if `self` is an IntervalArray.
+            An Index of tuples representing the intervals
+                if `self` is an IntervalIndex.
+
+        See Also
+        --------
+        IntervalArray.to_list : Convert IntervalArray to a list of tuples.
+        IntervalArray.to_numpy : Convert IntervalArray to a numpy array.
+        IntervalArray.unique : Find unique intervals in an IntervalArray.
+
+        Examples
+        --------
+        For :class:`pandas.IntervalArray`:
+
+        >>> idx = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
+        >>> idx
+        <IntervalArray>
+        [(0, 1], (1, 2]]
+        Length: 2, dtype: interval[int64, right]
+        >>> idx.to_tuples()
+        array([(0, 1), (1, 2)], dtype=object)
+
+        For :class:`pandas.IntervalIndex`:
+
+        >>> idx = pd.interval_range(start=0, end=2)
+        >>> idx
+        IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')
+        >>> idx.to_tuples()
+        Index([(0, 1), (1, 2)], dtype='object')
+        """
         tuples = com.asarray_tuplesafe(zip(self._left, self._right))
         if not na_tuple:
             # GH 18756

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -924,6 +924,34 @@ def mid(self) -> Index:
 
     @property
     def length(self) -> Index:
+        """
+        Calculate the length of each interval in the IntervalIndex.
+
+        This method returns a new Index containing the lengths of each interval
+        in the IntervalIndex. The length of an interval is defined as the difference
+        between its end and its start.
+
+        Returns
+        -------
+        Index
+            An Index containing the lengths of each interval.
+
+        See Also
+        --------
+        Interval.length : Return the length of the Interval.
+
+        Examples
+        --------
+        >>> intervals = pd.IntervalIndex.from_arrays(
+        ...     [1, 2, 3], [4, 5, 6], closed="right"
+        ... )
+        >>> intervals.length
+        Index([3, 3, 3], dtype='int64')
+
+        >>> intervals = pd.IntervalIndex.from_tuples([(1, 5), (6, 10), (11, 15)])
+        >>> intervals.length
+        Index([4, 4, 4], dtype='int64')
+        """
         return Index(self._data.length, copy=False)
 
     # --------------------------------------------------------------------