Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/pandas-dev/pandas into bug_…
Browse files Browse the repository at this point in the history
…empty_contructor_dtype
  • Loading branch information
rhshadrach committed May 12, 2024
2 parents ad0260c + 34177d6 commit 41c96a2
Show file tree
Hide file tree
Showing 10 changed files with 216 additions and 50 deletions.
12 changes: 0 additions & 12 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.DataFrame.plot PR02,SA01" \
-i "pandas.Grouper PR02" \
-i "pandas.Index PR07" \
-i "pandas.IntervalIndex.is_non_overlapping_monotonic SA01" \
-i "pandas.IntervalIndex.left GL08" \
-i "pandas.IntervalIndex.length GL08" \
-i "pandas.IntervalIndex.set_closed RT03,SA01" \
-i "pandas.IntervalIndex.to_tuples RT03,SA01" \
-i "pandas.MultiIndex PR01" \
-i "pandas.MultiIndex.append PR07,SA01" \
-i "pandas.MultiIndex.copy PR07,RT03,SA01" \
Expand Down Expand Up @@ -144,9 +141,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.RangeIndex.start SA01" \
-i "pandas.RangeIndex.step SA01" \
-i "pandas.RangeIndex.stop SA01" \
-i "pandas.Series.add PR07" \
-i "pandas.Series.case_when RT03" \
-i "pandas.Series.cat PR07" \
-i "pandas.Series.cat.add_categories PR01,PR02" \
-i "pandas.Series.cat.as_ordered PR01" \
-i "pandas.Series.cat.as_unordered PR01" \
Expand Down Expand Up @@ -274,7 +269,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.TimedeltaIndex.nanoseconds SA01" \
-i "pandas.TimedeltaIndex.seconds SA01" \
-i "pandas.TimedeltaIndex.to_pytimedelta RT03,SA01" \
-i "pandas.Timestamp.asm8 SA01" \
-i "pandas.Timestamp.astimezone SA01" \
-i "pandas.Timestamp.ceil SA01" \
-i "pandas.Timestamp.combine PR01,SA01" \
Expand All @@ -289,7 +283,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Timestamp.hour GL08" \
-i "pandas.Timestamp.is_leap_year SA01" \
-i "pandas.Timestamp.isocalendar SA01" \
-i "pandas.Timestamp.isoformat SA01" \
-i "pandas.Timestamp.isoweekday SA01" \
-i "pandas.Timestamp.max PR02" \
-i "pandas.Timestamp.microsecond GL08" \
Expand All @@ -305,7 +298,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Timestamp.resolution PR02,PR07,SA01" \
-i "pandas.Timestamp.round SA01" \
-i "pandas.Timestamp.second GL08" \
-i "pandas.Timestamp.strftime SA01" \
-i "pandas.Timestamp.strptime PR01,SA01" \
-i "pandas.Timestamp.time SA01" \
-i "pandas.Timestamp.timestamp SA01" \
Expand All @@ -328,9 +320,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Timestamp.utcoffset SA01" \
-i "pandas.Timestamp.utctimetuple SA01" \
-i "pandas.Timestamp.value GL08" \
-i "pandas.Timestamp.week SA01" \
-i "pandas.Timestamp.weekday SA01" \
-i "pandas.Timestamp.weekofyear SA01" \
-i "pandas.Timestamp.year GL08" \
-i "pandas.api.extensions.ExtensionArray._from_sequence_of_strings SA01" \
-i "pandas.api.extensions.ExtensionArray._hash_pandas_object RT03,SA01" \
Expand Down Expand Up @@ -402,13 +392,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.arrays.DatetimeArray SA01" \
-i "pandas.arrays.FloatingArray SA01" \
-i "pandas.arrays.IntegerArray SA01" \
-i "pandas.arrays.IntervalArray.is_non_overlapping_monotonic SA01" \
-i "pandas.arrays.IntervalArray.left SA01" \
-i "pandas.arrays.IntervalArray.length SA01" \
-i "pandas.arrays.IntervalArray.mid SA01" \
-i "pandas.arrays.IntervalArray.right SA01" \
-i "pandas.arrays.IntervalArray.set_closed RT03,SA01" \
-i "pandas.arrays.IntervalArray.to_tuples RT03,SA01" \
-i "pandas.arrays.NumpyExtensionArray SA01" \
-i "pandas.arrays.SparseArray PR07,SA01" \
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
Expand Down
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,8 @@ Datetimelike
- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`)
- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` not raising on custom business day frequencies bigger than "1C" (:issue:`58664`)
- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`)
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)

Timedelta
Expand Down Expand Up @@ -421,7 +423,6 @@ Interval
Indexing
^^^^^^^^
- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`)
-

Missing
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def get_start_end_field(
# month of year. Other offsets use month, startingMonth as ending
# month of year.

if freq_name.lstrip("B")[0:2] in ["MS", "QS", "YS"]:
if freq_name.lstrip("B")[0:2] in ["MS", "QS", "YS"]:
end_month = 12 if month_kw == 1 else month_kw - 1
start_month = month_kw
else:
Expand Down
6 changes: 6 additions & 0 deletions pandas/_libs/tslibs/nattype.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,12 @@ class NaTType(_NaT):
See strftime documentation for more information on the format string:
https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior.
See Also
--------
Timestamp.isoformat : Return the time formatted according to ISO 8601.
pd.to_datetime : Convert argument to datetime.
Period.strftime : Format a single Period.
Examples
--------
>>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651')
Expand Down
23 changes: 23 additions & 0 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,11 @@ cdef class _Timestamp(ABCTimestamp):
-------
int

See Also
--------
Timestamp.weekday : Return the day of the week.
Timestamp.quarter : Return the quarter of the year.

Examples
--------
>>> ts = pd.Timestamp(2020, 3, 14)
Expand Down Expand Up @@ -1008,6 +1013,12 @@ cdef class _Timestamp(ABCTimestamp):
-------
str

See Also
--------
Timestamp.strftime : Return a formatted string.
Timestamp.isocalendar : Return a tuple containing ISO year, week number and
weekday.

Examples
--------
>>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651')
Expand Down Expand Up @@ -1150,6 +1161,12 @@ cdef class _Timestamp(ABCTimestamp):
"""
Return numpy datetime64 format in nanoseconds.

See Also
--------
numpy.datetime64 : Numpy datatype for dates and times with high precision.
Timestamp.to_numpy : Convert the Timestamp to a NumPy datetime64.
to_datetime : Convert argument to datetime.

Examples
--------
>>> ts = pd.Timestamp(2020, 3, 14, 15)
Expand Down Expand Up @@ -1543,6 +1560,12 @@ class Timestamp(_Timestamp):
See strftime documentation for more information on the format string:
https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior.
See Also
--------
Timestamp.isoformat : Return the time formatted according to ISO 8601.
pd.to_datetime : Convert argument to datetime.
Period.strftime : Format a single Period.
Examples
--------
>>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651')
Expand Down
1 change: 1 addition & 0 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2847,6 +2847,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
Parameters
----------
data : Series or CategoricalIndex
The object to which the categorical accessor is attached.
See Also
--------
Expand Down
126 changes: 91 additions & 35 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1509,10 +1509,54 @@ def set_closed(self, closed: IntervalClosedType) -> Self:
"""

@property
@Appender(
_interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs
)
def is_non_overlapping_monotonic(self) -> bool:
"""
Return a boolean whether the IntervalArray/IntervalIndex\
is non-overlapping and monotonic.
Non-overlapping means (no Intervals share points), and monotonic means
either monotonic increasing or monotonic decreasing.
See Also
--------
overlaps : Check if two IntervalIndex objects overlap.
Examples
--------
For arrays:
>>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
>>> interv_arr
<IntervalArray>
[(0, 1], (1, 5]]
Length: 2, dtype: interval[int64, right]
>>> interv_arr.is_non_overlapping_monotonic
True
>>> interv_arr = pd.arrays.IntervalArray(
... [pd.Interval(0, 1), pd.Interval(-1, 0.1)]
... )
>>> interv_arr
<IntervalArray>
[(0.0, 1.0], (-1.0, 0.1]]
Length: 2, dtype: interval[float64, right]
>>> interv_arr.is_non_overlapping_monotonic
False
For Interval Index:
>>> interv_idx = pd.interval_range(start=0, end=2)
>>> interv_idx
IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')
>>> interv_idx.is_non_overlapping_monotonic
True
>>> interv_idx = pd.interval_range(start=0, end=2, closed="both")
>>> interv_idx
IntervalIndex([[0, 1], [1, 2]], dtype='interval[int64, both]')
>>> interv_idx.is_non_overlapping_monotonic
False
"""
# must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... )
# or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...)
# we already require left <= right
Expand Down Expand Up @@ -1624,39 +1668,51 @@ def __arrow_array__(self, type=None):
"""
)

@Appender(
_interval_shared_docs["to_tuples"]
% {
"return_type": (
"ndarray (if self is IntervalArray) or Index (if self is IntervalIndex)"
),
"examples": textwrap.dedent(
"""\
Examples
--------
For :class:`pandas.IntervalArray`:
>>> idx = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
>>> idx
<IntervalArray>
[(0, 1], (1, 2]]
Length: 2, dtype: interval[int64, right]
>>> idx.to_tuples()
array([(0, 1), (1, 2)], dtype=object)
For :class:`pandas.IntervalIndex`:
>>> idx = pd.interval_range(start=0, end=2)
>>> idx
IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')
>>> idx.to_tuples()
Index([(0, 1), (1, 2)], dtype='object')
"""
),
}
)
def to_tuples(self, na_tuple: bool = True) -> np.ndarray:
"""
Return an ndarray (if self is IntervalArray) or Index \
(if self is IntervalIndex) of tuples of the form (left, right).
Parameters
----------
na_tuple : bool, default True
If ``True``, return ``NA`` as a tuple ``(nan, nan)``. If ``False``,
just return ``NA`` as ``nan``.
Returns
-------
ndarray or Index
An ndarray of tuples representing the intervals
if `self` is an IntervalArray.
An Index of tuples representing the intervals
if `self` is an IntervalIndex.
See Also
--------
IntervalArray.to_list : Convert IntervalArray to a list of tuples.
IntervalArray.to_numpy : Convert IntervalArray to a numpy array.
IntervalArray.unique : Find unique intervals in an IntervalArray.
Examples
--------
For :class:`pandas.IntervalArray`:
>>> idx = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
>>> idx
<IntervalArray>
[(0, 1], (1, 2]]
Length: 2, dtype: interval[int64, right]
>>> idx.to_tuples()
array([(0, 1), (1, 2)], dtype=object)
For :class:`pandas.IntervalIndex`:
>>> idx = pd.interval_range(start=0, end=2)
>>> idx
IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')
>>> idx.to_tuples()
Index([(0, 1), (1, 2)], dtype='object')
"""
tuples = com.asarray_tuplesafe(zip(self._left, self._right))
if not na_tuple:
# GH 18756
Expand Down
28 changes: 28 additions & 0 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -924,6 +924,34 @@ def mid(self) -> Index:

@property
def length(self) -> Index:
    """
    Return the length of each interval in the IntervalIndex.

    The length of an interval is defined as the difference between its
    end and its start. The lengths are read from the underlying interval
    data and wrapped in a new Index.

    Returns
    -------
    Index
        An Index holding the length of each interval.

    See Also
    --------
    Interval.length : Return the length of the Interval.

    Examples
    --------
    >>> intervals = pd.IntervalIndex.from_arrays(
    ...     [1, 2, 3], [4, 5, 6], closed="right"
    ... )
    >>> intervals.length
    Index([3, 3, 3], dtype='int64')

    >>> intervals = pd.IntervalIndex.from_tuples([(1, 5), (6, 10), (11, 15)])
    >>> intervals.length
    Index([4, 4, 4], dtype='int64')
    """
    # The Index is built with copy=False, exactly as before.
    interval_lengths = self._data.length
    return Index(interval_lengths, copy=False)

# --------------------------------------------------------------------
Expand Down

0 comments on commit 41c96a2

Please sign in to comment.