Skip to content

Commit

Permalink
BUG: timedelta merge asof with tolerance (#27650)
Browse files Browse the repository at this point in the history
* issue #27642 - timedelta merge asof with tolerance
  • Loading branch information
ianzur authored and TomAugspurger committed Aug 22, 2019
1 parent b1c871c commit def01cf
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 6 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ Reshaping
^^^^^^^^^

- A ``KeyError`` is now raised if ``.unstack()`` is called on a :class:`Series` or :class:`DataFrame` with a flat :class:`Index` passing a name which is not the correct one (:issue:`18303`)
- Bug in :meth:`merge_asof` where :class:`Timedelta` objects could not be merged when passing the ``tolerance`` kwarg (:issue:`27642`)
- Bug in :meth:`DataFrame.crosstab` where an error was raised when ``margins`` is set to ``True`` and ``normalize`` is not ``False`` (:issue:`27500`)
- :meth:`DataFrame.join` now suppresses the ``FutureWarning`` when the sort parameter is specified (:issue:`21952`)
- Bug in :meth:`DataFrame.join` raising with readonly arrays (:issue:`27943`)
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
is_bool,
is_bool_dtype,
is_categorical_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_datetimelike,
is_dtype_equal,
Expand Down Expand Up @@ -1635,7 +1634,7 @@ def _get_merge_keys(self):
)
)

if is_datetime64_dtype(lt) or is_datetime64tz_dtype(lt):
if is_datetimelike(lt):
if not isinstance(self.tolerance, Timedelta):
raise MergeError(msg)
if self.tolerance < Timedelta(0):
Expand Down
55 changes: 51 additions & 4 deletions pandas/tests/reshape/merge/test_merge_asof.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import datetime

import numpy as np
import pytest
import pytz
Expand Down Expand Up @@ -588,14 +590,23 @@ def test_non_sorted(self):
# ok, though has dupes
merge_asof(trades, self.quotes, on="time", by="ticker")

def test_tolerance(self):
@pytest.mark.parametrize(
"tolerance",
[
Timedelta("1day"),
pytest.param(
datetime.timedelta(days=1),
marks=pytest.mark.xfail(reason="not implemented", strict=True),
),
],
ids=["pd.Timedelta", "datetime.timedelta"],
)
def test_tolerance(self, tolerance):

trades = self.trades
quotes = self.quotes

result = merge_asof(
trades, quotes, on="time", by="ticker", tolerance=Timedelta("1day")
)
result = merge_asof(trades, quotes, on="time", by="ticker", tolerance=tolerance)
expected = self.tolerance
assert_frame_equal(result, expected)

Expand Down Expand Up @@ -1246,3 +1257,39 @@ def test_by_mixed_tz_aware(self):
)
expected["value_y"] = np.array([np.nan], dtype=object)
assert_frame_equal(result, expected)

def test_timedelta_tolerance_nearest(self):
# GH 27642

left = pd.DataFrame(
list(zip([0, 5, 10, 15, 20, 25], [0, 1, 2, 3, 4, 5])),
columns=["time", "left"],
)

left["time"] = pd.to_timedelta(left["time"], "ms")

right = pd.DataFrame(
list(zip([0, 3, 9, 12, 15, 18], [0, 1, 2, 3, 4, 5])),
columns=["time", "right"],
)

right["time"] = pd.to_timedelta(right["time"], "ms")

expected = pd.DataFrame(
list(
zip(
[0, 5, 10, 15, 20, 25],
[0, 1, 2, 3, 4, 5],
[0, np.nan, 2, 4, np.nan, np.nan],
)
),
columns=["time", "left", "right"],
)

expected["time"] = pd.to_timedelta(expected["time"], "ms")

result = pd.merge_asof(
left, right, on="time", tolerance=Timedelta("1ms"), direction="nearest"
)

assert_frame_equal(result, expected)

0 comments on commit def01cf

Please sign in to comment.