Skip to content

Commit

Permalink
MAINT: implement median
Browse files Browse the repository at this point in the history
  • Loading branch information
stsievert committed Jul 26, 2018
1 parent 5b2ecf4 commit 60dd691
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 1 deletion.
2 changes: 1 addition & 1 deletion dask/array/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
nanmax, nanargmin, nanargmax,
cumsum, cumprod,
topk, argtopk)
from .percentile import percentile
from .percentile import percentile, median
with ignoring(ImportError):
from .reductions import nanprod, nancumprod, nancumsum
with ignoring(ImportError):
Expand Down
4 changes: 4 additions & 0 deletions dask/array/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1747,6 +1747,10 @@ def mean(self, axis=None, dtype=None, keepdims=False, split_every=None,
return mean(self, axis=axis, dtype=dtype, keepdims=keepdims,
split_every=split_every, out=out)

def median(self, interpolation='linear'):
    """ Approximate median of this array

    Thin method wrapper around ``median`` from the sibling ``percentile``
    module, so that ``x.median()`` and the module-level function give the
    same result.

    Parameters
    ----------
    interpolation : str, optional
        Forwarded unchanged to the module-level ``median``.  The default,
        ``'linear'``, is the default that function already uses, so calling
        ``x.median()`` with no arguments behaves exactly as before.

    Returns
    -------
    Whatever the module-level ``median`` returns for this array.
    """
    # Imported inside the method rather than at module level -- presumably
    # to avoid a circular import, since percentile.py builds ``Array``
    # objects; confirm before hoisting this to the top of the file.
    from .percentile import median
    return median(self, interpolation=interpolation)

@derived_from(np.ndarray)
def std(self, axis=None, dtype=None, keepdims=False, ddof=0,
split_every=None, out=None):
Expand Down
8 changes: 8 additions & 0 deletions dask/array/percentile.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ def percentile(a, q, interpolation='linear'):
return Array(dsk, name2, chunks=((len(q),),), dtype=dtype)


def median(x, interpolation='linear'):
    """ Approximate median of ``x``, taken as its 50th percentile

    This is a thin wrapper: it asks :func:`percentile` for ``q=50`` and
    forwards ``interpolation`` unchanged.  The return value is exactly what
    :func:`percentile` returns for that request; see :func:`percentile` for
    details on accepted inputs and on how the estimate is computed.
    """
    # The median is by definition the 50th percentile.
    halfway = 50
    return percentile(x, halfway, interpolation=interpolation)


def merge_percentiles(finalq, qs, vals, interpolation='lower', Ns=None):
""" Combine several percentile calculations of different data.
Expand Down
11 changes: 11 additions & 0 deletions dask/array/tests/test_percentiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,17 @@ def test_percentile():
np.array(['a', 'd', 'e'], dtype=x.dtype))


def test_median():
    # Fixed seed keeps the sample -- and therefore the tolerance check at
    # the bottom -- reproducible from run to run.
    size = 1000
    data = np.random.RandomState(42).rand(size)
    # Split into ten equal chunks so the result is built from several
    # chunks rather than a single one.
    darr = da.from_array(data, chunks=size // 10)

    # The Array method and the module-level function must agree.
    assert_eq(darr.median(), da.median(darr))
    # ... and both are defined as the 50th percentile.
    assert_eq(da.percentile(darr, 50), da.median(darr))

    # The dask result is only an approximation, so compare against NumPy's
    # median with a tolerance instead of requiring equality.
    estimate = da.median(darr).compute()
    error = np.abs(estimate - np.median(data))
    assert error < 0.07


@pytest.mark.skip
def test_percentile_with_categoricals():
try:
Expand Down

0 comments on commit 60dd691

Please sign in to comment.