Skip to content

Commit

Permalink
Ensure that MaskedColumn.data returns a plain MaskedArray.
Browse files Browse the repository at this point in the history
Previously this was instead a masked_BaseColumn, which caused a number
of performance hits, e.g., because slicing would go through multiple
rounds of ``__array_finalize__``. This includes a regression check for
that count.
  • Loading branch information
mhvk committed Jun 30, 2019
1 parent a0e7363 commit 5f99eef
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 4 deletions.
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ astropy.table
- Improved the implementation of ``Table.replace_column()`` to provide
a speed-up of 5-10 times for wide tables. [#8902]

- ``MaskedColumn.data`` will now return a plain ``MaskedArray`` rather than
the previous (unintended) ``masked_BaseColumn``. [#8855]

astropy.tests
^^^^^^^^^^^^^

Expand Down
10 changes: 6 additions & 4 deletions astropy/table/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -1272,10 +1272,12 @@ def fill_value(self, val):

@property
def data(self):
out = self.view(ma.MaskedArray)
# The following is necessary because of a bug in Numpy, which was
# fixed in numpy/numpy#2703. The fix should be included in Numpy 1.8.0.
out.fill_value = self.fill_value
"""The plain MaskedArray data held by this column."""
out = self.view(np.ma.MaskedArray)
# By default, a MaskedArray view will set the _baseclass to be the
# same as that of our own class, i.e., BaseColumn. Since we want
# to return a plain MaskedArray, we reset the baseclass accordingly.
out._baseclass = np.ndarray
return out

def filled(self, fill_value=None):
Expand Down
44 changes: 44 additions & 0 deletions astropy/table/tests/test_masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy.ma as ma

from astropy.table import Column, MaskedColumn, Table, QTable
from astropy.table.column import BaseColumn
from astropy.tests.helper import catch_warnings
from astropy.time import Time
import astropy.units as u
Expand Down Expand Up @@ -499,3 +500,46 @@ def test_masked_column_with_unit_in_qtable():
assert len(w) == 1
assert "dropping mask in Quantity column 'c'"
assert isinstance(t['b'], u.Quantity)


def test_masked_column_data_attribute_is_plain_masked_array():
c = MaskedColumn([1, 2], mask=[False, True])
c_data = c.data
assert type(c_data) is np.ma.MaskedArray
assert type(c_data.data) is np.ndarray


def test_mask_slicing_count_array_finalize():
"""Check that we don't finalize MaskedColumn too often.
Regression test for gh-6721.
"""
# Create a new BaseColumn class that counts how often
# ``__array_finalize__`` is called.
class MyBaseColumn(BaseColumn):
counter = 0

def __array_finalize__(self, obj):
super().__array_finalize__(obj)
MyBaseColumn.counter += 1

# Base a new MaskedColumn class on it. The normal MaskedColumn
# hardcodes the initialization to BaseColumn, so we exchange that.
class MyMaskedColumn(MaskedColumn, Column, MyBaseColumn):
def __new__(cls, *args, **kwargs):
self = super().__new__(cls, *args, **kwargs)
self._baseclass = MyBaseColumn
return self

# Creation really needs 2 finalizations (once for the BaseColumn
# call inside ``__new__`` and once when the view as a MaskedColumn
# is taken), but since the first is hardcoded, we do not capture it
# and thus the count is only 1.
c = MyMaskedColumn([1, 2], mask=[False, True])
assert MyBaseColumn.counter == 1
# slicing should need only one ``__array_finalize__`` (used to be 3).
c0 = c[:]
assert MyBaseColumn.counter == 2
# repr should need none (used to be 2!!)
repr(c0)
assert MyBaseColumn.counter == 2

0 comments on commit 5f99eef

Please sign in to comment.