Skip to content

Commit

Permalink
BUG: to_dict not converting NA to None (#50796)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl committed Jan 18, 2023
1 parent 0a0372a commit b888dad
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 13 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Expand Up @@ -1020,6 +1020,7 @@ I/O
- Bug in :meth:`DataFrame.to_string` ignoring float formatter for extension arrays (:issue:`39336`)
- Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)
- Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`)
- Bug in :meth:`DataFrame.to_dict` not converting ``NA`` to ``None`` (:issue:`50795`)
- Bug in :meth:`DataFrame.to_json` where it would segfault when failing to encode a string (:issue:`50307`)

Period
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/dtypes/cast.py
Expand Up @@ -20,6 +20,10 @@
import numpy as np

from pandas._libs import lib
from pandas._libs.missing import (
NA,
NAType,
)
from pandas._libs.tslibs import (
NaT,
OutOfBoundsDatetime,
Expand Down Expand Up @@ -176,7 +180,7 @@ def maybe_box_datetimelike(value: Scalar, dtype: Dtype | None = None) -> Scalar:
return value


def maybe_box_native(value: Scalar) -> Scalar:
def maybe_box_native(value: Scalar | None | NAType) -> Scalar | None | NAType:
"""
If passed a scalar cast the scalar to a python native type.
Expand All @@ -202,6 +206,8 @@ def maybe_box_native(value: Scalar) -> Scalar:
value = bool(value)
elif isinstance(value, (np.datetime64, np.timedelta64)):
value = maybe_box_datetimelike(value)
elif value is NA:
value = None
return value


Expand Down
27 changes: 16 additions & 11 deletions pandas/core/methods/to_dict.py
Expand Up @@ -6,7 +6,10 @@
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import maybe_box_native
from pandas.core.dtypes.common import is_object_dtype
from pandas.core.dtypes.common import (
is_extension_array_dtype,
is_object_dtype,
)

from pandas import DataFrame
from pandas.core import common as com
Expand Down Expand Up @@ -88,16 +91,18 @@ def to_dict(
# GH46470 Return quickly if orient series to avoid creating dtype objects
return into_c((k, v) for k, v in df.items())

object_dtype_indices = [
i for i, col_dtype in enumerate(df.dtypes.values) if is_object_dtype(col_dtype)
box_native_indices = [
i
for i, col_dtype in enumerate(df.dtypes.values)
if is_object_dtype(col_dtype) or is_extension_array_dtype(col_dtype)
]
are_all_object_dtype_cols = len(object_dtype_indices) == len(df.dtypes)
are_all_object_dtype_cols = len(box_native_indices) == len(df.dtypes)

if orient == "dict":
return into_c((k, v.to_dict(into)) for k, v in df.items())

elif orient == "list":
object_dtype_indices_as_set = set(object_dtype_indices)
object_dtype_indices_as_set = set(box_native_indices)
return into_c(
(
k,
Expand All @@ -110,7 +115,7 @@ def to_dict(

elif orient == "split":
data = df._create_data_for_split_and_tight_to_dict(
are_all_object_dtype_cols, object_dtype_indices
are_all_object_dtype_cols, box_native_indices
)

return into_c(
Expand All @@ -123,7 +128,7 @@ def to_dict(

elif orient == "tight":
data = df._create_data_for_split_and_tight_to_dict(
are_all_object_dtype_cols, object_dtype_indices
are_all_object_dtype_cols, box_native_indices
)

return into_c(
Expand Down Expand Up @@ -155,8 +160,8 @@ def to_dict(
data = [
into_c(zip(columns, t)) for t in df.itertuples(index=False, name=None)
]
if object_dtype_indices:
object_dtype_indices_as_set = set(object_dtype_indices)
if box_native_indices:
object_dtype_indices_as_set = set(box_native_indices)
object_dtype_cols = {
col
for i, col in enumerate(df.columns)
Expand All @@ -176,8 +181,8 @@ def to_dict(
(t[0], dict(zip(df.columns, map(maybe_box_native, t[1:]))))
for t in df.itertuples(name=None)
)
elif object_dtype_indices:
object_dtype_indices_as_set = set(object_dtype_indices)
elif box_native_indices:
object_dtype_indices_as_set = set(box_native_indices)
is_object_dtype_by_index = [
i in object_dtype_indices_as_set for i in range(len(df.columns))
]
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/series.py
Expand Up @@ -89,6 +89,7 @@
from pandas.core.dtypes.common import (
ensure_platform_int,
is_dict_like,
is_extension_array_dtype,
is_integer,
is_iterator,
is_list_like,
Expand Down Expand Up @@ -1832,7 +1833,7 @@ def to_dict(self, into: type[dict] = dict) -> dict:
# GH16122
into_c = com.standardize_mapping(into)

if is_object_dtype(self):
if is_object_dtype(self) or is_extension_array_dtype(self):
return into_c((k, maybe_box_native(v)) for k, v in self.items())
else:
# Not an object dtype => all types will be the same so let the default
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/frame/methods/test_to_dict.py
Expand Up @@ -9,6 +9,7 @@
import pytz

from pandas import (
NA,
DataFrame,
Index,
MultiIndex,
Expand Down Expand Up @@ -458,3 +459,29 @@ def test_to_dict_index_false(self, orient, expected):
df = DataFrame({"col1": [1, 2], "col2": [3, 4]}, index=["row1", "row2"])
result = df.to_dict(orient=orient, index=False)
tm.assert_dict_equal(result, expected)

# Regression test for GH#50795: for each ``orient`` parametrized below, the
# ``NA`` entry of a nullable ``Int64`` column is expected to come back from
# ``DataFrame.to_dict`` as Python ``None``.
@pytest.mark.parametrize(
"orient, expected",
[
# Each case pairs an ``orient`` value with the exact structure expected
# for a single column "a" holding ``[1, NA]`` on the default 0/1 index.
("dict", {"a": {0: 1, 1: None}}),
("list", {"a": [1, None]}),
("split", {"index": [0, 1], "columns": ["a"], "data": [[1], [None]]}),
(
"tight",
{
"index": [0, 1],
"columns": ["a"],
"data": [[1], [None]],
"index_names": [None],
"column_names": [None],
},
),
# Note: for "records" the expected value is a list of dicts, not a dict.
("records", [{"a": 1}, {"a": None}]),
("index", {0: {"a": 1}, 1: {"a": None}}),
],
)
def test_to_dict_na_to_none(self, orient, expected):
# GH#50795
# Build a one-column frame with the nullable "Int64" dtype so the second
# row holds ``NA``.
df = DataFrame({"a": [1, NA]}, dtype="Int64")
result = df.to_dict(orient=orient)
# Compared with plain ``==`` against the literal expected structure, in
# which the ``NA`` position is ``None``.
assert result == expected

0 comments on commit b888dad

Please sign in to comment.