Skip to content

Commit

Permalink
BUG: Use large_string in string array consistently
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl committed May 6, 2024
1 parent ea7bcd1 commit c9ac047
Showing 1 changed file with 10 additions and 6 deletions.
16 changes: 10 additions & 6 deletions pandas/core/arrays/string_arrow.py
@@ -196,13 +196,13 @@ def _from_sequence(
na_values = scalars._mask
result = scalars._data
result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
- return cls(pa.array(result, mask=na_values, type=pa.string()))
+ return cls(pa.array(result, mask=na_values, type=pa.large_string()))
elif isinstance(scalars, (pa.Array, pa.ChunkedArray)):
- return cls(pc.cast(scalars, pa.string()))
+ return cls(pc.cast(scalars, pa.large_string()))

# convert non-na-likes to str
result = lib.ensure_string_array(scalars, copy=copy)
- return cls(pa.array(result, type=pa.string(), from_pandas=True))
+ return cls(pa.array(result, type=pa.large_string(), from_pandas=True))

@classmethod
def _from_sequence_of_strings(
@@ -245,7 +245,7 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
value_set = [
pa_scalar.as_py()
for pa_scalar in [pa.scalar(value, from_pandas=True) for value in values]
- if pa_scalar.type in (pa.string(), pa.null())
+ if pa_scalar.type in (pa.string(), pa.null(), pa.large_string())
]

# short-circuit to return all False array.
@@ -332,7 +332,9 @@ def _str_map(
result = lib.map_infer_mask(
arr, f, mask.view("uint8"), convert=False, na_value=na_value
)
- result = pa.array(result, mask=mask, type=pa.string(), from_pandas=True)
+ result = pa.array(
+     result, mask=mask, type=pa.large_string(), from_pandas=True
+ )
return type(self)(result)
else:
# This is when the result type is object. We reach this when
@@ -655,7 +657,9 @@ def _str_map(
result = lib.map_infer_mask(
arr, f, mask.view("uint8"), convert=False, na_value=na_value
)
- result = pa.array(result, mask=mask, type=pa.string(), from_pandas=True)
+ result = pa.array(
+     result, mask=mask, type=pa.large_string(), from_pandas=True
+ )
return type(self)(result)
else:
# This is when the result type is object. We reach this when
Expand Down

0 comments on commit c9ac047

Please sign in to comment.