Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add Array::as_any_arc #2902

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions arrow-array/src/array/binary_array.rs
Expand Up @@ -25,6 +25,7 @@ use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;

/// See [`BinaryArray`] and [`LargeBinaryArray`] for storing
/// binary data.
Expand Down Expand Up @@ -254,6 +255,10 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericBinaryArray<OffsetSize> {
self
}

fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>> {
Some(self)
}

fn data(&self) -> &ArrayData {
&self.data
}
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/boolean_array.rs
Expand Up @@ -23,6 +23,7 @@ use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;

/// Array of bools
///
Expand Down Expand Up @@ -152,6 +153,10 @@ impl Array for BooleanArray {
self
}

fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>> {
Some(self)
}

fn data(&self) -> &ArrayData {
&self.data
}
Expand Down
9 changes: 9 additions & 0 deletions arrow-array/src/array/dictionary_array.rs
Expand Up @@ -26,6 +26,7 @@ use arrow_buffer::ArrowNativeType;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::sync::Arc;

///
/// A dictionary array where each element is a single value indexed by an integer key.
Expand Down Expand Up @@ -520,6 +521,10 @@ impl<T: ArrowPrimitiveType> Array for DictionaryArray<T> {
self
}

fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>> {
Copy link
Contributor

@tustvold tustvold Oct 20, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this would still work with Option<Arc<dyn Any>>

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't get your question. Is that that you're wondering about Send + Sync + 'static? They are required for Arc::downcast.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh the Send + Sync is required, but I think you can drop the 'static as it is implied by Any (and lifetime erasure for trait objects)

Some(self)
}

fn data(&self) -> &ArrayData {
&self.data
}
Expand Down Expand Up @@ -597,6 +602,10 @@ impl<'a, K: ArrowPrimitiveType, V: Sync> Array for TypedDictionaryArray<'a, K, V
self.dictionary
}

fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>> {
None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm... Perhaps we should remove the requirement for ArrayAccessor to impl Array 🤔

}

fn data(&self) -> &ArrayData {
&self.dictionary.data
}
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/fixed_size_binary_array.rs
Expand Up @@ -22,6 +22,7 @@ use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::sync::Arc;

/// An array where each element is a fixed-size sequence of bytes.
///
Expand Down Expand Up @@ -358,6 +359,10 @@ impl Array for FixedSizeBinaryArray {
self
}

fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>> {
Some(self)
}

fn data(&self) -> &ArrayData {
&self.data
}
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/fixed_size_list_array.rs
Expand Up @@ -22,6 +22,7 @@ use crate::{
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;

/// A list array where each element is a fixed-size sequence of values with the same
/// type whose maximum length is represented by a i32.
Expand Down Expand Up @@ -202,6 +203,10 @@ impl Array for FixedSizeListArray {
self
}

fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>> {
Some(self)
}

fn data(&self) -> &ArrayData {
&self.data
}
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/list_array.rs
Expand Up @@ -26,6 +26,7 @@ use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType, Field};
use num::Integer;
use std::any::Any;
use std::sync::Arc;

/// trait declaring an offset size, relevant for i32 vs i64 array types.
pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
Expand Down Expand Up @@ -252,6 +253,10 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
self
}

fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>> {
Some(self)
}

fn data(&self) -> &ArrayData {
&self.data
}
Expand Down
4 changes: 4 additions & 0 deletions arrow-array/src/array/map_array.rs
Expand Up @@ -208,6 +208,10 @@ impl Array for MapArray {
self
}

fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>> {
Some(self)
}

fn data(&self) -> &ArrayData {
&self.data
}
Expand Down
33 changes: 33 additions & 0 deletions arrow-array/src/array/mod.rs
Expand Up @@ -88,6 +88,31 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// ```
fn as_any(&self) -> &dyn Any;

/// Returns the array as [`Any`](std::any::Any) wrapped into an [`Arc`] so that it can be
/// downcasted to a specific implementation without the need for an unsized reference. This is helpful if you want
/// to return a concrete array type.
///
/// Note that this conversion may not be possible for all types due to the `'static` constraint.
///
/// # Example:
///
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::{Int32Array, RecordBatch};
/// # use arrow_schema::{Schema, Field, DataType, ArrowError};
///
/// let id = Int32Array::from(vec![1, 2, 3, 4, 5]);
/// let batch = RecordBatch::try_new(
/// Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])),
/// vec![Arc::new(id)]
/// ).unwrap();
///
/// let int32array = Arc::downcast::<Int32Array>(
/// Arc::clone(batch.column(0)).as_any_arc().expect("Failed to create static Arc")
/// ).expect("Failed to downcast");
Comment on lines +110 to +112
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should provide a downcast_array_ref method in cast that can abstract away this logic (and potentially allow hiding this method by putting it on a super trait within a private module)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding a helpers sounds great.

Regarding privacy: do we consider Array to be a sealed trait (because this would be the implication)? Currently users can implement their own Array types if they want.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the nature of the way that the downcasting works means that Array is effectively sealed as there would be no use to implementing it

/// ```
fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>>;

/// Returns a reference to the underlying data of this array.
fn data(&self) -> &ArrayData;

Expand Down Expand Up @@ -258,6 +283,10 @@ impl Array for ArrayRef {
self.as_ref().as_any()
}

fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>> {
Copy link
Contributor

@tustvold tustvold Oct 20, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was really confused how this was working, and then realised this is actually implementing as_any_arc for Arc<Arc<dyn Array>> 😅

Perhaps this should return None to be consistent with as_any above which calls through to the inner array?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as_any "above" (i.e. in the same impl) returns None. We only return None for two cases where we have a references that are not 'static (TBH I don't really like this solution but couldn't come up with something more elegant).

Copy link
Contributor

@tustvold tustvold Oct 24, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as_any "above" (i.e. in the same impl) returns None

No it returns self.as_ref().as_any(). My point here is much like the other cases, we only have a reference to the underlying dyn Array here through an Arc. As written this method will return a Arc<dyn Any> that will downcast to an Arc<ArrayRef>, as opposed to as_any() which will downcast to PrimitiveArray, etc... I think this is a bit off perhaps? Given we have the ability to return None for cases where "this doesn't make sense" perhaps we could use it here?

Alternatively we could clone the inner ArrayRef and downcast that 🤔

TBH I don't really like this solution but couldn't come up with something more elegant

Ditto, hence my suggestion to hide it away in a private trait so we can revisit it later perhaps

Some(self)
}

fn data(&self) -> &ArrayData {
self.as_ref().data()
}
Expand Down Expand Up @@ -316,6 +345,10 @@ impl<'a, T: Array> Array for &'a T {
T::as_any(self)
}

fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>> {
None
}

fn data(&self) -> &ArrayData {
T::data(self)
}
Expand Down
6 changes: 5 additions & 1 deletion arrow-array/src/array/null_array.rs
Expand Up @@ -20,7 +20,7 @@
use crate::Array;
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
use std::{any::Any, sync::Arc};

/// An Array where all elements are nulls
///
Expand Down Expand Up @@ -58,6 +58,10 @@ impl Array for NullArray {
self
}

fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>> {
Some(self)
}

fn data(&self) -> &ArrayData {
&self.data
}
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/primitive_array.rs
Expand Up @@ -29,6 +29,7 @@ use arrow_schema::{ArrowError, DataType};
use chrono::{Duration, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime};
use half::f16;
use std::any::Any;
use std::sync::Arc;

///
/// # Example: Using `collect`
Expand Down Expand Up @@ -466,6 +467,10 @@ impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
self
}

fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>> {
Some(self)
}

fn data(&self) -> &ArrayData {
&self.data
}
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/string_array.rs
Expand Up @@ -25,6 +25,7 @@ use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;

/// Generic struct for \[Large\]StringArray
///
Expand Down Expand Up @@ -318,6 +319,10 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericStringArray<OffsetSize> {
self
}

fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>> {
Some(self)
}

fn data(&self) -> &ArrayData {
&self.data
}
Expand Down
6 changes: 5 additions & 1 deletion arrow-array/src/array/struct_array.rs
Expand Up @@ -20,7 +20,7 @@ use arrow_buffer::buffer::buffer_bin_or;
use arrow_buffer::Buffer;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType, Field};
use std::any::Any;
use std::{any::Any, sync::Arc};

/// A nested array type where each child (called *field*) is represented by a separate
/// array.
Expand Down Expand Up @@ -192,6 +192,10 @@ impl Array for StructArray {
self
}

fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>> {
Some(self)
}

fn data(&self) -> &ArrayData {
&self.data
}
Expand Down
6 changes: 5 additions & 1 deletion arrow-array/src/array/union_array.rs
Expand Up @@ -21,7 +21,7 @@ use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType, Field, UnionMode};
/// Contains the `UnionArray` type.
///
use std::any::Any;
use std::{any::Any, sync::Arc};

/// An Array that can represent slots of varying types.
///
Expand Down Expand Up @@ -310,6 +310,10 @@ impl Array for UnionArray {
self
}

fn as_any_arc(self: Arc<Self>) -> Option<Arc<dyn Any + Send + Sync + 'static>> {
Some(self)
}

fn data(&self) -> &ArrayData {
&self.data
}
Expand Down