Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor UnionBuilder Constructors #2488

Merged
merged 2 commits into from Aug 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 14 additions & 14 deletions arrow/src/array/array_union.rs
Expand Up @@ -387,7 +387,7 @@ mod tests {

#[test]
fn test_dense_i32() {
let mut builder = UnionBuilder::new_dense(7);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand Down Expand Up @@ -447,7 +447,7 @@ mod tests {
#[test]
#[cfg_attr(miri, ignore)]
fn test_dense_i32_large() {
let mut builder = UnionBuilder::new_dense(1024);
let mut builder = UnionBuilder::new_dense();

let expected_type_ids = vec![0_i8; 1024];
let expected_value_offsets: Vec<_> = (0..1024).collect();
Expand Down Expand Up @@ -489,7 +489,7 @@ mod tests {

#[test]
fn test_dense_mixed() {
let mut builder = UnionBuilder::new_dense(7);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int64Type>("c", 3).unwrap();
builder.append::<Int32Type>("a", 4).unwrap();
Expand Down Expand Up @@ -539,7 +539,7 @@ mod tests {

#[test]
fn test_dense_mixed_with_nulls() {
let mut builder = UnionBuilder::new_dense(7);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int64Type>("c", 3).unwrap();
builder.append::<Int32Type>("a", 10).unwrap();
Expand Down Expand Up @@ -587,7 +587,7 @@ mod tests {

#[test]
fn test_dense_mixed_with_nulls_and_offset() {
let mut builder = UnionBuilder::new_dense(7);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int64Type>("c", 3).unwrap();
builder.append::<Int32Type>("a", 10).unwrap();
Expand Down Expand Up @@ -714,7 +714,7 @@ mod tests {

#[test]
fn test_sparse_i32() {
let mut builder = UnionBuilder::new_sparse(7);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand Down Expand Up @@ -766,7 +766,7 @@ mod tests {

#[test]
fn test_sparse_mixed() {
let mut builder = UnionBuilder::new_sparse(5);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Float64Type>("c", 3.0).unwrap();
builder.append::<Int32Type>("a", 4).unwrap();
Expand Down Expand Up @@ -829,7 +829,7 @@ mod tests {

#[test]
fn test_sparse_mixed_with_nulls() {
let mut builder = UnionBuilder::new_sparse(5);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append_null::<Int32Type>("a").unwrap();
builder.append::<Float64Type>("c", 3.0).unwrap();
Expand Down Expand Up @@ -882,7 +882,7 @@ mod tests {

#[test]
fn test_sparse_mixed_with_nulls_and_offset() {
let mut builder = UnionBuilder::new_sparse(5);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append_null::<Int32Type>("a").unwrap();
builder.append::<Float64Type>("c", 3.0).unwrap();
Expand Down Expand Up @@ -929,7 +929,7 @@ mod tests {

#[test]
fn test_union_array_validaty() {
let mut builder = UnionBuilder::new_sparse(5);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append_null::<Int32Type>("a").unwrap();
builder.append::<Float64Type>("c", 3.0).unwrap();
Expand All @@ -939,7 +939,7 @@ mod tests {

test_union_validity(&union);

let mut builder = UnionBuilder::new_dense(5);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append_null::<Int32Type>("a").unwrap();
builder.append::<Float64Type>("c", 3.0).unwrap();
Expand All @@ -952,7 +952,7 @@ mod tests {

#[test]
fn test_type_check() {
let mut builder = UnionBuilder::new_sparse(2);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Float32Type>("a", 1.0).unwrap();
let err = builder.append::<Int32Type>("a", 1).unwrap_err().to_string();
assert!(err.contains("Attempt to write col \"a\" with type Int32 doesn't match existing type Float32"), "{}", err);
Expand Down Expand Up @@ -1009,14 +1009,14 @@ mod tests {
}

// Sparse Union
let builder = UnionBuilder::new_sparse(5);
let builder = UnionBuilder::new_sparse();
let record_batch = create_batch(create_union(builder));
// [null, 3.0, null]
let record_batch_slice = record_batch.slice(1, 3);
test_slice_union(record_batch_slice);

// Dense Union
let builder = UnionBuilder::new_dense(5);
let builder = UnionBuilder::new_dense();
let record_batch = create_batch(create_union(builder));
// [null, 3.0, null]
let record_batch_slice = record_batch.slice(1, 3);
Expand Down
20 changes: 15 additions & 5 deletions arrow/src/array/builder/union_builder.rs
Expand Up @@ -111,7 +111,7 @@ impl FieldData {
/// use arrow::array::UnionBuilder;
/// use arrow::datatypes::{Float64Type, Int32Type};
///
/// let mut builder = UnionBuilder::new_dense(3);
/// let mut builder = UnionBuilder::new_dense();
/// builder.append::<Int32Type>("a", 1).unwrap();
/// builder.append::<Float64Type>("b", 3.0).unwrap();
/// builder.append::<Int32Type>("a", 4).unwrap();
Expand All @@ -131,7 +131,7 @@ impl FieldData {
/// use arrow::array::UnionBuilder;
/// use arrow::datatypes::{Float64Type, Int32Type};
///
/// let mut builder = UnionBuilder::new_sparse(3);
/// let mut builder = UnionBuilder::new_sparse();
/// builder.append::<Int32Type>("a", 1).unwrap();
/// builder.append::<Float64Type>("b", 3.0).unwrap();
/// builder.append::<Int32Type>("a", 4).unwrap();
Expand Down Expand Up @@ -159,7 +159,17 @@ pub struct UnionBuilder {

impl UnionBuilder {
/// Creates a new dense array builder.
pub fn new_dense(capacity: usize) -> Self {
pub fn new_dense() -> Self {
Self::with_capacity_dense(1024)
}

/// Creates a new sparse array builder.
pub fn new_sparse() -> Self {
Self::with_capacity_sparse(1024)
}

/// Creates a new dense array builder with capacity.
pub fn with_capacity_dense(capacity: usize) -> Self {
Self {
len: 0,
fields: HashMap::default(),
Expand All @@ -168,8 +178,8 @@ impl UnionBuilder {
}
}

/// Creates a new sparse array builder.
pub fn new_sparse(capacity: usize) -> Self {
Copy link
Contributor

@tustvold tustvold Aug 19, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not something for this PR but we should probably save the capacity and use it when we create new builder children

Ticket created - #2523

/// Creates a new sparse array builder with capacity.
pub fn with_capacity_sparse(capacity: usize) -> Self {
Self {
len: 0,
fields: HashMap::default(),
Expand Down
16 changes: 8 additions & 8 deletions arrow/src/array/equal/mod.rs
Expand Up @@ -1370,7 +1370,7 @@ mod tests {

#[test]
fn test_union_equal_dense() {
let mut builder = UnionBuilder::new_dense(7);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand All @@ -1380,7 +1380,7 @@ mod tests {
builder.append::<Int32Type>("b", 7).unwrap();
let union1 = builder.build().unwrap();

builder = UnionBuilder::new_dense(7);
builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand All @@ -1390,7 +1390,7 @@ mod tests {
builder.append::<Int32Type>("b", 7).unwrap();
let union2 = builder.build().unwrap();

builder = UnionBuilder::new_dense(7);
builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand All @@ -1400,7 +1400,7 @@ mod tests {
builder.append::<Int32Type>("b", 7).unwrap();
let union3 = builder.build().unwrap();

builder = UnionBuilder::new_dense(7);
builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand All @@ -1417,7 +1417,7 @@ mod tests {

#[test]
fn test_union_equal_sparse() {
let mut builder = UnionBuilder::new_sparse(7);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand All @@ -1427,7 +1427,7 @@ mod tests {
builder.append::<Int32Type>("b", 7).unwrap();
let union1 = builder.build().unwrap();

builder = UnionBuilder::new_sparse(7);
builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand All @@ -1437,7 +1437,7 @@ mod tests {
builder.append::<Int32Type>("b", 7).unwrap();
let union2 = builder.build().unwrap();

builder = UnionBuilder::new_sparse(7);
builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand All @@ -1447,7 +1447,7 @@ mod tests {
builder.append::<Int32Type>("b", 7).unwrap();
let union3 = builder.build().unwrap();

builder = UnionBuilder::new_sparse(7);
builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/compute/kernels/cast.rs
Expand Up @@ -5367,7 +5367,7 @@ mod tests {
}

fn make_union_array() -> UnionArray {
let mut builder = UnionBuilder::new_dense(7);
let mut builder = UnionBuilder::with_capacity_dense(7);
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int64Type>("b", 2).unwrap();
builder.build().unwrap()
Expand Down
24 changes: 12 additions & 12 deletions arrow/src/compute/kernels/filter.rs
Expand Up @@ -1553,7 +1553,7 @@ mod tests {
let c = filter(&array, &filter_array).unwrap();
let filtered = c.as_any().downcast_ref::<UnionArray>().unwrap();

let mut builder = UnionBuilder::new_dense(1);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
let expected_array = builder.build().unwrap();

Expand All @@ -1563,7 +1563,7 @@ mod tests {
let c = filter(&array, &filter_array).unwrap();
let filtered = c.as_any().downcast_ref::<UnionArray>().unwrap();

let mut builder = UnionBuilder::new_dense(2);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Int32Type>("A", 34).unwrap();
let expected_array = builder.build().unwrap();
Expand All @@ -1574,7 +1574,7 @@ mod tests {
let c = filter(&array, &filter_array).unwrap();
let filtered = c.as_any().downcast_ref::<UnionArray>().unwrap();

let mut builder = UnionBuilder::new_dense(2);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Float64Type>("B", 3.2).unwrap();
let expected_array = builder.build().unwrap();
Expand All @@ -1584,7 +1584,7 @@ mod tests {

#[test]
fn test_filter_union_array_dense() {
let mut builder = UnionBuilder::new_dense(3);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Float64Type>("B", 3.2).unwrap();
builder.append::<Int32Type>("A", 34).unwrap();
Expand All @@ -1595,7 +1595,7 @@ mod tests {

#[test]
fn test_filter_run_union_array_dense() {
let mut builder = UnionBuilder::new_dense(3);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Int32Type>("A", 3).unwrap();
builder.append::<Int32Type>("A", 34).unwrap();
Expand All @@ -1605,7 +1605,7 @@ mod tests {
let c = filter(&array, &filter_array).unwrap();
let filtered = c.as_any().downcast_ref::<UnionArray>().unwrap();

let mut builder = UnionBuilder::new_dense(3);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Int32Type>("A", 3).unwrap();
let expected = builder.build().unwrap();
Expand All @@ -1615,7 +1615,7 @@ mod tests {

#[test]
fn test_filter_union_array_dense_with_nulls() {
let mut builder = UnionBuilder::new_dense(4);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Float64Type>("B", 3.2).unwrap();
builder.append_null::<Float64Type>("B").unwrap();
Expand All @@ -1626,7 +1626,7 @@ mod tests {
let c = filter(&array, &filter_array).unwrap();
let filtered = c.as_any().downcast_ref::<UnionArray>().unwrap();

let mut builder = UnionBuilder::new_dense(2);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Float64Type>("B", 3.2).unwrap();
let expected_array = builder.build().unwrap();
Expand All @@ -1637,7 +1637,7 @@ mod tests {
let c = filter(&array, &filter_array).unwrap();
let filtered = c.as_any().downcast_ref::<UnionArray>().unwrap();

let mut builder = UnionBuilder::new_dense(2);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append_null::<Float64Type>("B").unwrap();
let expected_array = builder.build().unwrap();
Expand All @@ -1647,7 +1647,7 @@ mod tests {

#[test]
fn test_filter_union_array_sparse() {
let mut builder = UnionBuilder::new_sparse(3);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Float64Type>("B", 3.2).unwrap();
builder.append::<Int32Type>("A", 34).unwrap();
Expand All @@ -1658,7 +1658,7 @@ mod tests {

#[test]
fn test_filter_union_array_sparse_with_nulls() {
let mut builder = UnionBuilder::new_sparse(4);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Float64Type>("B", 3.2).unwrap();
builder.append_null::<Float64Type>("B").unwrap();
Expand All @@ -1669,7 +1669,7 @@ mod tests {
let c = filter(&array, &filter_array).unwrap();
let filtered = c.as_any().downcast_ref::<UnionArray>().unwrap();

let mut builder = UnionBuilder::new_sparse(2);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append_null::<Float64Type>("B").unwrap();
let expected_array = builder.build().unwrap();
Expand Down
6 changes: 3 additions & 3 deletions arrow/src/ipc/reader.rs
Expand Up @@ -1630,7 +1630,7 @@ mod tests {
let array1 = StringArray::from(vec!["foo", "bar", "baz"]);
let array2 = BooleanArray::from(vec![true, false, true]);

let mut union_builder = UnionBuilder::new_dense(3);
let mut union_builder = UnionBuilder::new_dense();
union_builder.append::<Int32Type>("a", 1).unwrap();
union_builder.append::<Float64Type>("b", 10.1).unwrap();
union_builder.append_null::<Float64Type>("b").unwrap();
Expand Down Expand Up @@ -1879,12 +1879,12 @@ mod tests {

#[test]
fn test_roundtrip_dense_union() {
check_union_with_builder(UnionBuilder::new_dense(6));
check_union_with_builder(UnionBuilder::new_dense());
}

#[test]
fn test_roundtrip_sparse_union() {
check_union_with_builder(UnionBuilder::new_sparse(6));
check_union_with_builder(UnionBuilder::new_sparse());
}

/// Read gzipped JSON file
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/ipc/writer.rs
Expand Up @@ -2040,7 +2040,7 @@ mod tests {
),
true,
)]);
let mut builder = UnionBuilder::new_sparse(5);
let mut builder = UnionBuilder::with_capacity_sparse(5);
builder.append::<Int32Type>("a", 1).unwrap();
builder.append_null::<Int32Type>("a").unwrap();
builder.append::<Float64Type>("c", 3.0).unwrap();
Expand Down