Skip to content

Commit

Permalink
refactoring union builder apis (#2488)
Browse files Browse the repository at this point in the history
  • Loading branch information
psvri committed Aug 19, 2022
1 parent 930d2d7 commit ac8b0eb
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 47 deletions.
28 changes: 14 additions & 14 deletions arrow/src/array/array_union.rs
Expand Up @@ -387,7 +387,7 @@ mod tests {

#[test]
fn test_dense_i32() {
let mut builder = UnionBuilder::new_dense(7);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand Down Expand Up @@ -447,7 +447,7 @@ mod tests {
#[test]
#[cfg_attr(miri, ignore)]
fn test_dense_i32_large() {
let mut builder = UnionBuilder::new_dense(1024);
let mut builder = UnionBuilder::new_dense();

let expected_type_ids = vec![0_i8; 1024];
let expected_value_offsets: Vec<_> = (0..1024).collect();
Expand Down Expand Up @@ -489,7 +489,7 @@ mod tests {

#[test]
fn test_dense_mixed() {
let mut builder = UnionBuilder::new_dense(7);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int64Type>("c", 3).unwrap();
builder.append::<Int32Type>("a", 4).unwrap();
Expand Down Expand Up @@ -539,7 +539,7 @@ mod tests {

#[test]
fn test_dense_mixed_with_nulls() {
let mut builder = UnionBuilder::new_dense(7);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int64Type>("c", 3).unwrap();
builder.append::<Int32Type>("a", 10).unwrap();
Expand Down Expand Up @@ -587,7 +587,7 @@ mod tests {

#[test]
fn test_dense_mixed_with_nulls_and_offset() {
let mut builder = UnionBuilder::new_dense(7);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int64Type>("c", 3).unwrap();
builder.append::<Int32Type>("a", 10).unwrap();
Expand Down Expand Up @@ -714,7 +714,7 @@ mod tests {

#[test]
fn test_sparse_i32() {
let mut builder = UnionBuilder::new_sparse(7);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand Down Expand Up @@ -766,7 +766,7 @@ mod tests {

#[test]
fn test_sparse_mixed() {
let mut builder = UnionBuilder::new_sparse(5);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Float64Type>("c", 3.0).unwrap();
builder.append::<Int32Type>("a", 4).unwrap();
Expand Down Expand Up @@ -829,7 +829,7 @@ mod tests {

#[test]
fn test_sparse_mixed_with_nulls() {
let mut builder = UnionBuilder::new_sparse(5);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append_null::<Int32Type>("a").unwrap();
builder.append::<Float64Type>("c", 3.0).unwrap();
Expand Down Expand Up @@ -882,7 +882,7 @@ mod tests {

#[test]
fn test_sparse_mixed_with_nulls_and_offset() {
let mut builder = UnionBuilder::new_sparse(5);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append_null::<Int32Type>("a").unwrap();
builder.append::<Float64Type>("c", 3.0).unwrap();
Expand Down Expand Up @@ -929,7 +929,7 @@ mod tests {

#[test]
fn test_union_array_validaty() {
let mut builder = UnionBuilder::new_sparse(5);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append_null::<Int32Type>("a").unwrap();
builder.append::<Float64Type>("c", 3.0).unwrap();
Expand All @@ -939,7 +939,7 @@ mod tests {

test_union_validity(&union);

let mut builder = UnionBuilder::new_dense(5);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append_null::<Int32Type>("a").unwrap();
builder.append::<Float64Type>("c", 3.0).unwrap();
Expand All @@ -952,7 +952,7 @@ mod tests {

#[test]
fn test_type_check() {
let mut builder = UnionBuilder::new_sparse(2);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Float32Type>("a", 1.0).unwrap();
let err = builder.append::<Int32Type>("a", 1).unwrap_err().to_string();
assert!(err.contains("Attempt to write col \"a\" with type Int32 doesn't match existing type Float32"), "{}", err);
Expand Down Expand Up @@ -1009,14 +1009,14 @@ mod tests {
}

// Sparse Union
let builder = UnionBuilder::new_sparse(5);
let builder = UnionBuilder::new_sparse();
let record_batch = create_batch(create_union(builder));
// [null, 3.0, null]
let record_batch_slice = record_batch.slice(1, 3);
test_slice_union(record_batch_slice);

// Dense Union
let builder = UnionBuilder::new_dense(5);
let builder = UnionBuilder::new_dense();
let record_batch = create_batch(create_union(builder));
// [null, 3.0, null]
let record_batch_slice = record_batch.slice(1, 3);
Expand Down
20 changes: 15 additions & 5 deletions arrow/src/array/builder/union_builder.rs
Expand Up @@ -111,7 +111,7 @@ impl FieldData {
/// use arrow::array::UnionBuilder;
/// use arrow::datatypes::{Float64Type, Int32Type};
///
/// let mut builder = UnionBuilder::new_dense(3);
/// let mut builder = UnionBuilder::new_dense();
/// builder.append::<Int32Type>("a", 1).unwrap();
/// builder.append::<Float64Type>("b", 3.0).unwrap();
/// builder.append::<Int32Type>("a", 4).unwrap();
Expand All @@ -131,7 +131,7 @@ impl FieldData {
/// use arrow::array::UnionBuilder;
/// use arrow::datatypes::{Float64Type, Int32Type};
///
/// let mut builder = UnionBuilder::new_sparse(3);
/// let mut builder = UnionBuilder::new_sparse();
/// builder.append::<Int32Type>("a", 1).unwrap();
/// builder.append::<Float64Type>("b", 3.0).unwrap();
/// builder.append::<Int32Type>("a", 4).unwrap();
Expand Down Expand Up @@ -159,7 +159,17 @@ pub struct UnionBuilder {

impl UnionBuilder {
/// Creates a new dense array builder.
pub fn new_dense(capacity: usize) -> Self {
pub fn new_dense() -> Self {
Self::with_capacity_dense(1024)
}

/// Creates a new sparse array builder.
pub fn new_sparse() -> Self {
Self::with_capacity_sparse(1024)
}

/// Creates a new dense array builder with capacity.
pub fn with_capacity_dense(capacity: usize) -> Self {
Self {
len: 0,
fields: HashMap::default(),
Expand All @@ -168,8 +178,8 @@ impl UnionBuilder {
}
}

/// Creates a new sparse array builder.
pub fn new_sparse(capacity: usize) -> Self {
/// Creates a new sparse array builder with capacity.
pub fn with_capacity_sparse(capacity: usize) -> Self {
Self {
len: 0,
fields: HashMap::default(),
Expand Down
16 changes: 8 additions & 8 deletions arrow/src/array/equal/mod.rs
Expand Up @@ -1370,7 +1370,7 @@ mod tests {

#[test]
fn test_union_equal_dense() {
let mut builder = UnionBuilder::new_dense(7);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand All @@ -1380,7 +1380,7 @@ mod tests {
builder.append::<Int32Type>("b", 7).unwrap();
let union1 = builder.build().unwrap();

builder = UnionBuilder::new_dense(7);
builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand All @@ -1390,7 +1390,7 @@ mod tests {
builder.append::<Int32Type>("b", 7).unwrap();
let union2 = builder.build().unwrap();

builder = UnionBuilder::new_dense(7);
builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand All @@ -1400,7 +1400,7 @@ mod tests {
builder.append::<Int32Type>("b", 7).unwrap();
let union3 = builder.build().unwrap();

builder = UnionBuilder::new_dense(7);
builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand All @@ -1417,7 +1417,7 @@ mod tests {

#[test]
fn test_union_equal_sparse() {
let mut builder = UnionBuilder::new_sparse(7);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand All @@ -1427,7 +1427,7 @@ mod tests {
builder.append::<Int32Type>("b", 7).unwrap();
let union1 = builder.build().unwrap();

builder = UnionBuilder::new_sparse(7);
builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand All @@ -1437,7 +1437,7 @@ mod tests {
builder.append::<Int32Type>("b", 7).unwrap();
let union2 = builder.build().unwrap();

builder = UnionBuilder::new_sparse(7);
builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand All @@ -1447,7 +1447,7 @@ mod tests {
builder.append::<Int32Type>("b", 7).unwrap();
let union3 = builder.build().unwrap();

builder = UnionBuilder::new_sparse(7);
builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int32Type>("b", 2).unwrap();
builder.append::<Int32Type>("c", 3).unwrap();
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/compute/kernels/cast.rs
Expand Up @@ -5367,7 +5367,7 @@ mod tests {
}

fn make_union_array() -> UnionArray {
let mut builder = UnionBuilder::new_dense(7);
let mut builder = UnionBuilder::with_capacity_dense(7);
builder.append::<Int32Type>("a", 1).unwrap();
builder.append::<Int64Type>("b", 2).unwrap();
builder.build().unwrap()
Expand Down
24 changes: 12 additions & 12 deletions arrow/src/compute/kernels/filter.rs
Expand Up @@ -1553,7 +1553,7 @@ mod tests {
let c = filter(&array, &filter_array).unwrap();
let filtered = c.as_any().downcast_ref::<UnionArray>().unwrap();

let mut builder = UnionBuilder::new_dense(1);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
let expected_array = builder.build().unwrap();

Expand All @@ -1563,7 +1563,7 @@ mod tests {
let c = filter(&array, &filter_array).unwrap();
let filtered = c.as_any().downcast_ref::<UnionArray>().unwrap();

let mut builder = UnionBuilder::new_dense(2);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Int32Type>("A", 34).unwrap();
let expected_array = builder.build().unwrap();
Expand All @@ -1574,7 +1574,7 @@ mod tests {
let c = filter(&array, &filter_array).unwrap();
let filtered = c.as_any().downcast_ref::<UnionArray>().unwrap();

let mut builder = UnionBuilder::new_dense(2);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Float64Type>("B", 3.2).unwrap();
let expected_array = builder.build().unwrap();
Expand All @@ -1584,7 +1584,7 @@ mod tests {

#[test]
fn test_filter_union_array_dense() {
let mut builder = UnionBuilder::new_dense(3);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Float64Type>("B", 3.2).unwrap();
builder.append::<Int32Type>("A", 34).unwrap();
Expand All @@ -1595,7 +1595,7 @@ mod tests {

#[test]
fn test_filter_run_union_array_dense() {
let mut builder = UnionBuilder::new_dense(3);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Int32Type>("A", 3).unwrap();
builder.append::<Int32Type>("A", 34).unwrap();
Expand All @@ -1605,7 +1605,7 @@ mod tests {
let c = filter(&array, &filter_array).unwrap();
let filtered = c.as_any().downcast_ref::<UnionArray>().unwrap();

let mut builder = UnionBuilder::new_dense(3);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Int32Type>("A", 3).unwrap();
let expected = builder.build().unwrap();
Expand All @@ -1615,7 +1615,7 @@ mod tests {

#[test]
fn test_filter_union_array_dense_with_nulls() {
let mut builder = UnionBuilder::new_dense(4);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Float64Type>("B", 3.2).unwrap();
builder.append_null::<Float64Type>("B").unwrap();
Expand All @@ -1626,7 +1626,7 @@ mod tests {
let c = filter(&array, &filter_array).unwrap();
let filtered = c.as_any().downcast_ref::<UnionArray>().unwrap();

let mut builder = UnionBuilder::new_dense(2);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Float64Type>("B", 3.2).unwrap();
let expected_array = builder.build().unwrap();
Expand All @@ -1637,7 +1637,7 @@ mod tests {
let c = filter(&array, &filter_array).unwrap();
let filtered = c.as_any().downcast_ref::<UnionArray>().unwrap();

let mut builder = UnionBuilder::new_dense(2);
let mut builder = UnionBuilder::new_dense();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append_null::<Float64Type>("B").unwrap();
let expected_array = builder.build().unwrap();
Expand All @@ -1647,7 +1647,7 @@ mod tests {

#[test]
fn test_filter_union_array_sparse() {
let mut builder = UnionBuilder::new_sparse(3);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Float64Type>("B", 3.2).unwrap();
builder.append::<Int32Type>("A", 34).unwrap();
Expand All @@ -1658,7 +1658,7 @@ mod tests {

#[test]
fn test_filter_union_array_sparse_with_nulls() {
let mut builder = UnionBuilder::new_sparse(4);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append::<Float64Type>("B", 3.2).unwrap();
builder.append_null::<Float64Type>("B").unwrap();
Expand All @@ -1669,7 +1669,7 @@ mod tests {
let c = filter(&array, &filter_array).unwrap();
let filtered = c.as_any().downcast_ref::<UnionArray>().unwrap();

let mut builder = UnionBuilder::new_sparse(2);
let mut builder = UnionBuilder::new_sparse();
builder.append::<Int32Type>("A", 1).unwrap();
builder.append_null::<Float64Type>("B").unwrap();
let expected_array = builder.build().unwrap();
Expand Down
6 changes: 3 additions & 3 deletions arrow/src/ipc/reader.rs
Expand Up @@ -1627,7 +1627,7 @@ mod tests {
let array1 = StringArray::from(vec!["foo", "bar", "baz"]);
let array2 = BooleanArray::from(vec![true, false, true]);

let mut union_builder = UnionBuilder::new_dense(3);
let mut union_builder = UnionBuilder::new_dense();
union_builder.append::<Int32Type>("a", 1).unwrap();
union_builder.append::<Float64Type>("b", 10.1).unwrap();
union_builder.append_null::<Float64Type>("b").unwrap();
Expand Down Expand Up @@ -1876,12 +1876,12 @@ mod tests {

#[test]
fn test_roundtrip_dense_union() {
check_union_with_builder(UnionBuilder::new_dense(6));
check_union_with_builder(UnionBuilder::new_dense());
}

#[test]
fn test_roundtrip_sparse_union() {
check_union_with_builder(UnionBuilder::new_sparse(6));
check_union_with_builder(UnionBuilder::new_sparse());
}

/// Read gzipped JSON file
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/ipc/writer.rs
Expand Up @@ -2040,7 +2040,7 @@ mod tests {
),
true,
)]);
let mut builder = UnionBuilder::new_sparse(5);
let mut builder = UnionBuilder::with_capacity_sparse(5);
builder.append::<Int32Type>("a", 1).unwrap();
builder.append_null::<Int32Type>("a").unwrap();
builder.append::<Float64Type>("c", 3.0).unwrap();
Expand Down

0 comments on commit ac8b0eb

Please sign in to comment.