Skip to content

Commit

Permalink
Split out arrow-string (apache#2594)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Dec 8, 2022
1 parent 99ced48 commit 9799439
Show file tree
Hide file tree
Showing 11 changed files with 2,534 additions and 2,126 deletions.
35 changes: 18 additions & 17 deletions Cargo.toml
Expand Up @@ -17,23 +17,24 @@

[workspace]
members = [
"arrow",
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-csv",
"arrow-data",
"arrow-flight",
"arrow-integration-test",
"arrow-integration-testing",
"arrow-ipc",
"arrow-json",
"arrow-schema",
"arrow-select",
"object_store",
"parquet",
"parquet_derive",
"parquet_derive_test",
"arrow",
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-csv",
"arrow-data",
"arrow-flight",
"arrow-integration-test",
"arrow-integration-testing",
"arrow-ipc",
"arrow-json",
"arrow-schema",
"arrow-select",
"arrow-string",
"object_store",
"parquet",
"parquet_derive",
"parquet_derive_test",
]
# Enable the version 2 feature resolver, which avoids unifying features for targets that are not being built
#
Expand Down
49 changes: 49 additions & 0 deletions arrow-string/Cargo.toml
@@ -0,0 +1,49 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

[package]
name = "arrow-string"
version = "28.0.0"
description = "String kernels for arrow arrays"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
license = "Apache-2.0"
keywords = ["arrow"]
include = [
"benches/*.rs",
"src/**/*.rs",
"Cargo.toml",
]
edition = "2021"
rust-version = "1.62"

[lib]
name = "arrow_string"
path = "src/lib.rs"
bench = false

[dependencies]
arrow-buffer = { version = "28.0.0", path = "../arrow-buffer" }
arrow-data = { version = "28.0.0", path = "../arrow-data" }
arrow-schema = { version = "28.0.0", path = "../arrow-schema" }
arrow-array = { version = "28.0.0", path = "../arrow-array" }
regex = { version = "1.7.0", default-features = false, features = ["std", "unicode", "perf"] }
regex-syntax = { version = "0.6.27", default-features = false, features = ["unicode"] }

[features]
dyn_cmp_dict = []
Expand Up @@ -15,9 +15,12 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::*;
use crate::error::{ArrowError, Result};

use arrow_array::{Array, GenericStringArray, OffsetSizeTrait};
use arrow_array::builder::BufferBuilder;
use arrow_data::ArrayDataBuilder;
use arrow_data::bit_mask::combine_option_bitmap;
use arrow_schema::ArrowError;

/// Returns the elementwise concatenation of two [`StringArray`]s.
///
Expand All @@ -36,7 +39,7 @@ use arrow_data::bit_mask::combine_option_bitmap;
pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
left: &GenericStringArray<Offset>,
right: &GenericStringArray<Offset>,
) -> Result<GenericStringArray<Offset>> {
) -> Result<GenericStringArray<Offset>, ArrowError> {
if left.len() != right.len() {
return Err(ArrowError::ComputeError(format!(
"Arrays must have the same length: {} != {}",
Expand Down Expand Up @@ -89,7 +92,7 @@ pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
/// An error will be returned if the [`StringArray`]s are of different lengths
pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
arrays: &[&GenericStringArray<Offset>],
) -> Result<GenericStringArray<Offset>> {
) -> Result<GenericStringArray<Offset>, ArrowError> {
if arrays.is_empty() {
return Err(ArrowError::ComputeError(
"concat requires input of at least one array".to_string(),
Expand Down Expand Up @@ -157,6 +160,7 @@ pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(

#[cfg(test)]
mod tests {
use arrow_array::StringArray;
use super::*;
#[test]
fn test_string_concat() {
Expand Down

0 comments on commit 9799439

Please sign in to comment.