Skip to content

Commit

Permalink
Split out arrow-string (#2594) (#3295)
Browse files Browse the repository at this point in the history
* Split out arrow-string (#2594)

* Doc

* Clippy
  • Loading branch information
tustvold committed Dec 8, 2022
1 parent 7b71713 commit 96c7c9d
Show file tree
Hide file tree
Showing 19 changed files with 2,563 additions and 2,164 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/arrow.yml
Expand Up @@ -70,6 +70,8 @@ jobs:
run: cargo test -p arrow-csv --all-features
- name: Test arrow-json with all features
run: cargo test -p arrow-json --all-features
- name: Test arrow-string with all features
run: cargo test -p arrow-string --all-features
- name: Test arrow-integration-test with all features
run: cargo test -p arrow-integration-test --all-features
- name: Test arrow with default features
Expand Down Expand Up @@ -184,5 +186,7 @@ jobs:
run: cargo clippy -p arrow-csv --all-targets --all-features -- -D warnings
- name: Clippy arrow-json with all features
run: cargo clippy -p arrow-json --all-targets --all-features -- -D warnings
- name: Clippy arrow-string with all features
run: cargo clippy -p arrow-string --all-targets --all-features -- -D warnings
- name: Clippy arrow
run: cargo clippy -p arrow --features=prettyprint,csv,ipc,test_utils,ffi,ipc_compression,dyn_cmp_dict,dyn_arith_dict,chrono-tz --all-targets -- -D warnings
5 changes: 3 additions & 2 deletions .github/workflows/arrow_flight.yml
Expand Up @@ -31,10 +31,11 @@ on:
- arrow-buffer/**
- arrow-cast/**
- arrow-data/**
- arrow-schema/**
- arrow-select/**
- arrow-flight/**
- arrow-ipc/**
- arrow-schema/**
- arrow-select/**
- arrow-string/**
- .github/**

jobs:
Expand Down
9 changes: 5 additions & 4 deletions .github/workflows/dev_pr/labeler.yml
Expand Up @@ -16,16 +16,17 @@
# under the License.

arrow:
- arrow/**/*
- arrow-array/**/*
- arrow-buffer/**/*
- arrow-cast/**/*
- arrow-csv/**/*
- arrow-data/**/*
- arrow-schema/**/*
- arrow-select/**/*
- arrow-ipc/**/*
- arrow-csv/**/*
- arrow-json/**/*
- arrow-schema/**/*
- arrow-select/**/*
- arrow-string/**/*
- arrow/**/*

arrow-flight:
- arrow-flight/**/*
Expand Down
15 changes: 8 additions & 7 deletions .github/workflows/integration.yml
Expand Up @@ -24,20 +24,21 @@ on:
- master
pull_request:
paths:
- arrow/**
- .github/**
- arrow-array/**
- arrow-buffer/**
- arrow-cast/**
- arrow-csv/**
- arrow-data/**
- arrow-schema/**
- arrow-select/**
- arrow-integration-test/**
- arrow-integration-testing/**
- arrow-ipc/**
- arrow-csv/**
- arrow-json/**
- arrow-pyarrow-integration-testing/**
- arrow-integration-test/**
- arrow-integration-testing/**
- .github/**
- arrow-schema/**
- arrow-select/**
- arrow-string/**
- arrow/**

jobs:

Expand Down
11 changes: 6 additions & 5 deletions .github/workflows/miri.yaml
Expand Up @@ -24,17 +24,18 @@ on:
- master
pull_request:
paths:
- arrow/**
- .github/**
- arrow-array/**
- arrow-buffer/**
- arrow-cast/**
- arrow-csv/**
- arrow-data/**
- arrow-schema/**
- arrow-select/**
- arrow-ipc/**
- arrow-csv/**
- arrow-json/**
- .github/**
- arrow-schema/**
- arrow-select/**
- arrow-string/**
- arrow/**

jobs:
miri-checks:
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/parquet.yml
Expand Up @@ -36,6 +36,7 @@ on:
- arrow-ipc/**
- arrow-csv/**
- arrow-json/**
- arrow-string/**
- parquet/**
- .github/**

Expand Down Expand Up @@ -123,7 +124,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
rust: [stable]
rust: [ stable ]
steps:
- uses: actions/checkout@v3
- name: Setup Python
Expand Down
35 changes: 18 additions & 17 deletions Cargo.toml
Expand Up @@ -17,23 +17,24 @@

[workspace]
members = [
"arrow",
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-csv",
"arrow-data",
"arrow-flight",
"arrow-integration-test",
"arrow-integration-testing",
"arrow-ipc",
"arrow-json",
"arrow-schema",
"arrow-select",
"object_store",
"parquet",
"parquet_derive",
"parquet_derive_test",
"arrow",
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-csv",
"arrow-data",
"arrow-flight",
"arrow-integration-test",
"arrow-integration-testing",
"arrow-ipc",
"arrow-json",
"arrow-schema",
"arrow-select",
"arrow-string",
"object_store",
"parquet",
"parquet_derive",
"parquet_derive_test",
]
# Enable the version 2 feature resolver, which avoids unifying features for targets that are not being built
#
Expand Down
49 changes: 49 additions & 0 deletions arrow-string/Cargo.toml
@@ -0,0 +1,49 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

[package]
name = "arrow-string"
version = "28.0.0"
description = "String kernels for arrow arrays"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
license = "Apache-2.0"
keywords = ["arrow"]
include = [
"benches/*.rs",
"src/**/*.rs",
"Cargo.toml",
]
edition = "2021"
rust-version = "1.62"

[lib]
name = "arrow_string"
path = "src/lib.rs"
bench = false

[dependencies]
arrow-buffer = { version = "28.0.0", path = "../arrow-buffer" }
arrow-data = { version = "28.0.0", path = "../arrow-data" }
arrow-schema = { version = "28.0.0", path = "../arrow-schema" }
arrow-array = { version = "28.0.0", path = "../arrow-array" }
regex = { version = "1.7.0", default-features = false, features = ["std", "unicode", "perf"] }
regex-syntax = { version = "0.6.27", default-features = false, features = ["unicode"] }

[features]
dyn_cmp_dict = []
Expand Up @@ -15,9 +15,11 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::*;
use crate::error::{ArrowError, Result};
use arrow_array::builder::BufferBuilder;
use arrow_array::*;
use arrow_data::bit_mask::combine_option_bitmap;
use arrow_data::ArrayDataBuilder;
use arrow_schema::ArrowError;

/// Returns the elementwise concatenation of two [`StringArray`]s.
///
Expand All @@ -36,7 +38,7 @@ use arrow_data::bit_mask::combine_option_bitmap;
pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
left: &GenericStringArray<Offset>,
right: &GenericStringArray<Offset>,
) -> Result<GenericStringArray<Offset>> {
) -> Result<GenericStringArray<Offset>, ArrowError> {
if left.len() != right.len() {
return Err(ArrowError::ComputeError(format!(
"Arrays must have the same length: {} != {}",
Expand Down Expand Up @@ -89,7 +91,7 @@ pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
/// An error will be returned if the [`StringArray`]s are of different lengths
pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
arrays: &[&GenericStringArray<Offset>],
) -> Result<GenericStringArray<Offset>> {
) -> Result<GenericStringArray<Offset>, ArrowError> {
if arrays.is_empty() {
return Err(ArrowError::ComputeError(
"concat requires input of at least one array".to_string(),
Expand Down Expand Up @@ -158,6 +160,7 @@ pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
#[cfg(test)]
mod tests {
use super::*;
use arrow_array::StringArray;
#[test]
fn test_string_concat() {
let left = [Some("foo"), Some("bar"), None]
Expand Down

0 comments on commit 96c7c9d

Please sign in to comment.