Skip to content

Commit

Permalink
Split out arrow-schema (#2594) (#2711)
Browse files Browse the repository at this point in the history
* Split out arrow-schema (#2594)

* Flatten schema

* Move decimal logic

* Fix doc

* Fix tests

* Fix integration-test

* Remove pyarrow orphan

* PyArrow fixes

* Move ArrowError to arrow-schema

* Fix pyarrow

* Fix test

* Fix conflicts

* Fix pyarrow

* Tweak feature flags

* Test juggling

* Derive PyArrowConvert for Vec
  • Loading branch information
tustvold committed Sep 21, 2022
1 parent 74f639c commit 48cc8be
Show file tree
Hide file tree
Showing 21 changed files with 1,625 additions and 1,493 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/arrow.yml
Expand Up @@ -63,6 +63,8 @@ jobs:
cargo run --example read_csv_infer_schema
- name: Run non-archery based integration-tests
run: cargo test -p arrow-integration-testing
- name: Test arrow-schema with all features
run: cargo test -p arrow-schema --all-features

# test compilation features
linux-features:
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Expand Up @@ -18,6 +18,7 @@
[workspace]
members = [
"arrow",
"arrow-schema",
"arrow-buffer",
"arrow-flight",
"parquet",
Expand Down
45 changes: 28 additions & 17 deletions arrow-pyarrow-integration-testing/src/lib.rs
Expand Up @@ -28,9 +28,13 @@ use arrow::compute::kernels;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::ArrowError;
use arrow::ffi_stream::ArrowArrayStreamReader;
use arrow::pyarrow::PyArrowConvert;
use arrow::pyarrow::{PyArrowConvert, PyArrowException, PyArrowType};
use arrow::record_batch::RecordBatch;

/// Converts an [`ArrowError`] into a Python-facing [`PyErr`].
///
/// The error is rendered to text via `to_string()` and that message is used
/// to construct a `PyArrowException`.
fn to_py_err(err: ArrowError) -> PyErr {
    let message = err.to_string();
    PyArrowException::new_err(message)
}

/// Returns `array + array` of an int64 array.
#[pyfunction]
fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
Expand All @@ -41,8 +45,10 @@ fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
let array = array
.as_any()
.downcast_ref::<Int64Array>()
.ok_or(ArrowError::ParseError("Expects an int64".to_string()))?;
let array = kernels::arithmetic::add(array, array)?;
.ok_or_else(|| ArrowError::ParseError("Expects an int64".to_string()))
.map_err(to_py_err)?;

let array = kernels::arithmetic::add(array, array).map_err(to_py_err)?;

// export
array.to_pyarrow(py)
Expand All @@ -66,56 +72,61 @@ fn double_py(lambda: &PyAny, py: Python) -> PyResult<bool> {

/// Returns the substring
#[pyfunction]
fn substring(array: ArrayData, start: i64) -> PyResult<ArrayData> {
fn substring(
array: PyArrowType<ArrayData>,
start: i64,
) -> PyResult<PyArrowType<ArrayData>> {
// import
let array = ArrayRef::from(array);
let array = ArrayRef::from(array.0);

// substring
let array = kernels::substring::substring(array.as_ref(), start, None)?;
let array = kernels::substring::substring(array.as_ref(), start, None).map_err(to_py_err)?;

Ok(array.data().to_owned())
Ok(array.data().to_owned().into())
}

/// Returns the array concatenated with itself
#[pyfunction]
fn concatenate(array: ArrayData, py: Python) -> PyResult<PyObject> {
let array = ArrayRef::from(array);
fn concatenate(array: PyArrowType<ArrayData>, py: Python) -> PyResult<PyObject> {
let array = ArrayRef::from(array.0);

// concat
let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()])?;
let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).map_err(to_py_err)?;

array.to_pyarrow(py)
}

#[pyfunction]
fn round_trip_type(obj: DataType) -> PyResult<DataType> {
fn round_trip_type(obj: PyArrowType<DataType>) -> PyResult<PyArrowType<DataType>> {
Ok(obj)
}

#[pyfunction]
fn round_trip_field(obj: Field) -> PyResult<Field> {
fn round_trip_field(obj: PyArrowType<Field>) -> PyResult<PyArrowType<Field>> {
Ok(obj)
}

#[pyfunction]
fn round_trip_schema(obj: Schema) -> PyResult<Schema> {
fn round_trip_schema(obj: PyArrowType<Schema>) -> PyResult<PyArrowType<Schema>> {
Ok(obj)
}

#[pyfunction]
fn round_trip_array(obj: ArrayData) -> PyResult<ArrayData> {
fn round_trip_array(obj: PyArrowType<ArrayData>) -> PyResult<PyArrowType<ArrayData>> {
Ok(obj)
}

#[pyfunction]
fn round_trip_record_batch(obj: RecordBatch) -> PyResult<RecordBatch> {
fn round_trip_record_batch(
obj: PyArrowType<RecordBatch>,
) -> PyResult<PyArrowType<RecordBatch>> {
Ok(obj)
}

#[pyfunction]
fn round_trip_record_batch_reader(
obj: ArrowArrayStreamReader,
) -> PyResult<ArrowArrayStreamReader> {
obj: PyArrowType<ArrowArrayStreamReader>,
) -> PyResult<PyArrowType<ArrowArrayStreamReader>> {
Ok(obj)
}

Expand Down
47 changes: 47 additions & 0 deletions arrow-schema/Cargo.toml
@@ -0,0 +1,47 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

[package]
name = "arrow-schema"
version = "23.0.0"
description = "Defines the logical types for arrow arrays"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
license = "Apache-2.0"
keywords = ["arrow"]
include = [
"benches/*.rs",
"src/**/*.rs",
"Cargo.toml",
]
edition = "2021"
rust-version = "1.62"

[lib]
name = "arrow_schema"
path = "src/lib.rs"
bench = false

[dependencies]
serde = { version = "1.0", default-features = false, features = ["derive", "std"], optional = true }

[features]
default = []

[dev-dependencies]
serde_json = "1.0"

0 comments on commit 48cc8be

Please sign in to comment.