Skip to content

Commit

Permalink
Split out arrow-csv (#2594) (#3044)
Browse files Browse the repository at this point in the history
* Split out arrow-csv (#2594)

* Fix doc

* Update arrow-csv/Cargo.toml

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
tustvold and alamb committed Nov 8, 2022
1 parent a950b52 commit fe3318b
Show file tree
Hide file tree
Showing 15 changed files with 656 additions and 633 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/arrow.yml
Expand Up @@ -34,6 +34,7 @@ on:
- arrow-select/**
- arrow-integration-test/**
- arrow-ipc/**
- arrow-csv/**
- .github/**

jobs:
Expand Down Expand Up @@ -64,6 +65,8 @@ jobs:
run: cargo test -p arrow-cast --all-features
- name: Test arrow-ipc with all features
run: cargo test -p arrow-ipc --all-features
- name: Test arrow-csv with all features
run: cargo test -p arrow-csv --all-features
- name: Test arrow-integration-test with all features
run: cargo test -p arrow-integration-test --all-features
- name: Test arrow with default features
Expand Down Expand Up @@ -174,5 +177,7 @@ jobs:
run: cargo clippy -p arrow-cast --all-targets --all-features -- -D warnings
- name: Clippy arrow-ipc with all features
run: cargo clippy -p arrow-ipc --all-targets --all-features -- -D warnings
- name: Clippy arrow-csv with all features
run: cargo clippy -p arrow-csv --all-targets --all-features -- -D warnings
- name: Clippy arrow
run: cargo clippy -p arrow --features=prettyprint,csv,ipc,test_utils,ffi,ipc_compression,dyn_cmp_dict,dyn_arith_dict,chrono-tz --all-targets -- -D warnings
1 change: 1 addition & 0 deletions .github/workflows/arrow_flight.yml
Expand Up @@ -36,6 +36,7 @@ on:
- arrow-select/**
- arrow-flight/**
- arrow-ipc/**
- arrow-csv/**
- .github/**

jobs:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/dev_pr/labeler.yml
Expand Up @@ -24,6 +24,7 @@ arrow:
- arrow-schema/**/*
- arrow-select/**/*
- arrow-ipc/**/*
- arrow-csv/**/*

arrow-flight:
- arrow-flight/**/*
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/integration.yml
Expand Up @@ -32,6 +32,7 @@ on:
- arrow-schema/**
- arrow-select/**
- arrow-ipc/**
- arrow-csv/**
- arrow-pyarrow-integration-testing/**
- arrow-integration-test/**
- arrow-integration-testing/**
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/miri.yaml
Expand Up @@ -32,6 +32,7 @@ on:
- arrow-schema/**
- arrow-select/**
- arrow-ipc/**
- arrow-csv/**
- .github/**

jobs:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/parquet.yml
Expand Up @@ -35,6 +35,7 @@ on:
- arrow-schema/**
- arrow-select/**
- arrow-ipc/**
- arrow-csv/**
- parquet/**
- .github/**

Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Expand Up @@ -21,6 +21,7 @@ members = [
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-csv",
"arrow-data",
"arrow-flight",
"arrow-integration-test",
Expand Down
53 changes: 53 additions & 0 deletions arrow-csv/Cargo.toml
@@ -0,0 +1,53 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

[package]
name = "arrow-csv"
version = "26.0.0"
description = "Support for parsing CSV format to and from the Arrow format"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
license = "Apache-2.0"
keywords = ["arrow"]
include = [
"benches/*.rs",
"src/**/*.rs",
"Cargo.toml",
]
edition = "2021"
rust-version = "1.62"

[lib]
name = "arrow_csv"
path = "src/lib.rs"
bench = false

[dependencies]
arrow-array = { version = "26.0.0", path = "../arrow-array" }
arrow-buffer = { version = "26.0.0", path = "../arrow-buffer" }
arrow-cast = { version = "26.0.0", path = "../arrow-cast" }
arrow-data = { version = "26.0.0", path = "../arrow-data" }
arrow-schema = { version = "26.0.0", path = "../arrow-schema" }
chrono = { version = "0.4", default-features = false, features = ["clock"] }
csv = { version = "1.1", default-features = false }
lazy_static = { version = "1.4", default-features = false }
lexical-core = { version = "^0.8", default-features = false }
regex = { version = "1.5.6", default-features = false, features = ["std", "unicode"] }

[dev-dependencies]
tempfile = "3.3"
8 changes: 4 additions & 4 deletions arrow/src/csv/mod.rs → arrow-csv/src/lib.rs
Expand Up @@ -27,14 +27,14 @@ pub use self::writer::Writer;
pub use self::writer::WriterBuilder;
use arrow_schema::ArrowError;

fn map_csv_error(error: csv_crate::Error) -> ArrowError {
fn map_csv_error(error: csv::Error) -> ArrowError {
match error.kind() {
csv_crate::ErrorKind::Io(error) => ArrowError::CsvError(error.to_string()),
csv_crate::ErrorKind::Utf8 { pos: _, err } => ArrowError::CsvError(format!(
csv::ErrorKind::Io(error) => ArrowError::CsvError(error.to_string()),
csv::ErrorKind::Utf8 { pos: _, err } => ArrowError::CsvError(format!(
"Encountered UTF-8 error while reading CSV file: {}",
err
)),
csv_crate::ErrorKind::UnequalLengths {
csv::ErrorKind::UnequalLengths {
expected_len, len, ..
} => ArrowError::CsvError(format!(
"Encountered unequal lengths between records on CSV file. Expected {} \
Expand Down

0 comments on commit fe3318b

Please sign in to comment.