Split out integration test plumbing (#2594) (#2300) (#2598)
* Split out integration test plumbing (#2594) (#2300)

* Fix RAT
tustvold committed Aug 28, 2022 · 1 parent 6ab208c · commit c6e7680
Showing 13 changed files with 677 additions and 757 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/arrow.yml
@@ -64,6 +64,8 @@ jobs:
cargo run --example dynamic_types
cargo run --example read_csv
cargo run --example read_csv_infer_schema
+ - name: Run non-archery based integration-tests
+   run: cargo test -p arrow-integration-testing

# test compilation features
linux-features:
1 change: 0 additions & 1 deletion arrow/Cargo.toml
@@ -61,7 +61,6 @@
chrono = { version = "0.4", default-features = false, features = ["clock"] }
chrono-tz = { version = "0.6", default-features = false, optional = true }
flatbuffers = { version = "2.1.2", default-features = false, features = ["thiserror"], optional = true }
-hex = { version = "0.4", default-features = false, features = ["std"] }
comfy-table = { version = "6.0", optional = true, default-features = false }
pyo3 = { version = "0.17", default-features = false, optional = true }
lexical-core = { version = "^0.8", default-features = false, features = ["write-integers", "write-floats", "parse-integers", "parse-floats"] }
346 changes: 1 addition & 345 deletions arrow/src/ipc/reader.rs
@@ -1173,336 +1173,8 @@ mod tests {

use std::fs::File;

-use flate2::read::GzDecoder;

+use crate::datatypes;
use crate::datatypes::{ArrowNativeType, Float64Type, Int32Type, Int8Type};
-use crate::{datatypes, util::integration_util::*};

#[test]
#[cfg(not(feature = "force_validate"))]
fn read_generated_files_014() {
let testdata = crate::util::test_util::arrow_test_data();
let version = "0.14.1";
// the test is repetitive, thus we can read all supported files at once
let paths = vec![
"generated_interval",
"generated_datetime",
"generated_dictionary",
"generated_map",
"generated_nested",
"generated_primitive_no_batches",
"generated_primitive_zerolength",
"generated_primitive",
"generated_decimal",
];
paths.iter().for_each(|path| {
let file = File::open(format!(
"{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
testdata, version, path
))
.unwrap();

let mut reader = FileReader::try_new(file, None).unwrap();

// read expected JSON output
let arrow_json = read_gzip_json(version, path);
assert!(arrow_json.equals_reader(&mut reader).unwrap());
});
}

#[test]
#[should_panic(expected = "Big Endian is not supported for Decimal!")]
fn read_decimal_be_file_should_panic() {
let testdata = crate::util::test_util::arrow_test_data();
let file = File::open(format!(
"{}/arrow-ipc-stream/integration/1.0.0-bigendian/generated_decimal.arrow_file",
testdata
))
.unwrap();
FileReader::try_new(file, None).unwrap();
}

#[test]
#[should_panic(
expected = "Last offset 687865856 of Utf8 is larger than values length 41"
)]
fn read_dictionary_be_not_implemented() {
// The offsets are not translated for big-endian files
// https://github.com/apache/arrow-rs/issues/859
let testdata = crate::util::test_util::arrow_test_data();
let file = File::open(format!(
"{}/arrow-ipc-stream/integration/1.0.0-bigendian/generated_dictionary.arrow_file",
testdata
))
.unwrap();
FileReader::try_new(file, None).unwrap();
}

#[test]
fn read_generated_be_files_should_work() {
// complementary to the previous test
let testdata = crate::util::test_util::arrow_test_data();
let paths = vec![
"generated_interval",
"generated_datetime",
"generated_map",
"generated_nested",
"generated_null_trivial",
"generated_null",
"generated_primitive_no_batches",
"generated_primitive_zerolength",
"generated_primitive",
];
paths.iter().for_each(|path| {
let file = File::open(format!(
"{}/arrow-ipc-stream/integration/1.0.0-bigendian/{}.arrow_file",
testdata, path
))
.unwrap();

FileReader::try_new(file, None).unwrap();
});
}

#[test]
fn projection_should_work() {
// read the same generated files, but with a projection applied
let testdata = crate::util::test_util::arrow_test_data();
let paths = vec![
"generated_interval",
"generated_datetime",
"generated_map",
"generated_nested",
"generated_null_trivial",
"generated_null",
"generated_primitive_no_batches",
"generated_primitive_zerolength",
"generated_primitive",
];
paths.iter().for_each(|path| {
// We must use littleendian files here.
// The offsets are not translated for big-endian files
// https://github.com/apache/arrow-rs/issues/859
let file = File::open(format!(
"{}/arrow-ipc-stream/integration/1.0.0-littleendian/{}.arrow_file",
testdata, path
))
.unwrap();

let reader = FileReader::try_new(file, Some(vec![0])).unwrap();
let datatype_0 = reader.schema().fields()[0].data_type().clone();
reader.for_each(|batch| {
let batch = batch.unwrap();
assert_eq!(batch.columns().len(), 1);
assert_eq!(datatype_0, batch.schema().fields()[0].data_type().clone());
});
});
}
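
Aside: the Some(vec![0]) argument exercised above is FileReader's public projection parameter, not test-only plumbing. A minimal usage sketch, separate from the diff; the input path is hypothetical:

use std::fs::File;

use arrow::ipc::reader::FileReader;

fn main() {
    // Hypothetical path; any little-endian Arrow IPC file will do.
    let file = File::open("/tmp/example.arrow_file").unwrap();

    // Keep only column 0; the reader's schema is narrowed to match.
    let reader = FileReader::try_new(file, Some(vec![0])).unwrap();
    assert_eq!(reader.schema().fields().len(), 1);

    for batch in reader {
        // Each record batch carries just the projected column.
        assert_eq!(batch.unwrap().columns().len(), 1);
    }
}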

#[test]
#[cfg(not(feature = "force_validate"))]
fn read_generated_streams_014() {
let testdata = crate::util::test_util::arrow_test_data();
let version = "0.14.1";
// the test is repetitive, thus we can read all supported files at once
let paths = vec![
"generated_interval",
"generated_datetime",
"generated_dictionary",
"generated_map",
"generated_nested",
"generated_primitive_no_batches",
"generated_primitive_zerolength",
"generated_primitive",
"generated_decimal",
];
paths.iter().for_each(|path| {
let file = File::open(format!(
"{}/arrow-ipc-stream/integration/{}/{}.stream",
testdata, version, path
))
.unwrap();

let mut reader = StreamReader::try_new(file, None).unwrap();

// read expected JSON output
let arrow_json = read_gzip_json(version, path);
assert!(arrow_json.equals_reader(&mut reader).unwrap());
// there must be no further batches
assert!(reader.next().is_none());
// the stream must indicate that it's finished
assert!(reader.is_finished());
});
}

#[test]
fn read_generated_files_100() {
let testdata = crate::util::test_util::arrow_test_data();
let version = "1.0.0-littleendian";
// the test is repetitive, thus we can read all supported files at once
let paths = vec![
"generated_interval",
"generated_datetime",
"generated_dictionary",
"generated_map",
// "generated_map_non_canonical",
"generated_nested",
"generated_null_trivial",
"generated_null",
"generated_primitive_no_batches",
"generated_primitive_zerolength",
"generated_primitive",
];
paths.iter().for_each(|path| {
let file = File::open(format!(
"{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
testdata, version, path
))
.unwrap();

let mut reader = FileReader::try_new(file, None).unwrap();

// read expected JSON output
let arrow_json = read_gzip_json(version, path);
assert!(arrow_json.equals_reader(&mut reader).unwrap());
});
}

#[test]
fn read_generated_streams_100() {
let testdata = crate::util::test_util::arrow_test_data();
let version = "1.0.0-littleendian";
// the test is repetitive, thus we can read all supported files at once
let paths = vec![
"generated_interval",
"generated_datetime",
"generated_dictionary",
"generated_map",
// "generated_map_non_canonical",
"generated_nested",
"generated_null_trivial",
"generated_null",
"generated_primitive_no_batches",
"generated_primitive_zerolength",
"generated_primitive",
];
paths.iter().for_each(|path| {
let file = File::open(format!(
"{}/arrow-ipc-stream/integration/{}/{}.stream",
testdata, version, path
))
.unwrap();

let mut reader = StreamReader::try_new(file, None).unwrap();

// read expected JSON output
let arrow_json = read_gzip_json(version, path);
assert!(arrow_json.equals_reader(&mut reader).unwrap());
// there must be no further batches
assert!(reader.next().is_none());
// the stream must indicate that it's finished
assert!(reader.is_finished());
});
}

#[test]
#[cfg(feature = "ipc_compression")]
fn read_generated_streams_200() {
let testdata = crate::util::test_util::arrow_test_data();
let version = "2.0.0-compression";

// the test is repetitive, thus we can read all supported files at once
let paths = vec!["generated_lz4", "generated_zstd"];
paths.iter().for_each(|path| {
let file = File::open(format!(
"{}/arrow-ipc-stream/integration/{}/{}.stream",
testdata, version, path
))
.unwrap();

let mut reader = StreamReader::try_new(file, None).unwrap();

// read expected JSON output
let arrow_json = read_gzip_json(version, path);
assert!(arrow_json.equals_reader(&mut reader).unwrap());
// there must be no further batches
assert!(reader.next().is_none());
// the stream must indicate that it's finished
assert!(reader.is_finished());
});
}

#[test]
#[cfg(not(feature = "ipc_compression"))]
fn read_generated_streams_200_negative() {
let testdata = crate::util::test_util::arrow_test_data();
let version = "2.0.0-compression";

// the test is repetitive, thus we can read all supported files at once
let cases = vec![("generated_lz4", "LZ4_FRAME"), ("generated_zstd", "ZSTD")];
cases.iter().for_each(|(path, compression_name)| {
let file = File::open(format!(
"{}/arrow-ipc-stream/integration/{}/{}.stream",
testdata, version, path
))
.unwrap();

let mut reader = StreamReader::try_new(file, None).unwrap();
let err = reader.next().unwrap().unwrap_err();
let expected_error = format!(
"Invalid argument error: compression type {} not supported because arrow was not compiled with the ipc_compression feature",
compression_name
);
assert_eq!(err.to_string(), expected_error);
});
}

#[test]
#[cfg(feature = "ipc_compression")]
fn read_generated_files_200() {
let testdata = crate::util::test_util::arrow_test_data();
let version = "2.0.0-compression";
// the test is repetitive, thus we can read all supported files at once
let paths = vec!["generated_lz4", "generated_zstd"];
paths.iter().for_each(|path| {
let file = File::open(format!(
"{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
testdata, version, path
))
.unwrap();

let mut reader = FileReader::try_new(file, None).unwrap();

// read expected JSON output
let arrow_json = read_gzip_json(version, path);
assert!(arrow_json.equals_reader(&mut reader).unwrap());
});
}

#[test]
#[cfg(not(feature = "ipc_compression"))]
fn read_generated_files_200_negative() {
let testdata = crate::util::test_util::arrow_test_data();
let version = "2.0.0-compression";
// the test is repetitive, thus we can read all supported files at once
let cases = vec![("generated_lz4", "LZ4_FRAME"), ("generated_zstd", "ZSTD")];
cases.iter().for_each(|(path, compression_name)| {
let file = File::open(format!(
"{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
testdata, version, path
))
.unwrap();

let mut reader = FileReader::try_new(file, None).unwrap();

let err = reader.next().unwrap().unwrap_err();
let expected_error = format!(
"Invalid argument error: compression type {} not supported because arrow was not compiled with the ipc_compression feature",
compression_name
);
assert_eq!(err.to_string(), expected_error);
});
}

fn create_test_projection_schema() -> Schema {
// define field types
@@ -1816,22 +1488,6 @@ mod tests {
check_union_with_builder(UnionBuilder::new_sparse());
}

/// Read gzipped JSON file
fn read_gzip_json(version: &str, path: &str) -> ArrowJson {
let testdata = crate::util::test_util::arrow_test_data();
let file = File::open(format!(
"{}/arrow-ipc-stream/integration/{}/{}.json.gz",
testdata, version, path
))
.unwrap();
let mut gz = GzDecoder::new(&file);
let mut s = String::new();
gz.read_to_string(&mut s).unwrap();
// convert to Arrow JSON
let arrow_json: ArrowJson = serde_json::from_str(&s).unwrap();
arrow_json
}

#[test]
fn test_roundtrip_stream_nested_dict() {
let xs = vec!["AA", "BB", "AA", "CC", "BB"];
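For reference, the reader tests removed above (truncated in this view) all follow one validate-against-JSON pattern: open the IPC file, decompress the matching .json.gz, deserialize it into ArrowJson, and compare the reader's batches against it. A condensed, standalone sketch of that pattern, using the pre-change arrow::util::integration_util path from the imports above (this PR moves that plumbing into arrow-integration-testing) and assuming ARROW_TEST_DATA points at an arrow-testing data checkout:

use std::fs::File;
use std::io::Read;

use flate2::read::GzDecoder;

use arrow::ipc::reader::FileReader;
use arrow::util::integration_util::ArrowJson;

fn main() {
    let testdata = std::env::var("ARROW_TEST_DATA").unwrap();
    let (version, path) = ("1.0.0-littleendian", "generated_primitive");

    // Open the Arrow IPC file under test.
    let file = File::open(format!(
        "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
        testdata, version, path
    ))
    .unwrap();
    let mut reader = FileReader::try_new(file, None).unwrap();

    // Read the gzipped JSON description of the expected contents.
    let gz = File::open(format!(
        "{}/arrow-ipc-stream/integration/{}/{}.json.gz",
        testdata, version, path
    ))
    .unwrap();
    let mut json = String::new();
    GzDecoder::new(gz).read_to_string(&mut json).unwrap();
    let arrow_json: ArrowJson = serde_json::from_str(&json).unwrap();

    // Compare batch-by-batch, as the removed tests did.
    assert!(arrow_json.equals_reader(&mut reader).unwrap());
}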
