From a2f6a7233b924cffff7c9a8cb7e7ad38b56d8832 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Mon, 12 Sep 2022 17:55:36 +0100 Subject: [PATCH 01/16] Split out arrow-schema (#2594) --- Cargo.toml | 1 + arrow-schema/Cargo.toml | 52 + arrow-schema/src/datatype.rs | 683 ++++++++ arrow-schema/src/error.rs | 45 + .../datatypes => arrow-schema/src}/field.rs | 88 +- arrow-schema/src/lib.rs | 23 + .../datatypes => arrow-schema/src}/schema.rs | 63 +- arrow/Cargo.toml | 3 +- arrow/src/array/array_decimal.rs | 14 +- arrow/src/array/builder/decimal_builder.rs | 5 +- arrow/src/array/data.rs | 8 +- arrow/src/csv/reader.rs | 3 +- arrow/src/datatypes/datatype.rs | 1499 ----------------- arrow/src/datatypes/mod.rs | 10 +- arrow/src/datatypes/types.rs | 2 +- arrow/src/error.rs | 11 +- arrow/src/record_batch.rs | 5 +- arrow/src/util/decimal.rs | 809 ++++++++- 18 files changed, 1727 insertions(+), 1597 deletions(-) create mode 100644 arrow-schema/Cargo.toml create mode 100644 arrow-schema/src/datatype.rs create mode 100644 arrow-schema/src/error.rs rename {arrow/src/datatypes => arrow-schema/src}/field.rs (91%) create mode 100644 arrow-schema/src/lib.rs rename {arrow/src/datatypes => arrow-schema/src}/schema.rs (90%) delete mode 100644 arrow/src/datatypes/datatype.rs diff --git a/Cargo.toml b/Cargo.toml index 9bf55c0f236..5afe98173ba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ [workspace] members = [ "arrow", + "arrow-schema", "parquet", "parquet_derive", "parquet_derive_test", diff --git a/arrow-schema/Cargo.toml b/arrow-schema/Cargo.toml new file mode 100644 index 00000000000..272e55a8fc5 --- /dev/null +++ b/arrow-schema/Cargo.toml @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "arrow-schema" +version = "22.0.0" +description = "Defines the logical types for arrow arrays" +homepage = "https://github.com/apache/arrow-rs" +repository = "https://github.com/apache/arrow-rs" +authors = ["Apache Arrow "] +license = "Apache-2.0" +keywords = ["arrow"] +include = [ + "benches/*.rs", + "src/**/*.rs", + "Cargo.toml", +] +edition = "2021" +rust-version = "1.62" + +[lib] +name = "arrow_schema" +path = "src/lib.rs" +bench = false + +[dependencies] +serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } +serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true } + +[package.metadata.docs.rs] +features = ["json"] + +[features] +default = [] +json = ["serde", "serde_json"] + +[dev-dependencies] + diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs new file mode 100644 index 00000000000..8e553749dbc --- /dev/null +++ b/arrow-schema/src/datatype.rs @@ -0,0 +1,683 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::fmt; + +use crate::field::Field; + +#[cfg(feature = "json")] +use crate::error::ArrowSchemaError; + +/// The set of datatypes that are supported by this implementation of Apache Arrow. +/// +/// The Arrow specification on data types includes some more types. +/// See also [`Schema.fbs`](https://github.com/apache/arrow/blob/master/format/Schema.fbs) +/// for Arrow's specification. +/// +/// The variants of this enum include primitive fixed size types as well as parametric or +/// nested types. +/// Currently the Rust implementation supports the following nested types: +/// - `List` +/// - `Struct` +/// +/// Nested types can themselves be nested within other arrays. +/// For more information on these types please see +/// [the physical memory layout of Apache Arrow](https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout). +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum DataType { + /// Null type + Null, + /// A boolean datatype representing the values `true` and `false`. + Boolean, + /// A signed 8-bit integer. + Int8, + /// A signed 16-bit integer. + Int16, + /// A signed 32-bit integer. + Int32, + /// A signed 64-bit integer. + Int64, + /// An unsigned 8-bit integer. + UInt8, + /// An unsigned 16-bit integer. + UInt16, + /// An unsigned 32-bit integer. + UInt32, + /// An unsigned 64-bit integer. + UInt64, + /// A 16-bit floating point number. + Float16, + /// A 32-bit floating point number. 
+ Float32, + /// A 64-bit floating point number. + Float64, + /// A timestamp with an optional timezone. + /// + /// Time is measured as a Unix epoch, counting the seconds from + /// 00:00:00.000 on 1 January 1970, excluding leap seconds, + /// as a 64-bit integer. + /// + /// The time zone is a string indicating the name of a time zone, one of: + /// + /// * As used in the Olson time zone database (the "tz database" or + /// "tzdata"), such as "America/New_York" + /// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 + /// + /// Timestamps with a non-empty timezone + /// ------------------------------------ + /// + /// If a Timestamp column has a non-empty timezone value, its epoch is + /// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone + /// (the Unix epoch), regardless of the Timestamp's own timezone. + /// + /// Therefore, timestamp values with a non-empty timezone correspond to + /// physical points in time together with some additional information about + /// how the data was obtained and/or how to display it (the timezone). + /// + /// For example, the timestamp value 0 with the timezone string "Europe/Paris" + /// corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the + /// application may prefer to display it as "January 1st 1970, 01h00" in + /// the Europe/Paris timezone (which is the same physical point in time). + /// + /// One consequence is that timestamp values with a non-empty timezone + /// can be compared and ordered directly, since they all share the same + /// well-known point of reference (the Unix epoch). + /// + /// Timestamps with an unset / empty timezone + /// ----------------------------------------- + /// + /// If a Timestamp column has no timezone value, its epoch is + /// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone. 
+ /// + /// Therefore, timestamp values without a timezone cannot be meaningfully + /// interpreted as physical points in time, but only as calendar / clock + /// indications ("wall clock time") in an unspecified timezone. + /// + /// For example, the timestamp value 0 with an empty timezone string + /// corresponds to "January 1st 1970, 00h00" in an unknown timezone: there + /// is not enough information to interpret it as a well-defined physical + /// point in time. + /// + /// One consequence is that timestamp values without a timezone cannot + /// be reliably compared or ordered, since they may have different points of + /// reference. In particular, it is *not* possible to interpret an unset + /// or empty timezone as the same as "UTC". + /// + /// Conversion between timezones + /// ---------------------------- + /// + /// If a Timestamp column has a non-empty timezone, changing the timezone + /// to a different non-empty value is a metadata-only operation: + /// the timestamp values need not change as their point of reference remains + /// the same (the Unix epoch). + /// + /// However, if a Timestamp column has no timezone value, changing it to a + /// non-empty value requires to think about the desired semantics. + /// One possibility is to assume that the original timestamp values are + /// relative to the epoch of the timezone being set; timestamp values should + /// then be adjusted to the Unix epoch (for example, changing the timezone from + /// empty to "Europe/Paris" would require converting the timestamp values + /// from "Europe/Paris" to "UTC", which seems counter-intuitive but is + /// nevertheless correct). + Timestamp(TimeUnit, Option), + /// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01) + /// in days (32 bits). + Date32, + /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01) + /// in milliseconds (64 bits). Values are evenly divisible by 86400000.
+ Date64, + /// A 32-bit time representing the elapsed time since midnight in the unit of `TimeUnit`. + Time32(TimeUnit), + /// A 64-bit time representing the elapsed time since midnight in the unit of `TimeUnit`. + Time64(TimeUnit), + /// Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds. + Duration(TimeUnit), + /// A "calendar" interval which models types that don't necessarily + /// have a precise duration without the context of a base timestamp (e.g. + /// days can differ in length during day light savings time transitions). + Interval(IntervalUnit), + /// Opaque binary data of variable length. + Binary, + /// Opaque binary data of fixed size. + /// Enum parameter specifies the number of bytes per value. + FixedSizeBinary(i32), + /// Opaque binary data of variable length and 64-bit offsets. + LargeBinary, + /// A variable-length string in Unicode with UTF-8 encoding. + Utf8, + /// A variable-length string in Unicode with UTF-8 encoding and 64-bit offsets. + LargeUtf8, + /// A list of some logical data type with variable length. + List(Box), + /// A list of some logical data type with fixed length. + FixedSizeList(Box, i32), + /// A list of some logical data type with variable length and 64-bit offsets. + LargeList(Box), + /// A nested datatype that contains a number of sub-fields. + Struct(Vec), + /// A nested datatype that can represent slots of differing types. Components: + /// + /// 1. [`Field`] for each possible child type the Union can hold + /// 2. The corresponding `type_id` used to identify which Field + /// 3. The type of union (Sparse or Dense) + Union(Vec, Vec, UnionMode), + /// A dictionary encoded array (`key_type`, `value_type`), where + /// each array element is an index of `key_type` into an + /// associated dictionary of `value_type`.
+ /// + /// Dictionary arrays are used to store columns of `value_type` + /// that contain many repeated values using less memory, but with + /// a higher CPU overhead for some operations. + /// + /// This type is mostly used to represent low cardinality string + /// arrays or a limited set of primitive types as integers. + Dictionary(Box, Box), + /// Exact 128-bit width decimal value with precision and scale + /// + /// * precision is the total number of digits + /// * scale is the number of digits past the decimal + /// + /// For example the number 123.45 has precision 5 and scale 2. + Decimal128(u8, u8), + /// Exact 256-bit width decimal value with precision and scale + /// + /// * precision is the total number of digits + /// * scale is the number of digits past the decimal + /// + /// For example the number 123.45 has precision 5 and scale 2. + Decimal256(u8, u8), + /// A Map is a logical nested type that is represented as + /// + /// `List<entries: Struct<key: K, value: V>>` + /// + /// The keys and values are each respectively contiguous. + /// The key and value types are not constrained, but keys should be + /// hashable and unique. + /// Whether the keys are sorted can be set in the `bool` after the `Field`. + /// + /// In a field with Map type, the field has a child Struct field, which then + /// has two children: key type and the second the value type. The names of the + /// child fields may be respectively "entries", "key", and "value", but this is + /// not enforced. + Map(Box, bool), +} + +/// An absolute length of time in seconds, milliseconds, microseconds or nanoseconds. +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum TimeUnit { + /// Time in seconds. + Second, + /// Time in milliseconds. + Millisecond, + /// Time in microseconds. + Microsecond, + /// Time in nanoseconds. + Nanosecond, +} + +/// YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO interval in SQL style.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum IntervalUnit { + /// Indicates the number of elapsed whole months, stored as 4-byte integers. + YearMonth, + /// Indicates the number of elapsed days and milliseconds, + /// stored as 2 contiguous 32-bit integers (days, milliseconds) (8-bytes in total). + DayTime, + /// A triple of the number of elapsed months, days, and nanoseconds. + /// The values are stored contiguously in 16 byte blocks. Months and + /// days are encoded as 32 bit integers and nanoseconds is encoded as a + /// 64 bit integer. All integers are signed. Each field is independent + /// (e.g. there is no constraint that nanoseconds have the same sign + /// as days or that the quantity of nanoseconds represents less + /// than a day's worth of time). + MonthDayNano, +} + +// Sparse or Dense union layouts +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum UnionMode { + Sparse, + Dense, +} + +impl fmt::Display for DataType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self) + } +} + +impl DataType { + /// Parse a data type from a JSON representation. 
+ #[cfg(feature = "json")] + pub fn from(json: &serde_json::Value) -> Result { + use serde_json::Value; + let default_field = Field::new("", DataType::Boolean, true); + match *json { + Value::Object(ref map) => match map.get("name") { + Some(s) if s == "null" => Ok(DataType::Null), + Some(s) if s == "bool" => Ok(DataType::Boolean), + Some(s) if s == "binary" => Ok(DataType::Binary), + Some(s) if s == "largebinary" => Ok(DataType::LargeBinary), + Some(s) if s == "utf8" => Ok(DataType::Utf8), + Some(s) if s == "largeutf8" => Ok(DataType::LargeUtf8), + Some(s) if s == "fixedsizebinary" => { + // return a list with any type as its child isn't defined in the map + if let Some(Value::Number(size)) = map.get("byteWidth") { + Ok(DataType::FixedSizeBinary(size.as_i64().unwrap() as i32)) + } else { + Err(ArrowSchemaError::Parse( + "Expecting a byteWidth for fixedsizebinary".to_string(), + )) + } + } + Some(s) if s == "decimal" => { + // return a list with any type as its child isn't defined in the map + let precision = match map.get("precision") { + Some(p) => Ok(p.as_u64().unwrap().try_into().unwrap()), + None => Err(ArrowSchemaError::Parse( + "Expecting a precision for decimal".to_string(), + )), + }?; + let scale = match map.get("scale") { + Some(s) => Ok(s.as_u64().unwrap().try_into().unwrap()), + _ => Err(ArrowSchemaError::Parse( + "Expecting a scale for decimal".to_string(), + )), + }?; + let bit_width: usize = match map.get("bitWidth") { + Some(b) => b.as_u64().unwrap() as usize, + _ => 128, // Default bit width + }; + + if bit_width == 128 { + Ok(DataType::Decimal128(precision, scale)) + } else if bit_width == 256 { + Ok(DataType::Decimal256(precision, scale)) + } else { + Err(ArrowSchemaError::Parse( + "Decimal bit_width invalid".to_string(), + )) + } + } + Some(s) if s == "floatingpoint" => match map.get("precision") { + Some(p) if p == "HALF" => Ok(DataType::Float16), + Some(p) if p == "SINGLE" => Ok(DataType::Float32), + Some(p) if p == "DOUBLE" => 
Ok(DataType::Float64), + _ => Err(ArrowSchemaError::Parse( + "floatingpoint precision missing or invalid".to_string(), + )), + }, + Some(s) if s == "timestamp" => { + let unit = match map.get("unit") { + Some(p) if p == "SECOND" => Ok(TimeUnit::Second), + Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond), + Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond), + Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond), + _ => Err(ArrowSchemaError::Parse( + "timestamp unit missing or invalid".to_string(), + )), + }; + let tz = match map.get("timezone") { + None => Ok(None), + Some(Value::String(tz)) => Ok(Some(tz.clone())), + _ => Err(ArrowSchemaError::Parse( + "timezone must be a string".to_string(), + )), + }; + Ok(DataType::Timestamp(unit?, tz?)) + } + Some(s) if s == "date" => match map.get("unit") { + Some(p) if p == "DAY" => Ok(DataType::Date32), + Some(p) if p == "MILLISECOND" => Ok(DataType::Date64), + _ => Err(ArrowSchemaError::Parse( + "date unit missing or invalid".to_string(), + )), + }, + Some(s) if s == "time" => { + let unit = match map.get("unit") { + Some(p) if p == "SECOND" => Ok(TimeUnit::Second), + Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond), + Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond), + Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond), + _ => Err(ArrowSchemaError::Parse( + "time unit missing or invalid".to_string(), + )), + }; + match map.get("bitWidth") { + Some(p) if p == 32 => Ok(DataType::Time32(unit?)), + Some(p) if p == 64 => Ok(DataType::Time64(unit?)), + _ => Err(ArrowSchemaError::Parse( + "time bitWidth missing or invalid".to_string(), + )), + } + } + Some(s) if s == "duration" => match map.get("unit") { + Some(p) if p == "SECOND" => Ok(DataType::Duration(TimeUnit::Second)), + Some(p) if p == "MILLISECOND" => { + Ok(DataType::Duration(TimeUnit::Millisecond)) + } + Some(p) if p == "MICROSECOND" => { + Ok(DataType::Duration(TimeUnit::Microsecond)) + } + Some(p) if p == "NANOSECOND" 
=> { + Ok(DataType::Duration(TimeUnit::Nanosecond)) + } + _ => Err(ArrowSchemaError::Parse( + "time unit missing or invalid".to_string(), + )), + }, + Some(s) if s == "interval" => match map.get("unit") { + Some(p) if p == "DAY_TIME" => { + Ok(DataType::Interval(IntervalUnit::DayTime)) + } + Some(p) if p == "YEAR_MONTH" => { + Ok(DataType::Interval(IntervalUnit::YearMonth)) + } + Some(p) if p == "MONTH_DAY_NANO" => { + Ok(DataType::Interval(IntervalUnit::MonthDayNano)) + } + _ => Err(ArrowSchemaError::Parse( + "interval unit missing or invalid".to_string(), + )), + }, + Some(s) if s == "int" => match map.get("isSigned") { + Some(&Value::Bool(true)) => match map.get("bitWidth") { + Some(&Value::Number(ref n)) => match n.as_u64() { + Some(8) => Ok(DataType::Int8), + Some(16) => Ok(DataType::Int16), + Some(32) => Ok(DataType::Int32), + Some(64) => Ok(DataType::Int64), + _ => Err(ArrowSchemaError::Parse( + "int bitWidth missing or invalid".to_string(), + )), + }, + _ => Err(ArrowSchemaError::Parse( + "int bitWidth missing or invalid".to_string(), + )), + }, + Some(&Value::Bool(false)) => match map.get("bitWidth") { + Some(&Value::Number(ref n)) => match n.as_u64() { + Some(8) => Ok(DataType::UInt8), + Some(16) => Ok(DataType::UInt16), + Some(32) => Ok(DataType::UInt32), + Some(64) => Ok(DataType::UInt64), + _ => Err(ArrowSchemaError::Parse( + "int bitWidth missing or invalid".to_string(), + )), + }, + _ => Err(ArrowSchemaError::Parse( + "int bitWidth missing or invalid".to_string(), + )), + }, + _ => Err(ArrowSchemaError::Parse( + "int signed missing or invalid".to_string(), + )), + }, + Some(s) if s == "list" => { + // return a list with any type as its child isn't defined in the map + Ok(DataType::List(Box::new(default_field))) + } + Some(s) if s == "largelist" => { + // return a largelist with any type as its child isn't defined in the map + Ok(DataType::LargeList(Box::new(default_field))) + } + Some(s) if s == "fixedsizelist" => { + // return a list with any type 
as its child isn't defined in the map + if let Some(Value::Number(size)) = map.get("listSize") { + Ok(DataType::FixedSizeList( + Box::new(default_field), + size.as_i64().unwrap() as i32, + )) + } else { + Err(ArrowSchemaError::Parse( + "Expecting a listSize for fixedsizelist".to_string(), + )) + } + } + Some(s) if s == "struct" => { + // return an empty `struct` type as its children aren't defined in the map + Ok(DataType::Struct(vec![])) + } + Some(s) if s == "map" => { + if let Some(Value::Bool(keys_sorted)) = map.get("keysSorted") { + // Return a map with an empty type as its children aren't defined in the map + Ok(DataType::Map(Box::new(default_field), *keys_sorted)) + } else { + Err(ArrowSchemaError::Parse( + "Expecting a keysSorted for map".to_string(), + )) + } + } + Some(s) if s == "union" => { + if let Some(Value::String(mode)) = map.get("mode") { + let union_mode = if mode == "SPARSE" { + UnionMode::Sparse + } else if mode == "DENSE" { + UnionMode::Dense + } else { + return Err(ArrowSchemaError::Parse(format!( + "Unknown union mode {:?} for union", + mode + ))); + }; + if let Some(type_ids) = map.get("typeIds") { + let type_ids = type_ids + .as_array() + .unwrap() + .iter() + .map(|t| t.as_i64().unwrap() as i8) + .collect::>(); + + let default_fields = type_ids + .iter() + .map(|_| default_field.clone()) + .collect::>(); + + Ok(DataType::Union(default_fields, type_ids, union_mode)) + } else { + Err(ArrowSchemaError::Parse( + "Expecting a typeIds for union ".to_string(), + )) + } + } else { + Err(ArrowSchemaError::Parse( + "Expecting a mode for union".to_string(), + )) + } + } + Some(other) => Err(ArrowSchemaError::Parse(format!( + "invalid or unsupported type name: {} in {:?}", + other, json + ))), + None => Err(ArrowSchemaError::Parse("type name missing".to_string())), + }, + _ => Err(ArrowSchemaError::Parse( + "invalid json value type".to_string(), + )), + } + } + + /// Generate a JSON representation of the data type. 
+ #[cfg(feature = "json")] + pub fn to_json(&self) -> serde_json::Value { + use serde_json::json; + match self { + DataType::Null => json!({"name": "null"}), + DataType::Boolean => json!({"name": "bool"}), + DataType::Int8 => json!({"name": "int", "bitWidth": 8, "isSigned": true}), + DataType::Int16 => json!({"name": "int", "bitWidth": 16, "isSigned": true}), + DataType::Int32 => json!({"name": "int", "bitWidth": 32, "isSigned": true}), + DataType::Int64 => json!({"name": "int", "bitWidth": 64, "isSigned": true}), + DataType::UInt8 => json!({"name": "int", "bitWidth": 8, "isSigned": false}), + DataType::UInt16 => json!({"name": "int", "bitWidth": 16, "isSigned": false}), + DataType::UInt32 => json!({"name": "int", "bitWidth": 32, "isSigned": false}), + DataType::UInt64 => json!({"name": "int", "bitWidth": 64, "isSigned": false}), + DataType::Float16 => json!({"name": "floatingpoint", "precision": "HALF"}), + DataType::Float32 => json!({"name": "floatingpoint", "precision": "SINGLE"}), + DataType::Float64 => json!({"name": "floatingpoint", "precision": "DOUBLE"}), + DataType::Utf8 => json!({"name": "utf8"}), + DataType::LargeUtf8 => json!({"name": "largeutf8"}), + DataType::Binary => json!({"name": "binary"}), + DataType::LargeBinary => json!({"name": "largebinary"}), + DataType::FixedSizeBinary(byte_width) => { + json!({"name": "fixedsizebinary", "byteWidth": byte_width}) + } + DataType::Struct(_) => json!({"name": "struct"}), + DataType::Union(_, _, _) => json!({"name": "union"}), + DataType::List(_) => json!({ "name": "list"}), + DataType::LargeList(_) => json!({ "name": "largelist"}), + DataType::FixedSizeList(_, length) => { + json!({"name":"fixedsizelist", "listSize": length}) + } + DataType::Time32(unit) => { + json!({"name": "time", "bitWidth": 32, "unit": match unit { + TimeUnit::Second => "SECOND", + TimeUnit::Millisecond => "MILLISECOND", + TimeUnit::Microsecond => "MICROSECOND", + TimeUnit::Nanosecond => "NANOSECOND", + }}) + } + DataType::Time64(unit) 
=> { + json!({"name": "time", "bitWidth": 64, "unit": match unit { + TimeUnit::Second => "SECOND", + TimeUnit::Millisecond => "MILLISECOND", + TimeUnit::Microsecond => "MICROSECOND", + TimeUnit::Nanosecond => "NANOSECOND", + }}) + } + DataType::Date32 => { + json!({"name": "date", "unit": "DAY"}) + } + DataType::Date64 => { + json!({"name": "date", "unit": "MILLISECOND"}) + } + DataType::Timestamp(unit, None) => { + json!({"name": "timestamp", "unit": match unit { + TimeUnit::Second => "SECOND", + TimeUnit::Millisecond => "MILLISECOND", + TimeUnit::Microsecond => "MICROSECOND", + TimeUnit::Nanosecond => "NANOSECOND", + }}) + } + DataType::Timestamp(unit, Some(tz)) => { + json!({"name": "timestamp", "unit": match unit { + TimeUnit::Second => "SECOND", + TimeUnit::Millisecond => "MILLISECOND", + TimeUnit::Microsecond => "MICROSECOND", + TimeUnit::Nanosecond => "NANOSECOND", + }, "timezone": tz}) + } + DataType::Interval(unit) => json!({"name": "interval", "unit": match unit { + IntervalUnit::YearMonth => "YEAR_MONTH", + IntervalUnit::DayTime => "DAY_TIME", + IntervalUnit::MonthDayNano => "MONTH_DAY_NANO", + }}), + DataType::Duration(unit) => json!({"name": "duration", "unit": match unit { + TimeUnit::Second => "SECOND", + TimeUnit::Millisecond => "MILLISECOND", + TimeUnit::Microsecond => "MICROSECOND", + TimeUnit::Nanosecond => "NANOSECOND", + }}), + DataType::Dictionary(_, _) => json!({ "name": "dictionary"}), + DataType::Decimal128(precision, scale) => { + json!({"name": "decimal", "precision": precision, "scale": scale, "bitWidth": 128}) + } + DataType::Decimal256(precision, scale) => { + json!({"name": "decimal", "precision": precision, "scale": scale, "bitWidth": 256}) + } + DataType::Map(_, keys_sorted) => { + json!({"name": "map", "keysSorted": keys_sorted}) + } + } + } + + /// Returns true if this type is numeric: (UInt*, Int*, or Float*). 
+ pub fn is_numeric(t: &DataType) -> bool { + use DataType::*; + matches!( + t, + UInt8 + | UInt16 + | UInt32 + | UInt64 + | Int8 + | Int16 + | Int32 + | Int64 + | Float32 + | Float64 + ) + } + + /// Returns true if this type is temporal: (Date*, Time*, Duration, or Interval). + pub fn is_temporal(t: &DataType) -> bool { + use DataType::*; + matches!( + t, + Date32 + | Date64 + | Timestamp(_, _) + | Time32(_) + | Time64(_) + | Duration(_) + | Interval(_) + ) + } + + /// Returns true if this type is valid as a dictionary key + /// (e.g. [`super::ArrowDictionaryKeyType`]) + pub fn is_dictionary_key_type(t: &DataType) -> bool { + use DataType::*; + matches!( + t, + UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 + ) + } + + /// Compares the datatype with another, ignoring nested field names + /// and metadata. + pub fn equals_datatype(&self, other: &DataType) -> bool { + match (&self, other) { + (DataType::List(a), DataType::List(b)) + | (DataType::LargeList(a), DataType::LargeList(b)) => { + a.is_nullable() == b.is_nullable() + && a.data_type().equals_datatype(b.data_type()) + } + (DataType::FixedSizeList(a, a_size), DataType::FixedSizeList(b, b_size)) => { + a_size == b_size + && a.is_nullable() == b.is_nullable() + && a.data_type().equals_datatype(b.data_type()) + } + (DataType::Struct(a), DataType::Struct(b)) => { + a.len() == b.len() + && a.iter().zip(b).all(|(a, b)| { + a.is_nullable() == b.is_nullable() + && a.data_type().equals_datatype(b.data_type()) + }) + } + ( + DataType::Map(a_field, a_is_sorted), + DataType::Map(b_field, b_is_sorted), + ) => a_field == b_field && a_is_sorted == b_is_sorted, + _ => self == other, + } + } +} diff --git a/arrow-schema/src/error.rs b/arrow-schema/src/error.rs new file mode 100644 index 00000000000..4ad7c65c503 --- /dev/null +++ b/arrow-schema/src/error.rs @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements.
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Defines `ArrowSchemaError` for representing failures in arrow schema + +use std::error::Error; + +#[derive(Debug)] +pub enum ArrowSchemaError { + Parse(String), + Merge(String), + Field(String), +} + +impl std::fmt::Display for ArrowSchemaError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ArrowSchemaError::Parse(message) => { + write!(f, "Error parsing schema: {}", message) + } + ArrowSchemaError::Merge(message) => { + write!(f, "Error merging schema: {}", message) + } + ArrowSchemaError::Field(message) => { + write!(f, "Error indexing field: {}", message) + } + } + } +} + +impl Error for ArrowSchemaError {} diff --git a/arrow/src/datatypes/field.rs b/arrow-schema/src/field.rs similarity index 91% rename from arrow/src/datatypes/field.rs rename to arrow-schema/src/field.rs index ac966cafe34..2da449a5d1e 100644 --- a/arrow/src/datatypes/field.rs +++ b/arrow-schema/src/field.rs @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. 
-use crate::error::{ArrowError, Result}; +use crate::error::ArrowSchemaError; use std::cmp::Ordering; use std::collections::BTreeMap; use std::hash::{Hash, Hasher}; -use super::DataType; +use crate::datatype::DataType; /// Describes a single column in a [`Schema`](super::Schema). /// @@ -145,7 +145,8 @@ impl Field { /// Set the name of the [`Field`] and returns self. /// /// ``` - /// # use arrow::datatypes::*; + /// # use arrow_schema::field::Field; + /// # use arrow_schema::datatype::DataType; /// let field = Field::new("c1", DataType::Int64, false) /// .with_name("c2"); /// @@ -165,7 +166,8 @@ impl Field { /// Set [`DataType`] of the [`Field`] and returns self. /// /// ``` - /// # use arrow::datatypes::*; + /// # use arrow_schema::field::Field; + /// # use arrow_schema::datatype::DataType; /// let field = Field::new("c1", DataType::Int64, false) /// .with_data_type(DataType::Utf8); /// @@ -185,7 +187,8 @@ impl Field { /// Set `nullable` of the [`Field`] and returns self. /// /// ``` - /// # use arrow::datatypes::*; + /// # use arrow_schema::field::Field; + /// # use arrow_schema::datatype::DataType; /// let field = Field::new("c1", DataType::Int64, false) /// .with_nullable(true); /// @@ -252,14 +255,14 @@ impl Field { /// Parse a `Field` definition from a JSON representation. 
#[cfg(feature = "json")] - pub fn from(json: &serde_json::Value) -> Result { + pub fn from(json: &serde_json::Value) -> Result { use serde_json::Value; match *json { Value::Object(ref map) => { let name = match map.get("name") { Some(&Value::String(ref name)) => name.to_string(), _ => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'name' attribute".to_string(), )); } @@ -267,7 +270,7 @@ impl Field { let nullable = match map.get("nullable") { Some(&Value::Bool(b)) => b, _ => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'nullable' attribute".to_string(), )); } @@ -275,7 +278,7 @@ impl Field { let data_type = match map.get("type") { Some(t) => DataType::from(t)?, _ => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'type' attribute".to_string(), )); } @@ -289,7 +292,7 @@ impl Field { match value.as_object() { Some(map) => { if map.len() != 2 { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field 'metadata' must have exact two entries for each key-value map".to_string(), )); } @@ -304,14 +307,14 @@ impl Field { v_str.to_string().clone(), ); } else { - return Err(ArrowError::ParseError("Field 'metadata' must have map value of string type".to_string())); + return Err(ArrowSchemaError::Parse("Field 'metadata' must have map value of string type".to_string())); } } else { - return Err(ArrowError::ParseError("Field 'metadata' lacks map keys named \"key\" or \"value\"".to_string())); + return Err(ArrowSchemaError::Parse("Field 'metadata' lacks map keys named \"key\" or \"value\"".to_string())); } } _ => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field 'metadata' contains non-object key-value pair".to_string(), )); } @@ -327,7 +330,7 @@ impl Field { if let Some(str_value) = v.as_str() { res.insert(k.clone(), str_value.to_string().clone()); } else { - return 
Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( format!("Field 'metadata' contains non-string value for key {}", k), )); } @@ -335,7 +338,7 @@ impl Field { Some(res) } Some(_) => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field `metadata` is not json array".to_string(), )); } @@ -349,7 +352,7 @@ impl Field { | DataType::FixedSizeList(_, _) => match map.get("children") { Some(Value::Array(values)) => { if values.len() != 1 { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field 'children' must have one element for a list data type".to_string(), )); } @@ -370,30 +373,30 @@ impl Field { } } Some(_) => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field 'children' must be an array".to_string(), )) } None => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'children' attribute".to_string(), )); } }, DataType::Struct(mut fields) => match map.get("children") { Some(Value::Array(values)) => { - let struct_fields: Result> = + let struct_fields: Result, _> = values.iter().map(Field::from).collect(); fields.append(&mut struct_fields?); DataType::Struct(fields) } Some(_) => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field 'children' must be an array".to_string(), )) } None => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'children' attribute".to_string(), )); } @@ -408,20 +411,20 @@ impl Field { DataType::Map(Box::new(child), keys_sorted) } t => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( format!("Map children should be a struct with 2 fields, found {:?}", t) )) } } } Some(_) => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field 'children' must be an array with 1 element" .to_string(), )) } None => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( 
"Field missing 'children' attribute".to_string(), )); } @@ -429,17 +432,19 @@ impl Field { } DataType::Union(_, type_ids, mode) => match map.get("children") { Some(Value::Array(values)) => { - let union_fields: Vec = - values.iter().map(Field::from).collect::>()?; + let union_fields: Vec = values + .iter() + .map(Field::from) + .collect::>()?; DataType::Union(union_fields, type_ids, mode) } Some(_) => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field 'children' must be an array".to_string(), )) } None => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'children' attribute".to_string(), )); } @@ -455,7 +460,7 @@ impl Field { let index_type = match dictionary.get("indexType") { Some(t) => DataType::from(t)?, _ => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'indexType' attribute".to_string(), )); } @@ -463,7 +468,7 @@ impl Field { dict_id = match dictionary.get("id") { Some(Value::Number(n)) => n.as_i64().unwrap(), _ => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'id' attribute".to_string(), )); } @@ -471,7 +476,7 @@ impl Field { dict_is_ordered = match dictionary.get("isOrdered") { Some(&Value::Bool(n)) => n, _ => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'isOrdered' attribute".to_string(), )); } @@ -489,7 +494,7 @@ impl Field { metadata, }) } - _ => Err(ArrowError::ParseError( + _ => Err(ArrowSchemaError::Parse( "Invalid json value type for field".to_string(), )), } @@ -536,19 +541,20 @@ impl Field { /// Example: /// /// ``` - /// # use arrow::datatypes::*; + /// # use arrow_schema::field::Field; + /// # use arrow_schema::datatype::DataType; /// let mut field = Field::new("c1", DataType::Int64, false); /// assert!(field.try_merge(&Field::new("c1", DataType::Int64, true)).is_ok()); /// assert!(field.is_nullable()); /// ``` - pub fn try_merge(&mut 
self, from: &Field) -> Result<()> { + pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowSchemaError> { if from.dict_id != self.dict_id { - return Err(ArrowError::SchemaError( + return Err(ArrowSchemaError::Merge( "Fail to merge schema Field due to conflicting dict_id".to_string(), )); } if from.dict_is_ordered != self.dict_is_ordered { - return Err(ArrowError::SchemaError( + return Err(ArrowSchemaError::Merge( "Fail to merge schema Field due to conflicting dict_is_ordered" .to_string(), )); @@ -560,7 +566,7 @@ impl Field { for (key, from_value) in from_metadata { if let Some(self_value) = self_metadata.get(key) { if self_value != from_value { - return Err(ArrowError::SchemaError(format!( + return Err(ArrowSchemaError::Merge(format!( "Fail to merge field due to conflicting metadata data value for key {}", key), )); } @@ -589,7 +595,7 @@ impl Field { } } _ => { - return Err(ArrowError::SchemaError( + return Err(ArrowSchemaError::Merge( "Fail to merge schema Field due to conflicting datatype" .to_string(), )); @@ -609,7 +615,7 @@ impl Field { // If the nested fields in two unions are the same, they must have same // type id. 
if self_type_id != field_type_id { - return Err(ArrowError::SchemaError( + return Err(ArrowSchemaError::Merge( "Fail to merge schema Field due to conflicting type ids in union datatype" .to_string(), )); @@ -627,7 +633,7 @@ impl Field { } } _ => { - return Err(ArrowError::SchemaError( + return Err(ArrowSchemaError::Merge( "Fail to merge schema Field due to conflicting datatype" .to_string(), )); @@ -666,7 +672,7 @@ impl Field { | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => { if self.data_type != from.data_type { - return Err(ArrowError::SchemaError( + return Err(ArrowSchemaError::Merge( "Fail to merge schema Field due to conflicting datatype" .to_string(), )); diff --git a/arrow-schema/src/lib.rs b/arrow-schema/src/lib.rs new file mode 100644 index 00000000000..867c428f1ab --- /dev/null +++ b/arrow-schema/src/lib.rs @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Arrow logical types + +pub mod datatype; +pub mod error; +pub mod field; +pub mod schema; diff --git a/arrow/src/datatypes/schema.rs b/arrow-schema/src/schema.rs similarity index 90% rename from arrow/src/datatypes/schema.rs rename to arrow-schema/src/schema.rs index efde4edefa6..e52e12b7057 100644 --- a/arrow/src/datatypes/schema.rs +++ b/arrow-schema/src/schema.rs @@ -19,9 +19,8 @@ use std::collections::HashMap; use std::fmt; use std::hash::Hash; -use crate::error::{ArrowError, Result}; - -use super::Field; +use crate::error::ArrowSchemaError; +use crate::field::Field; /// Describes the meta-data of an ordered sequence of relative types. /// @@ -53,7 +52,9 @@ impl Schema { /// # Example /// /// ``` - /// # use arrow::datatypes::{Field, DataType, Schema}; + /// # use arrow_schema::field::Field; + /// # use arrow_schema::datatype::DataType; + /// # use arrow_schema::schema::Schema; /// let field_a = Field::new("a", DataType::Int64, false); /// let field_b = Field::new("b", DataType::Boolean, false); /// @@ -69,7 +70,9 @@ impl Schema { /// # Example /// /// ``` - /// # use arrow::datatypes::{Field, DataType, Schema}; + /// # use arrow_schema::field::Field; + /// # use arrow_schema::datatype::DataType; + /// # use arrow_schema::schema::Schema; /// # use std::collections::HashMap; /// let field_a = Field::new("a", DataType::Int64, false); /// let field_b = Field::new("b", DataType::Boolean, false); @@ -95,19 +98,19 @@ impl Schema { /// Returns a new schema with only the specified columns in the new schema /// This carries metadata from the parent schema over as well - pub fn project(&self, indices: &[usize]) -> Result { + pub fn project(&self, indices: &[usize]) -> Result { let new_fields = indices .iter() .map(|i| { self.fields.get(*i).cloned().ok_or_else(|| { - ArrowError::SchemaError(format!( + ArrowSchemaError::Field(format!( "project index {} out of bounds, max field {}", i, self.fields().len() )) }) }) - .collect::>>()?; + .collect::, _>>()?; 
Ok(Self::new_with_metadata(new_fields, self.metadata.clone())) } @@ -116,7 +119,9 @@ impl Schema { /// Example: /// /// ``` - /// use arrow::datatypes::*; + /// # use arrow_schema::field::Field; + /// # use arrow_schema::datatype::DataType; + /// # use arrow_schema::schema::Schema; /// /// let merged = Schema::try_merge(vec![ /// Schema::new(vec![ @@ -139,7 +144,9 @@ impl Schema { /// ]), /// ); /// ``` - pub fn try_merge(schemas: impl IntoIterator) -> Result { + pub fn try_merge( + schemas: impl IntoIterator, + ) -> Result { schemas .into_iter() .try_fold(Self::empty(), |mut merged, schema| { @@ -148,7 +155,7 @@ impl Schema { // merge metadata if let Some(old_val) = merged.metadata.get(&key) { if old_val != &value { - return Err(ArrowError::SchemaError(format!( + return Err(ArrowSchemaError::Merge(format!( "Fail to merge schema due to conflicting metadata. \ Key '{}' has different values '{}' and '{}'", key, old_val, value @@ -179,8 +186,7 @@ impl Schema { /// Returns a vector with references to all fields (including nested fields) #[inline] - #[cfg(feature = "ipc")] - pub(crate) fn all_fields(&self) -> Vec<&Field> { + pub fn all_fields(&self) -> Vec<&Field> { self.fields.iter().flat_map(|f| f.fields()).collect() } @@ -191,7 +197,7 @@ impl Schema { } /// Returns an immutable reference of a specific [`Field`] instance selected by name. - pub fn field_with_name(&self, name: &str) -> Result<&Field> { + pub fn field_with_name(&self, name: &str) -> Result<&Field, ArrowSchemaError> { Ok(&self.fields[self.index_of(name)?]) } @@ -205,13 +211,13 @@ impl Schema { } /// Find the index of the column with the given name. 
- pub fn index_of(&self, name: &str) -> Result { + pub fn index_of(&self, name: &str) -> Result { (0..self.fields.len()) .find(|idx| self.fields[*idx].name() == name) .ok_or_else(|| { let valid_fields: Vec = self.fields.iter().map(|f| f.name().clone()).collect(); - ArrowError::InvalidArgumentError(format!( + ArrowSchemaError::Field(format!( "Unable to get field named \"{}\". Valid fields: {:?}", name, valid_fields )) @@ -244,14 +250,14 @@ impl Schema { /// Parse a `Schema` definition from a JSON representation. #[cfg(feature = "json")] - pub fn from(json: &serde_json::Value) -> Result { + pub fn from(json: &serde_json::Value) -> Result { use serde_json::Value; match *json { Value::Object(ref schema) => { let fields = if let Some(Value::Array(fields)) = schema.get("fields") { - fields.iter().map(Field::from).collect::>()? + fields.iter().map(Field::from).collect::>()? } else { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Schema fields should be an array".to_string(), )); }; @@ -264,7 +270,7 @@ impl Schema { Ok(Self { fields, metadata }) } - _ => Err(ArrowError::ParseError( + _ => Err(ArrowSchemaError::Parse( "Invalid json value type for schema".to_string(), )), } @@ -273,14 +279,16 @@ impl Schema { /// Parse a `metadata` definition from a JSON representation. /// The JSON can either be an Object or an Array of Objects. 
#[cfg(feature = "json")] - fn from_metadata(json: &serde_json::Value) -> Result> { + fn from_metadata( + json: &serde_json::Value, + ) -> Result, ArrowSchemaError> { use serde_json::Value; match json { Value::Array(_) => { let mut hashmap = HashMap::new(); let values: Vec = serde_json::from_value(json.clone()) .map_err(|_| { - ArrowError::JsonError( + ArrowSchemaError::Parse( "Unable to parse object into key-value pair".to_string(), ) })?; @@ -295,13 +303,13 @@ impl Schema { if let Value::String(v) = v { Ok((k.to_string(), v.to_string())) } else { - Err(ArrowError::ParseError( + Err(ArrowSchemaError::Parse( "metadata `value` field must be a string".to_string(), )) } }) - .collect::>(), - _ => Err(ArrowError::ParseError( + .collect::>(), + _ => Err(ArrowSchemaError::Parse( "`metadata` field must be an object".to_string(), )), } @@ -364,9 +372,8 @@ struct MetadataKeyValue { #[cfg(test)] mod tests { - use crate::datatypes::DataType; - use super::*; + use crate::datatype::DataType; #[test] #[cfg(feature = "json")] @@ -424,7 +431,7 @@ mod tests { ]) .with_metadata(metadata); - let projected: Result = schema.project(&[0, 3]); + let projected = schema.project(&[0, 3]); assert!(projected.is_err()); if let Err(e) = projected { diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 2de4db64276..10c62c84ebb 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -44,6 +44,7 @@ ahash = { version = "0.8", default-features = false, features = ["compile-time-r ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } [dependencies] +arrow-schema = { version = "22.0.0", path = "../arrow-schema" } serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true } indexmap = { version = "1.9", default-features = false, features = ["std"] } @@ -75,7 +76,7 @@ default = ["csv", "ipc", "json"] ipc_compression = ["ipc", "zstd", "lz4"] csv = 
["csv_crate"] ipc = ["flatbuffers"] -json = ["serde", "serde_json"] +json = ["serde", "serde_json", "arrow-schema/json"] simd = ["packed_simd"] prettyprint = ["comfy-table"] # The test utils feature enables code used in benchmarks and tests but diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs index 543fda1b1a8..8780e6315b4 100644 --- a/arrow/src/array/array_decimal.rs +++ b/arrow/src/array/array_decimal.rs @@ -27,13 +27,14 @@ use super::{ use super::{BooleanBufferBuilder, DecimalIter, FixedSizeBinaryArray}; #[allow(deprecated)] use crate::buffer::{Buffer, MutableBuffer}; -use crate::datatypes::validate_decimal_precision; use crate::datatypes::{ - validate_decimal256_precision_with_lt_bytes, DataType, Decimal128Type, - Decimal256Type, DecimalType, NativeDecimalType, + DataType, Decimal128Type, Decimal256Type, DecimalType, NativeDecimalType, }; use crate::error::{ArrowError, Result}; -use crate::util::decimal::{Decimal, Decimal256}; +use crate::util::decimal::{ + validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, Decimal, + Decimal256, +}; /// `Decimal128Array` stores fixed width decimal numbers, /// with a fixed precision and scale. 
@@ -549,8 +550,9 @@ impl<'a, T: DecimalType> DecimalArray { #[cfg(test)] mod tests { use crate::array::Decimal256Builder; - use crate::datatypes::{DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE}; - use crate::util::decimal::Decimal128; + use crate::util::decimal::{ + Decimal128, DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE, + }; use crate::{array::Decimal128Builder, datatypes::Field}; use num::{BigInt, Num}; diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs index daa30eebed9..c2a03862679 100644 --- a/arrow/src/array/builder/decimal_builder.rs +++ b/arrow/src/array/builder/decimal_builder.rs @@ -25,10 +25,9 @@ use crate::array::{ArrayBuilder, FixedSizeBinaryBuilder}; use crate::error::{ArrowError, Result}; -use crate::datatypes::{ - validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, +use crate::util::decimal::{ + validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, Decimal256, }; -use crate::util::decimal::Decimal256; /// Array Builder for [`Decimal128Array`] /// diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs index 7571ba210d7..bc504ff9453 100644 --- a/arrow/src/array/data.rs +++ b/arrow/src/array/data.rs @@ -18,12 +18,12 @@ //! Contains `ArrayData`, a generic representation of Arrow array data which encapsulates //! common attributes and operations for Arrow array. 
-use crate::datatypes::{ - validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, DataType, - IntervalUnit, UnionMode, -}; +use crate::datatypes::{DataType, IntervalUnit, UnionMode}; use crate::error::{ArrowError, Result}; use crate::util::bit_iterator::BitSliceIterator; +use crate::util::decimal::{ + validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, +}; use crate::{bitmap::Bitmap, datatypes::ArrowNativeType}; use crate::{ buffer::{Buffer, MutableBuffer}, diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index d164d35c3c8..54b7e045bb7 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -58,6 +58,7 @@ use crate::error::{ArrowError, Result}; use crate::record_batch::{RecordBatch, RecordBatchOptions}; use crate::util::reader_parser::Parser; +use crate::util::decimal::validate_decimal_precision; use csv_crate::{ByteRecord, StringRecord}; use std::ops::Neg; @@ -289,7 +290,7 @@ pub fn infer_schema_from_files( } } - Schema::try_merge(schemas) + Ok(Schema::try_merge(schemas)?) } // optional bounds of the reader, of the form (min line, max line). diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs deleted file mode 100644 index b65bfd7725a..00000000000 --- a/arrow/src/datatypes/datatype.rs +++ /dev/null @@ -1,1499 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use num::BigInt; -use std::cmp::Ordering; -use std::fmt; - -use crate::error::{ArrowError, Result}; -use crate::util::decimal::singed_cmp_le_bytes; - -use super::Field; - -/// The set of datatypes that are supported by this implementation of Apache Arrow. -/// -/// The Arrow specification on data types includes some more types. -/// See also [`Schema.fbs`](https://github.com/apache/arrow/blob/master/format/Schema.fbs) -/// for Arrow's specification. -/// -/// The variants of this enum include primitive fixed size types as well as parametric or -/// nested types. -/// Currently the Rust implementation supports the following nested types: -/// - `List` -/// - `Struct` -/// -/// Nested types can themselves be nested within other arrays. -/// For more information on these types please see -/// [the physical memory layout of Apache Arrow](https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout). -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum DataType { - /// Null type - Null, - /// A boolean datatype representing the values `true` and `false`. - Boolean, - /// A signed 8-bit integer. - Int8, - /// A signed 16-bit integer. - Int16, - /// A signed 32-bit integer. - Int32, - /// A signed 64-bit integer. - Int64, - /// An unsigned 8-bit integer. - UInt8, - /// An unsigned 16-bit integer. - UInt16, - /// An unsigned 32-bit integer. - UInt32, - /// An unsigned 64-bit integer. - UInt64, - /// A 16-bit floating point number. 
- Float16, - /// A 32-bit floating point number. - Float32, - /// A 64-bit floating point number. - Float64, - /// A timestamp with an optional timezone. - /// - /// Time is measured as a Unix epoch, counting the seconds from - /// 00:00:00.000 on 1 January 1970, excluding leap seconds, - /// as a 64-bit integer. - /// - /// The time zone is a string indicating the name of a time zone, one of: - /// - /// * As used in the Olson time zone database (the "tz database" or - /// "tzdata"), such as "America/New_York" - /// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 - /// - /// Timestamps with a non-empty timezone - /// ------------------------------------ - /// - /// If a Timestamp column has a non-empty timezone value, its epoch is - /// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone - /// (the Unix epoch), regardless of the Timestamp's own timezone. - /// - /// Therefore, timestamp values with a non-empty timezone correspond to - /// physical points in time together with some additional information about - /// how the data was obtained and/or how to display it (the timezone). - /// - /// For example, the timestamp value 0 with the timezone string "Europe/Paris" - /// corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the - /// application may prefer to display it as "January 1st 1970, 01h00" in - /// the Europe/Paris timezone (which is the same physical point in time). - /// - /// One consequence is that timestamp values with a non-empty timezone - /// can be compared and ordered directly, since they all share the same - /// well-known point of reference (the Unix epoch). - /// - /// Timestamps with an unset / empty timezone - /// ----------------------------------------- - /// - /// If a Timestamp column has no timezone value, its epoch is - /// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone. 
- /// - /// Therefore, timestamp values without a timezone cannot be meaningfully - /// interpreted as physical points in time, but only as calendar / clock - /// indications ("wall clock time") in an unspecified timezone. - /// - /// For example, the timestamp value 0 with an empty timezone string - /// corresponds to "January 1st 1970, 00h00" in an unknown timezone: there - /// is not enough information to interpret it as a well-defined physical - /// point in time. - /// - /// One consequence is that timestamp values without a timezone cannot - /// be reliably compared or ordered, since they may have different points of - /// reference. In particular, it is *not* possible to interpret an unset - /// or empty timezone as the same as "UTC". - /// - /// Conversion between timezones - /// ---------------------------- - /// - /// If a Timestamp column has a non-empty timezone, changing the timezone - /// to a different non-empty value is a metadata-only operation: - /// the timestamp values need not change as their point of reference remains - /// the same (the Unix epoch). - /// - /// However, if a Timestamp column has no timezone value, changing it to a - /// non-empty value requires to think about the desired semantics. - /// One possibility is to assume that the original timestamp values are - /// relative to the epoch of the timezone being set; timestamp values should - /// then adjusted to the Unix epoch (for example, changing the timezone from - /// empty to "Europe/Paris" would require converting the timestamp values - /// from "Europe/Paris" to "UTC", which seems counter-intuitive but is - /// nevertheless correct). - Timestamp(TimeUnit, Option), - /// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01) - /// in days (32 bits). - Date32, - /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01) - /// in milliseconds (64 bits). Values are evenly divisible by 86400000. 
- Date64, - /// A 32-bit time representing the elapsed time since midnight in the unit of `TimeUnit`. - Time32(TimeUnit), - /// A 64-bit time representing the elapsed time since midnight in the unit of `TimeUnit`. - Time64(TimeUnit), - /// Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds. - Duration(TimeUnit), - /// A "calendar" interval which models types that don't necessarily - /// have a precise duration without the context of a base timestamp (e.g. - /// days can differ in length during day light savings time transitions). - Interval(IntervalUnit), - /// Opaque binary data of variable length. - Binary, - /// Opaque binary data of fixed size. - /// Enum parameter specifies the number of bytes per value. - FixedSizeBinary(i32), - /// Opaque binary data of variable length and 64-bit offsets. - LargeBinary, - /// A variable-length string in Unicode with UTF-8 encoding. - Utf8, - /// A variable-length string in Unicode with UFT-8 encoding and 64-bit offsets. - LargeUtf8, - /// A list of some logical data type with variable length. - List(Box), - /// A list of some logical data type with fixed length. - FixedSizeList(Box, i32), - /// A list of some logical data type with variable length and 64-bit offsets. - LargeList(Box), - /// A nested datatype that contains a number of sub-fields. - Struct(Vec), - /// A nested datatype that can represent slots of differing types. Components: - /// - /// 1. [`Field`] for each possible child type the Union can hold - /// 2. The corresponding `type_id` used to identify which Field - /// 3. The type of union (Sparse or Dense) - Union(Vec, Vec, UnionMode), - /// A dictionary encoded array (`key_type`, `value_type`), where - /// each array element is an index of `key_type` into an - /// associated dictionary of `value_type`. 
- /// - /// Dictionary arrays are used to store columns of `value_type` - /// that contain many repeated values using less memory, but with - /// a higher CPU overhead for some operations. - /// - /// This type mostly used to represent low cardinality string - /// arrays or a limited set of primitive types as integers. - Dictionary(Box, Box), - /// Exact 128-bit width decimal value with precision and scale - /// - /// * precision is the total number of digits - /// * scale is the number of digits past the decimal - /// - /// For example the number 123.45 has precision 5 and scale 2. - Decimal128(u8, u8), - /// Exact 256-bit width decimal value with precision and scale - /// - /// * precision is the total number of digits - /// * scale is the number of digits past the decimal - /// - /// For example the number 123.45 has precision 5 and scale 2. - Decimal256(u8, u8), - /// A Map is a logical nested type that is represented as - /// - /// `List>` - /// - /// The keys and values are each respectively contiguous. - /// The key and value types are not constrained, but keys should be - /// hashable and unique. - /// Whether the keys are sorted can be set in the `bool` after the `Field`. - /// - /// In a field with Map type, the field has a child Struct field, which then - /// has two children: key type and the second the value type. The names of the - /// child fields may be respectively "entries", "key", and "value", but this is - /// not enforced. - Map(Box, bool), -} - -/// An absolute length of time in seconds, milliseconds, microseconds or nanoseconds. -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum TimeUnit { - /// Time in seconds. - Second, - /// Time in milliseconds. - Millisecond, - /// Time in microseconds. - Microsecond, - /// Time in nanoseconds. - Nanosecond, -} - -/// YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO interval in SQL style. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum IntervalUnit { - /// Indicates the number of elapsed whole months, stored as 4-byte integers. - YearMonth, - /// Indicates the number of elapsed days and milliseconds, - /// stored as 2 contiguous 32-bit integers (days, milliseconds) (8-bytes in total). - DayTime, - /// A triple of the number of elapsed months, days, and nanoseconds. - /// The values are stored contiguously in 16 byte blocks. Months and - /// days are encoded as 32 bit integers and nanoseconds is encoded as a - /// 64 bit integer. All integers are signed. Each field is independent - /// (e.g. there is no constraint that nanoseconds have the same sign - /// as days or that the quantity of nanoseconds represents less - /// than a day's worth of time). - MonthDayNano, -} - -// Sparse or Dense union layouts -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum UnionMode { - Sparse, - Dense, -} - -impl fmt::Display for DataType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self) - } -} - -// MAX decimal256 value of little-endian format for each precision. -// Each element is the max value of signed 256-bit integer for the specified precision which -// is encoded to the 32-byte width format of little-endian. 
-pub(crate) const MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [[u8; 32]; 76] = [ - [ - 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, - ], - [ - 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, - ], - [ - 231, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ], - [ - 15, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ], - [ - 159, 134, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ], - [ - 63, 66, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ], - [ - 127, 150, 152, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 224, 245, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 201, 154, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 227, 11, 84, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 231, 118, 72, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 15, 165, 212, 232, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 159, 114, 78, 24, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 63, 122, 16, 243, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 127, 198, 164, 126, 141, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 192, 111, 242, 134, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 137, 93, 120, 69, 99, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 99, 167, 179, 182, 224, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 231, 137, 4, 35, 199, 138, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 15, 99, 45, 94, 199, 107, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 159, 222, 197, 173, 201, 53, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 63, 178, 186, 201, 224, 25, 30, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 127, 246, 74, 225, 199, 2, 45, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 160, 237, 204, 206, 27, 194, 211, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 73, 72, 1, 20, 22, 149, 69, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 227, 210, 12, 200, 220, 210, 183, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 231, 60, 128, 208, 159, 60, 46, 59, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 15, 97, 2, 37, 62, 94, 206, 79, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 159, 202, 23, 114, 109, 174, 15, 30, 67, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 63, 234, 237, 116, 70, 208, 156, 44, 159, 12, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 127, 38, 75, 145, 192, 34, 32, 190, 55, 126, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 128, 239, 172, 133, 91, 65, 109, 45, 238, 4, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 9, 91, 193, 56, 147, 141, 68, 198, 77, 
49, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 99, 142, 141, 55, 192, 135, 173, 190, 9, 237, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 231, 143, 135, 43, 130, 77, 199, 114, 97, 66, 19, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 15, 159, 75, 179, 21, 7, 201, 123, 206, 151, 192, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 159, 54, 244, 0, 217, 70, 218, 213, 16, 238, 133, 7, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 63, 34, 138, 9, 122, 196, 134, 90, 168, 76, 59, 75, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 127, 86, 101, 95, 196, 172, 67, 137, 147, 254, 80, 240, 2, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 96, 245, 185, 171, 191, 164, 92, 195, 241, 41, 99, 29, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 201, 149, 67, 181, 124, 111, 158, 161, 113, 163, 223, - 37, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 227, 217, 163, 20, 223, 90, 48, 80, 112, 98, 188, 122, - 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 231, 130, 102, 206, 182, 140, 227, 33, 99, 216, 91, 203, - 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 15, 29, 1, 16, 36, 127, 227, 82, 223, 115, 150, 241, - 123, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 159, 34, 11, 160, 104, 247, 226, 60, 185, 134, 224, 111, - 215, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 63, 90, 111, 64, 22, 170, 221, 96, 60, 67, 197, 94, 106, - 192, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 127, 134, 89, 132, 222, 164, 168, 200, 91, 160, 180, - 179, 39, 132, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ 
- 255, 255, 255, 255, 255, 255, 64, 127, 43, 177, 112, 150, 214, 149, 67, 14, 5, - 141, 41, 175, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 137, 248, 178, 235, 102, 224, 97, 218, 163, 142, - 50, 130, 159, 215, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 99, 181, 253, 52, 5, 196, 210, 135, 102, 146, 249, - 21, 59, 108, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 231, 21, 233, 17, 52, 168, 59, 78, 1, 184, 191, - 219, 78, 58, 172, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 15, 219, 26, 179, 8, 146, 84, 14, 13, 48, 125, 149, - 20, 71, 186, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 159, 142, 12, 255, 86, 180, 77, 143, 130, 224, 227, - 214, 205, 198, 70, 11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 63, 146, 125, 246, 101, 11, 9, 153, 25, 197, 230, - 100, 10, 196, 195, 112, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 127, 182, 231, 160, 251, 113, 90, 250, 255, 178, 3, - 241, 103, 168, 165, 103, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 32, 13, 73, 212, 115, 136, 199, 255, 253, 36, - 106, 15, 148, 120, 12, 20, 4, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 73, 131, 218, 74, 134, 84, 203, 253, 235, 113, - 37, 154, 200, 181, 124, 200, 40, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 227, 32, 137, 236, 62, 77, 241, 233, 55, 115, - 118, 5, 214, 25, 223, 212, 151, 1, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 231, 72, 91, 61, 117, 4, 109, 35, 47, 128, - 160, 54, 92, 2, 183, 80, 238, 15, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 15, 217, 144, 101, 148, 44, 66, 98, 215, 1, - 69, 34, 154, 23, 38, 39, 79, 159, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 159, 122, 168, 247, 203, 189, 149, 214, 105, - 18, 
178, 86, 5, 236, 124, 135, 23, 57, 6, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 63, 202, 148, 172, 247, 105, 217, 97, 34, 184, - 244, 98, 53, 56, 225, 74, 235, 58, 62, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 127, 230, 207, 189, 172, 35, 126, 210, 87, 49, - 143, 221, 21, 50, 204, 236, 48, 77, 110, 2, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 0, 31, 106, 191, 100, 237, 56, 110, 237, - 151, 167, 218, 244, 249, 63, 233, 3, 79, 24, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 9, 54, 37, 122, 239, 69, 57, 78, 70, 239, - 139, 138, 144, 195, 127, 28, 39, 22, 243, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 99, 28, 116, 197, 90, 187, 60, 14, 191, - 88, 119, 105, 165, 163, 253, 28, 135, 221, 126, 9, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 231, 27, 137, 182, 139, 81, 95, 142, 118, - 119, 169, 30, 118, 100, 232, 33, 71, 167, 244, 94, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 15, 23, 91, 33, 117, 47, 185, 143, 161, - 170, 158, 50, 157, 236, 19, 83, 199, 136, 142, 181, 3, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 159, 230, 142, 77, 147, 218, 59, 157, 79, - 170, 50, 250, 35, 62, 199, 62, 201, 87, 145, 23, 37, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 63, 2, 149, 7, 193, 137, 86, 36, 28, 167, - 250, 197, 103, 109, 200, 115, 220, 109, 173, 235, 114, 1, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 127, 22, 210, 75, 138, 97, 97, 107, 25, - 135, 202, 187, 13, 70, 212, 133, 156, 74, 198, 52, 125, 14, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 224, 52, 246, 102, 207, 205, 49, - 254, 70, 233, 85, 137, 188, 74, 58, 29, 234, 190, 15, 228, 144, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 201, 16, 158, 5, 26, 10, 242, 237, - 197, 28, 91, 93, 93, 235, 70, 36, 37, 117, 157, 232, 168, 5, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 227, 167, 
44, 56, 4, 101, 116, 75, - 187, 31, 143, 165, 165, 49, 197, 106, 115, 147, 38, 22, 153, 56, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 231, 142, 190, 49, 42, 242, 139, - 242, 80, 61, 151, 119, 120, 240, 179, 43, 130, 194, 129, 221, 250, 53, 2, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 15, 149, 113, 241, 165, 117, 119, - 121, 41, 101, 232, 171, 180, 100, 7, 181, 21, 153, 17, 167, 204, 27, 22, - ], -]; - -// MIN decimal256 value of little-endian format for each precision. -// Each element is the min value of signed 256-bit integer for the specified precision which -// is encoded to the 76-byte width format of little-endian. -pub(crate) const MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [[u8; 32]; 76] = [ - [ - 247, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 157, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 25, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 241, 216, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 97, 121, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 193, 189, 240, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 129, 105, 103, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 31, 10, 250, 255, 255, 255, 255, 255, 
255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 54, 101, 196, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 28, 244, 171, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 24, 137, 183, 232, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 240, 90, 43, 23, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 96, 141, 177, 231, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 192, 133, 239, 12, 165, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 128, 57, 91, 129, 114, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 63, 144, 13, 121, 220, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 118, 162, 135, 186, 156, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 156, 88, 76, 73, 31, 242, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 24, 118, 251, 220, 56, 117, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 240, 156, 
210, 161, 56, 148, 250, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 96, 33, 58, 82, 54, 202, 201, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 192, 77, 69, 54, 31, 230, 225, 253, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 128, 9, 181, 30, 56, 253, 210, 234, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 95, 18, 51, 49, 228, 61, 44, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 182, 183, 254, 235, 233, 106, 186, 247, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 28, 45, 243, 55, 35, 45, 72, 173, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 24, 195, 127, 47, 96, 195, 209, 196, 252, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 240, 158, 253, 218, 193, 161, 49, 176, 223, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 96, 53, 232, 141, 146, 81, 240, 225, 188, 254, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 192, 21, 18, 139, 185, 47, 99, 211, 96, 243, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 128, 217, 180, 110, 63, 221, 223, 65, 200, 129, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 127, 16, 83, 122, 164, 190, 
146, 210, 17, 251, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 246, 164, 62, 199, 108, 114, 187, 57, 178, 206, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 156, 113, 114, 200, 63, 120, 82, 65, 246, 18, 254, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 24, 112, 120, 212, 125, 178, 56, 141, 158, 189, 236, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 240, 96, 180, 76, 234, 248, 54, 132, 49, 104, 63, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 96, 201, 11, 255, 38, 185, 37, 42, 239, 17, 122, 248, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 192, 221, 117, 246, 133, 59, 121, 165, 87, 179, 196, 180, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 128, 169, 154, 160, 59, 83, 188, 118, 108, 1, 175, 15, 253, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 159, 10, 70, 84, 64, 91, 163, 60, 14, 214, 156, 226, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 54, 106, 188, 74, 131, 144, 97, 94, 142, 92, 32, 218, 254, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 28, 38, 92, 235, 32, 165, 207, 175, 143, 157, 67, 133, 244, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 24, 125, 153, 49, 73, 115, 28, 222, 156, 39, 164, 52, 141, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 240, 226, 254, 239, 219, 128, 28, 173, 32, 140, 105, 14, 132, 251, - 255, 255, 255, 255, 255, 255, 255, 
255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 96, 221, 244, 95, 151, 8, 29, 195, 70, 121, 31, 144, 40, 211, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 192, 165, 144, 191, 233, 85, 34, 159, 195, 188, 58, 161, 149, 63, - 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 128, 121, 166, 123, 33, 91, 87, 55, 164, 95, 75, 76, 216, 123, - 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 191, 128, 212, 78, 143, 105, 41, 106, 188, 241, 250, 114, 214, - 80, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 118, 7, 77, 20, 153, 31, 158, 37, 92, 113, 205, 125, 96, 40, - 249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 156, 74, 2, 203, 250, 59, 45, 120, 153, 109, 6, 234, 196, 147, - 187, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 24, 234, 22, 238, 203, 87, 196, 177, 254, 71, 64, 36, 177, 197, - 83, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 240, 36, 229, 76, 247, 109, 171, 241, 242, 207, 130, 106, 235, - 184, 69, 229, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 96, 113, 243, 0, 169, 75, 178, 112, 125, 31, 28, 41, 50, 57, - 185, 244, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 192, 109, 130, 9, 154, 244, 246, 102, 230, 58, 25, 155, 245, - 59, 60, 143, 245, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 128, 73, 24, 95, 4, 142, 165, 5, 0, 77, 252, 14, 152, 87, 90, - 152, 151, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 223, 242, 182, 43, 140, 119, 56, 0, 2, 219, 149, 240, 107, - 135, 243, 235, 251, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 182, 124, 37, 181, 121, 171, 52, 2, 20, 142, 218, 101, 55, - 74, 131, 55, 215, 255, 
255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 28, 223, 118, 19, 193, 178, 14, 22, 200, 140, 137, 250, 41, - 230, 32, 43, 104, 254, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 24, 183, 164, 194, 138, 251, 146, 220, 208, 127, 95, 201, - 163, 253, 72, 175, 17, 240, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 240, 38, 111, 154, 107, 211, 189, 157, 40, 254, 186, 221, - 101, 232, 217, 216, 176, 96, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 96, 133, 87, 8, 52, 66, 106, 41, 150, 237, 77, 169, 250, 19, - 131, 120, 232, 198, 249, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 192, 53, 107, 83, 8, 150, 38, 158, 221, 71, 11, 157, 202, - 199, 30, 181, 20, 197, 193, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 128, 25, 48, 66, 83, 220, 129, 45, 168, 206, 112, 34, 234, - 205, 51, 19, 207, 178, 145, 253, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 255, 224, 149, 64, 155, 18, 199, 145, 18, 104, 88, 37, - 11, 6, 192, 22, 252, 176, 231, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 246, 201, 218, 133, 16, 186, 198, 177, 185, 16, 116, 117, - 111, 60, 128, 227, 216, 233, 12, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 156, 227, 139, 58, 165, 68, 195, 241, 64, 167, 136, 150, - 90, 92, 2, 227, 120, 34, 129, 246, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 24, 228, 118, 73, 116, 174, 160, 113, 137, 136, 86, 225, - 137, 155, 23, 222, 184, 88, 11, 161, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 240, 232, 164, 222, 138, 208, 70, 112, 94, 85, 97, 205, - 98, 19, 236, 172, 56, 119, 113, 74, 252, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 96, 25, 113, 178, 108, 37, 196, 98, 176, 85, 205, 5, 220, - 193, 56, 193, 54, 168, 110, 232, 218, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 192, 253, 106, 248, 62, 118, 169, 219, 227, 88, 5, 58, - 152, 146, 55, 140, 35, 146, 82, 20, 141, 254, 255, 255, - ], 
- [ - 1, 0, 0, 0, 0, 0, 0, 0, 128, 233, 45, 180, 117, 158, 158, 148, 230, 120, 53, 68, - 242, 185, 43, 122, 99, 181, 57, 203, 130, 241, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 31, 203, 9, 153, 48, 50, 206, 1, 185, 22, 170, 118, - 67, 181, 197, 226, 21, 65, 240, 27, 111, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 54, 239, 97, 250, 229, 245, 13, 18, 58, 227, 164, 162, - 162, 20, 185, 219, 218, 138, 98, 23, 87, 250, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 28, 88, 211, 199, 251, 154, 139, 180, 68, 224, 112, - 90, 90, 206, 58, 149, 140, 108, 217, 233, 102, 199, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 24, 113, 65, 206, 213, 13, 116, 13, 175, 194, 104, - 136, 135, 15, 76, 212, 125, 61, 126, 34, 5, 202, 253, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 240, 106, 142, 14, 90, 138, 136, 134, 214, 154, 23, - 84, 75, 155, 248, 74, 234, 102, 238, 88, 51, 228, 233, - ], -]; - -/// `MAX_DECIMAL_FOR_EACH_PRECISION[p]` holds the maximum `i128` value -/// that can be stored in [DataType::Decimal128] value of precision `p` -pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ - 9, - 99, - 999, - 9999, - 99999, - 999999, - 9999999, - 99999999, - 999999999, - 9999999999, - 99999999999, - 999999999999, - 9999999999999, - 99999999999999, - 999999999999999, - 9999999999999999, - 99999999999999999, - 999999999999999999, - 9999999999999999999, - 99999999999999999999, - 999999999999999999999, - 9999999999999999999999, - 99999999999999999999999, - 999999999999999999999999, - 9999999999999999999999999, - 99999999999999999999999999, - 999999999999999999999999999, - 9999999999999999999999999999, - 99999999999999999999999999999, - 999999999999999999999999999999, - 9999999999999999999999999999999, - 99999999999999999999999999999999, - 999999999999999999999999999999999, - 9999999999999999999999999999999999, - 99999999999999999999999999999999999, - 999999999999999999999999999999999999, - 9999999999999999999999999999999999999, - 99999999999999999999999999999999999999, -]; - -/// 
`MIN_DECIMAL_FOR_EACH_PRECISION[p]` holds the minimum `i128` value -/// that can be stored in a [DataType::Decimal128] value of precision `p` -pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ - -9, - -99, - -999, - -9999, - -99999, - -999999, - -9999999, - -99999999, - -999999999, - -9999999999, - -99999999999, - -999999999999, - -9999999999999, - -99999999999999, - -999999999999999, - -9999999999999999, - -99999999999999999, - -999999999999999999, - -9999999999999999999, - -99999999999999999999, - -999999999999999999999, - -9999999999999999999999, - -99999999999999999999999, - -999999999999999999999999, - -9999999999999999999999999, - -99999999999999999999999999, - -999999999999999999999999999, - -9999999999999999999999999999, - -99999999999999999999999999999, - -999999999999999999999999999999, - -9999999999999999999999999999999, - -99999999999999999999999999999999, - -999999999999999999999999999999999, - -9999999999999999999999999999999999, - -99999999999999999999999999999999999, - -999999999999999999999999999999999999, - -9999999999999999999999999999999999999, - -99999999999999999999999999999999999999, -]; - -/// The maximum precision for [DataType::Decimal128] values -pub const DECIMAL128_MAX_PRECISION: u8 = 38; - -/// The maximum scale for [DataType::Decimal128] values -pub const DECIMAL128_MAX_SCALE: u8 = 38; - -/// The maximum precision for [DataType::Decimal256] values -pub const DECIMAL256_MAX_PRECISION: u8 = 76; - -/// The maximum scale for [DataType::Decimal256] values -pub const DECIMAL256_MAX_SCALE: u8 = 76; - -/// The default scale for [DataType::Decimal128] and [DataType::Decimal256] values -pub const DECIMAL_DEFAULT_SCALE: u8 = 10; - -/// Validates that the specified `i128` value can be properly -/// interpreted as a Decimal number with precision `precision` -#[inline] -pub(crate) fn validate_decimal_precision(value: i128, precision: u8) -> Result<()> { - if precision > DECIMAL128_MAX_PRECISION { - return 
Err(ArrowError::InvalidArgumentError(format!( - "Max precision of a Decimal128 is {}, but got {}", - DECIMAL128_MAX_PRECISION, precision, - ))); - } - - let max = MAX_DECIMAL_FOR_EACH_PRECISION[usize::from(precision) - 1]; - let min = MIN_DECIMAL_FOR_EACH_PRECISION[usize::from(precision) - 1]; - - if value > max { - Err(ArrowError::InvalidArgumentError(format!( - "{} is too large to store in a Decimal128 of precision {}. Max is {}", - value, precision, max - ))) - } else if value < min { - Err(ArrowError::InvalidArgumentError(format!( - "{} is too small to store in a Decimal128 of precision {}. Min is {}", - value, precision, min - ))) - } else { - Ok(()) - } -} - -/// Validates that the specified `byte_array` of little-endian format -/// value can be properly interpreted as a Decimal256 number with precision `precision` -#[inline] -pub(crate) fn validate_decimal256_precision_with_lt_bytes( - lt_value: &[u8], - precision: u8, -) -> Result<()> { - if precision > DECIMAL256_MAX_PRECISION { - return Err(ArrowError::InvalidArgumentError(format!( - "Max precision of a Decimal256 is {}, but got {}", - DECIMAL256_MAX_PRECISION, precision, - ))); - } - let max = MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[usize::from(precision) - 1]; - let min = MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[usize::from(precision) - 1]; - - if singed_cmp_le_bytes(lt_value, &max) == Ordering::Greater { - Err(ArrowError::InvalidArgumentError(format!( - "{:?} is too large to store in a Decimal256 of precision {}. Max is {:?}", - BigInt::from_signed_bytes_le(lt_value), - precision, - BigInt::from_signed_bytes_le(&max) - ))) - } else if singed_cmp_le_bytes(lt_value, &min) == Ordering::Less { - Err(ArrowError::InvalidArgumentError(format!( - "{:?} is too small to store in a Decimal256 of precision {}. 
Min is {:?}", - BigInt::from_signed_bytes_le(lt_value), - precision, - BigInt::from_signed_bytes_le(&min) - ))) - } else { - Ok(()) - } -} - -impl DataType { - /// Parse a data type from a JSON representation. - #[cfg(feature = "json")] - pub(crate) fn from(json: &serde_json::Value) -> Result { - use serde_json::Value; - let default_field = Field::new("", DataType::Boolean, true); - match *json { - Value::Object(ref map) => match map.get("name") { - Some(s) if s == "null" => Ok(DataType::Null), - Some(s) if s == "bool" => Ok(DataType::Boolean), - Some(s) if s == "binary" => Ok(DataType::Binary), - Some(s) if s == "largebinary" => Ok(DataType::LargeBinary), - Some(s) if s == "utf8" => Ok(DataType::Utf8), - Some(s) if s == "largeutf8" => Ok(DataType::LargeUtf8), - Some(s) if s == "fixedsizebinary" => { - // return a list with any type as its child isn't defined in the map - if let Some(Value::Number(size)) = map.get("byteWidth") { - Ok(DataType::FixedSizeBinary(size.as_i64().unwrap() as i32)) - } else { - Err(ArrowError::ParseError( - "Expecting a byteWidth for fixedsizebinary".to_string(), - )) - } - } - Some(s) if s == "decimal" => { - // return a list with any type as its child isn't defined in the map - let precision = match map.get("precision") { - Some(p) => Ok(p.as_u64().unwrap().try_into().unwrap()), - None => Err(ArrowError::ParseError( - "Expecting a precision for decimal".to_string(), - )), - }?; - let scale = match map.get("scale") { - Some(s) => Ok(s.as_u64().unwrap().try_into().unwrap()), - _ => Err(ArrowError::ParseError( - "Expecting a scale for decimal".to_string(), - )), - }?; - let bit_width: usize = match map.get("bitWidth") { - Some(b) => b.as_u64().unwrap() as usize, - _ => 128, // Default bit width - }; - - if bit_width == 128 { - Ok(DataType::Decimal128(precision, scale)) - } else if bit_width == 256 { - Ok(DataType::Decimal256(precision, scale)) - } else { - Err(ArrowError::ParseError( - "Decimal bit_width invalid".to_string(), - )) - } - } - 
Some(s) if s == "floatingpoint" => match map.get("precision") { - Some(p) if p == "HALF" => Ok(DataType::Float16), - Some(p) if p == "SINGLE" => Ok(DataType::Float32), - Some(p) if p == "DOUBLE" => Ok(DataType::Float64), - _ => Err(ArrowError::ParseError( - "floatingpoint precision missing or invalid".to_string(), - )), - }, - Some(s) if s == "timestamp" => { - let unit = match map.get("unit") { - Some(p) if p == "SECOND" => Ok(TimeUnit::Second), - Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond), - Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond), - Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond), - _ => Err(ArrowError::ParseError( - "timestamp unit missing or invalid".to_string(), - )), - }; - let tz = match map.get("timezone") { - None => Ok(None), - Some(serde_json::Value::String(tz)) => Ok(Some(tz.clone())), - _ => Err(ArrowError::ParseError( - "timezone must be a string".to_string(), - )), - }; - Ok(DataType::Timestamp(unit?, tz?)) - } - Some(s) if s == "date" => match map.get("unit") { - Some(p) if p == "DAY" => Ok(DataType::Date32), - Some(p) if p == "MILLISECOND" => Ok(DataType::Date64), - _ => Err(ArrowError::ParseError( - "date unit missing or invalid".to_string(), - )), - }, - Some(s) if s == "time" => { - let unit = match map.get("unit") { - Some(p) if p == "SECOND" => Ok(TimeUnit::Second), - Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond), - Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond), - Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond), - _ => Err(ArrowError::ParseError( - "time unit missing or invalid".to_string(), - )), - }; - match map.get("bitWidth") { - Some(p) if p == 32 => Ok(DataType::Time32(unit?)), - Some(p) if p == 64 => Ok(DataType::Time64(unit?)), - _ => Err(ArrowError::ParseError( - "time bitWidth missing or invalid".to_string(), - )), - } - } - Some(s) if s == "duration" => match map.get("unit") { - Some(p) if p == "SECOND" => Ok(DataType::Duration(TimeUnit::Second)), - 
Some(p) if p == "MILLISECOND" => { - Ok(DataType::Duration(TimeUnit::Millisecond)) - } - Some(p) if p == "MICROSECOND" => { - Ok(DataType::Duration(TimeUnit::Microsecond)) - } - Some(p) if p == "NANOSECOND" => { - Ok(DataType::Duration(TimeUnit::Nanosecond)) - } - _ => Err(ArrowError::ParseError( - "time unit missing or invalid".to_string(), - )), - }, - Some(s) if s == "interval" => match map.get("unit") { - Some(p) if p == "DAY_TIME" => { - Ok(DataType::Interval(IntervalUnit::DayTime)) - } - Some(p) if p == "YEAR_MONTH" => { - Ok(DataType::Interval(IntervalUnit::YearMonth)) - } - Some(p) if p == "MONTH_DAY_NANO" => { - Ok(DataType::Interval(IntervalUnit::MonthDayNano)) - } - _ => Err(ArrowError::ParseError( - "interval unit missing or invalid".to_string(), - )), - }, - Some(s) if s == "int" => match map.get("isSigned") { - Some(&Value::Bool(true)) => match map.get("bitWidth") { - Some(&Value::Number(ref n)) => match n.as_u64() { - Some(8) => Ok(DataType::Int8), - Some(16) => Ok(DataType::Int16), - Some(32) => Ok(DataType::Int32), - Some(64) => Ok(DataType::Int64), - _ => Err(ArrowError::ParseError( - "int bitWidth missing or invalid".to_string(), - )), - }, - _ => Err(ArrowError::ParseError( - "int bitWidth missing or invalid".to_string(), - )), - }, - Some(&Value::Bool(false)) => match map.get("bitWidth") { - Some(&Value::Number(ref n)) => match n.as_u64() { - Some(8) => Ok(DataType::UInt8), - Some(16) => Ok(DataType::UInt16), - Some(32) => Ok(DataType::UInt32), - Some(64) => Ok(DataType::UInt64), - _ => Err(ArrowError::ParseError( - "int bitWidth missing or invalid".to_string(), - )), - }, - _ => Err(ArrowError::ParseError( - "int bitWidth missing or invalid".to_string(), - )), - }, - _ => Err(ArrowError::ParseError( - "int signed missing or invalid".to_string(), - )), - }, - Some(s) if s == "list" => { - // return a list with any type as its child isn't defined in the map - Ok(DataType::List(Box::new(default_field))) - } - Some(s) if s == "largelist" => { - // 
return a largelist with any type as its child isn't defined in the map - Ok(DataType::LargeList(Box::new(default_field))) - } - Some(s) if s == "fixedsizelist" => { - // return a list with any type as its child isn't defined in the map - if let Some(Value::Number(size)) = map.get("listSize") { - Ok(DataType::FixedSizeList( - Box::new(default_field), - size.as_i64().unwrap() as i32, - )) - } else { - Err(ArrowError::ParseError( - "Expecting a listSize for fixedsizelist".to_string(), - )) - } - } - Some(s) if s == "struct" => { - // return an empty `struct` type as its children aren't defined in the map - Ok(DataType::Struct(vec![])) - } - Some(s) if s == "map" => { - if let Some(Value::Bool(keys_sorted)) = map.get("keysSorted") { - // Return a map with an empty type as its children aren't defined in the map - Ok(DataType::Map(Box::new(default_field), *keys_sorted)) - } else { - Err(ArrowError::ParseError( - "Expecting a keysSorted for map".to_string(), - )) - } - } - Some(s) if s == "union" => { - if let Some(Value::String(mode)) = map.get("mode") { - let union_mode = if mode == "SPARSE" { - UnionMode::Sparse - } else if mode == "DENSE" { - UnionMode::Dense - } else { - return Err(ArrowError::ParseError(format!( - "Unknown union mode {:?} for union", - mode - ))); - }; - if let Some(type_ids) = map.get("typeIds") { - let type_ids = type_ids - .as_array() - .unwrap() - .iter() - .map(|t| t.as_i64().unwrap() as i8) - .collect::>(); - - let default_fields = type_ids - .iter() - .map(|_| default_field.clone()) - .collect::>(); - - Ok(DataType::Union(default_fields, type_ids, union_mode)) - } else { - Err(ArrowError::ParseError( - "Expecting a typeIds for union ".to_string(), - )) - } - } else { - Err(ArrowError::ParseError( - "Expecting a mode for union".to_string(), - )) - } - } - Some(other) => Err(ArrowError::ParseError(format!( - "invalid or unsupported type name: {} in {:?}", - other, json - ))), - None => Err(ArrowError::ParseError("type name 
missing".to_string())), - }, - _ => Err(ArrowError::ParseError( - "invalid json value type".to_string(), - )), - } - } - - /// Generate a JSON representation of the data type. - #[cfg(feature = "json")] - pub fn to_json(&self) -> serde_json::Value { - use serde_json::json; - match self { - DataType::Null => json!({"name": "null"}), - DataType::Boolean => json!({"name": "bool"}), - DataType::Int8 => json!({"name": "int", "bitWidth": 8, "isSigned": true}), - DataType::Int16 => json!({"name": "int", "bitWidth": 16, "isSigned": true}), - DataType::Int32 => json!({"name": "int", "bitWidth": 32, "isSigned": true}), - DataType::Int64 => json!({"name": "int", "bitWidth": 64, "isSigned": true}), - DataType::UInt8 => json!({"name": "int", "bitWidth": 8, "isSigned": false}), - DataType::UInt16 => json!({"name": "int", "bitWidth": 16, "isSigned": false}), - DataType::UInt32 => json!({"name": "int", "bitWidth": 32, "isSigned": false}), - DataType::UInt64 => json!({"name": "int", "bitWidth": 64, "isSigned": false}), - DataType::Float16 => json!({"name": "floatingpoint", "precision": "HALF"}), - DataType::Float32 => json!({"name": "floatingpoint", "precision": "SINGLE"}), - DataType::Float64 => json!({"name": "floatingpoint", "precision": "DOUBLE"}), - DataType::Utf8 => json!({"name": "utf8"}), - DataType::LargeUtf8 => json!({"name": "largeutf8"}), - DataType::Binary => json!({"name": "binary"}), - DataType::LargeBinary => json!({"name": "largebinary"}), - DataType::FixedSizeBinary(byte_width) => { - json!({"name": "fixedsizebinary", "byteWidth": byte_width}) - } - DataType::Struct(_) => json!({"name": "struct"}), - DataType::Union(_, _, _) => json!({"name": "union"}), - DataType::List(_) => json!({ "name": "list"}), - DataType::LargeList(_) => json!({ "name": "largelist"}), - DataType::FixedSizeList(_, length) => { - json!({"name":"fixedsizelist", "listSize": length}) - } - DataType::Time32(unit) => { - json!({"name": "time", "bitWidth": 32, "unit": match unit { - 
TimeUnit::Second => "SECOND", - TimeUnit::Millisecond => "MILLISECOND", - TimeUnit::Microsecond => "MICROSECOND", - TimeUnit::Nanosecond => "NANOSECOND", - }}) - } - DataType::Time64(unit) => { - json!({"name": "time", "bitWidth": 64, "unit": match unit { - TimeUnit::Second => "SECOND", - TimeUnit::Millisecond => "MILLISECOND", - TimeUnit::Microsecond => "MICROSECOND", - TimeUnit::Nanosecond => "NANOSECOND", - }}) - } - DataType::Date32 => { - json!({"name": "date", "unit": "DAY"}) - } - DataType::Date64 => { - json!({"name": "date", "unit": "MILLISECOND"}) - } - DataType::Timestamp(unit, None) => { - json!({"name": "timestamp", "unit": match unit { - TimeUnit::Second => "SECOND", - TimeUnit::Millisecond => "MILLISECOND", - TimeUnit::Microsecond => "MICROSECOND", - TimeUnit::Nanosecond => "NANOSECOND", - }}) - } - DataType::Timestamp(unit, Some(tz)) => { - json!({"name": "timestamp", "unit": match unit { - TimeUnit::Second => "SECOND", - TimeUnit::Millisecond => "MILLISECOND", - TimeUnit::Microsecond => "MICROSECOND", - TimeUnit::Nanosecond => "NANOSECOND", - }, "timezone": tz}) - } - DataType::Interval(unit) => json!({"name": "interval", "unit": match unit { - IntervalUnit::YearMonth => "YEAR_MONTH", - IntervalUnit::DayTime => "DAY_TIME", - IntervalUnit::MonthDayNano => "MONTH_DAY_NANO", - }}), - DataType::Duration(unit) => json!({"name": "duration", "unit": match unit { - TimeUnit::Second => "SECOND", - TimeUnit::Millisecond => "MILLISECOND", - TimeUnit::Microsecond => "MICROSECOND", - TimeUnit::Nanosecond => "NANOSECOND", - }}), - DataType::Dictionary(_, _) => json!({ "name": "dictionary"}), - DataType::Decimal128(precision, scale) => { - json!({"name": "decimal", "precision": precision, "scale": scale, "bitWidth": 128}) - } - DataType::Decimal256(precision, scale) => { - json!({"name": "decimal", "precision": precision, "scale": scale, "bitWidth": 256}) - } - DataType::Map(_, keys_sorted) => { - json!({"name": "map", "keysSorted": keys_sorted}) - } - } - } - - 
/// Returns true if this type is numeric: (UInt*, Int*, or Float*). - pub fn is_numeric(t: &DataType) -> bool { - use DataType::*; - matches!( - t, - UInt8 - | UInt16 - | UInt32 - | UInt64 - | Int8 - | Int16 - | Int32 - | Int64 - | Float32 - | Float64 - ) - } - - /// Returns true if this type is temporal: (Date*, Time*, Duration, or Interval). - pub fn is_temporal(t: &DataType) -> bool { - use DataType::*; - matches!( - t, - Date32 - | Date64 - | Timestamp(_, _) - | Time32(_) - | Time64(_) - | Duration(_) - | Interval(_) - ) - } - - /// Returns true if this type is valid as a dictionary key - /// (e.g. [`super::ArrowDictionaryKeyType`] - pub fn is_dictionary_key_type(t: &DataType) -> bool { - use DataType::*; - matches!( - t, - UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 - ) - } - - /// Compares the datatype with another, ignoring nested field names - /// and metadata. - pub fn equals_datatype(&self, other: &DataType) -> bool { - match (&self, other) { - (DataType::List(a), DataType::List(b)) - | (DataType::LargeList(a), DataType::LargeList(b)) => { - a.is_nullable() == b.is_nullable() - && a.data_type().equals_datatype(b.data_type()) - } - (DataType::FixedSizeList(a, a_size), DataType::FixedSizeList(b, b_size)) => { - a_size == b_size - && a.is_nullable() == b.is_nullable() - && a.data_type().equals_datatype(b.data_type()) - } - (DataType::Struct(a), DataType::Struct(b)) => { - a.len() == b.len() - && a.iter().zip(b).all(|(a, b)| { - a.is_nullable() == b.is_nullable() - && a.data_type().equals_datatype(b.data_type()) - }) - } - ( - DataType::Map(a_field, a_is_sorted), - DataType::Map(b_field, b_is_sorted), - ) => a_field == b_field && a_is_sorted == b_is_sorted, - _ => self == other, - } - } -} - -#[cfg(test)] -mod test { - use crate::datatypes::datatype::{ - MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION, - MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION, - }; - use crate::util::decimal::Decimal256; - use num::{BigInt, Num}; - - #[test] - fn 
test_decimal256_min_max_for_precision() { - // The precision from 1 to 76 - let mut max_value = "9".to_string(); - let mut min_value = "-9".to_string(); - for i in 1..77 { - let max_decimal = - Decimal256::from(BigInt::from_str_radix(max_value.as_str(), 10).unwrap()); - let min_decimal = - Decimal256::from(BigInt::from_str_radix(min_value.as_str(), 10).unwrap()); - let max_bytes = MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[i - 1]; - let min_bytes = MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[i - 1]; - max_value += "9"; - min_value += "9"; - assert_eq!(max_decimal.raw_value(), &max_bytes); - assert_eq!(min_decimal.raw_value(), &min_bytes); - } - } -} diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs index 38b6c7bf974..b19890ea7ad 100644 --- a/arrow/src/datatypes/mod.rs +++ b/arrow/src/datatypes/mod.rs @@ -26,18 +26,16 @@ use std::sync::Arc; mod native; pub use native::*; -mod field; -pub use field::*; -mod schema; -pub use schema::*; mod numeric; pub use numeric::*; mod types; pub use types::*; -mod datatype; -pub use datatype::*; mod delta; +pub use arrow_schema::datatype::*; +pub use arrow_schema::field::*; +pub use arrow_schema::schema::*; + #[cfg(feature = "ffi")] mod ffi; #[cfg(feature = "ffi")] diff --git a/arrow/src/datatypes/types.rs b/arrow/src/datatypes/types.rs index 1b7d0675bb4..8037ed9a938 100644 --- a/arrow/src/datatypes/types.rs +++ b/arrow/src/datatypes/types.rs @@ -17,7 +17,7 @@ use super::{ArrowPrimitiveType, DataType, IntervalUnit, TimeUnit}; use crate::datatypes::delta::shift_months; -use crate::datatypes::{ +use crate::util::decimal::{ DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL_DEFAULT_SCALE, }; diff --git a/arrow/src/error.rs b/arrow/src/error.rs index 5d92fb93017..8f0c78f9e06 100644 --- a/arrow/src/error.rs +++ b/arrow/src/error.rs @@ -21,6 +21,9 @@ use std::io::Write; use std::error::Error; +// Re-export ArrowSchemaError +pub use 
arrow_schema::error::ArrowSchemaError; + /// Many different operations in the `arrow` crate return this error type. #[derive(Debug)] pub enum ArrowError { @@ -30,7 +33,7 @@ pub enum ArrowError { CastError(String), MemoryError(String), ParseError(String), - SchemaError(String), + SchemaError(ArrowSchemaError), ComputeError(String), DivideByZero, CsvError(String), @@ -58,6 +61,12 @@ impl From<::std::io::Error> for ArrowError { } } +impl From<ArrowSchemaError> for ArrowError { + fn from(error: ArrowSchemaError) -> Self { + Self::SchemaError(error) + } +} + #[cfg(feature = "csv")] impl From<csv_crate::Error> for ArrowError { fn from(error: csv_crate::Error) -> Self { diff --git a/arrow/src/record_batch.rs b/arrow/src/record_batch.rs index 4b0d36a43e5..4b86ceb93b5 100644 --- a/arrow/src/record_batch.rs +++ b/arrow/src/record_batch.rs @@ -18,6 +18,7 @@ //! A two-dimensional batch of column-oriented data with a defined //! [schema](crate::datatypes::Schema). +use arrow_schema::error::ArrowSchemaError; use std::sync::Arc; use crate::array::*; @@ -203,11 +204,11 @@ impl RecordBatch { .iter() .map(|f| { self.columns.get(*f).cloned().ok_or_else(|| { - ArrowError::SchemaError(format!( + ArrowError::SchemaError(ArrowSchemaError::Field(format!( "project index {} out of bounds, max field {}", f, self.columns.len() - )) + ))) }) }) .collect::<Result<Vec<_>>>()?; diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index 421942df5c1..a41e60b5c81 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow/src/util/decimal.rs @@ -17,10 +17,7 @@ //! 
Decimal related utils -use crate::datatypes::{ - DataType, Decimal128Type, Decimal256Type, DecimalType, DECIMAL256_MAX_PRECISION, - DECIMAL_DEFAULT_SCALE, -}; +use crate::datatypes::{DataType, Decimal128Type, Decimal256Type, DecimalType}; use crate::error::{ArrowError, Result}; use num::bigint::BigInt; use num::Signed; @@ -296,6 +293,791 @@ pub(crate) fn singed_cmp_le_bytes(left: &[u8], right: &[u8]) -> Ordering { Ordering::Equal } +// MAX decimal256 value of little-endian format for each precision. +// Each element is the max value of signed 256-bit integer for the specified precision which +// is encoded to the 32-byte width format of little-endian. +pub(crate) const MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [[u8; 32]; 76] = [ + [ + 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + ], + [ + 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + ], + [ + 231, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ], + [ + 15, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ], + [ + 159, 134, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ], + [ + 63, 66, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ], + [ + 127, 150, 152, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 224, 245, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 201, 154, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 227, 11, 84, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 231, 118, 72, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 
15, 165, 212, 232, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 159, 114, 78, 24, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 63, 122, 16, 243, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 127, 198, 164, 126, 141, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 192, 111, 242, 134, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 137, 93, 120, 69, 99, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 99, 167, 179, 182, 224, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 231, 137, 4, 35, 199, 138, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 15, 99, 45, 94, 199, 107, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 159, 222, 197, 173, 201, 53, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 63, 178, 186, 201, 224, 25, 30, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 127, 246, 74, 225, 199, 2, 45, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 160, 237, 204, 206, 27, 194, 211, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 73, 72, 1, 20, 22, 149, 69, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 227, 210, 12, 200, 220, 210, 183, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 231, 60, 128, 208, 159, 60, 46, 59, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 15, 97, 2, 37, 62, 94, 206, 
79, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 159, 202, 23, 114, 109, 174, 15, 30, 67, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 63, 234, 237, 116, 70, 208, 156, 44, 159, 12, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 127, 38, 75, 145, 192, 34, 32, 190, 55, 126, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 128, 239, 172, 133, 91, 65, 109, 45, 238, 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 9, 91, 193, 56, 147, 141, 68, 198, 77, 49, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 99, 142, 141, 55, 192, 135, 173, 190, 9, 237, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 231, 143, 135, 43, 130, 77, 199, 114, 97, 66, 19, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 15, 159, 75, 179, 21, 7, 201, 123, 206, 151, 192, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 159, 54, 244, 0, 217, 70, 218, 213, 16, 238, 133, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 63, 34, 138, 9, 122, 196, 134, 90, 168, 76, 59, 75, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 127, 86, 101, 95, 196, 172, 67, 137, 147, 254, 80, 240, 2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 96, 245, 185, 171, 191, 164, 92, 195, 241, 41, 99, 29, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 201, 149, 67, 181, 124, 111, 158, 161, 113, 163, 223, + 37, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 227, 217, 163, 20, 223, 90, 48, 80, 112, 98, 188, 122, + 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 231, 130, 102, 
206, 182, 140, 227, 33, 99, 216, 91, 203, + 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 15, 29, 1, 16, 36, 127, 227, 82, 223, 115, 150, 241, + 123, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 159, 34, 11, 160, 104, 247, 226, 60, 185, 134, 224, 111, + 215, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 63, 90, 111, 64, 22, 170, 221, 96, 60, 67, 197, 94, 106, + 192, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 127, 134, 89, 132, 222, 164, 168, 200, 91, 160, 180, + 179, 39, 132, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 64, 127, 43, 177, 112, 150, 214, 149, 67, 14, 5, + 141, 41, 175, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 137, 248, 178, 235, 102, 224, 97, 218, 163, 142, + 50, 130, 159, 215, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 99, 181, 253, 52, 5, 196, 210, 135, 102, 146, 249, + 21, 59, 108, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 231, 21, 233, 17, 52, 168, 59, 78, 1, 184, 191, + 219, 78, 58, 172, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 15, 219, 26, 179, 8, 146, 84, 14, 13, 48, 125, 149, + 20, 71, 186, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 159, 142, 12, 255, 86, 180, 77, 143, 130, 224, 227, + 214, 205, 198, 70, 11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 63, 146, 125, 246, 101, 11, 9, 153, 25, 197, 230, + 100, 10, 196, 195, 112, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 127, 182, 231, 160, 251, 113, 90, 250, 255, 178, 3, + 241, 103, 168, 165, 103, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 32, 13, 73, 212, 115, 136, 199, 255, 253, 36, + 106, 15, 148, 120, 12, 20, 4, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 
255, 255, 255, 73, 131, 218, 74, 134, 84, 203, 253, 235, 113, + 37, 154, 200, 181, 124, 200, 40, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 227, 32, 137, 236, 62, 77, 241, 233, 55, 115, + 118, 5, 214, 25, 223, 212, 151, 1, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 231, 72, 91, 61, 117, 4, 109, 35, 47, 128, + 160, 54, 92, 2, 183, 80, 238, 15, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 15, 217, 144, 101, 148, 44, 66, 98, 215, 1, + 69, 34, 154, 23, 38, 39, 79, 159, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 159, 122, 168, 247, 203, 189, 149, 214, 105, + 18, 178, 86, 5, 236, 124, 135, 23, 57, 6, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 63, 202, 148, 172, 247, 105, 217, 97, 34, 184, + 244, 98, 53, 56, 225, 74, 235, 58, 62, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 127, 230, 207, 189, 172, 35, 126, 210, 87, 49, + 143, 221, 21, 50, 204, 236, 48, 77, 110, 2, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 0, 31, 106, 191, 100, 237, 56, 110, 237, + 151, 167, 218, 244, 249, 63, 233, 3, 79, 24, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 9, 54, 37, 122, 239, 69, 57, 78, 70, 239, + 139, 138, 144, 195, 127, 28, 39, 22, 243, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 99, 28, 116, 197, 90, 187, 60, 14, 191, + 88, 119, 105, 165, 163, 253, 28, 135, 221, 126, 9, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 231, 27, 137, 182, 139, 81, 95, 142, 118, + 119, 169, 30, 118, 100, 232, 33, 71, 167, 244, 94, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 15, 23, 91, 33, 117, 47, 185, 143, 161, + 170, 158, 50, 157, 236, 19, 83, 199, 136, 142, 181, 3, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 159, 230, 142, 77, 147, 218, 59, 157, 79, + 170, 50, 250, 35, 62, 199, 62, 201, 87, 145, 23, 37, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 
255, 255, 63, 2, 149, 7, 193, 137, 86, 36, 28, 167, + 250, 197, 103, 109, 200, 115, 220, 109, 173, 235, 114, 1, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 127, 22, 210, 75, 138, 97, 97, 107, 25, + 135, 202, 187, 13, 70, 212, 133, 156, 74, 198, 52, 125, 14, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 224, 52, 246, 102, 207, 205, 49, + 254, 70, 233, 85, 137, 188, 74, 58, 29, 234, 190, 15, 228, 144, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 201, 16, 158, 5, 26, 10, 242, 237, + 197, 28, 91, 93, 93, 235, 70, 36, 37, 117, 157, 232, 168, 5, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 227, 167, 44, 56, 4, 101, 116, 75, + 187, 31, 143, 165, 165, 49, 197, 106, 115, 147, 38, 22, 153, 56, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 231, 142, 190, 49, 42, 242, 139, + 242, 80, 61, 151, 119, 120, 240, 179, 43, 130, 194, 129, 221, 250, 53, 2, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 15, 149, 113, 241, 165, 117, 119, + 121, 41, 101, 232, 171, 180, 100, 7, 181, 21, 153, 17, 167, 204, 27, 22, + ], +]; + +// MIN decimal256 value of little-endian format for each precision. +// Each element is the min value of signed 256-bit integer for the specified precision which +// is encoded to the 32-byte width format of little-endian. 
+pub(crate) const MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [[u8; 32]; 76] = [ + [ + 247, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 157, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 25, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 241, 216, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 97, 121, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 193, 189, 240, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 129, 105, 103, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 31, 10, 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 54, 101, 196, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 28, 244, 171, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 24, 137, 183, 232, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 240, 90, 43, 23, 
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 96, 141, 177, 231, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 192, 133, 239, 12, 165, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 128, 57, 91, 129, 114, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 63, 144, 13, 121, 220, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 118, 162, 135, 186, 156, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 156, 88, 76, 73, 31, 242, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 24, 118, 251, 220, 56, 117, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 240, 156, 210, 161, 56, 148, 250, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 96, 33, 58, 82, 54, 202, 201, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 192, 77, 69, 54, 31, 230, 225, 253, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 128, 9, 181, 30, 56, 253, 210, 234, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 95, 
18, 51, 49, 228, 61, 44, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 182, 183, 254, 235, 233, 106, 186, 247, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 28, 45, 243, 55, 35, 45, 72, 173, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 24, 195, 127, 47, 96, 195, 209, 196, 252, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 240, 158, 253, 218, 193, 161, 49, 176, 223, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 96, 53, 232, 141, 146, 81, 240, 225, 188, 254, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 192, 21, 18, 139, 185, 47, 99, 211, 96, 243, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 128, 217, 180, 110, 63, 221, 223, 65, 200, 129, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 127, 16, 83, 122, 164, 190, 146, 210, 17, 251, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 246, 164, 62, 199, 108, 114, 187, 57, 178, 206, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 156, 113, 114, 200, 63, 120, 82, 65, 246, 18, 254, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 24, 112, 120, 212, 125, 178, 56, 141, 158, 189, 236, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 240, 96, 180, 76, 234, 248, 54, 132, 49, 
104, 63, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 96, 201, 11, 255, 38, 185, 37, 42, 239, 17, 122, 248, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 192, 221, 117, 246, 133, 59, 121, 165, 87, 179, 196, 180, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 128, 169, 154, 160, 59, 83, 188, 118, 108, 1, 175, 15, 253, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 159, 10, 70, 84, 64, 91, 163, 60, 14, 214, 156, 226, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 54, 106, 188, 74, 131, 144, 97, 94, 142, 92, 32, 218, 254, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 28, 38, 92, 235, 32, 165, 207, 175, 143, 157, 67, 133, 244, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 24, 125, 153, 49, 73, 115, 28, 222, 156, 39, 164, 52, 141, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 240, 226, 254, 239, 219, 128, 28, 173, 32, 140, 105, 14, 132, 251, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 96, 221, 244, 95, 151, 8, 29, 195, 70, 121, 31, 144, 40, 211, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 192, 165, 144, 191, 233, 85, 34, 159, 195, 188, 58, 161, 149, 63, + 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 128, 121, 166, 123, 33, 91, 87, 55, 164, 95, 75, 76, 216, 123, + 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 191, 128, 212, 78, 143, 105, 41, 106, 188, 241, 250, 114, 214, + 80, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], 
+ [ + 1, 0, 0, 0, 0, 0, 118, 7, 77, 20, 153, 31, 158, 37, 92, 113, 205, 125, 96, 40, + 249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 156, 74, 2, 203, 250, 59, 45, 120, 153, 109, 6, 234, 196, 147, + 187, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 24, 234, 22, 238, 203, 87, 196, 177, 254, 71, 64, 36, 177, 197, + 83, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 240, 36, 229, 76, 247, 109, 171, 241, 242, 207, 130, 106, 235, + 184, 69, 229, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 96, 113, 243, 0, 169, 75, 178, 112, 125, 31, 28, 41, 50, 57, + 185, 244, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 192, 109, 130, 9, 154, 244, 246, 102, 230, 58, 25, 155, 245, + 59, 60, 143, 245, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 128, 73, 24, 95, 4, 142, 165, 5, 0, 77, 252, 14, 152, 87, 90, + 152, 151, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 223, 242, 182, 43, 140, 119, 56, 0, 2, 219, 149, 240, 107, + 135, 243, 235, 251, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 182, 124, 37, 181, 121, 171, 52, 2, 20, 142, 218, 101, 55, + 74, 131, 55, 215, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 28, 223, 118, 19, 193, 178, 14, 22, 200, 140, 137, 250, 41, + 230, 32, 43, 104, 254, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 24, 183, 164, 194, 138, 251, 146, 220, 208, 127, 95, 201, + 163, 253, 72, 175, 17, 240, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 240, 38, 111, 154, 107, 211, 189, 157, 40, 254, 186, 221, + 101, 232, 217, 216, 176, 96, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 96, 133, 87, 8, 52, 66, 106, 41, 150, 237, 77, 169, 250, 19, + 131, 120, 232, 198, 249, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 
0, 0, 0, 0, 0, 0, 192, 53, 107, 83, 8, 150, 38, 158, 221, 71, 11, 157, 202, + 199, 30, 181, 20, 197, 193, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 128, 25, 48, 66, 83, 220, 129, 45, 168, 206, 112, 34, 234, + 205, 51, 19, 207, 178, 145, 253, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 255, 224, 149, 64, 155, 18, 199, 145, 18, 104, 88, 37, + 11, 6, 192, 22, 252, 176, 231, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 246, 201, 218, 133, 16, 186, 198, 177, 185, 16, 116, 117, + 111, 60, 128, 227, 216, 233, 12, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 156, 227, 139, 58, 165, 68, 195, 241, 64, 167, 136, 150, + 90, 92, 2, 227, 120, 34, 129, 246, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 24, 228, 118, 73, 116, 174, 160, 113, 137, 136, 86, 225, + 137, 155, 23, 222, 184, 88, 11, 161, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 240, 232, 164, 222, 138, 208, 70, 112, 94, 85, 97, 205, + 98, 19, 236, 172, 56, 119, 113, 74, 252, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 96, 25, 113, 178, 108, 37, 196, 98, 176, 85, 205, 5, 220, + 193, 56, 193, 54, 168, 110, 232, 218, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 192, 253, 106, 248, 62, 118, 169, 219, 227, 88, 5, 58, + 152, 146, 55, 140, 35, 146, 82, 20, 141, 254, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 128, 233, 45, 180, 117, 158, 158, 148, 230, 120, 53, 68, + 242, 185, 43, 122, 99, 181, 57, 203, 130, 241, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 31, 203, 9, 153, 48, 50, 206, 1, 185, 22, 170, 118, + 67, 181, 197, 226, 21, 65, 240, 27, 111, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 54, 239, 97, 250, 229, 245, 13, 18, 58, 227, 164, 162, + 162, 20, 185, 219, 218, 138, 98, 23, 87, 250, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 28, 88, 211, 199, 251, 154, 139, 180, 68, 224, 112, + 90, 90, 206, 58, 149, 140, 108, 217, 233, 102, 199, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 24, 113, 65, 206, 213, 13, 116, 13, 
175, 194, 104, + 136, 135, 15, 76, 212, 125, 61, 126, 34, 5, 202, 253, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 240, 106, 142, 14, 90, 138, 136, 134, 214, 154, 23, + 84, 75, 155, 248, 74, 234, 102, 238, 88, 51, 228, 233, + ], +]; + +/// `MAX_DECIMAL_FOR_EACH_PRECISION[p - 1]` holds the maximum `i128` value +/// that can be stored in a [DataType::Decimal128] value of precision `p` +pub(crate) const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ + 9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999, + 9999999999999999, + 99999999999999999, + 999999999999999999, + 9999999999999999999, + 99999999999999999999, + 999999999999999999999, + 9999999999999999999999, + 99999999999999999999999, + 999999999999999999999999, + 9999999999999999999999999, + 99999999999999999999999999, + 999999999999999999999999999, + 9999999999999999999999999999, + 99999999999999999999999999999, + 999999999999999999999999999999, + 9999999999999999999999999999999, + 99999999999999999999999999999999, + 999999999999999999999999999999999, + 9999999999999999999999999999999999, + 99999999999999999999999999999999999, + 999999999999999999999999999999999999, + 9999999999999999999999999999999999999, + 99999999999999999999999999999999999999, +]; + +/// `MIN_DECIMAL_FOR_EACH_PRECISION[p - 1]` holds the minimum `i128` value +/// that can be stored in a [DataType::Decimal128] value of precision `p` +pub(crate) const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ + -9, + -99, + -999, + -9999, + -99999, + -999999, + -9999999, + -99999999, + -999999999, + -9999999999, + -99999999999, + -999999999999, + -9999999999999, + -99999999999999, + -999999999999999, + -9999999999999999, + -99999999999999999, + -999999999999999999, + -9999999999999999999, + -99999999999999999999, + -999999999999999999999, + -9999999999999999999999, + -99999999999999999999999, + -999999999999999999999999, + -9999999999999999999999999, 
+ -99999999999999999999999999, + -999999999999999999999999999, + -9999999999999999999999999999, + -99999999999999999999999999999, + -999999999999999999999999999999, + -9999999999999999999999999999999, + -99999999999999999999999999999999, + -999999999999999999999999999999999, + -9999999999999999999999999999999999, + -99999999999999999999999999999999999, + -999999999999999999999999999999999999, + -9999999999999999999999999999999999999, + -99999999999999999999999999999999999999, +]; + +/// The maximum precision for [DataType::Decimal128] values +pub(crate) const DECIMAL128_MAX_PRECISION: u8 = 38; + +/// The maximum scale for [DataType::Decimal128] values +pub(crate) const DECIMAL128_MAX_SCALE: u8 = 38; + +/// The maximum precision for [DataType::Decimal256] values +pub(crate) const DECIMAL256_MAX_PRECISION: u8 = 76; + +/// The maximum scale for [DataType::Decimal256] values +pub(crate) const DECIMAL256_MAX_SCALE: u8 = 76; + +/// The default scale for [DataType::Decimal128] and [DataType::Decimal256] values +pub(crate) const DECIMAL_DEFAULT_SCALE: u8 = 10; + +/// Validates that the specified `i128` value can be properly +/// interpreted as a Decimal number with precision `precision` +#[inline] +pub(crate) fn validate_decimal_precision(value: i128, precision: u8) -> Result<()> { + if precision > DECIMAL128_MAX_PRECISION { + return Err(ArrowError::InvalidArgumentError(format!( + "Max precision of a Decimal128 is {}, but got {}", + DECIMAL128_MAX_PRECISION, precision, + ))); + } + + let max = MAX_DECIMAL_FOR_EACH_PRECISION[usize::from(precision) - 1]; + let min = MIN_DECIMAL_FOR_EACH_PRECISION[usize::from(precision) - 1]; + + if value > max { + Err(ArrowError::InvalidArgumentError(format!( + "{} is too large to store in a Decimal128 of precision {}. Max is {}", + value, precision, max + ))) + } else if value < min { + Err(ArrowError::InvalidArgumentError(format!( + "{} is too small to store in a Decimal128 of precision {}. 
Min is {}", + value, precision, min + ))) + } else { + Ok(()) + } +} + +/// Validates that the specified `byte_array` of little-endian format +/// value can be properly interpreted as a Decimal256 number with precision `precision` +#[inline] +pub(crate) fn validate_decimal256_precision_with_lt_bytes( + lt_value: &[u8], + precision: u8, +) -> Result<()> { + if precision > DECIMAL256_MAX_PRECISION { + return Err(ArrowError::InvalidArgumentError(format!( + "Max precision of a Decimal256 is {}, but got {}", + DECIMAL256_MAX_PRECISION, precision, + ))); + } + let max = MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[usize::from(precision) - 1]; + let min = MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[usize::from(precision) - 1]; + + if singed_cmp_le_bytes(lt_value, &max) == Ordering::Greater { + Err(ArrowError::InvalidArgumentError(format!( + "{:?} is too large to store in a Decimal256 of precision {}. Max is {:?}", + BigInt::from_signed_bytes_le(lt_value), + precision, + BigInt::from_signed_bytes_le(&max) + ))) + } else if singed_cmp_le_bytes(lt_value, &min) == Ordering::Less { + Err(ArrowError::InvalidArgumentError(format!( + "{:?} is too small to store in a Decimal256 of precision {}. 
Min is {:?}", + BigInt::from_signed_bytes_le(lt_value), + precision, + BigInt::from_signed_bytes_le(&min) + ))) + } else { + Ok(()) + } +} + #[cfg(test)] mod tests { use super::*; @@ -471,4 +1253,23 @@ mod tests { assert_eq!(left == right, left_decimal == right_decimal) } } + + #[test] + fn test_decimal256_min_max_for_precision() { + // The precision from 1 to 76 + let mut max_value = "9".to_string(); + let mut min_value = "-9".to_string(); + for i in 1..77 { + let max_decimal = + Decimal256::from(BigInt::from_str_radix(max_value.as_str(), 10).unwrap()); + let min_decimal = + Decimal256::from(BigInt::from_str_radix(min_value.as_str(), 10).unwrap()); + let max_bytes = MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[i - 1]; + let min_bytes = MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[i - 1]; + max_value += "9"; + min_value += "9"; + assert_eq!(max_decimal.raw_value(), &max_bytes); + assert_eq!(min_decimal.raw_value(), &min_bytes); + } + } } From da189d16cee1548da5c324ef980cd7982fdfce42 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Mon, 12 Sep 2022 18:42:20 +0100 Subject: [PATCH 02/16] Flatten schema --- arrow-schema/src/field.rs | 12 ++++-------- arrow-schema/src/lib.rs | 12 ++++++++---- arrow-schema/src/schema.rs | 15 +++++---------- arrow/src/datatypes/mod.rs | 4 +--- arrow/src/error.rs | 2 +- arrow/src/record_batch.rs | 2 +- 6 files changed, 20 insertions(+), 27 deletions(-) diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs index 2da449a5d1e..b45e706b77f 100644 --- a/arrow-schema/src/field.rs +++ b/arrow-schema/src/field.rs @@ -145,8 +145,7 @@ impl Field { /// Set the name of the [`Field`] and returns self. /// /// ``` - /// # use arrow_schema::field::Field; - /// # use arrow_schema::datatype::DataType; + /// # use arrow_schema::*; /// let field = Field::new("c1", DataType::Int64, false) /// .with_name("c2"); /// @@ -166,8 +165,7 @@ impl Field { /// Set [`DataType`] of the [`Field`] and returns self. 
/// /// ``` - /// # use arrow_schema::field::Field; - /// # use arrow_schema::datatype::DataType; + /// # use arrow_schema::*; /// let field = Field::new("c1", DataType::Int64, false) /// .with_data_type(DataType::Utf8); /// @@ -187,8 +185,7 @@ impl Field { /// Set `nullable` of the [`Field`] and returns self. /// /// ``` - /// # use arrow_schema::field::Field; - /// # use arrow_schema::datatype::DataType; + /// # use arrow_schema::*; /// let field = Field::new("c1", DataType::Int64, false) /// .with_nullable(true); /// @@ -541,8 +538,7 @@ impl Field { /// Example: /// /// ``` - /// # use arrow_schema::field::Field; - /// # use arrow_schema::datatype::DataType; + /// # use arrow_schema::*; /// let mut field = Field::new("c1", DataType::Int64, false); /// assert!(field.try_merge(&Field::new("c1", DataType::Int64, true)).is_ok()); /// assert!(field.is_nullable()); diff --git a/arrow-schema/src/lib.rs b/arrow-schema/src/lib.rs index 867c428f1ab..34030f2d356 100644 --- a/arrow-schema/src/lib.rs +++ b/arrow-schema/src/lib.rs @@ -17,7 +17,11 @@ //! 
Arrow logical types -pub mod datatype; -pub mod error; -pub mod field; -pub mod schema; +mod datatype; +pub use datatype::*; +mod error; +pub use error::*; +mod field; +pub use field::*; +mod schema; +pub use schema::*; diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index e52e12b7057..d0bff9d0ab9 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -52,9 +52,7 @@ impl Schema { /// # Example /// /// ``` - /// # use arrow_schema::field::Field; - /// # use arrow_schema::datatype::DataType; - /// # use arrow_schema::schema::Schema; + /// # use arrow_schema::*; /// let field_a = Field::new("a", DataType::Int64, false); /// let field_b = Field::new("b", DataType::Boolean, false); /// @@ -70,10 +68,9 @@ impl Schema { /// # Example /// /// ``` - /// # use arrow_schema::field::Field; - /// # use arrow_schema::datatype::DataType; - /// # use arrow_schema::schema::Schema; + /// # use arrow_schema::*; /// # use std::collections::HashMap; + /// /// let field_a = Field::new("a", DataType::Int64, false); /// let field_b = Field::new("b", DataType::Boolean, false); /// @@ -119,9 +116,7 @@ impl Schema { /// Example: /// /// ``` - /// # use arrow_schema::field::Field; - /// # use arrow_schema::datatype::DataType; - /// # use arrow_schema::schema::Schema; + /// # use arrow_schema::*; /// /// let merged = Schema::try_merge(vec![ /// Schema::new(vec![ @@ -437,7 +432,7 @@ mod tests { if let Err(e) = projected { assert_eq!( e.to_string(), - "Schema error: project index 3 out of bounds, max field 3".to_string() + "Error indexing field: project index 3 out of bounds, max field 3".to_string() ) } } diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs index b19890ea7ad..ab1e355e672 100644 --- a/arrow/src/datatypes/mod.rs +++ b/arrow/src/datatypes/mod.rs @@ -32,9 +32,7 @@ mod types; pub use types::*; mod delta; -pub use arrow_schema::datatype::*; -pub use arrow_schema::field::*; -pub use arrow_schema::schema::*; +pub use 
arrow_schema::{DataType, Field, IntervalUnit, Schema, TimeUnit, UnionMode}; #[cfg(feature = "ffi")] mod ffi; diff --git a/arrow/src/error.rs b/arrow/src/error.rs index 8f0c78f9e06..b0eb9b99594 100644 --- a/arrow/src/error.rs +++ b/arrow/src/error.rs @@ -22,7 +22,7 @@ use std::io::Write; use std::error::Error; // Re-export ArrowSchemaError -pub use arrow_schema::error::ArrowSchemaError; +pub use arrow_schema::ArrowSchemaError; /// Many different operations in the `arrow` crate return this error type. #[derive(Debug)] diff --git a/arrow/src/record_batch.rs b/arrow/src/record_batch.rs index 4b86ceb93b5..d71c1a65a2e 100644 --- a/arrow/src/record_batch.rs +++ b/arrow/src/record_batch.rs @@ -18,7 +18,7 @@ //! A two-dimensional batch of column-oriented data with a defined //! [schema](crate::datatypes::Schema). -use arrow_schema::error::ArrowSchemaError; +use arrow_schema::ArrowSchemaError; use std::sync::Arc; use crate::array::*; From f2ff606a5ec1c41223d098af057e367ac7084f30 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Mon, 12 Sep 2022 18:50:23 +0100 Subject: [PATCH 03/16] Move decimal logic --- arrow-schema/src/schema.rs | 3 +- arrow/src/array/array_decimal.rs | 9 +- arrow/src/array/builder/decimal_builder.rs | 5 +- arrow/src/array/data.rs | 8 +- arrow/src/csv/reader.rs | 1 - arrow/src/datatypes/decimal.rs | 832 +++++++++++++++++++++ arrow/src/datatypes/mod.rs | 2 + arrow/src/datatypes/types.rs | 2 +- arrow/src/util/decimal.rs | 809 +------------------- 9 files changed, 852 insertions(+), 819 deletions(-) create mode 100644 arrow/src/datatypes/decimal.rs diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index d0bff9d0ab9..686fb572911 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -432,7 +432,8 @@ mod tests { if let Err(e) = projected { assert_eq!( e.to_string(), - "Error indexing field: project index 3 out of bounds, max field 3".to_string() + "Error indexing field: project index 3 out of bounds, max field 3" 
+ .to_string() ) } } diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs index 8780e6315b4..a3abb5ca1f3 100644 --- a/arrow/src/array/array_decimal.rs +++ b/arrow/src/array/array_decimal.rs @@ -27,14 +27,13 @@ use super::{ use super::{BooleanBufferBuilder, DecimalIter, FixedSizeBinaryArray}; #[allow(deprecated)] use crate::buffer::{Buffer, MutableBuffer}; +use crate::datatypes::validate_decimal_precision; use crate::datatypes::{ - DataType, Decimal128Type, Decimal256Type, DecimalType, NativeDecimalType, + validate_decimal256_precision_with_lt_bytes, DataType, Decimal128Type, + Decimal256Type, DecimalType, NativeDecimalType, }; use crate::error::{ArrowError, Result}; -use crate::util::decimal::{ - validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, Decimal, - Decimal256, -}; +use crate::util::decimal::{Decimal, Decimal256}; /// `Decimal128Array` stores fixed width decimal numbers, /// with a fixed precision and scale. diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs index c2a03862679..daa30eebed9 100644 --- a/arrow/src/array/builder/decimal_builder.rs +++ b/arrow/src/array/builder/decimal_builder.rs @@ -25,9 +25,10 @@ use crate::array::{ArrayBuilder, FixedSizeBinaryBuilder}; use crate::error::{ArrowError, Result}; -use crate::util::decimal::{ - validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, Decimal256, +use crate::datatypes::{ + validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, }; +use crate::util::decimal::Decimal256; /// Array Builder for [`Decimal128Array`] /// diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs index bc504ff9453..7571ba210d7 100644 --- a/arrow/src/array/data.rs +++ b/arrow/src/array/data.rs @@ -18,12 +18,12 @@ //! Contains `ArrayData`, a generic representation of Arrow array data which encapsulates //! common attributes and operations for Arrow array. 
-use crate::datatypes::{DataType, IntervalUnit, UnionMode}; +use crate::datatypes::{ + validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, DataType, + IntervalUnit, UnionMode, +}; use crate::error::{ArrowError, Result}; use crate::util::bit_iterator::BitSliceIterator; -use crate::util::decimal::{ - validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, -}; use crate::{bitmap::Bitmap, datatypes::ArrowNativeType}; use crate::{ buffer::{Buffer, MutableBuffer}, diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index 54b7e045bb7..7eb5514e86f 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -58,7 +58,6 @@ use crate::error::{ArrowError, Result}; use crate::record_batch::{RecordBatch, RecordBatchOptions}; use crate::util::reader_parser::Parser; -use crate::util::decimal::validate_decimal_precision; use csv_crate::{ByteRecord, StringRecord}; use std::ops::Neg; diff --git a/arrow/src/datatypes/decimal.rs b/arrow/src/datatypes/decimal.rs new file mode 100644 index 00000000000..cf0ff29686b --- /dev/null +++ b/arrow/src/datatypes/decimal.rs @@ -0,0 +1,832 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::error::{ArrowError, Result}; +use crate::util::decimal::singed_cmp_le_bytes; +use num::BigInt; +use std::cmp::Ordering; + +// MAX decimal256 value of little-endian format for each precision. +// Each element is the max value of signed 256-bit integer for the specified precision which +// is encoded to the 32-byte width format of little-endian. +pub(crate) const MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [[u8; 32]; 76] = [ + [ + 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + ], + [ + 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + ], + [ + 231, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ], + [ + 15, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ], + [ + 159, 134, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ], + [ + 63, 66, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ], + [ + 127, 150, 152, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 224, 245, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 201, 154, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 227, 11, 84, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 231, 118, 72, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 15, 165, 212, 232, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 159, 114, 78, 24, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 63, 122, 16, 243, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 127, 198, 164, 126, 141, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 192, 111, 242, 134, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 137, 93, 120, 69, 99, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 99, 167, 179, 182, 224, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 231, 137, 4, 35, 199, 138, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 15, 99, 45, 94, 199, 107, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 159, 222, 197, 173, 201, 53, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 63, 178, 186, 201, 224, 25, 30, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 127, 246, 74, 225, 199, 2, 45, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 160, 237, 204, 206, 27, 194, 211, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 73, 72, 1, 20, 22, 149, 69, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 227, 210, 12, 200, 220, 210, 183, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 231, 60, 128, 208, 159, 60, 46, 59, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 15, 97, 2, 37, 62, 94, 206, 79, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 159, 202, 23, 114, 109, 174, 15, 30, 67, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 63, 234, 237, 116, 70, 208, 156, 44, 159, 12, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 127, 38, 75, 145, 192, 34, 32, 190, 55, 126, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 128, 239, 172, 133, 91, 65, 109, 45, 238, 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 9, 91, 193, 56, 147, 141, 68, 198, 77, 49, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 99, 142, 141, 55, 192, 135, 173, 190, 9, 237, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 231, 143, 135, 43, 130, 77, 199, 114, 97, 66, 19, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 15, 159, 75, 179, 21, 7, 201, 123, 206, 151, 192, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 159, 54, 244, 0, 217, 70, 218, 213, 16, 238, 133, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 63, 34, 138, 9, 122, 196, 134, 90, 168, 76, 59, 75, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 127, 86, 101, 95, 196, 172, 67, 137, 147, 254, 80, 240, 2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 96, 245, 185, 171, 191, 164, 92, 195, 241, 41, 99, 29, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 201, 149, 67, 181, 124, 111, 158, 161, 113, 163, 223, + 37, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 227, 217, 163, 20, 223, 90, 48, 80, 112, 98, 188, 122, + 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 231, 130, 102, 206, 182, 140, 227, 33, 99, 216, 91, 203, + 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 15, 29, 1, 16, 36, 127, 227, 82, 223, 115, 150, 241, + 123, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 159, 34, 11, 160, 104, 247, 226, 60, 185, 
134, 224, 111, + 215, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 63, 90, 111, 64, 22, 170, 221, 96, 60, 67, 197, 94, 106, + 192, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 127, 134, 89, 132, 222, 164, 168, 200, 91, 160, 180, + 179, 39, 132, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 64, 127, 43, 177, 112, 150, 214, 149, 67, 14, 5, + 141, 41, 175, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 137, 248, 178, 235, 102, 224, 97, 218, 163, 142, + 50, 130, 159, 215, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 99, 181, 253, 52, 5, 196, 210, 135, 102, 146, 249, + 21, 59, 108, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 231, 21, 233, 17, 52, 168, 59, 78, 1, 184, 191, + 219, 78, 58, 172, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 15, 219, 26, 179, 8, 146, 84, 14, 13, 48, 125, 149, + 20, 71, 186, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 159, 142, 12, 255, 86, 180, 77, 143, 130, 224, 227, + 214, 205, 198, 70, 11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 63, 146, 125, 246, 101, 11, 9, 153, 25, 197, 230, + 100, 10, 196, 195, 112, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 127, 182, 231, 160, 251, 113, 90, 250, 255, 178, 3, + 241, 103, 168, 165, 103, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 32, 13, 73, 212, 115, 136, 199, 255, 253, 36, + 106, 15, 148, 120, 12, 20, 4, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 73, 131, 218, 74, 134, 84, 203, 253, 235, 113, + 37, 154, 200, 181, 124, 200, 40, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 227, 32, 137, 236, 62, 77, 241, 233, 55, 115, + 118, 5, 214, 25, 223, 212, 151, 1, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 
255, 255, 231, 72, 91, 61, 117, 4, 109, 35, 47, 128, + 160, 54, 92, 2, 183, 80, 238, 15, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 15, 217, 144, 101, 148, 44, 66, 98, 215, 1, + 69, 34, 154, 23, 38, 39, 79, 159, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 159, 122, 168, 247, 203, 189, 149, 214, 105, + 18, 178, 86, 5, 236, 124, 135, 23, 57, 6, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 63, 202, 148, 172, 247, 105, 217, 97, 34, 184, + 244, 98, 53, 56, 225, 74, 235, 58, 62, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 127, 230, 207, 189, 172, 35, 126, 210, 87, 49, + 143, 221, 21, 50, 204, 236, 48, 77, 110, 2, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 0, 31, 106, 191, 100, 237, 56, 110, 237, + 151, 167, 218, 244, 249, 63, 233, 3, 79, 24, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 9, 54, 37, 122, 239, 69, 57, 78, 70, 239, + 139, 138, 144, 195, 127, 28, 39, 22, 243, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 99, 28, 116, 197, 90, 187, 60, 14, 191, + 88, 119, 105, 165, 163, 253, 28, 135, 221, 126, 9, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 231, 27, 137, 182, 139, 81, 95, 142, 118, + 119, 169, 30, 118, 100, 232, 33, 71, 167, 244, 94, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 15, 23, 91, 33, 117, 47, 185, 143, 161, + 170, 158, 50, 157, 236, 19, 83, 199, 136, 142, 181, 3, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 159, 230, 142, 77, 147, 218, 59, 157, 79, + 170, 50, 250, 35, 62, 199, 62, 201, 87, 145, 23, 37, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 63, 2, 149, 7, 193, 137, 86, 36, 28, 167, + 250, 197, 103, 109, 200, 115, 220, 109, 173, 235, 114, 1, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 127, 22, 210, 75, 138, 97, 97, 107, 25, + 135, 202, 187, 13, 70, 212, 133, 156, 74, 198, 52, 125, 14, 0, 0, + ], + [ + 255, 255, 255, 255, 
255, 255, 255, 255, 255, 224, 52, 246, 102, 207, 205, 49, + 254, 70, 233, 85, 137, 188, 74, 58, 29, 234, 190, 15, 228, 144, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 201, 16, 158, 5, 26, 10, 242, 237, + 197, 28, 91, 93, 93, 235, 70, 36, 37, 117, 157, 232, 168, 5, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 227, 167, 44, 56, 4, 101, 116, 75, + 187, 31, 143, 165, 165, 49, 197, 106, 115, 147, 38, 22, 153, 56, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 231, 142, 190, 49, 42, 242, 139, + 242, 80, 61, 151, 119, 120, 240, 179, 43, 130, 194, 129, 221, 250, 53, 2, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 15, 149, 113, 241, 165, 117, 119, + 121, 41, 101, 232, 171, 180, 100, 7, 181, 21, 153, 17, 167, 204, 27, 22, + ], +]; + +// MIN decimal256 value of little-endian format for each precision. +// Each element is the min value of signed 256-bit integer for the specified precision which +// is encoded to the 32-byte width format of little-endian. 
+pub(crate) const MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [[u8; 32]; 76] = [ + [ + 247, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 157, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 25, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 241, 216, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 97, 121, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 193, 189, 240, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 129, 105, 103, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 31, 10, 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 54, 101, 196, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 28, 244, 171, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 24, 137, 183, 232, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 240, 90, 43, 23, 
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 96, 141, 177, 231, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 192, 133, 239, 12, 165, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 128, 57, 91, 129, 114, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 63, 144, 13, 121, 220, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 118, 162, 135, 186, 156, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 156, 88, 76, 73, 31, 242, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 24, 118, 251, 220, 56, 117, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 240, 156, 210, 161, 56, 148, 250, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 96, 33, 58, 82, 54, 202, 201, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 192, 77, 69, 54, 31, 230, 225, 253, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 128, 9, 181, 30, 56, 253, 210, 234, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 95, 
18, 51, 49, 228, 61, 44, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 182, 183, 254, 235, 233, 106, 186, 247, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 28, 45, 243, 55, 35, 45, 72, 173, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 24, 195, 127, 47, 96, 195, 209, 196, 252, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 240, 158, 253, 218, 193, 161, 49, 176, 223, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 96, 53, 232, 141, 146, 81, 240, 225, 188, 254, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 192, 21, 18, 139, 185, 47, 99, 211, 96, 243, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 128, 217, 180, 110, 63, 221, 223, 65, 200, 129, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 127, 16, 83, 122, 164, 190, 146, 210, 17, 251, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 246, 164, 62, 199, 108, 114, 187, 57, 178, 206, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 156, 113, 114, 200, 63, 120, 82, 65, 246, 18, 254, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 24, 112, 120, 212, 125, 178, 56, 141, 158, 189, 236, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 240, 96, 180, 76, 234, 248, 54, 132, 49, 
104, 63, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 96, 201, 11, 255, 38, 185, 37, 42, 239, 17, 122, 248, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 192, 221, 117, 246, 133, 59, 121, 165, 87, 179, 196, 180, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 128, 169, 154, 160, 59, 83, 188, 118, 108, 1, 175, 15, 253, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 159, 10, 70, 84, 64, 91, 163, 60, 14, 214, 156, 226, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 54, 106, 188, 74, 131, 144, 97, 94, 142, 92, 32, 218, 254, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 28, 38, 92, 235, 32, 165, 207, 175, 143, 157, 67, 133, 244, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 24, 125, 153, 49, 73, 115, 28, 222, 156, 39, 164, 52, 141, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 240, 226, 254, 239, 219, 128, 28, 173, 32, 140, 105, 14, 132, 251, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 96, 221, 244, 95, 151, 8, 29, 195, 70, 121, 31, 144, 40, 211, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 192, 165, 144, 191, 233, 85, 34, 159, 195, 188, 58, 161, 149, 63, + 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 128, 121, 166, 123, 33, 91, 87, 55, 164, 95, 75, 76, 216, 123, + 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 191, 128, 212, 78, 143, 105, 41, 106, 188, 241, 250, 114, 214, + 80, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], 
+ [ + 1, 0, 0, 0, 0, 0, 118, 7, 77, 20, 153, 31, 158, 37, 92, 113, 205, 125, 96, 40, + 249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 156, 74, 2, 203, 250, 59, 45, 120, 153, 109, 6, 234, 196, 147, + 187, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 24, 234, 22, 238, 203, 87, 196, 177, 254, 71, 64, 36, 177, 197, + 83, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 240, 36, 229, 76, 247, 109, 171, 241, 242, 207, 130, 106, 235, + 184, 69, 229, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 96, 113, 243, 0, 169, 75, 178, 112, 125, 31, 28, 41, 50, 57, + 185, 244, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 192, 109, 130, 9, 154, 244, 246, 102, 230, 58, 25, 155, 245, + 59, 60, 143, 245, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 128, 73, 24, 95, 4, 142, 165, 5, 0, 77, 252, 14, 152, 87, 90, + 152, 151, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 223, 242, 182, 43, 140, 119, 56, 0, 2, 219, 149, 240, 107, + 135, 243, 235, 251, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 182, 124, 37, 181, 121, 171, 52, 2, 20, 142, 218, 101, 55, + 74, 131, 55, 215, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 28, 223, 118, 19, 193, 178, 14, 22, 200, 140, 137, 250, 41, + 230, 32, 43, 104, 254, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 24, 183, 164, 194, 138, 251, 146, 220, 208, 127, 95, 201, + 163, 253, 72, 175, 17, 240, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 240, 38, 111, 154, 107, 211, 189, 157, 40, 254, 186, 221, + 101, 232, 217, 216, 176, 96, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 96, 133, 87, 8, 52, 66, 106, 41, 150, 237, 77, 169, 250, 19, + 131, 120, 232, 198, 249, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 
0, 0, 0, 0, 0, 0, 192, 53, 107, 83, 8, 150, 38, 158, 221, 71, 11, 157, 202, + 199, 30, 181, 20, 197, 193, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 128, 25, 48, 66, 83, 220, 129, 45, 168, 206, 112, 34, 234, + 205, 51, 19, 207, 178, 145, 253, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 255, 224, 149, 64, 155, 18, 199, 145, 18, 104, 88, 37, + 11, 6, 192, 22, 252, 176, 231, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 246, 201, 218, 133, 16, 186, 198, 177, 185, 16, 116, 117, + 111, 60, 128, 227, 216, 233, 12, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 156, 227, 139, 58, 165, 68, 195, 241, 64, 167, 136, 150, + 90, 92, 2, 227, 120, 34, 129, 246, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 24, 228, 118, 73, 116, 174, 160, 113, 137, 136, 86, 225, + 137, 155, 23, 222, 184, 88, 11, 161, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 240, 232, 164, 222, 138, 208, 70, 112, 94, 85, 97, 205, + 98, 19, 236, 172, 56, 119, 113, 74, 252, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 96, 25, 113, 178, 108, 37, 196, 98, 176, 85, 205, 5, 220, + 193, 56, 193, 54, 168, 110, 232, 218, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 192, 253, 106, 248, 62, 118, 169, 219, 227, 88, 5, 58, + 152, 146, 55, 140, 35, 146, 82, 20, 141, 254, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 128, 233, 45, 180, 117, 158, 158, 148, 230, 120, 53, 68, + 242, 185, 43, 122, 99, 181, 57, 203, 130, 241, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 31, 203, 9, 153, 48, 50, 206, 1, 185, 22, 170, 118, + 67, 181, 197, 226, 21, 65, 240, 27, 111, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 54, 239, 97, 250, 229, 245, 13, 18, 58, 227, 164, 162, + 162, 20, 185, 219, 218, 138, 98, 23, 87, 250, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 28, 88, 211, 199, 251, 154, 139, 180, 68, 224, 112, + 90, 90, 206, 58, 149, 140, 108, 217, 233, 102, 199, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 24, 113, 65, 206, 213, 13, 116, 13, 
175, 194, 104, + 136, 135, 15, 76, 212, 125, 61, 126, 34, 5, 202, 253, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 240, 106, 142, 14, 90, 138, 136, 134, 214, 154, 23, + 84, 75, 155, 248, 74, 234, 102, 238, 88, 51, 228, 233, + ], +]; + +/// `MAX_DECIMAL_FOR_EACH_PRECISION[p]` holds the maximum `i128` value +/// that can be stored in [DataType::Decimal128] value of precision `p` +pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ + 9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999, + 9999999999999999, + 99999999999999999, + 999999999999999999, + 9999999999999999999, + 99999999999999999999, + 999999999999999999999, + 9999999999999999999999, + 99999999999999999999999, + 999999999999999999999999, + 9999999999999999999999999, + 99999999999999999999999999, + 999999999999999999999999999, + 9999999999999999999999999999, + 99999999999999999999999999999, + 999999999999999999999999999999, + 9999999999999999999999999999999, + 99999999999999999999999999999999, + 999999999999999999999999999999999, + 9999999999999999999999999999999999, + 99999999999999999999999999999999999, + 999999999999999999999999999999999999, + 9999999999999999999999999999999999999, + 99999999999999999999999999999999999999, +]; + +/// `MIN_DECIMAL_FOR_EACH_PRECISION[p]` holds the minimum `i128` value +/// that can be stored in a [DataType::Decimal128] value of precision `p` +pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ + -9, + -99, + -999, + -9999, + -99999, + -999999, + -9999999, + -99999999, + -999999999, + -9999999999, + -99999999999, + -999999999999, + -9999999999999, + -99999999999999, + -999999999999999, + -9999999999999999, + -99999999999999999, + -999999999999999999, + -9999999999999999999, + -99999999999999999999, + -999999999999999999999, + -9999999999999999999999, + -99999999999999999999999, + -999999999999999999999999, + -9999999999999999999999999, + 
-99999999999999999999999999, + -999999999999999999999999999, + -9999999999999999999999999999, + -99999999999999999999999999999, + -999999999999999999999999999999, + -9999999999999999999999999999999, + -99999999999999999999999999999999, + -999999999999999999999999999999999, + -9999999999999999999999999999999999, + -99999999999999999999999999999999999, + -999999999999999999999999999999999999, + -9999999999999999999999999999999999999, + -99999999999999999999999999999999999999, +]; + +/// The maximum precision for [DataType::Decimal128] values +pub const DECIMAL128_MAX_PRECISION: u8 = 38; + +/// The maximum scale for [DataType::Decimal128] values +pub const DECIMAL128_MAX_SCALE: u8 = 38; + +/// The maximum precision for [DataType::Decimal256] values +pub const DECIMAL256_MAX_PRECISION: u8 = 76; + +/// The maximum scale for [DataType::Decimal256] values +pub const DECIMAL256_MAX_SCALE: u8 = 76; + +/// The default scale for [DataType::Decimal128] and [DataType::Decimal256] values +pub const DECIMAL_DEFAULT_SCALE: u8 = 10; + +/// Validates that the specified `i128` value can be properly +/// interpreted as a Decimal number with precision `precision` +#[inline] +pub(crate) fn validate_decimal_precision(value: i128, precision: u8) -> Result<()> { + if precision > DECIMAL128_MAX_PRECISION { + return Err(ArrowError::InvalidArgumentError(format!( + "Max precision of a Decimal128 is {}, but got {}", + DECIMAL128_MAX_PRECISION, precision, + ))); + } + + let max = MAX_DECIMAL_FOR_EACH_PRECISION[usize::from(precision) - 1]; + let min = MIN_DECIMAL_FOR_EACH_PRECISION[usize::from(precision) - 1]; + + if value > max { + Err(ArrowError::InvalidArgumentError(format!( + "{} is too large to store in a Decimal128 of precision {}. Max is {}", + value, precision, max + ))) + } else if value < min { + Err(ArrowError::InvalidArgumentError(format!( + "{} is too small to store in a Decimal128 of precision {}. 
Min is {}", + value, precision, min + ))) + } else { + Ok(()) + } +} + +/// Validates that the specified `byte_array` of little-endian format +/// value can be properly interpreted as a Decimal256 number with precision `precision` +#[inline] +pub(crate) fn validate_decimal256_precision_with_lt_bytes( + lt_value: &[u8], + precision: u8, +) -> Result<()> { + if precision > DECIMAL256_MAX_PRECISION { + return Err(ArrowError::InvalidArgumentError(format!( + "Max precision of a Decimal256 is {}, but got {}", + DECIMAL256_MAX_PRECISION, precision, + ))); + } + let max = MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[usize::from(precision) - 1]; + let min = MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[usize::from(precision) - 1]; + + if singed_cmp_le_bytes(lt_value, &max) == Ordering::Greater { + Err(ArrowError::InvalidArgumentError(format!( + "{:?} is too large to store in a Decimal256 of precision {}. Max is {:?}", + BigInt::from_signed_bytes_le(lt_value), + precision, + BigInt::from_signed_bytes_le(&max) + ))) + } else if singed_cmp_le_bytes(lt_value, &min) == Ordering::Less { + Err(ArrowError::InvalidArgumentError(format!( + "{:?} is too small to store in a Decimal256 of precision {}. 
Min is {:?}", + BigInt::from_signed_bytes_le(lt_value), + precision, + BigInt::from_signed_bytes_le(&min) + ))) + } else { + Ok(()) + } +} + +#[cfg(test)] +mod test { + use crate::datatypes::MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION; + use crate::util::decimal::Decimal256; + use num::{BigInt, Num}; + + #[test] + fn test_decimal256_min_max_for_precision() { + // The precision from 1 to 76 + let mut max_value = "9".to_string(); + let mut min_value = "-9".to_string(); + for i in 1..77 { + let max_decimal = + Decimal256::from(BigInt::from_str_radix(max_value.as_str(), 10).unwrap()); + let min_decimal = + Decimal256::from(BigInt::from_str_radix(min_value.as_str(), 10).unwrap()); + let max_bytes = MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[i - 1]; + let min_bytes = MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[i - 1]; + max_value += "9"; + min_value += "9"; + assert_eq!(max_decimal.raw_value(), &max_bytes); + assert_eq!(min_decimal.raw_value(), &min_bytes); + } + } +} diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs index ab1e355e672..693fb40feae 100644 --- a/arrow/src/datatypes/mod.rs +++ b/arrow/src/datatypes/mod.rs @@ -30,7 +30,9 @@ mod numeric; pub use numeric::*; mod types; pub use types::*; +mod decimal; mod delta; +pub use decimal::*; pub use arrow_schema::{DataType, Field, IntervalUnit, Schema, TimeUnit, UnionMode}; diff --git a/arrow/src/datatypes/types.rs b/arrow/src/datatypes/types.rs index 8037ed9a938..1b7d0675bb4 100644 --- a/arrow/src/datatypes/types.rs +++ b/arrow/src/datatypes/types.rs @@ -17,7 +17,7 @@ use super::{ArrowPrimitiveType, DataType, IntervalUnit, TimeUnit}; use crate::datatypes::delta::shift_months; -use crate::util::decimal::{ +use crate::datatypes::{ DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL_DEFAULT_SCALE, }; diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index a41e60b5c81..421942df5c1 100644 --- a/arrow/src/util/decimal.rs +++ 
b/arrow/src/util/decimal.rs @@ -17,7 +17,10 @@ //! Decimal related utils -use crate::datatypes::{DataType, Decimal128Type, Decimal256Type, DecimalType}; +use crate::datatypes::{ + DataType, Decimal128Type, Decimal256Type, DecimalType, DECIMAL256_MAX_PRECISION, + DECIMAL_DEFAULT_SCALE, +}; use crate::error::{ArrowError, Result}; use num::bigint::BigInt; use num::Signed; @@ -293,791 +296,6 @@ pub(crate) fn singed_cmp_le_bytes(left: &[u8], right: &[u8]) -> Ordering { Ordering::Equal } -// MAX decimal256 value of little-endian format for each precision. -// Each element is the max value of signed 256-bit integer for the specified precision which -// is encoded to the 32-byte width format of little-endian. -pub(crate) const MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [[u8; 32]; 76] = [ - [ - 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, - ], - [ - 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, - ], - [ - 231, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ], - [ - 15, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ], - [ - 159, 134, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ], - [ - 63, 66, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ], - [ - 127, 150, 152, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 224, 245, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 201, 154, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 227, 11, 84, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 231, 118, 72, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 15, 165, 212, 232, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 159, 114, 78, 24, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 63, 122, 16, 243, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 127, 198, 164, 126, 141, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 192, 111, 242, 134, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 137, 93, 120, 69, 99, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 99, 167, 179, 182, 224, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 231, 137, 4, 35, 199, 138, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 15, 99, 45, 94, 199, 107, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 159, 222, 197, 173, 201, 53, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 63, 178, 186, 201, 224, 25, 30, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 127, 246, 74, 225, 199, 2, 45, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 160, 237, 204, 206, 27, 194, 211, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 73, 72, 1, 20, 22, 149, 69, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 227, 210, 12, 200, 220, 210, 183, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 231, 60, 128, 208, 159, 60, 46, 59, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - 
[ - 255, 255, 255, 15, 97, 2, 37, 62, 94, 206, 79, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 159, 202, 23, 114, 109, 174, 15, 30, 67, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 63, 234, 237, 116, 70, 208, 156, 44, 159, 12, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 127, 38, 75, 145, 192, 34, 32, 190, 55, 126, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 128, 239, 172, 133, 91, 65, 109, 45, 238, 4, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 9, 91, 193, 56, 147, 141, 68, 198, 77, 49, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 99, 142, 141, 55, 192, 135, 173, 190, 9, 237, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 231, 143, 135, 43, 130, 77, 199, 114, 97, 66, 19, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 15, 159, 75, 179, 21, 7, 201, 123, 206, 151, 192, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 159, 54, 244, 0, 217, 70, 218, 213, 16, 238, 133, 7, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 63, 34, 138, 9, 122, 196, 134, 90, 168, 76, 59, 75, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 127, 86, 101, 95, 196, 172, 67, 137, 147, 254, 80, 240, 2, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 96, 245, 185, 171, 191, 164, 92, 195, 241, 41, 99, 29, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 201, 149, 67, 181, 124, 111, 158, 161, 113, 163, 223, - 37, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 227, 217, 163, 20, 223, 90, 48, 80, 112, 98, 188, 122, - 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - 
[ - 255, 255, 255, 255, 255, 231, 130, 102, 206, 182, 140, 227, 33, 99, 216, 91, 203, - 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 15, 29, 1, 16, 36, 127, 227, 82, 223, 115, 150, 241, - 123, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 159, 34, 11, 160, 104, 247, 226, 60, 185, 134, 224, 111, - 215, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 63, 90, 111, 64, 22, 170, 221, 96, 60, 67, 197, 94, 106, - 192, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 127, 134, 89, 132, 222, 164, 168, 200, 91, 160, 180, - 179, 39, 132, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 64, 127, 43, 177, 112, 150, 214, 149, 67, 14, 5, - 141, 41, 175, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 137, 248, 178, 235, 102, 224, 97, 218, 163, 142, - 50, 130, 159, 215, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 99, 181, 253, 52, 5, 196, 210, 135, 102, 146, 249, - 21, 59, 108, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 231, 21, 233, 17, 52, 168, 59, 78, 1, 184, 191, - 219, 78, 58, 172, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 15, 219, 26, 179, 8, 146, 84, 14, 13, 48, 125, 149, - 20, 71, 186, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 159, 142, 12, 255, 86, 180, 77, 143, 130, 224, 227, - 214, 205, 198, 70, 11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 63, 146, 125, 246, 101, 11, 9, 153, 25, 197, 230, - 100, 10, 196, 195, 112, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 127, 182, 231, 160, 251, 113, 90, 250, 255, 178, 3, - 241, 103, 168, 165, 103, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 32, 13, 73, 212, 115, 136, 199, 255, 253, 36, - 106, 15, 148, 120, 12, 20, 4, 0, 0, 0, 
0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 73, 131, 218, 74, 134, 84, 203, 253, 235, 113, - 37, 154, 200, 181, 124, 200, 40, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 227, 32, 137, 236, 62, 77, 241, 233, 55, 115, - 118, 5, 214, 25, 223, 212, 151, 1, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 231, 72, 91, 61, 117, 4, 109, 35, 47, 128, - 160, 54, 92, 2, 183, 80, 238, 15, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 15, 217, 144, 101, 148, 44, 66, 98, 215, 1, - 69, 34, 154, 23, 38, 39, 79, 159, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 159, 122, 168, 247, 203, 189, 149, 214, 105, - 18, 178, 86, 5, 236, 124, 135, 23, 57, 6, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 63, 202, 148, 172, 247, 105, 217, 97, 34, 184, - 244, 98, 53, 56, 225, 74, 235, 58, 62, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 127, 230, 207, 189, 172, 35, 126, 210, 87, 49, - 143, 221, 21, 50, 204, 236, 48, 77, 110, 2, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 0, 31, 106, 191, 100, 237, 56, 110, 237, - 151, 167, 218, 244, 249, 63, 233, 3, 79, 24, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 9, 54, 37, 122, 239, 69, 57, 78, 70, 239, - 139, 138, 144, 195, 127, 28, 39, 22, 243, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 99, 28, 116, 197, 90, 187, 60, 14, 191, - 88, 119, 105, 165, 163, 253, 28, 135, 221, 126, 9, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 231, 27, 137, 182, 139, 81, 95, 142, 118, - 119, 169, 30, 118, 100, 232, 33, 71, 167, 244, 94, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 15, 23, 91, 33, 117, 47, 185, 143, 161, - 170, 158, 50, 157, 236, 19, 83, 199, 136, 142, 181, 3, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 159, 230, 142, 77, 147, 218, 59, 157, 79, - 170, 50, 250, 35, 62, 199, 62, 201, 87, 145, 23, 37, 0, 0, 
0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 63, 2, 149, 7, 193, 137, 86, 36, 28, 167, - 250, 197, 103, 109, 200, 115, 220, 109, 173, 235, 114, 1, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 127, 22, 210, 75, 138, 97, 97, 107, 25, - 135, 202, 187, 13, 70, 212, 133, 156, 74, 198, 52, 125, 14, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 224, 52, 246, 102, 207, 205, 49, - 254, 70, 233, 85, 137, 188, 74, 58, 29, 234, 190, 15, 228, 144, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 201, 16, 158, 5, 26, 10, 242, 237, - 197, 28, 91, 93, 93, 235, 70, 36, 37, 117, 157, 232, 168, 5, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 227, 167, 44, 56, 4, 101, 116, 75, - 187, 31, 143, 165, 165, 49, 197, 106, 115, 147, 38, 22, 153, 56, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 231, 142, 190, 49, 42, 242, 139, - 242, 80, 61, 151, 119, 120, 240, 179, 43, 130, 194, 129, 221, 250, 53, 2, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 15, 149, 113, 241, 165, 117, 119, - 121, 41, 101, 232, 171, 180, 100, 7, 181, 21, 153, 17, 167, 204, 27, 22, - ], -]; - -// MIN decimal256 value of little-endian format for each precision. -// Each element is the min value of signed 256-bit integer for the specified precision which -// is encoded to the 76-byte width format of little-endian. 
-pub(crate) const MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [[u8; 32]; 76] = [ - [ - 247, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 157, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 25, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 241, 216, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 97, 121, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 193, 189, 240, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 129, 105, 103, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 31, 10, 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 54, 101, 196, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 28, 244, 171, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 24, 137, 183, 232, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 240, 90, 43, 23, 
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 96, 141, 177, 231, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 192, 133, 239, 12, 165, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 128, 57, 91, 129, 114, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 63, 144, 13, 121, 220, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 118, 162, 135, 186, 156, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 156, 88, 76, 73, 31, 242, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 24, 118, 251, 220, 56, 117, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 240, 156, 210, 161, 56, 148, 250, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 96, 33, 58, 82, 54, 202, 201, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 192, 77, 69, 54, 31, 230, 225, 253, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 128, 9, 181, 30, 56, 253, 210, 234, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 95, 
18, 51, 49, 228, 61, 44, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 182, 183, 254, 235, 233, 106, 186, 247, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 28, 45, 243, 55, 35, 45, 72, 173, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 24, 195, 127, 47, 96, 195, 209, 196, 252, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 240, 158, 253, 218, 193, 161, 49, 176, 223, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 96, 53, 232, 141, 146, 81, 240, 225, 188, 254, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 192, 21, 18, 139, 185, 47, 99, 211, 96, 243, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 128, 217, 180, 110, 63, 221, 223, 65, 200, 129, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 127, 16, 83, 122, 164, 190, 146, 210, 17, 251, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 246, 164, 62, 199, 108, 114, 187, 57, 178, 206, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 156, 113, 114, 200, 63, 120, 82, 65, 246, 18, 254, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 24, 112, 120, 212, 125, 178, 56, 141, 158, 189, 236, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 240, 96, 180, 76, 234, 248, 54, 132, 49, 
104, 63, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 96, 201, 11, 255, 38, 185, 37, 42, 239, 17, 122, 248, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 192, 221, 117, 246, 133, 59, 121, 165, 87, 179, 196, 180, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 128, 169, 154, 160, 59, 83, 188, 118, 108, 1, 175, 15, 253, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 159, 10, 70, 84, 64, 91, 163, 60, 14, 214, 156, 226, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 54, 106, 188, 74, 131, 144, 97, 94, 142, 92, 32, 218, 254, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 28, 38, 92, 235, 32, 165, 207, 175, 143, 157, 67, 133, 244, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 24, 125, 153, 49, 73, 115, 28, 222, 156, 39, 164, 52, 141, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 240, 226, 254, 239, 219, 128, 28, 173, 32, 140, 105, 14, 132, 251, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 96, 221, 244, 95, 151, 8, 29, 195, 70, 121, 31, 144, 40, 211, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 192, 165, 144, 191, 233, 85, 34, 159, 195, 188, 58, 161, 149, 63, - 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 128, 121, 166, 123, 33, 91, 87, 55, 164, 95, 75, 76, 216, 123, - 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 191, 128, 212, 78, 143, 105, 41, 106, 188, 241, 250, 114, 214, - 80, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], 
- [ - 1, 0, 0, 0, 0, 0, 118, 7, 77, 20, 153, 31, 158, 37, 92, 113, 205, 125, 96, 40, - 249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 156, 74, 2, 203, 250, 59, 45, 120, 153, 109, 6, 234, 196, 147, - 187, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 24, 234, 22, 238, 203, 87, 196, 177, 254, 71, 64, 36, 177, 197, - 83, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 240, 36, 229, 76, 247, 109, 171, 241, 242, 207, 130, 106, 235, - 184, 69, 229, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 96, 113, 243, 0, 169, 75, 178, 112, 125, 31, 28, 41, 50, 57, - 185, 244, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 192, 109, 130, 9, 154, 244, 246, 102, 230, 58, 25, 155, 245, - 59, 60, 143, 245, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 128, 73, 24, 95, 4, 142, 165, 5, 0, 77, 252, 14, 152, 87, 90, - 152, 151, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 223, 242, 182, 43, 140, 119, 56, 0, 2, 219, 149, 240, 107, - 135, 243, 235, 251, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 182, 124, 37, 181, 121, 171, 52, 2, 20, 142, 218, 101, 55, - 74, 131, 55, 215, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 28, 223, 118, 19, 193, 178, 14, 22, 200, 140, 137, 250, 41, - 230, 32, 43, 104, 254, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 24, 183, 164, 194, 138, 251, 146, 220, 208, 127, 95, 201, - 163, 253, 72, 175, 17, 240, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 240, 38, 111, 154, 107, 211, 189, 157, 40, 254, 186, 221, - 101, 232, 217, 216, 176, 96, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 96, 133, 87, 8, 52, 66, 106, 41, 150, 237, 77, 169, 250, 19, - 131, 120, 232, 198, 249, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 
0, 0, 0, 0, 0, 0, 192, 53, 107, 83, 8, 150, 38, 158, 221, 71, 11, 157, 202, - 199, 30, 181, 20, 197, 193, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 128, 25, 48, 66, 83, 220, 129, 45, 168, 206, 112, 34, 234, - 205, 51, 19, 207, 178, 145, 253, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 255, 224, 149, 64, 155, 18, 199, 145, 18, 104, 88, 37, - 11, 6, 192, 22, 252, 176, 231, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 246, 201, 218, 133, 16, 186, 198, 177, 185, 16, 116, 117, - 111, 60, 128, 227, 216, 233, 12, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 156, 227, 139, 58, 165, 68, 195, 241, 64, 167, 136, 150, - 90, 92, 2, 227, 120, 34, 129, 246, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 24, 228, 118, 73, 116, 174, 160, 113, 137, 136, 86, 225, - 137, 155, 23, 222, 184, 88, 11, 161, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 240, 232, 164, 222, 138, 208, 70, 112, 94, 85, 97, 205, - 98, 19, 236, 172, 56, 119, 113, 74, 252, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 96, 25, 113, 178, 108, 37, 196, 98, 176, 85, 205, 5, 220, - 193, 56, 193, 54, 168, 110, 232, 218, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 192, 253, 106, 248, 62, 118, 169, 219, 227, 88, 5, 58, - 152, 146, 55, 140, 35, 146, 82, 20, 141, 254, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 128, 233, 45, 180, 117, 158, 158, 148, 230, 120, 53, 68, - 242, 185, 43, 122, 99, 181, 57, 203, 130, 241, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 31, 203, 9, 153, 48, 50, 206, 1, 185, 22, 170, 118, - 67, 181, 197, 226, 21, 65, 240, 27, 111, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 54, 239, 97, 250, 229, 245, 13, 18, 58, 227, 164, 162, - 162, 20, 185, 219, 218, 138, 98, 23, 87, 250, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 28, 88, 211, 199, 251, 154, 139, 180, 68, 224, 112, - 90, 90, 206, 58, 149, 140, 108, 217, 233, 102, 199, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 24, 113, 65, 206, 213, 13, 116, 13, 
175, 194, 104, - 136, 135, 15, 76, 212, 125, 61, 126, 34, 5, 202, 253, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 240, 106, 142, 14, 90, 138, 136, 134, 214, 154, 23, - 84, 75, 155, 248, 74, 234, 102, 238, 88, 51, 228, 233, - ], -]; - -/// `MAX_DECIMAL_FOR_EACH_PRECISION[p]` holds the maximum `i128` value -/// that can be stored in [DataType::Decimal128] value of precision `p` -pub(crate) const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ - 9, - 99, - 999, - 9999, - 99999, - 999999, - 9999999, - 99999999, - 999999999, - 9999999999, - 99999999999, - 999999999999, - 9999999999999, - 99999999999999, - 999999999999999, - 9999999999999999, - 99999999999999999, - 999999999999999999, - 9999999999999999999, - 99999999999999999999, - 999999999999999999999, - 9999999999999999999999, - 99999999999999999999999, - 999999999999999999999999, - 9999999999999999999999999, - 99999999999999999999999999, - 999999999999999999999999999, - 9999999999999999999999999999, - 99999999999999999999999999999, - 999999999999999999999999999999, - 9999999999999999999999999999999, - 99999999999999999999999999999999, - 999999999999999999999999999999999, - 9999999999999999999999999999999999, - 99999999999999999999999999999999999, - 999999999999999999999999999999999999, - 9999999999999999999999999999999999999, - 99999999999999999999999999999999999999, -]; - -/// `MIN_DECIMAL_FOR_EACH_PRECISION[p]` holds the minimum `i128` value -/// that can be stored in a [DataType::Decimal128] value of precision `p` -pub(crate) const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ - -9, - -99, - -999, - -9999, - -99999, - -999999, - -9999999, - -99999999, - -999999999, - -9999999999, - -99999999999, - -999999999999, - -9999999999999, - -99999999999999, - -999999999999999, - -9999999999999999, - -99999999999999999, - -999999999999999999, - -9999999999999999999, - -99999999999999999999, - -999999999999999999999, - -9999999999999999999999, - -99999999999999999999999, - -999999999999999999999999, - -9999999999999999999999999, 
- -99999999999999999999999999, - -999999999999999999999999999, - -9999999999999999999999999999, - -99999999999999999999999999999, - -999999999999999999999999999999, - -9999999999999999999999999999999, - -99999999999999999999999999999999, - -999999999999999999999999999999999, - -9999999999999999999999999999999999, - -99999999999999999999999999999999999, - -999999999999999999999999999999999999, - -9999999999999999999999999999999999999, - -99999999999999999999999999999999999999, -]; - -/// The maximum precision for [DataType::Decimal128] values -pub(crate) const DECIMAL128_MAX_PRECISION: u8 = 38; - -/// The maximum scale for [DataType::Decimal128] values -pub(crate) const DECIMAL128_MAX_SCALE: u8 = 38; - -/// The maximum precision for [DataType::Decimal256] values -pub(crate) const DECIMAL256_MAX_PRECISION: u8 = 76; - -/// The maximum scale for [DataType::Decimal256] values -pub(crate) const DECIMAL256_MAX_SCALE: u8 = 76; - -/// The default scale for [DataType::Decimal128] and [DataType::Decimal256] values -pub(crate) const DECIMAL_DEFAULT_SCALE: u8 = 10; - -/// Validates that the specified `i128` value can be properly -/// interpreted as a Decimal number with precision `precision` -#[inline] -pub(crate) fn validate_decimal_precision(value: i128, precision: u8) -> Result<()> { - if precision > DECIMAL128_MAX_PRECISION { - return Err(ArrowError::InvalidArgumentError(format!( - "Max precision of a Decimal128 is {}, but got {}", - DECIMAL128_MAX_PRECISION, precision, - ))); - } - - let max = MAX_DECIMAL_FOR_EACH_PRECISION[usize::from(precision) - 1]; - let min = MIN_DECIMAL_FOR_EACH_PRECISION[usize::from(precision) - 1]; - - if value > max { - Err(ArrowError::InvalidArgumentError(format!( - "{} is too large to store in a Decimal128 of precision {}. Max is {}", - value, precision, max - ))) - } else if value < min { - Err(ArrowError::InvalidArgumentError(format!( - "{} is too small to store in a Decimal128 of precision {}. 
Min is {}", - value, precision, min - ))) - } else { - Ok(()) - } -} - -/// Validates that the specified `byte_array` of little-endian format -/// value can be properly interpreted as a Decimal256 number with precision `precision` -#[inline] -pub(crate) fn validate_decimal256_precision_with_lt_bytes( - lt_value: &[u8], - precision: u8, -) -> Result<()> { - if precision > DECIMAL256_MAX_PRECISION { - return Err(ArrowError::InvalidArgumentError(format!( - "Max precision of a Decimal256 is {}, but got {}", - DECIMAL256_MAX_PRECISION, precision, - ))); - } - let max = MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[usize::from(precision) - 1]; - let min = MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[usize::from(precision) - 1]; - - if singed_cmp_le_bytes(lt_value, &max) == Ordering::Greater { - Err(ArrowError::InvalidArgumentError(format!( - "{:?} is too large to store in a Decimal256 of precision {}. Max is {:?}", - BigInt::from_signed_bytes_le(lt_value), - precision, - BigInt::from_signed_bytes_le(&max) - ))) - } else if singed_cmp_le_bytes(lt_value, &min) == Ordering::Less { - Err(ArrowError::InvalidArgumentError(format!( - "{:?} is too small to store in a Decimal256 of precision {}. 
Min is {:?}", - BigInt::from_signed_bytes_le(lt_value), - precision, - BigInt::from_signed_bytes_le(&min) - ))) - } else { - Ok(()) - } -} - #[cfg(test)] mod tests { use super::*; @@ -1253,23 +471,4 @@ mod tests { assert_eq!(left == right, left_decimal == right_decimal) } } - - #[test] - fn test_decimal256_min_max_for_precision() { - // The precision from 1 to 76 - let mut max_value = "9".to_string(); - let mut min_value = "-9".to_string(); - for i in 1..77 { - let max_decimal = - Decimal256::from(BigInt::from_str_radix(max_value.as_str(), 10).unwrap()); - let min_decimal = - Decimal256::from(BigInt::from_str_radix(min_value.as_str(), 10).unwrap()); - let max_bytes = MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[i - 1]; - let min_bytes = MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[i - 1]; - max_value += "9"; - min_value += "9"; - assert_eq!(max_decimal.raw_value(), &max_bytes); - assert_eq!(min_decimal.raw_value(), &min_bytes); - } - } } From e86510054dd7557856e00683bcbfa134750b10bb Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Mon, 12 Sep 2022 19:02:54 +0100 Subject: [PATCH 04/16] Fix doc --- arrow-schema/src/datatype.rs | 1 - arrow/src/datatypes/decimal.rs | 14 +++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs index 8e553749dbc..c7ae12e6150 100644 --- a/arrow-schema/src/datatype.rs +++ b/arrow-schema/src/datatype.rs @@ -643,7 +643,6 @@ impl DataType { } /// Returns true if this type is valid as a dictionary key - /// (e.g. 
[`super::ArrowDictionaryKeyType`] pub fn is_dictionary_key_type(t: &DataType) -> bool { use DataType::*; matches!( diff --git a/arrow/src/datatypes/decimal.rs b/arrow/src/datatypes/decimal.rs index cf0ff29686b..09dfb42a857 100644 --- a/arrow/src/datatypes/decimal.rs +++ b/arrow/src/datatypes/decimal.rs @@ -641,7 +641,7 @@ pub(crate) const MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [[u8; 32]; 76] = [ ]; /// `MAX_DECIMAL_FOR_EACH_PRECISION[p]` holds the maximum `i128` value -/// that can be stored in [DataType::Decimal128] value of precision `p` +/// that can be stored in `DataType::Decimal128` value of precision `p` pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ 9, 99, @@ -684,7 +684,7 @@ pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ ]; /// `MIN_DECIMAL_FOR_EACH_PRECISION[p]` holds the minimum `i128` value -/// that can be stored in a [DataType::Decimal128] value of precision `p` +/// that can be stored in a `DataType::Decimal128` value of precision `p` pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ -9, -99, @@ -726,19 +726,19 @@ pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ -99999999999999999999999999999999999999, ]; -/// The maximum precision for [DataType::Decimal128] values +/// The maximum precision for `DataType::Decimal128` values pub const DECIMAL128_MAX_PRECISION: u8 = 38; -/// The maximum scale for [DataType::Decimal128] values +/// The maximum scale for `DataType::Decimal128` values pub const DECIMAL128_MAX_SCALE: u8 = 38; -/// The maximum precision for [DataType::Decimal256] values +/// The maximum precision for `DataType::Decimal256` values pub const DECIMAL256_MAX_PRECISION: u8 = 76; -/// The maximum scale for [DataType::Decimal256] values +/// The maximum scale for `DataType::Decimal256` values pub const DECIMAL256_MAX_SCALE: u8 = 76; -/// The default scale for [DataType::Decimal128] and [DataType::Decimal256] values +/// The default scale for `DataType::Decimal128` and `DataType::Decimal256` values pub 
const DECIMAL_DEFAULT_SCALE: u8 = 10; /// Validates that the specified `i128` value can be properly From 3f213db8ff8244cc2b9723e42105053a27195484 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Mon, 12 Sep 2022 19:04:06 +0100 Subject: [PATCH 05/16] Fix tests --- arrow/src/array/array_decimal.rs | 5 ++--- arrow/src/datatypes/decimal.rs | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs index a3abb5ca1f3..543fda1b1a8 100644 --- a/arrow/src/array/array_decimal.rs +++ b/arrow/src/array/array_decimal.rs @@ -549,9 +549,8 @@ impl<'a, T: DecimalType> DecimalArray { #[cfg(test)] mod tests { use crate::array::Decimal256Builder; - use crate::util::decimal::{ - Decimal128, DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE, - }; + use crate::datatypes::{DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE}; + use crate::util::decimal::Decimal128; use crate::{array::Decimal128Builder, datatypes::Field}; use num::{BigInt, Num}; diff --git a/arrow/src/datatypes/decimal.rs b/arrow/src/datatypes/decimal.rs index 09dfb42a857..a7f3f3c1dc1 100644 --- a/arrow/src/datatypes/decimal.rs +++ b/arrow/src/datatypes/decimal.rs @@ -807,7 +807,7 @@ pub(crate) fn validate_decimal256_precision_with_lt_bytes( #[cfg(test)] mod test { - use crate::datatypes::MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION; + use super::*; use crate::util::decimal::Decimal256; use num::{BigInt, Num}; From b01cb8edfa94399706c8e4993580a0520750f940 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 13 Sep 2022 13:55:34 +0100 Subject: [PATCH 06/16] Fix integration-test --- integration-testing/src/util.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-testing/src/util.rs b/integration-testing/src/util.rs index e098c4e1491..382feece09a 100644 --- a/integration-testing/src/util.rs +++ b/integration-testing/src/util.rs @@ -256,7 +256,7 @@ impl ArrowJsonField { fn to_arrow_field(&self) -> Result { 
// a bit regressive, but we have to convert the field to JSON in order to convert it let field = serde_json::to_value(self)?; - Field::from(&field) + Ok(Field::from(&field)?) } } From af2652b6bb039f1db011d83a4a996ec1cfc47bd4 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 13 Sep 2022 14:49:28 +0100 Subject: [PATCH 07/16] Remove pyarrow orphan --- arrow/src/pyarrow.rs | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 89463e4c8fd..d0ac76ef975 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -236,26 +236,3 @@ impl PyArrowConvert for ArrowArrayStreamReader { Ok(PyObject::from(reader)) } } - -macro_rules! add_conversion { - ($typ:ty) => { - impl<'source> FromPyObject<'source> for $typ { - fn extract(value: &'source PyAny) -> PyResult { - Self::from_pyarrow(value) - } - } - - impl<'a> IntoPy for $typ { - fn into_py(self, py: Python) -> PyObject { - self.to_pyarrow(py).unwrap() - } - } - }; -} - -add_conversion!(DataType); -add_conversion!(Field); -add_conversion!(Schema); -add_conversion!(ArrayData); -add_conversion!(RecordBatch); -add_conversion!(ArrowArrayStreamReader); From 56eb7a6718d9bb6e678455490d39ce506a8959e3 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 13 Sep 2022 16:54:28 +0100 Subject: [PATCH 08/16] PyArrow fixes --- arrow-pyarrow-integration-testing/src/lib.rs | 31 ++++++++++++-------- arrow/src/pyarrow.rs | 23 +++++++++++++++ 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/arrow-pyarrow-integration-testing/src/lib.rs b/arrow-pyarrow-integration-testing/src/lib.rs index 086b2183465..9741d7d1757 100644 --- a/arrow-pyarrow-integration-testing/src/lib.rs +++ b/arrow-pyarrow-integration-testing/src/lib.rs @@ -28,7 +28,7 @@ use arrow::compute::kernels; use arrow::datatypes::{DataType, Field, Schema}; use arrow::error::ArrowError; use arrow::ffi_stream::ArrowArrayStreamReader; -use arrow::pyarrow::PyArrowConvert; +use 
arrow::pyarrow::{PyArrowConvert, PyArrowType}; use arrow::record_batch::RecordBatch; /// Returns `array + array` of an int64 array. @@ -66,20 +66,23 @@ fn double_py(lambda: &PyAny, py: Python) -> PyResult { /// Returns the substring #[pyfunction] -fn substring(array: ArrayData, start: i64) -> PyResult { +fn substring( + array: PyArrowType, + start: i64, +) -> PyResult> { // import - let array = ArrayRef::from(array); + let array = ArrayRef::from(array.0); // substring let array = kernels::substring::substring(array.as_ref(), start, None)?; - Ok(array.data().to_owned()) + Ok(array.data().to_owned().into()) } /// Returns the concatenate #[pyfunction] -fn concatenate(array: ArrayData, py: Python) -> PyResult { - let array = ArrayRef::from(array); +fn concatenate(array: PyArrowType, py: Python) -> PyResult { + let array = ArrayRef::from(array.0); // concat let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()])?; @@ -88,34 +91,36 @@ fn concatenate(array: ArrayData, py: Python) -> PyResult { } #[pyfunction] -fn round_trip_type(obj: DataType) -> PyResult { +fn round_trip_type(obj: PyArrowType) -> PyResult> { Ok(obj) } #[pyfunction] -fn round_trip_field(obj: Field) -> PyResult { +fn round_trip_field(obj: PyArrowType) -> PyResult> { Ok(obj) } #[pyfunction] -fn round_trip_schema(obj: Schema) -> PyResult { +fn round_trip_schema(obj: PyArrowType) -> PyResult> { Ok(obj) } #[pyfunction] -fn round_trip_array(obj: ArrayData) -> PyResult { +fn round_trip_array(obj: PyArrowType) -> PyResult> { Ok(obj) } #[pyfunction] -fn round_trip_record_batch(obj: RecordBatch) -> PyResult { +fn round_trip_record_batch( + obj: PyArrowType, +) -> PyResult> { Ok(obj) } #[pyfunction] fn round_trip_record_batch_reader( - obj: ArrowArrayStreamReader, -) -> PyResult { + obj: PyArrowType, +) -> PyResult> { Ok(obj) } diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index d0ac76ef975..2965dbc48cb 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -236,3 +236,26 @@ impl 
PyArrowConvert for ArrowArrayStreamReader { Ok(PyObject::from(reader)) } } + +/// A newtype wrapper around a `T: PyArrowConvert` that implements +/// [`FromPyObject`] and [`IntoPy`] allowing usage with pyo3 macros +#[derive(Debug)] +pub struct PyArrowType(pub T); + +impl<'source, T: PyArrowConvert> FromPyObject<'source> for PyArrowType { + fn extract(value: &'source PyAny) -> PyResult { + Ok(Self(T::from_pyarrow(value)?)) + } +} + +impl<'a, T: PyArrowConvert> IntoPy for PyArrowType { + fn into_py(self, py: Python) -> PyObject { + self.0.to_pyarrow(py).unwrap() + } +} + +impl From for PyArrowType { + fn from(s: T) -> Self { + Self(s) + } +} From a94fda14d880c27f77b3503a7ad757e91f2be7a6 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 14 Sep 2022 12:13:57 +0100 Subject: [PATCH 09/16] Move ArrowError to arrow-schema --- arrow-schema/src/datatype.rs | 54 +++++++------- arrow-schema/src/error.rs | 86 ++++++++++++++++++---- arrow-schema/src/field.rs | 66 ++++++++--------- arrow-schema/src/schema.rs | 30 ++++---- arrow/src/csv/mod.rs | 19 +++++ arrow/src/csv/reader.rs | 9 +-- arrow/src/csv/writer.rs | 8 ++- arrow/src/error.rs | 124 +------------------------------- arrow/src/json/writer.rs | 5 +- arrow/src/record_batch.rs | 5 +- integration-testing/src/util.rs | 5 +- 11 files changed, 187 insertions(+), 224 deletions(-) diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs index f741ccfe368..6a086781bee 100644 --- a/arrow-schema/src/datatype.rs +++ b/arrow-schema/src/datatype.rs @@ -20,7 +20,7 @@ use std::fmt; use crate::field::Field; #[cfg(feature = "json")] -use crate::error::ArrowSchemaError; +use crate::error::ArrowError; /// The set of datatypes that are supported by this implementation of Apache Arrow. /// @@ -267,7 +267,7 @@ impl fmt::Display for DataType { impl DataType { /// Parse a data type from a JSON representation. 
#[cfg(feature = "json")] - pub fn from(json: &serde_json::Value) -> Result { + pub fn from(json: &serde_json::Value) -> Result { use serde_json::Value; let default_field = Field::new("", DataType::Boolean, true); match *json { @@ -283,7 +283,7 @@ impl DataType { if let Some(Value::Number(size)) = map.get("byteWidth") { Ok(DataType::FixedSizeBinary(size.as_i64().unwrap() as i32)) } else { - Err(ArrowSchemaError::Parse( + Err(ArrowError::ParseError( "Expecting a byteWidth for fixedsizebinary".to_string(), )) } @@ -292,13 +292,13 @@ impl DataType { // return a list with any type as its child isn't defined in the map let precision = match map.get("precision") { Some(p) => Ok(p.as_u64().unwrap().try_into().unwrap()), - None => Err(ArrowSchemaError::Parse( + None => Err(ArrowError::ParseError( "Expecting a precision for decimal".to_string(), )), }?; let scale = match map.get("scale") { Some(s) => Ok(s.as_u64().unwrap().try_into().unwrap()), - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "Expecting a scale for decimal".to_string(), )), }?; @@ -312,7 +312,7 @@ impl DataType { } else if bit_width == 256 { Ok(DataType::Decimal256(precision, scale)) } else { - Err(ArrowSchemaError::Parse( + Err(ArrowError::ParseError( "Decimal bit_width invalid".to_string(), )) } @@ -321,7 +321,7 @@ impl DataType { Some(p) if p == "HALF" => Ok(DataType::Float16), Some(p) if p == "SINGLE" => Ok(DataType::Float32), Some(p) if p == "DOUBLE" => Ok(DataType::Float64), - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "floatingpoint precision missing or invalid".to_string(), )), }, @@ -331,14 +331,14 @@ impl DataType { Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond), Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond), Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond), - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "timestamp unit missing or invalid".to_string(), )), }; let tz = match map.get("timezone") { 
None => Ok(None), Some(Value::String(tz)) => Ok(Some(tz.clone())), - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "timezone must be a string".to_string(), )), }; @@ -347,7 +347,7 @@ impl DataType { Some(s) if s == "date" => match map.get("unit") { Some(p) if p == "DAY" => Ok(DataType::Date32), Some(p) if p == "MILLISECOND" => Ok(DataType::Date64), - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "date unit missing or invalid".to_string(), )), }, @@ -357,14 +357,14 @@ impl DataType { Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond), Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond), Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond), - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "time unit missing or invalid".to_string(), )), }; match map.get("bitWidth") { Some(p) if p == 32 => Ok(DataType::Time32(unit?)), Some(p) if p == 64 => Ok(DataType::Time64(unit?)), - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "time bitWidth missing or invalid".to_string(), )), } @@ -380,7 +380,7 @@ impl DataType { Some(p) if p == "NANOSECOND" => { Ok(DataType::Duration(TimeUnit::Nanosecond)) } - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "time unit missing or invalid".to_string(), )), }, @@ -394,7 +394,7 @@ impl DataType { Some(p) if p == "MONTH_DAY_NANO" => { Ok(DataType::Interval(IntervalUnit::MonthDayNano)) } - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "interval unit missing or invalid".to_string(), )), }, @@ -405,11 +405,11 @@ impl DataType { Some(16) => Ok(DataType::Int16), Some(32) => Ok(DataType::Int32), Some(64) => Ok(DataType::Int64), - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "int bitWidth missing or invalid".to_string(), )), }, - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "int bitWidth missing or invalid".to_string(), )), }, @@ -419,15 +419,15 @@ impl 
DataType { Some(16) => Ok(DataType::UInt16), Some(32) => Ok(DataType::UInt32), Some(64) => Ok(DataType::UInt64), - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "int bitWidth missing or invalid".to_string(), )), }, - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "int bitWidth missing or invalid".to_string(), )), }, - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "int signed missing or invalid".to_string(), )), }, @@ -447,7 +447,7 @@ impl DataType { size.as_i64().unwrap() as i32, )) } else { - Err(ArrowSchemaError::Parse( + Err(ArrowError::ParseError( "Expecting a listSize for fixedsizelist".to_string(), )) } @@ -461,7 +461,7 @@ impl DataType { // Return a map with an empty type as its children aren't defined in the map Ok(DataType::Map(Box::new(default_field), *keys_sorted)) } else { - Err(ArrowSchemaError::Parse( + Err(ArrowError::ParseError( "Expecting a keysSorted for map".to_string(), )) } @@ -473,7 +473,7 @@ impl DataType { } else if mode == "DENSE" { UnionMode::Dense } else { - return Err(ArrowSchemaError::Parse(format!( + return Err(ArrowError::ParseError(format!( "Unknown union mode {:?} for union", mode ))); @@ -493,23 +493,23 @@ impl DataType { Ok(DataType::Union(default_fields, type_ids, union_mode)) } else { - Err(ArrowSchemaError::Parse( + Err(ArrowError::ParseError( "Expecting a typeIds for union ".to_string(), )) } } else { - Err(ArrowSchemaError::Parse( + Err(ArrowError::ParseError( "Expecting a mode for union".to_string(), )) } } - Some(other) => Err(ArrowSchemaError::Parse(format!( + Some(other) => Err(ArrowError::ParseError(format!( "invalid or unsupported type name: {} in {:?}", other, json ))), - None => Err(ArrowSchemaError::Parse("type name missing".to_string())), + None => Err(ArrowError::ParseError("type name missing".to_string())), }, - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "invalid json value type".to_string(), )), } diff --git 
a/arrow-schema/src/error.rs b/arrow-schema/src/error.rs index 4ad7c65c503..105d4d5e21f 100644 --- a/arrow-schema/src/error.rs +++ b/arrow-schema/src/error.rs @@ -15,31 +15,89 @@ // specific language governing permissions and limitations // under the License. -//! Defines `ArrowSchemaError` for representing failures in arrow schema +//! Defines `ArrowError` for representing failures in various Arrow operations. +use std::fmt::{Debug, Display, Formatter}; +use std::io::Write; use std::error::Error; +/// Many different operations in the `arrow` crate return this error type. #[derive(Debug)] -pub enum ArrowSchemaError { - Parse(String), - Merge(String), - Field(String), +pub enum ArrowError { + /// Returned when functionality is not yet available. + NotYetImplemented(String), + ExternalError(Box), + CastError(String), + MemoryError(String), + ParseError(String), + SchemaError(String), + ComputeError(String), + DivideByZero, + CsvError(String), + JsonError(String), + IoError(String), + InvalidArgumentError(String), + ParquetError(String), + /// Error during import or export to/from the C Data Interface + CDataInterface(String), + DictionaryKeyOverflowError, } -impl std::fmt::Display for ArrowSchemaError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl ArrowError { + /// Wraps an external error in an `ArrowError`. 
+ pub fn from_external_error(error: Box) -> Self { + Self::ExternalError(error) + } +} + +impl From<::std::io::Error> for ArrowError { + fn from(error: std::io::Error) -> Self { + ArrowError::IoError(error.to_string()) + } +} + +impl From<::std::string::FromUtf8Error> for ArrowError { + fn from(error: std::string::FromUtf8Error) -> Self { + ArrowError::ParseError(error.to_string()) + } +} + +impl From<::std::io::IntoInnerError> for ArrowError { + fn from(error: std::io::IntoInnerError) -> Self { + ArrowError::IoError(error.to_string()) + } +} + +impl Display for ArrowError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - ArrowSchemaError::Parse(message) => { - write!(f, "Error parsing schema: {}", message) + ArrowError::NotYetImplemented(source) => { + write!(f, "Not yet implemented: {}", &source) + } + ArrowError::ExternalError(source) => write!(f, "External error: {}", &source), + ArrowError::CastError(desc) => write!(f, "Cast error: {}", desc), + ArrowError::MemoryError(desc) => write!(f, "Memory error: {}", desc), + ArrowError::ParseError(desc) => write!(f, "Parser error: {}", desc), + ArrowError::SchemaError(desc) => write!(f, "Schema error: {}", desc), + ArrowError::ComputeError(desc) => write!(f, "Compute error: {}", desc), + ArrowError::DivideByZero => write!(f, "Divide by zero error"), + ArrowError::CsvError(desc) => write!(f, "Csv error: {}", desc), + ArrowError::JsonError(desc) => write!(f, "Json error: {}", desc), + ArrowError::IoError(desc) => write!(f, "Io error: {}", desc), + ArrowError::InvalidArgumentError(desc) => { + write!(f, "Invalid argument error: {}", desc) + } + ArrowError::ParquetError(desc) => { + write!(f, "Parquet argument error: {}", desc) } - ArrowSchemaError::Merge(message) => { - write!(f, "Error merging schema: {}", message) + ArrowError::CDataInterface(desc) => { + write!(f, "C Data interface error: {}", desc) } - ArrowSchemaError::Field(message) => { - write!(f, "Error indexing field: {}", message) + 
ArrowError::DictionaryKeyOverflowError => { + write!(f, "Dictionary key bigger than the key type") } } } } -impl Error for ArrowSchemaError {} +impl Error for ArrowError {} diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs index b45e706b77f..7792e245feb 100644 --- a/arrow-schema/src/field.rs +++ b/arrow-schema/src/field.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::error::ArrowSchemaError; +use crate::error::ArrowError; use std::cmp::Ordering; use std::collections::BTreeMap; use std::hash::{Hash, Hasher}; @@ -252,14 +252,14 @@ impl Field { /// Parse a `Field` definition from a JSON representation. #[cfg(feature = "json")] - pub fn from(json: &serde_json::Value) -> Result { + pub fn from(json: &serde_json::Value) -> Result { use serde_json::Value; match *json { Value::Object(ref map) => { let name = match map.get("name") { Some(&Value::String(ref name)) => name.to_string(), _ => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field missing 'name' attribute".to_string(), )); } @@ -267,7 +267,7 @@ impl Field { let nullable = match map.get("nullable") { Some(&Value::Bool(b)) => b, _ => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field missing 'nullable' attribute".to_string(), )); } @@ -275,7 +275,7 @@ impl Field { let data_type = match map.get("type") { Some(t) => DataType::from(t)?, _ => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field missing 'type' attribute".to_string(), )); } @@ -289,7 +289,7 @@ impl Field { match value.as_object() { Some(map) => { if map.len() != 2 { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field 'metadata' must have exact two entries for each key-value map".to_string(), )); } @@ -304,14 +304,14 @@ impl Field { v_str.to_string().clone(), ); } else { - return Err(ArrowSchemaError::Parse("Field 'metadata' must have map value of 
string type".to_string())); + return Err(ArrowError::ParseError("Field 'metadata' must have map value of string type".to_string())); } } else { - return Err(ArrowSchemaError::Parse("Field 'metadata' lacks map keys named \"key\" or \"value\"".to_string())); + return Err(ArrowError::ParseError("Field 'metadata' lacks map keys named \"key\" or \"value\"".to_string())); } } _ => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field 'metadata' contains non-object key-value pair".to_string(), )); } @@ -327,7 +327,7 @@ impl Field { if let Some(str_value) = v.as_str() { res.insert(k.clone(), str_value.to_string().clone()); } else { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( format!("Field 'metadata' contains non-string value for key {}", k), )); } @@ -335,7 +335,7 @@ impl Field { Some(res) } Some(_) => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field `metadata` is not json array".to_string(), )); } @@ -349,7 +349,7 @@ impl Field { | DataType::FixedSizeList(_, _) => match map.get("children") { Some(Value::Array(values)) => { if values.len() != 1 { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field 'children' must have one element for a list data type".to_string(), )); } @@ -370,12 +370,12 @@ impl Field { } } Some(_) => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field 'children' must be an array".to_string(), )) } None => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field missing 'children' attribute".to_string(), )); } @@ -388,12 +388,12 @@ impl Field { DataType::Struct(fields) } Some(_) => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field 'children' must be an array".to_string(), )) } None => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field missing 'children' attribute".to_string(), )); } @@ -408,20 +408,20 @@ 
impl Field { DataType::Map(Box::new(child), keys_sorted) } t => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( format!("Map children should be a struct with 2 fields, found {:?}", t) )) } } } Some(_) => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field 'children' must be an array with 1 element" .to_string(), )) } None => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field missing 'children' attribute".to_string(), )); } @@ -436,12 +436,12 @@ impl Field { DataType::Union(union_fields, type_ids, mode) } Some(_) => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field 'children' must be an array".to_string(), )) } None => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field missing 'children' attribute".to_string(), )); } @@ -457,7 +457,7 @@ impl Field { let index_type = match dictionary.get("indexType") { Some(t) => DataType::from(t)?, _ => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field missing 'indexType' attribute".to_string(), )); } @@ -465,7 +465,7 @@ impl Field { dict_id = match dictionary.get("id") { Some(Value::Number(n)) => n.as_i64().unwrap(), _ => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field missing 'id' attribute".to_string(), )); } @@ -473,7 +473,7 @@ impl Field { dict_is_ordered = match dictionary.get("isOrdered") { Some(&Value::Bool(n)) => n, _ => { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Field missing 'isOrdered' attribute".to_string(), )); } @@ -491,7 +491,7 @@ impl Field { metadata, }) } - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "Invalid json value type for field".to_string(), )), } @@ -543,14 +543,14 @@ impl Field { /// assert!(field.try_merge(&Field::new("c1", DataType::Int64, true)).is_ok()); /// assert!(field.is_nullable()); /// ``` - pub fn 
try_merge(&mut self, from: &Field) -> Result<(), ArrowSchemaError> { + pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowError> { if from.dict_id != self.dict_id { - return Err(ArrowSchemaError::Merge( + return Err(ArrowError::SchemaError( "Fail to merge schema Field due to conflicting dict_id".to_string(), )); } if from.dict_is_ordered != self.dict_is_ordered { - return Err(ArrowSchemaError::Merge( + return Err(ArrowError::SchemaError( "Fail to merge schema Field due to conflicting dict_is_ordered" .to_string(), )); @@ -562,7 +562,7 @@ impl Field { for (key, from_value) in from_metadata { if let Some(self_value) = self_metadata.get(key) { if self_value != from_value { - return Err(ArrowSchemaError::Merge(format!( + return Err(ArrowError::SchemaError(format!( "Fail to merge field due to conflicting metadata data value for key {}", key), )); } @@ -591,7 +591,7 @@ impl Field { } } _ => { - return Err(ArrowSchemaError::Merge( + return Err(ArrowError::SchemaError( "Fail to merge schema Field due to conflicting datatype" .to_string(), )); @@ -611,7 +611,7 @@ impl Field { // If the nested fields in two unions are the same, they must have same // type id. 
if self_type_id != field_type_id { - return Err(ArrowSchemaError::Merge( + return Err(ArrowError::SchemaError( "Fail to merge schema Field due to conflicting type ids in union datatype" .to_string(), )); @@ -629,7 +629,7 @@ impl Field { } } _ => { - return Err(ArrowSchemaError::Merge( + return Err(ArrowError::SchemaError( "Fail to merge schema Field due to conflicting datatype" .to_string(), )); @@ -668,7 +668,7 @@ impl Field { | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => { if self.data_type != from.data_type { - return Err(ArrowSchemaError::Merge( + return Err(ArrowError::SchemaError( "Fail to merge schema Field due to conflicting datatype" .to_string(), )); diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index 686fb572911..407d6c6bdea 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -19,7 +19,7 @@ use std::collections::HashMap; use std::fmt; use std::hash::Hash; -use crate::error::ArrowSchemaError; +use crate::error::ArrowError; use crate::field::Field; /// Describes the meta-data of an ordered sequence of relative types. 
@@ -95,12 +95,12 @@ impl Schema { /// Returns a new schema with only the specified columns in the new schema /// This carries metadata from the parent schema over as well - pub fn project(&self, indices: &[usize]) -> Result { + pub fn project(&self, indices: &[usize]) -> Result { let new_fields = indices .iter() .map(|i| { self.fields.get(*i).cloned().ok_or_else(|| { - ArrowSchemaError::Field(format!( + ArrowError::SchemaError(format!( "project index {} out of bounds, max field {}", i, self.fields().len() @@ -141,7 +141,7 @@ impl Schema { /// ``` pub fn try_merge( schemas: impl IntoIterator, - ) -> Result { + ) -> Result { schemas .into_iter() .try_fold(Self::empty(), |mut merged, schema| { @@ -150,7 +150,7 @@ impl Schema { // merge metadata if let Some(old_val) = merged.metadata.get(&key) { if old_val != &value { - return Err(ArrowSchemaError::Merge(format!( + return Err(ArrowError::SchemaError(format!( "Fail to merge schema due to conflicting metadata. \ Key '{}' has different values '{}' and '{}'", key, old_val, value @@ -192,7 +192,7 @@ impl Schema { } /// Returns an immutable reference of a specific [`Field`] instance selected by name. - pub fn field_with_name(&self, name: &str) -> Result<&Field, ArrowSchemaError> { + pub fn field_with_name(&self, name: &str) -> Result<&Field, ArrowError> { Ok(&self.fields[self.index_of(name)?]) } @@ -206,13 +206,13 @@ impl Schema { } /// Find the index of the column with the given name. - pub fn index_of(&self, name: &str) -> Result { + pub fn index_of(&self, name: &str) -> Result { (0..self.fields.len()) .find(|idx| self.fields[*idx].name() == name) .ok_or_else(|| { let valid_fields: Vec = self.fields.iter().map(|f| f.name().clone()).collect(); - ArrowSchemaError::Field(format!( + ArrowError::SchemaError(format!( "Unable to get field named \"{}\". Valid fields: {:?}", name, valid_fields )) @@ -245,14 +245,14 @@ impl Schema { /// Parse a `Schema` definition from a JSON representation. 
#[cfg(feature = "json")] - pub fn from(json: &serde_json::Value) -> Result { + pub fn from(json: &serde_json::Value) -> Result { use serde_json::Value; match *json { Value::Object(ref schema) => { let fields = if let Some(Value::Array(fields)) = schema.get("fields") { fields.iter().map(Field::from).collect::>()? } else { - return Err(ArrowSchemaError::Parse( + return Err(ArrowError::ParseError( "Schema fields should be an array".to_string(), )); }; @@ -265,7 +265,7 @@ impl Schema { Ok(Self { fields, metadata }) } - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "Invalid json value type for schema".to_string(), )), } @@ -276,14 +276,14 @@ impl Schema { #[cfg(feature = "json")] fn from_metadata( json: &serde_json::Value, - ) -> Result, ArrowSchemaError> { + ) -> Result, ArrowError> { use serde_json::Value; match json { Value::Array(_) => { let mut hashmap = HashMap::new(); let values: Vec = serde_json::from_value(json.clone()) .map_err(|_| { - ArrowSchemaError::Parse( + ArrowError::ParseError( "Unable to parse object into key-value pair".to_string(), ) })?; @@ -298,13 +298,13 @@ impl Schema { if let Value::String(v) = v { Ok((k.to_string(), v.to_string())) } else { - Err(ArrowSchemaError::Parse( + Err(ArrowError::ParseError( "metadata `value` field must be a string".to_string(), )) } }) .collect::>(), - _ => Err(ArrowSchemaError::Parse( + _ => Err(ArrowError::ParseError( "`metadata` field must be an object".to_string(), )), } diff --git a/arrow/src/csv/mod.rs b/arrow/src/csv/mod.rs index ffe82f33580..46ba7d71e20 100644 --- a/arrow/src/csv/mod.rs +++ b/arrow/src/csv/mod.rs @@ -25,3 +25,22 @@ pub use self::reader::Reader; pub use self::reader::ReaderBuilder; pub use self::writer::Writer; pub use self::writer::WriterBuilder; +use arrow_schema::ArrowError; + +fn map_csv_error(error: csv_crate::Error) -> ArrowError { + match error.kind() { + csv_crate::ErrorKind::Io(error) => ArrowError::CsvError(error.to_string()), + csv_crate::ErrorKind::Utf8 { 
pos: _, err } => ArrowError::CsvError(format!( + "Encountered UTF-8 error while reading CSV file: {}", + err + )), + csv_crate::ErrorKind::UnequalLengths { + expected_len, len, .. + } => ArrowError::CsvError(format!( + "Encountered unequal lengths between records on CSV file. Expected {} \ + records, found {} records", + len, expected_len + )), + _ => ArrowError::CsvError("Error reading CSV file".to_string()), + } +} diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index 7eb5514e86f..3ec605dd048 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -58,6 +58,7 @@ use crate::error::{ArrowError, Result}; use crate::record_batch::{RecordBatch, RecordBatchOptions}; use crate::util::reader_parser::Parser; +use crate::csv::map_csv_error; use csv_crate::{ByteRecord, StringRecord}; use std::ops::Neg; @@ -187,10 +188,10 @@ fn infer_reader_schema_with_csv_options( // get or create header names // when has_header is false, creates default column names with column_ prefix let headers: Vec = if roptions.has_header { - let headers = &csv_reader.headers()?.clone(); + let headers = &csv_reader.headers().map_err(map_csv_error)?.clone(); headers.iter().map(|s| s.to_string()).collect() } else { - let first_record_count = &csv_reader.headers()?.len(); + let first_record_count = &csv_reader.headers().map_err(map_csv_error)?.len(); (0..*first_record_count) .map(|i| format!("column_{}", i + 1)) .collect() @@ -208,7 +209,7 @@ fn infer_reader_schema_with_csv_options( let mut record = StringRecord::new(); let max_records = roptions.max_read_records.unwrap_or(usize::MAX); while records_count < max_records { - if !csv_reader.read_record(&mut record)? { + if !csv_reader.read_record(&mut record).map_err(map_csv_error)? { break; } records_count += 1; @@ -289,7 +290,7 @@ pub fn infer_schema_from_files( } } - Ok(Schema::try_merge(schemas)?) + Schema::try_merge(schemas) } // optional bounds of the reader, of the form (min line, max line). 
diff --git a/arrow/src/csv/writer.rs b/arrow/src/csv/writer.rs index 7097706ba5f..1b377c38b37 100644 --- a/arrow/src/csv/writer.rs +++ b/arrow/src/csv/writer.rs @@ -70,11 +70,13 @@ use crate::compute::kernels::temporal::using_chrono_tz_and_utc_naive_date_time; #[cfg(feature = "chrono-tz")] use chrono::{DateTime, Utc}; +use crate::csv::map_csv_error; use crate::datatypes::*; use crate::error::{ArrowError, Result}; use crate::record_batch::RecordBatch; use crate::util::display::make_string_from_decimal; use crate::{array::*, util::serialization::lexical_to_string}; + const DEFAULT_DATE_FORMAT: &str = "%F"; const DEFAULT_TIME_FORMAT: &str = "%T"; const DEFAULT_TIMESTAMP_FORMAT: &str = "%FT%H:%M:%S.%9f"; @@ -343,7 +345,9 @@ impl Writer { .fields() .iter() .for_each(|field| headers.push(field.name().to_string())); - self.writer.write_record(&headers[..])?; + self.writer + .write_record(&headers[..]) + .map_err(map_csv_error)?; } self.beginning = false; } @@ -364,7 +368,7 @@ impl Writer { for row_index in 0..batch.num_rows() { self.convert(columns.as_slice(), row_index, &mut buffer)?; - self.writer.write_record(&buffer)?; + self.writer.write_record(&buffer).map_err(map_csv_error)?; } self.writer.flush()?; diff --git a/arrow/src/error.rs b/arrow/src/error.rs index b0eb9b99594..f7acec0b34d 100644 --- a/arrow/src/error.rs +++ b/arrow/src/error.rs @@ -16,129 +16,7 @@ // under the License. //! Defines `ArrowError` for representing failures in various Arrow operations. -use std::fmt::{Debug, Display, Formatter}; -use std::io::Write; -use std::error::Error; - -// Re-export ArrowSchemaError -pub use arrow_schema::ArrowSchemaError; - -/// Many different operations in the `arrow` crate return this error type. -#[derive(Debug)] -pub enum ArrowError { - /// Returned when functionality is not yet available. 
- NotYetImplemented(String), - ExternalError(Box), - CastError(String), - MemoryError(String), - ParseError(String), - SchemaError(ArrowSchemaError), - ComputeError(String), - DivideByZero, - CsvError(String), - JsonError(String), - IoError(String), - InvalidArgumentError(String), - ParquetError(String), - /// Error during import or export to/from the C Data Interface - CDataInterface(String), - DictionaryKeyOverflowError, -} - -impl ArrowError { - /// Wraps an external error in an `ArrowError`. - pub fn from_external_error( - error: Box, - ) -> Self { - Self::ExternalError(error) - } -} - -impl From<::std::io::Error> for ArrowError { - fn from(error: std::io::Error) -> Self { - ArrowError::IoError(error.to_string()) - } -} - -impl From for ArrowError { - fn from(error: ArrowSchemaError) -> Self { - Self::SchemaError(error) - } -} - -#[cfg(feature = "csv")] -impl From for ArrowError { - fn from(error: csv_crate::Error) -> Self { - match error.kind() { - csv_crate::ErrorKind::Io(error) => ArrowError::CsvError(error.to_string()), - csv_crate::ErrorKind::Utf8 { pos: _, err } => ArrowError::CsvError(format!( - "Encountered UTF-8 error while reading CSV file: {}", - err - )), - csv_crate::ErrorKind::UnequalLengths { - expected_len, len, .. - } => ArrowError::CsvError(format!( - "Encountered unequal lengths between records on CSV file. 
Expected {} \ - records, found {} records", - len, expected_len - )), - _ => ArrowError::CsvError("Error reading CSV file".to_string()), - } - } -} - -impl From<::std::string::FromUtf8Error> for ArrowError { - fn from(error: std::string::FromUtf8Error) -> Self { - ArrowError::ParseError(error.to_string()) - } -} - -#[cfg(feature = "json")] -impl From for ArrowError { - fn from(error: serde_json::Error) -> Self { - ArrowError::JsonError(error.to_string()) - } -} - -impl From<::std::io::IntoInnerError> for ArrowError { - fn from(error: std::io::IntoInnerError) -> Self { - ArrowError::IoError(error.to_string()) - } -} - -impl Display for ArrowError { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - ArrowError::NotYetImplemented(source) => { - write!(f, "Not yet implemented: {}", &source) - } - ArrowError::ExternalError(source) => write!(f, "External error: {}", &source), - ArrowError::CastError(desc) => write!(f, "Cast error: {}", desc), - ArrowError::MemoryError(desc) => write!(f, "Memory error: {}", desc), - ArrowError::ParseError(desc) => write!(f, "Parser error: {}", desc), - ArrowError::SchemaError(desc) => write!(f, "Schema error: {}", desc), - ArrowError::ComputeError(desc) => write!(f, "Compute error: {}", desc), - ArrowError::DivideByZero => write!(f, "Divide by zero error"), - ArrowError::CsvError(desc) => write!(f, "Csv error: {}", desc), - ArrowError::JsonError(desc) => write!(f, "Json error: {}", desc), - ArrowError::IoError(desc) => write!(f, "Io error: {}", desc), - ArrowError::InvalidArgumentError(desc) => { - write!(f, "Invalid argument error: {}", desc) - } - ArrowError::ParquetError(desc) => { - write!(f, "Parquet argument error: {}", desc) - } - ArrowError::CDataInterface(desc) => { - write!(f, "C Data interface error: {}", desc) - } - ArrowError::DictionaryKeyOverflowError => { - write!(f, "Dictionary key bigger than the key type") - } - } - } -} - -impl Error for ArrowError {} +pub use arrow_schema::ArrowError; pub 
type Result = std::result::Result; diff --git a/arrow/src/json/writer.rs b/arrow/src/json/writer.rs index bf40b31b494..beee02582ff 100644 --- a/arrow/src/json/writer.rs +++ b/arrow/src/json/writer.rs @@ -700,7 +700,10 @@ where } self.format.start_row(&mut self.writer, is_first_row)?; - self.writer.write_all(&serde_json::to_vec(row)?)?; + self.writer.write_all( + &serde_json::to_vec(row) + .map_err(|error| ArrowError::JsonError(error.to_string()))?, + )?; self.format.end_row(&mut self.writer)?; Ok(()) } diff --git a/arrow/src/record_batch.rs b/arrow/src/record_batch.rs index d71c1a65a2e..4b0d36a43e5 100644 --- a/arrow/src/record_batch.rs +++ b/arrow/src/record_batch.rs @@ -18,7 +18,6 @@ //! A two-dimensional batch of column-oriented data with a defined //! [schema](crate::datatypes::Schema). -use arrow_schema::ArrowSchemaError; use std::sync::Arc; use crate::array::*; @@ -204,11 +203,11 @@ impl RecordBatch { .iter() .map(|f| { self.columns.get(*f).cloned().ok_or_else(|| { - ArrowError::SchemaError(ArrowSchemaError::Field(format!( + ArrowError::SchemaError(format!( "project index {} out of bounds, max field {}", f, self.columns.len() - ))) + )) }) }) .collect::>>()?; diff --git a/integration-testing/src/util.rs b/integration-testing/src/util.rs index 382feece09a..3a127ea35d4 100644 --- a/integration-testing/src/util.rs +++ b/integration-testing/src/util.rs @@ -255,8 +255,9 @@ impl ArrowJsonField { /// TODO: convert to use an Into fn to_arrow_field(&self) -> Result { // a bit regressive, but we have to convert the field to JSON in order to convert it - let field = serde_json::to_value(self)?; - Ok(Field::from(&field)?) 
+ let field = serde_json::to_value(self) + .map_err(|error| ArrowError::JsonError(error.to_string()))?; + Field::from(&field) } } From 8efd7266dcef134be47af00df2499a4d5b765c18 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 14 Sep 2022 12:33:41 +0100 Subject: [PATCH 10/16] Fix pyarrow --- arrow/src/pyarrow.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 2965dbc48cb..e41f71a37a7 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -39,10 +39,8 @@ use crate::record_batch::RecordBatch; import_exception!(pyarrow, ArrowException); pub type PyArrowException = ArrowException; -impl From for PyErr { - fn from(err: ArrowError) -> PyErr { - PyArrowException::new_err(err.to_string()) - } +fn to_py_err(err: ArrowError) -> PyErr { + PyArrowException::new_err(err.to_string()) } pub trait PyArrowConvert: Sized { @@ -55,12 +53,12 @@ impl PyArrowConvert for DataType { let c_schema = FFI_ArrowSchema::empty(); let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; value.call_method1("_export_to_c", (c_schema_ptr as Py_uintptr_t,))?; - let dtype = DataType::try_from(&c_schema)?; + let dtype = DataType::try_from(&c_schema).map_err(to_py_err)?; Ok(dtype) } fn to_pyarrow(&self, py: Python) -> PyResult { - let c_schema = FFI_ArrowSchema::try_from(self)?; + let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; let module = py.import("pyarrow")?; let class = module.getattr("DataType")?; @@ -75,12 +73,12 @@ impl PyArrowConvert for Field { let c_schema = FFI_ArrowSchema::empty(); let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; value.call_method1("_export_to_c", (c_schema_ptr as Py_uintptr_t,))?; - let field = Field::try_from(&c_schema)?; + let field = Field::try_from(&c_schema).map_err(to_py_err)?; Ok(field) } fn to_pyarrow(&self, py: Python) -> PyResult { - let c_schema = 
FFI_ArrowSchema::try_from(self)?; + let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; let module = py.import("pyarrow")?; let class = module.getattr("Field")?; @@ -95,12 +93,12 @@ impl PyArrowConvert for Schema { let c_schema = FFI_ArrowSchema::empty(); let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; value.call_method1("_export_to_c", (c_schema_ptr as Py_uintptr_t,))?; - let schema = Schema::try_from(&c_schema)?; + let schema = Schema::try_from(&c_schema).map_err(to_py_err)?; Ok(schema) } fn to_pyarrow(&self, py: Python) -> PyResult { - let c_schema = FFI_ArrowSchema::try_from(self)?; + let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; let module = py.import("pyarrow")?; let class = module.getattr("Schema")?; @@ -127,15 +125,17 @@ impl PyArrowConvert for ArrayData { ), )?; - let ffi_array = - unsafe { ffi::ArrowArray::try_from_raw(array_pointer, schema_pointer)? }; - let data = ArrayData::try_from(ffi_array)?; + let ffi_array = unsafe { + ffi::ArrowArray::try_from_raw(array_pointer, schema_pointer) + .map_err(to_py_err)? 
+ }; + let data = ArrayData::try_from(ffi_array).map_err(to_py_err)?; Ok(data) } fn to_pyarrow(&self, py: Python) -> PyResult { - let array = ffi::ArrowArray::try_from(self.clone())?; + let array = ffi::ArrowArray::try_from(self.clone()).map_err(to_py_err)?; let (array_pointer, schema_pointer) = ffi::ArrowArray::into_raw(array); let module = py.import("pyarrow")?; @@ -176,7 +176,7 @@ impl PyArrowConvert for RecordBatch { .map(ArrayRef::from_pyarrow) .collect::>()?; - let batch = RecordBatch::try_new(schema, arrays)?; + let batch = RecordBatch::try_new(schema, arrays).map_err(to_py_err)?; Ok(batch) } From 02d0e38979ac3934914ee601cec6a8a5ca4a2758 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 14 Sep 2022 14:09:53 +0100 Subject: [PATCH 11/16] Fix test --- arrow-schema/src/schema.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index 407d6c6bdea..8bfc26d5f31 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -432,7 +432,7 @@ mod tests { if let Err(e) = projected { assert_eq!( e.to_string(), - "Error indexing field: project index 3 out of bounds, max field 3" + "Schema error: project index 3 out of bounds, max field 3" .to_string() ) } From 6dfabe1452c9b69e1518c0e4c056782b051a75d6 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 14 Sep 2022 15:37:44 +0100 Subject: [PATCH 12/16] Fix conflicts --- arrow/src/datatypes/decimal.rs | 15 ++++++++------- integration-testing/src/util/mod.rs | 3 ++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arrow/src/datatypes/decimal.rs b/arrow/src/datatypes/decimal.rs index 5586ac2a20c..ffdb04e0d77 100644 --- a/arrow/src/datatypes/decimal.rs +++ b/arrow/src/datatypes/decimal.rs @@ -641,7 +641,7 @@ pub(crate) const MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [[u8; 32]; 76] = [ ]; /// `MAX_DECIMAL_FOR_EACH_PRECISION[p]` holds the maximum `i128` value -/// that can be stored in 
[DataType::Decimal128] value of precision `p` +/// that can be stored in [arrow_schema::DataType::Decimal128] value of precision `p` pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ 9, 99, @@ -684,7 +684,7 @@ pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ ]; /// `MIN_DECIMAL_FOR_EACH_PRECISION[p]` holds the minimum `i128` value -/// that can be stored in a [DataType::Decimal128] value of precision `p` +/// that can be stored in a [arrow_schema::DataType::Decimal128] value of precision `p` pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ -9, -99, @@ -726,19 +726,20 @@ pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ -99999999999999999999999999999999999999, ]; -/// The maximum precision for [DataType::Decimal128] values +/// The maximum precision for [arrow_schema::DataType::Decimal128] values pub const DECIMAL128_MAX_PRECISION: u8 = 38; -/// The maximum scale for [DataType::Decimal128] values +/// The maximum scale for [arrow_schema::DataType::Decimal128] values pub const DECIMAL128_MAX_SCALE: u8 = 38; -/// The maximum precision for [DataType::Decimal256] values +/// The maximum precision for [arrow_schema::DataType::Decimal256] values pub const DECIMAL256_MAX_PRECISION: u8 = 76; -/// The maximum scale for [DataType::Decimal256] values +/// The maximum scale for [arrow_schema::DataType::Decimal256] values pub const DECIMAL256_MAX_SCALE: u8 = 76; -/// The default scale for [DataType::Decimal128] and [DataType::Decimal256] values +/// The default scale for [arrow_schema::DataType::Decimal128] and +/// [arrow_schema::DataType::Decimal256] values pub const DECIMAL_DEFAULT_SCALE: u8 = 10; /// Validates that the specified `i128` value can be properly diff --git a/integration-testing/src/util/mod.rs b/integration-testing/src/util/mod.rs index 9ecd301360f..f9ddc0e6f4b 100644 --- a/integration-testing/src/util/mod.rs +++ b/integration-testing/src/util/mod.rs @@ -265,7 +265,8 @@ impl ArrowJsonField { /// TODO: convert to use an Into fn 
to_arrow_field(&self) -> Result { // a bit regressive, but we have to convert the field to JSON in order to convert it - let field = serde_json::to_value(self)?; + let field = serde_json::to_value(self) + .map_err(|error| ArrowError::JsonError(error.to_string()))?; field_from_json(&field) } } From b614881459b1bd4a0aefeacab34065108c233707 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 14 Sep 2022 16:19:55 +0100 Subject: [PATCH 13/16] Fix pyarrow --- arrow-pyarrow-integration-testing/src/lib.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arrow-pyarrow-integration-testing/src/lib.rs b/arrow-pyarrow-integration-testing/src/lib.rs index 9741d7d1757..f9e70eb8d77 100644 --- a/arrow-pyarrow-integration-testing/src/lib.rs +++ b/arrow-pyarrow-integration-testing/src/lib.rs @@ -28,9 +28,13 @@ use arrow::compute::kernels; use arrow::datatypes::{DataType, Field, Schema}; use arrow::error::ArrowError; use arrow::ffi_stream::ArrowArrayStreamReader; -use arrow::pyarrow::{PyArrowConvert, PyArrowType}; +use arrow::pyarrow::{PyArrowConvert, PyArrowException, PyArrowType}; use arrow::record_batch::RecordBatch; +fn to_py_err(err: ArrowError) -> PyErr { + PyArrowException::new_err(err.to_string()) +} + /// Returns `array + array` of an int64 array. 
#[pyfunction] fn double(array: &PyAny, py: Python) -> PyResult { @@ -41,8 +45,10 @@ fn double(array: &PyAny, py: Python) -> PyResult { let array = array .as_any() .downcast_ref::() - .ok_or(ArrowError::ParseError("Expects an int64".to_string()))?; - let array = kernels::arithmetic::add(array, array)?; + .ok_or_else(|| ArrowError::ParseError("Expects an int64".to_string())) + .map_err(to_py_err)?; + + let array = kernels::arithmetic::add(array, array).map_err(to_py_err)?; // export array.to_pyarrow(py) @@ -74,7 +80,7 @@ fn substring( let array = ArrayRef::from(array.0); // substring - let array = kernels::substring::substring(array.as_ref(), start, None)?; + let array = kernels::substring::substring(array.as_ref(), start, None).map_err(to_py_err)?; Ok(array.data().to_owned().into()) } @@ -85,7 +91,7 @@ fn concatenate(array: PyArrowType, py: Python) -> PyResult let array = ArrayRef::from(array.0); // concat - let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()])?; + let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).map_err(to_py_err)?; array.to_pyarrow(py) } From bdaa1cff19a4241453210dccb26af5c83880ba2a Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 14 Sep 2022 17:14:58 +0100 Subject: [PATCH 14/16] Tweak feature flags --- .github/workflows/arrow.yml | 2 ++ arrow-schema/Cargo.toml | 2 +- arrow-schema/src/schema.rs | 24 ++++++++++++++++++++++++ arrow/Cargo.toml | 2 +- 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/.github/workflows/arrow.yml b/.github/workflows/arrow.yml index d81a551a3b4..2b7ab2039a1 100644 --- a/.github/workflows/arrow.yml +++ b/.github/workflows/arrow.yml @@ -63,6 +63,8 @@ jobs: cargo run --example read_csv_infer_schema - name: Run non-archery based integration-tests run: cargo test -p arrow-integration-testing + - name: Test arrow-schema with all features + run: cargo test -p arrow-schema --all-features # test compilaton features linux-features: diff --git 
a/arrow-schema/Cargo.toml b/arrow-schema/Cargo.toml index efb26b9fc16..0e0c746ac03 100644 --- a/arrow-schema/Cargo.toml +++ b/arrow-schema/Cargo.toml @@ -44,4 +44,4 @@ serde = { version = "1.0", default-features = false, features = ["derive", "std" default = [] [dev-dependencies] - +serde_json = "1.0" diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index e326786e958..aae86670ed5 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -287,6 +287,30 @@ mod tests { use super::*; use crate::datatype::DataType; + #[test] + #[cfg(feature = "serde")] + fn test_ser_de_metadata() { + // ser/de with empty metadata + let schema = Schema::new(vec![ + Field::new("name", DataType::Utf8, false), + Field::new("address", DataType::Utf8, false), + Field::new("priority", DataType::UInt8, false), + ]); + + let json = serde_json::to_string(&schema).unwrap(); + let de_schema = serde_json::from_str(&json).unwrap(); + + assert_eq!(schema, de_schema); + + // ser/de with non-empty metadata + let schema = schema + .with_metadata([("key".to_owned(), "val".to_owned())].into_iter().collect()); + let json = serde_json::to_string(&schema).unwrap(); + let de_schema = serde_json::from_str(&json).unwrap(); + + assert_eq!(schema, de_schema); + } + #[test] fn test_projection() { let mut metadata = HashMap::new(); diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index f787caf5073..c4e7b194ba2 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -75,7 +75,7 @@ default = ["csv", "ipc", "json"] ipc_compression = ["ipc", "zstd", "lz4"] csv = ["csv_crate"] ipc = ["flatbuffers"] -json = ["serde_json", "arrow-schema/serde"] +json = ["serde_json"] simd = ["packed_simd"] prettyprint = ["comfy-table"] # The test utils feature enables code used in benchmarks and tests but From d963c5494393dc8d9f5cb5f2e9b36401f5c241dd Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 14 Sep 2022 17:34:49 +0100 Subject: [PATCH 15/16] Test juggling --- 
arrow-schema/src/datatype.rs | 119 ++++++++ arrow-schema/src/schema.rs | 396 +++++++++++++++++++++++++ arrow/src/datatypes/mod.rs | 547 ----------------------------------- arrow/src/json/mod.rs | 33 +++ 4 files changed, 548 insertions(+), 547 deletions(-) diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs index bc4545bb982..58834bbcc8c 100644 --- a/arrow-schema/src/datatype.rs +++ b/arrow-schema/src/datatype.rs @@ -347,3 +347,122 @@ impl DataType { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg(feature = "serde")] + fn serde_struct_type() { + use std::collections::BTreeMap; + + let kv_array = [("k".to_string(), "v".to_string())]; + let field_metadata: BTreeMap = kv_array.iter().cloned().collect(); + + // Non-empty map: should be converted as JSON obj { ... } + let first_name = Field::new("first_name", DataType::Utf8, false) + .with_metadata(Some(field_metadata)); + + // Empty map: should be omitted. + let last_name = Field::new("last_name", DataType::Utf8, false) + .with_metadata(Some(BTreeMap::default())); + + let person = DataType::Struct(vec![ + first_name, + last_name, + Field::new( + "address", + DataType::Struct(vec![ + Field::new("street", DataType::Utf8, false), + Field::new("zip", DataType::UInt16, false), + ]), + false, + ), + ]); + + let serialized = serde_json::to_string(&person).unwrap(); + + // NOTE that this is testing the default (derived) serialization format, not the + // JSON format specified in metadata.md + + assert_eq!( + "{\"Struct\":[\ + {\"name\":\"first_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{\"k\":\"v\"}},\ + {\"name\":\"last_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\ + {\"name\":\"address\",\"data_type\":{\"Struct\":\ + [{\"name\":\"street\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\ + 
{\"name\":\"zip\",\"data_type\":\"UInt16\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}\ + ]},\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}]}", + serialized + ); + + let deserialized = serde_json::from_str(&serialized).unwrap(); + + assert_eq!(person, deserialized); + } + + #[test] + fn test_list_datatype_equality() { + // tests that list type equality is checked while ignoring list names + let list_a = DataType::List(Box::new(Field::new("item", DataType::Int32, true))); + let list_b = DataType::List(Box::new(Field::new("array", DataType::Int32, true))); + let list_c = DataType::List(Box::new(Field::new("item", DataType::Int32, false))); + let list_d = DataType::List(Box::new(Field::new("item", DataType::UInt32, true))); + assert!(list_a.equals_datatype(&list_b)); + assert!(!list_a.equals_datatype(&list_c)); + assert!(!list_b.equals_datatype(&list_c)); + assert!(!list_a.equals_datatype(&list_d)); + + let list_e = + DataType::FixedSizeList(Box::new(Field::new("item", list_a, false)), 3); + let list_f = + DataType::FixedSizeList(Box::new(Field::new("array", list_b, false)), 3); + let list_g = DataType::FixedSizeList( + Box::new(Field::new("item", DataType::FixedSizeBinary(3), true)), + 3, + ); + assert!(list_e.equals_datatype(&list_f)); + assert!(!list_e.equals_datatype(&list_g)); + assert!(!list_f.equals_datatype(&list_g)); + + let list_h = DataType::Struct(vec![Field::new("f1", list_e, true)]); + let list_i = DataType::Struct(vec![Field::new("f1", list_f.clone(), true)]); + let list_j = DataType::Struct(vec![Field::new("f1", list_f.clone(), false)]); + let list_k = DataType::Struct(vec![ + Field::new("f1", list_f.clone(), false), + Field::new("f2", list_g.clone(), false), + Field::new("f3", DataType::Utf8, true), + ]); + let list_l = DataType::Struct(vec![ + Field::new("ff1", list_f.clone(), false), + Field::new("ff2", list_g.clone(), false), + Field::new("ff3", DataType::LargeUtf8, true), + ]); + let list_m = DataType::Struct(vec![ + 
Field::new("ff1", list_f, false), + Field::new("ff2", list_g, false), + Field::new("ff3", DataType::Utf8, true), + ]); + assert!(list_h.equals_datatype(&list_i)); + assert!(!list_h.equals_datatype(&list_j)); + assert!(!list_k.equals_datatype(&list_l)); + assert!(list_k.equals_datatype(&list_m)); + } + + #[test] + fn create_struct_type() { + let _person = DataType::Struct(vec![ + Field::new("first_name", DataType::Utf8, false), + Field::new("last_name", DataType::Utf8, false), + Field::new( + "address", + DataType::Struct(vec![ + Field::new("street", DataType::Utf8, false), + Field::new("zip", DataType::UInt16, false), + ]), + false, + ), + ]); + } +} diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index aae86670ed5..9605cdda720 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -286,6 +286,8 @@ impl Hash for Schema { mod tests { use super::*; use crate::datatype::DataType; + use crate::{TimeUnit, UnionMode}; + use std::collections::BTreeMap; #[test] #[cfg(feature = "serde")] @@ -383,4 +385,398 @@ mod tests { assert!(!schema1.contains(&schema2)); assert!(schema2.contains(&schema1)); } + + #[test] + fn schema_equality() { + let schema1 = Schema::new(vec![ + Field::new("c1", DataType::Utf8, false), + Field::new("c2", DataType::Float64, true), + Field::new("c3", DataType::LargeBinary, true), + ]); + let schema2 = Schema::new(vec![ + Field::new("c1", DataType::Utf8, false), + Field::new("c2", DataType::Float64, true), + Field::new("c3", DataType::LargeBinary, true), + ]); + + assert_eq!(schema1, schema2); + + let schema3 = Schema::new(vec![ + Field::new("c1", DataType::Utf8, false), + Field::new("c2", DataType::Float32, true), + ]); + let schema4 = Schema::new(vec![ + Field::new("C1", DataType::Utf8, false), + Field::new("C2", DataType::Float64, true), + ]); + + assert_ne!(schema1, schema3); + assert_ne!(schema1, schema4); + assert_ne!(schema2, schema3); + assert_ne!(schema2, schema4); + assert_ne!(schema3, schema4); + + let 
f = Field::new("c1", DataType::Utf8, false).with_metadata(Some( + [("foo".to_string(), "bar".to_string())] + .iter() + .cloned() + .collect(), + )); + let schema5 = Schema::new(vec![ + f, + Field::new("c2", DataType::Float64, true), + Field::new("c3", DataType::LargeBinary, true), + ]); + assert_ne!(schema1, schema5); + } + + #[test] + fn create_schema_string() { + let schema = person_schema(); + assert_eq!(schema.to_string(), + "Field { name: \"first_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: Some({\"k\": \"v\"}) }, \ + Field { name: \"last_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \ + Field { name: \"address\", data_type: Struct([\ + Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \ + Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }\ + ]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \ + Field { name: \"interests\", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 123, dict_is_ordered: true, metadata: None }") + } + + #[test] + fn schema_field_accessors() { + let schema = person_schema(); + + // test schema accessors + assert_eq!(schema.fields().len(), 4); + + // test field accessors + let first_name = &schema.fields()[0]; + assert_eq!(first_name.name(), "first_name"); + assert_eq!(first_name.data_type(), &DataType::Utf8); + assert!(!first_name.is_nullable()); + assert_eq!(first_name.dict_id(), None); + assert_eq!(first_name.dict_is_ordered(), None); + + let metadata = first_name.metadata(); + assert!(metadata.is_some()); + let md = metadata.as_ref().unwrap(); + assert_eq!(md.len(), 1); + let key = md.get("k"); + assert!(key.is_some()); + assert_eq!(key.unwrap(), "v"); + + let interests = &schema.fields()[3]; + assert_eq!(interests.name(), "interests"); + assert_eq!( + interests.data_type(), + 
&DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)) + ); + assert_eq!(interests.dict_id(), Some(123)); + assert_eq!(interests.dict_is_ordered(), Some(true)); + } + + #[test] + #[should_panic( + expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]" + )] + fn schema_index_of() { + let schema = person_schema(); + assert_eq!(schema.index_of("first_name").unwrap(), 0); + assert_eq!(schema.index_of("last_name").unwrap(), 1); + schema.index_of("nickname").unwrap(); + } + + #[test] + #[should_panic( + expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]" + )] + fn schema_field_with_name() { + let schema = person_schema(); + assert_eq!( + schema.field_with_name("first_name").unwrap().name(), + "first_name" + ); + assert_eq!( + schema.field_with_name("last_name").unwrap().name(), + "last_name" + ); + schema.field_with_name("nickname").unwrap(); + } + + #[test] + fn schema_field_with_dict_id() { + let schema = person_schema(); + + let fields_dict_123: Vec<_> = schema + .fields_with_dict_id(123) + .iter() + .map(|f| f.name()) + .collect(); + assert_eq!(fields_dict_123, vec!["interests"]); + + assert!(schema.fields_with_dict_id(456).is_empty()); + } + + fn person_schema() -> Schema { + let kv_array = [("k".to_string(), "v".to_string())]; + let field_metadata: BTreeMap = kv_array.iter().cloned().collect(); + let first_name = Field::new("first_name", DataType::Utf8, false) + .with_metadata(Some(field_metadata)); + + Schema::new(vec![ + first_name, + Field::new("last_name", DataType::Utf8, false), + Field::new( + "address", + DataType::Struct(vec![ + Field::new("street", DataType::Utf8, false), + Field::new("zip", DataType::UInt16, false), + ]), + false, + ), + Field::new_dict( + "interests", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + true, 
+ 123, + true, + ), + ]) + } + + #[test] + fn test_try_merge_field_with_metadata() { + // 1. Different values for the same key should cause error. + let metadata1: BTreeMap = + [("foo".to_string(), "bar".to_string())] + .iter() + .cloned() + .collect(); + let f1 = Field::new("first_name", DataType::Utf8, false) + .with_metadata(Some(metadata1)); + + let metadata2: BTreeMap = + [("foo".to_string(), "baz".to_string())] + .iter() + .cloned() + .collect(); + let f2 = Field::new("first_name", DataType::Utf8, false) + .with_metadata(Some(metadata2)); + + assert!( + Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])]) + .is_err() + ); + + // 2. None + Some + let mut f1 = Field::new("first_name", DataType::Utf8, false); + let metadata2: BTreeMap = + [("missing".to_string(), "value".to_string())] + .iter() + .cloned() + .collect(); + let f2 = Field::new("first_name", DataType::Utf8, false) + .with_metadata(Some(metadata2)); + + assert!(f1.try_merge(&f2).is_ok()); + assert!(f1.metadata().is_some()); + assert_eq!( + f1.metadata().as_ref().unwrap(), + f2.metadata().as_ref().unwrap() + ); + + // 3. Some + Some + let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some( + [("foo".to_string(), "bar".to_string())] + .iter() + .cloned() + .collect(), + )); + let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some( + [("foo2".to_string(), "bar2".to_string())] + .iter() + .cloned() + .collect(), + )); + + assert!(f1.try_merge(&f2).is_ok()); + assert!(f1.metadata().is_some()); + assert_eq!( + f1.metadata().cloned().unwrap(), + [ + ("foo".to_string(), "bar".to_string()), + ("foo2".to_string(), "bar2".to_string()) + ] + .iter() + .cloned() + .collect() + ); + + // 4. Some + None. 
+ let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some( + [("foo".to_string(), "bar".to_string())] + .iter() + .cloned() + .collect(), + )); + let f2 = Field::new("first_name", DataType::Utf8, false); + assert!(f1.try_merge(&f2).is_ok()); + assert!(f1.metadata().is_some()); + assert_eq!( + f1.metadata().cloned().unwrap(), + [("foo".to_string(), "bar".to_string())] + .iter() + .cloned() + .collect() + ); + + // 5. None + None. + let mut f1 = Field::new("first_name", DataType::Utf8, false); + let f2 = Field::new("first_name", DataType::Utf8, false); + assert!(f1.try_merge(&f2).is_ok()); + assert!(f1.metadata().is_none()); + } + + #[test] + fn test_schema_merge() { + let merged = Schema::try_merge(vec![ + Schema::new(vec![ + Field::new("first_name", DataType::Utf8, false), + Field::new("last_name", DataType::Utf8, false), + Field::new( + "address", + DataType::Struct(vec![Field::new("zip", DataType::UInt16, false)]), + false, + ), + ]), + Schema::new_with_metadata( + vec![ + // nullable merge + Field::new("last_name", DataType::Utf8, true), + Field::new( + "address", + DataType::Struct(vec![ + // add new nested field + Field::new("street", DataType::Utf8, false), + // nullable merge on nested field + Field::new("zip", DataType::UInt16, true), + ]), + false, + ), + // new field + Field::new("number", DataType::Utf8, true), + ], + [("foo".to_string(), "bar".to_string())] + .iter() + .cloned() + .collect::>(), + ), + ]) + .unwrap(); + + assert_eq!( + merged, + Schema::new_with_metadata( + vec![ + Field::new("first_name", DataType::Utf8, false), + Field::new("last_name", DataType::Utf8, true), + Field::new( + "address", + DataType::Struct(vec![ + Field::new("zip", DataType::UInt16, true), + Field::new("street", DataType::Utf8, false), + ]), + false, + ), + Field::new("number", DataType::Utf8, true), + ], + [("foo".to_string(), "bar".to_string())] + .iter() + .cloned() + .collect::>() + ) + ); + + // support merge union fields + assert_eq!( + 
Schema::try_merge(vec![ + Schema::new(vec![Field::new( + "c1", + DataType::Union( + vec![ + Field::new("c11", DataType::Utf8, true), + Field::new("c12", DataType::Utf8, true), + ], + vec![0, 1], + UnionMode::Dense + ), + false + ),]), + Schema::new(vec![Field::new( + "c1", + DataType::Union( + vec![ + Field::new("c12", DataType::Utf8, true), + Field::new("c13", DataType::Time64(TimeUnit::Second), true), + ], + vec![1, 2], + UnionMode::Dense + ), + false + ),]) + ]) + .unwrap(), + Schema::new(vec![Field::new( + "c1", + DataType::Union( + vec![ + Field::new("c11", DataType::Utf8, true), + Field::new("c12", DataType::Utf8, true), + Field::new("c13", DataType::Time64(TimeUnit::Second), true), + ], + vec![0, 1, 2], + UnionMode::Dense + ), + false + ),]), + ); + + // incompatible field should throw error + assert!(Schema::try_merge(vec![ + Schema::new(vec![ + Field::new("first_name", DataType::Utf8, false), + Field::new("last_name", DataType::Utf8, false), + ]), + Schema::new(vec![Field::new("last_name", DataType::Int64, false),]) + ]) + .is_err()); + + // incompatible metadata should throw error + let res = Schema::try_merge(vec![ + Schema::new_with_metadata( + vec![Field::new("first_name", DataType::Utf8, false)], + [("foo".to_string(), "bar".to_string())] + .iter() + .cloned() + .collect::>(), + ), + Schema::new_with_metadata( + vec![Field::new("last_name", DataType::Utf8, false)], + [("foo".to_string(), "baz".to_string())] + .iter() + .cloned() + .collect::>(), + ), + ]) + .unwrap_err(); + + let expected = "Fail to merge schema due to conflicting metadata. 
Key 'foo' has different values 'bar' and 'baz'"; + assert!( + res.to_string().contains(expected), + "Could not find expected string '{}' in '{}'", + expected, + res + ); + } } diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs index 090907fd37d..2f83871127f 100644 --- a/arrow/src/datatypes/mod.rs +++ b/arrow/src/datatypes/mod.rs @@ -43,550 +43,3 @@ pub use ffi::*; /// A reference-counted reference to a [`Schema`](crate::datatypes::Schema). pub type SchemaRef = Arc; - -#[cfg(test)] -mod tests { - use super::*; - use crate::error::Result; - use std::collections::{BTreeMap, HashMap}; - - #[cfg(feature = "json")] - use crate::json::JsonSerializable; - - #[cfg(feature = "json")] - use serde_json::{ - Number, - Value::{Bool, Number as VNumber, String as VString}, - }; - - #[test] - fn test_list_datatype_equality() { - // tests that list type equality is checked while ignoring list names - let list_a = DataType::List(Box::new(Field::new("item", DataType::Int32, true))); - let list_b = DataType::List(Box::new(Field::new("array", DataType::Int32, true))); - let list_c = DataType::List(Box::new(Field::new("item", DataType::Int32, false))); - let list_d = DataType::List(Box::new(Field::new("item", DataType::UInt32, true))); - assert!(list_a.equals_datatype(&list_b)); - assert!(!list_a.equals_datatype(&list_c)); - assert!(!list_b.equals_datatype(&list_c)); - assert!(!list_a.equals_datatype(&list_d)); - - let list_e = - DataType::FixedSizeList(Box::new(Field::new("item", list_a, false)), 3); - let list_f = - DataType::FixedSizeList(Box::new(Field::new("array", list_b, false)), 3); - let list_g = DataType::FixedSizeList( - Box::new(Field::new("item", DataType::FixedSizeBinary(3), true)), - 3, - ); - assert!(list_e.equals_datatype(&list_f)); - assert!(!list_e.equals_datatype(&list_g)); - assert!(!list_f.equals_datatype(&list_g)); - - let list_h = DataType::Struct(vec![Field::new("f1", list_e, true)]); - let list_i = DataType::Struct(vec![Field::new("f1", 
list_f.clone(), true)]); - let list_j = DataType::Struct(vec![Field::new("f1", list_f.clone(), false)]); - let list_k = DataType::Struct(vec![ - Field::new("f1", list_f.clone(), false), - Field::new("f2", list_g.clone(), false), - Field::new("f3", DataType::Utf8, true), - ]); - let list_l = DataType::Struct(vec![ - Field::new("ff1", list_f.clone(), false), - Field::new("ff2", list_g.clone(), false), - Field::new("ff3", DataType::LargeUtf8, true), - ]); - let list_m = DataType::Struct(vec![ - Field::new("ff1", list_f, false), - Field::new("ff2", list_g, false), - Field::new("ff3", DataType::Utf8, true), - ]); - assert!(list_h.equals_datatype(&list_i)); - assert!(!list_h.equals_datatype(&list_j)); - assert!(!list_k.equals_datatype(&list_l)); - assert!(list_k.equals_datatype(&list_m)); - } - - #[test] - #[cfg(feature = "json")] - fn create_struct_type() { - let _person = DataType::Struct(vec![ - Field::new("first_name", DataType::Utf8, false), - Field::new("last_name", DataType::Utf8, false), - Field::new( - "address", - DataType::Struct(vec![ - Field::new("street", DataType::Utf8, false), - Field::new("zip", DataType::UInt16, false), - ]), - false, - ), - ]); - } - - #[test] - #[cfg(feature = "json")] - fn serde_struct_type() { - let kv_array = [("k".to_string(), "v".to_string())]; - let field_metadata: BTreeMap = kv_array.iter().cloned().collect(); - - // Non-empty map: should be converted as JSON obj { ... } - let first_name = Field::new("first_name", DataType::Utf8, false) - .with_metadata(Some(field_metadata)); - - // Empty map: should be omitted. 
- let last_name = Field::new("last_name", DataType::Utf8, false) - .with_metadata(Some(BTreeMap::default())); - - let person = DataType::Struct(vec![ - first_name, - last_name, - Field::new( - "address", - DataType::Struct(vec![ - Field::new("street", DataType::Utf8, false), - Field::new("zip", DataType::UInt16, false), - ]), - false, - ), - ]); - - let serialized = serde_json::to_string(&person).unwrap(); - - // NOTE that this is testing the default (derived) serialization format, not the - // JSON format specified in metadata.md - - assert_eq!( - "{\"Struct\":[\ - {\"name\":\"first_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{\"k\":\"v\"}},\ - {\"name\":\"last_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\ - {\"name\":\"address\",\"data_type\":{\"Struct\":\ - [{\"name\":\"street\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\ - {\"name\":\"zip\",\"data_type\":\"UInt16\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}\ - ]},\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}]}", - serialized - ); - - let deserialized = serde_json::from_str(&serialized).unwrap(); - - assert_eq!(person, deserialized); - } - - #[test] - fn create_schema_string() { - let schema = person_schema(); - assert_eq!(schema.to_string(), - "Field { name: \"first_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: Some({\"k\": \"v\"}) }, \ - Field { name: \"last_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \ - Field { name: \"address\", data_type: Struct([\ - Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \ - Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }\ - ]), nullable: false, dict_id: 0, dict_is_ordered: false, 
metadata: None }, \ - Field { name: \"interests\", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 123, dict_is_ordered: true, metadata: None }") - } - - #[test] - fn schema_field_accessors() { - let schema = person_schema(); - - // test schema accessors - assert_eq!(schema.fields().len(), 4); - - // test field accessors - let first_name = &schema.fields()[0]; - assert_eq!(first_name.name(), "first_name"); - assert_eq!(first_name.data_type(), &DataType::Utf8); - assert!(!first_name.is_nullable()); - assert_eq!(first_name.dict_id(), None); - assert_eq!(first_name.dict_is_ordered(), None); - - let metadata = first_name.metadata(); - assert!(metadata.is_some()); - let md = metadata.as_ref().unwrap(); - assert_eq!(md.len(), 1); - let key = md.get("k"); - assert!(key.is_some()); - assert_eq!(key.unwrap(), "v"); - - let interests = &schema.fields()[3]; - assert_eq!(interests.name(), "interests"); - assert_eq!( - interests.data_type(), - &DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)) - ); - assert_eq!(interests.dict_id(), Some(123)); - assert_eq!(interests.dict_is_ordered(), Some(true)); - } - - #[test] - #[should_panic( - expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]" - )] - fn schema_index_of() { - let schema = person_schema(); - assert_eq!(schema.index_of("first_name").unwrap(), 0); - assert_eq!(schema.index_of("last_name").unwrap(), 1); - schema.index_of("nickname").unwrap(); - } - - #[test] - #[should_panic( - expected = "Unable to get field named \\\"nickname\\\". 
Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]" - )] - fn schema_field_with_name() { - let schema = person_schema(); - assert_eq!( - schema.field_with_name("first_name").unwrap().name(), - "first_name" - ); - assert_eq!( - schema.field_with_name("last_name").unwrap().name(), - "last_name" - ); - schema.field_with_name("nickname").unwrap(); - } - - #[test] - fn schema_field_with_dict_id() { - let schema = person_schema(); - - let fields_dict_123: Vec<_> = schema - .fields_with_dict_id(123) - .iter() - .map(|f| f.name()) - .collect(); - assert_eq!(fields_dict_123, vec!["interests"]); - - assert!(schema.fields_with_dict_id(456).is_empty()); - } - - #[test] - fn schema_equality() { - let schema1 = Schema::new(vec![ - Field::new("c1", DataType::Utf8, false), - Field::new("c2", DataType::Float64, true), - Field::new("c3", DataType::LargeBinary, true), - ]); - let schema2 = Schema::new(vec![ - Field::new("c1", DataType::Utf8, false), - Field::new("c2", DataType::Float64, true), - Field::new("c3", DataType::LargeBinary, true), - ]); - - assert_eq!(schema1, schema2); - - let schema3 = Schema::new(vec![ - Field::new("c1", DataType::Utf8, false), - Field::new("c2", DataType::Float32, true), - ]); - let schema4 = Schema::new(vec![ - Field::new("C1", DataType::Utf8, false), - Field::new("C2", DataType::Float64, true), - ]); - - assert!(schema1 != schema3); - assert!(schema1 != schema4); - assert!(schema2 != schema3); - assert!(schema2 != schema4); - assert!(schema3 != schema4); - - let f = Field::new("c1", DataType::Utf8, false).with_metadata(Some( - [("foo".to_string(), "bar".to_string())] - .iter() - .cloned() - .collect(), - )); - let schema5 = Schema::new(vec![ - f, - Field::new("c2", DataType::Float64, true), - Field::new("c3", DataType::LargeBinary, true), - ]); - assert!(schema1 != schema5); - } - - #[test] - #[cfg(feature = "json")] - fn test_arrow_native_type_to_json() { - assert_eq!(Some(Bool(true)), true.into_json_value()); 
- assert_eq!(Some(VNumber(Number::from(1))), 1i8.into_json_value()); - assert_eq!(Some(VNumber(Number::from(1))), 1i16.into_json_value()); - assert_eq!(Some(VNumber(Number::from(1))), 1i32.into_json_value()); - assert_eq!(Some(VNumber(Number::from(1))), 1i64.into_json_value()); - assert_eq!(Some(VString("1".to_string())), 1i128.into_json_value()); - assert_eq!(Some(VNumber(Number::from(1))), 1u8.into_json_value()); - assert_eq!(Some(VNumber(Number::from(1))), 1u16.into_json_value()); - assert_eq!(Some(VNumber(Number::from(1))), 1u32.into_json_value()); - assert_eq!(Some(VNumber(Number::from(1))), 1u64.into_json_value()); - assert_eq!( - Some(VNumber(Number::from_f64(0.01f64).unwrap())), - 0.01.into_json_value() - ); - assert_eq!( - Some(VNumber(Number::from_f64(0.01f64).unwrap())), - 0.01f64.into_json_value() - ); - assert_eq!(None, f32::NAN.into_json_value()); - } - - fn person_schema() -> Schema { - let kv_array = [("k".to_string(), "v".to_string())]; - let field_metadata: BTreeMap = kv_array.iter().cloned().collect(); - let first_name = Field::new("first_name", DataType::Utf8, false) - .with_metadata(Some(field_metadata)); - - Schema::new(vec![ - first_name, - Field::new("last_name", DataType::Utf8, false), - Field::new( - "address", - DataType::Struct(vec![ - Field::new("street", DataType::Utf8, false), - Field::new("zip", DataType::UInt16, false), - ]), - false, - ), - Field::new_dict( - "interests", - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - true, - 123, - true, - ), - ]) - } - - #[test] - fn test_try_merge_field_with_metadata() { - // 1. Different values for the same key should cause error. 
- let metadata1: BTreeMap = - [("foo".to_string(), "bar".to_string())] - .iter() - .cloned() - .collect(); - let f1 = Field::new("first_name", DataType::Utf8, false) - .with_metadata(Some(metadata1)); - - let metadata2: BTreeMap = - [("foo".to_string(), "baz".to_string())] - .iter() - .cloned() - .collect(); - let f2 = Field::new("first_name", DataType::Utf8, false) - .with_metadata(Some(metadata2)); - - assert!( - Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])]) - .is_err() - ); - - // 2. None + Some - let mut f1 = Field::new("first_name", DataType::Utf8, false); - let metadata2: BTreeMap = - [("missing".to_string(), "value".to_string())] - .iter() - .cloned() - .collect(); - let f2 = Field::new("first_name", DataType::Utf8, false) - .with_metadata(Some(metadata2)); - - assert!(f1.try_merge(&f2).is_ok()); - assert!(f1.metadata().is_some()); - assert_eq!( - f1.metadata().as_ref().unwrap(), - f2.metadata().as_ref().unwrap() - ); - - // 3. Some + Some - let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some( - [("foo".to_string(), "bar".to_string())] - .iter() - .cloned() - .collect(), - )); - let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some( - [("foo2".to_string(), "bar2".to_string())] - .iter() - .cloned() - .collect(), - )); - - assert!(f1.try_merge(&f2).is_ok()); - assert!(f1.metadata().is_some()); - assert_eq!( - f1.metadata().cloned().unwrap(), - [ - ("foo".to_string(), "bar".to_string()), - ("foo2".to_string(), "bar2".to_string()) - ] - .iter() - .cloned() - .collect() - ); - - // 4. Some + None. 
- let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some( - [("foo".to_string(), "bar".to_string())] - .iter() - .cloned() - .collect(), - )); - let f2 = Field::new("first_name", DataType::Utf8, false); - assert!(f1.try_merge(&f2).is_ok()); - assert!(f1.metadata().is_some()); - assert_eq!( - f1.metadata().cloned().unwrap(), - [("foo".to_string(), "bar".to_string())] - .iter() - .cloned() - .collect() - ); - - // 5. None + None. - let mut f1 = Field::new("first_name", DataType::Utf8, false); - let f2 = Field::new("first_name", DataType::Utf8, false); - assert!(f1.try_merge(&f2).is_ok()); - assert!(f1.metadata().is_none()); - } - - #[test] - fn test_schema_merge() -> Result<()> { - let merged = Schema::try_merge(vec![ - Schema::new(vec![ - Field::new("first_name", DataType::Utf8, false), - Field::new("last_name", DataType::Utf8, false), - Field::new( - "address", - DataType::Struct(vec![Field::new("zip", DataType::UInt16, false)]), - false, - ), - ]), - Schema::new_with_metadata( - vec![ - // nullable merge - Field::new("last_name", DataType::Utf8, true), - Field::new( - "address", - DataType::Struct(vec![ - // add new nested field - Field::new("street", DataType::Utf8, false), - // nullable merge on nested field - Field::new("zip", DataType::UInt16, true), - ]), - false, - ), - // new field - Field::new("number", DataType::Utf8, true), - ], - [("foo".to_string(), "bar".to_string())] - .iter() - .cloned() - .collect::>(), - ), - ])?; - - assert_eq!( - merged, - Schema::new_with_metadata( - vec![ - Field::new("first_name", DataType::Utf8, false), - Field::new("last_name", DataType::Utf8, true), - Field::new( - "address", - DataType::Struct(vec![ - Field::new("zip", DataType::UInt16, true), - Field::new("street", DataType::Utf8, false), - ]), - false, - ), - Field::new("number", DataType::Utf8, true), - ], - [("foo".to_string(), "bar".to_string())] - .iter() - .cloned() - .collect::>() - ) - ); - - // support merge union fields - assert_eq!( - 
Schema::try_merge(vec![ - Schema::new(vec![Field::new( - "c1", - DataType::Union( - vec![ - Field::new("c11", DataType::Utf8, true), - Field::new("c12", DataType::Utf8, true), - ], - vec![0, 1], - UnionMode::Dense - ), - false - ),]), - Schema::new(vec![Field::new( - "c1", - DataType::Union( - vec![ - Field::new("c12", DataType::Utf8, true), - Field::new("c13", DataType::Time64(TimeUnit::Second), true), - ], - vec![1, 2], - UnionMode::Dense - ), - false - ),]) - ])?, - Schema::new(vec![Field::new( - "c1", - DataType::Union( - vec![ - Field::new("c11", DataType::Utf8, true), - Field::new("c12", DataType::Utf8, true), - Field::new("c13", DataType::Time64(TimeUnit::Second), true), - ], - vec![0, 1, 2], - UnionMode::Dense - ), - false - ),]), - ); - - // incompatible field should throw error - assert!(Schema::try_merge(vec![ - Schema::new(vec![ - Field::new("first_name", DataType::Utf8, false), - Field::new("last_name", DataType::Utf8, false), - ]), - Schema::new(vec![Field::new("last_name", DataType::Int64, false),]) - ]) - .is_err()); - - // incompatible metadata should throw error - let res = Schema::try_merge(vec![ - Schema::new_with_metadata( - vec![Field::new("first_name", DataType::Utf8, false)], - [("foo".to_string(), "bar".to_string())] - .iter() - .cloned() - .collect::>(), - ), - Schema::new_with_metadata( - vec![Field::new("last_name", DataType::Utf8, false)], - [("foo".to_string(), "baz".to_string())] - .iter() - .cloned() - .collect::>(), - ), - ]) - .unwrap_err(); - - let expected = "Fail to merge schema due to conflicting metadata. 
Key 'foo' has different values 'bar' and 'baz'"; - assert!( - res.to_string().contains(expected), - "Could not find expected string '{}' in '{}'", - expected, - res - ); - - Ok(()) - } -} diff --git a/arrow/src/json/mod.rs b/arrow/src/json/mod.rs index 836145bb08e..21f96d90a5d 100644 --- a/arrow/src/json/mod.rs +++ b/arrow/src/json/mod.rs @@ -80,3 +80,36 @@ impl JsonSerializable for f64 { Number::from_f64(self).map(Value::Number) } } + +#[cfg(test)] +mod tests { + use super::*; + + use serde_json::{ + Number, + Value::{Bool, Number as VNumber, String as VString}, + }; + + #[test] + fn test_arrow_native_type_to_json() { + assert_eq!(Some(Bool(true)), true.into_json_value()); + assert_eq!(Some(VNumber(Number::from(1))), 1i8.into_json_value()); + assert_eq!(Some(VNumber(Number::from(1))), 1i16.into_json_value()); + assert_eq!(Some(VNumber(Number::from(1))), 1i32.into_json_value()); + assert_eq!(Some(VNumber(Number::from(1))), 1i64.into_json_value()); + assert_eq!(Some(VString("1".to_string())), 1i128.into_json_value()); + assert_eq!(Some(VNumber(Number::from(1))), 1u8.into_json_value()); + assert_eq!(Some(VNumber(Number::from(1))), 1u16.into_json_value()); + assert_eq!(Some(VNumber(Number::from(1))), 1u32.into_json_value()); + assert_eq!(Some(VNumber(Number::from(1))), 1u64.into_json_value()); + assert_eq!( + Some(VNumber(Number::from_f64(0.01f64).unwrap())), + 0.01.into_json_value() + ); + assert_eq!( + Some(VNumber(Number::from_f64(0.01f64).unwrap())), + 0.01f64.into_json_value() + ); + assert_eq!(None, f32::NAN.into_json_value()); + } +} From 4ec95e8b50d52084153575d85921d70041738f20 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 20 Sep 2022 19:01:38 +0100 Subject: [PATCH 16/16] Derive PyArrowConvert for Vec --- arrow/src/pyarrow.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index e41f71a37a7..90caa2e3a5c 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -151,6 +151,21 
@@ impl PyArrowConvert for ArrayData { } } +impl PyArrowConvert for Vec { + fn from_pyarrow(value: &PyAny) -> PyResult { + let list = value.downcast::()?; + list.iter().map(|x| T::from_pyarrow(&x)).collect() + } + + fn to_pyarrow(&self, py: Python) -> PyResult { + let values = self + .iter() + .map(|v| v.to_pyarrow(py)) + .collect::>>()?; + Ok(values.to_object(py)) + } +} + impl PyArrowConvert for T where T: Array + From,