From a2f6a7233b924cffff7c9a8cb7e7ad38b56d8832 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Mon, 12 Sep 2022 17:55:36 +0100 Subject: [PATCH] Split out arrow-schema (#2594) --- Cargo.toml | 1 + arrow-schema/Cargo.toml | 52 + arrow-schema/src/datatype.rs | 683 ++++++++ arrow-schema/src/error.rs | 45 + .../datatypes => arrow-schema/src}/field.rs | 88 +- arrow-schema/src/lib.rs | 23 + .../datatypes => arrow-schema/src}/schema.rs | 63 +- arrow/Cargo.toml | 3 +- arrow/src/array/array_decimal.rs | 14 +- arrow/src/array/builder/decimal_builder.rs | 5 +- arrow/src/array/data.rs | 8 +- arrow/src/csv/reader.rs | 3 +- arrow/src/datatypes/datatype.rs | 1499 ----------------- arrow/src/datatypes/mod.rs | 10 +- arrow/src/datatypes/types.rs | 2 +- arrow/src/error.rs | 11 +- arrow/src/record_batch.rs | 5 +- arrow/src/util/decimal.rs | 809 ++++++++- 18 files changed, 1727 insertions(+), 1597 deletions(-) create mode 100644 arrow-schema/Cargo.toml create mode 100644 arrow-schema/src/datatype.rs create mode 100644 arrow-schema/src/error.rs rename {arrow/src/datatypes => arrow-schema/src}/field.rs (91%) create mode 100644 arrow-schema/src/lib.rs rename {arrow/src/datatypes => arrow-schema/src}/schema.rs (90%) delete mode 100644 arrow/src/datatypes/datatype.rs diff --git a/Cargo.toml b/Cargo.toml index 9bf55c0f236..5afe98173ba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ [workspace] members = [ "arrow", + "arrow-schema", "parquet", "parquet_derive", "parquet_derive_test", diff --git a/arrow-schema/Cargo.toml b/arrow-schema/Cargo.toml new file mode 100644 index 00000000000..272e55a8fc5 --- /dev/null +++ b/arrow-schema/Cargo.toml @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "arrow-schema" +version = "22.0.0" +description = "Defines the logical types for arrow arrays" +homepage = "https://github.com/apache/arrow-rs" +repository = "https://github.com/apache/arrow-rs" +authors = ["Apache Arrow "] +license = "Apache-2.0" +keywords = ["arrow"] +include = [ + "benches/*.rs", + "src/**/*.rs", + "Cargo.toml", +] +edition = "2021" +rust-version = "1.62" + +[lib] +name = "arrow_schema" +path = "src/lib.rs" +bench = false + +[dependencies] +serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } +serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true } + +[package.metadata.docs.rs] +features = ["json"] + +[features] +default = [] +json = ["serde", "serde_json"] + +[dev-dependencies] + diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs new file mode 100644 index 00000000000..8e553749dbc --- /dev/null +++ b/arrow-schema/src/datatype.rs @@ -0,0 +1,683 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::fmt; + +use crate::field::Field; + +#[cfg(feature = "json")] +use crate::error::ArrowSchemaError; + +/// The set of datatypes that are supported by this implementation of Apache Arrow. +/// +/// The Arrow specification on data types includes some more types. +/// See also [`Schema.fbs`](https://github.com/apache/arrow/blob/master/format/Schema.fbs) +/// for Arrow's specification. +/// +/// The variants of this enum include primitive fixed size types as well as parametric or +/// nested types. +/// Currently the Rust implementation supports the following nested types: +/// - `List` +/// - `Struct` +/// +/// Nested types can themselves be nested within other arrays. +/// For more information on these types please see +/// [the physical memory layout of Apache Arrow](https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout). +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum DataType { + /// Null type + Null, + /// A boolean datatype representing the values `true` and `false`. + Boolean, + /// A signed 8-bit integer. + Int8, + /// A signed 16-bit integer. + Int16, + /// A signed 32-bit integer. + Int32, + /// A signed 64-bit integer. + Int64, + /// An unsigned 8-bit integer. + UInt8, + /// An unsigned 16-bit integer. + UInt16, + /// An unsigned 32-bit integer. + UInt32, + /// An unsigned 64-bit integer. + UInt64, + /// A 16-bit floating point number. + Float16, + /// A 32-bit floating point number. 
+ Float32, + /// A 64-bit floating point number. + Float64, + /// A timestamp with an optional timezone. + /// + /// Time is measured as a Unix epoch, counting the seconds from + /// 00:00:00.000 on 1 January 1970, excluding leap seconds, + /// as a 64-bit integer. + /// + /// The time zone is a string indicating the name of a time zone, one of: + /// + /// * As used in the Olson time zone database (the "tz database" or + /// "tzdata"), such as "America/New_York" + /// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 + /// + /// Timestamps with a non-empty timezone + /// ------------------------------------ + /// + /// If a Timestamp column has a non-empty timezone value, its epoch is + /// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone + /// (the Unix epoch), regardless of the Timestamp's own timezone. + /// + /// Therefore, timestamp values with a non-empty timezone correspond to + /// physical points in time together with some additional information about + /// how the data was obtained and/or how to display it (the timezone). + /// + /// For example, the timestamp value 0 with the timezone string "Europe/Paris" + /// corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the + /// application may prefer to display it as "January 1st 1970, 01h00" in + /// the Europe/Paris timezone (which is the same physical point in time). + /// + /// One consequence is that timestamp values with a non-empty timezone + /// can be compared and ordered directly, since they all share the same + /// well-known point of reference (the Unix epoch). + /// + /// Timestamps with an unset / empty timezone + /// ----------------------------------------- + /// + /// If a Timestamp column has no timezone value, its epoch is + /// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone. 
+ /// + /// Therefore, timestamp values without a timezone cannot be meaningfully + /// interpreted as physical points in time, but only as calendar / clock + /// indications ("wall clock time") in an unspecified timezone. + /// + /// For example, the timestamp value 0 with an empty timezone string + /// corresponds to "January 1st 1970, 00h00" in an unknown timezone: there + /// is not enough information to interpret it as a well-defined physical + /// point in time. + /// + /// One consequence is that timestamp values without a timezone cannot + /// be reliably compared or ordered, since they may have different points of + /// reference. In particular, it is *not* possible to interpret an unset + /// or empty timezone as the same as "UTC". + /// + /// Conversion between timezones + /// ---------------------------- + /// + /// If a Timestamp column has a non-empty timezone, changing the timezone + /// to a different non-empty value is a metadata-only operation: + /// the timestamp values need not change as their point of reference remains + /// the same (the Unix epoch). + /// + /// However, if a Timestamp column has no timezone value, changing it to a + /// non-empty value requires thinking about the desired semantics. + /// One possibility is to assume that the original timestamp values are + /// relative to the epoch of the timezone being set; timestamp values should + /// then be adjusted to the Unix epoch (for example, changing the timezone from + /// empty to "Europe/Paris" would require converting the timestamp values + /// from "Europe/Paris" to "UTC", which seems counter-intuitive but is + /// nevertheless correct). + Timestamp(TimeUnit, Option), + /// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01) + /// in days (32 bits). + Date32, + /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01) + /// in milliseconds (64 bits). Values are evenly divisible by 86400000.
+ Date64, + /// A 32-bit time representing the elapsed time since midnight in the unit of `TimeUnit`. + Time32(TimeUnit), + /// A 64-bit time representing the elapsed time since midnight in the unit of `TimeUnit`. + Time64(TimeUnit), + /// Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds. + Duration(TimeUnit), + /// A "calendar" interval which models types that don't necessarily + /// have a precise duration without the context of a base timestamp (e.g. + /// days can differ in length during daylight saving time transitions). + Interval(IntervalUnit), + /// Opaque binary data of variable length. + Binary, + /// Opaque binary data of fixed size. + /// Enum parameter specifies the number of bytes per value. + FixedSizeBinary(i32), + /// Opaque binary data of variable length and 64-bit offsets. + LargeBinary, + /// A variable-length string in Unicode with UTF-8 encoding. + Utf8, + /// A variable-length string in Unicode with UTF-8 encoding and 64-bit offsets. + LargeUtf8, + /// A list of some logical data type with variable length. + List(Box), + /// A list of some logical data type with fixed length. + FixedSizeList(Box, i32), + /// A list of some logical data type with variable length and 64-bit offsets. + LargeList(Box), + /// A nested datatype that contains a number of sub-fields. + Struct(Vec), + /// A nested datatype that can represent slots of differing types. Components: + /// + /// 1. [`Field`] for each possible child type the Union can hold + /// 2. The corresponding `type_id` used to identify which Field + /// 3. The type of union (Sparse or Dense) + Union(Vec, Vec, UnionMode), + /// A dictionary encoded array (`key_type`, `value_type`), where + /// each array element is an index of `key_type` into an + /// associated dictionary of `value_type`.
+ /// + /// Dictionary arrays are used to store columns of `value_type` + /// that contain many repeated values using less memory, but with + /// a higher CPU overhead for some operations. + /// + /// This type is mostly used to represent low cardinality string + /// arrays or a limited set of primitive types as integers. + Dictionary(Box, Box), + /// Exact 128-bit width decimal value with precision and scale + /// + /// * precision is the total number of digits + /// * scale is the number of digits past the decimal + /// + /// For example the number 123.45 has precision 5 and scale 2. + Decimal128(u8, u8), + /// Exact 256-bit width decimal value with precision and scale + /// + /// * precision is the total number of digits + /// * scale is the number of digits past the decimal + /// + /// For example the number 123.45 has precision 5 and scale 2. + Decimal256(u8, u8), + /// A Map is a logical nested type that is represented as + /// + /// `List<entries: Struct<key: K, value: V>>` + /// + /// The keys and values are each respectively contiguous. + /// The key and value types are not constrained, but keys should be + /// hashable and unique. + /// Whether the keys are sorted can be set in the `bool` after the `Field`. + /// + /// In a field with Map type, the field has a child Struct field, which then + /// has two children: the first the key type and the second the value type. The names of the + /// child fields may be respectively "entries", "key", and "value", but this is + /// not enforced. + Map(Box, bool), +} + +/// An absolute length of time in seconds, milliseconds, microseconds or nanoseconds. +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum TimeUnit { + /// Time in seconds. + Second, + /// Time in milliseconds. + Millisecond, + /// Time in microseconds. + Microsecond, + /// Time in nanoseconds. + Nanosecond, +} + +/// YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO interval in SQL style.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum IntervalUnit { + /// Indicates the number of elapsed whole months, stored as 4-byte integers. + YearMonth, + /// Indicates the number of elapsed days and milliseconds, + /// stored as 2 contiguous 32-bit integers (days, milliseconds) (8-bytes in total). + DayTime, + /// A triple of the number of elapsed months, days, and nanoseconds. + /// The values are stored contiguously in 16 byte blocks. Months and + /// days are encoded as 32 bit integers and nanoseconds is encoded as a + /// 64 bit integer. All integers are signed. Each field is independent + /// (e.g. there is no constraint that nanoseconds have the same sign + /// as days or that the quantity of nanoseconds represents less + /// than a day's worth of time). + MonthDayNano, +} + +/// Sparse or Dense union layouts +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum UnionMode { + Sparse, + Dense, +} + +impl fmt::Display for DataType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self) + } +} + +impl DataType { + /// Parse a data type from a JSON representation.
+ #[cfg(feature = "json")] + pub fn from(json: &serde_json::Value) -> Result { + use serde_json::Value; + let default_field = Field::new("", DataType::Boolean, true); + match *json { + Value::Object(ref map) => match map.get("name") { + Some(s) if s == "null" => Ok(DataType::Null), + Some(s) if s == "bool" => Ok(DataType::Boolean), + Some(s) if s == "binary" => Ok(DataType::Binary), + Some(s) if s == "largebinary" => Ok(DataType::LargeBinary), + Some(s) if s == "utf8" => Ok(DataType::Utf8), + Some(s) if s == "largeutf8" => Ok(DataType::LargeUtf8), + Some(s) if s == "fixedsizebinary" => { + // the fixed width in bytes is read from the `byteWidth` attribute + if let Some(Value::Number(size)) = map.get("byteWidth") { + Ok(DataType::FixedSizeBinary(size.as_i64().unwrap() as i32)) + } else { + Err(ArrowSchemaError::Parse( + "Expecting a byteWidth for fixedsizebinary".to_string(), + )) + } + } + Some(s) if s == "decimal" => { + // `precision` and `scale` are required; `bitWidth` defaults to 128 + let precision = match map.get("precision") { + Some(p) => Ok(p.as_u64().unwrap().try_into().unwrap()), + None => Err(ArrowSchemaError::Parse( + "Expecting a precision for decimal".to_string(), + )), + }?; + let scale = match map.get("scale") { + Some(s) => Ok(s.as_u64().unwrap().try_into().unwrap()), + _ => Err(ArrowSchemaError::Parse( + "Expecting a scale for decimal".to_string(), + )), + }?; + let bit_width: usize = match map.get("bitWidth") { + Some(b) => b.as_u64().unwrap() as usize, + _ => 128, // Default bit width + }; + + if bit_width == 128 { + Ok(DataType::Decimal128(precision, scale)) + } else if bit_width == 256 { + Ok(DataType::Decimal256(precision, scale)) + } else { + Err(ArrowSchemaError::Parse( + "Decimal bit_width invalid".to_string(), + )) + } + } + Some(s) if s == "floatingpoint" => match map.get("precision") { + Some(p) if p == "HALF" => Ok(DataType::Float16), + Some(p) if p == "SINGLE" => Ok(DataType::Float32), + Some(p) if p == "DOUBLE" =>
Ok(DataType::Float64), + _ => Err(ArrowSchemaError::Parse( + "floatingpoint precision missing or invalid".to_string(), + )), + }, + Some(s) if s == "timestamp" => { + let unit = match map.get("unit") { + Some(p) if p == "SECOND" => Ok(TimeUnit::Second), + Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond), + Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond), + Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond), + _ => Err(ArrowSchemaError::Parse( + "timestamp unit missing or invalid".to_string(), + )), + }; + let tz = match map.get("timezone") { + None => Ok(None), + Some(Value::String(tz)) => Ok(Some(tz.clone())), + _ => Err(ArrowSchemaError::Parse( + "timezone must be a string".to_string(), + )), + }; + Ok(DataType::Timestamp(unit?, tz?)) + } + Some(s) if s == "date" => match map.get("unit") { + Some(p) if p == "DAY" => Ok(DataType::Date32), + Some(p) if p == "MILLISECOND" => Ok(DataType::Date64), + _ => Err(ArrowSchemaError::Parse( + "date unit missing or invalid".to_string(), + )), + }, + Some(s) if s == "time" => { + let unit = match map.get("unit") { + Some(p) if p == "SECOND" => Ok(TimeUnit::Second), + Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond), + Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond), + Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond), + _ => Err(ArrowSchemaError::Parse( + "time unit missing or invalid".to_string(), + )), + }; + match map.get("bitWidth") { + Some(p) if p == 32 => Ok(DataType::Time32(unit?)), + Some(p) if p == 64 => Ok(DataType::Time64(unit?)), + _ => Err(ArrowSchemaError::Parse( + "time bitWidth missing or invalid".to_string(), + )), + } + } + Some(s) if s == "duration" => match map.get("unit") { + Some(p) if p == "SECOND" => Ok(DataType::Duration(TimeUnit::Second)), + Some(p) if p == "MILLISECOND" => { + Ok(DataType::Duration(TimeUnit::Millisecond)) + } + Some(p) if p == "MICROSECOND" => { + Ok(DataType::Duration(TimeUnit::Microsecond)) + } + Some(p) if p == "NANOSECOND" 
=> { + Ok(DataType::Duration(TimeUnit::Nanosecond)) + } + _ => Err(ArrowSchemaError::Parse( + "time unit missing or invalid".to_string(), + )), + }, + Some(s) if s == "interval" => match map.get("unit") { + Some(p) if p == "DAY_TIME" => { + Ok(DataType::Interval(IntervalUnit::DayTime)) + } + Some(p) if p == "YEAR_MONTH" => { + Ok(DataType::Interval(IntervalUnit::YearMonth)) + } + Some(p) if p == "MONTH_DAY_NANO" => { + Ok(DataType::Interval(IntervalUnit::MonthDayNano)) + } + _ => Err(ArrowSchemaError::Parse( + "interval unit missing or invalid".to_string(), + )), + }, + Some(s) if s == "int" => match map.get("isSigned") { + Some(&Value::Bool(true)) => match map.get("bitWidth") { + Some(&Value::Number(ref n)) => match n.as_u64() { + Some(8) => Ok(DataType::Int8), + Some(16) => Ok(DataType::Int16), + Some(32) => Ok(DataType::Int32), + Some(64) => Ok(DataType::Int64), + _ => Err(ArrowSchemaError::Parse( + "int bitWidth missing or invalid".to_string(), + )), + }, + _ => Err(ArrowSchemaError::Parse( + "int bitWidth missing or invalid".to_string(), + )), + }, + Some(&Value::Bool(false)) => match map.get("bitWidth") { + Some(&Value::Number(ref n)) => match n.as_u64() { + Some(8) => Ok(DataType::UInt8), + Some(16) => Ok(DataType::UInt16), + Some(32) => Ok(DataType::UInt32), + Some(64) => Ok(DataType::UInt64), + _ => Err(ArrowSchemaError::Parse( + "int bitWidth missing or invalid".to_string(), + )), + }, + _ => Err(ArrowSchemaError::Parse( + "int bitWidth missing or invalid".to_string(), + )), + }, + _ => Err(ArrowSchemaError::Parse( + "int signed missing or invalid".to_string(), + )), + }, + Some(s) if s == "list" => { + // return a list with any type as its child isn't defined in the map + Ok(DataType::List(Box::new(default_field))) + } + Some(s) if s == "largelist" => { + // return a largelist with any type as its child isn't defined in the map + Ok(DataType::LargeList(Box::new(default_field))) + } + Some(s) if s == "fixedsizelist" => { + // return a list with any type 
as its child isn't defined in the map + if let Some(Value::Number(size)) = map.get("listSize") { + Ok(DataType::FixedSizeList( + Box::new(default_field), + size.as_i64().unwrap() as i32, + )) + } else { + Err(ArrowSchemaError::Parse( + "Expecting a listSize for fixedsizelist".to_string(), + )) + } + } + Some(s) if s == "struct" => { + // return an empty `struct` type as its children aren't defined in the map + Ok(DataType::Struct(vec![])) + } + Some(s) if s == "map" => { + if let Some(Value::Bool(keys_sorted)) = map.get("keysSorted") { + // Return a map with an empty type as its children aren't defined in the map + Ok(DataType::Map(Box::new(default_field), *keys_sorted)) + } else { + Err(ArrowSchemaError::Parse( + "Expecting a keysSorted for map".to_string(), + )) + } + } + Some(s) if s == "union" => { + if let Some(Value::String(mode)) = map.get("mode") { + let union_mode = if mode == "SPARSE" { + UnionMode::Sparse + } else if mode == "DENSE" { + UnionMode::Dense + } else { + return Err(ArrowSchemaError::Parse(format!( + "Unknown union mode {:?} for union", + mode + ))); + }; + if let Some(type_ids) = map.get("typeIds") { + let type_ids = type_ids + .as_array() + .unwrap() + .iter() + .map(|t| t.as_i64().unwrap() as i8) + .collect::>(); + + let default_fields = type_ids + .iter() + .map(|_| default_field.clone()) + .collect::>(); + + Ok(DataType::Union(default_fields, type_ids, union_mode)) + } else { + Err(ArrowSchemaError::Parse( + "Expecting a typeIds for union ".to_string(), + )) + } + } else { + Err(ArrowSchemaError::Parse( + "Expecting a mode for union".to_string(), + )) + } + } + Some(other) => Err(ArrowSchemaError::Parse(format!( + "invalid or unsupported type name: {} in {:?}", + other, json + ))), + None => Err(ArrowSchemaError::Parse("type name missing".to_string())), + }, + _ => Err(ArrowSchemaError::Parse( + "invalid json value type".to_string(), + )), + } + } + + /// Generate a JSON representation of the data type. 
+ #[cfg(feature = "json")] + pub fn to_json(&self) -> serde_json::Value { + use serde_json::json; + match self { + DataType::Null => json!({"name": "null"}), + DataType::Boolean => json!({"name": "bool"}), + DataType::Int8 => json!({"name": "int", "bitWidth": 8, "isSigned": true}), + DataType::Int16 => json!({"name": "int", "bitWidth": 16, "isSigned": true}), + DataType::Int32 => json!({"name": "int", "bitWidth": 32, "isSigned": true}), + DataType::Int64 => json!({"name": "int", "bitWidth": 64, "isSigned": true}), + DataType::UInt8 => json!({"name": "int", "bitWidth": 8, "isSigned": false}), + DataType::UInt16 => json!({"name": "int", "bitWidth": 16, "isSigned": false}), + DataType::UInt32 => json!({"name": "int", "bitWidth": 32, "isSigned": false}), + DataType::UInt64 => json!({"name": "int", "bitWidth": 64, "isSigned": false}), + DataType::Float16 => json!({"name": "floatingpoint", "precision": "HALF"}), + DataType::Float32 => json!({"name": "floatingpoint", "precision": "SINGLE"}), + DataType::Float64 => json!({"name": "floatingpoint", "precision": "DOUBLE"}), + DataType::Utf8 => json!({"name": "utf8"}), + DataType::LargeUtf8 => json!({"name": "largeutf8"}), + DataType::Binary => json!({"name": "binary"}), + DataType::LargeBinary => json!({"name": "largebinary"}), + DataType::FixedSizeBinary(byte_width) => { + json!({"name": "fixedsizebinary", "byteWidth": byte_width}) + } + DataType::Struct(_) => json!({"name": "struct"}), + DataType::Union(_, _, _) => json!({"name": "union"}), + DataType::List(_) => json!({ "name": "list"}), + DataType::LargeList(_) => json!({ "name": "largelist"}), + DataType::FixedSizeList(_, length) => { + json!({"name":"fixedsizelist", "listSize": length}) + } + DataType::Time32(unit) => { + json!({"name": "time", "bitWidth": 32, "unit": match unit { + TimeUnit::Second => "SECOND", + TimeUnit::Millisecond => "MILLISECOND", + TimeUnit::Microsecond => "MICROSECOND", + TimeUnit::Nanosecond => "NANOSECOND", + }}) + } + DataType::Time64(unit) 
=> { + json!({"name": "time", "bitWidth": 64, "unit": match unit { + TimeUnit::Second => "SECOND", + TimeUnit::Millisecond => "MILLISECOND", + TimeUnit::Microsecond => "MICROSECOND", + TimeUnit::Nanosecond => "NANOSECOND", + }}) + } + DataType::Date32 => { + json!({"name": "date", "unit": "DAY"}) + } + DataType::Date64 => { + json!({"name": "date", "unit": "MILLISECOND"}) + } + DataType::Timestamp(unit, None) => { + json!({"name": "timestamp", "unit": match unit { + TimeUnit::Second => "SECOND", + TimeUnit::Millisecond => "MILLISECOND", + TimeUnit::Microsecond => "MICROSECOND", + TimeUnit::Nanosecond => "NANOSECOND", + }}) + } + DataType::Timestamp(unit, Some(tz)) => { + json!({"name": "timestamp", "unit": match unit { + TimeUnit::Second => "SECOND", + TimeUnit::Millisecond => "MILLISECOND", + TimeUnit::Microsecond => "MICROSECOND", + TimeUnit::Nanosecond => "NANOSECOND", + }, "timezone": tz}) + } + DataType::Interval(unit) => json!({"name": "interval", "unit": match unit { + IntervalUnit::YearMonth => "YEAR_MONTH", + IntervalUnit::DayTime => "DAY_TIME", + IntervalUnit::MonthDayNano => "MONTH_DAY_NANO", + }}), + DataType::Duration(unit) => json!({"name": "duration", "unit": match unit { + TimeUnit::Second => "SECOND", + TimeUnit::Millisecond => "MILLISECOND", + TimeUnit::Microsecond => "MICROSECOND", + TimeUnit::Nanosecond => "NANOSECOND", + }}), + DataType::Dictionary(_, _) => json!({ "name": "dictionary"}), + DataType::Decimal128(precision, scale) => { + json!({"name": "decimal", "precision": precision, "scale": scale, "bitWidth": 128}) + } + DataType::Decimal256(precision, scale) => { + json!({"name": "decimal", "precision": precision, "scale": scale, "bitWidth": 256}) + } + DataType::Map(_, keys_sorted) => { + json!({"name": "map", "keysSorted": keys_sorted}) + } + } + } + + /// Returns true if this type is numeric: (UInt*, Int*, or Float*). 
+ pub fn is_numeric(t: &DataType) -> bool { + use DataType::*; + matches!( + t, + UInt8 + | UInt16 + | UInt32 + | UInt64 + | Int8 + | Int16 + | Int32 + | Int64 + | Float32 + | Float64 + ) + } + + /// Returns true if this type is temporal: (Date*, Timestamp, Time*, Duration, or Interval). + pub fn is_temporal(t: &DataType) -> bool { + use DataType::*; + matches!( + t, + Date32 + | Date64 + | Timestamp(_, _) + | Time32(_) + | Time64(_) + | Duration(_) + | Interval(_) + ) + } + + /// Returns true if this type is valid as a dictionary key + /// (e.g. [`super::ArrowDictionaryKeyType`]) + pub fn is_dictionary_key_type(t: &DataType) -> bool { + use DataType::*; + matches!( + t, + UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 + ) + } + + /// Compares the datatype with another, ignoring nested field names + /// and metadata. + pub fn equals_datatype(&self, other: &DataType) -> bool { + match (&self, other) { + (DataType::List(a), DataType::List(b)) + | (DataType::LargeList(a), DataType::LargeList(b)) => { + a.is_nullable() == b.is_nullable() + && a.data_type().equals_datatype(b.data_type()) + } + (DataType::FixedSizeList(a, a_size), DataType::FixedSizeList(b, b_size)) => { + a_size == b_size + && a.is_nullable() == b.is_nullable() + && a.data_type().equals_datatype(b.data_type()) + } + (DataType::Struct(a), DataType::Struct(b)) => { + a.len() == b.len() + && a.iter().zip(b).all(|(a, b)| { + a.is_nullable() == b.is_nullable() + && a.data_type().equals_datatype(b.data_type()) + }) + } + ( + DataType::Map(a_field, a_is_sorted), + DataType::Map(b_field, b_is_sorted), + ) => a_field == b_field && a_is_sorted == b_is_sorted, + _ => self == other, + } + } +} diff --git a/arrow-schema/src/error.rs b/arrow-schema/src/error.rs new file mode 100644 index 00000000000..4ad7c65c503 --- /dev/null +++ b/arrow-schema/src/error.rs @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements.
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Defines `ArrowSchemaError` for representing failures in arrow schema + +use std::error::Error; + +#[derive(Debug)] +pub enum ArrowSchemaError { + Parse(String), + Merge(String), + Field(String), +} + +impl std::fmt::Display for ArrowSchemaError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ArrowSchemaError::Parse(message) => { + write!(f, "Error parsing schema: {}", message) + } + ArrowSchemaError::Merge(message) => { + write!(f, "Error merging schema: {}", message) + } + ArrowSchemaError::Field(message) => { + write!(f, "Error indexing field: {}", message) + } + } + } +} + +impl Error for ArrowSchemaError {} diff --git a/arrow/src/datatypes/field.rs b/arrow-schema/src/field.rs similarity index 91% rename from arrow/src/datatypes/field.rs rename to arrow-schema/src/field.rs index ac966cafe34..2da449a5d1e 100644 --- a/arrow/src/datatypes/field.rs +++ b/arrow-schema/src/field.rs @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. 
-use crate::error::{ArrowError, Result}; +use crate::error::ArrowSchemaError; use std::cmp::Ordering; use std::collections::BTreeMap; use std::hash::{Hash, Hasher}; -use super::DataType; +use crate::datatype::DataType; /// Describes a single column in a [`Schema`](super::Schema). /// @@ -145,7 +145,8 @@ impl Field { /// Set the name of the [`Field`] and returns self. /// /// ``` - /// # use arrow::datatypes::*; + /// # use arrow_schema::field::Field; + /// # use arrow_schema::datatype::DataType; /// let field = Field::new("c1", DataType::Int64, false) /// .with_name("c2"); /// @@ -165,7 +166,8 @@ impl Field { /// Set [`DataType`] of the [`Field`] and returns self. /// /// ``` - /// # use arrow::datatypes::*; + /// # use arrow_schema::field::Field; + /// # use arrow_schema::datatype::DataType; /// let field = Field::new("c1", DataType::Int64, false) /// .with_data_type(DataType::Utf8); /// @@ -185,7 +187,8 @@ impl Field { /// Set `nullable` of the [`Field`] and returns self. /// /// ``` - /// # use arrow::datatypes::*; + /// # use arrow_schema::field::Field; + /// # use arrow_schema::datatype::DataType; /// let field = Field::new("c1", DataType::Int64, false) /// .with_nullable(true); /// @@ -252,14 +255,14 @@ impl Field { /// Parse a `Field` definition from a JSON representation. 
#[cfg(feature = "json")] - pub fn from(json: &serde_json::Value) -> Result { + pub fn from(json: &serde_json::Value) -> Result { use serde_json::Value; match *json { Value::Object(ref map) => { let name = match map.get("name") { Some(&Value::String(ref name)) => name.to_string(), _ => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'name' attribute".to_string(), )); } @@ -267,7 +270,7 @@ impl Field { let nullable = match map.get("nullable") { Some(&Value::Bool(b)) => b, _ => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'nullable' attribute".to_string(), )); } @@ -275,7 +278,7 @@ impl Field { let data_type = match map.get("type") { Some(t) => DataType::from(t)?, _ => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'type' attribute".to_string(), )); } @@ -289,7 +292,7 @@ impl Field { match value.as_object() { Some(map) => { if map.len() != 2 { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field 'metadata' must have exact two entries for each key-value map".to_string(), )); } @@ -304,14 +307,14 @@ impl Field { v_str.to_string().clone(), ); } else { - return Err(ArrowError::ParseError("Field 'metadata' must have map value of string type".to_string())); + return Err(ArrowSchemaError::Parse("Field 'metadata' must have map value of string type".to_string())); } } else { - return Err(ArrowError::ParseError("Field 'metadata' lacks map keys named \"key\" or \"value\"".to_string())); + return Err(ArrowSchemaError::Parse("Field 'metadata' lacks map keys named \"key\" or \"value\"".to_string())); } } _ => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field 'metadata' contains non-object key-value pair".to_string(), )); } @@ -327,7 +330,7 @@ impl Field { if let Some(str_value) = v.as_str() { res.insert(k.clone(), str_value.to_string().clone()); } else { - return 
Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( format!("Field 'metadata' contains non-string value for key {}", k), )); } @@ -335,7 +338,7 @@ impl Field { Some(res) } Some(_) => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field `metadata` is not json array".to_string(), )); } @@ -349,7 +352,7 @@ impl Field { | DataType::FixedSizeList(_, _) => match map.get("children") { Some(Value::Array(values)) => { if values.len() != 1 { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field 'children' must have one element for a list data type".to_string(), )); } @@ -370,30 +373,30 @@ impl Field { } } Some(_) => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field 'children' must be an array".to_string(), )) } None => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'children' attribute".to_string(), )); } }, DataType::Struct(mut fields) => match map.get("children") { Some(Value::Array(values)) => { - let struct_fields: Result> = + let struct_fields: Result, _> = values.iter().map(Field::from).collect(); fields.append(&mut struct_fields?); DataType::Struct(fields) } Some(_) => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field 'children' must be an array".to_string(), )) } None => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'children' attribute".to_string(), )); } @@ -408,20 +411,20 @@ impl Field { DataType::Map(Box::new(child), keys_sorted) } t => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( format!("Map children should be a struct with 2 fields, found {:?}", t) )) } } } Some(_) => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field 'children' must be an array with 1 element" .to_string(), )) } None => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( 
"Field missing 'children' attribute".to_string(), )); } @@ -429,17 +432,19 @@ impl Field { } DataType::Union(_, type_ids, mode) => match map.get("children") { Some(Value::Array(values)) => { - let union_fields: Vec = - values.iter().map(Field::from).collect::>()?; + let union_fields: Vec = values + .iter() + .map(Field::from) + .collect::>()?; DataType::Union(union_fields, type_ids, mode) } Some(_) => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field 'children' must be an array".to_string(), )) } None => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'children' attribute".to_string(), )); } @@ -455,7 +460,7 @@ impl Field { let index_type = match dictionary.get("indexType") { Some(t) => DataType::from(t)?, _ => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'indexType' attribute".to_string(), )); } @@ -463,7 +468,7 @@ impl Field { dict_id = match dictionary.get("id") { Some(Value::Number(n)) => n.as_i64().unwrap(), _ => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'id' attribute".to_string(), )); } @@ -471,7 +476,7 @@ impl Field { dict_is_ordered = match dictionary.get("isOrdered") { Some(&Value::Bool(n)) => n, _ => { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Field missing 'isOrdered' attribute".to_string(), )); } @@ -489,7 +494,7 @@ impl Field { metadata, }) } - _ => Err(ArrowError::ParseError( + _ => Err(ArrowSchemaError::Parse( "Invalid json value type for field".to_string(), )), } @@ -536,19 +541,20 @@ impl Field { /// Example: /// /// ``` - /// # use arrow::datatypes::*; + /// # use arrow_schema::field::Field; + /// # use arrow_schema::datatype::DataType; /// let mut field = Field::new("c1", DataType::Int64, false); /// assert!(field.try_merge(&Field::new("c1", DataType::Int64, true)).is_ok()); /// assert!(field.is_nullable()); /// ``` - pub fn try_merge(&mut 
self, from: &Field) -> Result<()> { + pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowSchemaError> { if from.dict_id != self.dict_id { - return Err(ArrowError::SchemaError( + return Err(ArrowSchemaError::Merge( "Fail to merge schema Field due to conflicting dict_id".to_string(), )); } if from.dict_is_ordered != self.dict_is_ordered { - return Err(ArrowError::SchemaError( + return Err(ArrowSchemaError::Merge( "Fail to merge schema Field due to conflicting dict_is_ordered" .to_string(), )); @@ -560,7 +566,7 @@ impl Field { for (key, from_value) in from_metadata { if let Some(self_value) = self_metadata.get(key) { if self_value != from_value { - return Err(ArrowError::SchemaError(format!( + return Err(ArrowSchemaError::Merge(format!( "Fail to merge field due to conflicting metadata data value for key {}", key), )); } @@ -589,7 +595,7 @@ impl Field { } } _ => { - return Err(ArrowError::SchemaError( + return Err(ArrowSchemaError::Merge( "Fail to merge schema Field due to conflicting datatype" .to_string(), )); @@ -609,7 +615,7 @@ impl Field { // If the nested fields in two unions are the same, they must have same // type id. 
if self_type_id != field_type_id { - return Err(ArrowError::SchemaError( + return Err(ArrowSchemaError::Merge( "Fail to merge schema Field due to conflicting type ids in union datatype" .to_string(), )); @@ -627,7 +633,7 @@ impl Field { } } _ => { - return Err(ArrowError::SchemaError( + return Err(ArrowSchemaError::Merge( "Fail to merge schema Field due to conflicting datatype" .to_string(), )); @@ -666,7 +672,7 @@ impl Field { | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => { if self.data_type != from.data_type { - return Err(ArrowError::SchemaError( + return Err(ArrowSchemaError::Merge( "Fail to merge schema Field due to conflicting datatype" .to_string(), )); diff --git a/arrow-schema/src/lib.rs b/arrow-schema/src/lib.rs new file mode 100644 index 00000000000..867c428f1ab --- /dev/null +++ b/arrow-schema/src/lib.rs @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Arrow logical types + +pub mod datatype; +pub mod error; +pub mod field; +pub mod schema; diff --git a/arrow/src/datatypes/schema.rs b/arrow-schema/src/schema.rs similarity index 90% rename from arrow/src/datatypes/schema.rs rename to arrow-schema/src/schema.rs index efde4edefa6..e52e12b7057 100644 --- a/arrow/src/datatypes/schema.rs +++ b/arrow-schema/src/schema.rs @@ -19,9 +19,8 @@ use std::collections::HashMap; use std::fmt; use std::hash::Hash; -use crate::error::{ArrowError, Result}; - -use super::Field; +use crate::error::ArrowSchemaError; +use crate::field::Field; /// Describes the meta-data of an ordered sequence of relative types. /// @@ -53,7 +52,9 @@ impl Schema { /// # Example /// /// ``` - /// # use arrow::datatypes::{Field, DataType, Schema}; + /// # use arrow_schema::field::Field; + /// # use arrow_schema::datatype::DataType; + /// # use arrow_schema::schema::Schema; /// let field_a = Field::new("a", DataType::Int64, false); /// let field_b = Field::new("b", DataType::Boolean, false); /// @@ -69,7 +70,9 @@ impl Schema { /// # Example /// /// ``` - /// # use arrow::datatypes::{Field, DataType, Schema}; + /// # use arrow_schema::field::Field; + /// # use arrow_schema::datatype::DataType; + /// # use arrow_schema::schema::Schema; /// # use std::collections::HashMap; /// let field_a = Field::new("a", DataType::Int64, false); /// let field_b = Field::new("b", DataType::Boolean, false); @@ -95,19 +98,19 @@ impl Schema { /// Returns a new schema with only the specified columns in the new schema /// This carries metadata from the parent schema over as well - pub fn project(&self, indices: &[usize]) -> Result { + pub fn project(&self, indices: &[usize]) -> Result { let new_fields = indices .iter() .map(|i| { self.fields.get(*i).cloned().ok_or_else(|| { - ArrowError::SchemaError(format!( + ArrowSchemaError::Field(format!( "project index {} out of bounds, max field {}", i, self.fields().len() )) }) }) - .collect::>>()?; + .collect::, _>>()?; 
Ok(Self::new_with_metadata(new_fields, self.metadata.clone())) } @@ -116,7 +119,9 @@ impl Schema { /// Example: /// /// ``` - /// use arrow::datatypes::*; + /// # use arrow_schema::field::Field; + /// # use arrow_schema::datatype::DataType; + /// # use arrow_schema::schema::Schema; /// /// let merged = Schema::try_merge(vec![ /// Schema::new(vec![ @@ -139,7 +144,9 @@ impl Schema { /// ]), /// ); /// ``` - pub fn try_merge(schemas: impl IntoIterator) -> Result { + pub fn try_merge( + schemas: impl IntoIterator, + ) -> Result { schemas .into_iter() .try_fold(Self::empty(), |mut merged, schema| { @@ -148,7 +155,7 @@ impl Schema { // merge metadata if let Some(old_val) = merged.metadata.get(&key) { if old_val != &value { - return Err(ArrowError::SchemaError(format!( + return Err(ArrowSchemaError::Merge(format!( "Fail to merge schema due to conflicting metadata. \ Key '{}' has different values '{}' and '{}'", key, old_val, value @@ -179,8 +186,7 @@ impl Schema { /// Returns a vector with references to all fields (including nested fields) #[inline] - #[cfg(feature = "ipc")] - pub(crate) fn all_fields(&self) -> Vec<&Field> { + pub fn all_fields(&self) -> Vec<&Field> { self.fields.iter().flat_map(|f| f.fields()).collect() } @@ -191,7 +197,7 @@ impl Schema { } /// Returns an immutable reference of a specific [`Field`] instance selected by name. - pub fn field_with_name(&self, name: &str) -> Result<&Field> { + pub fn field_with_name(&self, name: &str) -> Result<&Field, ArrowSchemaError> { Ok(&self.fields[self.index_of(name)?]) } @@ -205,13 +211,13 @@ impl Schema { } /// Find the index of the column with the given name. 
- pub fn index_of(&self, name: &str) -> Result { + pub fn index_of(&self, name: &str) -> Result { (0..self.fields.len()) .find(|idx| self.fields[*idx].name() == name) .ok_or_else(|| { let valid_fields: Vec = self.fields.iter().map(|f| f.name().clone()).collect(); - ArrowError::InvalidArgumentError(format!( + ArrowSchemaError::Field(format!( "Unable to get field named \"{}\". Valid fields: {:?}", name, valid_fields )) @@ -244,14 +250,14 @@ impl Schema { /// Parse a `Schema` definition from a JSON representation. #[cfg(feature = "json")] - pub fn from(json: &serde_json::Value) -> Result { + pub fn from(json: &serde_json::Value) -> Result { use serde_json::Value; match *json { Value::Object(ref schema) => { let fields = if let Some(Value::Array(fields)) = schema.get("fields") { - fields.iter().map(Field::from).collect::>()? + fields.iter().map(Field::from).collect::>()? } else { - return Err(ArrowError::ParseError( + return Err(ArrowSchemaError::Parse( "Schema fields should be an array".to_string(), )); }; @@ -264,7 +270,7 @@ impl Schema { Ok(Self { fields, metadata }) } - _ => Err(ArrowError::ParseError( + _ => Err(ArrowSchemaError::Parse( "Invalid json value type for schema".to_string(), )), } @@ -273,14 +279,16 @@ impl Schema { /// Parse a `metadata` definition from a JSON representation. /// The JSON can either be an Object or an Array of Objects. 
#[cfg(feature = "json")] - fn from_metadata(json: &serde_json::Value) -> Result> { + fn from_metadata( + json: &serde_json::Value, + ) -> Result, ArrowSchemaError> { use serde_json::Value; match json { Value::Array(_) => { let mut hashmap = HashMap::new(); let values: Vec = serde_json::from_value(json.clone()) .map_err(|_| { - ArrowError::JsonError( + ArrowSchemaError::Parse( "Unable to parse object into key-value pair".to_string(), ) })?; @@ -295,13 +303,13 @@ impl Schema { if let Value::String(v) = v { Ok((k.to_string(), v.to_string())) } else { - Err(ArrowError::ParseError( + Err(ArrowSchemaError::Parse( "metadata `value` field must be a string".to_string(), )) } }) - .collect::>(), - _ => Err(ArrowError::ParseError( + .collect::>(), + _ => Err(ArrowSchemaError::Parse( "`metadata` field must be an object".to_string(), )), } @@ -364,9 +372,8 @@ struct MetadataKeyValue { #[cfg(test)] mod tests { - use crate::datatypes::DataType; - use super::*; + use crate::datatype::DataType; #[test] #[cfg(feature = "json")] @@ -424,7 +431,7 @@ mod tests { ]) .with_metadata(metadata); - let projected: Result = schema.project(&[0, 3]); + let projected = schema.project(&[0, 3]); assert!(projected.is_err()); if let Err(e) = projected { diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 2de4db64276..10c62c84ebb 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -44,6 +44,7 @@ ahash = { version = "0.8", default-features = false, features = ["compile-time-r ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } [dependencies] +arrow-schema = { version = "22.0.0", path = "../arrow-schema" } serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true } indexmap = { version = "1.9", default-features = false, features = ["std"] } @@ -75,7 +76,7 @@ default = ["csv", "ipc", "json"] ipc_compression = ["ipc", "zstd", "lz4"] csv = 
["csv_crate"] ipc = ["flatbuffers"] -json = ["serde", "serde_json"] +json = ["serde", "serde_json", "arrow-schema/json"] simd = ["packed_simd"] prettyprint = ["comfy-table"] # The test utils feature enables code used in benchmarks and tests but diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs index 543fda1b1a8..8780e6315b4 100644 --- a/arrow/src/array/array_decimal.rs +++ b/arrow/src/array/array_decimal.rs @@ -27,13 +27,14 @@ use super::{ use super::{BooleanBufferBuilder, DecimalIter, FixedSizeBinaryArray}; #[allow(deprecated)] use crate::buffer::{Buffer, MutableBuffer}; -use crate::datatypes::validate_decimal_precision; use crate::datatypes::{ - validate_decimal256_precision_with_lt_bytes, DataType, Decimal128Type, - Decimal256Type, DecimalType, NativeDecimalType, + DataType, Decimal128Type, Decimal256Type, DecimalType, NativeDecimalType, }; use crate::error::{ArrowError, Result}; -use crate::util::decimal::{Decimal, Decimal256}; +use crate::util::decimal::{ + validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, Decimal, + Decimal256, +}; /// `Decimal128Array` stores fixed width decimal numbers, /// with a fixed precision and scale. 
@@ -549,8 +550,9 @@ impl<'a, T: DecimalType> DecimalArray { #[cfg(test)] mod tests { use crate::array::Decimal256Builder; - use crate::datatypes::{DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE}; - use crate::util::decimal::Decimal128; + use crate::util::decimal::{ + Decimal128, DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE, + }; use crate::{array::Decimal128Builder, datatypes::Field}; use num::{BigInt, Num}; diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs index daa30eebed9..c2a03862679 100644 --- a/arrow/src/array/builder/decimal_builder.rs +++ b/arrow/src/array/builder/decimal_builder.rs @@ -25,10 +25,9 @@ use crate::array::{ArrayBuilder, FixedSizeBinaryBuilder}; use crate::error::{ArrowError, Result}; -use crate::datatypes::{ - validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, +use crate::util::decimal::{ + validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, Decimal256, }; -use crate::util::decimal::Decimal256; /// Array Builder for [`Decimal128Array`] /// diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs index 7571ba210d7..bc504ff9453 100644 --- a/arrow/src/array/data.rs +++ b/arrow/src/array/data.rs @@ -18,12 +18,12 @@ //! Contains `ArrayData`, a generic representation of Arrow array data which encapsulates //! common attributes and operations for Arrow array. 
-use crate::datatypes::{ - validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, DataType, - IntervalUnit, UnionMode, -}; +use crate::datatypes::{DataType, IntervalUnit, UnionMode}; use crate::error::{ArrowError, Result}; use crate::util::bit_iterator::BitSliceIterator; +use crate::util::decimal::{ + validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, +}; use crate::{bitmap::Bitmap, datatypes::ArrowNativeType}; use crate::{ buffer::{Buffer, MutableBuffer}, diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index d164d35c3c8..54b7e045bb7 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -58,6 +58,7 @@ use crate::error::{ArrowError, Result}; use crate::record_batch::{RecordBatch, RecordBatchOptions}; use crate::util::reader_parser::Parser; +use crate::util::decimal::validate_decimal_precision; use csv_crate::{ByteRecord, StringRecord}; use std::ops::Neg; @@ -289,7 +290,7 @@ pub fn infer_schema_from_files( } } - Schema::try_merge(schemas) + Ok(Schema::try_merge(schemas)?) } // optional bounds of the reader, of the form (min line, max line). diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs deleted file mode 100644 index b65bfd7725a..00000000000 --- a/arrow/src/datatypes/datatype.rs +++ /dev/null @@ -1,1499 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use num::BigInt; -use std::cmp::Ordering; -use std::fmt; - -use crate::error::{ArrowError, Result}; -use crate::util::decimal::singed_cmp_le_bytes; - -use super::Field; - -/// The set of datatypes that are supported by this implementation of Apache Arrow. -/// -/// The Arrow specification on data types includes some more types. -/// See also [`Schema.fbs`](https://github.com/apache/arrow/blob/master/format/Schema.fbs) -/// for Arrow's specification. -/// -/// The variants of this enum include primitive fixed size types as well as parametric or -/// nested types. -/// Currently the Rust implementation supports the following nested types: -/// - `List` -/// - `Struct` -/// -/// Nested types can themselves be nested within other arrays. -/// For more information on these types please see -/// [the physical memory layout of Apache Arrow](https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout). -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum DataType { - /// Null type - Null, - /// A boolean datatype representing the values `true` and `false`. - Boolean, - /// A signed 8-bit integer. - Int8, - /// A signed 16-bit integer. - Int16, - /// A signed 32-bit integer. - Int32, - /// A signed 64-bit integer. - Int64, - /// An unsigned 8-bit integer. - UInt8, - /// An unsigned 16-bit integer. - UInt16, - /// An unsigned 32-bit integer. - UInt32, - /// An unsigned 64-bit integer. - UInt64, - /// A 16-bit floating point number. 
- Float16, - /// A 32-bit floating point number. - Float32, - /// A 64-bit floating point number. - Float64, - /// A timestamp with an optional timezone. - /// - /// Time is measured as a Unix epoch, counting the seconds from - /// 00:00:00.000 on 1 January 1970, excluding leap seconds, - /// as a 64-bit integer. - /// - /// The time zone is a string indicating the name of a time zone, one of: - /// - /// * As used in the Olson time zone database (the "tz database" or - /// "tzdata"), such as "America/New_York" - /// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 - /// - /// Timestamps with a non-empty timezone - /// ------------------------------------ - /// - /// If a Timestamp column has a non-empty timezone value, its epoch is - /// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone - /// (the Unix epoch), regardless of the Timestamp's own timezone. - /// - /// Therefore, timestamp values with a non-empty timezone correspond to - /// physical points in time together with some additional information about - /// how the data was obtained and/or how to display it (the timezone). - /// - /// For example, the timestamp value 0 with the timezone string "Europe/Paris" - /// corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the - /// application may prefer to display it as "January 1st 1970, 01h00" in - /// the Europe/Paris timezone (which is the same physical point in time). - /// - /// One consequence is that timestamp values with a non-empty timezone - /// can be compared and ordered directly, since they all share the same - /// well-known point of reference (the Unix epoch). - /// - /// Timestamps with an unset / empty timezone - /// ----------------------------------------- - /// - /// If a Timestamp column has no timezone value, its epoch is - /// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone. 
- /// - /// Therefore, timestamp values without a timezone cannot be meaningfully - /// interpreted as physical points in time, but only as calendar / clock - /// indications ("wall clock time") in an unspecified timezone. - /// - /// For example, the timestamp value 0 with an empty timezone string - /// corresponds to "January 1st 1970, 00h00" in an unknown timezone: there - /// is not enough information to interpret it as a well-defined physical - /// point in time. - /// - /// One consequence is that timestamp values without a timezone cannot - /// be reliably compared or ordered, since they may have different points of - /// reference. In particular, it is *not* possible to interpret an unset - /// or empty timezone as the same as "UTC". - /// - /// Conversion between timezones - /// ---------------------------- - /// - /// If a Timestamp column has a non-empty timezone, changing the timezone - /// to a different non-empty value is a metadata-only operation: - /// the timestamp values need not change as their point of reference remains - /// the same (the Unix epoch). - /// - /// However, if a Timestamp column has no timezone value, changing it to a - /// non-empty value requires to think about the desired semantics. - /// One possibility is to assume that the original timestamp values are - /// relative to the epoch of the timezone being set; timestamp values should - /// then adjusted to the Unix epoch (for example, changing the timezone from - /// empty to "Europe/Paris" would require converting the timestamp values - /// from "Europe/Paris" to "UTC", which seems counter-intuitive but is - /// nevertheless correct). - Timestamp(TimeUnit, Option), - /// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01) - /// in days (32 bits). - Date32, - /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01) - /// in milliseconds (64 bits). Values are evenly divisible by 86400000. 
- Date64, - /// A 32-bit time representing the elapsed time since midnight in the unit of `TimeUnit`. - Time32(TimeUnit), - /// A 64-bit time representing the elapsed time since midnight in the unit of `TimeUnit`. - Time64(TimeUnit), - /// Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds. - Duration(TimeUnit), - /// A "calendar" interval which models types that don't necessarily - /// have a precise duration without the context of a base timestamp (e.g. - /// days can differ in length during day light savings time transitions). - Interval(IntervalUnit), - /// Opaque binary data of variable length. - Binary, - /// Opaque binary data of fixed size. - /// Enum parameter specifies the number of bytes per value. - FixedSizeBinary(i32), - /// Opaque binary data of variable length and 64-bit offsets. - LargeBinary, - /// A variable-length string in Unicode with UTF-8 encoding. - Utf8, - /// A variable-length string in Unicode with UFT-8 encoding and 64-bit offsets. - LargeUtf8, - /// A list of some logical data type with variable length. - List(Box), - /// A list of some logical data type with fixed length. - FixedSizeList(Box, i32), - /// A list of some logical data type with variable length and 64-bit offsets. - LargeList(Box), - /// A nested datatype that contains a number of sub-fields. - Struct(Vec), - /// A nested datatype that can represent slots of differing types. Components: - /// - /// 1. [`Field`] for each possible child type the Union can hold - /// 2. The corresponding `type_id` used to identify which Field - /// 3. The type of union (Sparse or Dense) - Union(Vec, Vec, UnionMode), - /// A dictionary encoded array (`key_type`, `value_type`), where - /// each array element is an index of `key_type` into an - /// associated dictionary of `value_type`. 
- /// - /// Dictionary arrays are used to store columns of `value_type` - /// that contain many repeated values using less memory, but with - /// a higher CPU overhead for some operations. - /// - /// This type mostly used to represent low cardinality string - /// arrays or a limited set of primitive types as integers. - Dictionary(Box, Box), - /// Exact 128-bit width decimal value with precision and scale - /// - /// * precision is the total number of digits - /// * scale is the number of digits past the decimal - /// - /// For example the number 123.45 has precision 5 and scale 2. - Decimal128(u8, u8), - /// Exact 256-bit width decimal value with precision and scale - /// - /// * precision is the total number of digits - /// * scale is the number of digits past the decimal - /// - /// For example the number 123.45 has precision 5 and scale 2. - Decimal256(u8, u8), - /// A Map is a logical nested type that is represented as - /// - /// `List>` - /// - /// The keys and values are each respectively contiguous. - /// The key and value types are not constrained, but keys should be - /// hashable and unique. - /// Whether the keys are sorted can be set in the `bool` after the `Field`. - /// - /// In a field with Map type, the field has a child Struct field, which then - /// has two children: key type and the second the value type. The names of the - /// child fields may be respectively "entries", "key", and "value", but this is - /// not enforced. - Map(Box, bool), -} - -/// An absolute length of time in seconds, milliseconds, microseconds or nanoseconds. -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum TimeUnit { - /// Time in seconds. - Second, - /// Time in milliseconds. - Millisecond, - /// Time in microseconds. - Microsecond, - /// Time in nanoseconds. - Nanosecond, -} - -/// YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO interval in SQL style. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum IntervalUnit { - /// Indicates the number of elapsed whole months, stored as 4-byte integers. - YearMonth, - /// Indicates the number of elapsed days and milliseconds, - /// stored as 2 contiguous 32-bit integers (days, milliseconds) (8-bytes in total). - DayTime, - /// A triple of the number of elapsed months, days, and nanoseconds. - /// The values are stored contiguously in 16 byte blocks. Months and - /// days are encoded as 32 bit integers and nanoseconds is encoded as a - /// 64 bit integer. All integers are signed. Each field is independent - /// (e.g. there is no constraint that nanoseconds have the same sign - /// as days or that the quantity of nanoseconds represents less - /// than a day's worth of time). - MonthDayNano, -} - -// Sparse or Dense union layouts -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum UnionMode { - Sparse, - Dense, -} - -impl fmt::Display for DataType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self) - } -} - -// MAX decimal256 value of little-endian format for each precision. -// Each element is the max value of signed 256-bit integer for the specified precision which -// is encoded to the 32-byte width format of little-endian. 
-pub(crate) const MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [[u8; 32]; 76] = [ - [ - 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, - ], - [ - 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, - ], - [ - 231, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ], - [ - 15, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ], - [ - 159, 134, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ], - [ - 63, 66, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ], - [ - 127, 150, 152, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 224, 245, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 201, 154, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 227, 11, 84, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 231, 118, 72, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 15, 165, 212, 232, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 159, 114, 78, 24, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 63, 122, 16, 243, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 127, 198, 164, 126, 141, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 192, 111, 242, 134, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 137, 93, 120, 69, 99, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 99, 167, 179, 182, 224, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 231, 137, 4, 35, 199, 138, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 15, 99, 45, 94, 199, 107, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 159, 222, 197, 173, 201, 53, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 63, 178, 186, 201, 224, 25, 30, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 127, 246, 74, 225, 199, 2, 45, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 160, 237, 204, 206, 27, 194, 211, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 73, 72, 1, 20, 22, 149, 69, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 227, 210, 12, 200, 220, 210, 183, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 231, 60, 128, 208, 159, 60, 46, 59, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 15, 97, 2, 37, 62, 94, 206, 79, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 159, 202, 23, 114, 109, 174, 15, 30, 67, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 63, 234, 237, 116, 70, 208, 156, 44, 159, 12, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 127, 38, 75, 145, 192, 34, 32, 190, 55, 126, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 128, 239, 172, 133, 91, 65, 109, 45, 238, 4, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 9, 91, 193, 56, 147, 141, 68, 198, 77, 
49, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 99, 142, 141, 55, 192, 135, 173, 190, 9, 237, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 231, 143, 135, 43, 130, 77, 199, 114, 97, 66, 19, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 15, 159, 75, 179, 21, 7, 201, 123, 206, 151, 192, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 159, 54, 244, 0, 217, 70, 218, 213, 16, 238, 133, 7, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 63, 34, 138, 9, 122, 196, 134, 90, 168, 76, 59, 75, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 127, 86, 101, 95, 196, 172, 67, 137, 147, 254, 80, 240, 2, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 96, 245, 185, 171, 191, 164, 92, 195, 241, 41, 99, 29, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 201, 149, 67, 181, 124, 111, 158, 161, 113, 163, 223, - 37, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 227, 217, 163, 20, 223, 90, 48, 80, 112, 98, 188, 122, - 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 231, 130, 102, 206, 182, 140, 227, 33, 99, 216, 91, 203, - 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 15, 29, 1, 16, 36, 127, 227, 82, 223, 115, 150, 241, - 123, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 159, 34, 11, 160, 104, 247, 226, 60, 185, 134, 224, 111, - 215, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 63, 90, 111, 64, 22, 170, 221, 96, 60, 67, 197, 94, 106, - 192, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 127, 134, 89, 132, 222, 164, 168, 200, 91, 160, 180, - 179, 39, 132, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ 
- 255, 255, 255, 255, 255, 255, 64, 127, 43, 177, 112, 150, 214, 149, 67, 14, 5, - 141, 41, 175, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 137, 248, 178, 235, 102, 224, 97, 218, 163, 142, - 50, 130, 159, 215, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 99, 181, 253, 52, 5, 196, 210, 135, 102, 146, 249, - 21, 59, 108, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 231, 21, 233, 17, 52, 168, 59, 78, 1, 184, 191, - 219, 78, 58, 172, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 15, 219, 26, 179, 8, 146, 84, 14, 13, 48, 125, 149, - 20, 71, 186, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 159, 142, 12, 255, 86, 180, 77, 143, 130, 224, 227, - 214, 205, 198, 70, 11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 63, 146, 125, 246, 101, 11, 9, 153, 25, 197, 230, - 100, 10, 196, 195, 112, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 127, 182, 231, 160, 251, 113, 90, 250, 255, 178, 3, - 241, 103, 168, 165, 103, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 32, 13, 73, 212, 115, 136, 199, 255, 253, 36, - 106, 15, 148, 120, 12, 20, 4, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 73, 131, 218, 74, 134, 84, 203, 253, 235, 113, - 37, 154, 200, 181, 124, 200, 40, 0, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 227, 32, 137, 236, 62, 77, 241, 233, 55, 115, - 118, 5, 214, 25, 223, 212, 151, 1, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 231, 72, 91, 61, 117, 4, 109, 35, 47, 128, - 160, 54, 92, 2, 183, 80, 238, 15, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 15, 217, 144, 101, 148, 44, 66, 98, 215, 1, - 69, 34, 154, 23, 38, 39, 79, 159, 0, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 159, 122, 168, 247, 203, 189, 149, 214, 105, - 18, 
178, 86, 5, 236, 124, 135, 23, 57, 6, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 63, 202, 148, 172, 247, 105, 217, 97, 34, 184, - 244, 98, 53, 56, 225, 74, 235, 58, 62, 0, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 127, 230, 207, 189, 172, 35, 126, 210, 87, 49, - 143, 221, 21, 50, 204, 236, 48, 77, 110, 2, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 0, 31, 106, 191, 100, 237, 56, 110, 237, - 151, 167, 218, 244, 249, 63, 233, 3, 79, 24, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 9, 54, 37, 122, 239, 69, 57, 78, 70, 239, - 139, 138, 144, 195, 127, 28, 39, 22, 243, 0, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 99, 28, 116, 197, 90, 187, 60, 14, 191, - 88, 119, 105, 165, 163, 253, 28, 135, 221, 126, 9, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 231, 27, 137, 182, 139, 81, 95, 142, 118, - 119, 169, 30, 118, 100, 232, 33, 71, 167, 244, 94, 0, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 15, 23, 91, 33, 117, 47, 185, 143, 161, - 170, 158, 50, 157, 236, 19, 83, 199, 136, 142, 181, 3, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 159, 230, 142, 77, 147, 218, 59, 157, 79, - 170, 50, 250, 35, 62, 199, 62, 201, 87, 145, 23, 37, 0, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 63, 2, 149, 7, 193, 137, 86, 36, 28, 167, - 250, 197, 103, 109, 200, 115, 220, 109, 173, 235, 114, 1, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 127, 22, 210, 75, 138, 97, 97, 107, 25, - 135, 202, 187, 13, 70, 212, 133, 156, 74, 198, 52, 125, 14, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 224, 52, 246, 102, 207, 205, 49, - 254, 70, 233, 85, 137, 188, 74, 58, 29, 234, 190, 15, 228, 144, 0, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 201, 16, 158, 5, 26, 10, 242, 237, - 197, 28, 91, 93, 93, 235, 70, 36, 37, 117, 157, 232, 168, 5, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 227, 167, 
44, 56, 4, 101, 116, 75, - 187, 31, 143, 165, 165, 49, 197, 106, 115, 147, 38, 22, 153, 56, 0, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 231, 142, 190, 49, 42, 242, 139, - 242, 80, 61, 151, 119, 120, 240, 179, 43, 130, 194, 129, 221, 250, 53, 2, - ], - [ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 15, 149, 113, 241, 165, 117, 119, - 121, 41, 101, 232, 171, 180, 100, 7, 181, 21, 153, 17, 167, 204, 27, 22, - ], -]; - -// MIN decimal256 value of little-endian format for each precision. -// Each element is the min value of signed 256-bit integer for the specified precision which -// is encoded to the 76-byte width format of little-endian. -pub(crate) const MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [[u8; 32]; 76] = [ - [ - 247, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 157, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 25, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 241, 216, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 97, 121, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 193, 189, 240, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 129, 105, 103, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 31, 10, 250, 255, 255, 255, 255, 255, 
255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 54, 101, 196, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 28, 244, 171, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 24, 137, 183, 232, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 240, 90, 43, 23, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 96, 141, 177, 231, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 192, 133, 239, 12, 165, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 128, 57, 91, 129, 114, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 63, 144, 13, 121, 220, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 118, 162, 135, 186, 156, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 156, 88, 76, 73, 31, 242, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 24, 118, 251, 220, 56, 117, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 240, 156, 
210, 161, 56, 148, 250, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 96, 33, 58, 82, 54, 202, 201, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 192, 77, 69, 54, 31, 230, 225, 253, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 128, 9, 181, 30, 56, 253, 210, 234, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 95, 18, 51, 49, 228, 61, 44, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 182, 183, 254, 235, 233, 106, 186, 247, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 28, 45, 243, 55, 35, 45, 72, 173, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 24, 195, 127, 47, 96, 195, 209, 196, 252, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 240, 158, 253, 218, 193, 161, 49, 176, 223, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 96, 53, 232, 141, 146, 81, 240, 225, 188, 254, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 192, 21, 18, 139, 185, 47, 99, 211, 96, 243, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 128, 217, 180, 110, 63, 221, 223, 65, 200, 129, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 127, 16, 83, 122, 164, 190, 
146, 210, 17, 251, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 246, 164, 62, 199, 108, 114, 187, 57, 178, 206, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 156, 113, 114, 200, 63, 120, 82, 65, 246, 18, 254, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 24, 112, 120, 212, 125, 178, 56, 141, 158, 189, 236, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 240, 96, 180, 76, 234, 248, 54, 132, 49, 104, 63, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 96, 201, 11, 255, 38, 185, 37, 42, 239, 17, 122, 248, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 192, 221, 117, 246, 133, 59, 121, 165, 87, 179, 196, 180, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 128, 169, 154, 160, 59, 83, 188, 118, 108, 1, 175, 15, 253, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 159, 10, 70, 84, 64, 91, 163, 60, 14, 214, 156, 226, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 54, 106, 188, 74, 131, 144, 97, 94, 142, 92, 32, 218, 254, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 28, 38, 92, 235, 32, 165, 207, 175, 143, 157, 67, 133, 244, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 24, 125, 153, 49, 73, 115, 28, 222, 156, 39, 164, 52, 141, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 240, 226, 254, 239, 219, 128, 28, 173, 32, 140, 105, 14, 132, 251, - 255, 255, 255, 255, 255, 255, 255, 
255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 96, 221, 244, 95, 151, 8, 29, 195, 70, 121, 31, 144, 40, 211, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 192, 165, 144, 191, 233, 85, 34, 159, 195, 188, 58, 161, 149, 63, - 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 128, 121, 166, 123, 33, 91, 87, 55, 164, 95, 75, 76, 216, 123, - 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 191, 128, 212, 78, 143, 105, 41, 106, 188, 241, 250, 114, 214, - 80, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 118, 7, 77, 20, 153, 31, 158, 37, 92, 113, 205, 125, 96, 40, - 249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 156, 74, 2, 203, 250, 59, 45, 120, 153, 109, 6, 234, 196, 147, - 187, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 24, 234, 22, 238, 203, 87, 196, 177, 254, 71, 64, 36, 177, 197, - 83, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 240, 36, 229, 76, 247, 109, 171, 241, 242, 207, 130, 106, 235, - 184, 69, 229, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 96, 113, 243, 0, 169, 75, 178, 112, 125, 31, 28, 41, 50, 57, - 185, 244, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 192, 109, 130, 9, 154, 244, 246, 102, 230, 58, 25, 155, 245, - 59, 60, 143, 245, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 128, 73, 24, 95, 4, 142, 165, 5, 0, 77, 252, 14, 152, 87, 90, - 152, 151, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 223, 242, 182, 43, 140, 119, 56, 0, 2, 219, 149, 240, 107, - 135, 243, 235, 251, 255, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 182, 124, 37, 181, 121, 171, 52, 2, 20, 142, 218, 101, 55, - 74, 131, 55, 215, 255, 
255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 28, 223, 118, 19, 193, 178, 14, 22, 200, 140, 137, 250, 41, - 230, 32, 43, 104, 254, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 24, 183, 164, 194, 138, 251, 146, 220, 208, 127, 95, 201, - 163, 253, 72, 175, 17, 240, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 240, 38, 111, 154, 107, 211, 189, 157, 40, 254, 186, 221, - 101, 232, 217, 216, 176, 96, 255, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 96, 133, 87, 8, 52, 66, 106, 41, 150, 237, 77, 169, 250, 19, - 131, 120, 232, 198, 249, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 192, 53, 107, 83, 8, 150, 38, 158, 221, 71, 11, 157, 202, - 199, 30, 181, 20, 197, 193, 255, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 128, 25, 48, 66, 83, 220, 129, 45, 168, 206, 112, 34, 234, - 205, 51, 19, 207, 178, 145, 253, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 255, 224, 149, 64, 155, 18, 199, 145, 18, 104, 88, 37, - 11, 6, 192, 22, 252, 176, 231, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 246, 201, 218, 133, 16, 186, 198, 177, 185, 16, 116, 117, - 111, 60, 128, 227, 216, 233, 12, 255, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 156, 227, 139, 58, 165, 68, 195, 241, 64, 167, 136, 150, - 90, 92, 2, 227, 120, 34, 129, 246, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 24, 228, 118, 73, 116, 174, 160, 113, 137, 136, 86, 225, - 137, 155, 23, 222, 184, 88, 11, 161, 255, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 240, 232, 164, 222, 138, 208, 70, 112, 94, 85, 97, 205, - 98, 19, 236, 172, 56, 119, 113, 74, 252, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 96, 25, 113, 178, 108, 37, 196, 98, 176, 85, 205, 5, 220, - 193, 56, 193, 54, 168, 110, 232, 218, 255, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 192, 253, 106, 248, 62, 118, 169, 219, 227, 88, 5, 58, - 152, 146, 55, 140, 35, 146, 82, 20, 141, 254, 255, 255, - ], 
- [ - 1, 0, 0, 0, 0, 0, 0, 0, 128, 233, 45, 180, 117, 158, 158, 148, 230, 120, 53, 68, - 242, 185, 43, 122, 99, 181, 57, 203, 130, 241, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 31, 203, 9, 153, 48, 50, 206, 1, 185, 22, 170, 118, - 67, 181, 197, 226, 21, 65, 240, 27, 111, 255, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 54, 239, 97, 250, 229, 245, 13, 18, 58, 227, 164, 162, - 162, 20, 185, 219, 218, 138, 98, 23, 87, 250, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 28, 88, 211, 199, 251, 154, 139, 180, 68, 224, 112, - 90, 90, 206, 58, 149, 140, 108, 217, 233, 102, 199, 255, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 24, 113, 65, 206, 213, 13, 116, 13, 175, 194, 104, - 136, 135, 15, 76, 212, 125, 61, 126, 34, 5, 202, 253, - ], - [ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 240, 106, 142, 14, 90, 138, 136, 134, 214, 154, 23, - 84, 75, 155, 248, 74, 234, 102, 238, 88, 51, 228, 233, - ], -]; - -/// `MAX_DECIMAL_FOR_EACH_PRECISION[p]` holds the maximum `i128` value -/// that can be stored in [DataType::Decimal128] value of precision `p` -pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ - 9, - 99, - 999, - 9999, - 99999, - 999999, - 9999999, - 99999999, - 999999999, - 9999999999, - 99999999999, - 999999999999, - 9999999999999, - 99999999999999, - 999999999999999, - 9999999999999999, - 99999999999999999, - 999999999999999999, - 9999999999999999999, - 99999999999999999999, - 999999999999999999999, - 9999999999999999999999, - 99999999999999999999999, - 999999999999999999999999, - 9999999999999999999999999, - 99999999999999999999999999, - 999999999999999999999999999, - 9999999999999999999999999999, - 99999999999999999999999999999, - 999999999999999999999999999999, - 9999999999999999999999999999999, - 99999999999999999999999999999999, - 999999999999999999999999999999999, - 9999999999999999999999999999999999, - 99999999999999999999999999999999999, - 999999999999999999999999999999999999, - 9999999999999999999999999999999999999, - 99999999999999999999999999999999999999, -]; - -/// 
`MIN_DECIMAL_FOR_EACH_PRECISION[p]` holds the minimum `i128` value -/// that can be stored in a [DataType::Decimal128] value of precision `p` -pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ - -9, - -99, - -999, - -9999, - -99999, - -999999, - -9999999, - -99999999, - -999999999, - -9999999999, - -99999999999, - -999999999999, - -9999999999999, - -99999999999999, - -999999999999999, - -9999999999999999, - -99999999999999999, - -999999999999999999, - -9999999999999999999, - -99999999999999999999, - -999999999999999999999, - -9999999999999999999999, - -99999999999999999999999, - -999999999999999999999999, - -9999999999999999999999999, - -99999999999999999999999999, - -999999999999999999999999999, - -9999999999999999999999999999, - -99999999999999999999999999999, - -999999999999999999999999999999, - -9999999999999999999999999999999, - -99999999999999999999999999999999, - -999999999999999999999999999999999, - -9999999999999999999999999999999999, - -99999999999999999999999999999999999, - -999999999999999999999999999999999999, - -9999999999999999999999999999999999999, - -99999999999999999999999999999999999999, -]; - -/// The maximum precision for [DataType::Decimal128] values -pub const DECIMAL128_MAX_PRECISION: u8 = 38; - -/// The maximum scale for [DataType::Decimal128] values -pub const DECIMAL128_MAX_SCALE: u8 = 38; - -/// The maximum precision for [DataType::Decimal256] values -pub const DECIMAL256_MAX_PRECISION: u8 = 76; - -/// The maximum scale for [DataType::Decimal256] values -pub const DECIMAL256_MAX_SCALE: u8 = 76; - -/// The default scale for [DataType::Decimal128] and [DataType::Decimal256] values -pub const DECIMAL_DEFAULT_SCALE: u8 = 10; - -/// Validates that the specified `i128` value can be properly -/// interpreted as a Decimal number with precision `precision` -#[inline] -pub(crate) fn validate_decimal_precision(value: i128, precision: u8) -> Result<()> { - if precision > DECIMAL128_MAX_PRECISION { - return 
Err(ArrowError::InvalidArgumentError(format!( - "Max precision of a Decimal128 is {}, but got {}", - DECIMAL128_MAX_PRECISION, precision, - ))); - } - - let max = MAX_DECIMAL_FOR_EACH_PRECISION[usize::from(precision) - 1]; - let min = MIN_DECIMAL_FOR_EACH_PRECISION[usize::from(precision) - 1]; - - if value > max { - Err(ArrowError::InvalidArgumentError(format!( - "{} is too large to store in a Decimal128 of precision {}. Max is {}", - value, precision, max - ))) - } else if value < min { - Err(ArrowError::InvalidArgumentError(format!( - "{} is too small to store in a Decimal128 of precision {}. Min is {}", - value, precision, min - ))) - } else { - Ok(()) - } -} - -/// Validates that the specified `byte_array` of little-endian format -/// value can be properly interpreted as a Decimal256 number with precision `precision` -#[inline] -pub(crate) fn validate_decimal256_precision_with_lt_bytes( - lt_value: &[u8], - precision: u8, -) -> Result<()> { - if precision > DECIMAL256_MAX_PRECISION { - return Err(ArrowError::InvalidArgumentError(format!( - "Max precision of a Decimal256 is {}, but got {}", - DECIMAL256_MAX_PRECISION, precision, - ))); - } - let max = MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[usize::from(precision) - 1]; - let min = MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[usize::from(precision) - 1]; - - if singed_cmp_le_bytes(lt_value, &max) == Ordering::Greater { - Err(ArrowError::InvalidArgumentError(format!( - "{:?} is too large to store in a Decimal256 of precision {}. Max is {:?}", - BigInt::from_signed_bytes_le(lt_value), - precision, - BigInt::from_signed_bytes_le(&max) - ))) - } else if singed_cmp_le_bytes(lt_value, &min) == Ordering::Less { - Err(ArrowError::InvalidArgumentError(format!( - "{:?} is too small to store in a Decimal256 of precision {}. 
Min is {:?}", - BigInt::from_signed_bytes_le(lt_value), - precision, - BigInt::from_signed_bytes_le(&min) - ))) - } else { - Ok(()) - } -} - -impl DataType { - /// Parse a data type from a JSON representation. - #[cfg(feature = "json")] - pub(crate) fn from(json: &serde_json::Value) -> Result { - use serde_json::Value; - let default_field = Field::new("", DataType::Boolean, true); - match *json { - Value::Object(ref map) => match map.get("name") { - Some(s) if s == "null" => Ok(DataType::Null), - Some(s) if s == "bool" => Ok(DataType::Boolean), - Some(s) if s == "binary" => Ok(DataType::Binary), - Some(s) if s == "largebinary" => Ok(DataType::LargeBinary), - Some(s) if s == "utf8" => Ok(DataType::Utf8), - Some(s) if s == "largeutf8" => Ok(DataType::LargeUtf8), - Some(s) if s == "fixedsizebinary" => { - // return a list with any type as its child isn't defined in the map - if let Some(Value::Number(size)) = map.get("byteWidth") { - Ok(DataType::FixedSizeBinary(size.as_i64().unwrap() as i32)) - } else { - Err(ArrowError::ParseError( - "Expecting a byteWidth for fixedsizebinary".to_string(), - )) - } - } - Some(s) if s == "decimal" => { - // return a list with any type as its child isn't defined in the map - let precision = match map.get("precision") { - Some(p) => Ok(p.as_u64().unwrap().try_into().unwrap()), - None => Err(ArrowError::ParseError( - "Expecting a precision for decimal".to_string(), - )), - }?; - let scale = match map.get("scale") { - Some(s) => Ok(s.as_u64().unwrap().try_into().unwrap()), - _ => Err(ArrowError::ParseError( - "Expecting a scale for decimal".to_string(), - )), - }?; - let bit_width: usize = match map.get("bitWidth") { - Some(b) => b.as_u64().unwrap() as usize, - _ => 128, // Default bit width - }; - - if bit_width == 128 { - Ok(DataType::Decimal128(precision, scale)) - } else if bit_width == 256 { - Ok(DataType::Decimal256(precision, scale)) - } else { - Err(ArrowError::ParseError( - "Decimal bit_width invalid".to_string(), - )) - } - } - 
Some(s) if s == "floatingpoint" => match map.get("precision") { - Some(p) if p == "HALF" => Ok(DataType::Float16), - Some(p) if p == "SINGLE" => Ok(DataType::Float32), - Some(p) if p == "DOUBLE" => Ok(DataType::Float64), - _ => Err(ArrowError::ParseError( - "floatingpoint precision missing or invalid".to_string(), - )), - }, - Some(s) if s == "timestamp" => { - let unit = match map.get("unit") { - Some(p) if p == "SECOND" => Ok(TimeUnit::Second), - Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond), - Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond), - Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond), - _ => Err(ArrowError::ParseError( - "timestamp unit missing or invalid".to_string(), - )), - }; - let tz = match map.get("timezone") { - None => Ok(None), - Some(serde_json::Value::String(tz)) => Ok(Some(tz.clone())), - _ => Err(ArrowError::ParseError( - "timezone must be a string".to_string(), - )), - }; - Ok(DataType::Timestamp(unit?, tz?)) - } - Some(s) if s == "date" => match map.get("unit") { - Some(p) if p == "DAY" => Ok(DataType::Date32), - Some(p) if p == "MILLISECOND" => Ok(DataType::Date64), - _ => Err(ArrowError::ParseError( - "date unit missing or invalid".to_string(), - )), - }, - Some(s) if s == "time" => { - let unit = match map.get("unit") { - Some(p) if p == "SECOND" => Ok(TimeUnit::Second), - Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond), - Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond), - Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond), - _ => Err(ArrowError::ParseError( - "time unit missing or invalid".to_string(), - )), - }; - match map.get("bitWidth") { - Some(p) if p == 32 => Ok(DataType::Time32(unit?)), - Some(p) if p == 64 => Ok(DataType::Time64(unit?)), - _ => Err(ArrowError::ParseError( - "time bitWidth missing or invalid".to_string(), - )), - } - } - Some(s) if s == "duration" => match map.get("unit") { - Some(p) if p == "SECOND" => Ok(DataType::Duration(TimeUnit::Second)), - 
Some(p) if p == "MILLISECOND" => { - Ok(DataType::Duration(TimeUnit::Millisecond)) - } - Some(p) if p == "MICROSECOND" => { - Ok(DataType::Duration(TimeUnit::Microsecond)) - } - Some(p) if p == "NANOSECOND" => { - Ok(DataType::Duration(TimeUnit::Nanosecond)) - } - _ => Err(ArrowError::ParseError( - "time unit missing or invalid".to_string(), - )), - }, - Some(s) if s == "interval" => match map.get("unit") { - Some(p) if p == "DAY_TIME" => { - Ok(DataType::Interval(IntervalUnit::DayTime)) - } - Some(p) if p == "YEAR_MONTH" => { - Ok(DataType::Interval(IntervalUnit::YearMonth)) - } - Some(p) if p == "MONTH_DAY_NANO" => { - Ok(DataType::Interval(IntervalUnit::MonthDayNano)) - } - _ => Err(ArrowError::ParseError( - "interval unit missing or invalid".to_string(), - )), - }, - Some(s) if s == "int" => match map.get("isSigned") { - Some(&Value::Bool(true)) => match map.get("bitWidth") { - Some(&Value::Number(ref n)) => match n.as_u64() { - Some(8) => Ok(DataType::Int8), - Some(16) => Ok(DataType::Int16), - Some(32) => Ok(DataType::Int32), - Some(64) => Ok(DataType::Int64), - _ => Err(ArrowError::ParseError( - "int bitWidth missing or invalid".to_string(), - )), - }, - _ => Err(ArrowError::ParseError( - "int bitWidth missing or invalid".to_string(), - )), - }, - Some(&Value::Bool(false)) => match map.get("bitWidth") { - Some(&Value::Number(ref n)) => match n.as_u64() { - Some(8) => Ok(DataType::UInt8), - Some(16) => Ok(DataType::UInt16), - Some(32) => Ok(DataType::UInt32), - Some(64) => Ok(DataType::UInt64), - _ => Err(ArrowError::ParseError( - "int bitWidth missing or invalid".to_string(), - )), - }, - _ => Err(ArrowError::ParseError( - "int bitWidth missing or invalid".to_string(), - )), - }, - _ => Err(ArrowError::ParseError( - "int signed missing or invalid".to_string(), - )), - }, - Some(s) if s == "list" => { - // return a list with any type as its child isn't defined in the map - Ok(DataType::List(Box::new(default_field))) - } - Some(s) if s == "largelist" => { - // 
return a largelist with any type as its child isn't defined in the map - Ok(DataType::LargeList(Box::new(default_field))) - } - Some(s) if s == "fixedsizelist" => { - // return a list with any type as its child isn't defined in the map - if let Some(Value::Number(size)) = map.get("listSize") { - Ok(DataType::FixedSizeList( - Box::new(default_field), - size.as_i64().unwrap() as i32, - )) - } else { - Err(ArrowError::ParseError( - "Expecting a listSize for fixedsizelist".to_string(), - )) - } - } - Some(s) if s == "struct" => { - // return an empty `struct` type as its children aren't defined in the map - Ok(DataType::Struct(vec![])) - } - Some(s) if s == "map" => { - if let Some(Value::Bool(keys_sorted)) = map.get("keysSorted") { - // Return a map with an empty type as its children aren't defined in the map - Ok(DataType::Map(Box::new(default_field), *keys_sorted)) - } else { - Err(ArrowError::ParseError( - "Expecting a keysSorted for map".to_string(), - )) - } - } - Some(s) if s == "union" => { - if let Some(Value::String(mode)) = map.get("mode") { - let union_mode = if mode == "SPARSE" { - UnionMode::Sparse - } else if mode == "DENSE" { - UnionMode::Dense - } else { - return Err(ArrowError::ParseError(format!( - "Unknown union mode {:?} for union", - mode - ))); - }; - if let Some(type_ids) = map.get("typeIds") { - let type_ids = type_ids - .as_array() - .unwrap() - .iter() - .map(|t| t.as_i64().unwrap() as i8) - .collect::>(); - - let default_fields = type_ids - .iter() - .map(|_| default_field.clone()) - .collect::>(); - - Ok(DataType::Union(default_fields, type_ids, union_mode)) - } else { - Err(ArrowError::ParseError( - "Expecting a typeIds for union ".to_string(), - )) - } - } else { - Err(ArrowError::ParseError( - "Expecting a mode for union".to_string(), - )) - } - } - Some(other) => Err(ArrowError::ParseError(format!( - "invalid or unsupported type name: {} in {:?}", - other, json - ))), - None => Err(ArrowError::ParseError("type name 
missing".to_string())), - }, - _ => Err(ArrowError::ParseError( - "invalid json value type".to_string(), - )), - } - } - - /// Generate a JSON representation of the data type. - #[cfg(feature = "json")] - pub fn to_json(&self) -> serde_json::Value { - use serde_json::json; - match self { - DataType::Null => json!({"name": "null"}), - DataType::Boolean => json!({"name": "bool"}), - DataType::Int8 => json!({"name": "int", "bitWidth": 8, "isSigned": true}), - DataType::Int16 => json!({"name": "int", "bitWidth": 16, "isSigned": true}), - DataType::Int32 => json!({"name": "int", "bitWidth": 32, "isSigned": true}), - DataType::Int64 => json!({"name": "int", "bitWidth": 64, "isSigned": true}), - DataType::UInt8 => json!({"name": "int", "bitWidth": 8, "isSigned": false}), - DataType::UInt16 => json!({"name": "int", "bitWidth": 16, "isSigned": false}), - DataType::UInt32 => json!({"name": "int", "bitWidth": 32, "isSigned": false}), - DataType::UInt64 => json!({"name": "int", "bitWidth": 64, "isSigned": false}), - DataType::Float16 => json!({"name": "floatingpoint", "precision": "HALF"}), - DataType::Float32 => json!({"name": "floatingpoint", "precision": "SINGLE"}), - DataType::Float64 => json!({"name": "floatingpoint", "precision": "DOUBLE"}), - DataType::Utf8 => json!({"name": "utf8"}), - DataType::LargeUtf8 => json!({"name": "largeutf8"}), - DataType::Binary => json!({"name": "binary"}), - DataType::LargeBinary => json!({"name": "largebinary"}), - DataType::FixedSizeBinary(byte_width) => { - json!({"name": "fixedsizebinary", "byteWidth": byte_width}) - } - DataType::Struct(_) => json!({"name": "struct"}), - DataType::Union(_, _, _) => json!({"name": "union"}), - DataType::List(_) => json!({ "name": "list"}), - DataType::LargeList(_) => json!({ "name": "largelist"}), - DataType::FixedSizeList(_, length) => { - json!({"name":"fixedsizelist", "listSize": length}) - } - DataType::Time32(unit) => { - json!({"name": "time", "bitWidth": 32, "unit": match unit { - 
TimeUnit::Second => "SECOND", - TimeUnit::Millisecond => "MILLISECOND", - TimeUnit::Microsecond => "MICROSECOND", - TimeUnit::Nanosecond => "NANOSECOND", - }}) - } - DataType::Time64(unit) => { - json!({"name": "time", "bitWidth": 64, "unit": match unit { - TimeUnit::Second => "SECOND", - TimeUnit::Millisecond => "MILLISECOND", - TimeUnit::Microsecond => "MICROSECOND", - TimeUnit::Nanosecond => "NANOSECOND", - }}) - } - DataType::Date32 => { - json!({"name": "date", "unit": "DAY"}) - } - DataType::Date64 => { - json!({"name": "date", "unit": "MILLISECOND"}) - } - DataType::Timestamp(unit, None) => { - json!({"name": "timestamp", "unit": match unit { - TimeUnit::Second => "SECOND", - TimeUnit::Millisecond => "MILLISECOND", - TimeUnit::Microsecond => "MICROSECOND", - TimeUnit::Nanosecond => "NANOSECOND", - }}) - } - DataType::Timestamp(unit, Some(tz)) => { - json!({"name": "timestamp", "unit": match unit { - TimeUnit::Second => "SECOND", - TimeUnit::Millisecond => "MILLISECOND", - TimeUnit::Microsecond => "MICROSECOND", - TimeUnit::Nanosecond => "NANOSECOND", - }, "timezone": tz}) - } - DataType::Interval(unit) => json!({"name": "interval", "unit": match unit { - IntervalUnit::YearMonth => "YEAR_MONTH", - IntervalUnit::DayTime => "DAY_TIME", - IntervalUnit::MonthDayNano => "MONTH_DAY_NANO", - }}), - DataType::Duration(unit) => json!({"name": "duration", "unit": match unit { - TimeUnit::Second => "SECOND", - TimeUnit::Millisecond => "MILLISECOND", - TimeUnit::Microsecond => "MICROSECOND", - TimeUnit::Nanosecond => "NANOSECOND", - }}), - DataType::Dictionary(_, _) => json!({ "name": "dictionary"}), - DataType::Decimal128(precision, scale) => { - json!({"name": "decimal", "precision": precision, "scale": scale, "bitWidth": 128}) - } - DataType::Decimal256(precision, scale) => { - json!({"name": "decimal", "precision": precision, "scale": scale, "bitWidth": 256}) - } - DataType::Map(_, keys_sorted) => { - json!({"name": "map", "keysSorted": keys_sorted}) - } - } - } - - 
/// Returns true if this type is numeric: (UInt*, Int*, or Float*). - pub fn is_numeric(t: &DataType) -> bool { - use DataType::*; - matches!( - t, - UInt8 - | UInt16 - | UInt32 - | UInt64 - | Int8 - | Int16 - | Int32 - | Int64 - | Float32 - | Float64 - ) - } - - /// Returns true if this type is temporal: (Date*, Time*, Duration, or Interval). - pub fn is_temporal(t: &DataType) -> bool { - use DataType::*; - matches!( - t, - Date32 - | Date64 - | Timestamp(_, _) - | Time32(_) - | Time64(_) - | Duration(_) - | Interval(_) - ) - } - - /// Returns true if this type is valid as a dictionary key - /// (e.g. [`super::ArrowDictionaryKeyType`] - pub fn is_dictionary_key_type(t: &DataType) -> bool { - use DataType::*; - matches!( - t, - UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 - ) - } - - /// Compares the datatype with another, ignoring nested field names - /// and metadata. - pub fn equals_datatype(&self, other: &DataType) -> bool { - match (&self, other) { - (DataType::List(a), DataType::List(b)) - | (DataType::LargeList(a), DataType::LargeList(b)) => { - a.is_nullable() == b.is_nullable() - && a.data_type().equals_datatype(b.data_type()) - } - (DataType::FixedSizeList(a, a_size), DataType::FixedSizeList(b, b_size)) => { - a_size == b_size - && a.is_nullable() == b.is_nullable() - && a.data_type().equals_datatype(b.data_type()) - } - (DataType::Struct(a), DataType::Struct(b)) => { - a.len() == b.len() - && a.iter().zip(b).all(|(a, b)| { - a.is_nullable() == b.is_nullable() - && a.data_type().equals_datatype(b.data_type()) - }) - } - ( - DataType::Map(a_field, a_is_sorted), - DataType::Map(b_field, b_is_sorted), - ) => a_field == b_field && a_is_sorted == b_is_sorted, - _ => self == other, - } - } -} - -#[cfg(test)] -mod test { - use crate::datatypes::datatype::{ - MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION, - MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION, - }; - use crate::util::decimal::Decimal256; - use num::{BigInt, Num}; - - #[test] - fn 
test_decimal256_min_max_for_precision() { - // The precision from 1 to 76 - let mut max_value = "9".to_string(); - let mut min_value = "-9".to_string(); - for i in 1..77 { - let max_decimal = - Decimal256::from(BigInt::from_str_radix(max_value.as_str(), 10).unwrap()); - let min_decimal = - Decimal256::from(BigInt::from_str_radix(min_value.as_str(), 10).unwrap()); - let max_bytes = MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[i - 1]; - let min_bytes = MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[i - 1]; - max_value += "9"; - min_value += "9"; - assert_eq!(max_decimal.raw_value(), &max_bytes); - assert_eq!(min_decimal.raw_value(), &min_bytes); - } - } -} diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs index 38b6c7bf974..b19890ea7ad 100644 --- a/arrow/src/datatypes/mod.rs +++ b/arrow/src/datatypes/mod.rs @@ -26,18 +26,16 @@ use std::sync::Arc; mod native; pub use native::*; -mod field; -pub use field::*; -mod schema; -pub use schema::*; mod numeric; pub use numeric::*; mod types; pub use types::*; -mod datatype; -pub use datatype::*; mod delta; +pub use arrow_schema::datatype::*; +pub use arrow_schema::field::*; +pub use arrow_schema::schema::*; + #[cfg(feature = "ffi")] mod ffi; #[cfg(feature = "ffi")] diff --git a/arrow/src/datatypes/types.rs b/arrow/src/datatypes/types.rs index 1b7d0675bb4..8037ed9a938 100644 --- a/arrow/src/datatypes/types.rs +++ b/arrow/src/datatypes/types.rs @@ -17,7 +17,7 @@ use super::{ArrowPrimitiveType, DataType, IntervalUnit, TimeUnit}; use crate::datatypes::delta::shift_months; -use crate::datatypes::{ +use crate::util::decimal::{ DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL_DEFAULT_SCALE, }; diff --git a/arrow/src/error.rs b/arrow/src/error.rs index 5d92fb93017..8f0c78f9e06 100644 --- a/arrow/src/error.rs +++ b/arrow/src/error.rs @@ -21,6 +21,9 @@ use std::io::Write; use std::error::Error; +// Re-export ArrowSchemaError +pub use 
arrow_schema::error::ArrowSchemaError; + /// Many different operations in the `arrow` crate return this error type. #[derive(Debug)] pub enum ArrowError { @@ -30,7 +33,7 @@ pub enum ArrowError { CastError(String), MemoryError(String), ParseError(String), - SchemaError(String), + SchemaError(ArrowSchemaError), ComputeError(String), DivideByZero, CsvError(String), @@ -58,6 +61,12 @@ impl From<::std::io::Error> for ArrowError { } } +impl From<ArrowSchemaError> for ArrowError { + fn from(error: ArrowSchemaError) -> Self { + Self::SchemaError(error) + } +} + #[cfg(feature = "csv")] impl From<csv_crate::Error> for ArrowError { fn from(error: csv_crate::Error) -> Self { diff --git a/arrow/src/record_batch.rs b/arrow/src/record_batch.rs index 4b0d36a43e5..4b86ceb93b5 100644 --- a/arrow/src/record_batch.rs +++ b/arrow/src/record_batch.rs @@ -18,6 +18,7 @@ //! A two-dimensional batch of column-oriented data with a defined //! [schema](crate::datatypes::Schema). +use arrow_schema::error::ArrowSchemaError; use std::sync::Arc; use crate::array::*; @@ -203,11 +204,11 @@ impl RecordBatch { .iter() .map(|f| { self.columns.get(*f).cloned().ok_or_else(|| { - ArrowError::SchemaError(format!( + ArrowError::SchemaError(ArrowSchemaError::Field(format!( "project index {} out of bounds, max field {}", f, self.columns.len() - )) + ))) }) }) .collect::<Result<Vec<_>>>()?; diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index 421942df5c1..a41e60b5c81 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow/src/util/decimal.rs @@ -17,10 +17,7 @@ //! 
Decimal related utils -use crate::datatypes::{ - DataType, Decimal128Type, Decimal256Type, DecimalType, DECIMAL256_MAX_PRECISION, - DECIMAL_DEFAULT_SCALE, -}; +use crate::datatypes::{DataType, Decimal128Type, Decimal256Type, DecimalType}; use crate::error::{ArrowError, Result}; use num::bigint::BigInt; use num::Signed; @@ -296,6 +293,791 @@ pub(crate) fn singed_cmp_le_bytes(left: &[u8], right: &[u8]) -> Ordering { Ordering::Equal } +// MAX decimal256 value of little-endian format for each precision. +// Each element is the max value of signed 256-bit integer for the specified precision which +// is encoded to the 32-byte width format of little-endian. +pub(crate) const MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [[u8; 32]; 76] = [ + [ + 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + ], + [ + 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + ], + [ + 231, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ], + [ + 15, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ], + [ + 159, 134, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ], + [ + 63, 66, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ], + [ + 127, 150, 152, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 224, 245, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 201, 154, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 227, 11, 84, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 231, 118, 72, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 
15, 165, 212, 232, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 159, 114, 78, 24, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 63, 122, 16, 243, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 127, 198, 164, 126, 141, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 192, 111, 242, 134, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 137, 93, 120, 69, 99, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 99, 167, 179, 182, 224, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 231, 137, 4, 35, 199, 138, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 15, 99, 45, 94, 199, 107, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 159, 222, 197, 173, 201, 53, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 63, 178, 186, 201, 224, 25, 30, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 127, 246, 74, 225, 199, 2, 45, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 160, 237, 204, 206, 27, 194, 211, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 73, 72, 1, 20, 22, 149, 69, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 227, 210, 12, 200, 220, 210, 183, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 231, 60, 128, 208, 159, 60, 46, 59, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 15, 97, 2, 37, 62, 94, 206, 
79, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 159, 202, 23, 114, 109, 174, 15, 30, 67, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 63, 234, 237, 116, 70, 208, 156, 44, 159, 12, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 127, 38, 75, 145, 192, 34, 32, 190, 55, 126, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 128, 239, 172, 133, 91, 65, 109, 45, 238, 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 9, 91, 193, 56, 147, 141, 68, 198, 77, 49, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 99, 142, 141, 55, 192, 135, 173, 190, 9, 237, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 231, 143, 135, 43, 130, 77, 199, 114, 97, 66, 19, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 15, 159, 75, 179, 21, 7, 201, 123, 206, 151, 192, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 159, 54, 244, 0, 217, 70, 218, 213, 16, 238, 133, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 63, 34, 138, 9, 122, 196, 134, 90, 168, 76, 59, 75, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 127, 86, 101, 95, 196, 172, 67, 137, 147, 254, 80, 240, 2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 96, 245, 185, 171, 191, 164, 92, 195, 241, 41, 99, 29, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 201, 149, 67, 181, 124, 111, 158, 161, 113, 163, 223, + 37, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 227, 217, 163, 20, 223, 90, 48, 80, 112, 98, 188, 122, + 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 231, 130, 102, 
206, 182, 140, 227, 33, 99, 216, 91, 203, + 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 15, 29, 1, 16, 36, 127, 227, 82, 223, 115, 150, 241, + 123, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 159, 34, 11, 160, 104, 247, 226, 60, 185, 134, 224, 111, + 215, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 63, 90, 111, 64, 22, 170, 221, 96, 60, 67, 197, 94, 106, + 192, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 127, 134, 89, 132, 222, 164, 168, 200, 91, 160, 180, + 179, 39, 132, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 64, 127, 43, 177, 112, 150, 214, 149, 67, 14, 5, + 141, 41, 175, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 137, 248, 178, 235, 102, 224, 97, 218, 163, 142, + 50, 130, 159, 215, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 99, 181, 253, 52, 5, 196, 210, 135, 102, 146, 249, + 21, 59, 108, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 231, 21, 233, 17, 52, 168, 59, 78, 1, 184, 191, + 219, 78, 58, 172, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 15, 219, 26, 179, 8, 146, 84, 14, 13, 48, 125, 149, + 20, 71, 186, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 159, 142, 12, 255, 86, 180, 77, 143, 130, 224, 227, + 214, 205, 198, 70, 11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 63, 146, 125, 246, 101, 11, 9, 153, 25, 197, 230, + 100, 10, 196, 195, 112, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 127, 182, 231, 160, 251, 113, 90, 250, 255, 178, 3, + 241, 103, 168, 165, 103, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 32, 13, 73, 212, 115, 136, 199, 255, 253, 36, + 106, 15, 148, 120, 12, 20, 4, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 
255, 255, 255, 73, 131, 218, 74, 134, 84, 203, 253, 235, 113, + 37, 154, 200, 181, 124, 200, 40, 0, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 227, 32, 137, 236, 62, 77, 241, 233, 55, 115, + 118, 5, 214, 25, 223, 212, 151, 1, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 231, 72, 91, 61, 117, 4, 109, 35, 47, 128, + 160, 54, 92, 2, 183, 80, 238, 15, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 15, 217, 144, 101, 148, 44, 66, 98, 215, 1, + 69, 34, 154, 23, 38, 39, 79, 159, 0, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 159, 122, 168, 247, 203, 189, 149, 214, 105, + 18, 178, 86, 5, 236, 124, 135, 23, 57, 6, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 63, 202, 148, 172, 247, 105, 217, 97, 34, 184, + 244, 98, 53, 56, 225, 74, 235, 58, 62, 0, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 127, 230, 207, 189, 172, 35, 126, 210, 87, 49, + 143, 221, 21, 50, 204, 236, 48, 77, 110, 2, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 0, 31, 106, 191, 100, 237, 56, 110, 237, + 151, 167, 218, 244, 249, 63, 233, 3, 79, 24, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 9, 54, 37, 122, 239, 69, 57, 78, 70, 239, + 139, 138, 144, 195, 127, 28, 39, 22, 243, 0, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 99, 28, 116, 197, 90, 187, 60, 14, 191, + 88, 119, 105, 165, 163, 253, 28, 135, 221, 126, 9, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 231, 27, 137, 182, 139, 81, 95, 142, 118, + 119, 169, 30, 118, 100, 232, 33, 71, 167, 244, 94, 0, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 15, 23, 91, 33, 117, 47, 185, 143, 161, + 170, 158, 50, 157, 236, 19, 83, 199, 136, 142, 181, 3, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 159, 230, 142, 77, 147, 218, 59, 157, 79, + 170, 50, 250, 35, 62, 199, 62, 201, 87, 145, 23, 37, 0, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 
255, 255, 63, 2, 149, 7, 193, 137, 86, 36, 28, 167, + 250, 197, 103, 109, 200, 115, 220, 109, 173, 235, 114, 1, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 127, 22, 210, 75, 138, 97, 97, 107, 25, + 135, 202, 187, 13, 70, 212, 133, 156, 74, 198, 52, 125, 14, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 224, 52, 246, 102, 207, 205, 49, + 254, 70, 233, 85, 137, 188, 74, 58, 29, 234, 190, 15, 228, 144, 0, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 201, 16, 158, 5, 26, 10, 242, 237, + 197, 28, 91, 93, 93, 235, 70, 36, 37, 117, 157, 232, 168, 5, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 227, 167, 44, 56, 4, 101, 116, 75, + 187, 31, 143, 165, 165, 49, 197, 106, 115, 147, 38, 22, 153, 56, 0, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 231, 142, 190, 49, 42, 242, 139, + 242, 80, 61, 151, 119, 120, 240, 179, 43, 130, 194, 129, 221, 250, 53, 2, + ], + [ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 15, 149, 113, 241, 165, 117, 119, + 121, 41, 101, 232, 171, 180, 100, 7, 181, 21, 153, 17, 167, 204, 27, 22, + ], +]; + +// MIN decimal256 value of little-endian format for each precision. +// Each element is the min value of signed 256-bit integer for the specified precision which +// is encoded to the 32-byte width format of little-endian. 
+pub(crate) const MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [[u8; 32]; 76] = [ + [ + 247, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 157, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 25, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 241, 216, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 97, 121, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 193, 189, 240, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 129, 105, 103, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 31, 10, 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 54, 101, 196, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 28, 244, 171, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 24, 137, 183, 232, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 240, 90, 43, 23, 
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 96, 141, 177, 231, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 192, 133, 239, 12, 165, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 128, 57, 91, 129, 114, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 63, 144, 13, 121, 220, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 118, 162, 135, 186, 156, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 156, 88, 76, 73, 31, 242, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 24, 118, 251, 220, 56, 117, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 240, 156, 210, 161, 56, 148, 250, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 96, 33, 58, 82, 54, 202, 201, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 192, 77, 69, 54, 31, 230, 225, 253, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 128, 9, 181, 30, 56, 253, 210, 234, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 95, 
18, 51, 49, 228, 61, 44, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 182, 183, 254, 235, 233, 106, 186, 247, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 28, 45, 243, 55, 35, 45, 72, 173, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 24, 195, 127, 47, 96, 195, 209, 196, 252, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 240, 158, 253, 218, 193, 161, 49, 176, 223, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 96, 53, 232, 141, 146, 81, 240, 225, 188, 254, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 192, 21, 18, 139, 185, 47, 99, 211, 96, 243, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 128, 217, 180, 110, 63, 221, 223, 65, 200, 129, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 127, 16, 83, 122, 164, 190, 146, 210, 17, 251, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 246, 164, 62, 199, 108, 114, 187, 57, 178, 206, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 156, 113, 114, 200, 63, 120, 82, 65, 246, 18, 254, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 24, 112, 120, 212, 125, 178, 56, 141, 158, 189, 236, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 240, 96, 180, 76, 234, 248, 54, 132, 49, 
104, 63, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 96, 201, 11, 255, 38, 185, 37, 42, 239, 17, 122, 248, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 192, 221, 117, 246, 133, 59, 121, 165, 87, 179, 196, 180, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 128, 169, 154, 160, 59, 83, 188, 118, 108, 1, 175, 15, 253, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 159, 10, 70, 84, 64, 91, 163, 60, 14, 214, 156, 226, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 54, 106, 188, 74, 131, 144, 97, 94, 142, 92, 32, 218, 254, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 28, 38, 92, 235, 32, 165, 207, 175, 143, 157, 67, 133, 244, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 24, 125, 153, 49, 73, 115, 28, 222, 156, 39, 164, 52, 141, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 240, 226, 254, 239, 219, 128, 28, 173, 32, 140, 105, 14, 132, 251, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 96, 221, 244, 95, 151, 8, 29, 195, 70, 121, 31, 144, 40, 211, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 192, 165, 144, 191, 233, 85, 34, 159, 195, 188, 58, 161, 149, 63, + 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 128, 121, 166, 123, 33, 91, 87, 55, 164, 95, 75, 76, 216, 123, + 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 191, 128, 212, 78, 143, 105, 41, 106, 188, 241, 250, 114, 214, + 80, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], 
+ [ + 1, 0, 0, 0, 0, 0, 118, 7, 77, 20, 153, 31, 158, 37, 92, 113, 205, 125, 96, 40, + 249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 156, 74, 2, 203, 250, 59, 45, 120, 153, 109, 6, 234, 196, 147, + 187, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 24, 234, 22, 238, 203, 87, 196, 177, 254, 71, 64, 36, 177, 197, + 83, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 240, 36, 229, 76, 247, 109, 171, 241, 242, 207, 130, 106, 235, + 184, 69, 229, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 96, 113, 243, 0, 169, 75, 178, 112, 125, 31, 28, 41, 50, 57, + 185, 244, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 192, 109, 130, 9, 154, 244, 246, 102, 230, 58, 25, 155, 245, + 59, 60, 143, 245, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 128, 73, 24, 95, 4, 142, 165, 5, 0, 77, 252, 14, 152, 87, 90, + 152, 151, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 223, 242, 182, 43, 140, 119, 56, 0, 2, 219, 149, 240, 107, + 135, 243, 235, 251, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 182, 124, 37, 181, 121, 171, 52, 2, 20, 142, 218, 101, 55, + 74, 131, 55, 215, 255, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 28, 223, 118, 19, 193, 178, 14, 22, 200, 140, 137, 250, 41, + 230, 32, 43, 104, 254, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 24, 183, 164, 194, 138, 251, 146, 220, 208, 127, 95, 201, + 163, 253, 72, 175, 17, 240, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 240, 38, 111, 154, 107, 211, 189, 157, 40, 254, 186, 221, + 101, 232, 217, 216, 176, 96, 255, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 96, 133, 87, 8, 52, 66, 106, 41, 150, 237, 77, 169, 250, 19, + 131, 120, 232, 198, 249, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 
0, 0, 0, 0, 0, 0, 192, 53, 107, 83, 8, 150, 38, 158, 221, 71, 11, 157, 202, + 199, 30, 181, 20, 197, 193, 255, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 128, 25, 48, 66, 83, 220, 129, 45, 168, 206, 112, 34, 234, + 205, 51, 19, 207, 178, 145, 253, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 255, 224, 149, 64, 155, 18, 199, 145, 18, 104, 88, 37, + 11, 6, 192, 22, 252, 176, 231, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 246, 201, 218, 133, 16, 186, 198, 177, 185, 16, 116, 117, + 111, 60, 128, 227, 216, 233, 12, 255, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 156, 227, 139, 58, 165, 68, 195, 241, 64, 167, 136, 150, + 90, 92, 2, 227, 120, 34, 129, 246, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 24, 228, 118, 73, 116, 174, 160, 113, 137, 136, 86, 225, + 137, 155, 23, 222, 184, 88, 11, 161, 255, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 240, 232, 164, 222, 138, 208, 70, 112, 94, 85, 97, 205, + 98, 19, 236, 172, 56, 119, 113, 74, 252, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 96, 25, 113, 178, 108, 37, 196, 98, 176, 85, 205, 5, 220, + 193, 56, 193, 54, 168, 110, 232, 218, 255, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 192, 253, 106, 248, 62, 118, 169, 219, 227, 88, 5, 58, + 152, 146, 55, 140, 35, 146, 82, 20, 141, 254, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 128, 233, 45, 180, 117, 158, 158, 148, 230, 120, 53, 68, + 242, 185, 43, 122, 99, 181, 57, 203, 130, 241, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 31, 203, 9, 153, 48, 50, 206, 1, 185, 22, 170, 118, + 67, 181, 197, 226, 21, 65, 240, 27, 111, 255, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 54, 239, 97, 250, 229, 245, 13, 18, 58, 227, 164, 162, + 162, 20, 185, 219, 218, 138, 98, 23, 87, 250, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 28, 88, 211, 199, 251, 154, 139, 180, 68, 224, 112, + 90, 90, 206, 58, 149, 140, 108, 217, 233, 102, 199, 255, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 24, 113, 65, 206, 213, 13, 116, 13, 
175, 194, 104, + 136, 135, 15, 76, 212, 125, 61, 126, 34, 5, 202, 253, + ], + [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 240, 106, 142, 14, 90, 138, 136, 134, 214, 154, 23, + 84, 75, 155, 248, 74, 234, 102, 238, 88, 51, 228, 233, + ], +]; + +/// `MAX_DECIMAL_FOR_EACH_PRECISION[p]` holds the maximum `i128` value +/// that can be stored in [DataType::Decimal128] value of precision `p` +pub(crate) const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ + 9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999, + 9999999999999999, + 99999999999999999, + 999999999999999999, + 9999999999999999999, + 99999999999999999999, + 999999999999999999999, + 9999999999999999999999, + 99999999999999999999999, + 999999999999999999999999, + 9999999999999999999999999, + 99999999999999999999999999, + 999999999999999999999999999, + 9999999999999999999999999999, + 99999999999999999999999999999, + 999999999999999999999999999999, + 9999999999999999999999999999999, + 99999999999999999999999999999999, + 999999999999999999999999999999999, + 9999999999999999999999999999999999, + 99999999999999999999999999999999999, + 999999999999999999999999999999999999, + 9999999999999999999999999999999999999, + 99999999999999999999999999999999999999, +]; + +/// `MIN_DECIMAL_FOR_EACH_PRECISION[p]` holds the minimum `i128` value +/// that can be stored in a [DataType::Decimal128] value of precision `p` +pub(crate) const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ + -9, + -99, + -999, + -9999, + -99999, + -999999, + -9999999, + -99999999, + -999999999, + -9999999999, + -99999999999, + -999999999999, + -9999999999999, + -99999999999999, + -999999999999999, + -9999999999999999, + -99999999999999999, + -999999999999999999, + -9999999999999999999, + -99999999999999999999, + -999999999999999999999, + -9999999999999999999999, + -99999999999999999999999, + -999999999999999999999999, + -9999999999999999999999999, 
+    -99999999999999999999999999,
+    -999999999999999999999999999,
+    -9999999999999999999999999999,
+    -99999999999999999999999999999,
+    -999999999999999999999999999999,
+    -9999999999999999999999999999999,
+    -99999999999999999999999999999999,
+    -999999999999999999999999999999999,
+    -9999999999999999999999999999999999,
+    -99999999999999999999999999999999999,
+    -999999999999999999999999999999999999,
+    -9999999999999999999999999999999999999,
+    -99999999999999999999999999999999999999,
+];
+
+/// The maximum precision for [DataType::Decimal128] values
+pub(crate) const DECIMAL128_MAX_PRECISION: u8 = 38;
+
+/// The maximum scale for [DataType::Decimal128] values
+pub(crate) const DECIMAL128_MAX_SCALE: u8 = 38;
+
+/// The maximum precision for [DataType::Decimal256] values
+pub(crate) const DECIMAL256_MAX_PRECISION: u8 = 76;
+
+/// The maximum scale for [DataType::Decimal256] values
+pub(crate) const DECIMAL256_MAX_SCALE: u8 = 76;
+
+/// The default scale for [DataType::Decimal128] and [DataType::Decimal256] values
+pub(crate) const DECIMAL_DEFAULT_SCALE: u8 = 10;
+
+/// Validates that the specified `i128` value can be properly
+/// interpreted as a Decimal number with precision `precision`
+///
+/// # Errors
+///
+/// Returns [ArrowError::InvalidArgumentError] if `precision` is `0` or greater
+/// than [DECIMAL128_MAX_PRECISION], or if `value` is larger than the maximum or
+/// smaller than the minimum listed for `precision` in the lookup tables
+#[inline]
+pub(crate) fn validate_decimal_precision(value: i128, precision: u8) -> Result<()> {
+    // The lookup tables are indexed by `precision - 1`, so a precision of zero has
+    // to be rejected before indexing: the subtraction below would otherwise panic
+    if precision == 0 {
+        return Err(ArrowError::InvalidArgumentError(format!(
+            "precision cannot be 0, has to be between [1, {}]",
+            DECIMAL128_MAX_PRECISION
+        )));
+    }
+    if precision > DECIMAL128_MAX_PRECISION {
+        return Err(ArrowError::InvalidArgumentError(format!(
+            "Max precision of a Decimal128 is {}, but got {}",
+            DECIMAL128_MAX_PRECISION, precision,
+        )));
+    }
+
+    let idx = usize::from(precision) - 1;
+    let max = MAX_DECIMAL_FOR_EACH_PRECISION[idx];
+    let min = MIN_DECIMAL_FOR_EACH_PRECISION[idx];
+
+    if value > max {
+        Err(ArrowError::InvalidArgumentError(format!(
+            "{} is too large to store in a Decimal128 of precision {}. Max is {}",
+            value, precision, max
+        )))
+    } else if value < min {
+        Err(ArrowError::InvalidArgumentError(format!(
+            "{} is too small to store in a Decimal128 of precision {}. Min is {}",
+            value, precision, min
+        )))
+    } else {
+        Ok(())
+    }
+}
+
+/// Validates that the little-endian bytes in `lt_value` can be properly
+/// interpreted as a Decimal256 number with precision `precision`
+///
+/// # Errors
+///
+/// Returns [ArrowError::InvalidArgumentError] if `precision` is `0` or greater
+/// than [DECIMAL256_MAX_PRECISION], or if `lt_value` compares greater than the
+/// maximum or less than the minimum bytes listed for `precision`
+#[inline]
+pub(crate) fn validate_decimal256_precision_with_lt_bytes(
+    lt_value: &[u8],
+    precision: u8,
+) -> Result<()> {
+    // The lookup tables are indexed by `precision - 1`, so a precision of zero has
+    // to be rejected before indexing: the subtraction below would otherwise panic
+    if precision == 0 {
+        return Err(ArrowError::InvalidArgumentError(format!(
+            "precision cannot be 0, has to be between [1, {}]",
+            DECIMAL256_MAX_PRECISION
+        )));
+    }
+    if precision > DECIMAL256_MAX_PRECISION {
+        return Err(ArrowError::InvalidArgumentError(format!(
+            "Max precision of a Decimal256 is {}, but got {}",
+            DECIMAL256_MAX_PRECISION, precision,
+        )));
+    }
+    let idx = usize::from(precision) - 1;
+    let max = MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[idx];
+    let min = MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[idx];
+
+    if singed_cmp_le_bytes(lt_value, &max) == Ordering::Greater {
+        Err(ArrowError::InvalidArgumentError(format!(
+            "{:?} is too large to store in a Decimal256 of precision {}. Max is {:?}",
+            BigInt::from_signed_bytes_le(lt_value),
+            precision,
+            BigInt::from_signed_bytes_le(&max)
+        )))
+    } else if singed_cmp_le_bytes(lt_value, &min) == Ordering::Less {
+        Err(ArrowError::InvalidArgumentError(format!(
+            "{:?} is too small to store in a Decimal256 of precision {}. Min is {:?}",
+            BigInt::from_signed_bytes_le(lt_value),
+            precision,
+            BigInt::from_signed_bytes_le(&min)
+        )))
+    } else {
+        Ok(())
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -471,4 +1253,23 @@ mod tests {
             assert_eq!(left == right, left_decimal == right_decimal)
         }
     }
+
+    #[test]
+    fn test_decimal256_min_max_for_precision() {
+        // The precision from 1 to 76
+        let mut max_value = "9".to_string();
+        let mut min_value = "-9".to_string();
+        for i in 1..77 {
+            let max_decimal =
+                Decimal256::from(BigInt::from_str_radix(max_value.as_str(), 10).unwrap());
+            let min_decimal =
+                Decimal256::from(BigInt::from_str_radix(min_value.as_str(), 10).unwrap());
+            let max_bytes = MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[i - 1];
+            let min_bytes = MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[i - 1];
+            max_value += "9";
+            min_value += "9";
+            assert_eq!(max_decimal.raw_value(), &max_bytes);
+            assert_eq!(min_decimal.raw_value(), &min_bytes);
+        }
+    }
 }