From 0d2b91798bedf2dc852db6264673396d5e1f0e01 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 8 Oct 2022 15:42:45 -0700 Subject: [PATCH 1/3] Remove complicated macro --- arrow/src/compute/kernels/cast.rs | 129 ++++++-- arrow/src/compute/kernels/temporal.rs | 423 ++++++++++---------------- 2 files changed, 274 insertions(+), 278 deletions(-) diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index 912ea28830e..11fc54eac39 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -37,14 +37,13 @@ use chrono::format::strftime::StrftimeItems; use chrono::format::{parse, Parsed}; -use chrono::Timelike; +use chrono::{NaiveDateTime, Timelike}; use std::ops::{Div, Mul}; use std::str; use std::sync::Arc; use crate::buffer::MutableBuffer; use crate::compute::kernels::cast_utils::string_to_timestamp_nanos; -use crate::compute::kernels::temporal::extract_component_from_array; use crate::compute::kernels::temporal::return_compute_error_with; use crate::compute::{divide_scalar, multiply_scalar}; use crate::compute::{try_unary, using_chrono_tz_and_utc_naive_date_time}; @@ -1638,6 +1637,98 @@ where unsafe { PrimitiveArray::::from_trusted_len_iter(iter) } } +fn as_time_with_string_op< + A: ArrayAccessor, + OffsetSize, + T: ArrowTemporalType, + F, +>( + iter: ArrayIter, + mut builder: GenericStringBuilder, + op: F, +) -> ArrayRef +where + OffsetSize: OffsetSizeTrait, + F: Fn(NaiveDateTime) -> String, + i64: From, +{ + iter.into_iter().for_each(|value| { + if let Some(value) = value { + match as_datetime::(>::from(value)) { + Some(dt) => builder.append_value(op(dt)), + None => builder.append_null(), + } + } else { + builder.append_null(); + } + }); + + Arc::new(builder.finish()) +} + +fn extract_component_from_datatime_array< + A: ArrayAccessor, + OffsetSize, + T: ArrowTemporalType, + F, +>( + iter: ArrayIter, + mut builder: GenericStringBuilder, + tz: &String, + mut parsed: Parsed, + op: F, +) -> Result +where 
+ OffsetSize: OffsetSizeTrait, + F: Fn(NaiveDateTime) -> String, + i64: From, +{ + if (tz.starts_with('+') || tz.starts_with('-')) && !tz.contains(':') { + return_compute_error_with!( + "Invalid timezone", + "Expected format [+-]XX:XX".to_string() + ) + } else { + let tz_parse_result = parse(&mut parsed, &tz, StrftimeItems::new("%z")); + let fixed_offset_from_parsed = match tz_parse_result { + Ok(_) => match parsed.to_fixed_offset() { + Ok(fo) => Some(fo), + err => return_compute_error_with!("Invalid timezone", err), + }, + _ => None, + }; + + for value in iter.into_iter() { + if let Some(value) = value { + match as_datetime::(>::from(value)) { + Some(utc) => { + let fixed_offset = match fixed_offset_from_parsed { + Some(fo) => fo, + None => { + match using_chrono_tz_and_utc_naive_date_time(&tz, utc) { + Some(fo) => fo, + err => return_compute_error_with!( + "Unable to parse timezone", + err + ), + } + } + }; + builder.append_value(op(utc + fixed_offset)); + } + err => return_compute_error_with!( + "Unable to read value as datetime", + err + ), + } + } else { + builder.append_null(); + } + } + } + Ok(Arc::new(builder.finish())) +} + /// Cast timestamp types to Utf8/LargeUtf8 fn cast_timestamp_to_string( array: &ArrayRef, @@ -1650,38 +1741,32 @@ where { let array = array.as_any().downcast_ref::>().unwrap(); - let mut builder = GenericStringBuilder::::new(); + let builder = GenericStringBuilder::::new(); if let Some(tz) = tz { - let mut scratch = Parsed::new(); + let scratch = Parsed::new(); // The macro calls `as_datetime` on timestamp values of the array. // After applying timezone offset on the datatime, calling `to_string` to get // the strings. 
let iter = ArrayIter::new(array); - extract_component_from_array!( - iter, - builder, - to_string, - |value, tz| as_datetime::(>::from(value)) - .map(|datetime| datetime + tz), - tz, - scratch, - |value| as_datetime::(>::from(value)), - |h| h - ) + return Ok( + extract_component_from_datatime_array::<_, OffsetSize, T, _>( + iter, + builder, + tz, + scratch, + |t| t.to_string(), + )?, + ); } else { // No timezone available. Calling `to_string` on the datatime value simply. let iter = ArrayIter::new(array); - extract_component_from_array!( + return Ok(as_time_with_string_op::<_, OffsetSize, T, _>( iter, builder, - to_string, - |value| as_datetime::(>::from(value)), - |h| h - ) + |t| t.to_string(), + )); } - - Ok(Arc::new(builder.finish()) as ArrayRef) } /// Cast date32 types to Utf8/LargeUtf8 diff --git a/arrow/src/compute/kernels/temporal.rs b/arrow/src/compute/kernels/temporal.rs index e61fec999ad..34b6d55166d 100644 --- a/arrow/src/compute/kernels/temporal.rs +++ b/arrow/src/compute/kernels/temporal.rs @@ -17,7 +17,7 @@ //! Defines temporal kernels for time and date related functions. -use chrono::{Datelike, Timelike}; +use chrono::{Datelike, NaiveDateTime, NaiveTime, Timelike}; use crate::array::*; use crate::datatypes::*; @@ -28,83 +28,111 @@ use chrono::format::strftime::StrftimeItems; use chrono::format::{parse, Parsed}; use chrono::FixedOffset; -macro_rules! 
extract_component_from_array { - ($iter:ident, $builder:ident, $extract_fn:ident, $using:expr, $convert:expr) => { - $iter.into_iter().for_each(|value| { - if let Some(value) = value { - match $using(value) { - Some(dt) => $builder.append_value($convert(dt.$extract_fn())), - None => $builder.append_null(), - } - } else { - $builder.append_null(); +fn as_time_with_op, T: ArrowTemporalType, F>( + iter: ArrayIter, + mut builder: PrimitiveBuilder, + op: F, +) -> Int32Array +where + F: Fn(NaiveTime) -> i32, + i64: From, +{ + iter.into_iter().for_each(|value| { + if let Some(value) = value { + match as_time::(i64::from(value)) { + Some(dt) => builder.append_value(op(dt)), + None => builder.append_null(), } - }) - }; - ($iter:ident, $builder:ident, $extract_fn1:ident, $extract_fn2:ident, $using:expr, $convert:expr) => { - $iter.into_iter().for_each(|value| { - if let Some(value) = value { - match $using(value) { - Some(dt) => { - $builder.append_value($convert(dt.$extract_fn1().$extract_fn2())); - } - None => $builder.append_null(), - } - } else { - $builder.append_null(); + } else { + builder.append_null(); + } + }); + + builder.finish() +} + +fn as_datetime_with_op, T: ArrowTemporalType, F>( + iter: ArrayIter, + mut builder: PrimitiveBuilder, + op: F, +) -> Int32Array +where + F: Fn(NaiveDateTime) -> i32, + i64: From, +{ + iter.into_iter().for_each(|value| { + if let Some(value) = value { + match as_datetime::(i64::from(value)) { + Some(dt) => builder.append_value(op(dt)), + None => builder.append_null(), } - }) - }; - ($iter:ident, $builder:ident, $extract_fn:ident, $using:expr, $tz:ident, $parsed:ident, $value_as_datetime:expr, $convert:expr) => { - if ($tz.starts_with('+') || $tz.starts_with('-')) && !$tz.contains(':') { - return_compute_error_with!( - "Invalid timezone", - "Expected format [+-]XX:XX".to_string() - ) } else { - let tz_parse_result = parse(&mut $parsed, &$tz, StrftimeItems::new("%z")); - let fixed_offset_from_parsed = match tz_parse_result { - Ok(_) 
=> match $parsed.to_fixed_offset() { - Ok(fo) => Some(fo), - err => return_compute_error_with!("Invalid timezone", err), - }, - _ => None, - }; - - for value in $iter.into_iter() { - if let Some(value) = value { - match $value_as_datetime(value) { - Some(utc) => { - let fixed_offset = match fixed_offset_from_parsed { - Some(fo) => fo, - None => match using_chrono_tz_and_utc_naive_date_time( - &$tz, utc, - ) { + builder.append_null(); + } + }); + + builder.finish() +} + +fn extract_component_from_datatime_array< + A: ArrayAccessor, + T: ArrowTemporalType, + F, +>( + iter: ArrayIter, + mut builder: PrimitiveBuilder, + tz: &String, + mut parsed: Parsed, + op: F, +) -> Result +where + F: Fn(NaiveDateTime) -> i32, + i64: From, +{ + if (tz.starts_with('+') || tz.starts_with('-')) && !tz.contains(':') { + return_compute_error_with!( + "Invalid timezone", + "Expected format [+-]XX:XX".to_string() + ) + } else { + let tz_parse_result = parse(&mut parsed, &tz, StrftimeItems::new("%z")); + let fixed_offset_from_parsed = match tz_parse_result { + Ok(_) => match parsed.to_fixed_offset() { + Ok(fo) => Some(fo), + err => return_compute_error_with!("Invalid timezone", err), + }, + _ => None, + }; + + for value in iter.into_iter() { + if let Some(value) = value { + match as_datetime::(i64::from(value)) { + Some(utc) => { + let fixed_offset = match fixed_offset_from_parsed { + Some(fo) => fo, + None => { + match using_chrono_tz_and_utc_naive_date_time(&tz, utc) { Some(fo) => fo, err => return_compute_error_with!( "Unable to parse timezone", err ), - }, - }; - match $using(value, fixed_offset) { - Some(dt) => { - $builder.append_value($convert(dt.$extract_fn())); } - None => $builder.append_null(), } - } - err => return_compute_error_with!( - "Unable to read value as datetime", - err - ), + }; + builder.append_value(op(utc + fixed_offset)); } - } else { - $builder.append_null(); + err => return_compute_error_with!( + "Unable to read value as datetime", + err + ), } + } else { + 
builder.append_null(); } } - }; + } + Ok(builder.finish()) } macro_rules! return_compute_error_with { @@ -113,7 +141,6 @@ macro_rules! return_compute_error_with { }; } -pub(crate) use extract_component_from_array; pub(crate) use return_compute_error_with; // Internal trait, which is used for mapping values from DateLike structures @@ -206,47 +233,29 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::with_capacity(array.len()); + let b = Int32Builder::with_capacity(array.len()); match dt { DataType::Time32(_) | DataType::Time64(_) => { let iter = ArrayIter::new(array); - extract_component_from_array!( - iter, - b, - hour, - |value| as_time::(i64::from(value)), - |h| h as i32 - ); + return Ok(as_time_with_op::(iter, b, |t| t.hour() as i32)); } DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - extract_component_from_array!( - iter, - b, - hour, - |value| as_datetime::(i64::from(value)), - |h| h as i32 - ) + return Ok(as_datetime_with_op::(iter, b, |t| t.hour() as i32)); } DataType::Timestamp(_, Some(tz)) => { - let mut scratch = Parsed::new(); + let scratch = Parsed::new(); let iter = ArrayIter::new(array); - extract_component_from_array!( + return Ok(extract_component_from_datatime_array::( iter, b, - hour, - |value, tz| as_datetime::(i64::from(value)) - .map(|datetime| datetime + tz), tz, scratch, - |value| as_datetime::(i64::from(value)), - |h| h as i32 - ) + |t| t.hour() as i32, + )?); } _ => return_compute_error_with!("hour does not support", array.data_type()), } - - Ok(b.finish()) } /// Extracts the years of a given temporal primitive array as an array of integers @@ -281,22 +290,14 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::with_capacity(array.len()); match dt { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) => { + let b = Int32Builder::with_capacity(array.len()); let iter 
= ArrayIter::new(array); - extract_component_from_array!( - iter, - b, - year, - |value| as_datetime::(i64::from(value)), - |h| h as i32 - ) + return Ok(as_datetime_with_op::(iter, b, |t| t.year())); } _t => return_compute_error_with!("year does not support", array.data_type()), } - - Ok(b.finish()) } /// Extracts the quarter of a given temporal primitive array as an array of integers within @@ -335,37 +336,27 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::with_capacity(array.len()); + let b = Int32Builder::with_capacity(array.len()); match dt { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - extract_component_from_array!( - iter, - b, - quarter, - |value| as_datetime::(i64::from(value)), - |h| h as i32 - ) + return Ok(as_datetime_with_op::(iter, b, |t| { + t.quarter() as i32 + })); } DataType::Timestamp(_, Some(tz)) => { - let mut scratch = Parsed::new(); + let scratch = Parsed::new(); let iter = ArrayIter::new(array); - extract_component_from_array!( + return Ok(extract_component_from_datatime_array::( iter, b, - quarter, - |value, tz| as_datetime::(i64::from(value)) - .map(|datetime| datetime + tz), tz, scratch, - |value| as_datetime::(i64::from(value)), - |h| h as i32 - ) + |t| t.quarter() as i32, + )?); } _ => return_compute_error_with!("quarter does not support", array.data_type()), } - - Ok(b.finish()) } /// Extracts the month of a given temporal primitive array as an array of integers within @@ -403,37 +394,27 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::with_capacity(array.len()); + let b = Int32Builder::with_capacity(array.len()); match dt { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - extract_component_from_array!( - iter, - b, - month, - |value| as_datetime::(i64::from(value)), - |h| h as i32 - ) + return 
Ok(as_datetime_with_op::(iter, b, |t| { + t.month() as i32 + })); } DataType::Timestamp(_, Some(tz)) => { - let mut scratch = Parsed::new(); + let scratch = Parsed::new(); let iter = ArrayIter::new(array); - extract_component_from_array!( + return Ok(extract_component_from_datatime_array::( iter, b, - month, - |value, tz| as_datetime::(i64::from(value)) - .map(|datetime| datetime + tz), tz, scratch, - |value| as_datetime::(i64::from(value)), - |h| h as i32 - ) + |t| t.month() as i32, + )?); } _ => return_compute_error_with!("month does not support", array.data_type()), } - - Ok(b.finish()) } /// Extracts the day of week of a given temporal primitive array as an array of @@ -485,37 +466,27 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::with_capacity(array.len()); + let b = Int32Builder::with_capacity(array.len()); match dt { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - extract_component_from_array!( - iter, - b, - num_days_from_monday, - |value| { as_datetime::(i64::from(value)) }, - |h| h as i32 - ) + return Ok(as_datetime_with_op::(iter, b, |t| { + t.num_days_from_monday() + })); } DataType::Timestamp(_, Some(tz)) => { - let mut scratch = Parsed::new(); + let scratch = Parsed::new(); let iter = ArrayIter::new(array); - extract_component_from_array!( + return Ok(extract_component_from_datatime_array::( iter, b, - num_days_from_monday, - |value, tz| as_datetime::(i64::from(value)) - .map(|datetime| datetime + tz), tz, scratch, - |value| as_datetime::(i64::from(value)), - |h| h as i32 - ) + |t| t.num_days_from_monday(), + )?); } _ => return_compute_error_with!("weekday does not support", array.data_type()), } - - Ok(b.finish()) } /// Extracts the day of week of a given temporal primitive array as an array of @@ -567,40 +538,30 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = 
Int32Builder::with_capacity(array.len()); + let b = Int32Builder::with_capacity(array.len()); match dt { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - extract_component_from_array!( - iter, - b, - num_days_from_sunday, - |value| { as_datetime::(i64::from(value)) }, - |h| h as i32 - ) + return Ok(as_datetime_with_op::(iter, b, |t| { + t.num_days_from_sunday() + })); } DataType::Timestamp(_, Some(tz)) => { - let mut scratch = Parsed::new(); + let scratch = Parsed::new(); let iter = ArrayIter::new(array); - extract_component_from_array!( + return Ok(extract_component_from_datatime_array::( iter, b, - num_days_from_sunday, - |value, tz| as_datetime::(i64::from(value)) - .map(|datetime| datetime + tz), tz, scratch, - |value| as_datetime::(i64::from(value)), - |h| h as i32 - ) + |t| t.num_days_from_sunday(), + )?); } _ => return_compute_error_with!( "num_days_from_sunday does not support", array.data_type() ), } - - Ok(b.finish()) } /// Extracts the day of a given temporal primitive array as an array of integers @@ -635,37 +596,25 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::with_capacity(array.len()); + let b = Int32Builder::with_capacity(array.len()); match dt { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - extract_component_from_array!( - iter, - b, - day, - |value| { as_datetime::(i64::from(value)) }, - |h| h as i32 - ) + return Ok(as_datetime_with_op::(iter, b, |t| t.day() as i32)); } DataType::Timestamp(_, Some(ref tz)) => { - let mut scratch = Parsed::new(); + let scratch = Parsed::new(); let iter = ArrayIter::new(array); - extract_component_from_array!( + return Ok(extract_component_from_datatime_array::( iter, b, - day, - |value, tz| as_datetime::(i64::from(value)) - .map(|datetime| datetime + tz), tz, scratch, - |value| as_datetime::(i64::from(value)), - |h| h as i32 - ) + |t| 
t.day() as i32, + )?); } _ => return_compute_error_with!("day does not support", array.data_type()), } - - Ok(b.finish()) } /// Extracts the day of year of a given temporal primitive array as an array of integers @@ -704,37 +653,27 @@ where T::Native: ArrowNativeType, i64: std::convert::From, { - let mut b = Int32Builder::with_capacity(array.len()); + let b = Int32Builder::with_capacity(array.len()); match dt { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - extract_component_from_array!( - iter, - b, - ordinal, - |value| { as_datetime::(i64::from(value)) }, - |h| h as i32 - ) + return Ok(as_datetime_with_op::(iter, b, |t| { + t.ordinal() as i32 + })); } DataType::Timestamp(_, Some(ref tz)) => { - let mut scratch = Parsed::new(); + let scratch = Parsed::new(); let iter = ArrayIter::new(array); - extract_component_from_array!( + return Ok(extract_component_from_datatime_array::( iter, b, - ordinal, - |value, tz| as_datetime::(i64::from(value)) - .map(|datetime| datetime + tz), tz, scratch, - |value| as_datetime::(i64::from(value)), - |h| h as i32 - ) + |t| t.ordinal() as i32, + )?); } _ => return_compute_error_with!("doy does not support", array.data_type()), } - - Ok(b.finish()) } /// Extracts the minutes of a given temporal primitive array as an array of integers @@ -771,37 +710,27 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::with_capacity(array.len()); + let b = Int32Builder::with_capacity(array.len()); match dt { DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - extract_component_from_array!( - iter, - b, - minute, - |value| { as_datetime::(i64::from(value)) }, - |h| h as i32 - ) + return Ok(as_datetime_with_op::(iter, b, |t| { + t.minute() as i32 + })); } DataType::Timestamp(_, Some(tz)) => { - let mut scratch = Parsed::new(); + let scratch = Parsed::new(); let iter = ArrayIter::new(array); - 
extract_component_from_array!( + return Ok(extract_component_from_datatime_array::( iter, b, - minute, - |value, tz| as_datetime::(i64::from(value)) - .map(|datetime| datetime + tz), tz, scratch, - |value| as_datetime::(i64::from(value)), - |h| h as i32 - ) + |t| t.minute() as i32, + )?); } _ => return_compute_error_with!("minute does not support", array.data_type()), } - - Ok(b.finish()) } /// Extracts the week of a given temporal primitive array as an array of integers @@ -836,24 +765,16 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::with_capacity(array.len()); - match dt { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { + let b = Int32Builder::with_capacity(array.len()); let iter = ArrayIter::new(array); - extract_component_from_array!( - iter, - b, - iso_week, - week, - |value| { as_datetime::(i64::from(value)) }, - |h| h as i32 - ) + return Ok(as_datetime_with_op::(iter, b, |t| { + t.iso_week().week() as i32 + })); } _ => return_compute_error_with!("week does not support", array.data_type()), } - - Ok(b.finish()) } /// Extracts the seconds of a given temporal primitive array as an array of integers @@ -890,37 +811,27 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::with_capacity(array.len()); + let b = Int32Builder::with_capacity(array.len()); match dt { DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - extract_component_from_array!( - iter, - b, - second, - |value| { as_datetime::(i64::from(value)) }, - |h| h as i32 - ) + return Ok(as_datetime_with_op::(iter, b, |t| { + t.second() as i32 + })); } DataType::Timestamp(_, Some(tz)) => { - let mut scratch = Parsed::new(); + let scratch = Parsed::new(); let iter = ArrayIter::new(array); - extract_component_from_array!( + return Ok(extract_component_from_datatime_array::( iter, b, - second, - |value, tz| as_datetime::(i64::from(value)) - 
.map(|datetime| datetime + tz), tz, scratch, - |value| as_datetime::(i64::from(value)), - |h| h as i32 - ) + |t| t.second() as i32, + )?); } _ => return_compute_error_with!("second does not support", array.data_type()), } - - Ok(b.finish()) } #[cfg(test)] From fd0df58cdc11493309585394784b8a42d73580d0 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 8 Oct 2022 16:50:16 -0700 Subject: [PATCH 2/3] Fix clippy --- arrow/src/compute/kernels/cast.rs | 28 +++--- arrow/src/compute/kernels/temporal.rs | 138 ++++++++++---------------- 2 files changed, 64 insertions(+), 102 deletions(-) diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index 11fc54eac39..b573c65d026 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -1674,7 +1674,7 @@ fn extract_component_from_datatime_array< >( iter: ArrayIter, mut builder: GenericStringBuilder, - tz: &String, + tz: &str, mut parsed: Parsed, op: F, ) -> Result @@ -1689,7 +1689,7 @@ where "Expected format [+-]XX:XX".to_string() ) } else { - let tz_parse_result = parse(&mut parsed, &tz, StrftimeItems::new("%z")); + let tz_parse_result = parse(&mut parsed, tz, StrftimeItems::new("%z")); let fixed_offset_from_parsed = match tz_parse_result { Ok(_) => match parsed.to_fixed_offset() { Ok(fo) => Some(fo), @@ -1698,14 +1698,14 @@ where _ => None, }; - for value in iter.into_iter() { + for value in iter { if let Some(value) = value { match as_datetime::(>::from(value)) { Some(utc) => { let fixed_offset = match fixed_offset_from_parsed { Some(fo) => fo, None => { - match using_chrono_tz_and_utc_naive_date_time(&tz, utc) { + match using_chrono_tz_and_utc_naive_date_time(tz, utc) { Some(fo) => fo, err => return_compute_error_with!( "Unable to parse timezone", @@ -1749,23 +1749,21 @@ where // After applying timezone offset on the datatime, calling `to_string` to get // the strings. 
let iter = ArrayIter::new(array); - return Ok( - extract_component_from_datatime_array::<_, OffsetSize, T, _>( - iter, - builder, - tz, - scratch, - |t| t.to_string(), - )?, - ); + extract_component_from_datatime_array::<_, OffsetSize, T, _>( + iter, + builder, + tz, + scratch, + |t| t.to_string(), + ) } else { // No timezone available. Calling `to_string` on the datatime value simply. let iter = ArrayIter::new(array); - return Ok(as_time_with_string_op::<_, OffsetSize, T, _>( + Ok(as_time_with_string_op::<_, OffsetSize, T, _>( iter, builder, |t| t.to_string(), - )); + )) } } diff --git a/arrow/src/compute/kernels/temporal.rs b/arrow/src/compute/kernels/temporal.rs index 34b6d55166d..6f222e70938 100644 --- a/arrow/src/compute/kernels/temporal.rs +++ b/arrow/src/compute/kernels/temporal.rs @@ -81,7 +81,7 @@ fn extract_component_from_datatime_array< >( iter: ArrayIter, mut builder: PrimitiveBuilder, - tz: &String, + tz: &str, mut parsed: Parsed, op: F, ) -> Result @@ -95,7 +95,7 @@ where "Expected format [+-]XX:XX".to_string() ) } else { - let tz_parse_result = parse(&mut parsed, &tz, StrftimeItems::new("%z")); + let tz_parse_result = parse(&mut parsed, tz, StrftimeItems::new("%z")); let fixed_offset_from_parsed = match tz_parse_result { Ok(_) => match parsed.to_fixed_offset() { Ok(fo) => Some(fo), @@ -104,14 +104,14 @@ where _ => None, }; - for value in iter.into_iter() { + for value in iter { if let Some(value) = value { match as_datetime::(i64::from(value)) { Some(utc) => { let fixed_offset = match fixed_offset_from_parsed { Some(fo) => fo, None => { - match using_chrono_tz_and_utc_naive_date_time(&tz, utc) { + match using_chrono_tz_and_utc_naive_date_time(tz, utc) { Some(fo) => fo, err => return_compute_error_with!( "Unable to parse timezone", @@ -237,22 +237,18 @@ where match dt { DataType::Time32(_) | DataType::Time64(_) => { let iter = ArrayIter::new(array); - return Ok(as_time_with_op::(iter, b, |t| t.hour() as i32)); + Ok(as_time_with_op::(iter, b, |t| 
t.hour() as i32)) } DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - return Ok(as_datetime_with_op::(iter, b, |t| t.hour() as i32)); + Ok(as_datetime_with_op::(iter, b, |t| t.hour() as i32)) } DataType::Timestamp(_, Some(tz)) => { let scratch = Parsed::new(); let iter = ArrayIter::new(array); - return Ok(extract_component_from_datatime_array::( - iter, - b, - tz, - scratch, - |t| t.hour() as i32, - )?); + extract_component_from_datatime_array::(iter, b, tz, scratch, |t| { + t.hour() as i32 + }) } _ => return_compute_error_with!("hour does not support", array.data_type()), } @@ -294,7 +290,7 @@ where DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) => { let b = Int32Builder::with_capacity(array.len()); let iter = ArrayIter::new(array); - return Ok(as_datetime_with_op::(iter, b, |t| t.year())); + Ok(as_datetime_with_op::(iter, b, |t| t.year())) } _t => return_compute_error_with!("year does not support", array.data_type()), } @@ -340,20 +336,16 @@ where match dt { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - return Ok(as_datetime_with_op::(iter, b, |t| { + Ok(as_datetime_with_op::(iter, b, |t| { t.quarter() as i32 - })); + })) } DataType::Timestamp(_, Some(tz)) => { let scratch = Parsed::new(); let iter = ArrayIter::new(array); - return Ok(extract_component_from_datatime_array::( - iter, - b, - tz, - scratch, - |t| t.quarter() as i32, - )?); + extract_component_from_datatime_array::(iter, b, tz, scratch, |t| { + t.quarter() as i32 + }) } _ => return_compute_error_with!("quarter does not support", array.data_type()), } @@ -398,20 +390,16 @@ where match dt { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - return Ok(as_datetime_with_op::(iter, b, |t| { + Ok(as_datetime_with_op::(iter, b, |t| { t.month() as i32 - })); + })) } DataType::Timestamp(_, Some(tz)) => { let scratch = 
Parsed::new(); let iter = ArrayIter::new(array); - return Ok(extract_component_from_datatime_array::( - iter, - b, - tz, - scratch, - |t| t.month() as i32, - )?); + extract_component_from_datatime_array::(iter, b, tz, scratch, |t| { + t.month() as i32 + }) } _ => return_compute_error_with!("month does not support", array.data_type()), } @@ -470,20 +458,16 @@ where match dt { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - return Ok(as_datetime_with_op::(iter, b, |t| { + Ok(as_datetime_with_op::(iter, b, |t| { t.num_days_from_monday() - })); + })) } DataType::Timestamp(_, Some(tz)) => { let scratch = Parsed::new(); let iter = ArrayIter::new(array); - return Ok(extract_component_from_datatime_array::( - iter, - b, - tz, - scratch, - |t| t.num_days_from_monday(), - )?); + extract_component_from_datatime_array::(iter, b, tz, scratch, |t| { + t.num_days_from_monday() + }) } _ => return_compute_error_with!("weekday does not support", array.data_type()), } @@ -542,20 +526,16 @@ where match dt { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - return Ok(as_datetime_with_op::(iter, b, |t| { + Ok(as_datetime_with_op::(iter, b, |t| { t.num_days_from_sunday() - })); + })) } DataType::Timestamp(_, Some(tz)) => { let scratch = Parsed::new(); let iter = ArrayIter::new(array); - return Ok(extract_component_from_datatime_array::( - iter, - b, - tz, - scratch, - |t| t.num_days_from_sunday(), - )?); + extract_component_from_datatime_array::(iter, b, tz, scratch, |t| { + t.num_days_from_sunday() + }) } _ => return_compute_error_with!( "num_days_from_sunday does not support", @@ -600,18 +580,14 @@ where match dt { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - return Ok(as_datetime_with_op::(iter, b, |t| t.day() as i32)); + Ok(as_datetime_with_op::(iter, b, |t| t.day() as i32)) } DataType::Timestamp(_, 
Some(ref tz)) => { let scratch = Parsed::new(); let iter = ArrayIter::new(array); - return Ok(extract_component_from_datatime_array::( - iter, - b, - tz, - scratch, - |t| t.day() as i32, - )?); + extract_component_from_datatime_array::(iter, b, tz, scratch, |t| { + t.day() as i32 + }) } _ => return_compute_error_with!("day does not support", array.data_type()), } @@ -657,20 +633,16 @@ where match dt { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - return Ok(as_datetime_with_op::(iter, b, |t| { + Ok(as_datetime_with_op::(iter, b, |t| { t.ordinal() as i32 - })); + })) } DataType::Timestamp(_, Some(ref tz)) => { let scratch = Parsed::new(); let iter = ArrayIter::new(array); - return Ok(extract_component_from_datatime_array::( - iter, - b, - tz, - scratch, - |t| t.ordinal() as i32, - )?); + extract_component_from_datatime_array::(iter, b, tz, scratch, |t| { + t.ordinal() as i32 + }) } _ => return_compute_error_with!("doy does not support", array.data_type()), } @@ -714,20 +686,16 @@ where match dt { DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - return Ok(as_datetime_with_op::(iter, b, |t| { + Ok(as_datetime_with_op::(iter, b, |t| { t.minute() as i32 - })); + })) } DataType::Timestamp(_, Some(tz)) => { let scratch = Parsed::new(); let iter = ArrayIter::new(array); - return Ok(extract_component_from_datatime_array::( - iter, - b, - tz, - scratch, - |t| t.minute() as i32, - )?); + extract_component_from_datatime_array::(iter, b, tz, scratch, |t| { + t.minute() as i32 + }) } _ => return_compute_error_with!("minute does not support", array.data_type()), } @@ -769,9 +737,9 @@ where DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { let b = Int32Builder::with_capacity(array.len()); let iter = ArrayIter::new(array); - return Ok(as_datetime_with_op::(iter, b, |t| { + Ok(as_datetime_with_op::(iter, b, |t| { t.iso_week().week() as i32 - })); + })) } _ => 
return_compute_error_with!("week does not support", array.data_type()), } @@ -815,20 +783,16 @@ where match dt { DataType::Date64 | DataType::Timestamp(_, None) => { let iter = ArrayIter::new(array); - return Ok(as_datetime_with_op::(iter, b, |t| { + Ok(as_datetime_with_op::(iter, b, |t| { t.second() as i32 - })); + })) } DataType::Timestamp(_, Some(tz)) => { let scratch = Parsed::new(); let iter = ArrayIter::new(array); - return Ok(extract_component_from_datatime_array::( - iter, - b, - tz, - scratch, - |t| t.second() as i32, - )?); + extract_component_from_datatime_array::(iter, b, tz, scratch, |t| { + t.second() as i32 + }) } _ => return_compute_error_with!("second does not support", array.data_type()), } From 66be8a4ba2b419ebb32deba3d08212efed1a64be Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 12 Oct 2022 15:50:36 -0700 Subject: [PATCH 3/3] Add doc. --- arrow/src/compute/kernels/temporal.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arrow/src/compute/kernels/temporal.rs b/arrow/src/compute/kernels/temporal.rs index 6f222e70938..220b7dadcc5 100644 --- a/arrow/src/compute/kernels/temporal.rs +++ b/arrow/src/compute/kernels/temporal.rs @@ -28,6 +28,9 @@ use chrono::format::strftime::StrftimeItems; use chrono::format::{parse, Parsed}; use chrono::FixedOffset; +/// This function takes an `ArrayIter` of the input array and an extractor `op` which takes +/// an input `NaiveTime` and returns a time component (e.g. hour) as an `i32` value. +/// The extracted values are built by the given `builder` into an `Int32Array`. fn as_time_with_op, T: ArrowTemporalType, F>( iter: ArrayIter, mut builder: PrimitiveBuilder, @@ -51,6 +54,9 @@ where builder.finish() } +/// This function takes an `ArrayIter` of the input array and an extractor `op` which takes +/// an input `NaiveDateTime` and returns a datetime component (e.g. hour) as an `i32` value. +/// The extracted values are built by the given `builder` into an `Int32Array`. 
fn as_datetime_with_op, T: ArrowTemporalType, F>( iter: ArrayIter, mut builder: PrimitiveBuilder, @@ -74,6 +80,13 @@ where builder.finish() } +/// This function extracts a datetime component (e.g. hour) from an array of datetimes. +/// `iter` is the `ArrayIter` of the input datetime array. `builder` is used to build the +/// returned `Int32Array` containing the extracted components. `tz` is the timezone string +/// whose offset will be added to datetime values in the input array. `parsed` is a `Parsed` +/// object used to parse the timezone string. `op` is the extractor closure which takes +/// a datetime object of `NaiveDateTime` type and returns the `i32` value of the extracted +/// component. fn extract_component_from_datatime_array< A: ArrayAccessor, T: ArrowTemporalType,