diff --git a/Cargo.toml b/Cargo.toml index c3d817b01c..e9ee6bd3ed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,7 +61,6 @@ serde_cbor = { version = "0.11.2", optional = true } async-trait = "0.1.53" arc-swap = "1.5.0" gcd = "2.1.0" -libdivide = "0.4.0" [target.'cfg(windows)'.dependencies] winapi = "0.3.9" diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index c4bb1db870..f17b381546 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -2,7 +2,6 @@ use std::collections::HashMap; use std::marker::PhantomData; use std::path::Path; -use common::BinarySerializable; use fastfield_codecs::bitpacked::{ BitpackedFastFieldReader as BitpackedReader, BitpackedFastFieldSerializer, }; diff --git a/src/fastfield/serializer/gcd.rs b/src/fastfield/serializer/gcd.rs index 5753831c95..044efe280d 100644 --- a/src/fastfield/serializer/gcd.rs +++ b/src/fastfield/serializer/gcd.rs @@ -1,71 +1,46 @@ -use fastfield_codecs::{FastFieldDataAccess, FastFieldStats}; +use fastdivide::DividerU64; use gcd::Gcd; -use libdivide::Divider; pub const GCD_DEFAULT: u64 = 1; -fn compute_gcd(vals: &[u64], base: u64) -> u64 { - let mut gcd = (vals[0] - base).gcd(vals[1] - base); - - for el in vals.iter().map(|el| el - base) { - gcd = gcd.gcd(el); +pub fn find_gcd(mut numbers: impl Iterator, base: u64) -> Option { + let mut num1 = 0; + let mut num2 = 0; + loop { + let num = numbers.next()? - base; + if num1 == 0 { + num1 = num; + } + if num2 == 0 { + num2 = num; + } + if num1 != 0 && num2 != 0 { + break; + } } - gcd -} - -fn is_valid_gcd(vals: impl Iterator, divider: u64, base: u64) -> bool { - if divider <= 1 { - return false; + let mut gcd = (num1).gcd(num2); + if gcd == 0 { + return None; } - let d = Divider::new(divider).unwrap(); // this is slow - for val in vals { + let mut gcd_divider = DividerU64::divide_by(gcd); + for val in numbers { let val = val - base; - if val != (val / &d) * divider { - return false; + if val == 0 { + continue; + } + let rem = val - (gcd_divider.divide(val)) * gcd; + if rem == 0 { + continue; + } + gcd = gcd.gcd(val); + if gcd == 1 { + return None; } - } - true -} - -fn get_samples(fastfield_accessor: &impl FastFieldDataAccess, stats: &FastFieldStats) -> Vec { - // let's sample at 0%, 5%, 10% .. 95%, 100% - let num_samples = stats.num_vals.min(20); - let step_size = 100.0 / num_samples as f32; - let mut sample_values = (0..num_samples) - .map(|idx| (idx as f32 * step_size / 100.0 * stats.num_vals as f32) as usize) - .map(|pos| fastfield_accessor.get_val(pos as u64)) - .collect::>(); - - sample_values.push(stats.min_value); - sample_values.push(stats.max_value); - sample_values -} - -pub(crate) fn find_gcd_from_samples( - samples: &[u64], - vals: impl Iterator, - base: u64, -) -> Option { - let estimate_gcd = compute_gcd(samples, base); - if is_valid_gcd(vals, estimate_gcd, base) { - Some(estimate_gcd) - } else { - None - } -} -pub(crate) fn find_gcd( - fastfield_accessor: &impl FastFieldDataAccess, - stats: FastFieldStats, - vals: impl Iterator, -) -> Option { - if stats.num_vals == 0 { - return None; + gcd_divider = DividerU64::divide_by(gcd); } - - let samples = get_samples(fastfield_accessor, &stats); - find_gcd_from_samples(&samples, vals, stats.min_value) + Some(gcd) } #[cfg(test)] @@ -75,7 +50,6 @@ mod tests { use common::HasLen; - use super::*; use crate::directory::{CompositeFile, RamDirectory, WritePtr}; use crate::fastfield::serializer::{FastFieldCodecEnableCheck, FastFieldCodecName, ALL_CODECS}; use crate::fastfield::tests::{FIELD, FIELDI64, SCHEMA, SCHEMAI64}; @@ -199,28 +173,4 @@ mod tests { assert_eq!(test_fastfield.get(1), 200); assert_eq!(test_fastfield.get(2), 300); } - - #[test] - fn test_gcd() { - let data = vec![ - 9223372036854775808_u64, - 9223372036854775808, - 9223372036854775808, - ]; - - let gcd = find_gcd_from_samples(&data, data.iter().cloned(), *data.iter().min().unwrap()); - assert_eq!(gcd, None); - } - - #[test] - fn test_gcd2() { - let data = vec![ - 9223372036854775808_u64, - 9223372036854776808, - 9223372036854777808, - ]; - - let gcd = find_gcd_from_samples(&data, data.iter().cloned(), *data.iter().min().unwrap()); - assert_eq!(gcd, Some(1000)); - } } diff --git a/src/fastfield/serializer/mod.rs b/src/fastfield/serializer/mod.rs index 8af4805d66..b8171a3280 100644 --- a/src/fastfield/serializer/mod.rs +++ b/src/fastfield/serializer/mod.rs @@ -175,7 +175,8 @@ impl CompositeFastFieldSerializer { (self.fastfield_accessor.get_val(position) - self.min_value) / self.gcd } } - let gcd = find_gcd(&fastfield_accessor, stats.clone(), iter_gen()).unwrap_or(GCD_DEFAULT); + //let gcd = find_gcd(&fastfield_accessor, stats.clone(), iter_gen()).unwrap_or(GCD_DEFAULT); + let gcd = find_gcd(iter_gen(), stats.min_value).unwrap_or(GCD_DEFAULT); let fastfield_accessor = WrappedFFAccess { fastfield_accessor, min_value: stats.min_value,