Skip to content

Commit

Permalink
use single pass for gcd
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz committed Jul 22, 2022
1 parent fff1a03 commit 73dd5f0
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 85 deletions.
1 change: 0 additions & 1 deletion Cargo.toml
Expand Up @@ -61,7 +61,6 @@ serde_cbor = { version = "0.11.2", optional = true }
async-trait = "0.1.53"
arc-swap = "1.5.0"
gcd = "2.1.0"
libdivide = "0.4.0"

[target.'cfg(windows)'.dependencies]
winapi = "0.3.9"
Expand Down
1 change: 0 additions & 1 deletion src/fastfield/reader.rs
Expand Up @@ -2,7 +2,6 @@ use std::collections::HashMap;
use std::marker::PhantomData;
use std::path::Path;

use common::BinarySerializable;
use fastfield_codecs::bitpacked::{
BitpackedFastFieldReader as BitpackedReader, BitpackedFastFieldSerializer,
};
Expand Down
114 changes: 32 additions & 82 deletions src/fastfield/serializer/gcd.rs
@@ -1,71 +1,46 @@
use fastfield_codecs::{FastFieldDataAccess, FastFieldStats};
use fastdivide::DividerU64;
use gcd::Gcd;
use libdivide::Divider;

/// Fallback divisor applied via `unwrap_or(GCD_DEFAULT)` when the GCD search yields `None`.
/// A divisor of 1 leaves values unchanged when they are divided by the GCD.
pub const GCD_DEFAULT: u64 = 1;

fn compute_gcd(vals: &[u64], base: u64) -> u64 {
let mut gcd = (vals[0] - base).gcd(vals[1] - base);

for el in vals.iter().map(|el| el - base) {
gcd = gcd.gcd(el);
pub fn find_gcd(mut numbers: impl Iterator<Item = u64>, base: u64) -> Option<u64> {
let mut num1 = 0;
let mut num2 = 0;
loop {
let num = numbers.next()? - base;
if num1 == 0 {
num1 = num;
}
if num2 == 0 {
num2 = num;
}
if num1 != 0 && num2 != 0 {
break;
}
}
gcd
}

fn is_valid_gcd(vals: impl Iterator<Item = u64>, divider: u64, base: u64) -> bool {
if divider <= 1 {
return false;
let mut gcd = (num1).gcd(num2);
if gcd == 0 {
return None;
}
let d = Divider::new(divider).unwrap(); // this is slow

for val in vals {
let mut gcd_divider = DividerU64::divide_by(gcd);
for val in numbers {
let val = val - base;
if val != (val / &d) * divider {
return false;
if val == 0 {
continue;
}
let rem = val - (gcd_divider.divide(val)) * gcd;
if rem == 0 {
continue;
}
gcd = gcd.gcd(val);
if gcd == 1 {
return None;
}
}
true
}

/// Collects a handful of sample values from the fast field.
///
/// At most 20 values are read through `fastfield_accessor`. Sample positions start at 0% of
/// `stats.num_vals` and advance in equal steps of `100 / num_samples` percent (0%, 5%, .. 95%
/// when 20 samples are taken). `stats.min_value` and `stats.max_value` are appended afterwards,
/// in that order.
fn get_samples(fastfield_accessor: &impl FastFieldDataAccess, stats: &FastFieldStats) -> Vec<u64> {
    let num_samples = stats.num_vals.min(20);
    // Distance between two consecutive sample positions, expressed in percent.
    let step_size = 100.0 / num_samples as f32;

    let mut sample_values = Vec::new();
    for idx in 0..num_samples {
        // The float evaluation order is kept as-is so the truncated positions stay the same.
        let pos = (idx as f32 * step_size / 100.0 * stats.num_vals as f32) as usize;
        sample_values.push(fastfield_accessor.get_val(pos as u64));
    }

    sample_values.push(stats.min_value);
    sample_values.push(stats.max_value);
    sample_values
}

/// Estimates a GCD from `samples` and checks that estimate against `vals`.
///
/// The candidate comes from `compute_gcd(samples, base)`. It is returned as `Some(candidate)`
/// only when `is_valid_gcd(vals, candidate, base)` accepts it; otherwise the result is `None`.
pub(crate) fn find_gcd_from_samples(
    samples: &[u64],
    vals: impl Iterator<Item = u64>,
    base: u64,
) -> Option<u64> {
    let candidate = compute_gcd(samples, base);
    is_valid_gcd(vals, candidate, base).then(|| candidate)
}

pub(crate) fn find_gcd(
fastfield_accessor: &impl FastFieldDataAccess,
stats: FastFieldStats,
vals: impl Iterator<Item = u64>,
) -> Option<u64> {
if stats.num_vals == 0 {
return None;
gcd_divider = DividerU64::divide_by(gcd);
}

let samples = get_samples(fastfield_accessor, &stats);
find_gcd_from_samples(&samples, vals, stats.min_value)
Some(gcd)
}

#[cfg(test)]
Expand All @@ -75,7 +50,6 @@ mod tests {

use common::HasLen;

use super::*;
use crate::directory::{CompositeFile, RamDirectory, WritePtr};
use crate::fastfield::serializer::{FastFieldCodecEnableCheck, FastFieldCodecName, ALL_CODECS};
use crate::fastfield::tests::{FIELD, FIELDI64, SCHEMA, SCHEMAI64};
Expand Down Expand Up @@ -199,28 +173,4 @@ mod tests {
assert_eq!(test_fastfield.get(1), 200);
assert_eq!(test_fastfield.get(2), 300);
}

/// Three identical values, with the minimum as base, are expected to yield no GCD.
#[test]
fn test_gcd() {
    let data = [9223372036854775808_u64; 3];
    let base = data.iter().copied().min().unwrap();

    let gcd = find_gcd_from_samples(&data, data.iter().copied(), base);
    assert_eq!(gcd, None);
}

/// Values spaced 1000 apart above the minimum are expected to yield a GCD of 1000.
#[test]
fn test_gcd2() {
    let data = [
        9223372036854775808_u64,
        9223372036854776808,
        9223372036854777808,
    ];
    let base = data.iter().copied().min().unwrap();

    let gcd = find_gcd_from_samples(&data, data.iter().copied(), base);
    assert_eq!(gcd, Some(1000));
}
}
3 changes: 2 additions & 1 deletion src/fastfield/serializer/mod.rs
Expand Up @@ -175,7 +175,8 @@ impl CompositeFastFieldSerializer {
(self.fastfield_accessor.get_val(position) - self.min_value) / self.gcd
}
}
let gcd = find_gcd(&fastfield_accessor, stats.clone(), iter_gen()).unwrap_or(GCD_DEFAULT);
//let gcd = find_gcd(&fastfield_accessor, stats.clone(), iter_gen()).unwrap_or(GCD_DEFAULT);
let gcd = find_gcd(iter_gen(), stats.min_value).unwrap_or(GCD_DEFAULT);
let fastfield_accessor = WrappedFFAccess {
fastfield_accessor,
min_value: stats.min_value,
Expand Down

0 comments on commit 73dd5f0

Please sign in to comment.