Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

apply gcd on fastfield as preprocessing #1418

Merged
merged 9 commits into from Jul 29, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Expand Up @@ -60,6 +60,7 @@ pretty_assertions = "1.2.1"
serde_cbor = { version = "0.11.2", optional = true }
async-trait = "0.1.53"
arc-swap = "1.5.0"
gcd = "2.1.0"

[target.'cfg(windows)'.dependencies]
winapi = "0.3.9"
Expand Down
2 changes: 1 addition & 1 deletion fastfield_codecs/src/bitpacked.rs
Expand Up @@ -107,7 +107,7 @@ impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
/// values.
fn serialize(
write: &mut impl Write,
_fastfield_accessor: &impl FastFieldDataAccess,
_fastfield_accessor: &dyn FastFieldDataAccess,
stats: FastFieldStats,
data_iter: impl Iterator<Item = u64>,
_data_iter1: impl Iterator<Item = u64>,
Expand Down
2 changes: 1 addition & 1 deletion fastfield_codecs/src/lib.rs
Expand Up @@ -42,7 +42,7 @@ pub trait FastFieldCodecSerializer {
/// The iterators should be preferred over using fastfield_accessor for performance reasons.
fn serialize(
write: &mut impl Write,
fastfield_accessor: &impl FastFieldDataAccess,
fastfield_accessor: &dyn FastFieldDataAccess,
stats: FastFieldStats,
data_iter: impl Iterator<Item = u64>,
data_iter1: impl Iterator<Item = u64>,
Expand Down
2 changes: 1 addition & 1 deletion fastfield_codecs/src/linearinterpol.rs
Expand Up @@ -111,7 +111,7 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
/// Creates a new fast field serializer.
fn serialize(
write: &mut impl Write,
fastfield_accessor: &impl FastFieldDataAccess,
fastfield_accessor: &dyn FastFieldDataAccess,
stats: FastFieldStats,
data_iter: impl Iterator<Item = u64>,
data_iter1: impl Iterator<Item = u64>,
Expand Down
3 changes: 2 additions & 1 deletion fastfield_codecs/src/multilinearinterpol.rs
Expand Up @@ -75,6 +75,7 @@ impl BinarySerializable for Function {
self.positive_val_offset.serialize(write)?;
self.slope.serialize(write)?;
self.num_bits.serialize(write)?;

PSeitz marked this conversation as resolved.
Show resolved Hide resolved
Ok(())
}

Expand Down Expand Up @@ -195,7 +196,7 @@ impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
/// Creates a new fast field serializer.
fn serialize(
write: &mut impl Write,
fastfield_accessor: &impl FastFieldDataAccess,
fastfield_accessor: &dyn FastFieldDataAccess,
stats: FastFieldStats,
data_iter: impl Iterator<Item = u64>,
_data_iter1: impl Iterator<Item = u64>,
Expand Down
94 changes: 76 additions & 18 deletions src/fastfield/mod.rs
Expand Up @@ -276,10 +276,17 @@ mod tests {
schema_builder.build()
});

pub static SCHEMAI64: Lazy<Schema> = Lazy::new(|| {
let mut schema_builder = Schema::builder();
schema_builder.add_i64_field("field", FAST);
schema_builder.build()
});

pub static FIELD: Lazy<Field> = Lazy::new(|| SCHEMA.get_field("field").unwrap());
pub static FIELDI64: Lazy<Field> = Lazy::new(|| SCHEMAI64.get_field("field").unwrap());

#[test]
pub fn test_fastfield() {
pub fn test_fastfield2() {
PSeitz marked this conversation as resolved.
Show resolved Hide resolved
let test_fastfield = DynamicFastFieldReader::<u64>::from(vec![100, 200, 300]);
assert_eq!(test_fastfield.get(0), 100);
assert_eq!(test_fastfield.get(1), 200);
Expand Down Expand Up @@ -309,7 +316,7 @@ mod tests {
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 37);
assert_eq!(file.len(), 55);
let composite_file = CompositeFile::open(&file)?;
let file = composite_file.open_read(*FIELD).unwrap();
let fast_field_reader = DynamicFastFieldReader::<u64>::open(file)?;
Expand Down Expand Up @@ -340,7 +347,7 @@ mod tests {
serializer.close()?;
}
let file = directory.open_read(path)?;
assert_eq!(file.len(), 62);
assert_eq!(file.len(), 80);
{
let fast_fields_composite = CompositeFile::open(&file)?;
let data = fast_fields_composite.open_read(*FIELD).unwrap();
Expand Down Expand Up @@ -376,7 +383,7 @@ mod tests {
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 35);
assert_eq!(file.len(), 53);
{
let fast_fields_composite = CompositeFile::open(&file).unwrap();
let data = fast_fields_composite.open_read(*FIELD).unwrap();
Expand Down Expand Up @@ -408,7 +415,7 @@ mod tests {
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 80043);
assert_eq!(file.len(), 80061);
{
let fast_fields_composite = CompositeFile::open(&file)?;
let data = fast_fields_composite.open_read(*FIELD).unwrap();
Expand Down Expand Up @@ -448,7 +455,8 @@ mod tests {
}
let file = directory.open_read(path).unwrap();
// assert_eq!(file.len(), 17710 as usize); //bitpacked size
assert_eq!(file.len(), 10175_usize); // linear interpol size
// assert_eq!(file.len(), 10201_usize); // linear interpol size, before gcd = min_value
assert_eq!(file.len(), 93_usize);
{
let fast_fields_composite = CompositeFile::open(&file)?;
let data = fast_fields_composite.open_read(i64_field).unwrap();
Expand Down Expand Up @@ -505,10 +513,15 @@ mod tests {
permutation
}

#[test]
fn test_intfastfield_permutation() -> crate::Result<()> {
// Warning: this generates the same permutation at each call
pub fn generate_permutation_gcd() -> Vec<u64> {
PSeitz marked this conversation as resolved.
Show resolved Hide resolved
let mut permutation: Vec<u64> = (1u64..100_000u64).map(|el| el * 1000).collect();
permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
permutation
}

fn test_intfastfield_permutation_with_data(permutation: Vec<u64>) -> crate::Result<()> {
let path = Path::new("test");
let permutation = generate_permutation();
let n = permutation.len();
let directory = RamDirectory::create();
{
Expand All @@ -527,15 +540,27 @@ mod tests {
let data = fast_fields_composite.open_read(*FIELD).unwrap();
let fast_field_reader = DynamicFastFieldReader::<u64>::open(data)?;

let mut a = 0u64;
for _ in 0..n {
for a in 0..n {
assert_eq!(fast_field_reader.get(a as u32), permutation[a as usize]);
a = fast_field_reader.get(a as u32);
PSeitz marked this conversation as resolved.
Show resolved Hide resolved
}
}
Ok(())
}

#[test]
fn test_intfastfield_permutation_gcd() -> crate::Result<()> {
let permutation = generate_permutation_gcd();
test_intfastfield_permutation_with_data(permutation)?;
Ok(())
}

#[test]
fn test_intfastfield_permutation() -> crate::Result<()> {
let permutation = generate_permutation();
test_intfastfield_permutation_with_data(permutation)?;
Ok(())
}

#[test]
fn test_merge_missing_date_fast_field() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
Expand Down Expand Up @@ -861,7 +886,7 @@ mod tests {
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 36);
assert_eq!(file.len(), 54);
let composite_file = CompositeFile::open(&file)?;
let file = composite_file.open_read(field).unwrap();
let fast_field_reader = DynamicFastFieldReader::<bool>::open(file)?;
Expand Down Expand Up @@ -897,7 +922,7 @@ mod tests {
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 48);
assert_eq!(file.len(), 66);
let composite_file = CompositeFile::open(&file)?;
let file = composite_file.open_read(field).unwrap();
let fast_field_reader = DynamicFastFieldReader::<bool>::open(file)?;
Expand Down Expand Up @@ -931,7 +956,7 @@ mod tests {
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 35);
assert_eq!(file.len(), 53);
let composite_file = CompositeFile::open(&file)?;
let file = composite_file.open_read(field).unwrap();
let fast_field_reader = DynamicFastFieldReader::<bool>::open(file)?;
Expand All @@ -951,6 +976,7 @@ mod bench {
use super::tests::{generate_permutation, FIELD, SCHEMA};
use super::*;
use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
use crate::fastfield::tests::generate_permutation_gcd;
use crate::fastfield::FastFieldReader;

#[bench]
Expand Down Expand Up @@ -1037,10 +1063,42 @@ mod bench {
let fast_field_reader = DynamicFastFieldReader::<u64>::open(data).unwrap();

b.iter(|| {
let n = test::black_box(1000u32);
let mut a = 0u32;
for _ in 0u32..n {
a = fast_field_reader.get(a) as u32;
for i in 0u32..permutation.len() as u32 {
a = fast_field_reader.get(i) as u32;
}
a
});
}
}

#[bench]
fn bench_intfastfield_fflookup_gcd(b: &mut Bencher) {
let path = Path::new("test");
let permutation = generate_permutation_gcd();
let directory: RamDirectory = RamDirectory::create();
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
for &x in &permutation {
fast_field_writers.add_document(&doc!(*FIELD=>x));
}
fast_field_writers
.serialize(&mut serializer, &HashMap::new(), None)
.unwrap();
serializer.close().unwrap();
}
let file = directory.open_read(&path).unwrap();
{
let fast_fields_composite = CompositeFile::open(&file).unwrap();
let data = fast_fields_composite.open_read(*FIELD).unwrap();
let fast_field_reader = DynamicFastFieldReader::<u64>::open(data).unwrap();

b.iter(|| {
let mut a = 0u32;
for i in 0u32..permutation.len() as u32 {
a = fast_field_reader.get(i) as u32;
}
a
});
Expand Down
7 changes: 7 additions & 0 deletions src/fastfield/multivalued/mod.rs
Expand Up @@ -346,6 +346,13 @@ mod tests {
assert!(test_multivalued_no_panic(&ops[..]).is_ok());
}
}
#[test]
fn test_multivalued_proptest_gcd() {
use IndexingOp::*;
let ops = [AddDoc { id: 9 }, AddDoc { id: 9 }, Merge];

assert!(test_multivalued_no_panic(&ops[..]).is_ok());
}

#[test]
fn test_multivalued_proptest_off_by_one_bug_1151() {
Expand Down