From d3f036ed7127daf70cfe8e232bd16b1537e04744 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 21 Oct 2022 22:40:06 +0800 Subject: [PATCH] switch get_val() to u32 Fixes #1638 --- CHANGELOG.md | 1 + bitpacker/src/bitpacker.rs | 8 ++--- bitpacker/src/blocked_bitpacker.rs | 2 +- examples/custom_collector.rs | 2 +- examples/warmer.rs | 2 +- fastfield_codecs/benches/bench.rs | 16 +++++----- fastfield_codecs/src/bitpacked.rs | 2 +- fastfield_codecs/src/blockwise_linear.rs | 10 +++---- fastfield_codecs/src/column.rs | 18 ++++++------ fastfield_codecs/src/compact_space/mod.rs | 20 ++++++------- fastfield_codecs/src/lib.rs | 6 ++-- fastfield_codecs/src/line.rs | 8 ++--- fastfield_codecs/src/linear.rs | 8 ++--- src/aggregation/bucket/histogram/histogram.rs | 10 +++---- src/aggregation/bucket/range.rs | 10 +++---- src/aggregation/metric/average.rs | 10 +++---- src/aggregation/metric/stats.rs | 10 +++---- src/collector/filter_collector_wrapper.rs | 2 +- src/collector/histogram_collector.rs | 2 +- src/collector/tests.rs | 2 +- src/collector/top_score_collector.rs | 8 ++--- src/fastfield/bytes/reader.rs | 11 ++++--- src/fastfield/mod.rs | 14 ++++----- src/fastfield/multivalued/mod.rs | 6 ++-- src/fastfield/multivalued/reader.rs | 29 ++++++++++--------- src/fastfield/multivalued/writer.rs | 4 +-- src/fastfield/writer.rs | 2 +- src/indexer/index_writer.rs | 14 ++++----- src/indexer/merger.rs | 6 ++-- src/indexer/merger_sorted_index_test.rs | 12 ++++---- src/indexer/sorted_doc_id_column.rs | 6 ++-- .../sorted_doc_id_multivalue_column.rs | 4 +-- 32 files changed, 133 insertions(+), 132 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b4232592e..2208b8b38e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ Tantivy 0.19 ================================ +- Limit fast fields to u32 (`get_val(u32)`) [#1644](https://github.com/quickwit-oss/tantivy/pull/1644) (@PSeitz) - Major bugfix: Fix missing fieldnorms for u64, i64, f64, bool, bytes and date [#1620](https://github.com/quickwit-oss/tantivy/pull/1620) (@PSeitz) - Updated [Date Field Type](https://github.com/quickwit-oss/tantivy/pull/1396) The `DateTime` type has been updated to hold timestamps with microseconds precision. diff --git a/bitpacker/src/bitpacker.rs b/bitpacker/src/bitpacker.rs index 716f865f96..86dd1a7ca9 100644 --- a/bitpacker/src/bitpacker.rs +++ b/bitpacker/src/bitpacker.rs @@ -87,15 +87,15 @@ impl BitUnpacker { } #[inline] - pub fn get(&self, idx: u64, data: &[u8]) -> u64 { + pub fn get(&self, idx: u32, data: &[u8]) -> u64 { if self.num_bits == 0 { return 0u64; } - let addr_in_bits = idx * self.num_bits; + let addr_in_bits = idx * self.num_bits as u32; let addr = addr_in_bits >> 3; let bit_shift = addr_in_bits & 7; debug_assert!( - addr + 8 <= data.len() as u64, + addr + 8 <= data.len() as u32, "The fast field field should have been padded with 7 bytes." ); let bytes: [u8; 8] = (&data[(addr as usize)..(addr as usize) + 8]) @@ -130,7 +130,7 @@ mod test { fn test_bitpacker_util(len: usize, num_bits: u8) { let (bitunpacker, vals, data) = create_fastfield_bitpacker(len, num_bits); for (i, val) in vals.iter().enumerate() { - assert_eq!(bitunpacker.get(i as u64, &data), *val); + assert_eq!(bitunpacker.get(i as u32, &data), *val); } } diff --git a/bitpacker/src/blocked_bitpacker.rs b/bitpacker/src/blocked_bitpacker.rs index bfe958e767..fa4ee59679 100644 --- a/bitpacker/src/blocked_bitpacker.rs +++ b/bitpacker/src/blocked_bitpacker.rs @@ -130,7 +130,7 @@ impl BlockedBitpacker { let pos_in_block = idx % BLOCK_SIZE as usize; if let Some(metadata) = self.offset_and_bits.get(metadata_pos) { let unpacked = BitUnpacker::new(metadata.num_bits()).get( - pos_in_block as u64, + pos_in_block as u32, &self.compressed_blocks[metadata.offset() as usize..], ); unpacked + metadata.base_value() diff --git a/examples/custom_collector.rs b/examples/custom_collector.rs index f6ac5dcfb8..a162839c05 100644 --- a/examples/custom_collector.rs +++ b/examples/custom_collector.rs @@ -105,7 +105,7 @@ impl SegmentCollector for StatsSegmentCollector { type Fruit = Option; fn collect(&mut self, doc: u32, _score: Score) { - let value = self.fast_field_reader.get_val(doc as u64) as f64; + let value = self.fast_field_reader.get_val(doc) as f64; self.stats.count += 1; self.stats.sum += value; self.stats.squared_sum += value * value; diff --git a/examples/warmer.rs b/examples/warmer.rs index 6b8c2830ff..c9dc699f25 100644 --- a/examples/warmer.rs +++ b/examples/warmer.rs @@ -51,7 +51,7 @@ impl Warmer for DynamicPriceColumn { let product_id_reader = segment.fast_fields().u64(self.field)?; let product_ids: Vec = segment .doc_ids_alive() - .map(|doc| product_id_reader.get_val(doc as u64)) + .map(|doc| product_id_reader.get_val(doc)) .collect(); let mut prices_it = self.price_fetcher.fetch_prices(&product_ids).into_iter(); let mut price_vals: Vec = Vec::new(); diff --git a/fastfield_codecs/benches/bench.rs b/fastfield_codecs/benches/bench.rs index f2fd6bdde4..f73c6a5bd8 100644 --- a/fastfield_codecs/benches/bench.rs +++ b/fastfield_codecs/benches/bench.rs @@ -65,7 +65,7 @@ mod tests { b.iter(|| { let mut a = 0u64; for _ in 0..n { - a = column.get_val(a as u64); + a = column.get_val(a as u32); } a }); @@ -137,7 +137,7 @@ mod tests { b.iter(|| { let mut a = 0u128; for i in 0u64..column.num_vals() as u64 { - a += column.get_val(i); + a += column.get_val(i as u32); } a }); @@ -151,7 +151,7 @@ mod tests { let n = column.num_vals(); let mut a = 0u128; for i in (0..n / 5).map(|val| val * 5) { - a += column.get_val(i as u64); + a += column.get_val(i); } a }); @@ -176,9 +176,9 @@ mod tests { let n = permutation.len(); let column: Arc> = serialize_and_load(&permutation); b.iter(|| { - let mut a = 0u64; + let mut a = 0; for i in (0..n / 7).map(|val| val * 7) { - a += column.get_val(i as u64); + a += column.get_val(i as u32); } a }); @@ -191,7 +191,7 @@ mod tests { let column: Arc> = serialize_and_load(&permutation); b.iter(|| { let mut a = 0u64; - for i in 0u64..n as u64 { + for i in 0u32..n as u32 { a += column.get_val(i); } a @@ -205,8 +205,8 @@ mod tests { let column: Arc> = serialize_and_load(&permutation); b.iter(|| { let mut a = 0u64; - for i in 0..n as u64 { - a += column.get_val(i); + for i in 0..n { + a += column.get_val(i as u32); } a }); diff --git a/fastfield_codecs/src/bitpacked.rs b/fastfield_codecs/src/bitpacked.rs index 25416d947a..044debb967 100644 --- a/fastfield_codecs/src/bitpacked.rs +++ b/fastfield_codecs/src/bitpacked.rs @@ -17,7 +17,7 @@ pub struct BitpackedReader { impl Column for BitpackedReader { #[inline] - fn get_val(&self, doc: u64) -> u64 { + fn get_val(&self, doc: u32) -> u64 { self.bit_unpacker.get(doc, &self.data) } #[inline] diff --git a/fastfield_codecs/src/blockwise_linear.rs b/fastfield_codecs/src/blockwise_linear.rs index c589d304e4..553463cc7b 100644 --- a/fastfield_codecs/src/blockwise_linear.rs +++ b/fastfield_codecs/src/blockwise_linear.rs @@ -78,7 +78,7 @@ impl FastFieldCodec for BlockwiseLinearCodec { let mut first_chunk: Vec = column.iter().take(CHUNK_SIZE as usize).collect(); let line = Line::train(&VecColumn::from(&first_chunk)); for (i, buffer_val) in first_chunk.iter_mut().enumerate() { - let interpolated_val = line.eval(i as u64); + let interpolated_val = line.eval(i as u32); *buffer_val = buffer_val.wrapping_sub(interpolated_val); } let estimated_bit_width = first_chunk @@ -121,7 +121,7 @@ impl FastFieldCodec for BlockwiseLinearCodec { assert!(!buffer.is_empty()); for (i, buffer_val) in buffer.iter_mut().enumerate() { - let interpolated_val = line.eval(i as u64); + let interpolated_val = line.eval(i as u32); *buffer_val = buffer_val.wrapping_sub(interpolated_val); } let bit_width = buffer.iter().copied().map(compute_num_bits).max().unwrap(); @@ -161,9 +161,9 @@ pub struct BlockwiseLinearReader { impl Column for BlockwiseLinearReader { #[inline(always)] - fn get_val(&self, idx: u64) -> u64 { - let block_id = (idx / CHUNK_SIZE as u64) as usize; - let idx_within_block = idx % (CHUNK_SIZE as u64); + fn get_val(&self, idx: u32) -> u64 { + let block_id = (idx / CHUNK_SIZE as u32) as usize; + let idx_within_block = idx % (CHUNK_SIZE as u32); let block = &self.blocks[block_id]; let interpoled_val: u64 = block.line.eval(idx_within_block); let block_bytes = &self.data[block.data_start_offset..]; diff --git a/fastfield_codecs/src/column.rs b/fastfield_codecs/src/column.rs index 864601a919..e0ebefc062 100644 --- a/fastfield_codecs/src/column.rs +++ b/fastfield_codecs/src/column.rs @@ -14,7 +14,7 @@ pub trait Column: Send + Sync { /// # Panics /// /// May panic if `idx` is greater than the column length. - fn get_val(&self, idx: u64) -> T; + fn get_val(&self, idx: u32) -> T; /// Fills an output buffer with the fast field values /// associated with the `DocId` going from @@ -27,7 +27,7 @@ pub trait Column: Send + Sync { #[inline] fn get_range(&self, start: u64, output: &mut [T]) { for (out, idx) in output.iter_mut().zip(start..) { - *out = self.get_val(idx); + *out = self.get_val(idx as u32); } } @@ -35,10 +35,10 @@ pub trait Column: Send + Sync { #[inline] fn get_between_vals(&self, range: RangeInclusive) -> Vec { let mut vals = Vec::new(); - for idx in 0..self.num_vals() as u64 { + for idx in 0..self.num_vals() { let val = self.get_val(idx); if range.contains(&val) { - vals.push(idx); + vals.push(idx as u64); } } vals @@ -65,7 +65,7 @@ pub trait Column: Send + Sync { /// Returns a iterator over the data fn iter<'a>(&'a self) -> Box + 'a> { - Box::new((0..self.num_vals() as u64).map(|idx| self.get_val(idx))) + Box::new((0..self.num_vals()).map(|idx| self.get_val(idx))) } } @@ -77,7 +77,7 @@ pub struct VecColumn<'a, T = u64> { } impl<'a, C: Column, T: Copy + PartialOrd> Column for &'a C { - fn get_val(&self, idx: u64) -> T { + fn get_val(&self, idx: u32) -> T { (*self).get_val(idx) } @@ -103,7 +103,7 @@ impl<'a, C: Column, T: Copy + PartialOrd> Column for &'a C { } impl<'a, T: Copy + PartialOrd + Send + Sync> Column for VecColumn<'a, T> { - fn get_val(&self, position: u64) -> T { + fn get_val(&self, position: u32) -> T { self.values[position as usize] } @@ -188,7 +188,7 @@ where Output: PartialOrd + Send + Sync + Clone, { #[inline] - fn get_val(&self, idx: u64) -> Output { + fn get_val(&self, idx: u32) -> Output { let from_val = self.from_column.get_val(idx); self.monotonic_mapping.mapping(from_val) } @@ -241,7 +241,7 @@ where T: Iterator + Clone + ExactSizeIterator + Send + Sync, T::Item: PartialOrd, { - fn get_val(&self, idx: u64) -> T::Item { + fn get_val(&self, idx: u32) -> T::Item { self.0.clone().nth(idx as usize).unwrap() } diff --git a/fastfield_codecs/src/compact_space/mod.rs b/fastfield_codecs/src/compact_space/mod.rs index b47bb5744c..9a12c29a85 100644 --- a/fastfield_codecs/src/compact_space/mod.rs +++ b/fastfield_codecs/src/compact_space/mod.rs @@ -284,7 +284,7 @@ impl BinarySerializable for IPCodecParams { impl Column for CompactSpaceDecompressor { #[inline] - fn get_val(&self, doc: u64) -> u128 { + fn get_val(&self, doc: u32) -> u128 { self.get(doc) } @@ -385,17 +385,17 @@ impl CompactSpaceDecompressor { positions.push(idx); } }; - let get_val = |idx| self.params.bit_unpacker.get(idx as u64, &self.data); + let get_val = |idx| self.params.bit_unpacker.get(idx, &self.data); // unrolled loop for idx in (0..cutoff).step_by(step_size as usize) { let idx1 = idx; let idx2 = idx + 1; let idx3 = idx + 2; let idx4 = idx + 3; - let val1 = get_val(idx1); - let val2 = get_val(idx2); - let val3 = get_val(idx3); - let val4 = get_val(idx4); + let val1 = get_val(idx1 as u32); + let val2 = get_val(idx2 as u32); + let val3 = get_val(idx3 as u32); + let val4 = get_val(idx4 as u32); push_if_in_range(idx1, val1); push_if_in_range(idx2, val2); push_if_in_range(idx3, val3); @@ -404,7 +404,7 @@ impl CompactSpaceDecompressor { // handle rest for idx in cutoff..self.params.num_vals as u64 { - push_if_in_range(idx, get_val(idx)); + push_if_in_range(idx, get_val(idx as u32)); } positions @@ -413,7 +413,7 @@ impl CompactSpaceDecompressor { #[inline] fn iter_compact(&self) -> impl Iterator + '_ { (0..self.params.num_vals) - .map(move |idx| self.params.bit_unpacker.get(idx as u64, &self.data) as u64) + .map(move |idx| self.params.bit_unpacker.get(idx, &self.data) as u64) } #[inline] @@ -425,7 +425,7 @@ impl CompactSpaceDecompressor { } #[inline] - pub fn get(&self, idx: u64) -> u128 { + pub fn get(&self, idx: u32) -> u128 { let compact = self.params.bit_unpacker.get(idx, &self.data); self.compact_to_u128(compact) } @@ -491,7 +491,7 @@ mod tests { fn test_all(data: OwnedBytes, expected: &[u128]) { let decompressor = CompactSpaceDecompressor::open(data).unwrap(); for (idx, expected_val) in expected.iter().cloned().enumerate() { - let val = decompressor.get(idx as u64); + let val = decompressor.get(idx as u32); assert_eq!(val, expected_val); let test_range = |range: RangeInclusive| { diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index 25ca123e27..3268319f40 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -201,7 +201,7 @@ mod tests { let reader = crate::open::(OwnedBytes::new(out)).unwrap(); assert_eq!(reader.num_vals(), data.len() as u32); for (doc, orig_val) in data.iter().copied().enumerate() { - let val = reader.get_val(doc as u64); + let val = reader.get_val(doc as u32); assert_eq!( val, orig_val, "val `{val}` does not match orig_val {orig_val:?}, in data set {name}, data \ @@ -429,7 +429,7 @@ mod bench { b.iter(|| { let mut sum = 0u64; for pos in value_iter() { - let val = col.get_val(pos as u64); + let val = col.get_val(pos as u32); sum = sum.wrapping_add(val); } sum @@ -441,7 +441,7 @@ mod bench { b.iter(|| { let mut sum = 0u64; for pos in value_iter() { - let val = col.get_val(pos as u64); + let val = col.get_val(pos as u32); sum = sum.wrapping_add(val); } sum diff --git a/fastfield_codecs/src/line.rs b/fastfield_codecs/src/line.rs index 4613faf046..3b081b0ede 100644 --- a/fastfield_codecs/src/line.rs +++ b/fastfield_codecs/src/line.rs @@ -62,8 +62,8 @@ fn compute_slope(y0: u64, y1: u64, num_vals: NonZeroU32) -> u64 { impl Line { #[inline(always)] - pub fn eval(&self, x: u64) -> u64 { - let linear_part = (x.wrapping_mul(self.slope) >> 32) as i32 as u64; + pub fn eval(&self, x: u32) -> u64 { + let linear_part = ((x as u64).wrapping_mul(self.slope) >> 32) as i32 as u64; self.intercept.wrapping_add(linear_part) } @@ -129,7 +129,7 @@ impl Line { }; let heuristic_shift = y0.wrapping_sub(MID_POINT); line.intercept = positions_and_values - .map(|(pos, y)| y.wrapping_sub(line.eval(pos))) + .map(|(pos, y)| y.wrapping_sub(line.eval(pos as u32))) .min_by_key(|&val| val.wrapping_sub(heuristic_shift)) .unwrap_or(0u64); //< Never happens. line @@ -199,7 +199,7 @@ mod tests { let line = Line::train(&VecColumn::from(&ys)); ys.iter() .enumerate() - .map(|(x, y)| y.wrapping_sub(line.eval(x as u64))) + .map(|(x, y)| y.wrapping_sub(line.eval(x as u32))) .max() } diff --git a/fastfield_codecs/src/linear.rs b/fastfield_codecs/src/linear.rs index ad2a0ca74e..d75eeea808 100644 --- a/fastfield_codecs/src/linear.rs +++ b/fastfield_codecs/src/linear.rs @@ -19,7 +19,7 @@ pub struct LinearReader { impl Column for LinearReader { #[inline] - fn get_val(&self, doc: u64) -> u64 { + fn get_val(&self, doc: u32) -> u64 { let interpoled_val: u64 = self.linear_params.line.eval(doc); let bitpacked_diff = self.linear_params.bit_unpacker.get(doc, &self.data); interpoled_val.wrapping_add(bitpacked_diff) @@ -93,7 +93,7 @@ impl FastFieldCodec for LinearCodec { .iter() .enumerate() .map(|(pos, actual_value)| { - let calculated_value = line.eval(pos as u64); + let calculated_value = line.eval(pos as u32); actual_value.wrapping_sub(calculated_value) }) .max() @@ -108,7 +108,7 @@ impl FastFieldCodec for LinearCodec { let mut bit_packer = BitPacker::new(); for (pos, actual_value) in column.iter().enumerate() { - let calculated_value = line.eval(pos as u64); + let calculated_value = line.eval(pos as u32); let offset = actual_value.wrapping_sub(calculated_value); bit_packer.write(offset, num_bits, write)?; } @@ -140,7 +140,7 @@ impl FastFieldCodec for LinearCodec { let estimated_bit_width = sample_positions_and_values .into_iter() .map(|(pos, actual_value)| { - let interpolated_val = line.eval(pos as u64); + let interpolated_val = line.eval(pos as u32); actual_value.wrapping_sub(interpolated_val) }) .map(|diff| ((diff as f32 * 1.5) * 2.0) as u64) diff --git a/src/aggregation/bucket/histogram/histogram.rs b/src/aggregation/bucket/histogram/histogram.rs index 92053fc216..c2d0c1277c 100644 --- a/src/aggregation/bucket/histogram/histogram.rs +++ b/src/aggregation/bucket/histogram/histogram.rs @@ -331,10 +331,10 @@ impl SegmentHistogramCollector { .expect("unexpected fast field cardinatility"); let mut iter = doc.chunks_exact(4); for docs in iter.by_ref() { - let val0 = self.f64_from_fastfield_u64(accessor.get_val(docs[0] as u64)); - let val1 = self.f64_from_fastfield_u64(accessor.get_val(docs[1] as u64)); - let val2 = self.f64_from_fastfield_u64(accessor.get_val(docs[2] as u64)); - let val3 = self.f64_from_fastfield_u64(accessor.get_val(docs[3] as u64)); + let val0 = self.f64_from_fastfield_u64(accessor.get_val(docs[0])); + let val1 = self.f64_from_fastfield_u64(accessor.get_val(docs[1])); + let val2 = self.f64_from_fastfield_u64(accessor.get_val(docs[2])); + let val3 = self.f64_from_fastfield_u64(accessor.get_val(docs[3])); let bucket_pos0 = get_bucket_num(val0); let bucket_pos1 = get_bucket_num(val1); @@ -371,7 +371,7 @@ impl SegmentHistogramCollector { )?; } for &doc in iter.remainder() { - let val = f64_from_fastfield_u64(accessor.get_val(doc as u64), &self.field_type); + let val = f64_from_fastfield_u64(accessor.get_val(doc), &self.field_type); if !bounds.contains(val) { continue; } diff --git a/src/aggregation/bucket/range.rs b/src/aggregation/bucket/range.rs index 4773831640..33645cb8f0 100644 --- a/src/aggregation/bucket/range.rs +++ b/src/aggregation/bucket/range.rs @@ -263,10 +263,10 @@ impl SegmentRangeCollector { .as_single() .expect("unexpected fast field cardinality"); for docs in iter.by_ref() { - let val1 = accessor.get_val(docs[0] as u64); - let val2 = accessor.get_val(docs[1] as u64); - let val3 = accessor.get_val(docs[2] as u64); - let val4 = accessor.get_val(docs[3] as u64); + let val1 = accessor.get_val(docs[0]); + let val2 = accessor.get_val(docs[1]); + let val3 = accessor.get_val(docs[2]); + let val4 = accessor.get_val(docs[3]); let bucket_pos1 = self.get_bucket_pos(val1); let bucket_pos2 = self.get_bucket_pos(val2); let bucket_pos3 = self.get_bucket_pos(val3); @@ -278,7 +278,7 @@ impl SegmentRangeCollector { self.increment_bucket(bucket_pos4, docs[3], &bucket_with_accessor.sub_aggregation)?; } for &doc in iter.remainder() { - let val = accessor.get_val(doc as u64); + let val = accessor.get_val(doc); let bucket_pos = self.get_bucket_pos(val); self.increment_bucket(bucket_pos, doc, &bucket_with_accessor.sub_aggregation)?; } diff --git a/src/aggregation/metric/average.rs b/src/aggregation/metric/average.rs index 206bb76079..2f22430b4a 100644 --- a/src/aggregation/metric/average.rs +++ b/src/aggregation/metric/average.rs @@ -60,10 +60,10 @@ impl SegmentAverageCollector { pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn Column) { let mut iter = doc.chunks_exact(4); for docs in iter.by_ref() { - let val1 = field.get_val(docs[0] as u64); - let val2 = field.get_val(docs[1] as u64); - let val3 = field.get_val(docs[2] as u64); - let val4 = field.get_val(docs[3] as u64); + let val1 = field.get_val(docs[0]); + let val2 = field.get_val(docs[1]); + let val3 = field.get_val(docs[2]); + let val4 = field.get_val(docs[3]); let val1 = f64_from_fastfield_u64(val1, &self.field_type); let val2 = f64_from_fastfield_u64(val2, &self.field_type); let val3 = f64_from_fastfield_u64(val3, &self.field_type); @@ -74,7 +74,7 @@ impl SegmentAverageCollector { self.data.collect(val4); } for &doc in iter.remainder() { - let val = field.get_val(doc as u64); + let val = field.get_val(doc); let val = f64_from_fastfield_u64(val, &self.field_type); self.data.collect(val); } diff --git a/src/aggregation/metric/stats.rs b/src/aggregation/metric/stats.rs index cb4236b8a1..f84944c261 100644 --- a/src/aggregation/metric/stats.rs +++ b/src/aggregation/metric/stats.rs @@ -166,10 +166,10 @@ impl SegmentStatsCollector { pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn Column) { let mut iter = doc.chunks_exact(4); for docs in iter.by_ref() { - let val1 = field.get_val(docs[0] as u64); - let val2 = field.get_val(docs[1] as u64); - let val3 = field.get_val(docs[2] as u64); - let val4 = field.get_val(docs[3] as u64); + let val1 = field.get_val(docs[0]); + let val2 = field.get_val(docs[1]); + let val3 = field.get_val(docs[2]); + let val4 = field.get_val(docs[3]); let val1 = f64_from_fastfield_u64(val1, &self.field_type); let val2 = f64_from_fastfield_u64(val2, &self.field_type); let val3 = f64_from_fastfield_u64(val3, &self.field_type); @@ -180,7 +180,7 @@ impl SegmentStatsCollector { self.stats.collect(val4); } for &doc in iter.remainder() { - let val = field.get_val(doc as u64); + let val = field.get_val(doc); let val = f64_from_fastfield_u64(val, &self.field_type); self.stats.collect(val); } diff --git a/src/collector/filter_collector_wrapper.rs b/src/collector/filter_collector_wrapper.rs index 323bddc092..15f52e29c7 100644 --- a/src/collector/filter_collector_wrapper.rs +++ b/src/collector/filter_collector_wrapper.rs @@ -177,7 +177,7 @@ where type Fruit = TSegmentCollector::Fruit; fn collect(&mut self, doc: u32, score: Score) { - let value = self.fast_field_reader.get_val(doc as u64); + let value = self.fast_field_reader.get_val(doc); if (self.predicate)(value) { self.segment_collector.collect(doc, score) } diff --git a/src/collector/histogram_collector.rs b/src/collector/histogram_collector.rs index 915751f51b..dac0e19d95 100644 --- a/src/collector/histogram_collector.rs +++ b/src/collector/histogram_collector.rs @@ -94,7 +94,7 @@ impl SegmentCollector for SegmentHistogramCollector { type Fruit = Vec; fn collect(&mut self, doc: DocId, _score: Score) { - let value = self.ff_reader.get_val(doc as u64); + let value = self.ff_reader.get_val(doc); self.histogram_computer.add_value(value); } diff --git a/src/collector/tests.rs b/src/collector/tests.rs index 6901108408..2dd1942452 100644 --- a/src/collector/tests.rs +++ b/src/collector/tests.rs @@ -201,7 +201,7 @@ impl SegmentCollector for FastFieldSegmentCollector { type Fruit = Vec; fn collect(&mut self, doc: DocId, _score: Score) { - let val = self.reader.get_val(doc as u64); + let val = self.reader.get_val(doc); self.vals.push(val); } diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index 43b45cdbfe..fdd6fb1fd7 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -137,7 +137,7 @@ struct ScorerByFastFieldReader { impl CustomSegmentScorer for ScorerByFastFieldReader { fn score(&mut self, doc: DocId) -> u64 { - self.ff_reader.get_val(doc as u64) + self.ff_reader.get_val(doc) } } @@ -458,7 +458,7 @@ impl TopDocs { /// /// // We can now define our actual scoring function /// move |doc: DocId, original_score: Score| { - /// let popularity: u64 = popularity_reader.get_val(doc as u64); + /// let popularity: u64 = popularity_reader.get_val(doc); /// // Well.. For the sake of the example we use a simple logarithm /// // function. /// let popularity_boost_score = ((2u64 + popularity) as Score).log2(); @@ -567,8 +567,8 @@ impl TopDocs { /// /// // We can now define our actual scoring function /// move |doc: DocId| { - /// let popularity: u64 = popularity_reader.get_val(doc as u64); - /// let boosted: u64 = boosted_reader.get_val(doc as u64); + /// let popularity: u64 = popularity_reader.get_val(doc); + /// let boosted: u64 = boosted_reader.get_val(doc); /// // Score do not have to be `f64` in tantivy. /// // Here we return a couple to get lexicographical order /// // for free. diff --git a/src/fastfield/bytes/reader.rs b/src/fastfield/bytes/reader.rs index 1340bfb664..c4a4f2ffc9 100644 --- a/src/fastfield/bytes/reader.rs +++ b/src/fastfield/bytes/reader.rs @@ -32,10 +32,9 @@ impl BytesFastFieldReader { Ok(BytesFastFieldReader { idx_reader, values }) } - fn range(&self, doc: DocId) -> Range { - let idx = doc as u64; - let start = self.idx_reader.get_val(idx); - let end = self.idx_reader.get_val(idx + 1); + fn range(&self, doc: DocId) -> Range { + let start = self.idx_reader.get_val(doc) as u32; + let end = self.idx_reader.get_val(doc + 1) as u32; start..end } @@ -48,7 +47,7 @@ impl BytesFastFieldReader { /// Returns the length of the bytes associated with the given `doc` pub fn num_bytes(&self, doc: DocId) -> u64 { let range = self.range(doc); - range.end - range.start + (range.end - range.start) as u64 } /// Returns the overall number of bytes in this bytes fast field. @@ -58,7 +57,7 @@ impl BytesFastFieldReader { } impl MultiValueLength for BytesFastFieldReader { - fn get_range(&self, doc_id: DocId) -> std::ops::Range { + fn get_range(&self, doc_id: DocId) -> std::ops::Range { self.range(doc_id) } fn get_len(&self, doc_id: DocId) -> u64 { diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index cf65cb1697..5bd90f6ce4 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -51,7 +51,7 @@ mod writer; /// for a doc_id pub trait MultiValueLength { /// returns the positions for a docid - fn get_range(&self, doc_id: DocId) -> std::ops::Range; + fn get_range(&self, doc_id: DocId) -> std::ops::Range; /// returns the num of values associated with a doc_id fn get_len(&self, doc_id: DocId) -> u64; /// returns the sum of num values for all doc_ids @@ -184,9 +184,9 @@ mod tests { #[test] pub fn test_fastfield() { let test_fastfield = fastfield_codecs::serialize_and_load(&[100u64, 200u64, 300u64][..]); - assert_eq!(test_fastfield.get_val(0u64), 100); - assert_eq!(test_fastfield.get_val(1u64), 200); - assert_eq!(test_fastfield.get_val(2u64), 300); + assert_eq!(test_fastfield.get_val(0), 100); + assert_eq!(test_fastfield.get_val(1), 200); + assert_eq!(test_fastfield.get_val(2), 300); } #[test] @@ -402,7 +402,7 @@ mod tests { assert_eq!(fast_field_reader.min_value(), -100i64); assert_eq!(fast_field_reader.max_value(), 9_999i64); for (doc, i) in (-100i64..10_000i64).enumerate() { - assert_eq!(fast_field_reader.get_val(doc as u64), i); + assert_eq!(fast_field_reader.get_val(doc as u32), i); } let mut buffer = vec![0i64; 100]; fast_field_reader.get_range(53, &mut buffer[..]); @@ -484,7 +484,7 @@ mod tests { let fast_field_reader = open::(data)?; for a in 0..n { - assert_eq!(fast_field_reader.get_val(a as u64), permutation[a as usize]); + assert_eq!(fast_field_reader.get_val(a as u32), permutation[a as usize]); } } Ok(()) @@ -976,7 +976,7 @@ mod tests { let test_fastfield = open::(file.read_bytes()?)?; for (i, time) in times.iter().enumerate() { - assert_eq!(test_fastfield.get_val(i as u64), time.truncate(precision)); + assert_eq!(test_fastfield.get_val(i as u32), time.truncate(precision)); } Ok(len) } diff --git a/src/fastfield/multivalued/mod.rs b/src/fastfield/multivalued/mod.rs index c625a2e76d..0437ef491d 100644 --- a/src/fastfield/multivalued/mod.rs +++ b/src/fastfield/multivalued/mod.rs @@ -515,7 +515,7 @@ mod bench { for val in block { doc.add_u64(field, *val); } - fast_field_writers.add_document(&doc); + fast_field_writers.add_document(&doc).unwrap(); } fast_field_writers .serialize(&mut serializer, &HashMap::new(), None) @@ -573,7 +573,7 @@ mod bench { for val in block { doc.add_u64(field, *val); } - fast_field_writers.add_document(&doc); + fast_field_writers.add_document(&doc).unwrap(); } fast_field_writers .serialize(&mut serializer, &HashMap::new(), None) @@ -606,7 +606,7 @@ mod bench { for val in block { doc.add_u64(field, *val); } - fast_field_writers.add_document(&doc); + fast_field_writers.add_document(&doc).unwrap(); } fast_field_writers .serialize(&mut serializer, &HashMap::new(), Some(&doc_id_mapping)) diff --git a/src/fastfield/multivalued/reader.rs b/src/fastfield/multivalued/reader.rs index 054bb01e4d..54f3144f3a 100644 --- a/src/fastfield/multivalued/reader.rs +++ b/src/fastfield/multivalued/reader.rs @@ -33,19 +33,19 @@ impl MultiValuedFastFieldReader { /// Returns `[start, end)`, such that the values associated with /// the given document are `start..end`. #[inline] - fn range(&self, doc: DocId) -> Range { - let idx = doc as u64; - let start = self.idx_reader.get_val(idx); - let end = self.idx_reader.get_val(idx + 1); + fn range(&self, doc: DocId) -> Range { + let start = self.idx_reader.get_val(doc) as u32; + let end = self.idx_reader.get_val(doc + 1) as u32; start..end } /// Returns the array of values associated with the given `doc`. #[inline] - fn get_vals_for_range(&self, range: Range, vals: &mut Vec) { + fn get_vals_for_range(&self, range: Range, vals: &mut Vec) { let len = (range.end - range.start) as usize; vals.resize(len, Item::make_zero()); - self.vals_reader.get_range(range.start, &mut vals[..]); + self.vals_reader + .get_range(range.start as u64, &mut vals[..]); } /// Returns the array of values associated with the given `doc`. @@ -88,7 +88,7 @@ impl MultiValuedFastFieldReader { } impl MultiValueLength for MultiValuedFastFieldReader { - fn get_range(&self, doc_id: DocId) -> Range { + fn get_range(&self, doc_id: DocId) -> Range { self.range(doc_id) } fn get_len(&self, doc_id: DocId) -> u64 { @@ -127,9 +127,9 @@ impl MultiValuedU128FastFieldReader { /// Returns `[start, end)`, such that the values associated /// to the given document are `start..end`. #[inline] - fn range(&self, doc: DocId) -> Range { - let start = self.idx_reader.get_val(doc as u64); - let end = self.idx_reader.get_val(doc as u64 + 1); + fn range(&self, doc: DocId) -> Range { + let start = self.idx_reader.get_val(doc) as u32; + let end = self.idx_reader.get_val(doc + 1) as u32; start..end } @@ -145,10 +145,11 @@ impl MultiValuedU128FastFieldReader { /// Returns the array of values associated to the given `doc`. #[inline] - fn get_vals_for_range(&self, range: Range, vals: &mut Vec) { + fn get_vals_for_range(&self, range: Range, vals: &mut Vec) { let len = (range.end - range.start) as usize; vals.resize(len, T::from_u128(0)); - self.vals_reader.get_range(range.start, &mut vals[..]); + self.vals_reader + .get_range(range.start as u64, &mut vals[..]); } /// Returns the array of values associated to the given `doc`. @@ -203,7 +204,7 @@ impl MultiValuedU128FastFieldReader { } impl MultiValueLength for MultiValuedU128FastFieldReader { - fn get_range(&self, doc_id: DocId) -> std::ops::Range { + fn get_range(&self, doc_id: DocId) -> std::ops::Range { self.range(doc_id) } fn get_len(&self, doc_id: DocId) -> u64 { @@ -230,7 +231,7 @@ fn positions_to_docids(positions: &[u64], idx_reader: &C) -> for pos in positions { loop { - let end = idx_reader.get_val(cur_doc as u64 + 1); + let end = idx_reader.get_val(cur_doc + 1); if end > *pos { // avoid duplicates if Some(cur_doc) == last_doc { diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs index 446f210047..fa248b5e1a 100644 --- a/src/fastfield/multivalued/writer.rs +++ b/src/fastfield/multivalued/writer.rs @@ -232,7 +232,7 @@ impl<'a, C: Column> MultivalueStartIndex<'a, C> { } } impl<'a, C: Column> Column for MultivalueStartIndex<'a, C> { - fn get_val(&self, _idx: u64) -> u64 { + fn get_val(&self, _idx: u32) -> u64 { unimplemented!() } @@ -262,7 +262,7 @@ fn iter_remapped_multivalue_index<'a, C: Column>( ) -> impl Iterator + 'a { let mut offset = 0; std::iter::once(0).chain(doc_id_map.iter_old_doc_ids().map(move |old_doc| { - let num_vals_for_doc = column.get_val(old_doc as u64 + 1) - column.get_val(old_doc as u64); + let num_vals_for_doc = column.get_val(old_doc + 1) - column.get_val(old_doc); offset += num_vals_for_doc; offset as u64 })) diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index f12027ad16..82dccf09a1 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -538,7 +538,7 @@ impl<'map, 'bitp> Column for WriterFastFieldAccessProvider<'map, 'bitp> { /// # Panics /// /// May panic if `doc` is greater than the index. - fn get_val(&self, _doc: u64) -> u64 { + fn get_val(&self, _doc: u32) -> u64 { unimplemented!() } diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index ab1c8efe68..272300549e 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -1472,7 +1472,7 @@ mod tests { let fast_field_reader = segment_reader.fast_fields().u64(id_field)?; let in_order_alive_ids: Vec = segment_reader .doc_ids_alive() - .map(|doc| fast_field_reader.get_val(doc as u64)) + .map(|doc| fast_field_reader.get_val(doc)) .collect(); assert_eq!(&in_order_alive_ids[..], &[9, 8, 7, 6, 5, 4, 1, 0]); Ok(()) @@ -1533,7 +1533,7 @@ mod tests { let fast_field_reader = segment_reader.fast_fields().u64(id_field)?; let in_order_alive_ids: Vec = segment_reader .doc_ids_alive() - .map(|doc| fast_field_reader.get_val(doc as u64)) + .map(|doc| fast_field_reader.get_val(doc)) .collect(); assert_eq!(&in_order_alive_ids[..], &[9, 8, 7, 6, 5, 4, 2, 0]); Ok(()) @@ -1760,7 +1760,7 @@ mod tests { let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap(); segment_reader .doc_ids_alive() - .map(move |doc| ff_reader.get_val(doc as u64)) + .map(move |doc| ff_reader.get_val(doc)) }) .collect(); @@ -1771,7 +1771,7 @@ mod tests { let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap(); segment_reader .doc_ids_alive() - .map(move |doc| ff_reader.get_val(doc as u64)) + .map(move |doc| ff_reader.get_val(doc)) }) .collect(); @@ -1804,7 +1804,7 @@ mod tests { .flat_map(|segment_reader| { let ff_reader = segment_reader.fast_fields().ip_addr(ip_field).unwrap(); segment_reader.doc_ids_alive().flat_map(move |doc| { - let val = ff_reader.get_val(doc as u64); + let val = ff_reader.get_val(doc); if val == Ipv6Addr::from_u128(0) { // TODO Fix null handling None @@ -1861,7 +1861,7 @@ mod tests { ff_reader.get_vals(doc, &mut vals); assert_eq!(vals.len(), 2); assert_eq!(vals[0], vals[1]); - assert_eq!(id_reader.get_val(doc as u64), vals[0]); + assert_eq!(id_reader.get_val(doc), vals[0]); let mut bool_vals = vec![]; bool_ff_reader.get_vals(doc, &mut bool_vals); @@ -2012,7 +2012,7 @@ mod tests { facet_reader .facet_from_ord(facet_ords[0], &mut facet) .unwrap(); - let id = ff_reader.get_val(doc_id as u64); + let id = ff_reader.get_val(doc_id); let facet_expected = Facet::from(&("/cola/".to_string() + &id.to_string())); assert_eq!(facet, facet_expected); diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index bfe535c87d..26ec45106f 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -397,7 +397,7 @@ impl IndexMerger { let iter_gen = || { doc_id_mapping.iter_old_doc_addrs().map(|doc_addr| { let fast_field_reader = &fast_field_readers[doc_addr.segment_ord as usize]; - fast_field_reader.get_val(doc_addr.doc_id as u64) + fast_field_reader.get_val(doc_addr.doc_id) }) }; fast_field_serializer.create_u128_fast_field_with_idx( @@ -510,8 +510,8 @@ impl IndexMerger { doc_id_reader_pair .into_iter() .kmerge_by(|a, b| { - let val1 = a.2.get_val(a.0 as u64); - let val2 = b.2.get_val(b.0 as u64); + let val1 = a.2.get_val(a.0); + let val2 = b.2.get_val(b.0); if sort_by_field.order == Order::Asc { val1 < val2 } else { diff --git a/src/indexer/merger_sorted_index_test.rs b/src/indexer/merger_sorted_index_test.rs index 127ad192c3..ba41e62f09 100644 --- a/src/indexer/merger_sorted_index_test.rs +++ b/src/indexer/merger_sorted_index_test.rs @@ -190,13 +190,13 @@ mod tests { assert_eq!(fast_field.get_val(4), 2u64); assert_eq!(fast_field.get_val(3), 3u64); if force_disjunct_segment_sort_values { - assert_eq!(fast_field.get_val(2u64), 20u64); - assert_eq!(fast_field.get_val(1u64), 100u64); + assert_eq!(fast_field.get_val(2), 20u64); + assert_eq!(fast_field.get_val(1), 100u64); } else { - assert_eq!(fast_field.get_val(2u64), 10u64); - assert_eq!(fast_field.get_val(1u64), 20u64); + assert_eq!(fast_field.get_val(2), 10u64); + assert_eq!(fast_field.get_val(1), 20u64); } - assert_eq!(fast_field.get_val(0u64), 1_000u64); + assert_eq!(fast_field.get_val(0), 1_000u64); // test new field norm mapping { @@ -545,7 +545,7 @@ mod bench_sorted_index_merge { // add values in order of the new doc_ids let mut val = 0; for (doc_id, _reader, field_reader) in sorted_doc_ids { - val = field_reader.get_val(doc_id as u64); + val = field_reader.get_val(doc_id); } val diff --git a/src/indexer/sorted_doc_id_column.rs b/src/indexer/sorted_doc_id_column.rs index 3d5b360598..75665bab02 100644 --- a/src/indexer/sorted_doc_id_column.rs +++ b/src/indexer/sorted_doc_id_column.rs @@ -32,7 +32,7 @@ fn compute_min_max_val( // we need to recompute the max / min segment_reader .doc_ids_alive() - .map(|doc_id| u64_reader.get_val(doc_id as u64)) + .map(|doc_id| u64_reader.get_val(doc_id)) .minmax() .into_option() } @@ -79,7 +79,7 @@ impl<'a> RemappedDocIdColumn<'a> { } impl<'a> Column for RemappedDocIdColumn<'a> { - fn get_val(&self, _doc: u64) -> u64 { + fn get_val(&self, _doc: u32) -> u64 { unimplemented!() } @@ -90,7 +90,7 @@ impl<'a> Column for RemappedDocIdColumn<'a> { .map(|old_doc_addr| { let fast_field_reader = &self.fast_field_readers[old_doc_addr.segment_ord as usize]; - fast_field_reader.get_val(old_doc_addr.doc_id as u64) + fast_field_reader.get_val(old_doc_addr.doc_id) }), ) } diff --git a/src/indexer/sorted_doc_id_multivalue_column.rs b/src/indexer/sorted_doc_id_multivalue_column.rs index 650043f604..1886a69b1d 100644 --- a/src/indexer/sorted_doc_id_multivalue_column.rs +++ b/src/indexer/sorted_doc_id_multivalue_column.rs @@ -67,7 +67,7 @@ impl<'a> RemappedDocIdMultiValueColumn<'a> { } impl<'a> Column for RemappedDocIdMultiValueColumn<'a> { - fn get_val(&self, _pos: u64) -> u64 { + fn get_val(&self, _pos: u32) -> u64 { unimplemented!() } @@ -137,7 +137,7 @@ impl<'a, T: MultiValueLength> RemappedDocIdMultiValueIndexColumn<'a, T> { } impl<'a, T: MultiValueLength + Send + Sync> Column for RemappedDocIdMultiValueIndexColumn<'a, T> { - fn get_val(&self, _pos: u64) -> u64 { + fn get_val(&self, _pos: u32) -> u64 { unimplemented!() }