switch get_val() to u32
Fixes #1638
PSeitz committed Oct 24, 2022
1 parent c24157f commit d3f036e
Showing 32 changed files with 133 additions and 132 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -1,6 +1,7 @@
Tantivy 0.19
================================

+ - Limit fast fields to u32 (`get_val(u32)`) [#1644](https://github.com/quickwit-oss/tantivy/pull/1644) (@PSeitz)
- Major bugfix: Fix missing fieldnorms for u64, i64, f64, bool, bytes and date [#1620](https://github.com/quickwit-oss/tantivy/pull/1620) (@PSeitz)
- Updated [Date Field Type](https://github.com/quickwit-oss/tantivy/pull/1396)
The `DateTime` type has been updated to hold timestamps with microseconds precision.
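For orientation, here is a minimal sketch (not part of this commit) of how the narrower index type shows up at a call site, mirroring the `examples/warmer.rs` change below. The helper name and the assumption that `field` is declared as a FAST `u64` field are illustrative:

```rust
use tantivy::schema::Field;
use tantivy::SegmentReader;

/// Sum a u64 fast field over the live documents of one segment.
/// Sketch only: `field` is assumed to be declared as a FAST u64 field.
fn sum_fast_field(segment: &SegmentReader, field: Field) -> tantivy::Result<u64> {
    let reader = segment.fast_fields().u64(field)?;
    let mut sum = 0u64;
    for doc in segment.doc_ids_alive() {
        // `doc` is a `DocId` (= u32), so the former `doc as u64` cast is gone.
        sum = sum.wrapping_add(reader.get_val(doc));
    }
    Ok(sum)
}
```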
8 changes: 4 additions & 4 deletions bitpacker/src/bitpacker.rs
@@ -87,15 +87,15 @@ impl BitUnpacker {
}

#[inline]
- pub fn get(&self, idx: u64, data: &[u8]) -> u64 {
+ pub fn get(&self, idx: u32, data: &[u8]) -> u64 {
if self.num_bits == 0 {
return 0u64;
}
- let addr_in_bits = idx * self.num_bits;
+ let addr_in_bits = idx * self.num_bits as u32;
let addr = addr_in_bits >> 3;
let bit_shift = addr_in_bits & 7;
debug_assert!(
- addr + 8 <= data.len() as u64,
+ addr + 8 <= data.len() as u32,
"The fast field field should have been padded with 7 bytes."
);
let bytes: [u8; 8] = (&data[(addr as usize)..(addr as usize) + 8])
@@ -130,7 +130,7 @@ mod test {
fn test_bitpacker_util(len: usize, num_bits: u8) {
let (bitunpacker, vals, data) = create_fastfield_bitpacker(len, num_bits);
for (i, val) in vals.iter().enumerate() {
- assert_eq!(bitunpacker.get(i as u64, &data), *val);
+ assert_eq!(bitunpacker.get(i as u32, &data), *val);
}
}

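The arithmetic in `get` above is unchanged apart from the index type: the value starts at bit `idx * num_bits`, which splits into a byte offset (`>> 3`) and an in-byte shift (`& 7`); eight bytes are then read, which is why the data must carry 7 bytes of padding. A simplified, self-contained sketch (not the crate's exact code, and assuming `num_bits <= 56`):

```rust
/// Simplified sketch of fixed-width bit unpacking, mirroring `BitUnpacker::get`.
/// Assumes `data` ends with at least 7 bytes of padding and `num_bits <= 56`.
fn get_bits(idx: u32, num_bits: u8, data: &[u8]) -> u64 {
    if num_bits == 0 {
        return 0;
    }
    let addr_in_bits = idx as usize * num_bits as usize;
    let addr = addr_in_bits >> 3; // byte offset of the value's first bit
    let bit_shift = addr_in_bits & 7; // remaining shift inside that byte
    let bytes: [u8; 8] = data[addr..addr + 8].try_into().unwrap();
    let mask = (1u64 << num_bits) - 1;
    (u64::from_le_bytes(bytes) >> bit_shift) & mask
}

// Example: with num_bits = 5, idx = 3 reads bits 15..20, i.e. byte 1, shift 7.
```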
2 changes: 1 addition & 1 deletion bitpacker/src/blocked_bitpacker.rs
@@ -130,7 +130,7 @@ impl BlockedBitpacker {
let pos_in_block = idx % BLOCK_SIZE as usize;
if let Some(metadata) = self.offset_and_bits.get(metadata_pos) {
let unpacked = BitUnpacker::new(metadata.num_bits()).get(
- pos_in_block as u64,
+ pos_in_block as u32,
&self.compressed_blocks[metadata.offset() as usize..],
);
unpacked + metadata.base_value()
2 changes: 1 addition & 1 deletion examples/custom_collector.rs
@@ -105,7 +105,7 @@ impl SegmentCollector for StatsSegmentCollector {
type Fruit = Option<Stats>;

fn collect(&mut self, doc: u32, _score: Score) {
- let value = self.fast_field_reader.get_val(doc as u64) as f64;
+ let value = self.fast_field_reader.get_val(doc) as f64;
self.stats.count += 1;
self.stats.sum += value;
self.stats.squared_sum += value * value;
2 changes: 1 addition & 1 deletion examples/warmer.rs
@@ -51,7 +51,7 @@ impl Warmer for DynamicPriceColumn {
let product_id_reader = segment.fast_fields().u64(self.field)?;
let product_ids: Vec<ProductId> = segment
.doc_ids_alive()
- .map(|doc| product_id_reader.get_val(doc as u64))
+ .map(|doc| product_id_reader.get_val(doc))
.collect();
let mut prices_it = self.price_fetcher.fetch_prices(&product_ids).into_iter();
let mut price_vals: Vec<Price> = Vec::new();
16 changes: 8 additions & 8 deletions fastfield_codecs/benches/bench.rs
@@ -65,7 +65,7 @@ mod tests {
b.iter(|| {
let mut a = 0u64;
for _ in 0..n {
- a = column.get_val(a as u64);
+ a = column.get_val(a as u32);
}
a
});
@@ -137,7 +137,7 @@
b.iter(|| {
let mut a = 0u128;
for i in 0u64..column.num_vals() as u64 {
- a += column.get_val(i);
+ a += column.get_val(i as u32);
}
a
});
@@ -151,7 +151,7 @@
let n = column.num_vals();
let mut a = 0u128;
for i in (0..n / 5).map(|val| val * 5) {
- a += column.get_val(i as u64);
+ a += column.get_val(i);
}
a
});
@@ -176,9 +176,9 @@
let n = permutation.len();
let column: Arc<dyn Column<u64>> = serialize_and_load(&permutation);
b.iter(|| {
- let mut a = 0u64;
+ let mut a = 0;
for i in (0..n / 7).map(|val| val * 7) {
- a += column.get_val(i as u64);
+ a += column.get_val(i as u32);
}
a
});
@@ -191,7 +191,7 @@
let column: Arc<dyn Column<u64>> = serialize_and_load(&permutation);
b.iter(|| {
let mut a = 0u64;
- for i in 0u64..n as u64 {
+ for i in 0u32..n as u32 {
a += column.get_val(i);
}
a
@@ -205,8 +205,8 @@
let column: Arc<dyn Column<u64>> = serialize_and_load(&permutation);
b.iter(|| {
let mut a = 0u64;
- for i in 0..n as u64 {
- a += column.get_val(i);
+ for i in 0..n {
+ a += column.get_val(i as u32);
}
a
});
2 changes: 1 addition & 1 deletion fastfield_codecs/src/bitpacked.rs
@@ -17,7 +17,7 @@ pub struct BitpackedReader {

impl Column for BitpackedReader {
#[inline]
- fn get_val(&self, doc: u64) -> u64 {
+ fn get_val(&self, doc: u32) -> u64 {
self.bit_unpacker.get(doc, &self.data)
}
#[inline]
10 changes: 5 additions & 5 deletions fastfield_codecs/src/blockwise_linear.rs
@@ -78,7 +78,7 @@ impl FastFieldCodec for BlockwiseLinearCodec {
let mut first_chunk: Vec<u64> = column.iter().take(CHUNK_SIZE as usize).collect();
let line = Line::train(&VecColumn::from(&first_chunk));
for (i, buffer_val) in first_chunk.iter_mut().enumerate() {
- let interpolated_val = line.eval(i as u64);
+ let interpolated_val = line.eval(i as u32);
*buffer_val = buffer_val.wrapping_sub(interpolated_val);
}
let estimated_bit_width = first_chunk
@@ -121,7 +121,7 @@ impl FastFieldCodec for BlockwiseLinearCodec {
assert!(!buffer.is_empty());

for (i, buffer_val) in buffer.iter_mut().enumerate() {
- let interpolated_val = line.eval(i as u64);
+ let interpolated_val = line.eval(i as u32);
*buffer_val = buffer_val.wrapping_sub(interpolated_val);
}
let bit_width = buffer.iter().copied().map(compute_num_bits).max().unwrap();
@@ -161,9 +161,9 @@ pub struct BlockwiseLinearReader {

impl Column for BlockwiseLinearReader {
#[inline(always)]
- fn get_val(&self, idx: u64) -> u64 {
- let block_id = (idx / CHUNK_SIZE as u64) as usize;
- let idx_within_block = idx % (CHUNK_SIZE as u64);
+ fn get_val(&self, idx: u32) -> u64 {
+ let block_id = (idx / CHUNK_SIZE as u32) as usize;
+ let idx_within_block = idx % (CHUNK_SIZE as u32);
let block = &self.blocks[block_id];
let interpoled_val: u64 = block.line.eval(idx_within_block);
let block_bytes = &self.data[block.data_start_offset..];
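To make the block addressing above concrete, a tiny sketch of the block/offset split; `CHUNK_SIZE = 512` is a made-up value here, the real constant is defined in this module:

```rust
// Hypothetical chunk size, for illustration only.
const CHUNK_SIZE: u32 = 512;

/// Split a u32 value index into (block id, index within the block),
/// as `BlockwiseLinearReader::get_val` does before evaluating the block's line.
fn split_idx(idx: u32) -> (usize, u32) {
    ((idx / CHUNK_SIZE) as usize, idx % CHUNK_SIZE)
}

fn main() {
    // Index 1000 falls in block 1 at offset 488 (with 512-value chunks).
    assert_eq!(split_idx(1000), (1, 488));
}
```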
18 changes: 9 additions & 9 deletions fastfield_codecs/src/column.rs
@@ -14,7 +14,7 @@ pub trait Column<T: PartialOrd = u64>: Send + Sync {
/// # Panics
///
/// May panic if `idx` is greater than the column length.
- fn get_val(&self, idx: u64) -> T;
+ fn get_val(&self, idx: u32) -> T;

/// Fills an output buffer with the fast field values
/// associated with the `DocId` going from
@@ -27,18 +27,18 @@ pub trait Column<T: PartialOrd = u64>: Send + Sync {
#[inline]
fn get_range(&self, start: u64, output: &mut [T]) {
for (out, idx) in output.iter_mut().zip(start..) {
- *out = self.get_val(idx);
+ *out = self.get_val(idx as u32);
}
}

/// Return the positions of values which are in the provided range.
#[inline]
fn get_between_vals(&self, range: RangeInclusive<T>) -> Vec<u64> {
let mut vals = Vec::new();
- for idx in 0..self.num_vals() as u64 {
+ for idx in 0..self.num_vals() {
let val = self.get_val(idx);
if range.contains(&val) {
- vals.push(idx);
+ vals.push(idx as u64);
}
}
vals
@@ -65,7 +65,7 @@ pub trait Column<T: PartialOrd = u64>: Send + Sync {

/// Returns a iterator over the data
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = T> + 'a> {
- Box::new((0..self.num_vals() as u64).map(|idx| self.get_val(idx)))
+ Box::new((0..self.num_vals()).map(|idx| self.get_val(idx)))
}
}

@@ -77,7 +77,7 @@ pub struct VecColumn<'a, T = u64> {
}

impl<'a, C: Column<T>, T: Copy + PartialOrd> Column<T> for &'a C {
- fn get_val(&self, idx: u64) -> T {
+ fn get_val(&self, idx: u32) -> T {
(*self).get_val(idx)
}

@@ -103,7 +103,7 @@ impl<'a, C: Column<T>, T: Copy + PartialOrd> Column<T> for &'a C {
}

impl<'a, T: Copy + PartialOrd + Send + Sync> Column<T> for VecColumn<'a, T> {
- fn get_val(&self, position: u64) -> T {
+ fn get_val(&self, position: u32) -> T {
self.values[position as usize]
}

@@ -188,7 +188,7 @@ where
Output: PartialOrd + Send + Sync + Clone,
{
#[inline]
- fn get_val(&self, idx: u64) -> Output {
+ fn get_val(&self, idx: u32) -> Output {
let from_val = self.from_column.get_val(idx);
self.monotonic_mapping.mapping(from_val)
}
@@ -241,7 +241,7 @@ where
T: Iterator + Clone + ExactSizeIterator + Send + Sync,
T::Item: PartialOrd,
{
- fn get_val(&self, idx: u64) -> T::Item {
+ fn get_val(&self, idx: u32) -> T::Item {
self.0.clone().nth(idx as usize).unwrap()
}

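A short usage sketch of the updated trait, indexing in `u32` space end to end. It assumes `Column` and `VecColumn` are importable from the `fastfield_codecs` crate root:

```rust
use fastfield_codecs::{Column, VecColumn};

/// Sum every value of a column using the new u32 positions.
fn column_sum(col: &dyn Column<u64>) -> u64 {
    let mut sum = 0u64;
    // num_vals() and get_val() now both operate on u32 indices.
    for idx in 0..col.num_vals() {
        sum = sum.wrapping_add(col.get_val(idx));
    }
    sum
}

fn main() {
    let values: Vec<u64> = vec![3, 1, 4, 1, 5];
    let col = VecColumn::from(&values);
    assert_eq!(column_sum(&col), 14);
}
```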
20 changes: 10 additions & 10 deletions fastfield_codecs/src/compact_space/mod.rs
@@ -284,7 +284,7 @@ impl BinarySerializable for IPCodecParams {

impl Column<u128> for CompactSpaceDecompressor {
#[inline]
- fn get_val(&self, doc: u64) -> u128 {
+ fn get_val(&self, doc: u32) -> u128 {
self.get(doc)
}

@@ -385,17 +385,17 @@ impl CompactSpaceDecompressor {
positions.push(idx);
}
};
- let get_val = |idx| self.params.bit_unpacker.get(idx as u64, &self.data);
+ let get_val = |idx| self.params.bit_unpacker.get(idx, &self.data);
// unrolled loop
for idx in (0..cutoff).step_by(step_size as usize) {
let idx1 = idx;
let idx2 = idx + 1;
let idx3 = idx + 2;
let idx4 = idx + 3;
- let val1 = get_val(idx1);
- let val2 = get_val(idx2);
- let val3 = get_val(idx3);
- let val4 = get_val(idx4);
+ let val1 = get_val(idx1 as u32);
+ let val2 = get_val(idx2 as u32);
+ let val3 = get_val(idx3 as u32);
+ let val4 = get_val(idx4 as u32);
push_if_in_range(idx1, val1);
push_if_in_range(idx2, val2);
push_if_in_range(idx3, val3);
@@ -404,7 +404,7 @@

// handle rest
for idx in cutoff..self.params.num_vals as u64 {
- push_if_in_range(idx, get_val(idx));
+ push_if_in_range(idx, get_val(idx as u32));
}

positions
@@ -413,7 +413,7 @@
#[inline]
fn iter_compact(&self) -> impl Iterator<Item = u64> + '_ {
(0..self.params.num_vals)
- .map(move |idx| self.params.bit_unpacker.get(idx as u64, &self.data) as u64)
+ .map(move |idx| self.params.bit_unpacker.get(idx, &self.data) as u64)
}

#[inline]
@@ -425,7 +425,7 @@
}

#[inline]
- pub fn get(&self, idx: u64) -> u128 {
+ pub fn get(&self, idx: u32) -> u128 {
let compact = self.params.bit_unpacker.get(idx, &self.data);
self.compact_to_u128(compact)
}
@@ -491,7 +491,7 @@ mod tests {
fn test_all(data: OwnedBytes, expected: &[u128]) {
let decompressor = CompactSpaceDecompressor::open(data).unwrap();
for (idx, expected_val) in expected.iter().cloned().enumerate() {
- let val = decompressor.get(idx as u64);
+ let val = decompressor.get(idx as u32);
assert_eq!(val, expected_val);

let test_range = |range: RangeInclusive<u128>| {
6 changes: 3 additions & 3 deletions fastfield_codecs/src/lib.rs
@@ -201,7 +201,7 @@ mod tests {
let reader = crate::open::<u64>(OwnedBytes::new(out)).unwrap();
assert_eq!(reader.num_vals(), data.len() as u32);
for (doc, orig_val) in data.iter().copied().enumerate() {
- let val = reader.get_val(doc as u64);
+ let val = reader.get_val(doc as u32);
assert_eq!(
val, orig_val,
"val `{val}` does not match orig_val {orig_val:?}, in data set {name}, data \
@@ -429,7 +429,7 @@ mod bench {
b.iter(|| {
let mut sum = 0u64;
for pos in value_iter() {
- let val = col.get_val(pos as u64);
+ let val = col.get_val(pos as u32);
sum = sum.wrapping_add(val);
}
sum
@@ -441,7 +441,7 @@
b.iter(|| {
let mut sum = 0u64;
for pos in value_iter() {
- let val = col.get_val(pos as u64);
+ let val = col.get_val(pos as u32);
sum = sum.wrapping_add(val);
}
sum
8 changes: 4 additions & 4 deletions fastfield_codecs/src/line.rs
@@ -62,8 +62,8 @@ fn compute_slope(y0: u64, y1: u64, num_vals: NonZeroU32) -> u64 {

impl Line {
#[inline(always)]
- pub fn eval(&self, x: u64) -> u64 {
- let linear_part = (x.wrapping_mul(self.slope) >> 32) as i32 as u64;
+ pub fn eval(&self, x: u32) -> u64 {
+ let linear_part = ((x as u64).wrapping_mul(self.slope) >> 32) as i32 as u64;
self.intercept.wrapping_add(linear_part)
}

@@ -129,7 +129,7 @@ impl Line {
};
let heuristic_shift = y0.wrapping_sub(MID_POINT);
line.intercept = positions_and_values
- .map(|(pos, y)| y.wrapping_sub(line.eval(pos)))
+ .map(|(pos, y)| y.wrapping_sub(line.eval(pos as u32)))
.min_by_key(|&val| val.wrapping_sub(heuristic_shift))
.unwrap_or(0u64); //< Never happens.
line
@@ -199,7 +199,7 @@ mod tests {
let line = Line::train(&VecColumn::from(&ys));
ys.iter()
.enumerate()
- .map(|(x, y)| y.wrapping_sub(line.eval(x as u64)))
+ .map(|(x, y)| y.wrapping_sub(line.eval(x as u32)))
.max()
}

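The `eval` change keeps the same fixed-point scheme with a narrower argument: `slope` acts as a 32.32 fixed-point factor, the product's upper half is taken with `>> 32`, and the cast through `i32` sign-extends it before the wrapping add to `intercept`. A standalone sketch with made-up slope/intercept values:

```rust
/// Minimal sketch of the fixed-point evaluation performed by `Line::eval`.
/// `slope` is interpreted as a 32.32 fixed-point factor; values are illustrative.
fn eval(intercept: u64, slope: u64, x: u32) -> u64 {
    let linear_part = ((x as u64).wrapping_mul(slope) >> 32) as i32 as u64;
    intercept.wrapping_add(linear_part)
}

fn main() {
    let slope = 3u64 << 32; // +3 per step, in 32.32 fixed point
    assert_eq!(eval(10, slope, 0), 10);
    assert_eq!(eval(10, slope, 4), 22);
}
```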
8 changes: 4 additions & 4 deletions fastfield_codecs/src/linear.rs
@@ -19,7 +19,7 @@ pub struct LinearReader {

impl Column for LinearReader {
#[inline]
- fn get_val(&self, doc: u64) -> u64 {
+ fn get_val(&self, doc: u32) -> u64 {
let interpoled_val: u64 = self.linear_params.line.eval(doc);
let bitpacked_diff = self.linear_params.bit_unpacker.get(doc, &self.data);
interpoled_val.wrapping_add(bitpacked_diff)
@@ -93,7 +93,7 @@ impl FastFieldCodec for LinearCodec {
.iter()
.enumerate()
.map(|(pos, actual_value)| {
- let calculated_value = line.eval(pos as u64);
+ let calculated_value = line.eval(pos as u32);
actual_value.wrapping_sub(calculated_value)
})
.max()
@@ -108,7 +108,7 @@

let mut bit_packer = BitPacker::new();
for (pos, actual_value) in column.iter().enumerate() {
- let calculated_value = line.eval(pos as u64);
+ let calculated_value = line.eval(pos as u32);
let offset = actual_value.wrapping_sub(calculated_value);
bit_packer.write(offset, num_bits, write)?;
}
@@ -140,7 +140,7 @@ impl FastFieldCodec for LinearCodec {
let estimated_bit_width = sample_positions_and_values
.into_iter()
.map(|(pos, actual_value)| {
- let interpolated_val = line.eval(pos as u64);
+ let interpolated_val = line.eval(pos as u32);
actual_value.wrapping_sub(interpolated_val)
})
.map(|diff| ((diff as f32 * 1.5) * 2.0) as u64)
