
switch to variable size footer
PSeitz committed Nov 27, 2022
1 parent 9e6551b commit 29f4c12
Showing 5 changed files with 54 additions and 37 deletions.
5 changes: 2 additions & 3 deletions fastfield_codecs/src/compact_space/mod.rs
@@ -457,7 +457,7 @@ mod tests {

use super::*;
use crate::format_version::read_format_version;
-use crate::null_index_footer::NullIndexFooter;
+use crate::null_index_footer::read_null_index_footer;
use crate::serialize::U128Header;
use crate::{open_u128, serialize_u128};

@@ -544,7 +544,7 @@

let data = OwnedBytes::new(out);
let (data, _format_version) = read_format_version(data).unwrap();
-let data = data.slice(0..data.len() - NullIndexFooter::SIZE_IN_BYTES);
+let (data, _null_index_footer) = read_null_index_footer(data).unwrap();
test_all(data.clone(), u128_vals);

data
@@ -794,7 +794,6 @@
let vals = &[1_000_000_000u128; 100];
let _data = test_aux_vals(vals);
}
-use common::FixedSize;
use itertools::Itertools;
use proptest::prelude::*;

8 changes: 4 additions & 4 deletions fastfield_codecs/src/lib.rs
@@ -18,13 +18,14 @@ use std::io
use std::io::Write;
use std::sync::Arc;

-use common::{BinarySerializable, FixedSize};
+use common::BinarySerializable;
use compact_space::CompactSpaceDecompressor;
use format_version::read_format_version;
use monotonic_mapping::{
StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal,
StrictlyMonotonicMappingToInternalBaseval, StrictlyMonotonicMappingToInternalGCDBaseval,
};
+use null_index_footer::read_null_index_footer;
use ownedbytes::OwnedBytes;
use serialize::{Header, U128Header};

@@ -51,7 +52,6 @@ pub use self::monotonic_mapping_u128::MonotonicallyMappableToU128;
pub use self::serialize::{
estimate, serialize, serialize_and_load, serialize_u128, NormalizedHeader,
};
-use crate::null_index_footer::NullIndexFooter;

#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
#[repr(u8)]
@@ -136,7 +136,7 @@ pub fn open_u128<Item: MonotonicallyMappableToU128>(
bytes: OwnedBytes,
) -> io::Result<Arc<dyn Column<Item>>> {
let (bytes, _format_version) = read_format_version(bytes)?;
-let mut bytes = bytes.slice(0..bytes.len() - NullIndexFooter::SIZE_IN_BYTES);
+let (mut bytes, _null_index_footer) = read_null_index_footer(bytes)?;
let header = U128Header::deserialize(&mut bytes)?;
assert_eq!(header.codec_type, U128FastFieldCodecType::CompactSpace);
let reader = CompactSpaceDecompressor::open(bytes)?;
@@ -148,7 +148,7 @@
/// Returns the correct codec reader wrapped in the `Arc` for the data.
pub fn open<T: MonotonicallyMappableToU64>(bytes: OwnedBytes) -> io::Result<Arc<dyn Column<T>>> {
let (bytes, _format_version) = read_format_version(bytes)?;
-let mut bytes = bytes.slice(0..bytes.len() - NullIndexFooter::SIZE_IN_BYTES);
+let (mut bytes, _null_index_footer) = read_null_index_footer(bytes)?;
let header = Header::deserialize(&mut bytes)?;
match header.codec_type {
FastFieldCodecType::Bitpacked => open_specific_codec::<BitpackedCodec, _>(bytes, &header),
39 changes: 30 additions & 9 deletions fastfield_codecs/src/null_index_footer.rs
@@ -1,7 +1,8 @@
use std::io::{self, Write};
use std::ops::Range;

-use common::{BinarySerializable, FixedSize};
+use common::{BinarySerializable, CountingWriter, VInt};
+use ownedbytes::OwnedBytes;

#[derive(Debug, Clone, Copy)]
pub(crate) enum FastFieldCardinality {
@@ -73,28 +74,48 @@ pub(crate) struct NullIndexFooter {
pub(crate) null_index_byte_range: Range<u64>,
}

-impl FixedSize for NullIndexFooter {
-const SIZE_IN_BYTES: usize = 18;
-}
-
impl BinarySerializable for NullIndexFooter {
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
self.cardinality.serialize(writer)?;
self.null_index_codec.serialize(writer)?;
-self.null_index_byte_range.start.serialize(writer)?;
-self.null_index_byte_range.end.serialize(writer)?;
+VInt(self.null_index_byte_range.start).serialize(writer)?;
+VInt(self.null_index_byte_range.end).serialize(writer)?;
Ok(())
}

fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
let cardinality = FastFieldCardinality::deserialize(reader)?;
let null_index_codec = NullIndexCodec::deserialize(reader)?;
-let null_index_byte_range_start = u64::deserialize(reader)?;
-let null_index_byte_range_end = u64::deserialize(reader)?;
+let null_index_byte_range_start = VInt::deserialize(reader)?.0;
+let null_index_byte_range_end = VInt::deserialize(reader)?.0;
Ok(Self {
cardinality,
null_index_codec,
null_index_byte_range: null_index_byte_range_start..null_index_byte_range_end,
})
}
}

+pub(crate) fn append_null_index_footer(
+output: &mut impl io::Write,
+null_index_footer: NullIndexFooter,
+) -> io::Result<()> {
+let mut counting_write = CountingWriter::wrap(output);
+null_index_footer.serialize(&mut counting_write)?;
+let footer_payload_len = counting_write.written_bytes();
+BinarySerializable::serialize(&(footer_payload_len as u16), &mut counting_write)?;
+
+Ok(())
+}
+
+pub(crate) fn read_null_index_footer(
+data: OwnedBytes,
+) -> io::Result<(OwnedBytes, NullIndexFooter)> {
+let (data, null_footer_length_bytes) = data.rsplit(2);
+
+let footer_length = u16::deserialize(&mut null_footer_length_bytes.as_slice())?;
+let (data, null_index_footer_bytes) = data.rsplit(footer_length as usize);
+let null_index_footer = NullIndexFooter::deserialize(&mut null_index_footer_bytes.as_ref())?;
+
+Ok((data, null_index_footer))
+}
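The two functions added above are the core of the switch from a fixed 18-byte footer to a variable-size one: the footer payload is written first, and its length follows as a 2-byte integer, so a reader can recover the payload without knowing its size up front. Below is a minimal, self-contained sketch of that trailer layout using plain byte slices instead of the crate's `CountingWriter`/`OwnedBytes` helpers; the helper names and the little-endian length encoding are assumptions for illustration, not the crate's actual API.

```rust
// Sketch of a variable-size footer trailer: [payload][payload_len: u16].
// Assumed layout for illustration; the crate serializes the length via
// BinarySerializable rather than the explicit to_le_bytes() used here.
fn append_footer(out: &mut Vec<u8>, footer_payload: &[u8]) {
    out.extend_from_slice(footer_payload);
    out.extend_from_slice(&(footer_payload.len() as u16).to_le_bytes());
}

/// Splits `data` into (bytes before the footer, footer payload),
/// mirroring what `read_null_index_footer` does with `OwnedBytes::rsplit`.
fn read_footer(data: &[u8]) -> (&[u8], &[u8]) {
    // The last two bytes hold the payload length.
    let (rest, len_bytes) = data.split_at(data.len() - 2);
    let len = u16::from_le_bytes([len_bytes[0], len_bytes[1]]) as usize;
    // Everything before those `len` bytes is the column data.
    rest.split_at(rest.len() - len)
}

fn main() {
    let mut out = b"column data".to_vec();
    append_footer(&mut out, &[1u8, 2, 0, 0]); // e.g. a 4-byte footer payload
    let (body, footer) = read_footer(&out);
    assert_eq!(body, b"column data");
    assert_eq!(footer, &[1u8, 2, 0, 0]);
}
```

Because the byte range inside the footer is now VInt-encoded, the payload length depends on the stored values, which is why a trailing length field is needed at all.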
19 changes: 8 additions & 11 deletions fastfield_codecs/src/serialize.rs
@@ -34,7 +34,9 @@ use crate::monotonic_mapping::{
StrictlyMonotonicFn, StrictlyMonotonicMappingToInternal,
StrictlyMonotonicMappingToInternalGCDBaseval,
};
-use crate::null_index_footer::{FastFieldCardinality, NullIndexCodec, NullIndexFooter};
+use crate::null_index_footer::{
+append_null_index_footer, FastFieldCardinality, NullIndexCodec, NullIndexFooter,
+};
use crate::{
monotonic_map_column, Column, FastFieldCodec, FastFieldCodecType, MonotonicallyMappableToU64,
U128FastFieldCodecType, VecColumn, ALL_CODEC_TYPES,
@@ -205,7 +207,7 @@ pub fn serialize_u128<F: Fn() -> I, I: Iterator<Item = u128>>(
null_index_codec: NullIndexCodec::Full,
null_index_byte_range: 0..0,
};
-null_index_footer.serialize(output)?;
+append_null_index_footer(output, null_index_footer)?;
append_format_version(output)?;

Ok(())
@@ -237,7 +239,7 @@
null_index_codec: NullIndexCodec::Full,
null_index_byte_range: 0..0,
};
-null_index_footer.serialize(output)?;
+append_null_index_footer(output, null_index_footer)?;
append_format_version(output)?;

Ok(())
@@ -302,8 +304,6 @@ pub fn serialize_and_load<T: MonotonicallyMappableToU64 + Ord + Default>(

#[cfg(test)]
mod tests {
-use common::FixedSize;
-
use super::*;

#[test]
@@ -331,7 +331,7 @@ mod tests {
let col = VecColumn::from(&[false, true][..]);
serialize(col, &mut buffer, &ALL_CODEC_TYPES).unwrap();
// 5 bytes of header, 1 byte of value, 7 bytes of padding.
-assert_eq!(buffer.len(), 3 + 5 + 8 + NullIndexFooter::SIZE_IN_BYTES);
+assert_eq!(buffer.len(), 3 + 5 + 8 + 4 + 2);
}

#[test]
@@ -340,7 +340,7 @@
let col = VecColumn::from(&[true][..]);
serialize(col, &mut buffer, &ALL_CODEC_TYPES).unwrap();
// 5 bytes of header, 0 bytes of value, 7 bytes of padding.
-assert_eq!(buffer.len(), 3 + 5 + 7 + NullIndexFooter::SIZE_IN_BYTES);
+assert_eq!(buffer.len(), 3 + 5 + 7 + 4 + 2);
}

#[test]
@@ -350,9 +350,6 @@
let col = VecColumn::from(&vals[..]);
serialize(col, &mut buffer, &[FastFieldCodecType::Bitpacked]).unwrap();
// Values are stored over 3 bits.
-assert_eq!(
-buffer.len(),
-3 + 7 + (3 * 80 / 8) + 7 + NullIndexFooter::SIZE_IN_BYTES
-);
+assert_eq!(buffer.len(), 3 + 7 + (3 * 80 / 8) + 7 + 4 + 2);
}
}
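The updated assertions above, and every `file.len()` change in `src/fastfield/mod.rs` below, shrink by 12 bytes. That follows from the new footer encoding: the old `NullIndexFooter::SIZE_IN_BYTES` was a fixed 18 bytes (the cardinality and codec tags plus two `u64` bounds for the null-index byte range), while the new footer stores the bounds as VInts, so an empty `0..0` range costs one byte per bound, giving a 4-byte payload plus the 2-byte length written by `append_null_index_footer`. A quick sanity check of that accounting (illustrative numbers only; the per-field breakdown is inferred from the old fixed size, not quoted from the crate):

```rust
// Footer size accounting implied by this commit (assumed breakdown).
fn main() {
    let tags = 2; // cardinality + null-index codec
    let old_footer = tags + 8 + 8; // two u64 byte-range bounds
    assert_eq!(old_footer, 18); // the old NullIndexFooter::SIZE_IN_BYTES

    // With VInt bounds, an empty 0..0 range takes 1 byte per bound,
    // and the payload is followed by a 2-byte length field.
    let new_footer_payload = tags + 1 + 1;
    let new_footer = new_footer_payload + 2;
    assert_eq!(new_footer, 4 + 2); // the `+ 4 + 2` in the assertions above

    assert_eq!(old_footer - new_footer, 12); // every serialized column shrinks by 12 bytes
}
```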
20 changes: 10 additions & 10 deletions src/fastfield/mod.rs
@@ -207,7 +207,7 @@ mod tests {
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
-assert_eq!(file.len(), 46);
+assert_eq!(file.len(), 34);
let composite_file = CompositeFile::open(&file)?;
let fast_field_bytes = composite_file.open_read(*FIELD).unwrap().read_bytes()?;
let fast_field_reader = open::<u64>(fast_field_bytes)?;
@@ -256,7 +256,7 @@
serializer.close()?;
}
let file = directory.open_read(path)?;
-assert_eq!(file.len(), 74);
+assert_eq!(file.len(), 62);
{
let fast_fields_composite = CompositeFile::open(&file)?;
let data = fast_fields_composite
@@ -297,7 +297,7 @@
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
-assert_eq!(file.len(), 47);
+assert_eq!(file.len(), 35);
{
let fast_fields_composite = CompositeFile::open(&file).unwrap();
let data = fast_fields_composite
@@ -336,7 +336,7 @@
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
-assert_eq!(file.len(), 80061);
+assert_eq!(file.len(), 80049);
{
let fast_fields_composite = CompositeFile::open(&file)?;
let data = fast_fields_composite
@@ -378,7 +378,7 @@
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
-assert_eq!(file.len(), 61_usize);
+assert_eq!(file.len(), 49_usize);

{
let fast_fields_composite = CompositeFile::open(&file)?;
@@ -822,7 +822,7 @@
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
-assert_eq!(file.len(), 45);
+assert_eq!(file.len(), 33);
let composite_file = CompositeFile::open(&file)?;
let data = composite_file.open_read(field).unwrap().read_bytes()?;
let fast_field_reader = open::<bool>(data)?;
@@ -860,7 +860,7 @@
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
-assert_eq!(file.len(), 57);
+assert_eq!(file.len(), 45);
let composite_file = CompositeFile::open(&file)?;
let data = composite_file.open_read(field).unwrap().read_bytes()?;
let fast_field_reader = open::<bool>(data)?;
@@ -892,7 +892,7 @@
}
let file = directory.open_read(path).unwrap();
let composite_file = CompositeFile::open(&file)?;
-assert_eq!(file.len(), 44);
+assert_eq!(file.len(), 32);
let data = composite_file.open_read(field).unwrap().read_bytes()?;
let fast_field_reader = open::<bool>(data)?;
assert_eq!(fast_field_reader.get_val(0), false);
@@ -926,10 +926,10 @@
pub fn test_gcd_date() -> crate::Result<()> {
let size_prec_sec =
test_gcd_date_with_codec(FastFieldCodecType::Bitpacked, DatePrecision::Seconds)?;
-assert_eq!(size_prec_sec, 3 + 18 + 28 + (1_000 * 13) / 8); // 13 bits per val = ceil(log_2(number of seconds in 2hours);
+assert_eq!(size_prec_sec, 5 + 4 + 28 + (1_000 * 13) / 8); // 13 bits per val = ceil(log_2(number of seconds in 2hours);
let size_prec_micro =
test_gcd_date_with_codec(FastFieldCodecType::Bitpacked, DatePrecision::Microseconds)?;
-assert_eq!(size_prec_micro, 3 + 18 + 26 + (1_000 * 33) / 8); // 33 bits per val = ceil(log_2(number of microsecsseconds in 2hours);
+assert_eq!(size_prec_micro, 5 + 4 + 26 + (1_000 * 33) / 8); // 33 bits per val = ceil(log_2(number of microsecsseconds in 2hours);
Ok(())
}

