Skip to content

Commit

Permalink
Refactors DecodedUInt::uint_from_slice
Browse files Browse the repository at this point in the history
  • Loading branch information
Zack Slayton committed May 16, 2024
1 parent c42f704 commit 6ec63ba
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 129 deletions.
38 changes: 18 additions & 20 deletions src/binary/uint.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::io::Write;
use std::mem;

use crate::result::IonResult;
use crate::result::{IonFailure, IonResult};
use crate::{Int, IonError, UInt};

// This limit is used for stack-allocating buffer space to encode/decode UInts.
Expand All @@ -27,29 +27,27 @@ impl DecodedUInt {
}

/// Interprets all of the bytes in the provided slice as big-endian unsigned integer bytes.
/// The caller must confirm that `uint_bytes` is no longer than 8 bytes long; otherwise,
/// overflow may quietly occur.
pub(crate) fn small_uint_from_slice(uint_bytes: &[u8]) -> u64 {
// TODO: copy from the slice and use from_be_bytes
let mut magnitude: u64 = 0;
for &byte in uint_bytes {
let byte = u64::from(byte);
magnitude <<= 8;
magnitude |= byte;
/// If the length of `uint_bytes` is greater than the size of a `u128`, returns `Err`.
#[inline]
pub(crate) fn uint_from_slice(uint_bytes: &[u8]) -> IonResult<u128> {
if uint_bytes.len() > mem::size_of::<u128>() {
return IonResult::decoding_error(
"integer size is currently limited to the range of an i128",
);
}
magnitude

Ok(Self::uint_from_slice_unchecked(uint_bytes))
}

/// Interprets all of the bytes in the provided slice as big-endian unsigned integer bytes.
pub(crate) fn big_uint_from_slice(uint_bytes: &[u8]) -> u128 {
// TODO: copy from the slice and use from_be_bytes
let mut magnitude: u128 = 0;
for &byte in uint_bytes {
let byte = u128::from(byte);
magnitude <<= 8;
magnitude |= byte;
}
magnitude
/// Panics if the length of `uint_bytes` is greater than the size of a `u128`.
#[inline]
pub(crate) fn uint_from_slice_unchecked(uint_bytes: &[u8]) -> u128 {
const BUFFER_SIZE: usize = mem::size_of::<u128>();
let mut buffer = [0u8; BUFFER_SIZE];
// Copy the big-endian bytes into the end of the buffer
buffer[BUFFER_SIZE - uint_bytes.len()..].copy_from_slice(uint_bytes);
u128::from_be_bytes(buffer)
}

/// Encodes the provided `magnitude` as a UInt and writes it to the provided `sink`.
Expand Down
103 changes: 1 addition & 102 deletions src/lazy/binary/immutable_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ use std::ops::{Neg, Range};

use crate::binary::constants::v1_0::{length_codes, IVM};
use crate::binary::int::DecodedInt;
use crate::binary::uint::DecodedUInt;
use crate::binary::var_int::VarInt;
use crate::binary::var_uint::VarUInt;
use crate::lazy::binary::encoded_value::EncodedValue;
Expand All @@ -17,7 +16,6 @@ use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt;
use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt;
use crate::lazy::encoding::BinaryEncoding_1_0;
use crate::result::IonFailure;
use crate::types::UInt;
use crate::{IonError, IonResult, IonType};

// The size of a u128
Expand Down Expand Up @@ -326,56 +324,6 @@ impl<'a> ImmutableBuffer<'a> {
))
}

/// Reads the first `length` bytes from the buffer as a `UInt` encoding primitive. If it is
/// successful, returns an `Ok(_)` containing its `DecodedUInt` representation.
///
/// See: <https://amazon-ion.github.io/ion-docs/docs/binary.html#uint-and-int-fields>
pub fn read_uint(self, length: usize) -> ParseResult<'a, DecodedUInt> {
if length <= mem::size_of::<u64>() {
return self.read_small_uint(length);
}

// The UInt is too large to fit in a u64; read it as a BigUInt instead.
self.read_big_uint(length)
}

/// Reads the first `length` bytes from the buffer as a `UInt`. The caller must confirm that
/// `length` is small enough to fit in a `u64`.
#[inline]
fn read_small_uint(self, length: usize) -> ParseResult<'a, DecodedUInt> {
let uint_bytes = self
.peek_n_bytes(length)
.ok_or_else(|| IonError::incomplete("a UInt", self.offset()))?;
let magnitude = DecodedUInt::small_uint_from_slice(uint_bytes);
Ok((
DecodedUInt::new(UInt::from(magnitude), length),
self.consume(length),
))
}

/// Reads the first `length` bytes from the buffer as a `UInt`. If `length` is small enough
/// that the value can fit in a `usize`, it is strongly recommended that you use
/// `read_small_uint` instead as it will be much faster.
#[inline(never)]
// This method performs allocations and its generated assembly is rather large. Isolating its
// logic in a separate method that is never inlined keeps `read_uint` (its caller) small enough
// to inline. This is important as `read_uint` is on the hot path for most Ion streams.
fn read_big_uint(self, length: usize) -> ParseResult<'a, DecodedUInt> {
if length > MAX_UINT_SIZE_IN_BYTES {
return Self::value_too_large("a Uint", length, MAX_UINT_SIZE_IN_BYTES);
}

let uint_bytes = self
.peek_n_bytes(length)
.ok_or_else(|| IonError::incomplete("a UInt", self.offset()))?;

let magnitude = DecodedUInt::big_uint_from_slice(uint_bytes);
Ok((
DecodedUInt::new(UInt::from(magnitude), length),
self.consume(length),
))
}

#[inline(never)]
// This method is inline(never) because it is rarely invoked and its allocations/formatting
// compile to a non-trivial number of instructions.
Expand Down Expand Up @@ -539,6 +487,7 @@ impl<'a> ImmutableBuffer<'a> {
/// from the buffer to interpret as the value's length. If it is successful, returns an `Ok(_)`
/// containing a [VarUInt] representation of the value's length. If no additional bytes were
/// read, the returned `VarUInt`'s `size_in_bytes()` method will return `0`.
#[inline]
pub fn read_value_length(self, header: Header) -> ParseResult<'a, VarUInt> {
use IonType::*;
// Some type-specific `length` field overrides
Expand Down Expand Up @@ -951,56 +900,6 @@ mod tests {
Ok(())
}

#[test]
fn read_one_byte_uint() -> IonResult<()> {
let buffer = ImmutableBuffer::new(&[0b1000_0000]);
let var_int = buffer.read_uint(buffer.len())?.0;
assert_eq!(var_int.size_in_bytes(), 1);
assert_eq!(var_int.value(), &UInt::from(128u64));
Ok(())
}

#[test]
fn read_two_byte_uint() -> IonResult<()> {
let buffer = ImmutableBuffer::new(&[0b0111_1111, 0b1111_1111]);
let var_int = buffer.read_uint(buffer.len())?.0;
assert_eq!(var_int.size_in_bytes(), 2);
assert_eq!(var_int.value(), &UInt::from(32_767u64));
Ok(())
}

#[test]
fn read_three_byte_uint() -> IonResult<()> {
let buffer = ImmutableBuffer::new(&[0b0011_1100, 0b1000_0111, 0b1000_0001]);
let var_int = buffer.read_uint(buffer.len())?.0;
assert_eq!(var_int.size_in_bytes(), 3);
assert_eq!(var_int.value(), &UInt::from(3_966_849u64));
Ok(())
}

#[test]
fn test_read_ten_byte_uint() -> IonResult<()> {
let data = vec![0xFFu8; 10];
let buffer = ImmutableBuffer::new(&data);
let uint = buffer.read_uint(buffer.len())?.0;
assert_eq!(uint.size_in_bytes(), 10);
assert_eq!(
uint.value(),
&UInt::from(u128::from_str_radix("ffffffffffffffffffff", 16).unwrap())
);
Ok(())
}

#[test]
fn test_read_uint_too_large() {
let mut buffer = Vec::with_capacity(MAX_UINT_SIZE_IN_BYTES + 1);
buffer.resize(MAX_UINT_SIZE_IN_BYTES + 1, 1);
let buffer = ImmutableBuffer::new(&buffer);
let _uint = buffer
.read_uint(buffer.len())
.expect_err("This exceeded the configured max UInt size.");
}

#[test]
fn read_int_negative_zero() -> IonResult<()> {
let buffer = ImmutableBuffer::new(&[0b1000_0000]); // Negative zero
Expand Down
11 changes: 4 additions & 7 deletions src/lazy/binary/raw/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -387,11 +387,7 @@ impl<'top> LazyRawBinaryValue_1_0<'top> {
debug_assert!(self.encoded_value.ion_type() == IonType::Int);
// `value_body()` returns a buffer starting at the body of the value.
let uint_bytes = self.value_body();
let magnitude: Int = if uint_bytes.len() <= mem::size_of::<u64>() {
DecodedUInt::small_uint_from_slice(uint_bytes).into()
} else {
DecodedUInt::big_uint_from_slice(uint_bytes).try_into()?
};
let magnitude: Int = DecodedUInt::uint_from_slice(uint_bytes)?.try_into()?;

use crate::binary::type_code::IonTypeCode::*;
use num_traits::Zero;
Expand Down Expand Up @@ -554,8 +550,9 @@ impl<'top> LazyRawBinaryValue_1_0<'top> {
"found a symbol ID that was too large to fit in a usize",
);
}
let magnitude = DecodedUInt::small_uint_from_slice(uint_bytes);
// This cast is safe because we've confirmed the value was small enough to fit in a usize.
// We've already confirmed that the uint fits in a `usize`, so we can `unwrap()` the result
// of this method and then cast its output to a `usize`.
let magnitude = DecodedUInt::uint_from_slice_unchecked(uint_bytes);
Ok(magnitude as usize)
}

Expand Down
2 changes: 2 additions & 0 deletions tests/element_display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ const TO_STRING_SKIP_LIST: &[&str] = &[
"ion-tests/iontestdata_1_0/good/intBigSize512.ion",
"ion-tests/iontestdata_1_0/good/intBigSize1201.10n",
"ion-tests/iontestdata_1_0/good/equivs/bigInts.ion",
"ion-tests/iontestdata_1_0/good/equivs/intsLargePositive3.10n",
"ion-tests/iontestdata_1_0/good/equivs/intsLargeNegative3.10n",
];

#[test_resources("ion-tests/iontestdata_1_0/good/**/*.ion")]
Expand Down
2 changes: 2 additions & 0 deletions tests/ion_data_consistency.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ const SKIP_LIST: &[&str] = &[
"ion-tests/iontestdata_1_0/good/intBigSize1201.10n",
"ion-tests/iontestdata_1_0/good/equivs/bigInts.ion",
"ion-tests/iontestdata_1_0/good/subfieldVarInt.ion",
"ion-tests/iontestdata_1_0/good/equivs/intsLargePositive3.10n",
"ion-tests/iontestdata_1_0/good/equivs/intsLargeNegative3.10n",
];

#[test_resources("ion-tests/iontestdata_1_0/good/equivs/**/*.ion")]
Expand Down
2 changes: 2 additions & 0 deletions tests/ion_tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,8 @@ pub const ELEMENT_GLOBAL_SKIP_LIST: SkipList = &[
"ion-tests/iontestdata_1_0/good/intBigSize512.ion",
"ion-tests/iontestdata_1_0/good/intBigSize1201.10n",
"ion-tests/iontestdata_1_0/good/equivs/bigInts.ion",
"ion-tests/iontestdata_1_0/good/equivs/intsLargePositive3.10n",
"ion-tests/iontestdata_1_0/good/equivs/intsLargeNegative3.10n",
];

pub const ELEMENT_ROUND_TRIP_SKIP_LIST: SkipList = &[
Expand Down

0 comments on commit 6ec63ba

Please sign in to comment.