Skip to content

Commit

Permalink
Add unpack8, unpack16, unpack64 (apache#2276)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Aug 2, 2022
1 parent cd45ecb commit fd82128
Show file tree
Hide file tree
Showing 4 changed files with 293 additions and 3,714 deletions.
231 changes: 231 additions & 0 deletions parquet/src/util/bit_pack.rs
@@ -0,0 +1,231 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Vectorised bit-packing utilities

macro_rules! unroll_impl {
(1, $offset:expr, $v:ident, $c:block) => {{
const $v: usize = $offset;
$c
}};
(2, $offset:expr, $v:ident, $c:block) => {{
unroll_impl!(1, $offset, $v, $c);
unroll_impl!(1, $offset + 1, $v, $c);
}};
(4, $offset:expr, $v:ident, $c:block) => {{
unroll_impl!(2, 0, __v4, { unroll_impl!(2, __v4 * 2 + $offset, $v, $c) });
}};
(8, $offset:expr, $v:ident, $c:block) => {{
unroll_impl!(2, 0, __v8, { unroll_impl!(4, __v8 * 4 + $offset, $v, $c) });
}};
(16, $offset:expr, $v:ident, $c:block) => {{
unroll_impl!(4, 0, __v16, {
unroll_impl!(4, __v16 * 4 + $offset, $v, $c)
});
}};
(32, $offset:expr, $v:ident, $c:block) => {{
unroll_impl!(2, 0, __v32, {
unroll_impl!(16, __v32 * 16 + $offset, $v, $c)
});
}};
(64, $offset:expr, $v:ident, $c:block) => {{
unroll_impl!(2, 0, __v64, {
unroll_impl!(32, __v64 * 32 + $offset, $v, $c)
});
}};
}

/// Manually unrolls a loop body, this is useful for two reasons
///
/// - force unrolling of a loop so that LLVM optimises it properly
/// - call a const generic function with the loop value
macro_rules! unroll {
(for $v:ident in 0..$end:tt $c:block) => {
#[allow(non_upper_case_globals)]
{
unroll_impl!($end, 0, $v, $c)
}
};
}

/// Macro that generates an unpack function taking the number of bits as a const generic
macro_rules! unpack_impl {
($t:ty, $bytes:literal, $bits:tt) => {
pub fn unpack<const NUM_BITS: usize>(input: &[u8], output: &mut [$t; $bits]) {
if NUM_BITS == 0 {
for out in output {
*out = 0;
}
return;
}

assert!(NUM_BITS <= $bytes * 8);

let mask = match NUM_BITS {
$bits => <$t>::MAX,
_ => ((1 << NUM_BITS) - 1),
};

assert!(input.len() >= NUM_BITS * $bytes);

let r = |output_idx: usize| {
<$t>::from_le_bytes(
input[output_idx * $bytes..output_idx * $bytes + $bytes]
.try_into()
.unwrap(),
)
};

unroll!(for i in 0..$bits {
let start_bit = i * NUM_BITS;
let end_bit = start_bit + NUM_BITS;

let start_bit_offset = start_bit % $bits;
let end_bit_offset = end_bit % $bits;
let start_byte = start_bit / $bits;
let end_byte = end_bit / $bits;
if start_byte != end_byte && end_bit_offset != 0 {
let val = r(start_byte);
let a = val >> start_bit_offset;
let val = r(end_byte);
let b = val << (NUM_BITS - end_bit_offset);

output[i] = a | (b & mask);
} else {
let val = r(start_byte);
output[i] = (val >> start_bit_offset) & mask;
}
});
}
};
}

/// Macro that generates unpack functions that accept num_bits as a parameter
macro_rules! unpack {
($name:ident, $t:ty, $bytes:literal, $bits:tt) => {
mod $name {
unpack_impl!($t, $bytes, $bits);
}

/// Unpack packed `input` into `output` with a bit width of `num_bits`
pub fn $name(input: &[u8], output: &mut [$t; $bits], num_bits: usize) {
// This will get optimised into a jump table
unroll!(for i in 0..$bits {
if i == num_bits {
return $name::unpack::<i>(input, output);
}
});
if num_bits == $bits {
return $name::unpack::<$bits>(input, output);
}
unreachable!("invalid num_bits {}", num_bits);
}
};
}

unpack!(unpack8, u8, 1, 8);
unpack!(unpack16, u16, 2, 16);
unpack!(unpack32, u32, 4, 32);
unpack!(unpack64, u64, 8, 64);

#[cfg(test)]
mod tests {
use super::*;
use rand::{thread_rng, Rng};

#[inline(never)]
fn generate() -> Vec<u8> {
let mut rand = thread_rng();
(0..8).map(|_| rand.gen()).collect()
}

#[test]
fn test_unroll() {
let mut vec = vec![];
unroll!(for i in 0..1 {
vec.push(i);
});
assert_eq!(vec, (0..1).collect::<Vec<_>>());

let mut vec = vec![];
unroll!(for i in 0..4 {
vec.push(i);
});
assert_eq!(vec, (0..4).collect::<Vec<_>>());

let mut vec = vec![];
unroll!(for i in 0..8 {
vec.push(i);
});
assert_eq!(vec, (0..8).collect::<Vec<_>>());

let mut vec = vec![];
unroll!(for i in 0..16 {
vec.push(i);
});
assert_eq!(vec, (0..16).collect::<Vec<_>>());

let mut vec = vec![];
unroll!(for i in 0..32 {
vec.push(i);
});
assert_eq!(vec, (0..32).collect::<Vec<_>>());

let mut vec = vec![];
unroll!(for i in 0..64 {
vec.push(i);
});
assert_eq!(vec, (0..64).collect::<Vec<_>>());
}

#[test]
fn test_basic() {
let input = [0xFF; 4096];

for i in 0..=8 {
let mut output = [0; 8];
unpack8(&input, &mut output, i);
for (idx, out) in output.iter().enumerate() {
assert_eq!(out.trailing_ones() as usize, i, "out[{}] = {}", idx, out);
}
}

for i in 0..=16 {
let mut output = [0; 16];
unpack16(&input, &mut output, i);
for (idx, out) in output.iter().enumerate() {
assert_eq!(out.trailing_ones() as usize, i, "out[{}] = {}", idx, out);
}
}

for i in 0..=32 {
let mut output = [0; 32];
unpack32(&input, &mut output, i);
for (idx, out) in output.iter().enumerate() {
assert_eq!(out.trailing_ones() as usize, i, "out[{}] = {}", idx, out);
}
}

for i in 0..=64 {
let mut output = [0; 64];
unpack64(&input, &mut output, i);
for (idx, out) in output.iter().enumerate() {
assert_eq!(out.trailing_ones() as usize, i, "out[{}] = {}", idx, out);
}
}
}
}

0 comments on commit fd82128

Please sign in to comment.