Add unpack8, unpack16, unpack64 (apache#2276)

tustvold · Aug 2, 2022 · 766e478 · 766e478
1 parent d4f038a
commit 766e478
Show file tree

Hide file tree

Showing 4 changed files with 293 additions and 3,714 deletions.
diff --git a/parquet/src/util/bit_pack.rs b/parquet/src/util/bit_pack.rs
@@ -0,0 +1,231 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Vectorised bit-packing utilities
+
+macro_rules! unroll_impl {
+    (1, $offset:expr, $v:ident, $c:block) => {{ // base case: emit `$c` exactly once
+        const $v: usize = $offset; // index is a `const`, so `$c` can pass it as a const generic argument
+        $c
+    }};
+    (2, $offset:expr, $v:ident, $c:block) => {{ // two copies of `$c`, at `$offset` and `$offset + 1`
+        unroll_impl!(1, $offset, $v, $c);
+        unroll_impl!(1, $offset + 1, $v, $c);
+    }};
+    (4, $offset:expr, $v:ident, $c:block) => {{ // 4 = 2 groups of 2
+        unroll_impl!(2, 0, __v4, { unroll_impl!(2, __v4 * 2 + $offset, $v, $c) }); // __v4 is 0 then 1
+    }};
+    (8, $offset:expr, $v:ident, $c:block) => {{ // 8 = 2 groups of 4
+        unroll_impl!(2, 0, __v8, { unroll_impl!(4, __v8 * 4 + $offset, $v, $c) }); // __v8 is 0 then 1
+    }};
+    (16, $offset:expr, $v:ident, $c:block) => {{ // 16 = 4 groups of 4
+        unroll_impl!(4, 0, __v16, { // __v16 takes the values 0, 1, 2, 3
+            unroll_impl!(4, __v16 * 4 + $offset, $v, $c)
+        });
+    }};
+    (32, $offset:expr, $v:ident, $c:block) => {{ // 32 = 2 groups of 16
+        unroll_impl!(2, 0, __v32, { // __v32 is 0 then 1
+            unroll_impl!(16, __v32 * 16 + $offset, $v, $c)
+        });
+    }};
+    (64, $offset:expr, $v:ident, $c:block) => {{ // 64 = 2 groups of 32
+        unroll_impl!(2, 0, __v64, { // __v64 is 0 then 1
+            unroll_impl!(32, __v64 * 32 + $offset, $v, $c)
+        });
+    }};
+} // only the literal counts 1, 2, 4, 8, 16, 32 and 64 have an arm; any other count fails to match
+
+/// Manually unrolls `for $v in 0..$end $c` into `$end` copies of `$c`, binding `$v` as a
+/// `const usize` in each copy. `$end` must be a literal that `unroll_impl!` has an arm for. Uses:
+/// - forcing a loop to be unrolled so that LLVM optimises it properly
+/// - calling a const generic function with the loop value
+macro_rules! unroll {
+    (for $v:ident in 0..$end:tt $c:block) => {
+        #[allow(non_upper_case_globals)] // the generated consts have lower-case names (`$v`, `__v4`, ...)
+        {
+            unroll_impl!($end, 0, $v, $c) // start counting at offset 0
+        }
+    };
+}
+
+/// Macro that generates `unpack::<NUM_BITS>`, which unpacks `$bits` values of type `$t` (`$bytes` bytes wide) from `input`
+macro_rules! unpack_impl {
+    ($t:ty, $bytes:literal, $bits:tt) => {
+        pub fn unpack<const NUM_BITS: usize>(input: &[u8], output: &mut [$t; $bits]) {
+            if NUM_BITS == 0 { // zero-width values: nothing is read from `input`, every output is 0
+                for out in output {
+                    *out = 0;
+                }
+                return;
+            }
+
+            assert!(NUM_BITS <= $bytes * 8); // panics if the width does not fit in `$t`
+
+            let mask = match NUM_BITS { // low `NUM_BITS` bits set
+                $bits => <$t>::MAX, // full width: avoids evaluating `1 << NUM_BITS` with a shift equal to `$bits`
+                _ => ((1 << NUM_BITS) - 1),
+            };
+
+            assert!(input.len() >= NUM_BITS * $bytes); // `$bits` values * NUM_BITS bits = NUM_BITS * `$bytes` bytes when `$bits == 8 * $bytes`
+
+            let r = |output_idx: usize| { // reads one little-endian `$t`; despite its name the argument indexes `$t`-sized words of `input`
+                <$t>::from_le_bytes(
+                    input[output_idx * $bytes..output_idx * $bytes + $bytes]
+                        .try_into()
+                        .unwrap(), // the slice is exactly `$bytes` long, so the array conversion cannot fail
+                )
+            };
+
+            unroll!(for i in 0..$bits { // one expansion per output value; `i` is a const in each copy
+                let start_bit = i * NUM_BITS; // bit position of value `i` within the packed input
+                let end_bit = start_bit + NUM_BITS; // one past the last bit of value `i`
+
+                let start_bit_offset = start_bit % $bits; // bit offset inside the word holding the first bit
+                let end_bit_offset = end_bit % $bits; // number of bits of this value that spill into the next word
+                let start_byte = start_bit / $bits; // NOTE: index of a `$t`-sized word (it is passed to `r`), not of a byte
+                let end_byte = end_bit / $bits; // word index containing `end_bit`
+                if start_byte != end_byte && end_bit_offset != 0 { // the value straddles two words
+                    let val = r(start_byte);
+                    let a = val >> start_bit_offset; // low `NUM_BITS - end_bit_offset` bits of the value
+                    let val = r(end_byte);
+                    let b = val << (NUM_BITS - end_bit_offset); // remaining high bits, moved above `a`
+
+                    output[i] = a | (b & mask); // the mask drops bits of `b` that belong to the next value
+                } else { // the value lies within a single word (possibly ending exactly on its boundary)
+                    let val = r(start_byte);
+                    output[i] = (val >> start_bit_offset) & mask;
+                }
+            });
+        }
+    };
+}
+
+/// Macro that generates a `$name` function taking `num_bits` at runtime and a private `$name` module with the const-generic `unpack`
+macro_rules! unpack {
+    ($name:ident, $t:ty, $bytes:literal, $bits:tt) => {
+        mod $name { // private module holding `unpack::<NUM_BITS>` for this element type
+            unpack_impl!($t, $bytes, $bits);
+        }
+
+        /// Unpack packed `input` into `output` with a bit width of `num_bits`; panics if `num_bits` is greater than the element width
+        pub fn $name(input: &[u8], output: &mut [$t; $bits], num_bits: usize) {
+            // Compares `num_bits` against each constant in `0..$bits`; this will get optimised into a jump table
+            unroll!(for i in 0..$bits {
+                if i == num_bits {
+                    return $name::unpack::<i>(input, output); // `i` is a const here, so it is valid as a const generic argument
+                }
+            });
+            if num_bits == $bits { // the unrolled range is exclusive, so the full-width case is handled separately
+                return $name::unpack::<$bits>(input, output);
+            }
+            unreachable!("invalid num_bits {}", num_bits); // NOTE(review): reached, and panics, whenever a caller passes num_bits > $bits
+        }
+    };
+}
+
+unpack!(unpack8, u8, 1, 8); // `unpack8`: fills a `[u8; 8]`, bit widths 0..=8
+unpack!(unpack16, u16, 2, 16); // `unpack16`: fills a `[u16; 16]`, bit widths 0..=16
+unpack!(unpack32, u32, 4, 32); // `unpack32`: fills a `[u32; 32]`, bit widths 0..=32
+unpack!(unpack64, u64, 8, 64); // `unpack64`: fills a `[u64; 64]`, bit widths 0..=64
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rand::{thread_rng, Rng};
+
+    #[inline(never)]
+    fn generate() -> Vec<u8> { // returns 8 random bytes; NOTE(review): not called by any test visible in this module
+        let mut rand = thread_rng();
+        (0..8).map(|_| rand.gen()).collect()
+    }
+
+    #[test]
+    fn test_unroll() { // each unrolled count must run its body with i = 0, 1, ..., n - 1, in that order
+        let mut vec = vec![];
+        unroll!(for i in 0..1 {
+            vec.push(i);
+        });
+        assert_eq!(vec, (0..1).collect::<Vec<_>>());
+
+        let mut vec = vec![];
+        unroll!(for i in 0..4 {
+            vec.push(i);
+        });
+        assert_eq!(vec, (0..4).collect::<Vec<_>>());
+
+        let mut vec = vec![];
+        unroll!(for i in 0..8 {
+            vec.push(i);
+        });
+        assert_eq!(vec, (0..8).collect::<Vec<_>>());
+
+        let mut vec = vec![];
+        unroll!(for i in 0..16 {
+            vec.push(i);
+        });
+        assert_eq!(vec, (0..16).collect::<Vec<_>>());
+
+        let mut vec = vec![];
+        unroll!(for i in 0..32 {
+            vec.push(i);
+        });
+        assert_eq!(vec, (0..32).collect::<Vec<_>>());
+
+        let mut vec = vec![];
+        unroll!(for i in 0..64 {
+            vec.push(i);
+        });
+        assert_eq!(vec, (0..64).collect::<Vec<_>>());
+    }
+
+    #[test]
+    fn test_basic() { // with an all-ones input, every value unpacked at width `i` must have exactly `i` trailing one bits
+        let input = [0xFF; 4096]; // larger than the biggest read (64 bits * 8 bytes = 512 bytes for unpack64)
+
+        for i in 0..=8 { // every valid width for u8, including 0 and the full width
+            let mut output = [0; 8];
+            unpack8(&input, &mut output, i);
+            for (idx, out) in output.iter().enumerate() {
+                assert_eq!(out.trailing_ones() as usize, i, "out[{}] = {}", idx, out);
+            }
+        }
+
+        for i in 0..=16 { // every valid width for u16
+            let mut output = [0; 16];
+            unpack16(&input, &mut output, i);
+            for (idx, out) in output.iter().enumerate() {
+                assert_eq!(out.trailing_ones() as usize, i, "out[{}] = {}", idx, out);
+            }
+        }
+
+        for i in 0..=32 { // every valid width for u32
+            let mut output = [0; 32];
+            unpack32(&input, &mut output, i);
+            for (idx, out) in output.iter().enumerate() {
+                assert_eq!(out.trailing_ones() as usize, i, "out[{}] = {}", idx, out);
+            }
+        }
+
+        for i in 0..=64 { // every valid width for u64
+            let mut output = [0; 64];
+            unpack64(&input, &mut output, i);
+            for (idx, out) in output.iter().enumerate() {
+                assert_eq!(out.trailing_ones() as usize, i, "out[{}] = {}", idx, out);
+            }
+        }
+    }
+}