diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 367a703..d5a8653 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -17,7 +17,7 @@ jobs: strategy: matrix: rust: [stable, nightly] - features: ["+avx2", "+sse2"] + features: ["+avx2", "+avx", "+sse2,+sse4.1", "+sse2"] env: RUSTFLAGS: "-C target-feature=${{matrix.features}} -D warnings" steps: diff --git a/src/block/avx.rs b/src/block/avx.rs new file mode 100644 index 0000000..88c2704 --- /dev/null +++ b/src/block/avx.rs @@ -0,0 +1,92 @@ +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; +use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not}; + +#[derive(Copy, Clone, Debug)] +#[repr(transparent)] +pub struct Block(pub(super) __m256d); + +impl Block { + #[inline] + pub fn is_empty(self) -> bool { + unsafe { + let value = core::mem::transmute(self); + _mm256_testz_si256(value, value) == 1 + } + } + + #[inline] + pub fn andnot(self, other: Self) -> Self { + unsafe { Self(_mm256_andnot_pd(other.0, self.0)) } + } +} + +impl Not for Block { + type Output = Block; + #[inline] + fn not(self) -> Self::Output { + unsafe { Self(_mm256_xor_pd(self.0, Self::ALL.0)) } + } +} + +impl BitAnd for Block { + type Output = Block; + #[inline] + fn bitand(self, other: Self) -> Self::Output { + unsafe { Self(_mm256_and_pd(self.0, other.0)) } + } +} + +impl BitAndAssign for Block { + #[inline] + fn bitand_assign(&mut self, other: Self) { + unsafe { + self.0 = _mm256_and_pd(self.0, other.0); + } + } +} + +impl BitOr for Block { + type Output = Block; + #[inline] + fn bitor(self, other: Self) -> Self::Output { + unsafe { Self(_mm256_or_pd(self.0, other.0)) } + } +} + +impl BitOrAssign for Block { + #[inline] + fn bitor_assign(&mut self, other: Self) { + unsafe { + self.0 = _mm256_or_pd(self.0, other.0); + } + } +} + +impl BitXor for Block { + type Output = Block; + #[inline] + fn bitxor(self, other: Self) -> Self::Output { + unsafe { Self(_mm256_xor_pd(self.0, other.0)) } + } +} + +impl BitXorAssign for Block { + #[inline] + fn bitxor_assign(&mut self, other: Self) { + unsafe { self.0 = _mm256_xor_pd(self.0, other.0) } + } +} + +impl PartialEq for Block { + #[inline] + fn eq(&self, other: &Self) -> bool { + unsafe { + let new = _mm256_xor_pd(self.0, other.0); + let neq = core::mem::transmute(new); + _mm256_testz_si256(neq, neq) == 1 + } + } +} diff --git a/src/block/avx2.rs b/src/block/avx2.rs index 4258a5a..b359377 100644 --- a/src/block/avx2.rs +++ b/src/block/avx2.rs @@ -6,24 +6,9 @@ use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, #[derive(Copy, Clone, Debug)] #[repr(transparent)] -pub struct Block(__m256i); +pub struct Block(pub(super) __m256i); impl Block { - pub const USIZE_COUNT: usize = core::mem::size_of::() / core::mem::size_of::(); - pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]); - pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]); - pub const BITS: usize = core::mem::size_of::() * 8; - - #[inline] - pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] { - unsafe { core::mem::transmute(self.0) } - } - - #[inline] - pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self { - Self(unsafe { core::mem::transmute(array) }) - } - #[inline] pub fn is_empty(self) -> bool { unsafe { _mm256_testz_si256(self.0, self.0) == 1 } @@ -96,8 +81,8 @@ impl PartialEq for Block { #[inline] fn eq(&self, other: &Self) -> bool { unsafe { - let eq = _mm256_cmpeq_epi8(self.0, other.0); - _mm256_movemask_epi8(eq) == !(0i32) + let neq = _mm256_xor_si256(self.0, other.0); + _mm256_testz_si256(neq, neq) == 1 } } } diff --git a/src/block/default.rs b/src/block/default.rs index 7545ad1..7fc460f 100644 --- a/src/block/default.rs +++ b/src/block/default.rs @@ -2,26 +2,9 @@ use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, #[derive(Copy, Clone, PartialEq, Debug)] #[repr(transparent)] -pub struct Block(usize); +pub struct Block(pub(super) usize); impl Block { - pub const USIZE_COUNT: usize = 1; - pub const NONE: Self = Block(0); - #[allow(dead_code)] - pub const ALL: Self = Block(!0); - pub const BITS: usize = core::mem::size_of::() * 8; - - #[inline] - pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] { - [self.0] - } - - #[inline] - #[allow(dead_code)] - pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self { - Self(array[0]) - } - #[inline] pub const fn is_empty(self) -> bool { self.0 == Self::NONE.0 diff --git a/src/block/mod.rs b/src/block/mod.rs index 52158ce..226f379 100644 --- a/src/block/mod.rs +++ b/src/block/mod.rs @@ -1,43 +1,84 @@ #![allow(clippy::undocumented_unsafe_blocks)] +#![allow(dead_code)] use core::cmp::Ordering; use core::hash::{Hash, Hasher}; #[cfg(all( - not(target_arch = "wasm32"), + not(all(target_family = "wasm", target_feature = "simd128")), not(target_feature = "sse2"), + not(target_feature = "avx"), not(target_feature = "avx2"), ))] mod default; #[cfg(all( - not(target_arch = "wasm32"), + not(all(target_family = "wasm", target_feature = "simd128")), not(target_feature = "sse2"), + not(target_feature = "avx"), not(target_feature = "avx2"), ))] pub use self::default::*; #[cfg(all( - not(target_arch = "wasm32"), + any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", + not(target_feature = "avx"), not(target_feature = "avx2"), ))] mod sse2; #[cfg(all( - not(target_arch = "wasm32"), + any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", + not(target_feature = "avx"), not(target_feature = "avx2"), ))] pub use self::sse2::*; -#[cfg(all(not(target_arch = "wasm32"), target_feature = "avx2",))] +#[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "avx", + not(target_feature = "avx2") +))] +mod avx; +#[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "avx", + not(target_feature = "avx2") +))] +pub use self::avx::*; + +#[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "avx2" +))] mod avx2; -#[cfg(all(not(target_arch = "wasm32"), target_feature = "avx2",))] +#[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "avx2" +))] pub use self::avx2::*; -#[cfg(target_arch = "wasm32")] -mod wasm32; -#[cfg(target_arch = "wasm32")] -pub use self::wasm32::*; +#[cfg(all(target_family = "wasm", target_feature = "simd128"))] +mod wasm; +#[cfg(all(target_arch = "wasm", target_feature = "simd128"))] +pub use self::wasm::*; + +impl Block { + pub const USIZE_COUNT: usize = core::mem::size_of::() / core::mem::size_of::(); + pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]); + pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]); + pub const BITS: usize = core::mem::size_of::() * 8; + + #[inline] + pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] { + unsafe { core::mem::transmute(self.0) } + } + + #[inline] + pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self { + Self(unsafe { core::mem::transmute(array) }) + } +} impl Eq for Block {} @@ -51,15 +92,7 @@ impl PartialOrd for Block { impl Ord for Block { #[inline] fn cmp(&self, other: &Self) -> Ordering { - let a = self.into_usize_array(); - let b = other.into_usize_array(); - for i in 0..Self::USIZE_COUNT { - match a[i].cmp(&b[i]) { - Ordering::Equal => continue, - cmp => return cmp, - } - } - Ordering::Equal + self.into_usize_array().cmp(&other.into_usize_array()) } } @@ -73,6 +106,6 @@ impl Default for Block { impl Hash for Block { #[inline] fn hash(&self, hasher: &mut H) { - self.into_usize_array().hash(hasher) + Hash::hash_slice(&self.into_usize_array(), hasher); } } diff --git a/src/block/sse2.rs b/src/block/sse2.rs index 6f61948..6db08f7 100644 --- a/src/block/sse2.rs +++ b/src/block/sse2.rs @@ -8,24 +8,9 @@ use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, #[derive(Copy, Clone, Debug)] #[repr(transparent)] -pub struct Block(__m128i); +pub struct Block(pub(super) __m128i); impl Block { - pub const USIZE_COUNT: usize = core::mem::size_of::() / core::mem::size_of::(); - pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]); - pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]); - pub const BITS: usize = core::mem::size_of::() * 8; - - #[inline] - pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] { - unsafe { core::mem::transmute(self.0) } - } - - #[inline] - pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self { - Self(unsafe { core::mem::transmute(array) }) - } - #[inline] pub fn is_empty(self) -> bool { #[cfg(not(target_feature = "sse4.1"))] @@ -34,7 +19,7 @@ impl Block { } #[cfg(target_feature = "sse4.1")] { - unsafe { _mm_test_all_zeros(self.0, Self::ALL.0) == 1 } + unsafe { _mm_test_all_zeros(self.0, self.0) == 1 } } } diff --git a/src/block/wasm32.rs b/src/block/wasm.rs similarity index 69% rename from src/block/wasm32.rs rename to src/block/wasm.rs index 2dac999..cef686d 100644 --- a/src/block/wasm32.rs +++ b/src/block/wasm.rs @@ -8,24 +8,9 @@ use core::{ #[derive(Copy, Clone, Debug)] #[repr(transparent)] -pub struct Block(v128); +pub struct Block(pub(super) v128); impl Block { - pub const USIZE_COUNT: usize = core::mem::size_of::() / core::mem::size_of::(); - pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]); - pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]); - pub const BITS: usize = core::mem::size_of::() * 8; - - #[inline] - pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] { - unsafe { core::mem::transmute(self.0) } - } - - #[inline] - pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self { - Self(unsafe { core::mem::transmute(array) }) - } - #[inline] pub fn is_empty(self) -> bool { !v128_any_true(self.0) @@ -33,7 +18,7 @@ impl Block { #[inline] pub fn andnot(self, other: Self) -> Self { - Self(unsafe { v128_andnot(self.0, other.0) }) + Self(v128_andnot(self.0, other.0)) } } diff --git a/src/lib.rs b/src/lib.rs index 7008147..1e19140 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,7 +12,7 @@ //! When SIMD is not available on the target, the crate will gracefully fallback to a default implementation. It is intended to add support for other SIMD architectures //! once they appear in stable Rust. //! -//! Currently only SSE2/AVX2 on x86/x86_64 and wasm32 SIMD are supported as this is what stable Rust supports. +//! Currently only SSE2/AVX/AVX2 on x86/x86_64 and wasm32 SIMD are supported as this is what stable Rust supports. #![no_std] #![deny(clippy::undocumented_unsafe_blocks)]