Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for AVX #121

Merged
merged 6 commits into from
Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
strategy:
matrix:
rust: [stable, nightly]
features: ["+avx2", "+sse2"]
features: ["+avx2", "+avx", "+sse2,+sse4.1", "+sse2"]
env:
RUSTFLAGS: "-C target-feature=${{matrix.features}} -D warnings"
steps:
Expand Down
92 changes: 92 additions & 0 deletions src/block/avx.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not};

#[derive(Copy, Clone, Debug)]
#[repr(transparent)]
pub struct Block(pub(super) __m256d);

impl Block {
#[inline]
pub fn is_empty(self) -> bool {
unsafe {
let value = core::mem::transmute(self);
_mm256_testz_si256(value, value) == 1
}
}

#[inline]
pub fn andnot(self, other: Self) -> Self {
unsafe { Self(_mm256_andnot_pd(other.0, self.0)) }
}
}

impl Not for Block {
type Output = Block;
#[inline]
fn not(self) -> Self::Output {
unsafe { Self(_mm256_xor_pd(self.0, Self::ALL.0)) }
}
}

impl BitAnd for Block {
type Output = Block;
#[inline]
fn bitand(self, other: Self) -> Self::Output {
unsafe { Self(_mm256_and_pd(self.0, other.0)) }
}
}

impl BitAndAssign for Block {
#[inline]
fn bitand_assign(&mut self, other: Self) {
unsafe {
self.0 = _mm256_and_pd(self.0, other.0);
}
}
}

impl BitOr for Block {
type Output = Block;
#[inline]
fn bitor(self, other: Self) -> Self::Output {
unsafe { Self(_mm256_or_pd(self.0, other.0)) }
}
}

impl BitOrAssign for Block {
#[inline]
fn bitor_assign(&mut self, other: Self) {
unsafe {
self.0 = _mm256_or_pd(self.0, other.0);
}
}
}

impl BitXor for Block {
type Output = Block;
#[inline]
fn bitxor(self, other: Self) -> Self::Output {
unsafe { Self(_mm256_xor_pd(self.0, other.0)) }
}
}

impl BitXorAssign for Block {
#[inline]
fn bitxor_assign(&mut self, other: Self) {
unsafe { self.0 = _mm256_xor_pd(self.0, other.0) }
}
}

impl PartialEq for Block {
#[inline]
fn eq(&self, other: &Self) -> bool {
unsafe {
let new = _mm256_xor_pd(self.0, other.0);
let neq = core::mem::transmute(new);
_mm256_testz_si256(neq, neq) == 1
}
}
}
21 changes: 3 additions & 18 deletions src/block/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,9 @@ use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign,

#[derive(Copy, Clone, Debug)]
#[repr(transparent)]
pub struct Block(__m256i);
pub struct Block(pub(super) __m256i);

impl Block {
pub const USIZE_COUNT: usize = core::mem::size_of::<Self>() / core::mem::size_of::<usize>();
pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]);
pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]);
pub const BITS: usize = core::mem::size_of::<Self>() * 8;

#[inline]
pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
unsafe { core::mem::transmute(self.0) }
}

#[inline]
pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
Self(unsafe { core::mem::transmute(array) })
}

#[inline]
pub fn is_empty(self) -> bool {
unsafe { _mm256_testz_si256(self.0, self.0) == 1 }
Expand Down Expand Up @@ -96,8 +81,8 @@ impl PartialEq for Block {
#[inline]
fn eq(&self, other: &Self) -> bool {
unsafe {
let eq = _mm256_cmpeq_epi8(self.0, other.0);
_mm256_movemask_epi8(eq) == !(0i32)
let neq = _mm256_xor_si256(self.0, other.0);
_mm256_testz_si256(neq, neq) == 1
}
}
}
19 changes: 1 addition & 18 deletions src/block/default.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,9 @@ use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign,

#[derive(Copy, Clone, PartialEq, Debug)]
#[repr(transparent)]
pub struct Block(usize);
pub struct Block(pub(super) usize);

impl Block {
pub const USIZE_COUNT: usize = 1;
pub const NONE: Self = Block(0);
#[allow(dead_code)]
pub const ALL: Self = Block(!0);
pub const BITS: usize = core::mem::size_of::<Self>() * 8;

#[inline]
pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
[self.0]
}

#[inline]
#[allow(dead_code)]
pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
Self(array[0])
}

#[inline]
pub const fn is_empty(self) -> bool {
self.0 == Self::NONE.0
Expand Down
73 changes: 53 additions & 20 deletions src/block/mod.rs
Original file line number Diff line number Diff line change
@@ -1,43 +1,84 @@
#![allow(clippy::undocumented_unsafe_blocks)]
#![allow(dead_code)]

use core::cmp::Ordering;
use core::hash::{Hash, Hasher};

#[cfg(all(
not(target_arch = "wasm32"),
not(all(target_family = "wasm", target_feature = "simd128")),
not(target_feature = "sse2"),
not(target_feature = "avx"),
not(target_feature = "avx2"),
))]
mod default;
#[cfg(all(
not(target_arch = "wasm32"),
not(all(target_family = "wasm", target_feature = "simd128")),
not(target_feature = "sse2"),
not(target_feature = "avx"),
not(target_feature = "avx2"),
))]
pub use self::default::*;

#[cfg(all(
not(target_arch = "wasm32"),
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "sse2",
not(target_feature = "avx"),
not(target_feature = "avx2"),
))]
mod sse2;
#[cfg(all(
not(target_arch = "wasm32"),
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "sse2",
not(target_feature = "avx"),
not(target_feature = "avx2"),
))]
pub use self::sse2::*;

#[cfg(all(not(target_arch = "wasm32"), target_feature = "avx2",))]
#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "avx",
not(target_feature = "avx2")
))]
mod avx;
#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "avx",
not(target_feature = "avx2")
))]
pub use self::avx::*;

#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "avx2"
))]
mod avx2;
#[cfg(all(not(target_arch = "wasm32"), target_feature = "avx2",))]
#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "avx2"
))]
pub use self::avx2::*;

#[cfg(target_arch = "wasm32")]
mod wasm32;
#[cfg(target_arch = "wasm32")]
pub use self::wasm32::*;
#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
mod wasm;
#[cfg(all(target_arch = "wasm", target_feature = "simd128"))]
pub use self::wasm::*;

impl Block {
pub const USIZE_COUNT: usize = core::mem::size_of::<Self>() / core::mem::size_of::<usize>();
pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]);
pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]);
pub const BITS: usize = core::mem::size_of::<Self>() * 8;

#[inline]
pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
unsafe { core::mem::transmute(self.0) }
}

#[inline]
pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
Self(unsafe { core::mem::transmute(array) })
}
}

impl Eq for Block {}

Expand All @@ -51,15 +92,7 @@ impl PartialOrd for Block {
impl Ord for Block {
#[inline]
fn cmp(&self, other: &Self) -> Ordering {
let a = self.into_usize_array();
let b = other.into_usize_array();
for i in 0..Self::USIZE_COUNT {
match a[i].cmp(&b[i]) {
Ordering::Equal => continue,
cmp => return cmp,
}
}
Ordering::Equal
self.into_usize_array().cmp(&other.into_usize_array())
}
}

Expand All @@ -73,6 +106,6 @@ impl Default for Block {
impl Hash for Block {
#[inline]
fn hash<H: Hasher>(&self, hasher: &mut H) {
self.into_usize_array().hash(hasher)
Hash::hash_slice(&self.into_usize_array(), hasher);
}
}
19 changes: 2 additions & 17 deletions src/block/sse2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,9 @@ use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign,

#[derive(Copy, Clone, Debug)]
#[repr(transparent)]
pub struct Block(__m128i);
pub struct Block(pub(super) __m128i);

impl Block {
pub const USIZE_COUNT: usize = core::mem::size_of::<Self>() / core::mem::size_of::<usize>();
pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]);
pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]);
pub const BITS: usize = core::mem::size_of::<Self>() * 8;

#[inline]
pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
unsafe { core::mem::transmute(self.0) }
}

#[inline]
pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
Self(unsafe { core::mem::transmute(array) })
}

#[inline]
pub fn is_empty(self) -> bool {
#[cfg(not(target_feature = "sse4.1"))]
Expand All @@ -34,7 +19,7 @@ impl Block {
}
#[cfg(target_feature = "sse4.1")]
{
unsafe { _mm_test_all_zeros(self.0, Self::ALL.0) == 1 }
unsafe { _mm_test_all_zeros(self.0, self.0) == 1 }
}
}

Expand Down
19 changes: 2 additions & 17 deletions src/block/wasm32.rs → src/block/wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,17 @@ use core::{

#[derive(Copy, Clone, Debug)]
#[repr(transparent)]
pub struct Block(v128);
pub struct Block(pub(super) v128);

impl Block {
pub const USIZE_COUNT: usize = core::mem::size_of::<Self>() / core::mem::size_of::<usize>();
pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]);
pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]);
pub const BITS: usize = core::mem::size_of::<Self>() * 8;

#[inline]
pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
unsafe { core::mem::transmute(self.0) }
}

#[inline]
pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
Self(unsafe { core::mem::transmute(array) })
}

#[inline]
pub fn is_empty(self) -> bool {
!v128_any_true(self.0)
}

#[inline]
pub fn andnot(self, other: Self) -> Self {
Self(unsafe { v128_andnot(self.0, other.0) })
Self(v128_andnot(self.0, other.0))
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
//! When SIMD is not available on the target, the crate will gracefully fallback to a default implementation. It is intended to add support for other SIMD architectures
//! once they appear in stable Rust.
//!
//! Currently only SSE2/AVX2 on x86/x86_64 and wasm32 SIMD are supported as this is what stable Rust supports.
//! Currently only SSE2/AVX/AVX2 on x86/x86_64 and wasm32 SIMD are supported as this is what stable Rust supports.
#![no_std]
#![deny(clippy::undocumented_unsafe_blocks)]

Expand Down