Skip to content

Commit

Permalink
Add support for AVX (#121)
Browse files Browse the repository at this point in the history
  • Loading branch information
james7132 committed Mar 21, 2024
1 parent e820380 commit 2937449
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 92 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Expand Up @@ -17,7 +17,7 @@ jobs:
strategy:
matrix:
rust: [stable, nightly]
features: ["+avx2", "+sse2"]
features: ["+avx2", "+avx", "+sse2,+sse4.1", "+sse2"]
env:
RUSTFLAGS: "-C target-feature=${{matrix.features}} -D warnings"
steps:
Expand Down
92 changes: 92 additions & 0 deletions src/block/avx.rs
@@ -0,0 +1,92 @@
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not};

#[derive(Copy, Clone, Debug)]
#[repr(transparent)]
pub struct Block(pub(super) __m256d);

impl Block {
#[inline]
pub fn is_empty(self) -> bool {
unsafe {
let value = core::mem::transmute(self);
_mm256_testz_si256(value, value) == 1
}
}

#[inline]
pub fn andnot(self, other: Self) -> Self {
unsafe { Self(_mm256_andnot_pd(other.0, self.0)) }
}
}

impl Not for Block {
type Output = Block;
#[inline]
fn not(self) -> Self::Output {
unsafe { Self(_mm256_xor_pd(self.0, Self::ALL.0)) }
}
}

impl BitAnd for Block {
type Output = Block;
#[inline]
fn bitand(self, other: Self) -> Self::Output {
unsafe { Self(_mm256_and_pd(self.0, other.0)) }
}
}

impl BitAndAssign for Block {
#[inline]
fn bitand_assign(&mut self, other: Self) {
unsafe {
self.0 = _mm256_and_pd(self.0, other.0);
}
}
}

impl BitOr for Block {
type Output = Block;
#[inline]
fn bitor(self, other: Self) -> Self::Output {
unsafe { Self(_mm256_or_pd(self.0, other.0)) }
}
}

impl BitOrAssign for Block {
#[inline]
fn bitor_assign(&mut self, other: Self) {
unsafe {
self.0 = _mm256_or_pd(self.0, other.0);
}
}
}

impl BitXor for Block {
type Output = Block;
#[inline]
fn bitxor(self, other: Self) -> Self::Output {
unsafe { Self(_mm256_xor_pd(self.0, other.0)) }
}
}

impl BitXorAssign for Block {
#[inline]
fn bitxor_assign(&mut self, other: Self) {
unsafe { self.0 = _mm256_xor_pd(self.0, other.0) }
}
}

impl PartialEq for Block {
#[inline]
fn eq(&self, other: &Self) -> bool {
unsafe {
let new = _mm256_xor_pd(self.0, other.0);
let neq = core::mem::transmute(new);
_mm256_testz_si256(neq, neq) == 1
}
}
}
21 changes: 3 additions & 18 deletions src/block/avx2.rs
Expand Up @@ -6,24 +6,9 @@ use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign,

#[derive(Copy, Clone, Debug)]
#[repr(transparent)]
pub struct Block(__m256i);
pub struct Block(pub(super) __m256i);

impl Block {
pub const USIZE_COUNT: usize = core::mem::size_of::<Self>() / core::mem::size_of::<usize>();
pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]);
pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]);
pub const BITS: usize = core::mem::size_of::<Self>() * 8;

#[inline]
pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
unsafe { core::mem::transmute(self.0) }
}

#[inline]
pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
Self(unsafe { core::mem::transmute(array) })
}

#[inline]
pub fn is_empty(self) -> bool {
unsafe { _mm256_testz_si256(self.0, self.0) == 1 }
Expand Down Expand Up @@ -96,8 +81,8 @@ impl PartialEq for Block {
#[inline]
fn eq(&self, other: &Self) -> bool {
unsafe {
let eq = _mm256_cmpeq_epi8(self.0, other.0);
_mm256_movemask_epi8(eq) == !(0i32)
let neq = _mm256_xor_si256(self.0, other.0);
_mm256_testz_si256(neq, neq) == 1
}
}
}
19 changes: 1 addition & 18 deletions src/block/default.rs
Expand Up @@ -2,26 +2,9 @@ use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign,

#[derive(Copy, Clone, PartialEq, Debug)]
#[repr(transparent)]
pub struct Block(usize);
pub struct Block(pub(super) usize);

impl Block {
pub const USIZE_COUNT: usize = 1;
pub const NONE: Self = Block(0);
#[allow(dead_code)]
pub const ALL: Self = Block(!0);
pub const BITS: usize = core::mem::size_of::<Self>() * 8;

#[inline]
pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
[self.0]
}

#[inline]
#[allow(dead_code)]
pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
Self(array[0])
}

#[inline]
pub const fn is_empty(self) -> bool {
self.0 == Self::NONE.0
Expand Down
73 changes: 53 additions & 20 deletions src/block/mod.rs
@@ -1,43 +1,84 @@
#![allow(clippy::undocumented_unsafe_blocks)]
#![allow(dead_code)]

use core::cmp::Ordering;
use core::hash::{Hash, Hasher};

#[cfg(all(
not(target_arch = "wasm32"),
not(all(target_family = "wasm", target_feature = "simd128")),
not(target_feature = "sse2"),
not(target_feature = "avx"),
not(target_feature = "avx2"),
))]
mod default;
#[cfg(all(
not(target_arch = "wasm32"),
not(all(target_family = "wasm", target_feature = "simd128")),
not(target_feature = "sse2"),
not(target_feature = "avx"),
not(target_feature = "avx2"),
))]
pub use self::default::*;

#[cfg(all(
not(target_arch = "wasm32"),
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "sse2",
not(target_feature = "avx"),
not(target_feature = "avx2"),
))]
mod sse2;
#[cfg(all(
not(target_arch = "wasm32"),
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "sse2",
not(target_feature = "avx"),
not(target_feature = "avx2"),
))]
pub use self::sse2::*;

#[cfg(all(not(target_arch = "wasm32"), target_feature = "avx2",))]
#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "avx",
not(target_feature = "avx2")
))]
mod avx;
#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "avx",
not(target_feature = "avx2")
))]
pub use self::avx::*;

#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "avx2"
))]
mod avx2;
#[cfg(all(not(target_arch = "wasm32"), target_feature = "avx2",))]
#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "avx2"
))]
pub use self::avx2::*;

#[cfg(target_arch = "wasm32")]
mod wasm32;
#[cfg(target_arch = "wasm32")]
pub use self::wasm32::*;
#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
mod wasm;
#[cfg(all(target_arch = "wasm", target_feature = "simd128"))]
pub use self::wasm::*;

impl Block {
pub const USIZE_COUNT: usize = core::mem::size_of::<Self>() / core::mem::size_of::<usize>();
pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]);
pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]);
pub const BITS: usize = core::mem::size_of::<Self>() * 8;

#[inline]
pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
unsafe { core::mem::transmute(self.0) }
}

#[inline]
pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
Self(unsafe { core::mem::transmute(array) })
}
}

impl Eq for Block {}

Expand All @@ -51,15 +92,7 @@ impl PartialOrd for Block {
impl Ord for Block {
#[inline]
fn cmp(&self, other: &Self) -> Ordering {
let a = self.into_usize_array();
let b = other.into_usize_array();
for i in 0..Self::USIZE_COUNT {
match a[i].cmp(&b[i]) {
Ordering::Equal => continue,
cmp => return cmp,
}
}
Ordering::Equal
self.into_usize_array().cmp(&other.into_usize_array())
}
}

Expand All @@ -73,6 +106,6 @@ impl Default for Block {
impl Hash for Block {
#[inline]
fn hash<H: Hasher>(&self, hasher: &mut H) {
self.into_usize_array().hash(hasher)
Hash::hash_slice(&self.into_usize_array(), hasher);
}
}
19 changes: 2 additions & 17 deletions src/block/sse2.rs
Expand Up @@ -8,24 +8,9 @@ use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign,

#[derive(Copy, Clone, Debug)]
#[repr(transparent)]
pub struct Block(__m128i);
pub struct Block(pub(super) __m128i);

impl Block {
pub const USIZE_COUNT: usize = core::mem::size_of::<Self>() / core::mem::size_of::<usize>();
pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]);
pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]);
pub const BITS: usize = core::mem::size_of::<Self>() * 8;

#[inline]
pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
unsafe { core::mem::transmute(self.0) }
}

#[inline]
pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
Self(unsafe { core::mem::transmute(array) })
}

#[inline]
pub fn is_empty(self) -> bool {
#[cfg(not(target_feature = "sse4.1"))]
Expand All @@ -34,7 +19,7 @@ impl Block {
}
#[cfg(target_feature = "sse4.1")]
{
unsafe { _mm_test_all_zeros(self.0, Self::ALL.0) == 1 }
unsafe { _mm_test_all_zeros(self.0, self.0) == 1 }
}
}

Expand Down
19 changes: 2 additions & 17 deletions src/block/wasm32.rs → src/block/wasm.rs
Expand Up @@ -8,32 +8,17 @@ use core::{

#[derive(Copy, Clone, Debug)]
#[repr(transparent)]
pub struct Block(v128);
pub struct Block(pub(super) v128);

impl Block {
pub const USIZE_COUNT: usize = core::mem::size_of::<Self>() / core::mem::size_of::<usize>();
pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]);
pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]);
pub const BITS: usize = core::mem::size_of::<Self>() * 8;

#[inline]
pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
unsafe { core::mem::transmute(self.0) }
}

#[inline]
pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
Self(unsafe { core::mem::transmute(array) })
}

#[inline]
pub fn is_empty(self) -> bool {
!v128_any_true(self.0)
}

#[inline]
pub fn andnot(self, other: Self) -> Self {
Self(unsafe { v128_andnot(self.0, other.0) })
Self(v128_andnot(self.0, other.0))
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Expand Up @@ -12,7 +12,7 @@
//! When SIMD is not available on the target, the crate will gracefully fallback to a default implementation. It is intended to add support for other SIMD architectures
//! once they appear in stable Rust.
//!
//! Currently only SSE2/AVX2 on x86/x86_64 and wasm32 SIMD are supported as this is what stable Rust supports.
//! Currently only SSE2/AVX/AVX2 on x86/x86_64 and wasm32 SIMD are supported as this is what stable Rust supports.
#![no_std]
#![deny(clippy::undocumented_unsafe_blocks)]

Expand Down

0 comments on commit 2937449

Please sign in to comment.