-
Notifications
You must be signed in to change notification settings - Fork 421
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
provide Standard for x86 __m128/256i on stable Rust, add 128xN/sizexN SIMD types #1162
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,12 +10,10 @@ | |
|
||
use crate::distributions::{Distribution, Standard}; | ||
use crate::Rng; | ||
#[cfg(all(target_arch = "x86", feature = "simd_support"))] | ||
use core::arch::x86::{__m128i, __m256i}; | ||
#[cfg(all(target_arch = "x86_64", feature = "simd_support"))] | ||
use core::arch::x86_64::{__m128i, __m256i}; | ||
use core::num::{NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize, | ||
NonZeroU128}; | ||
#[cfg(target_arch = "x86")] use core::arch::x86::*; | ||
#[cfg(target_arch = "x86_64")] use core::arch::x86_64::*; | ||
use core::mem; | ||
use core::num::{NonZeroU128, NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize}; | ||
#[cfg(feature = "simd_support")] use packed_simd::*; | ||
|
||
impl Distribution<u8> for Standard { | ||
|
@@ -109,53 +107,67 @@ impl_nzint!(NonZeroU64, NonZeroU64::new); | |
impl_nzint!(NonZeroU128, NonZeroU128::new); | ||
impl_nzint!(NonZeroUsize, NonZeroUsize::new); | ||
|
||
|
||
#[cfg(feature = "simd_support")] | ||
macro_rules! simd_impl { | ||
($(($intrinsic:ident, $vec:ty),)+) => {$( | ||
impl Distribution<$intrinsic> for Standard { | ||
#[inline] | ||
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $intrinsic { | ||
$intrinsic::from_bits(rng.gen::<$vec>()) | ||
macro_rules! packed_simd_types_impl { | ||
($($ty:ty),+) => { | ||
$( | ||
impl Distribution<$ty> for Standard { | ||
#[inline] | ||
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty { | ||
let mut vec: $ty = <$ty>::default(); | ||
unsafe { | ||
let ptr = &mut vec; | ||
let b_ptr = &mut *(ptr as *mut $ty as *mut [u8; mem::size_of::<$ty>()]); | ||
rng.fill_bytes(b_ptr); | ||
} | ||
vec.to_le() | ||
Comment on lines
+118
to
+124
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is correct, but we should really use Unfortunately There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm confused by this. Do you mean use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, I hadn't figured on There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could do something like let mut bytes = [0_u8; mem::size_of::<$ty>()];
rng.fill_bytes(&mut bytes);
let vec = $ty::from_bits($u8xN::from_slice_unaligned(&bytes));
vec.to_le() but usizexN don't have |
||
} | ||
} | ||
} | ||
)+}; | ||
)+ | ||
}; | ||
} | ||
|
||
($bits:expr,) => {}; | ||
($bits:expr, $ty:ty, $($ty_more:ty,)*) => { | ||
simd_impl!($bits, $($ty_more,)*); | ||
#[cfg(feature = "simd_support")] | ||
#[rustfmt::skip] | ||
packed_simd_types_impl!( | ||
u8x2, i8x2, | ||
u8x4, i8x4, u16x2, i16x2, | ||
u8x8, i8x8, u16x4, i16x4, u32x2, i32x2, | ||
u8x16, i8x16, u16x8, i16x8, u32x4, i32x4, u64x2, i64x2, u128x1, i128x1, | ||
u8x32, i8x32, u16x16, i16x16, u32x8, i32x8, u64x4, i64x4, u128x2, i128x2, | ||
u8x64, i8x64, u16x32, i16x32, u32x16, i32x16, u64x8, i64x8, u128x4, i128x4 | ||
); | ||
|
||
impl Distribution<$ty> for Standard { | ||
#[inline] | ||
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty { | ||
let mut vec: $ty = Default::default(); | ||
unsafe { | ||
let ptr = &mut vec; | ||
let b_ptr = &mut *(ptr as *mut $ty as *mut [u8; $bits/8]); | ||
rng.fill_bytes(b_ptr); | ||
// x86/64 are already little endian so we don't need packed_simd's `to_le` and | ||
// therefore can provide this on stable Rust. | ||
macro_rules! intrinsic_native_le_impl { | ||
($(($ty:ty, $init:ident)),+) => { | ||
$( | ||
impl Distribution<$ty> for Standard { | ||
/// This is supported on x86/64 and supported target features only. | ||
#[inline] | ||
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty { | ||
let mut vec: $ty = unsafe { $init() }; | ||
unsafe { | ||
let ptr = &mut vec; | ||
let b_ptr = &mut *(ptr as *mut $ty as *mut [u8; mem::size_of::<$ty>()]); | ||
rng.fill_bytes(b_ptr); | ||
} | ||
vec | ||
} | ||
vec.to_le() | ||
} | ||
} | ||
)+ | ||
}; | ||
} | ||
|
||
#[cfg(feature = "simd_support")] | ||
simd_impl!(16, u8x2, i8x2,); | ||
#[cfg(feature = "simd_support")] | ||
simd_impl!(32, u8x4, i8x4, u16x2, i16x2,); | ||
#[cfg(feature = "simd_support")] | ||
simd_impl!(64, u8x8, i8x8, u16x4, i16x4, u32x2, i32x2,); | ||
#[cfg(feature = "simd_support")] | ||
simd_impl!(128, u8x16, i8x16, u16x8, i16x8, u32x4, i32x4, u64x2, i64x2,); | ||
#[cfg(feature = "simd_support")] | ||
simd_impl!(256, u8x32, i8x32, u16x16, i16x16, u32x8, i32x8, u64x4, i64x4,); | ||
#[cfg(feature = "simd_support")] | ||
simd_impl!(512, u8x64, i8x64, u16x32, i16x32, u32x16, i32x16, u64x8, i64x8,); | ||
#[cfg(all( | ||
feature = "simd_support", | ||
any(target_arch = "x86", target_arch = "x86_64") | ||
))] | ||
simd_impl!((__m128i, u8x16), (__m256i, u8x32),); | ||
// this could perhaps be _mm_undefined_si128 but it seems the return type | ||
// for that will change to MaybeUninit<__m128i> | ||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] | ||
intrinsic_native_le_impl!( | ||
(__m128i, _mm_setzero_si128), | ||
(__m256i, _mm256_setzero_si256) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm baffled: (1) the types exist without additional target features while the constructors require ( Stupid questions, but:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AFAIK there are no dedicated instructions for the I agree that transmuting arrays would be a simpler solution, but instead of creating an array with proper alignment I think it will be easier to write something like this: let mut buf = [0u8; mem::size_of::<$ty>()];
rng.fill_bytes(&mut buf);
unsafe { mem::transmute_copy(&buf) }
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It will compile just fine but without see/avx it will fail to run There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @TheIronBorn |
||
); | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We only want two items, right? I'm not so keen on using glob imports.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
4 items now. Added 2
setzero
intrinsicsThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
True, though if you make the change below those will go away.