Skip to content

Commit

Permalink
Drop FMA intrinsic
Browse files Browse the repository at this point in the history
Danila Kutenin pointed out:
> Technically speaking, _mm_fmadd_ps is not an SSE extension, this was
> introduced with fma extension which took place even after AVX.

To clarify the purpose of SSE2NEON, this patch drops the existing
FMA implementation.

Related: #82
  • Loading branch information
jserv committed Jun 4, 2021
1 parent 5425c24 commit c7af009
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 29 deletions.
28 changes: 7 additions & 21 deletions sse2neon.h
Original file line number Diff line number Diff line change
Expand Up @@ -359,8 +359,6 @@ FORCE_INLINE __m128d _mm_round_pd(__m128d, int);
FORCE_INLINE __m128 _mm_round_ps(__m128, int);
// SSE4.2
FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t, uint8_t);
// FMA
FORCE_INLINE __m128 _mm_fmadd_ps(__m128, __m128, __m128);

/* Backwards compatibility for compilers with lack of specific type support */

Expand Down Expand Up @@ -6025,7 +6023,13 @@ FORCE_INLINE __m128d _mm_addsub_pd(__m128d a, __m128d b)
// Combines a and b as a + b * mask, element by element. With the mask below,
// b is subtracted from a in elements 0 and 2 and added to a in elements 1
// and 3 (following the initializer order of mask).
FORCE_INLINE __m128 _mm_addsub_ps(__m128 a, __m128 b)
{
// Per-element sign multiplier applied to b.
__m128 mask = {-1.0f, 1.0f, -1.0f, 1.0f};
// NOTE(review): this text is a rendered diff without +/- markers; the next
// line appears to be the pre-change statement that the commit replaces with
// the #if/#else block below -- confirm against the repository.
return _mm_fmadd_ps(b, mask, a);
#if defined(__aarch64__)
// AArch64 path: single multiply-accumulate intrinsic with a as the
// accumulator; intended to produce the same a + mask * b as the #else path.
return vreinterpretq_m128_f32(vfmaq_f32(vreinterpretq_f32_m128(a),
vreinterpretq_f32_m128(mask),
vreinterpretq_f32_m128(b)));
#else
// Fallback path: separate multiply and add built from the SSE wrappers.
return _mm_add_ps(_mm_mul_ps(b, mask), a);
#endif
}

// Horizontally add adjacent pairs of double-precision (64-bit) floating-point
Expand Down Expand Up @@ -8003,24 +8007,6 @@ FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon)
}
#endif

/* FMA */

// Computes the fused multiply-add of 32-bit floating-point numbers.
//
// Return Value
// Multiplies a and b, then adds c to the product and returns the result.
// Note: only the AArch64 path uses a single multiply-accumulate intrinsic;
// the fallback path performs a separate multiply followed by an add.
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd
FORCE_INLINE __m128 _mm_fmadd_ps(__m128 a, __m128 b, __m128 c)
{
#if defined(__aarch64__)
// c is passed first as the accumulator operand; b and a are the factors.
return vreinterpretq_m128_f32(vfmaq_f32(vreinterpretq_f32_m128(c),
vreinterpretq_f32_m128(b),
vreinterpretq_f32_m128(a)));
#else
// No fused instruction used here: a * b is computed first, then c is added.
return _mm_add_ps(_mm_mul_ps(a, b), c);
#endif
}

/* Others */

// Perform a carry-less multiplication of two 64-bit integers, selected from a
Expand Down
6 changes: 0 additions & 6 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8773,12 +8773,6 @@ result_t test_mm_aeskeygenassist_si128(const SSE2NEONTestImpl &impl, uint32_t i)
return validate128(resultReference, resultIntrinsic);
}

/* FMA */
// Placeholder test for _mm_fmadd_ps: performs no checks, ignores both
// parameters, and reports TEST_UNIMPL.
result_t test_mm_fmadd_ps(const SSE2NEONTestImpl &impl, uint32_t i)
{
return TEST_UNIMPL;
}

/* Others */
result_t test_mm_clmulepi64_si128(const SSE2NEONTestImpl &impl, uint32_t i)
{
Expand Down
2 changes: 0 additions & 2 deletions tests/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -526,8 +526,6 @@
TYPE(mm_aesenc_si128) \
TYPE(mm_aesenclast_si128) \
TYPE(mm_aeskeygenassist_si128) \
/* FMA */ \
TYPE(mm_fmadd_ps) \
/* Others */ \
TYPE(mm_clmulepi64_si128) \
TYPE(mm_popcnt_u32) \
Expand Down

0 comments on commit c7af009

Please sign in to comment.