Skip to content

Commit

Permalink
Use optimized popcnt instruction on all platforms if available
Browse files Browse the repository at this point in the history
Check availability of popcnt instruction at runtime with cpuid on x86.
Refuse to plot with bitfield if it's not available.

Assume presence of optimized popcount on other architectures (e.g. CNT
on aarch64).
  • Loading branch information
rostislav authored and hoffmang9 committed Mar 18, 2021
1 parent e2e8b59 commit 2f88912
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 10 deletions.
12 changes: 2 additions & 10 deletions src/bitfield.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,13 @@ struct bitfield
uint64_t const* end = buffer_.get() + end_bit / 64;
int64_t ret = 0;
while (start != end) {
#ifdef _MSC_VER
ret += __popcnt64(*start);
#else
ret += __builtin_popcountl(*start);
#endif
ret += Util::PopCount(*start);
++start;
}
int const tail = end_bit % 64;
if (tail > 0) {
uint64_t const mask = (uint64_t(1) << tail) - 1;
#ifdef _MSC_VER
ret += __popcnt64(*end & mask);
#else
ret += __builtin_popcountl(*end & mask);
#endif
ret += Util::PopCount(*end & mask);
}
return ret;
}
Expand Down
6 changes: 6 additions & 0 deletions src/plotter_disk.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,12 @@ class DiskPlotter {
throw InvalidValueException("Stripe size too large");
}

#if defined(_WIN32) || defined(__x86_64__)
if (!nobitfield && !Util::HavePopcnt()) {
throw InvalidValueException("Bitfield plotting not supported by CPU");
}
#endif /* defined(_WIN32) || defined(__x86_64__) */

std::cout << std::endl
<< "Starting plotting progress into temporary dirs: " << tmp_dirname << " and "
<< tmp2_dirname << std::endl;
Expand Down
41 changes: 41 additions & 0 deletions src/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ std::ostream &operator<<(std::ostream &strm, uint128_t const &v)
#include <byteswap.h>
#endif

/* Platform-specific cpuid include. */
#if defined(_WIN32)
#include <intrin.h>
#elif defined(__x86_64__)
#include <cpuid.h>
#endif

class Timer {
public:
Timer()
Expand Down Expand Up @@ -339,6 +346,40 @@ namespace Util {
double b = ldexp(frac, exp);
return b;
}

#if defined(_WIN32) || defined(__x86_64__)
/**
 * Executes the x86 CPUID instruction for the requested leaf.
 *
 * @param leaf  CPUID leaf to query.
 * @param regs  Output array with room for at least four values; it receives
 *              EAX, EBX, ECX and EDX, in that order.
 *
 * Marked inline because the definition lives in a header: without it, every
 * translation unit that includes this file would emit its own external
 * definition and the link would fail with duplicate symbols.
 */
inline void CpuID(uint32_t leaf, uint32_t *regs)
{
#if defined(_WIN32)
    __cpuid(reinterpret_cast<int *>(regs), static_cast<int>(leaf));
#else
    // __get_cpuid() returns 0 without writing its outputs when the leaf is
    // above the maximum the CPU supports. Report that case as "no bits set"
    // so callers never interpret stale or uninitialized register values.
    if (!__get_cpuid(leaf, &regs[0], &regs[1], &regs[2], &regs[3])) {
        regs[0] = 0;
        regs[1] = 0;
        regs[2] = 0;
        regs[3] = 0;
    }
#endif /* defined(_WIN32) */
}

/**
 * Reports whether the CPU advertises the POPCNT instruction.
 *
 * Queries CPUID leaf 1 and tests bit 23 of ECX.
 *
 * @return true if the POPCNT feature bit is set, false otherwise.
 *
 * Marked inline because the definition lives in a header; a non-inline
 * definition would be emitted by every including translation unit and
 * collide at link time.
 */
inline bool HavePopcnt(void)
{
    // Register order: EAX, EBX, ECX, EDX. Zero-initialized so that the
    // feature bit reads as "absent" if the query writes nothing.
    uint32_t regs[4] = {0};

    CpuID(1, regs);
    // Bit 23 of ECX indicates POPCNT instruction support
    return ((regs[2] >> 23) & 1) != 0;
}
#endif /* defined(_WIN32) || defined(__x86_64__) */

/**
 * Counts the bits set to 1 in a 64-bit value.
 *
 * @param n  Value whose set bits are counted.
 * @return   Number of 1 bits in n (0..64).
 *
 * On x86_64 the POPCNT instruction is emitted unconditionally through inline
 * assembly, so the CPU must support it; HavePopcnt() exists to check that
 * before this function is used.
 */
inline uint64_t PopCount(uint64_t n)
{
#if defined(_WIN32)
    return __popcnt64(n);
#elif defined(__x86_64__)
    uint64_t r;
    __asm__("popcnt %1, %0" : "=r"(r) : "r"(n));
    return r;
#else
    // Use the `long long` builtin: `__builtin_popcountl` takes `unsigned long`,
    // which is only 32 bits wide on ILP32 targets and would silently drop the
    // upper half of n.
    return __builtin_popcountll(n);
#endif /* defined(_WIN32) ... defined(__x86_64__) */
}
}

#endif // SRC_CPP_UTIL_HPP_

0 comments on commit 2f88912

Please sign in to comment.