计算字节中设置的位数

14

我很感兴趣,想知道在这种情况下计算字节中设置的比特数的最佳方法是什么

template< unsigned char byte > class BITS_SET
{
public:
    enum {
     B0 = (byte & 0x01) ? 1:0,
     B1 = (byte & 0x02) ? 1:0,
     B2 = (byte & 0x04) ? 1:0,
     B3 = (byte & 0x08) ? 1:0,
     B4 = (byte & 0x10) ? 1:0,
     B5 = (byte & 0x20) ? 1:0,
     B6 = (byte & 0x40) ? 1:0,
     B7 = (byte & 0x80) ? 1:0
    };
public:
 enum{RESULT = B0+B1+B2+B3+B4+B5+B6+B7};
};

如果在运行时知道字节的值,那么使用这种方法是否最优?建议在代码中使用吗?


这被称为人口统计,并且可以比一次测试一个位更有效地完成。在x86上,可以使用单个指令完成。在其他体系结构上,可以用几条指令完成。 - Paul R
1
@PaulR,所提出的模板解决方案将在编译时计算,运行时只需要不到一条指令! - Mark Ransom
啊 - 抱歉 - 没有理解问题的重点! - Paul R
@PaulR,这个问题有点令人困惑-它展示了只能在编译时工作的代码,但却问其是否在运行时最优。我猜测提问者的母语不是英语。 - Mark Ransom
是的,当然。这不是我的母语,我来自格鲁吉亚,但我不明白你为什么提到这个? - user466534
https://en.cppreference.com/w/cpp/numeric/popcount - Jesper Juhl
13个回答

0
int count(int a){ return a == 0 ? 0 : 1 + count(a&(a-1)); }

递归调用函数会使函数速度呈指数级增长吗?也许使用while循环更好? - user466534
现代编译器正在将这样的简单函数优化为“非递归”版本。 - Jarosław Gomułka
如果单词中有N个位,这仍然需要N步。如果N=32,则每个步骤都需要一个条件分支,这将需要32个步骤。坏消息。最好使用对数求和;对于N~~32,您可以获得恒定时间。我引用的BitHacks代码需要14条机器指令,其中许多在现代CPU上只需要一个时钟周期。 - Ira Baxter
这需要 2 * number_of_set_bits + 1 步。因此对于 10000010,它将需要 5 条指令。但是一般来说,从 BitHacks 得到的解决方案绝对更好。 - Jarosław Gomułka

0

对于C++ 14+: (https://onlinegdb.com/q_PR4Iocb)

#include <stdint.h>
#include <iostream>
#include <sstream>

using namespace std;

template <class T, int width = sizeof(T) << 3>
struct stBitPattern
{
    static inline constexpr T res(T x)
    {
        constexpr int hw = width >> 1;

        x = stBitPattern<T, hw>::res(x);
        x += x << hw;

        return x;
    }
};

template <class T>
struct stBitPattern<T, 8>
{
    static inline constexpr T res(T x)
    {
        return x;
    }
};

class csPop
{
    private:
        template <class T>
        static inline constexpr T _pop8(T x)
        {
            constexpr T m1 = stBitPattern<T>::res(0x55); //binary: 0101...
            constexpr T m2 = stBitPattern<T>::res(0x33); //binary: 00110011..
            constexpr T m4 = stBitPattern<T>::res(0x0f); //binary:  4 zeros,  4 ones ...

            x -= (x >> 1) & m1;             //put count of each 2 bits into those 2 bits
            x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits 
            x = (x + (x >> 4)) & m4;        //put count of each 8 bits into those 8 bits
            return x;
        }

        template <class T, int width = sizeof(T) << 3>
        struct _pop
        {
            static inline constexpr T res(T x)
            {
                const int hw = width >> 1;
    
                x = _pop<T, hw>::res(x);
                x += x >> hw;
    
                return x;
            }
        };

        template <class T>
        struct _pop<T, 8>
        {
            static inline constexpr T res(T x)
            {
                x = _pop8(x);
                return x;
            }
        };

        template<class T>
        static inline constexpr bool _chkIfTAvail(T *p, T *pend);

    public:
        template <class T>
        static inline constexpr int pop(T x)
        {
            x = _pop<T>::res(x);
            return (int)(x & 0x7f);
        }

        template <class T>
        static int pop(T *p, size_t sz);
};

template<class T>
constexpr bool csPop::_chkIfTAvail(T *p, T *pend)
{
    return ((uint8_t*)pend-(uint8_t*)p) >= sizeof(T);
}

template <class T>
int csPop::pop(T *p, size_t sz)
{
    int res = 0;
    T *pend = (T*)((uint8_t*)p + sz);
    
    if (sz >= sizeof(T))
    {
        int t = (size_t)p & (sizeof(T) - 1);
        
        sz -= t;

        if (t >= sizeof(uint8_t))
        {
            res += pop(*(uint8_t*)p);
            p = (T*)((uint8_t*)p + 1);
            t -= sizeof(uint8_t);
        }       
        if (t >= sizeof(uint16_t))
        {
            res += pop(*(uint16_t*)p);
            p = (T*)((uint16_t*)p + 1);
            t -= sizeof(uint16_t);
        }       
        if (t >= sizeof(uint32_t))
        {
            res += pop(*(uint32_t*)p);
            p = (T*)((uint32_t*)p + 1);
            t -= sizeof(uint32_t);
        }
        if (t >= sizeof(uint64_t))
        {
            res += pop(*(uint64_t*)p);
            p = (T*)((uint64_t*)p + 1);
            t -= sizeof(uint64_t);
        }

        if (_chkIfTAvail(p, pend))
        {
            sz -= sizeof(T);
            res += pop(*p++);

            for(; _chkIfTAvail(p, pend); p++, sz-= sizeof(T))
            {
                res += pop(*p);
            }       
        }
    } // if (sz >= sizeof(T))

    if (sz >= sizeof(uint64_t))
    {
        res += pop(*(uint64_t*)p);
        p = (T*)((uint64_t*)p + 1);
        sz -= sizeof(uint64_t);
    }
    if (sz >= sizeof(uint32_t))
    {
        res += pop(*(uint32_t*)p);
        p = (T*)((uint32_t*)p + 1);
        sz -= sizeof(uint32_t);
    }
    if (sz >= sizeof(uint16_t))
    {
        res += pop(*(uint16_t*)p);
        p = (T*)((uint16_t*)p + 1);
        sz -= sizeof(uint16_t);
    }
    if (sz >= sizeof(uint8_t))
    {
        res += pop(*(uint8_t*)p);
        p = (T*)((uint8_t*)p + 1);
        sz -= sizeof(uint8_t);
    }

    return res;
}

int main()
{
    uint32_t x = 16777215;
    cout << "pop(" << x << ") = " << csPop::pop(x) << endl;
    
    uint32_t a[] = {x, x, x, x, 15};
    uint32_t *p_end = a + sizeof(a)/sizeof(a[0]) - 1;
    uint32_t *p = a;
    stringstream ss;
    
    for(; p_end - p; p++)
    {
        ss << to_string(*p) << ", ";
    }
    ss << to_string(*p);
    
    cout << "pop({" << ss.str() << "}) = " << csPop::pop(a, sizeof(a)) << endl;

    return 0;
}

结果:

pop(16777215) = 24
pop({16777215, 16777215, 16777215, 16777215, 15}) = 100

-2
#include <ctime>
#include <iostream>
using namespace std;

int count1s(unsigned char byte) {
  if (byte == 0) {
    return 0;
  }

  if (byte & 0x01) {
    return 1 + count1s(byte >> 1);
  }
  return count1s(byte >> 1);
}

int count1s2(unsigned char byte) {
  static const int ones[256] = {
      0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
      2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
      2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
      4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
      3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
      4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};

  return ones[(int)byte];
}

int main() {
  time_t start = clock();
  int c = count1s(205);
  time_t end = clock();
  cout << "count1: " << c << " time: " << double(end - start) << endl;
  start = clock();
  c = count1s2(205);
  end = clock();
  cout << "count2: " << c << " time: " << double(end - start) << endl;
  return 0;
}

1
请正确格式化您的答案,并给出解释。简单地在回复中添加代码并不足以成为一个好的答案。 - m_callens
感谢您提供了这个256元素查找表。我只希望它是正确的。 - Rodrigo

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接