有没有一种方法只使用位运算来找到被设置次数最少的比特位?
例如,如果我有三个位数组:
11011001
11100000
11101101
三个向量中只有一个向量的第3位和第5位被设置为1。
目前我有一个时间复杂度为O(n)
的解决方案,其中n是位数组中的位数,我遍历位数组中的每个位,并在有1时递增,但出于某种原因,我认为可以使用少量的位运算来实现O(1)
的解决方案。 有人能提供建议吗?谢谢。
有没有一种方法只使用位运算来找到被设置次数最少的比特位?
例如,如果我有三个位数组:
11011001
11100000
11101101
三个向量中只有一个向量的第3位和第5位被设置为1。
目前我有一个时间复杂度为O(n)
的解决方案,其中n是位数组中的位数,我遍历位数组中的每个位,并在有1时递增,但出于某种原因,我认为可以使用少量的位运算来实现O(1)
的解决方案。 有人能提供建议吗?谢谢。
bits1 = (bits >> 3) & 0x11;
bits2 = (bits >> 2) & 0x11;
bits3 = (bits >> 1) & 0x11;
bits4 = bits & 0x11;
bitsSum1 += bits1;
bitsSum2 += bits2;
bitsSum3 += bits3;
bitsSum4 += bits4;
111
111
011
100
101
001
000
101
然后使用标准的位计数方法来计算已设置的位数。
直接这样做通常比正常方法更慢,但您可以尝试调整算法以从不同的字中提取位,而不是使用它们的技术。最快的技术一次处理多个位,因此在您的情况下似乎难以进行优化。
uint32 x0,y0,z0,x1,y1,z1, ... x4,y4,z4; // 输入值 uint32 even0,even1,...even4,odd0...odd4; uint lowereven,lowerodd,uppereven,upperodd;
even0 = (x0 & 0x55555555) + (y0 & 0x55555555) + (z0 & 0x555555555); odd0 = ((x0>>1) & 0x55555555) + ((y0>>1) & 0x55555555) + ((z0>>1) & 0x555555555); ... 然后对 even1...even4 和 odd1...odd4 做同样的操作 lowereven = ((even0 & 0x333333333) + (even1 & 0x33333333) + (even2 & 0x33333333)...; lowerodd = ((even0 & 0x333333333) + (even1 & 0x33333333) + (even2 & 0x33333333)...; uppereven = ((even0 >> 2) & 0x33333333) + ((even1 >> 2) & 0x33333333) + ...; oddeven = ((odd0 >> 2) & 0x33333333) + ((odd1 >> 2) & 0x33333333) + ...;
在这些操作之后,这四个值将保存所有位的位计数。LowerEven将保存位0、4、8、16等的计数;LowerOdd保存1、5、9等;UpperEven保存2、6、10等;UpperOdd保存3、7、11等。
如果有超过15个数字,则可以通过对每个组执行上述操作来处理最多255个数字,然后使用八个语句来组合所有组。
值得注意的是,如果每个位都有一些前导零位,则将所有输入值相加将产生每个位的位计数,我们只需要屏蔽它或类似的操作即可检索它。那么比特计数本身变得微不足道,但会引发其他问题,例如:
在下面的代码中,我决定支持最大位数为15,但很容易扩展到255。我决定仅考虑函数的格式良好的输入(没有空的或太大的输入数组)。即使由调用者生成访问位字段的汇编可能涉及一些移位或掩码,我认为那也没关系。
此实现使用查找表进行扩展,尽管我没有对其进行分析,但我认为它应该比逐位循环解决方案快得多。
struct BitCount
{
unsigned char bit0 : 4;
unsigned char bit1 : 4;
unsigned char bit2 : 4;
unsigned char bit3 : 4;
unsigned char bit4 : 4;
unsigned char bit5 : 4;
unsigned char bit6 : 4;
unsigned char bit7 : 4;
unsigned char bit8 : 4;
unsigned char bit9 : 4;
unsigned char bitA : 4;
unsigned char bitB : 4;
unsigned char bitC : 4;
unsigned char bitD : 4;
unsigned char bitE : 4;
unsigned char bitF : 4;
};
void CountBits(const short *invals, unsigned incount, BitCount &bitcount)
{
assert(incount && incount <= 0xF && sizeof bitcount == 8);
static const unsigned expand[256] = {
// _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F
0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, 0x00000101, 0x00000110, 0x00000111, 0x00001000, 0x00001001, 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110, 0x00001111, // 0_
0x00010000, 0x00010001, 0x00010010, 0x00010011, 0x00010100, 0x00010101, 0x00010110, 0x00010111, 0x00011000, 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101, 0x00011110, 0x00011111, // 1_
0x00100000, 0x00100001, 0x00100010, 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111, 0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100, 0x00101101, 0x00101110, 0x00101111, // 2_
0x00110000, 0x00110001, 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110, 0x00110111, 0x00111000, 0x00111001, 0x00111010, 0x00111011, 0x00111100, 0x00111101, 0x00111110, 0x00111111, // 3_
0x01000000, 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101, 0x01000110, 0x01000111, 0x01001000, 0x01001001, 0x01001010, 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111, // 4_
0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100, 0x01010101, 0x01010110, 0x01010111, 0x01011000, 0x01011001, 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110, 0x01011111, // 5_
0x01100000, 0x01100001, 0x01100010, 0x01100011, 0x01100100, 0x01100101, 0x01100110, 0x01100111, 0x01101000, 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101, 0x01101110, 0x01101111, // 6_
0x01110000, 0x01110001, 0x01110010, 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111, 0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100, 0x01111101, 0x01111110, 0x01111111, // 7_
0x10000000, 0x10000001, 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110, 0x10000111, 0x10001000, 0x10001001, 0x10001010, 0x10001011, 0x10001100, 0x10001101, 0x10001110, 0x10001111, // 8_
0x10010000, 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101, 0x10010110, 0x10010111, 0x10011000, 0x10011001, 0x10011010, 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111, // 9_
0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100, 0x10100101, 0x10100110, 0x10100111, 0x10101000, 0x10101001, 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110, 0x10101111, // A_
0x10110000, 0x10110001, 0x10110010, 0x10110011, 0x10110100, 0x10110101, 0x10110110, 0x10110111, 0x10111000, 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101, 0x10111110, 0x10111111, // B_
0x11000000, 0x11000001, 0x11000010, 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111, 0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100, 0x11001101, 0x11001110, 0x11001111, // C_
0x11010000, 0x11010001, 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110, 0x11010111, 0x11011000, 0x11011001, 0x11011010, 0x11011011, 0x11011100, 0x11011101, 0x11011110, 0x11011111, // D_
0x11100000, 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101, 0x11100110, 0x11100111, 0x11101000, 0x11101001, 0x11101010, 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111, // E_
0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100, 0x11110101, 0x11110110, 0x11110111, 0x11111000, 0x11111001, 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110, 0x11111111 }; // F_
unsigned *const countLo = (unsigned*)&bitcount;
unsigned *const countHi = (unsigned*)&bitcount + 1;
*countLo = expand[*invals & 0xFF];
*countHi = expand[*invals++ >> 8];
switch (incount)
{
case 0xF:
*countLo += expand[*invals & 0xFF];
*countHi += expand[*invals++ >> 8];
case 0xE:
*countLo += expand[*invals & 0xFF];
*countHi += expand[*invals++ >> 8];
case 0xD:
*countLo += expand[*invals & 0xFF];
*countHi += expand[*invals++ >> 8];
case 0xC:
*countLo += expand[*invals & 0xFF];
*countHi += expand[*invals++ >> 8];
case 0xB:
*countLo += expand[*invals & 0xFF];
*countHi += expand[*invals++ >> 8];
case 0xA:
*countLo += expand[*invals & 0xFF];
*countHi += expand[*invals++ >> 8];
case 0x9:
*countLo += expand[*invals & 0xFF];
*countHi += expand[*invals++ >> 8];
case 0x8:
*countLo += expand[*invals & 0xFF];
*countHi += expand[*invals++ >> 8];
case 0x7:
*countLo += expand[*invals & 0xFF];
*countHi += expand[*invals++ >> 8];
case 0x6:
*countLo += expand[*invals & 0xFF];
*countHi += expand[*invals++ >> 8];
case 0x5:
*countLo += expand[*invals & 0xFF];
*countHi += expand[*invals++ >> 8];
case 0x4:
*countLo += expand[*invals & 0xFF];
*countHi += expand[*invals++ >> 8];
case 0x3:
*countLo += expand[*invals & 0xFF];
*countHi += expand[*invals++ >> 8];
case 0x2:
*countLo += expand[*invals & 0xFF];
*countHi += expand[*invals >> 8];
};
}
编辑 如果您利用乘法的特性,将位数值从4位扩展到8位,您可以在64位系统上跳过上述表格查找。
这是我们感兴趣的特性(a、b、c和d为0或1,n为二进制数):
n * (a * 2^3 + b * 2^2 + c *2^1 + d * 2^0) <=> ((a * n) << 3) + ((b * n) << 2) + ((c * n) << 1) + ((d * n) << 0)
因此,如果我们将一个字节转换为64位整数,并仔细选择一个乘数,我们就可以在产品的每个字节的第一位得到0或1。以下是这样的乘数:
00000000 00000010 00000100 00001000 00010000 00100000 01000000 10000001
或0x002040810204081
因此,我们可以像这样将一个字节扩展到64位:
unsigned char b = ...
// this operation can be used in substitution of the below look-up table
// (if the code is written for 8-bit wide count, instead of 4-bit wide counts)
unsigned __int64 valx = ((unsigned __int64)b * 0x002040810204081) & 0x0101010101010101;
union ResultType {
unsigned __int64 result;
unsigned char bitcount[8]; // bitcount[x] is the number of times the x-th most significant bit appeared
};
ResultType r;
r.result = val1 + val2 + val3 + ...; // up to 255 values can be summed before we risk overflow
r.bitcount[2] // how many times the 00000100 bit was set