以下是一些Python 2代码,它创建了一个包含质数的文件,并将其打包成字节,每个字节编码30个数字块的素性,利用所有大于5的质数都与30互质的事实,因此它们与模30中的(1,7,11,13,17,19,23,29)之一同余。
sieve_bin.py
''' Prime sieve.
Save primes to a file with the primes in each block of 30 numbers
packed into a byte, utilising the fact that all primes > 5 are
coprime to 30 and hence are congruent to one of
(1, 7, 11, 13, 17, 19, 23, 29) mod 30
Written by PM 2Ring 2016.02.06
Prime sieve by Robert William Hanks
See https://dev59.com/EJPfa4cB1Zd3GeqPHseH
'''
import sys
from array import array
def rwh_primes(n):
''' Returns a boolean list of odd primes < n
Adapted from code by Robert William Hanks
See https://dev59.com/snI95IYBdhLWcg3w-DH0#3035188
'''
sieve = [True] * (n//2)
for i in xrange(3, int(n**0.5) + 1, 2):
if sieve[i//2]:
sieve[i*i//2::i] = [False] * ((n - i*i - 1) // (2*i) + 1)
return sieve
def main():
if len(sys.argv) != 2:
print '''Generate a file of primes packed into bits.
Usage:
%s hi
to generate a file of primes < `hi`
If `hi` isn't a multiple of 30 it will be rounded down to the nearest multiple of 30
''' % sys.argv[0]
exit()
hi = int(sys.argv[1]) // 30 * 30
fname = 'primebits'
print 'Generating primes less than %d...' % hi
odd_primes = rwh_primes(hi)
print 'Packing primes into bytes...'
prime_residues = (1, 7, 11, 13, 17, 19, 23, 29)
bitmasks = [(1<<j, (u - 1) // 2) for j, u in enumerate(prime_residues)]
packer = (sum(mask for mask, r in bitmasks if odd_primes[i + r])
for i in xrange(0, hi//2, 15))
primebytes = array('B', packer)
with open(fname, 'wb') as f:
primebytes.tofile(f)
print 'Saved to', fname
if __name__ == '__main__':
main()
这里有一个程序,它读取由 sieve_bin.py
创建的 primebits
文件。该文件被读入一个无符号字节数组 array 中,因此在 RAM 使用方面非常高效:每个从文件中读取的数据字节占用一个字节加上 array
对象本身的一些开销(在 32 位机器上为 28 字节)。
该程序的 main
函数使用 isprime
函数创建了一个质数列表。这比使用筛法慢大约 4 倍,但内存开销要小得多。它可能可以稍微加速,但这样的优化将留给读者作为练习。 :)
isprime_bin.py
''' Test if a number is prime, using a table read from disk
The table contains the primes packed into bytes, with
each byte encoding the primality of a block of 30 numbers,
utilising the fact that all primes > 5 are coprime to 30 and
hence are congruent to one of (1, 7, 11, 13, 17, 19, 23, 29) mod 30
See https://dev59.com/EJPfa4cB1Zd3GeqPHseH
'''
import sys
import os.path
from array import array
def load_primes(fname):
primebytes = array('B')
filesize = os.path.getsize(fname)
with open(fname, 'rb') as f:
primebytes.fromfile(f, filesize)
return primebytes
prime_residues = (1, 7, 11, 13, 17, 19, 23, 29)
bitmasks = dict((v, 1<<i) for i, v in enumerate(prime_residues))
def isprime(n, primebytes):
if n < 2:
return False
if n in (2, 3, 5):
return True
r = n % 30
if r not in bitmasks:
return False
b = primebytes[n // 30]
return b & bitmasks[r]
def main():
m = int(sys.argv[1]) if len(sys.argv) > 1 else 300
fname = 'primebits'
primebytes = load_primes(fname)
primes = [i for i in xrange(m) if isprime(i, primebytes)]
print primes
if __name__ == '__main__':
main()
在我的旧单核2GHz机器上,配备2GB的RAM,sieve_bin.py
创建一个primebits
文件来处理小于64000020的数字(文件大小=2133334字节)需要大约26秒;其中大约一半的时间用于筛选素数。isprime_bin.py
从该文件中生成素数列表需要大约124秒(将输出发送到/dev/null)。
通过与传统的Eratosthenes筛程序生成的输出进行比较,已验证输出。
isprime_bin.py
中的代码旨在测试任意正整数是否为质数。要简单生成素数列表,可以大大加快速度,因为前两个if
测试仅适用于数字<= 5。这是一个修改后的版本,它需要约47秒才能从2133334字节的primebits
文件中生成所有质数列表。
import sys
import os.path
from array import array
def load_primes(fname):
primebytes = array('B')
filesize = os.path.getsize(fname)
with open(fname, 'rb') as f:
primebytes.fromfile(f, filesize)
return primebytes
prime_residues = (1, 7, 11, 13, 17, 19, 23, 29)
bitmasks = dict((v, 1<<i) for i, v in enumerate(prime_residues))
def isprime(n, primebytes):
r = n % 30
if r not in bitmasks:
return False
b = primebytes[n // 30]
return b & bitmasks[r]
def primes(primebytes):
s = (2, 3, 5) + prime_residues[1:]
for i in s:
yield i
s = prime_residues
j = 30
while True:
try:
for i in s:
p = j + i
if isprime(p, primebytes):
yield p
j += 30
except IndexError:
return
def main():
m = int(sys.argv[1]) if len(sys.argv) > 1 else 300
fname = 'primebits'
primebytes = load_primes(fname)
primelist = []
for p in primes(primebytes):
if p > m:
break
primelist.append(p)
print primelist
if __name__ == '__main__':
main()