您可以借助一些技巧来使用 `np.unique`:
import numpy as np
def safe_method(image, k):
    """Find the distinct colours of a 3-channel image and index every pixel.

    Each pixel's three channel values are written into bytes 1..3 (in memory
    order) of a zero-initialised 4-byte integer, so a single ``np.unique``
    call over those integers yields both the distinct colours and the
    per-pixel inverse mapping.

    Args:
        image: Array whose last axis holds the three colour channels.
        k: Expected number of distinct colours.

    Returns:
        ``(colours, map_)`` where ``colours`` has one row of three uint8
        values per distinct colour and ``map_`` has shape
        ``image.shape[:-1]`` and holds, per pixel, the row index into
        ``colours``.

    Raises:
        AssertionError: If the number of distinct colours found is not ``k``.
    """
    spatial_shape = image.shape[:-1]
    packed = np.zeros(spatial_shape, dtype=np.int32)

    # Byte-wise view onto the int32 buffer: writing through it fills the
    # packed integers in place. Byte 0 of every word is left at zero.
    packed_bytes = packed.view(np.int8).reshape(spatial_shape + (4,))
    packed_bytes[..., 1:] = image

    uniq, inverse = np.unique(packed, return_inverse=True)
    assert uniq.size == k

    inverse = inverse.reshape(spatial_shape)
    # Undo the packing: reinterpret each unique word as four bytes and drop
    # the unused leading byte.
    palette = uniq.view(np.uint8).reshape(-1, 4)[:, 1:]
    return palette, inverse
然而,如果像素数量远大于颜色数量,下面的启发式方法可能会显著提高速度。它尝试找到一个廉价的哈希函数(例如只看红色通道);如果找到,就用它构建查找表;如果找不到,则退回到上面的安全方法。
def _channel_selector(channel):
    """Return a function that extracts one channel along the last axis."""
    def select(x):
        return x[..., channel]
    return select


# Candidate cheap "hash" functions, tried in order: each keeps a single
# colour channel (index 0, 1 or 2 of the last axis).
CHEAP_HASHES = [_channel_selector(channel) for channel in range(3)]
def fast_method(image, k):
    """Heuristically find an image's colours and index every pixel.

    Pixels are scanned in chunks until ``k`` distinct colours have been
    collected. Each function in ``CHEAP_HASHES`` is then tried; the first one
    that maps the collected colours to ``k`` distinct values is used to build
    a lookup table from hash value to colour index. If no cheap hash
    separates the colours, the work is delegated to ``safe_method``.

    Args:
        image: Array whose last axis holds the three colour channels.
        k: Expected number of distinct colours.
            NOTE(review): the scan stops as soon as ``k`` colours are seen,
            so this presumably relies on the image containing exactly ``k``
            colours -- confirm with callers.

    Returns:
        ``(colours, map_)`` where ``colours`` is the sorted array of distinct
        colours and ``map_`` holds, per pixel, the row index into
        ``colours``. When the fallback is taken, the result of
        ``safe_method(image, k)`` is returned instead.
    """
    pixels = image.reshape(-1, 3)

    # Collect colours chunk by chunk so that, in the common case, only a
    # small prefix of the image has to go through the slow Python-level set.
    chunk = int(4 * k * np.log(k)) + 1
    colours = set()
    for chunk_start in range(0, len(pixels), chunk):
        colours.update(map(tuple, pixels[chunk_start:chunk_start + chunk]))
        if len(colours) == k:
            break
    colours = np.array(sorted(colours))

    # Look for a cheap hash that is collision-free on the collected colours.
    for method in CHEAP_HASHES:
        if len(set(method(colours))) == k:
            break
    else:
        # No cheap hash works. The result must be returned here: falling
        # through would build the lookup table from a colliding hash and
        # produce a wrong index map.
        return safe_method(image, k)

    # Lookup table from hash value to colour index. Entries for hash values
    # that belong to no collected colour are left uninitialised.
    hashed = method(colours)
    lookup = np.empty((hashed.max() + 1,), int)
    lookup[hashed] = np.arange(k)
    return colours, lookup[method(image)]
测试和计时:
from timeit import timeit
def create_image(k, M, N):
    """Build a random test image drawn from a random palette.

    Args:
        k: Number of palette entries to generate.
        M: Number of rows of the image.
        N: Number of columns of the image.

    Returns:
        ``(colours, map_, image)``: the ``(k, 3)`` uint8 palette, the
        ``(M, N)`` array of palette indices, and the ``(M, N, 3)`` image
        obtained by indexing the palette with those indices.
    """
    palette = np.random.randint(0, 256, (k, 3)).astype(np.uint8)
    indices = np.random.randint(0, k, (M, N))
    return palette, indices, palette[indices, :]
# Benchmark setup: number of colours, image height and image width.
k, M, N = 12, 1000, 1000
colours, map_, image = create_image(k, M, N)

for f in (fast_method, safe_method):
    # timeit returns the total seconds for 10 calls; multiplying by 100
    # converts that to milliseconds per call.
    elapsed = timeit(lambda: f(image, k), number=10)
    print('{:16s} {:10.6f} ms'.format(f.__name__, elapsed * 100))

    # Indexing the recovered colours with the recovered map must rebuild
    # the original image.
    rec_colours, rec_map_ = f(image, k)
    print('solution correct:', np.all(rec_colours[rec_map_, :] == image))
示例输出(12种颜色,1000x1000像素):
fast_method 3.425885 ms
solution correct: True
safe_method 73.622813 ms
solution correct: True
[230,100,140]
转换成0
? - Divakar