既然您说“我更感兴趣的是运行时”,这里有一些更快的替代方案,可以替换您的groupby
解决方案中的cursor += sum(1 for _ in group)
。
使用@Guy的data1
,但所有段长度都除以10:
normal optimized
original 870 ms 871 ms
zip_count 612 ms 611 ms
count_of 344 ms 345 ms
list_index 387 ms 386 ms
length_hint 223 ms 222 ms
使用list(range(1000000))
代替:
normal optimized
original 385 ms 331 ms
zip_count 463 ms 401 ms
count_of 197 ms 165 ms
list_index 175 ms 143 ms
length_hint 226 ms 127 ms
优化版本使用更多的本地变量或列表推导。
我认为即使对于列表迭代器,
__length_hint__
也不能保证是精确的,但它似乎是(通过我的正确性检查),我不明白为什么不会这样。
以下是需要翻译的内容(请保留HTML标签):
这段代码 (在线尝试),但您需要缩小一些内容以不超过时间限制:
from timeit import default_timer as timer
from itertools import groupby, count
from collections import deque
from operator import countOf
def original(data):
cursor = 0
result = []
for key, group in groupby(data):
cursor += sum(1 for _ in group)
result.append(cursor)
return result
def original_opti(data):
cursor = 0
sum_ = sum
return [cursor := cursor + sum_(1 for _ in group)
for _, group in groupby(data)]
def zip_count(data):
cursor = 0
result = []
for key, group in groupby(data):
index = count()
deque(zip(group, index), 0)
cursor += next(index)
result.append(cursor)
return result
def zip_count_opti(data):
cursor = 0
result = []
append = result.append
count_, deque_, zip_, next_ = count, deque, zip, next
for key, group in groupby(data):
index = count_()
deque_(zip_(group, index), 0)
cursor += next_(index)
append(cursor)
return result
def count_of(data):
cursor = 0
result = []
for key, group in groupby(data):
cursor += countOf(group, key)
result.append(cursor)
return result
def count_of_opti(data):
cursor = 0
countOf_ = countOf
result = [cursor := cursor + countOf_(group, key)
for key, group in groupby(data)]
return result
def list_index(data):
cursor = 0
result = []
for key, _ in groupby(data):
cursor = data.index(key, cursor)
result.append(cursor)
del result[0]
result.append(len(data))
return result
def list_index_opti(data):
cursor = 0
index = data.index
groups = groupby(data)
next(groups, None)
result = [cursor := index(key, cursor)
for key, _ in groups]
result.append(len(data))
return result
def length_hint(data):
result = []
it = iter(data)
for _ in groupby(it):
result.append(len(data) - (1 + it.__length_hint__()))
del result[0]
result.append(len(data))
return result
def length_hint_opti(data):
it = iter(data)
groups = groupby(it)
next(groups, None)
n_minus_1 = len(data) - 1
length_hint = it.__length_hint__
result = [n_minus_1 - length_hint()
for _ in groups]
result.append(len(data))
return result
funcss = [
(original, original_opti),
(zip_count, zip_count_opti),
(count_of, count_of_opti),
(list_index, list_index_opti),
(length_hint, length_hint_opti),
]
data1 = [1] * 3 + [2] * 3 + [4] * 5 + [5] * 2 + [7] * 4 + [9] * 3 + [11] * 1 + [15] * 3
data1 = [x for x in data1 for _ in range(1000000)]
data2 = list(range(1000000))
for _ in range(3):
for name in 'data1', 'data2':
print(name)
data = eval(name)
expect = None
for funcs in funcss:
print(f'{funcs[0].__name__:11}', end='')
for func in funcs:
times = []
for _ in range(5):
start_time = timer()
result = func(data)
end_time = timer()
times.append(end_time - start_time)
print(f'{round(min(times) * 1e3):5d} ms', end='')
if expect is None:
expect = result
else:
assert result == expect
print()
print()
O(n)
和O(k*log n)
之间的权衡取决于k
,这可能是您事先不知道的。但如果你要寻找实践中最快的算法,我预计一个简单的for循环检查元素与之前的元素是否相同将是最快的,除非k
远小于n
。但你需要进行基准测试来确保。 - Vincent van der Weelebisect(data,data [cursor],cursor)
也可以并且更好(代码更少,不需要值为整数)。 - no comment