这里是一个针对你问题的矢量化方法。它可以显著地加快速度。
import numpy as np
def findCells(points, bounds):
    """Return, for every point, the index of the bounding box containing it.

    Parameters
    ----------
    points : array_like
        Point coordinates. Reshaped to ``(-1, 2)``, so both a single
        ``(x, y)`` pair and an ``(N, 2)`` array are accepted.
    bounds : array_like, shape (B, 4)
        One row per cell, laid out as ``[minx, miny, maxx, maxy]``.

    Returns
    -------
    numpy.ndarray, shape (N,), dtype float
        Row index into ``bounds`` of the cell that strictly contains each
        point, or ``NaN`` when no cell contains it.  When several cells
        contain the same point, the highest matching index is returned.
    """
    points = np.asarray(points).reshape((-1, 2))
    bounds = np.asarray(bounds)

    # Float result so that "no containing cell" can be encoded as NaN.
    result = np.full(points.shape[0], np.nan)
    if points.shape[0] == 0 or bounds.shape[0] == 0:
        return result

    xs = points[:, 0]
    ys = points[:, 1]

    # Broadcast (B, 1) against (N,) -> boolean mask of shape (B, N).
    # All comparisons are strict: points on a cell edge are outside.
    inside = xs > bounds[:, None, 0]
    inside &= ys > bounds[:, None, 1]
    inside &= xs < bounds[:, None, 2]
    inside &= ys < bounds[:, None, 3]

    # Pick the last matching cell explicitly.  Assigning through repeated
    # fancy indices (r[cols] = rows) leaves the winner unspecified when a
    # point lies in several overlapping cells.
    hit = inside.any(axis=0)
    last = bounds.shape[0] - 1 - np.argmax(inside[::-1], axis=0)
    result[hit] = last[hit]
    return result
def findCellsParallel(points, bounds, chunksize=100):
    """Run ``findCells`` over ``points`` in a pool of worker processes.

    Parameters
    ----------
    points : array_like
        Point coordinates; reshaped to ``(-1, 2)``.
    bounds : array_like
        Cell bounds, forwarded unchanged to ``findCells``.
    chunksize : int, optional
        Number of points handed to a single ``findCells`` call.  Values
        below 1 are treated as 1.

    Returns
    -------
    numpy.ndarray, shape (N,)
        The per-chunk results of ``findCells`` concatenated in input order.
    """
    import multiprocessing as mp
    from functools import partial

    points = np.asarray(points).reshape((-1, 2))
    total = points.shape[0]
    if total == 0:
        # np.hstack([]) raises ValueError; no need to start workers either.
        return np.full(0, np.nan)

    # Dispatch blocks of rows rather than single rows so each worker call
    # stays vectorized while its (nBounds, step) mask remains small.
    step = max(1, int(chunksize))
    chunks = [points[start:start + step] for start in range(0, total, step)]

    func = partial(findCells, bounds=bounds)
    pool = mp.Pool()
    try:
        return np.hstack(pool.map(func, chunks))
    finally:
        # map() is blocking, so nothing useful is still running here;
        # terminate and join so no worker processes are left behind.
        pool.terminate()
        pool.join()
def main():
    """Demo: locate random points in random boxes and print a sample.

    Generates uniformly random points and random ``[minx, miny, maxx, maxy]``
    boxes in the unit square, runs ``findCellsParallel``, then prints the
    first ten points, the boxes they were assigned to, and the first ten
    result indices.
    """
    # Array dimensions must be integers; 1e6 / 1e4 are float literals.
    nPoints = int(1e6)
    nBounds = int(1e4)
    points = np.random.random([nPoints, 2])
    # Sorting the two random corners along axis 1 puts the smaller x and y
    # first, so each flattened row reads [minx, miny, maxx, maxy].
    bounds = np.sort(np.random.random([nBounds, 2, 2]), 1).reshape(nBounds, 4)
    r = findCellsParallel(points, bounds)
    print(points[:10])
    for bIdx in np.unique(r[:10]):
        if np.isnan(bIdx):
            continue
        # r is a float array (NaN marks "no cell"), so cast before indexing.
        print("{}: {}".format(bIdx, bounds[int(bIdx)]))
    print(r[:10])
# The guard matters here: with multiprocessing start methods that re-import
# this module in the worker processes, an unguarded call would run again in
# every worker.
if __name__ == "__main__":
    main()
编辑：我使用你提供的数据量时出现了 `MemoryError`。如果你使用 `multiprocessing.Pool` 和它的 `map` 函数，可以避免这种情况并加快速度，详见更新后的代码。
结果:
>time python test.py
[[ 0.69083585 0.19840985]
[ 0.31732711 0.80462512]
[ 0.30542996 0.08569184]
[ 0.72582609 0.46687164]
[ 0.50534322 0.35530554]
[ 0.93581095 0.36375539]
[ 0.66226118 0.62573407]
[ 0.08941219 0.05944215]
[ 0.43015872 0.95306899]
[ 0.43171644 0.74393729]]
9935.0: [ 0.31584562 0.18404152 0.98215445 0.83625487]
9963.0: [ 0.00526106 0.017255 0.33177741 0.9894455 ]
9989.0: [ 0.17328876 0.08181912 0.33170444 0.23493507]
9992.0: [ 0.34548987 0.15906761 0.92277442 0.9972481 ]
9993.0: [ 0.12448765 0.5404578 0.33981119 0.906822 ]
9996.0: [ 0.41198261 0.50958195 0.62843379 0.82677092]
9999.0: [ 0.437169 0.17833114 0.91096133 0.70713434]
[ 9999. 9993. 9989. 9999. 9999. 9935. 9999. 9963. 9992. 9996.]
real 0m 24.352s
user 3m 4.919s
sys 0m 1.464s
`[0 0 1 0 NaN NaN]` 是之前 `bounds` 和 `points` 的结果吗？你能解释一下你是如何使用 `bounds` 的吗？ - Mazdak
`bounds` 是一个由 `[minx miny maxx maxy]` 值组成的数组，问题是实现函数 `x > minx & x < maxx & y > miny & y < maxy`，以确定例如第一个点是否在 `bounds` 数组的第一个单元格中。希望这有所帮助。 - Fabio Lamanna
`bounds` 有两个项目！！！你是如何使用这两个项目的？ - Mazdak