这里有一种方法 -
def per_col(a):
idx = a.argmax(0)
out = np.zeros_like(a)
r = np.arange(a.shape[1])
out[idx, r] = a[idx, r]
return out
样例运行
案例 #1 :
In [41]: a
Out[41]:
array([[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1],
[0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0],
[1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0]])
In [42]: per_col(a)
Out[42]:
array([[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]])
案例 #2(插入全0列):
In [78]: a[:,1] = 0
In [79]: a
Out[79]:
array([[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0],
[1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0]])
In [80]: per_col(a)
Out[80]:
array([[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]])
如果你喜欢单行代码或者是广播技术的粉丝,这里有另一种——
((a.argmax(0) == np.arange(a.shape[0])[:,None]).astype(int))*a.any(0)
运行示例 -
In [89]: a
Out[89]:
array([[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0],
[1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0]])
In [90]: ((a.argmax(0) == np.arange(a.shape[0])[:,None]).astype(int))*a.any(0)
Out[90]:
array([[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]])
运行时测试 -
In [98]: a = np.random.randint(0,2,(100,10000))
In [99]: %timeit ((a == 1) & (a.cumsum(axis=0) == 1)).astype(int)
100 loops, best of 3: 5.19 ms per loop
In [100]: %timeit per_col(a)
100 loops, best of 3: 3.4 ms per loop
In [101]: %timeit ((a.argmax(0) == np.arange(a.shape[0])[:,None]).astype(int))*a.any(0)
100 loops, best of 3: 7.73 ms per loop