考虑以下数组:
a = np.array([1,2,3,4,3,2,1])
我希望能够获取将数组均匀分割的元素,即在该元素之前数组的总和等于该元素之后数组的总和。在这种情况下,第4个元素 a[3] 可以将数组均匀分割。有没有更快的(numpy)方法来实现呢?还是我必须遍历所有元素?期望的函数:
f(a) = 3
我会选择类似这样的东西:
def equib(a):
    """Return the index whose running total is closest to half the grand total.

    Known limitation: the running total at index ``i`` already includes
    ``a[i]``, whereas a true balance point leaves that element out of both
    sides. The result can therefore be off by one; for example
    ``[1, 2, 3, 4, 3, 2, 1]`` yields 2 rather than 3.

    Args:
        a: 1-D NumPy array of numbers.

    Returns:
        Index of the first minimum of ``|cumsum(a) - total / 2|``.
    """
    c = a.cumsum()
    # c[-1] is the grand total; argmin reports the first index on ties.
    return np.argmin(np.abs(c - (c[-1] / 2)))
首先计算 a 的累积和。累积和意味着 c[i] = sum(a[:i+1])。然后,我们找出累积和与总和的一半之差的绝对值最小的位置。
# Update: @DSM noticed that my first version was slightly off, so here is
# another version:
def equib(a):
    """Return the index that best balances the sums on either side of it.

    ``c1[i]`` is the sum of ``a[:i + 1]`` and ``c2[i]`` is the sum of
    ``a[i:]``. Both include ``a[i]``, so their difference equals the
    difference between the strictly-left and strictly-right sums.

    Args:
        a: 1-D NumPy array of numbers.

    Returns:
        Index of the first minimum of ``|c1 - c2|``.
    """
    c1 = a.cumsum()
    # Reverse, accumulate, reverse back: suffix sums aligned with c1.
    c2 = a[::-1].cumsum()[::-1]
    return np.argmin(np.abs(c1 - c2))
如果所有的输入值都是非负数,那么最有效的方法之一可能是构建一个累加和数组,并在其中进行二分查找,以找到两侧和为总和一半的位置。然而,很容易出现二分查找错误的情况。在试图处理所有边缘情况时,我得到了以下测试结果:
class SplitpointTest(unittest.TestCase):
    """Edge cases for ``splitpoint`` on non-negative input."""

    def testFloatRounding(self):
        # Due to rounding error, the cumulative sums for these inputs are
        # [1.1, 3.3000000000000003, 3.3000000000000003, 5.5, 6.6]
        # and [0.1, 0.7999999999999999, 0.7999999999999999, 1.5, 1.6]
        # Note that under default settings, numpy won't display
        # enough precision to see that.
        self.assertEqual(2, splitpoint([1.1, 2.2, 1e-20, 2.2, 1.1]))
        self.assertEqual(2, splitpoint([0.1, 0.7, 1e-20, 0.7, 0.1]))

    def testIntRounding(self):
        # Odd integer total: halving must not round the target down.
        self.assertEqual(1, splitpoint([1, 1, 1]))

    def testIntPrecision(self):
        # Total exceeds 2**53, so it is not exactly representable as float64.
        self.assertEqual(2, splitpoint([2**60, 1, 1, 1, 2**60]))

    def testIntMax(self):
        # The total (127) is the largest value an int8 can hold.
        self.assertEqual(
            2,
            splitpoint(numpy.array([40, 23, 1, 63], dtype=numpy.int8))
        )

    def testIntZeros(self):
        # Zeros create repeated cumulative sums around the split point.
        self.assertEqual(
            4,
            splitpoint(numpy.array([0, 1, 0, 2, 0, 2, 0, 1], dtype=int))
        )

    def testFloatZeros(self):
        self.assertEqual(
            4,
            splitpoint(numpy.array([0, 1, 0, 2, 0, 2, 0, 1], dtype=float))
        )
在决定放弃之前,我尝试了以下几个版本:
def splitpoint(a):
    """First attempt: binary-search the cumulative sums for half the total.

    ``numpy.searchsorted`` needs a sorted array, so this only makes sense
    for non-negative input, where the cumulative sums never decrease.

    Args:
        a: Array-like of non-negative numbers.

    Returns:
        Insertion index of ``total / 2`` in ``numpy.cumsum(a)``.
    """
    c = numpy.cumsum(a)
    # With integer input, `/` floor-divides under Python 2 but is true
    # division under Python 3.
    return numpy.searchsorted(c, c[-1]/2)
# Fails on [1, 1, 1] (when `/` floor-divides integers, i.e. on Python 2)
def splitpoint(a):
    """Second attempt: halve the total in floating point before searching.

    Dividing by 2.0 avoids integer floor division, but the target becomes a
    float, which cannot represent every large integer total exactly.

    Args:
        a: Array-like of non-negative numbers.

    Returns:
        Insertion index of ``total / 2.0`` in ``numpy.cumsum(a)``.
    """
    c = numpy.cumsum(a)
    return numpy.searchsorted(c, c[-1]/2.0)
# Fails on [2**60, 1, 1, 1, 2**60]
def splitpoint(a):
    """Third attempt: choose the search target according to the dtype.

    Integer totals are halved as ``(total + 1) // 2`` so the target stays an
    exact integer; every other dtype falls back to dividing by 2.0.

    NOTE(review): the author reports a failure for an int8 input whose total
    is 127, presumably because ``total + 1`` can overflow the dtype. Confirm
    against the NumPy version in use.

    Args:
        a: Array-like of non-negative numbers.

    Returns:
        Insertion index of the halved total in ``numpy.cumsum(a)``.
    """
    c = numpy.cumsum(a)
    if c.dtype.kind == 'f':
        # Floating-point input.
        return numpy.searchsorted(c, c[-1]/2.0)
    elif c.dtype.kind in ('i', 'u'):
        # Integer input.
        return numpy.searchsorted(c, (c[-1]+1)//2)
    else:
        # Probably an object dtype. No great options.
        return numpy.searchsorted(c, c[-1]/2.0)
# Fails on numpy.array([63, 1, 63], dtype=int8)
def splitpoint(a):
    """Fourth attempt: round the integer half up without adding to the total.

    ``total // 2 + total % 2`` gives the same rounded-up half as
    ``(total + 1) // 2`` for non-negative totals, but never forms a value
    larger than the total itself.

    Args:
        a: Array-like of non-negative numbers.

    Returns:
        Insertion index of the halved total in ``numpy.cumsum(a)``.
    """
    c = numpy.cumsum(a)
    if c.dtype.kind == 'f':
        # Floating-point input.
        return numpy.searchsorted(c, c[-1]/2.0)
    elif c.dtype.kind in ('i', 'u'):
        # Integer input.
        return numpy.searchsorted(c, c[-1]//2 + c[-1]%2)
    else:
        # Probably an object dtype. No great options.
        return numpy.searchsorted(c, c[-1]/2.0)
# Still fails the floating-point rounding and zeros tests.
如果继续尝试,也许能让它正常工作,但不值得。chw21的第二种解决方案(通过显式最小化左右两侧之和的绝对差来实现)更容易理解,也更普遍适用。加上 a = numpy.asarray(a) 之后,它通过了以上所有测试用例以及以下测试用例,这些测试用例扩展了算法预计要处理的输入类型:
class SplitpointGeneralizedTest(unittest.TestCase):
    """Inputs beyond non-negative reals: negatives, complex, object dtype."""

    def testNegatives(self):
        self.assertEqual(2, splitpoint([-1, 5, 2, 4]))

    def testComplex(self):
        self.assertEqual(2, splitpoint([1+1j, -5+2j, 43, -4+3j]))

    def testObjectDtype(self):
        from fractions import Fraction
        from decimal import Decimal
        values = [1.5, 2.5, 3.5, 4]
        # Build real lists: on Python 3 ``map`` returns a lazy iterator,
        # which numpy would not expand into an array.
        self.assertEqual(2, splitpoint([Fraction(v) for v in values]))
        self.assertEqual(2, splitpoint([Decimal(v) for v in values]))
除非明确发现速度过慢,否则我会选择chw21的第二个解决方案。我测试时所用的、稍作修改的形式如下:
def splitpoint(a):
    """Return the index that best balances the sums on either side of it.

    Minimises ``|sum(a[:i + 1]) - sum(a[i:])|``. Both sums include ``a[i]``,
    so this equals the difference between the strictly-left and
    strictly-right sums.

    Caveat: for unsigned integer dtypes the subtraction wraps around, so
    when no exact split exists the result may not be the closest one.

    Args:
        a: Array-like or iterator of numbers.

    Returns:
        Index of the first minimum of the absolute difference.
    """
    from collections.abc import Iterator

    if isinstance(a, Iterator):
        # np.asarray would wrap a lazy iterator (e.g. a Python 3 ``map``)
        # in a 0-d object array instead of consuming it.
        a = list(a)
    a = np.asarray(a)
    c1 = a.cumsum()
    # Reverse, accumulate, reverse back: suffix sums aligned with c1.
    c2 = a[::-1].cumsum()[::-1]
    return np.argmin(np.abs(c1 - c2))
我唯一能看出的缺陷是,如果输入具有无符号数据类型,并且不存在能恰好分割输入的索引,那么由于 np.abs(c1-c2) 对无符号数据类型不起作用,此算法可能不会返回最接近分割输入的索引。问题从未指定在不存在分割索引时算法应该怎么做,因此这种行为是可以接受的,但可能值得在注释中提到 np.abs(c1-c2) 与无符号数据类型的问题。如果我们想要最接近分割输入的索引,则可以以一些额外的运行时间为代价获得它:
def splitpoint(a):
    """Return the index that best balances the sums on either side of it.

    Same approach as minimising ``|c1 - c2|`` over prefix and suffix sums,
    but unsigned integer input is handled separately so the closest split
    is still found when no exact one exists.

    Args:
        a: Array-like or iterator of numbers.

    Returns:
        Index of the first minimum of the absolute difference.
    """
    from collections.abc import Iterator

    if isinstance(a, Iterator):
        # np.asarray would wrap a lazy iterator (e.g. a Python 3 ``map``)
        # in a 0-d object array instead of consuming it.
        a = list(a)
    a = np.asarray(a)
    c1 = a.cumsum()
    # Reverse, accumulate, reverse back: suffix sums aligned with c1.
    c2 = a[::-1].cumsum()[::-1]
    if a.dtype.kind == 'u':
        # np.abs(c1-c2) doesn't work on unsigned ints
        absdiffs = np.where(c1 > c2, c1 - c2, c2 - c1)
    else:
        # c1>c2 doesn't work on complex input.
        # We also use this case for other dtypes, since it's
        # probably faster.
        absdiffs = np.abs(c1 - c2)
    return np.argmin(absdiffs)
class SplitpointUnsignedTest(unittest.TestCase):
    """Unsigned input with no exact split must still give the closest one."""

    def testBestApproximation(self):
        self.assertEqual(
            1,
            splitpoint(numpy.array([5, 5, 4, 5], dtype=numpy.uint32))
        )
def equilibirum(a):
    """Print the 1-based position of the first element that balances ``a``.

    An element balances the sequence when the sum of everything before it
    equals the sum of everything after it; an empty side sums to 0.

    Args:
        a: Sequence of numbers.

    Returns:
        0 after printing the position when a balancing element exists,
        otherwise None (nothing is printed).
    """
    left = 0
    right = sum(a)
    # One pass with running sums instead of re-summing the tail each time.
    for position, value in enumerate(a, start=1):
        right -= value  # right now excludes the candidate element
        if left == right:
            print(position)
            return 0
        left += value
    return None


a = [1, 3, 5, 2, 2]
b = equilibirum(a)
好的,这是我得到的内容,但我不确定这是否是最快的方法:
def eq(a):
    """Count the cumulative sums of ``a`` that do not exceed half the total.

    For non-negative input the cumulative sums never decrease, so this
    count is also the index of the first cumulative sum above half the
    total.

    Args:
        a: Non-empty array-like of numbers.

    Returns:
        The count as a plain ``int``.
    """
    c = np.cumsum(a)
    # count_nonzero tallies the True entries in one vectorised pass.
    return int(np.count_nonzero(c <= c[-1] / 2))
f(a) = 4 # (because a[4] = 3)
改为:
f(a) = 3
def equi(arr):
    """Return the first index whose left and right sums are equal.

    The sum of an empty side is 0, so a single-element sequence balances at
    index 0.

    Args:
        arr: Sequence of numbers supporting ``len`` and slicing.

    Returns:
        The 0-based balancing index, or -1 when the sequence is empty or no
        index balances it.
    """
    length = len(arr)
    if length == 0:
        return -1

    left = 0                # sum of arr[:j]
    right = sum(arr[1:])    # sum of arr[j + 1:]
    for j in range(length):
        if left == right:
            return j
        if j + 1 < length:
            # Slide the pivot one step right: arr[j] joins the left side
            # and arr[j + 1] leaves the right side.
            left += arr[j]
            right -= arr[j + 1]
    return -1
顺便说一下,这是我在Stack Overflow上的第一次贡献,我是一个编程初学者。如果我的解决方案不好,请随意评论!
……argmin 中的参数)。 - DSM
……searchsorted 来在 c 中搜索 c[-1] / 2。 - user2357112