Python中针对超过4D数据的核密度估计

Question

Python中针对超过4D数据的核密度估计

3

我正在尝试使用SciPy的gaussian_kde函数来估计多元数据的密度。
在下面的代码中，如果维度数超过4d，可能会出现以下错误（约50%）。
如果维度数低于3d，则在大多数情况下不会出现错误。

# Import
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
%matplotlib inline

# Data
N1 = np.random.normal(size=400)
N2 = np.random.normal(scale=0.5, size=400)
N3 = np.random.normal(scale=0.5, size=400)
N4 = np.random.normal(scale=0.5, size=400)
N5 = np.random.normal(scale=0.5, size=400)
N6 = np.random.normal(scale=0.5, size=400)

a1 = N1+1*N2
a2 = N1-1*N2
a3 = N1+1*N3
a4 = N1-1*N3
a5 = N1+1*N4
a6 = N1-1*N4
a7 = N1+1*N5
a8 = N1-1*N5
a9 = N1+1*N6
a0 = N1-1*N6

# Kernel density
xy = np.vstack([a1,a2,a3,a4,a5,a6,a7,a8,a9,a0])
kernel = stats.gaussian_kde(xy)
z_est = kernel.evaluate(xy)

# Visualization
x = a1
y = a2
plt.scatter(x, y, c=z_est)

错误消息

LinAlgError                               Traceback (most recent call last)
<ipython-input-11-ce4c335d8dd1> in <module>
      2 xy = np.vstack([a1,a2,a3,a4,a5])
      3 kernel = stats.gaussian_kde(xy)
----> 4 z_est = kernel.evaluate(xy)

~\program\anaconda\lib\site-packages\scipy\stats\kde.py in evaluate(self, points)
    244         result = zeros((m,), dtype=float)
    245 
--> 246         whitening = linalg.cholesky(self.inv_cov)
    247         scaled_dataset = dot(whitening, self.dataset)
    248         scaled_points = dot(whitening, points)

~\program\anaconda\lib\site-packages\scipy\linalg\decomp_cholesky.py in cholesky(a, lower, overwrite_a, check_finite)
     89     """
     90     c, lower = _cholesky(a, lower=lower, overwrite_a=overwrite_a, clean=True,
---> 91                          check_finite=check_finite)
     92     return c
     93 

~\program\anaconda\lib\site-packages\scipy\linalg\decomp_cholesky.py in _cholesky(a, lower, overwrite_a, clean, check_finite)
     38     if info > 0:
     39         raise LinAlgError("%d-th leading minor of the array is not positive "
---> 40                           "definite" % info)
     41     if info < 0:
     42         raise ValueError('LAPACK reported an illegal value in {}-th argument'

LinAlgError: 1-th leading minor of the array is not positive definite

我为什么会收到错误提示？

- hiro_fff

1个回答

网页内容由stack overflow 提供, 点击上面的

可以查看英文原文，
原文链接

- MeralTd · Answer 1

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
%matplotlib inline

def measure(n):
    a1 = np.random.normal(size=n)
    a2= np.random.normal(scale=0.5, size=n)
    return a1+a2, a1-a2

a1, a2 = measure(4000)
xmin = a1.min()
xmax = a1.max()
ymin = a2.min()
ymax = a2.max()

X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([X.ravel(), Y.ravel()])
values = np.vstack([a1, a2])
kernel = stats.gaussian_kde(values)
z_est = np.reshape(kernel.evaluate(positions).T, X.shape)


fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(np.rot90(z_est), cmap=plt.cm.gist_earth_r,
          extent=[xmin, xmax, ymin, ymax])
ax.plot(a1, a2, 'k.', markersize=2)
ax.set_xlim([xmin, xmax])
ax.set_ylim([ymin, ymax])
plt.show()

输入图像描述