How to specify the correlation coefficient as the loss function in Keras

19

This is my first time using keras+tensorflow. I would like to specify the correlation coefficient as the loss function. Squaring it gives a number between 0 and 1, where 0 is bad and 1 is good.
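
Concretely, the quantity to minimize is 1 - pearsonr(y_true, y_pred)[0]**2, e.g. with scipy (the data below is made up purely for illustration):

import numpy as np
from scipy.stats import pearsonr

y_true = np.array([1.0, 2.0, 3.0, 4.0])   # hypothetical targets
y_pred = np.array([1.1, 1.9, 3.2, 3.9])   # hypothetical predictions

r = pearsonr(y_true, y_pred)[0]   # Pearson correlation coefficient
print(1 - r**2)                   # near 0 for a good fit, near 1 when uncorrelated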

My basic code currently looks like this:

def baseline_model():
    model = Sequential()
    model.add(Dense(4000, input_dim=n**2, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=32, verbose=2)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, random_state=0)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))

How can I change this so that it instead optimizes for the squared correlation coefficient (i.e., minimizes 1 - r**2)?


I tried the following:

def correlation_coefficient(y_true, y_pred):
    pearson_r, _ = tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true)
    return 1-pearson_r**2

def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(4000, input_dim=n**2, kernel_initializer='normal', activation='relu'))
#    model.add(Dense(2000, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss=correlation_coefficient, optimizer='adam')
    return model

But this crashes with:

Traceback (most recent call last):
  File "deeplearning-det.py", line 67, in <module>
    results = cross_val_score(pipeline, X, Y, cv=kfold)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 321, in cross_val_score
    pre_dispatch=pre_dispatch)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 195, in cross_validate
    for train, test in cv.split(X, y, groups))
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 779, in __call__
    while self.dispatch_one_batch(iterator):
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 625, in dispatch_one_batch
    self._dispatch(tasks)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 588, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 111, in apply_async
    result = ImmediateResult(func)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 332, in __init__
    self.results = batch()
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 437, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/pipeline.py", line 259, in fit
    self._final_estimator.fit(Xt, y, **fit_params)
  File "/home/user/.local/lib/python3.5/site-packages/keras/wrappers/scikit_learn.py", line 147, in fit
    history = self.model.fit(x, y, **fit_args)
  File "/home/user/.local/lib/python3.5/site-packages/keras/models.py", line 867, in fit
    initial_epoch=initial_epoch)
  File "/home/user/.local/lib/python3.5/site-packages/keras/engine/training.py", line 1575, in fit
    self._make_train_function()
  File "/home/user/.local/lib/python3.5/site-packages/keras/engine/training.py", line 960, in _make_train_function
    loss=self.total_loss)
  File "/home/user/.local/lib/python3.5/site-packages/keras/legacy/interfaces.py", line 87, in wrapper
    return func(*args, **kwargs)
  File "/home/user/.local/lib/python3.5/site-packages/keras/optimizers.py", line 432, in get_updates
    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 856, in binary_op_wrapper
    y = ops.convert_to_tensor(y, dtype=x.dtype.base_dtype, name="y")
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 611, in convert_to_tensor
    as_ref=False)
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 676, in internal_convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 121, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 102, in constant
    tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape, verify_shape=verify_shape))
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/tensor_util.py", line 364, in make_tensor_proto
    raise ValueError("None values not supported.")
ValueError: None values not supported.
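
(As Update 3 of the answer below suggests, the streaming metric is not differentiable, so the optimizer receives a None gradient for the loss; that is what triggers this ValueError.)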

Update 1

Following the answer below, the code now runs. Unfortunately, the correlation_coefficient and correlation_coefficient_loss functions give different values from each other, and I am not sure either of them matches what you get from 1 - scipy.stats.pearsonr()[0]**2.

Why are the loss functions giving the wrong outputs, and how can they be corrected to give the same values as 1 - scipy.stats.pearsonr()[0]**2?

Here is fully self-contained code that should just run:

import numpy as np
import sys
import math
from scipy.stats import ortho_group
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import tensorflow as tf
from keras import backend as K


# Compute the permanent of M via Glynn's formula, iterating the sign
# vectors in Gray-code order.
def permanent(M):
    n = M.shape[0]
    d = np.ones(n)
    j = 0
    s = 1
    f = np.arange(n)
    v = M.sum(axis=0)
    p = np.prod(v)
    while (j < n-1):
        v -= 2*d[j]*M[j]
        d[j] = -d[j]
        s = -s
        prod = np.prod(v)
        p += s*prod
        f[0] = 0
        f[j] = f[j+1]
        f[j+1] = j+1
        j = f[0]
    return p/2**(n-1)
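
# Optional sanity check for permanent() (hypothetical helper, feasible only
# for small n): brute force over all permutations,
# per(M) = sum_sigma prod_i M[i, sigma(i)].
def permanent_bruteforce(M):
    import itertools
    n = M.shape[0]
    return sum(np.prod([M[i, s[i]] for i in range(n)])
               for s in itertools.permutations(range(n)))
# e.g.:
# M0 = np.random.rand(5, 5)
# assert np.isclose(permanent(M0), permanent_bruteforce(M0))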


def correlation_coefficient_loss(y_true, y_pred):
    # NOTE: this is the out-of-date version the comments below refer to
    # (the denominator is missing a square root); see the corrected
    # function in the accepted answer.
    x = y_true
    y = y_pred
    mx = K.mean(x)
    my = K.mean(y)
    xm, ym = x-mx, y-my
    r_num = K.sum(xm * ym)
    r_den = K.sum(K.sum(K.square(xm)) * K.sum(K.square(ym)))
    r = r_num / r_den
    return 1 - r**2


def correlation_coefficient(y_true, y_pred):
    pearson_r, update_op = tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true)
    # find all variables created for this metric
    metric_vars = [i for i in tf.local_variables() if 'correlation_coefficient' in i.name.split('/')[1]]

    # Add metric variables to GLOBAL_VARIABLES collection.
    # They will be initialized for new session.
    for v in metric_vars:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)

    # force to update metric values
    with tf.control_dependencies([update_op]):
        pearson_r = tf.identity(pearson_r)
        return 1-pearson_r**2


def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(4000, input_dim=no_rows**2, kernel_initializer='normal', activation='relu'))
#    model.add(Dense(2000, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss=correlation_coefficient_loss, optimizer='adam', metrics=[correlation_coefficient])
    return model


no_rows = 8

print("Making the input data using seed 7", file=sys.stderr)
np.random.seed(7)
U = ortho_group.rvs(no_rows**2)
U = U[:, :no_rows]
# U is a random orthogonal matrix
X = []
Y = []
print(U)
for i in range(40000):
    I = np.random.choice(no_rows**2, size=no_rows)
    A = U[I][np.lexsort(np.rot90(U[I]))]
    X.append(A.ravel())
    Y.append(-math.log(permanent(A)**2, 2))

X = np.array(X)
Y = np.array(Y)

estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=32, verbose=2)))
pipeline = Pipeline(estimators)
X_train, X_test, y_train, y_test = train_test_split(X, Y,
                                                    train_size=0.75, test_size=0.25)
pipeline.fit(X_train, y_train)

Update 2

I have given up on the correlation_coefficient function and am now just using the correlation_coefficient_loss one given by JulioDanielReyes below. However, either this is still wrong or keras is overfitting badly. Even when I have:

def baseline_model():
    model = Sequential()
    model.add(Dense(40, input_dim=no_rows**2, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss=correlation_coefficient_loss, optimizer='adam', metrics=[correlation_coefficient_loss])
    return model

I get a loss of 0.6653 after 100 epochs, but a loss of 0.857 when I test the trained model.

How can it be overfitting with so few nodes in the hidden layer?


@DanielMöller No, would you mind spelling out your idea in more detail? – Simd
Not 1 - pearson_r**2, but exactly the same thing. – Daniel Möller
I think correlation_coefficient_loss and scipy.stats.pearsonr give the same results; here is the test code: https://bpaste.net/show/0e8086fec50e – Julio Daniel Reyes
It would be awesome if someone could explain why correlation_coefficient doesn't work. – Julio Daniel Reyes
Your correlation_coefficient_loss function is out of date; please re-copy the function from my answer. – Julio Daniel Reyes
4 Answers

28
According to the keras documentation, you should pass the squared correlation coefficient as a function, not as the string 'mean_squared_error'.
The function needs to receive two tensors (y_true, y_pred). You can look at the keras source code for inspiration.
tensorflow also implements a function, tf.contrib.metrics.streaming_pearson_correlation. Just be careful with the order of its parameters; it should be something like this:
Update 1: initialize the local variables, following this issue.
import tensorflow as tf
def correlation_coefficient(y_true, y_pred):
    pearson_r, update_op = tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true, name='pearson_r')
    # find all variables created for this metric
    metric_vars = [i for i in tf.local_variables() if 'pearson_r' in i.name.split('/')]

    # Add metric variables to GLOBAL_VARIABLES collection.
    # They will be initialized for new session.
    for v in metric_vars:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)

    # force to update metric values
    with tf.control_dependencies([update_op]):
        pearson_r = tf.identity(pearson_r)
        return 1-pearson_r**2

...

model.compile(loss=correlation_coefficient, optimizer='adam')
Update 2: even though you cannot use the scipy function directly, you can look at the implementation and port it to your code using the keras backend.

Update 3: the tensorflow function may not be differentiable; your loss function needs to be something like this (please check the math):

import tensorflow as tf
from keras import backend as K
def correlation_coefficient_loss(y_true, y_pred):
    x = y_true
    y = y_pred
    mx = K.mean(x)
    my = K.mean(y)
    xm, ym = x-mx, y-my
    r_num = K.sum(tf.multiply(xm,ym))
    r_den = K.sqrt(tf.multiply(K.sum(K.square(xm)), K.sum(K.square(ym))))
    r = r_num / r_den

    r = K.maximum(K.minimum(r, 1.0), -1.0)
    return 1 - K.square(r)

Update 4: the two functions give different results, but correlation_coefficient_loss does match scipy.stats.pearsonr. Here is the test code:

import tensorflow as tf
from keras import backend as K
import numpy as np
import scipy.stats

inputa = np.array([[3,1,2,3,4,5],
                    [1,2,3,4,5,6],
                    [1,2,3,4,5,6]])
inputb = np.array([[3,1,2,3,4,5],
                    [3,1,2,3,4,5],
                    [6,5,4,3,2,1]])

with tf.Session() as sess:
    a = tf.placeholder(tf.float32, shape=[None])
    b = tf.placeholder(tf.float32, shape=[None])
    f1 = correlation_coefficient(a, b)
    f2 = correlation_coefficient_loss(a, b)

    sess.run(tf.global_variables_initializer())

    for i in range(inputa.shape[0]):

        f1_result, f2_result = sess.run([f1, f2], feed_dict={a: inputa[i], b: inputb[i]})
        scipy_result = 1 - scipy.stats.pearsonr(inputa[i], inputb[i])[0]**2
        print("a: "+ str(inputa[i]) + " b: " + str(inputb[i]))
        print("correlation_coefficient: " + str(f1_result))
        print("correlation_coefficient_loss: " + str(f2_result))
        print("scipy.stats.pearsonr:" + str(scipy_result))

Result:

a: [3 1 2 3 4 5] b: [3 1 2 3 4 5]
correlation_coefficient: -2.38419e-07
correlation_coefficient_loss: 0.0
scipy.stats.pearsonr:0.0
a: [1 2 3 4 5 6] b: [3 1 2 3 4 5]
correlation_coefficient: 0.292036
correlation_coefficient_loss: 0.428571
scipy.stats.pearsonr:0.428571428571
a: [1 2 3 4 5 6] b: [6 5 4 3 2 1]
correlation_coefficient: 0.994918
correlation_coefficient_loss: 0.0
scipy.stats.pearsonr:0.0

Another option is to implement the function using primitives and keras.backend. – Julio Daniel Reyes
That's progress... there is an extra namespace in the names of the local variables (metrics/correlation_coefficient/pearson/covariance/count/read) that I did not account for with i.name.split('/')[1]; I have updated the first function. – Julio Daniel Reyes
That's because the function needs tensors as arguments, not numpy arrays: https://bpaste.net/show/34459748fd6c. But you're right, the results are different and I'm not sure why, sorry. – Julio Daniel Reyes
Here is the documentation on metric functions: https://www.tensorflow.org/api_guides/python/contrib.metrics – Julio Daniel Reyes
Regarding Update 3, I added K.epsilon() to the denominator, r = r_num / (r_den + K.epsilon()), to avoid nan values.

11

The code below is an example implementation of the correlation coefficient in tensorflow version 2.0:

import tensorflow as tf

def correlation(x, y):    
    mx = tf.math.reduce_mean(x)
    my = tf.math.reduce_mean(y)
    xm, ym = x-mx, y-my
    r_num = tf.math.reduce_mean(tf.multiply(xm,ym))        
    r_den = tf.math.reduce_std(xm) * tf.math.reduce_std(ym)
    return r_num / r_den

It returns the same result as numpy's corrcoef function.
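
As a quick eager-mode sanity check, one can reuse the test vectors from the answer above (assuming the correlation function just defined; both lines should print roughly 0.7559, consistent with the 1 - r**2 = 0.428571 reported earlier):

import numpy as np
import tensorflow as tf

x = tf.constant([1., 2., 3., 4., 5., 6.])
y = tf.constant([3., 1., 2., 3., 4., 5.])

print(correlation(x, y).numpy())              # TF2 implementation
print(np.corrcoef([1, 2, 3, 4, 5, 6],
                  [3, 1, 2, 3, 4, 5])[0, 1])  # numpy reference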


You should remove the equals sign after return. – Catalina Chircu

2

@Trifon's answer is correct if you have all of your data available at once. The code below implements Pearson correlation as a Keras metric, which lets you compute the metric over batched inputs, as is usually done during DNN training/evaluation:

class PearsonCorrelation(tf.keras.metrics.Metric):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.cov = tf.metrics.Sum()
        self.sq_yt = tf.metrics.Sum()
        self.sq_yp = tf.metrics.Sum()
        self.mean_yp = tf.metrics.Mean()
        self.mean_yt = tf.metrics.Mean()
        self.count = tf.metrics.Sum()

    def update_state(self, y_true, y_pred):
        ''' Note y_pred are one-hot predictions, not probs/scores '''
        self.cov(y_true * y_pred)
        self.sq_yp(y_pred**2)
        self.sq_yt(y_true**2)
        self.mean_yp(y_pred)
        self.mean_yt(y_true)
        self.count(tf.reduce_sum(tf.shape(y_true)))

    def result(self):
        count = self.count.result()
        mean_yp = self.mean_yp.result()
        mean_yt = self.mean_yt.result()
        numerator = (self.cov.result() - count * self.mean_yp.result() * self.mean_yt.result())
        denominator = tf.sqrt(self.sq_yp.result() - count * mean_yp**2) * \
                      tf.sqrt(self.sq_yt.result() - count * mean_yt**2)
        return numerator / denominator

    def reset_states(self):
        self.cov.reset_states()
        self.sq_yt.reset_states()
        self.sq_yp.reset_states()
        self.mean_yp.reset_states()
        self.mean_yt.reset_states()
        self.count.reset_states()
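
A quick usage sketch of this metric outside of model.compile (the random data is made up; the streaming result should closely match np.corrcoef):

import numpy as np
import tensorflow as tf

metric = PearsonCorrelation(name='pearson_r')   # name is arbitrary

y_true = np.random.rand(100).astype('float32')
y_pred = np.random.rand(100).astype('float32')

# Feed the data in four batches of 25, as Keras would during evaluation.
for i in range(0, 100, 25):
    metric.update_state(y_true[i:i+25], y_pred[i:i+25])

print(metric.result().numpy())            # streaming Pearson r
print(np.corrcoef(y_true, y_pred)[0, 1])  # reference value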

0
r = scipy.stats.pearsonr(inputa[i], inputb[i])[0] 

r is the correlation coefficient, so why are you squaring r?

scipy_result = 1 - scipy.stats.pearsonr(inputa[i], inputb[i])[0]**2

What is the relationship between r and scipy_result?

