Keras custom loss function

I am trying to implement a custom loss function for an RNN (LSTM) in Keras. Below is my code.

import sys
sys.path.insert(0, "C:\\Users\\skaul\\AppData\\Local\\Continuum\\Anaconda3\\envs\\tensorflow\\Lib\\site-packages")

import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
import keras.backend as K

timesteps = 10
data_dim = 5
num_classes = 2

# expected input data shape: (batch_size, timesteps, data_dim)
model = Sequential()
model.add(LSTM(32, return_sequences=True,
               input_shape=(timesteps, data_dim)))  # returns a sequence of vectors of dimension 32
model.add(LSTM(32, return_sequences=True))  # returns a sequence of vectors of dimension 32
model.add(LSTM(32))  # return a single vector of dimension 32
model.add(Dense(2, activation='softmax'))

def custom_loss(y_true, y_pred):
    ytrue = K.argmax(y_true, axis = 1) 
    ypred = K.argmax(y_pred, axis = 1)

    true1 = ytrue
    pred1 = ypred
    pred0 = ypred - K.cast(K.variable(1),dtype = 'int64')
    pred0 = pred0 * K.cast(K.variable(-1),dtype = 'int64')
    tp = K.sum(true1*pred1) #true positives
    fn = K.sum(true1*pred0) #false negatives

    return K.cast(fn/tp,dtype = 'float32')


model.compile(loss = custom_loss,
              optimizer='adam',
              metrics=['accuracy'])

# Generate dummy training data
x_train = np.random.random((1000, timesteps, data_dim))
y_train = np.random.random((1000, num_classes))

# Generate dummy validation data
x_val = np.random.random((100, timesteps, data_dim))
y_val = np.random.random((100, num_classes))

y_a = np.random.random(y_train.shape)
y_b = np.random.random(y_train.shape)

out1 = K.eval(custom_loss(K.variable(y_a), K.variable(y_b)))
print(out1)

model.fit(x_train, y_train, batch_size=64, epochs=5, validation_data=(x_val, y_val))

I get the following error message:
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-6-0551e4a8e8ed> in <module>()
     52 print(out1)
     53 
---> 54 model.fit(x_train, y_train, batch_size=64, epochs=5, validation_data=(x_val, y_val))

~\AppData\Local\Continuum\Anaconda3\envs\tensorflow\Lib\site-packages\keras\models.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
    868                               class_weight=class_weight,
    869                               sample_weight=sample_weight,
--> 870                               initial_epoch=initial_epoch)
    871 
    872     def evaluate(self, x, y, batch_size=32, verbose=1,

~\AppData\Local\Continuum\Anaconda3\envs\tensorflow\Lib\site-packages\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
   1488         else:
   1489             ins = x + y + sample_weights
-> 1490         self._make_train_function()
   1491         f = self.train_function
   1492 

~\AppData\Local\Continuum\Anaconda3\envs\tensorflow\Lib\site-packages\keras\engine\training.py in _make_train_function(self)
   1012                 self._collected_trainable_weights,
   1013                 self.constraints,
-> 1014                 self.total_loss)
   1015             updates = self.updates + training_updates
   1016             # Gets loss and metrics. Updates weights at each call.

~\AppData\Local\Continuum\Anaconda3\envs\tensorflow\Lib\site-packages\keras\optimizers.py in get_updates(self, params, constraints, loss)
    420 
    421         for p, g, m, v in zip(params, grads, ms, vs):
--> 422             m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
    423             v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
    424             p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

c:\users\skaul\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\ops\math_ops.py in binary_op_wrapper(x, y)
    827       if not isinstance(y, sparse_tensor.SparseTensor):
    828         try:
--> 829           y = ops.convert_to_tensor(y, dtype=x.dtype.base_dtype, name="y")
    830         except TypeError:
    831           # If the RHS is not a tensor, it might be a tensor aware object

c:\users\skaul\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\framework\ops.py in convert_to_tensor(value, dtype, name, preferred_dtype)
    674       name=name,
    675       preferred_dtype=preferred_dtype,
--> 676       as_ref=False)
    677 
    678 

c:\users\skaul\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\framework\ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype)
    739 
    740         if ret is None:
--> 741           ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
    742 
    743         if ret is NotImplemented:

c:\users\skaul\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\framework\constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
    111                                          as_ref=False):
    112   _ = as_ref
--> 113   return constant(v, dtype=dtype, name=name)
    114 
    115 

c:\users\skaul\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\framework\constant_op.py in constant(value, dtype, shape, name, verify_shape)
    100   tensor_value = attr_value_pb2.AttrValue()
    101   tensor_value.tensor.CopyFrom(
--> 102       tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape, verify_shape=verify_shape))
    103   dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
    104   const_tensor = g.create_op(

c:\users\skaul\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\framework\tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape)
    362   else:
    363     if values is None:
--> 364       raise ValueError("None values not supported.")
    365     # if dtype is provided, forces numpy array to be the type
    366     # provided if possible.

ValueError: None values not supported.

This makes me think my loss function is returning a "None" value; however, before showing the error above, my code prints 0.941634. That comes from the print(out1) statement, which tests the loss function outside the RNN. Any idea what could be wrong?

But you could try it without the cast method (it will still produce a result; if the float64 part matters to you, there may be a larger precision error). - Daniel Möller
@Daniel Thanks for spotting that - I now use argmax instead of sum to generate ytrue and ypred. I believe Keras expects a float32 output from the loss function, which is why I cast (I get an error if I don't). But I still get the None values error. - mowgli
Is this your complete code? You could update it with your changes :) - Daniel Möller
OK, I tested your code and it runs fine. The problem is definitely somewhere you haven't shown. - Daniel Möller
@Daniel Just ran the code on a colleague's computer. He hasn't messed up his keras/tensorflow installation, so he doesn't need the sys import and path insert, but he still gets the "None values not supported" error. - mowgli
2 Answers

You can use tf.Print(z, [z]) (where z is your variable) to print all the variables in your custom loss function before the return statement. That way you will know the values they take before the final return is executed, and the problem will become much clearer.
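For illustration, here is a minimal sketch of that idea applied to the question's loss (custom_loss_debug is a hypothetical name; tf.Print is the TF 1.x op that returns its first argument unchanged and prints the listed tensors each time the graph evaluates it):

import tensorflow as tf
import keras.backend as K

def custom_loss_debug(y_true, y_pred):
    ytrue = K.argmax(y_true, axis=1)
    ypred = K.argmax(y_pred, axis=1)

    pred0 = (ypred - 1) * -1      # invert the 0/1 predictions
    tp = K.sum(ytrue * ypred)     # true positives
    fn = K.sum(ytrue * pred0)     # false negatives

    # Pass-through op: tp keeps its value, and [tp, fn] are printed to the
    # console every time the loss is evaluated during training.
    tp = tf.Print(tp, [tp, fn], message='tp, fn = ')

    return K.cast(fn / tp, dtype='float32')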


First, I don't recommend using fn/tp as a loss function, because if tp = 0 it can produce NaN values. So I suggest using your custom function only as a metric to monitor:

model.compile(loss = 'binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy', custom_loss])
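If you do still want to track the fn/tp ratio itself, one hedged option (not part of the original answer) is to guard the division with Keras's K.epsilon() constant, so a batch with tp = 0 no longer yields NaN/Inf:

def fn_tp_ratio(y_true, y_pred):
    ytrue = K.argmax(y_true, axis=1)
    ypred = K.argmax(y_pred, axis=1)
    pred0 = (ypred - 1) * -1                        # invert the 0/1 predictions
    tp = K.cast(K.sum(ytrue * ypred), 'float32')    # true positives
    fn = K.cast(K.sum(ytrue * pred0), 'float32')    # false negatives
    # K.epsilon() is a tiny constant (1e-7 by default) that keeps the
    # division defined when the batch contains no true positives.
    return fn / (tp + K.epsilon())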

Second, I was not able to reproduce your error, so I'm not sure whether this idea will solve your problem. Try replacing the way tp and fn are computed with TensorFlow logical ops and the reduce_sum function:

import tensorflow as tf

def custom_loss(y_true, y_pred):
    ytrue = K.argmax(y_true, axis = 1) 
    ypred = K.argmax(y_pred, axis = 1)

    ypred_bool = tf.equal(ypred > 0, True)
    ytrue_bool = tf.equal(ytrue > 0, True) 
    
    tp = tf.reduce_sum(tf.cast(tf.logical_and(ypred_bool, ytrue_bool),dtype=tf.float32),axis=0) # true positives
    fn = tf.reduce_sum(tf.cast(tf.logical_and(tf.logical_not(ypred_bool), ytrue_bool),dtype=tf.float32),axis=0) # false negatives
    
    return K.cast(fn/tp,dtype = 'float32')
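As a quick sanity check, the revised function can be evaluated on random dummy targets outside the model, mirroring the K.eval test from the question (this assumes numpy and keras.backend are imported as np and K, as in the question's code):

y_a = np.random.random((1000, 2))   # same shape as the question's y_train
y_b = np.random.random((1000, 2))
print(K.eval(custom_loss(K.variable(y_a), K.variable(y_b))))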
