callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=3,
        min_delta=0.001
    )
]
According to the EarlyStopping - TensorFlow 2.0 page, the min_delta parameter is defined as follows: min_delta: minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta will count as no improvement.
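Rendered as code, my reading of that definition is the following (a minimal sketch; counts_as_improvement and its arguments are my own names, not part of the TensorFlow API):

# My reading of the min_delta definition: an absolute change of less than
# min_delta in the monitored metric does not count as an improvement.
def counts_as_improvement(previous, current, min_delta=0.001):
    return abs(current - previous) >= min_delta

With this callback in place, the training output is: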
Train on 60000 samples, validate on 10000 samples
Epoch 1/15 60000/60000 [==============================] - 10s 173us/sample - loss: 0.2040 - accuracy: 0.9391 - val_loss: 0.1117 - val_accuracy: 0.9648
Epoch 2/15 60000/60000 [==============================] - 9s 150us/sample - loss: 0.0845 - accuracy: 0.9736 - val_loss: 0.0801 - val_accuracy: 0.9748
Epoch 3/15 60000/60000 [==============================] - 9s 151us/sample - loss: 0.0574 - accuracy: 0.9817 - val_loss: 0.0709 - val_accuracy: 0.9795
Epoch 4/15 60000/60000 [==============================] - 9s 149us/sample - loss: 0.0434 - accuracy: 0.9858 - val_loss: 0.0787 - val_accuracy: 0.9761
Epoch 5/15 60000/60000 [==============================] - 9s 151us/sample - loss: 0.0331 - accuracy: 0.9893 - val_loss: 0.0644 - val_accuracy: 0.9808
Epoch 6/15 60000/60000 [==============================] - 9s 150us/sample - loss: 0.0275 - accuracy: 0.9910 - val_loss: 0.0873 - val_accuracy: 0.9779
Epoch 7/15 60000/60000 [==============================] - 9s 151us/sample - loss: 0.0232 - accuracy: 0.9921 - val_loss: 0.0746 - val_accuracy: 0.9805
Epoch 8/15 60000/60000 [==============================] - 9s 151us/sample - loss: 0.0188 - accuracy: 0.9936 - val_loss: 0.1088 - val_accuracy: 0.9748
Now, if I look at the last three epochs, viz. epochs 6, 7 and 8, the validation loss ('val_loss') values are: 0.0688, 0.0843 and 0.0847 respectively, and the differences between these 3 consecutive values are: 0.0155 and 0.0004. But isn't the first difference larger than the 'min_delta' defined in the callback?
The code I wrote to mimic this EarlyStopping check is:
import numpy as np

# numpy array holding the last 'patience = 3' values of 'val_loss'-
pv = np.array([0.0688, 0.0843, 0.0847])

# numpy array of differences between consecutive elements in 'pv'-
differences = np.diff(pv, n=1)
differences
# array([0.0155, 0.0004])

# minimum change required for the monitored metric to count as an improvement-
min_delta = 0.001

# Check whether each consecutive difference is greater than 'min_delta'-
check = differences > min_delta
check
# array([ True, False])

# Condition to see whether all 3 'val_loss' differences are less than 'min_delta',
# in which case training should stop since EarlyStopping is called-
if np.all(check == False):
    print("Stop Training - EarlyStopping is called")
    # stop training
But according to 'val_loss', not all of the differences across the last 3 epochs are greater than the 'min_delta' of 0.001: the first difference (0.0843 - 0.0688) is larger than 0.001, while the second difference (0.0847 - 0.0843) is smaller than 0.001.
Also, according to the definition of the 'patience' parameter of 'EarlyStopping':
patience: number of epochs with no improvement after which training will be stopped.
Therefore, EarlyStopping should only be called when there is no improvement in 'val_loss' for 3 consecutive epochs, where an absolute change of less than 'min_delta' does not count as an improvement.
So why was EarlyStopping called?
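Or is the improvement perhaps measured against the best 'val_loss' seen so far, rather than against the previous epoch? A minimal sketch of that interpretation (my assumption; 'best' and 'wait' are my own names, not necessarily what tf.keras uses internally), applied to the logged 'val_loss' values above, does stop after epoch 8:

# Assumed bookkeeping: track the best 'val_loss' so far and count epochs
# without an improvement of more than min_delta; stop once that count
# reaches 'patience'.
val_losses = [0.1117, 0.0801, 0.0709, 0.0787, 0.0644, 0.0873, 0.0746, 0.1088]
min_delta, patience = 0.001, 3

best = float("inf")
wait = 0
for epoch, current in enumerate(val_losses, start=1):
    if current < best - min_delta:  # improved on the best value so far
        best = current
        wait = 0
    else:
        wait += 1
    if wait >= patience:
        print(f"Epoch {epoch}: stop training - no improvement for {patience} epochs")
        break
# Epoch 8: stop training - no improvement for 3 epochs

Is this what is actually happening?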
The code for the model definition and 'fit()' is as follows:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

import tensorflow_model_optimization as tfmot
from tensorflow_model_optimization.sparsity import keras as sparsity

from tensorflow.keras import models, layers, datasets
from tensorflow.keras.layers import AveragePooling2D, Conv2D
from tensorflow.keras.layers import Dense, Flatten, Reshape, Input, InputLayer
from tensorflow.keras.models import Sequential, Model

# number of target classes for MNIST-
num_classes = 10
# Specify the parameters to be used for layer-wise pruning, NO PRUNING is done here:
pruning_params_unpruned = {
    'pruning_schedule': sparsity.PolynomialDecay(
        initial_sparsity=0.0, final_sparsity=0.0,
        begin_step=0, end_step=0, frequency=100)
}
def pruned_nn(pruning_params):
    """
    Function to define the architecture of a neural network model
    following a 300-100 architecture for the MNIST dataset, using
    the provided parameters to prune the model.
    Input: 'pruning_params' Python 3 dictionary containing the parameters used for pruning
    Output: Returns the designed and compiled neural network model
    """
    pruned_model = Sequential()
    pruned_model.add(InputLayer(input_shape=(784,)))
    pruned_model.add(Flatten())
    pruned_model.add(sparsity.prune_low_magnitude(
        Dense(units=300, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()),
        **pruning_params))
    # pruned_model.add(layers.Dropout(0.2))
    pruned_model.add(sparsity.prune_low_magnitude(
        Dense(units=100, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()),
        **pruning_params))
    # pruned_model.add(layers.Dropout(0.1))
    pruned_model.add(sparsity.prune_low_magnitude(
        Dense(units=num_classes, activation='softmax'),
        **pruning_params))

    # Compile pruned NN-
    pruned_model.compile(
        loss=tf.keras.losses.categorical_crossentropy,
        # optimizer='adam',
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        metrics=['accuracy'])

    return pruned_model
batch_size = 32
epochs = 50

# Instantiate NN-
orig_model = pruned_nn(pruning_params_unpruned)

# Train unpruned Neural Network-
history_orig = orig_model.fit(
    x=X_train, y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    callbacks=callbacks,
    validation_data=(X_test, y_test),
    shuffle=True)
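After 'fit()' returns, the point at which training stopped can be inspected from the history object and from the callback itself ('stopped_epoch' is an attribute of tf.keras.callbacks.EarlyStopping; the other names come from the code above):

# Number of epochs that actually ran-
print(len(history_orig.history['val_loss']))

# Best (lowest) 'val_loss' and the (1-indexed) epoch it occurred in-
val_loss = history_orig.history['val_loss']
print(min(val_loss), np.argmin(val_loss) + 1)

# Epoch (0-indexed) at which EarlyStopping stopped training; 0 if it never fired-
print(callbacks[0].stopped_epoch)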