ValueError when loading a .h5 file in TensorFlow 2.0

9

I have a VAE architecture script as follows:

import numpy as np

import tensorflow as tf

from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, Conv2DTranspose, Lambda, Reshape, Layer
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K

INPUT_DIM = (64,64,3)

CONV_FILTERS = [32,64,64, 128]
CONV_KERNEL_SIZES = [4,4,4,4]
CONV_STRIDES = [2,2,2,2]
CONV_ACTIVATIONS = ['relu','relu','relu','relu']

DENSE_SIZE = 1024

CONV_T_FILTERS = [64,64,32,3]
CONV_T_KERNEL_SIZES = [5,5,6,6]
CONV_T_STRIDES = [2,2,2,2]
CONV_T_ACTIVATIONS = ['relu','relu','relu','sigmoid']

Z_DIM = 32

BATCH_SIZE = 100
LEARNING_RATE = 0.0001
KL_TOLERANCE = 0.5




class Sampling(Layer):
    def call(self, inputs):
        mu, log_var = inputs
        epsilon = K.random_normal(shape=K.shape(mu), mean=0., stddev=1.)
        return mu + K.exp(log_var / 2) * epsilon


class VAEModel(Model):


    def __init__(self, encoder, decoder, r_loss_factor, **kwargs):
        super(VAEModel, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.r_loss_factor = r_loss_factor

    def train_step(self, data):
        if isinstance(data, tuple):
            data = data[0]
        def compute_kernel(x, y):
            x_size = tf.shape(x)[0]
            y_size = tf.shape(y)[0]
            dim = tf.shape(x)[1]
            tiled_x = tf.tile(tf.reshape(x, tf.stack([x_size, 1, dim])), tf.stack([1, y_size, 1]))
            tiled_y = tf.tile(tf.reshape(y, tf.stack([1, y_size, dim])), tf.stack([x_size, 1, 1]))
            return tf.exp(-tf.reduce_mean(tf.square(tiled_x - tiled_y), axis=2) / tf.cast(dim, tf.float32))

        def compute_mmd(x, y):
            x_kernel = compute_kernel(x, x)
            y_kernel = compute_kernel(y, y)
            xy_kernel = compute_kernel(x, y)
            return tf.reduce_mean(x_kernel) + tf.reduce_mean(y_kernel) - 2 * tf.reduce_mean(xy_kernel)

        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.square(data - reconstruction), axis = [1,2,3]
            )
            reconstruction_loss *= self.r_loss_factor
            kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
            kl_loss = tf.reduce_sum(kl_loss, axis = 1)
            kl_loss *= -0.5

            true_samples = tf.random.normal(tf.stack([BATCH_SIZE, Z_DIM]))
            loss_mmd = compute_mmd(true_samples, z)
            

            total_loss = reconstruction_loss + loss_mmd
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return {
            "loss": total_loss,
            "reconstruction_loss": reconstruction_loss,
            "kl_loss": kl_loss,
            "mmd_loss": loss_mmd
        }
    
    def call(self, inputs):
        # the encoder returns [z_mean, z_log_var, z]; decode from the sampled z
        z_mean, z_log_var, z = self.encoder(inputs)
        return self.decoder(z)



class VAE():
    def __init__(self):
        self.models = self._build()
        self.full_model = self.models[0]
        self.encoder = self.models[1]
        self.decoder = self.models[2]

        self.input_dim = INPUT_DIM
        self.z_dim = Z_DIM
        self.learning_rate = LEARNING_RATE
        self.kl_tolerance = KL_TOLERANCE

    def _build(self):
        vae_x = Input(shape=INPUT_DIM, name='observation_input')
        vae_c1 = Conv2D(filters = CONV_FILTERS[0], kernel_size = CONV_KERNEL_SIZES[0], strides = CONV_STRIDES[0], activation=CONV_ACTIVATIONS[0], name='conv_layer_1')(vae_x)
        vae_c2 = Conv2D(filters = CONV_FILTERS[1], kernel_size = CONV_KERNEL_SIZES[1], strides = CONV_STRIDES[1], activation=CONV_ACTIVATIONS[1], name='conv_layer_2')(vae_c1)
        vae_c3 = Conv2D(filters = CONV_FILTERS[2], kernel_size = CONV_KERNEL_SIZES[2], strides = CONV_STRIDES[2], activation=CONV_ACTIVATIONS[2], name='conv_layer_3')(vae_c2)
        vae_c4 = Conv2D(filters = CONV_FILTERS[3], kernel_size = CONV_KERNEL_SIZES[3], strides = CONV_STRIDES[3], activation=CONV_ACTIVATIONS[3], name='conv_layer_4')(vae_c3)

        vae_z_in = Flatten()(vae_c4)

        vae_z_mean = Dense(Z_DIM, name='mu')(vae_z_in)
        vae_z_log_var = Dense(Z_DIM, name='log_var')(vae_z_in)

        vae_z = Sampling(name='z')([vae_z_mean, vae_z_log_var])
        

        #### DECODER: 
        vae_z_input = Input(shape=(Z_DIM,), name='z_input')

        vae_dense = Dense(1024, name='dense_layer')(vae_z_input)
        vae_unflatten = Reshape((1,1,DENSE_SIZE), name='unflatten')(vae_dense)
        vae_d1 = Conv2DTranspose(filters = CONV_T_FILTERS[0], kernel_size = CONV_T_KERNEL_SIZES[0] , strides = CONV_T_STRIDES[0], activation=CONV_T_ACTIVATIONS[0], name='deconv_layer_1')(vae_unflatten)
        vae_d2 = Conv2DTranspose(filters = CONV_T_FILTERS[1], kernel_size = CONV_T_KERNEL_SIZES[1] , strides = CONV_T_STRIDES[1], activation=CONV_T_ACTIVATIONS[1], name='deconv_layer_2')(vae_d1)
        vae_d3 = Conv2DTranspose(filters = CONV_T_FILTERS[2], kernel_size = CONV_T_KERNEL_SIZES[2] , strides = CONV_T_STRIDES[2], activation=CONV_T_ACTIVATIONS[2], name='deconv_layer_3')(vae_d2)
        vae_d4 = Conv2DTranspose(filters = CONV_T_FILTERS[3], kernel_size = CONV_T_KERNEL_SIZES[3] , strides = CONV_T_STRIDES[3], activation=CONV_T_ACTIVATIONS[3], name='deconv_layer_4')(vae_d3)
        

        #### MODELS

    
        vae_encoder = Model(vae_x, [vae_z_mean, vae_z_log_var, vae_z], name = 'encoder')
        vae_decoder = Model(vae_z_input, vae_d4, name = 'decoder')

        vae_full = VAEModel(vae_encoder, vae_decoder, 10000)

        opti = Adam(lr=LEARNING_RATE)
        vae_full.compile(optimizer=opti)
        
        return (vae_full,vae_encoder, vae_decoder)

    def set_weights(self, filepath):
        self.full_model.load_weights(filepath)

    def train(self, data):

        self.full_model.fit(data, data,
                shuffle=True,
                epochs=1,
                batch_size=BATCH_SIZE)
        
    def save_weights(self, filepath):
        self.full_model.save_weights(filepath)

The problem:

vae = VAE()
vae.set_weights(filepath)

throws:

File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py", line 2200, in load_weights
    'Unable to load weights saved in HDF5 format into a subclassed '
ValueError: Unable to load weights saved in HDF5 format into a subclassed Model which has not created its variables yet. Call the Model first, then load the weights.

I don't know much about object-oriented programming and I'm not sure what this means. The surprising part is that the code above used to work, until it stopped working. The model is trained from scratch and its weights are saved at filepath. But now, loading those same weights throws the error above!

6 Answers

20
It works if you set model.built = True before loading the model weights.
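
Applied to the VAE wrapper from the question, a minimal sketch of this workaround might look like the following (the weight path is only a placeholder):

# Workaround sketch: mark the subclassed model as built so load_weights()
# accepts the HDF5 file. Note this skips any check that the architecture
# actually matches the saved weights.
vae = VAE()
vae.full_model.built = True
vae.set_weights('./vae_weights.h5')   # placeholder path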

I think this is a shortcut... In my opinion, once the model architecture has been specified, you should call build(input_shape) to build it. Then you can load the weights. - Edward Gaere
2
This is not the right approach and should be downvoted/deleted. The layer connectivity is completely unknown, and there is no way to be sure the correct architecture will run, because the model has never actually run its build function. It will try to stuff the weights into whatever gets built, and if you use a different batch size you will run into problems. In the future, use the TF format, and run a single batch before loading the weights. - alwaysmvp45

6

I ran into the same error when loading weights:

model.load_weights("Detection_model.h5")

ValueError: Unable to load weights saved in HDF5 format into a subclassed Model which has not created its variables yet. Call the Model first, then load the weights.

The issue was solved by building the model before loading the weights:
model.build(input_shape = <INPUT_SHAPE>)
model.load_weights("Detection_model.h5")

Note: TensorFlow version 2.5.0.
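
Applied to the question's model, where the encoder input is INPUT_DIM = (64, 64, 3), a sketch of this answer might look like this (batch dimension left as None; the weight path is a placeholder):

# Build the subclassed model explicitly so its variables exist,
# then load the HDF5 weights.
vae = VAE()
vae.full_model.build(input_shape=(None,) + INPUT_DIM)
vae.full_model.load_weights('./vae_weights.h5')   # placeholder path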


1
I don't understand why this build method is needed. It works, but I'd like to understand the reason behind it, since older versions did not require it. - Swapnil Pote

3
Which version of TF are you running? For a while the default save format was hdf5, but that format simply cannot support subclassed models in the same way, so you run into this error. You can likely work around it by training on a single batch first and then loading the weights (this establishes how the pieces are connected to each other, which is not stored in hdf5).
Going forward, I recommend making sure all saving uses the TF file format; it will save you the extra work.
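
A sketch of both suggestions for the question's VAE (the zero-filled batch and the file paths are placeholders, not from the original post):

# 1) Run a single batch through fit() so the variables get created and
#    connected, then load the HDF5 weights.
vae = VAE()
dummy_batch = np.zeros((BATCH_SIZE,) + INPUT_DIM, dtype=np.float32)   # placeholder data
vae.full_model.fit(dummy_batch, dummy_batch, batch_size=BATCH_SIZE, epochs=1)
vae.set_weights('./vae_weights.h5')                                   # placeholder path

# 2) Going forward, save in the TF checkpoint format instead of HDF5:
vae.full_model.save_weights('./vae_weights_tf', save_format='tf')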

I was previously on TensorFlow 2.3. To solve the problem (or rather, to work around it for now), I downgraded to TensorFlow 1.14.0. - Nirmal Roy
Have you tried running a single batch and then loading the weights? I believe that will work, and you wouldn't have to downgrade TF and lose the features of recent versions. - alwaysmvp45
I'm curious why running on a single batch helps. - Nirmal Roy
I believe HDF5 files don't store how the layers are connected, and a subclassed network may be connected in ways that go beyond the sequential format, so that information is unknown. Running on a single batch then sets up how the variables are connected, and loading the weights should work. - alwaysmvp45
1
Hi @alwaysmvp45, I can confirm this works; I ran into the same problem. I'm also on TF 2.3. But is this really the only way to load weights in TF 2.3? All I had to add before loading the weights was a tiny model.fit(x_train[:1], y_train[:1], batch_size=1, epochs=1), but it still feels hacky. - Ulf Aslak
2
@UlfAslak If you are saving to hdf5 format, then unfortunately this is the best approach, since that format doesn't store the information needed to rebuild the model. Note that there is a model.train_on_batch() method that may be more efficient. I think the best solution is to save in the .tf format, since that should save the connectivity information, so you don't need to train on a single batch. - alwaysmvp45

3
As alwaysmvp45 pointed out, "hdf5 does not store how the layers are connected". Another way to get the layers connected is to call the model on a zero array of shape (1, w, h, c) for a prediction before loading the weights:
model(np.zeros((1,w,h,c)))
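
For the question's input shape, a sketch of this would be (the weight path is a placeholder):

# A single forward pass on a zero batch creates and connects the
# variables without training; the HDF5 weights can then be loaded.
vae = VAE()
vae.full_model(np.zeros((1,) + INPUT_DIM, dtype=np.float32))
vae.set_weights('./vae_weights.h5')   # placeholder path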

0

Before loading the weights from an .h5 file, you should build the model first:

model.build(input_shape=())
model.load_weights(r'your_h5_files.h5')

input_shape should be a four-dimensional tuple; if your input is three-dimensional, just use None for the first dimension, e.g. model.build(input_shape=(None, 224, 224, 3)).


0

Not sure whether this has changed in more recent versions (I'm on 2.4), but I had to go this route:

# Do all the build and training 
# ...
# Save the weights
model.save('path/to/location.h5')

# delete any reference to the model
del model

# Now do the load for testing
from tensorflow import keras
model = keras.models.load_model('path/to/location.h5')

If I tried the other suggestions, I got warnings about layers not existing, and I had to build the same model as the one used for training. That's no big deal, you can just add it in a function somewhere, but this worked better for me.
