TensorFlow MNIST example accuracy does not increase

I am learning TensorFlow and TensorBoard, following this tutorial. Below is my code. The accuracy stays stuck at random-guessing level, and I cannot figure out what is wrong.
Could someone point out the mistake? I would also like to know how to debug in TensorFlow. Thanks.

Imports

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Define the convolutional layer

def conv_layer(input, size_in, size_out, name="conv"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([5, 5, size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
        act = tf.nn.relu(conv + b)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        # 2x2 max-pooling halves each spatial dimension
        return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

Define the fully connected layer

def fc_layer(input, size_in, size_out, name="fc"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        act = tf.nn.relu(tf.matmul(input, w) + b)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        return act

Define the model

def mnist_model(learning_rate, path):
    tf.reset_default_graph()
    sess = tf.Session()

    x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    tf.summary.image('input', x_image, 3)
    y = tf.placeholder(tf.float32, shape=[None, 10], name="labels")

    conv1 = conv_layer(x_image, 1, 32, "conv1")
    conv_out = conv_layer(conv1, 32, 64, "conv2")

    # two rounds of 2x2 pooling: 28 -> 14 -> 7
    flattened = tf.reshape(conv_out, [-1, 7 * 7 * 64])

    fc1 = fc_layer(flattened, 7 * 7 * 64, 1024, "fc1")
    logits = fc_layer(fc1, 1024, 10, "fc2")

    with tf.name_scope("xent"):
        xent = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=y), name="xent")
        tf.summary.scalar("xent", xent)

    with tf.name_scope("train"):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

    summ = tf.summary.merge_all()

    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(path)
    writer.add_graph(sess.graph)


    for i in range(2000):
        batch = mnist.train.next_batch(100)
        if i % 50 == 0:
            [train_accuracy, s] = sess.run([accuracy, summ], feed_dict={x: batch[0], y: batch[1]})
            print(train_accuracy)
            writer.add_summary(s, i)
        sess.run(train_step, feed_dict={x: batch[0], y: batch[1]})

Run

mnist_model(1e-3, path="/tmp/mnist_demo/10")

Output

0.09
0.08
0.04
0.07
0.12
0.12
0.09
0.12
0.08
0.1
0.11
0.14
0.11
0.11
0.13
0.11
0.19
0.06
1 Answer

The problem is that you apply a relu activation on the last layer, so every negative logit is clipped to zero and the network can never express that a class is unlikely.
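To see why this kills learning, here is a tiny numeric illustration (the pre-activation values below are invented, not taken from the question's model):

import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

# invented final-layer pre-activations for one sample; mostly negative,
# as is typical early in training
pre = np.array([-2.1, -0.3, -1.7, 0.4, -0.9, -1.2, -2.5, -0.1, -3.0, -0.6])
print(softmax(pre))                 # distinct class probabilities
print(softmax(np.maximum(pre, 0)))  # after relu: nine zeros, near-uniform output

Worse, relu has zero gradient for negative inputs, so the clipped logits receive no learning signal at all, which is why the accuracy stays stuck at chance level.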
Solution:
Change:
def fc_layer(input, size_in, size_out, name="fc"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        act = tf.nn.relu(tf.matmul(input, w) + b)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        return act

to:

def fc_layer(input, size_in, size_out, name="fc", activation=tf.nn.relu):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        act = tf.matmul(input, w) + b
        if activation is not None:
            act = activation(act)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        return act

and set the activation to None for the last fully connected layer (tf.nn.softmax_cross_entropy_with_logits expects raw, unactivated logits):
logits = fc_layer(fc1, 1024, 10, "fc2", activation=None)

Thanks! Do you also have any suggestions on how to debug in TensorFlow? - Q. Li
As far as I know there is no universal recipe. TensorFlow 1.0 has a debugger, but I have not tried it yet. A good option is to print out as much as you can (e.g. gradients, activations, weights) and try to find where things go wrong; in most cases this is enough to track down the problem. Inspecting the graph (in TensorBoard) also helps. - Dmitriy Danevskiy
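For what it is worth, a minimal sketch of both suggestions, assuming the graph from the question is still in scope (xent, x, y and batch are the question's own names; the tfdbg wrapper ships with TensorFlow 1.x):

# Print gradient statistics: all-zero gradients point at dead relu units.
grads = tf.gradients(xent, tf.trainable_variables())
grad_vals = sess.run(grads, feed_dict={x: batch[0], y: batch[1]})
for var, g in zip(tf.trainable_variables(), grad_vals):
    print(var.name, 'grad mean:', g.mean(), 'grad abs max:', abs(g).max())

# The interactive debugger (tfdbg) wraps the session and breaks on every run().
from tensorflow.python import debug as tf_debug
sess = tf_debug.LocalCLIDebugWrapperSession(sess)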
