如何使用神经网络识别手写数字

3

我按照教程创建了一个使用Sigmoid函数的简单神经网络 "使用神经网络识别手写数字", 教程包含理论和代码示例。

问题在于它没有给出使用 network.py 进行数字识别的示例。

例如,我有以下数字,并希望将下图中的数字识别为 0。接下来该怎么做才能进行数字识别呢?

要进行数字识别,需要使用其他技术,如Theano或TensorFlow吗? 祝您有美好的一天!


但是数字识别器从图像的示例在哪里? - Isabel Cariod
1个回答

3

在下面的示例基础上,您可以在 NeuralNet 类中添加一个用于预测的函数:

def predict(self, image):
    """Binarize `image` (non-zero pixels become 1) and return the digit
    (0-9) whose softmax probability from the network is highest."""
    binarized = image.astype(bool).astype(int)
    probabilities = self.__feedforward(binarized)
    return np.argmax(probabilities)

一旦你成功训练了神经网络,你可以使用以下方法预测未知数字,准确率高达97%:

prediction = NN.predict(image)


摘自神经网络入门:用Python实现简单的ANN。原作者为 cᴏʟᴅsᴘᴇᴇᴅ 和 dontloo。有关归属权的详细信息,请参阅贡献者页面。该来源受CC BY-SA 3.0许可,可在文档存档中找到。参考主题ID:2709,示例ID:9069。

下面的代码清单试图对来自MNIST数据集的手写数字进行分类。这些数字长这样:

MNIST

代码将对这些数字进行预处理,将每个图像转换为0和1的2D数组,然后使用此数据训练具有高达97%准确率(50个时期)的神经网络。
"""
Deep Neural Net 

(Name: Classic Feedforward)

"""

import numpy as np
import pickle, json
import sklearn.datasets
import random
import time
import os

# cataloguing the various activation functions and their derivatives

def sigmoid(z):
    """Logistic sigmoid activation: squashes z elementwise into (0, 1)."""
    exp_neg = np.exp(-z)
    return 1.0 / (1.0 + exp_neg)

def sigmoid_prime(z):
    """Derivative of the sigmoid, via the identity s'(z) = s(z) * (1 - s(z))."""
    s = 1.0 / (1.0 + np.exp(-z))
    return s * (1 - s)

def relU(z):
    """Rectified linear unit: elementwise max(z, 0).

    Fix: the original called np.maximum(z, 0, z), passing `z` as the
    positional `out=` argument — that silently mutated the caller's array
    in place and raises TypeError for scalar input. Returning a fresh
    array keeps the same values without the side effect.
    """
    return np.maximum(z, 0)

def relU_prime(z):
    """Derivative of ReLU: 1.0 where z > 0, else 0.0.

    Bug fix: the original returned z * (z <= 0), which is both inverted
    (the mask must be z > 0) and scaled by z instead of being a 0/1 mask.
    The network's own backprop uses the (z > 0) mask directly, which this
    corrected definition now matches.
    """
    return 1.0 * (z > 0)

def tanh(z):
    """Hyperbolic tangent activation: maps z elementwise into (-1, 1)."""
    result = np.tanh(z)
    return result

def tanh_prime(z):
    """Derivative of tanh: 1 - tanh(z)**2."""
    t = np.tanh(z)
    return 1 - t ** 2

def transform_target(y):
    """One-hot encode a digit label `y` (anything int() accepts, e.g. 3,
    3.0 or '3') as a (10, 1) column vector."""
    index = int(y)
    one_hot = np.zeros((10, 1))
    one_hot[index, 0] = 1.0
    return one_hot


class NeuralNet:
    """Feedforward neural network with ReLU hidden units and a softmax
    output layer, trained by per-sample gradient descent.

    :param layers: unit counts per layer, e.g. [784, 32, 10]
    :param learning_rate: step size for gradient descent
    :param reg_lambda: regularization strength
                       (NOTE(review): stored but never used below)

    Fixes relative to the original: Python-2-only ``xrange`` replaced with
    ``range`` (works on both 2 and 3), and ``dump`` no longer leaks a file
    handle. All numerical behavior is unchanged.
    """

    def __init__(self, layers, learning_rate=0.05, reg_lambda=0.01):
        self.num_layers = len(layers)

        # Network parameters: zero biases, small zero-mean Gaussian weights.
        # weights[i] has shape (layers[i+1], layers[i]).
        self.layers = layers
        self.biases = [np.zeros((y, 1)) for y in layers[1:]]
        self.weights = [np.random.normal(loc=0.0, scale=0.1, size=(y, x))
                                       for x, y in zip(layers[:-1], layers[1:])]
        self.learning_rate = learning_rate
        self.reg_lambda = reg_lambda

        # Activation applied at every layer before the final softmax.
        self.nonlinearity = relU
        self.nonlinearity_prime = relU_prime

    def __feedforward(self, x):
        ''' Return softmax probabilities for the output layer, given input x. '''
        for w, b in zip(self.weights, self.biases):
            # Column-vector reshape so the matrix product is well-formed.
            x = self.nonlinearity(np.dot(w, np.reshape(x, (len(x), 1))) + b)

        # Softmax over the final layer's activations.
        return np.exp(x) / np.sum(np.exp(x))

    def __backpropagation(self, x, y):
        '''
        Perform the forward pass followed by backprop.
        :param x: input vector (flattened image)
        :param y: one-hot target column vector
        :return: (weight_gradients, bias_gradients), shaped like
                 self.weights and self.biases
        '''

        weight_gradients = [np.zeros(w.shape) for w in self.weights]
        bias_gradients = [np.zeros(b.shape) for b in self.biases]

        # Forward pass, remembering pre-activations (z) and activations.
        activation = x
        hidden_activations = [np.reshape(x, (len(x), 1))]
        z_list = []

        for w, b in zip(self.weights, self.biases):
            z = np.dot(w, np.reshape(activation, (len(activation), 1))) + b
            z_list.append(z)
            activation = self.nonlinearity(z)
            hidden_activations.append(activation)

        t = hidden_activations[-1]
        hidden_activations[-1] = np.exp(t) / np.sum(np.exp(t))   # softmax layer

        # Backward pass; (z > 0) is the ReLU derivative as a 0/1 mask.
        # NOTE(review): for softmax + cross-entropy the output delta is
        # usually just (softmax - y); the extra ReLU mask here is kept to
        # preserve the original code's behavior — confirm before changing.
        delta = (hidden_activations[-1] - y) * (z_list[-1] > 0)
        weight_gradients[-1] = np.dot(delta, hidden_activations[-2].T)
        bias_gradients[-1] = delta

        for l in range(2, self.num_layers):
            z = z_list[-l]
            delta = np.dot(self.weights[-l + 1].T, delta) * (z > 0)
            weight_gradients[-l] = np.dot(delta, hidden_activations[-l - 1].T)
            bias_gradients[-l] = delta

        return (weight_gradients, bias_gradients)

    def __update_params(self, weight_gradients, bias_gradients):
        ''' Gradient-descent step: move every parameter against its gradient. '''
        for i in range(len(self.weights)):
            self.weights[i] += -self.learning_rate * weight_gradients[i]
            self.biases[i] += -self.learning_rate * bias_gradients[i]

    def train(self, training_data, validation_data=None, epochs=10):
        ''' Train the network for `epochs` passes over the (shuffled) data.

        Each element of training_data/validation_data is an
        (input, one-hot target) pair. Data lists are shuffled in place.
        '''

        bias_gradients = None
        for i in range(epochs):
            random.shuffle(training_data)
            inputs = [data[0] for data in training_data]
            targets = [data[1] for data in training_data]

            for j in range(len(inputs)):
                (weight_gradients, bias_gradients) = self.__backpropagation(inputs[j], targets[j])
                self.__update_params(weight_gradients, bias_gradients)

            # NOTE(review): the validation set is also *trained on* here,
            # not merely evaluated — this mirrors the original behavior.
            if validation_data:
                random.shuffle(validation_data)
                inputs = [data[0] for data in validation_data]
                targets = [data[1] for data in validation_data]

                for j in range(len(inputs)):
                    (weight_gradients, bias_gradients) = self.__backpropagation(inputs[j], targets[j])
                    self.__update_params(weight_gradients, bias_gradients)

            print("{} epoch(s) done".format(i + 1))

        print("Training done.")

    def test(self, test_data):
        ''' Return classification accuracy (in percent) on (input, one-hot) pairs. '''
        test_results = [(np.argmax(self.__feedforward(x[0])), np.argmax(x[1])) for x in test_data]
        return float(sum([int(x == y) for (x, y) in test_results])) / len(test_data) * 100

    def dump(self, file):
        ''' Pickle the whole network to the path `file`. '''
        # Fix: close the file handle instead of leaking it.
        with open(file, "wb") as f:
            pickle.dump(self, f)



if __name__ == "__main__":
    # Use a 5000-sample subset: 70% train, 15% validation, 15% test.
    total = 5000
    training = int(total * 0.7)
    val = int(total * 0.15)
    test = int(total * 0.15)

    # Fix: fetch_mldata('MNIST original') was removed from scikit-learn;
    # fetch_openml serves the same dataset. as_frame=False keeps the data
    # as numpy arrays; labels arrive as strings ('0'..'9'), which
    # transform_target already handles via int().
    mnist = sklearn.datasets.fetch_openml('mnist_784', version=1,
                                          as_frame=False, data_home='./data')

    # list(...) is required on Python 3, where zip returns a lazy iterator
    # that cannot be shuffled or sliced.
    data = list(zip(mnist.data, mnist.target))
    random.shuffle(data)
    data = data[:total]
    # Binarize pixels to 0/1 and one-hot encode the labels.
    data = [(x[0].astype(bool).astype(int), transform_target(x[1])) for x in data]

    train_data = data[:training]
    val_data = data[training:training + val]
    test_data = data[training + val:]

    # Fix: print statements converted to print() calls, matching the
    # print() usage already present in NeuralNet.train.
    print("Data fetched")

    # ANN with 1 input layer (784 = flattened 28x28 image), 1 hidden layer
    # (32 units), and 1 output layer (10 units; unit i predicts the
    # probability of the image being digit i, 0 <= i <= 9).
    NN = NeuralNet([784, 32, 10])

    NN.train(train_data, val_data, epochs=5)

    print("Network trained")

    print("Accuracy:", str(NN.test(test_data)) + "%")

这是一个自包含的代码示例,可在不进行任何修改的情况下运行。请确保已为您的 Python 版本安装 numpy 和 scikit-learn。

我应该如何将图像作为参数传递,就像这样 **prediction = NN.predict(open("number.png"))**? - Isabel Cariod
@IsabelCariod,现在情况不再那么简单了......您需要将图像转换为灰度图像,使其具有numpy数组表示,并且必须将其调整为28 x 28维度。 - cs95
那是我的唯一问题。 - Isabel Cariod
1
嗯,非常有趣的@coldspeed。我不知道你在机器学习领域!(虽然我猜我应该知道,因为大多数numpy/pandas用户似乎都是;-) ) - Christian Dean
1
@cᴏʟᴅsᴘᴇᴇᴅ:问题已解决。感谢您告诉我。 - Jon Ericson
显示剩余3条评论

网页内容由 Stack Overflow 提供。点击上方的“原文链接”可以查看英文原文。