作为一个机器学习新手,我的第一个项目是基于mnist数据集建立手写数字识别神经网络。当我使用数据集提供的测试图像进行测试时,它似乎表现得非常好(这就是函数test_predict的作用)。现在,我想让网络识别我拍摄的一些实际手写数字。函数partial_img_rec接受包含多个数字的图像,并将被multiple_digits调用。我知道在这里使用递归可能看起来很奇怪,我相信有一些更有效的方法来做到这一点,但这不是重点。为了测试partial_img_rec,我提供了一些存储在文件夹.\individual_test中的单个数字照片,它们都长这样:
。
问题是:我的神经网络对于每个测试图像的预测都是“5”。无论实际数字显示为何,概率始终约为22%。我完全理解为什么结果不如mnist数据集的测试图像那样好,但我肯定没有预料到会出现这种情况。你有任何想法为什么会发生这种情况吗?欢迎任何建议。谢谢! 以下是我的代码(已编辑并运行):
![1_digit.jpg](https://istack.dev59.com/N98PK.webp)
问题是:我的神经网络对于每个测试图像的预测都是“5”。无论实际数字显示为何,概率始终约为22%。我完全理解为什么结果不如mnist数据集的测试图像那样好,但我肯定没有预料到会出现这种情况。你有任何想法为什么会发生这种情况吗?欢迎任何建议。谢谢! 以下是我的代码(已编辑并运行):
# import keras and the MNIST dataset
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from keras.utils import np_utils
# numpy is necessary since keras uses numpy arrays
import numpy as np
# imports for pictures
from PIL import Image
from PIL import ImageOps
# imports for tests
import random
import os
class mnist_network():
def __init__(self):
""" load data, create and train model """
# load data
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# flatten 28*28 images to a 784 vector for each image
num_pixels = X_train.shape[1] * X_train.shape[2]
X_train = X_train.reshape((X_train.shape[0], num_pixels)).astype('float32')
X_test = X_test.reshape((X_test.shape[0], num_pixels)).astype('float32')
# normalize inputs from 0-255 to 0-1
X_train = X_train / 255
X_test = X_test / 255
# one hot encode outputs
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]
# create model
self.model = Sequential()
self.model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
self.model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
# Compile model
self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# train the model
self.model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=2)
self.train_img = X_train
self.train_res = y_train
self.test_img = X_test
self.test_res = y_test
def test_all(self):
""" evaluates the success rate using all the test data """
scores = self.model.evaluate(self.test_img, self.test_res, verbose=0)
print("Baseline Error: %.2f%%" % (100-scores[1]*100))
def predict_result(self, img, num_pixels = None, show=False):
""" predicts the number in a picture (vector) """
assert type(img) == np.ndarray and img.shape == (784,)
"""if show:
# show the picture!!!! some problem here
plt.imshow(img, cmap='Greys')
plt.show()"""
num_pixels = img.shape[0]
# the actual number
res_number = np.argmax(self.model.predict(img.reshape(-1,num_pixels)), axis = 1)
# the probabilities
res_probabilities = self.model.predict(img.reshape(-1,num_pixels))
return (res_number[0], res_probabilities.tolist()[0]) # we only need the first element since they only have one
def test_predict(self, amount_test = 100):
""" test some random numbers from the test part of the data set """
assert type(amount_test) == int and amount_test <= 10000
cnt_right = 0
cnt_wrong = 0
for i in range(amount_test):
ind = random.randrange(0,10000) # there are 10000 images in the test part of the data set
""" correct_res is the actual result stored in the data set
It's represented as a list of 10 elements one of which being 1, the rest 0 """
correct_list = self.test_res.tolist()
correct_list = correct_list[ind] # the correct sublist
correct_res = correct_list.index(1.0)
predicted_res = self.predict_result(self.test_img[ind])[0]
if correct_res != predicted_res:
cnt_wrong += 1
print("Error in predict ! \
index = ", ind, " predicted result = ", predicted_res, " correct result = ", correct_res)
else:
cnt_right += 1
print("The machine predicted correctly ",cnt_right," out of ",amount_test," examples. That is a success rate of ", (cnt_right/amount_test)*100,"%.")
def partial_img_rec(self, image, upper_left, lower_right, results=[]):
""" partial is a part of an image """
left_x, left_y = upper_left
right_x, right_y = lower_right
print("current test part: ", upper_left, lower_right)
print("results: ", results)
# condition to stop recursion: we've reached the full width of the picture
width, height = image.size
if right_x > width:
return results
partial = image.crop((left_x, left_y, right_x, right_y))
# rescale image to 28 *28 dimension
partial = partial.resize((28,28), Image.ANTIALIAS)
partial.show()
# transform to vector
partial = ImageOps.invert(partial)
partial = np.asarray(partial, "float32")
partial = partial / 255.
partial[partial < 0.5] = 0.
# flatten image to 28*28 = 784 vector
num_pixels = partial.shape[0] * partial.shape[1]
partial = partial.reshape(num_pixels)
step = height // 10
# is there a number in this part of the image?
res, prop = self.predict_result(partial)
print("result: ", res, ". probabilities: ", prop)
# only count this result if the network is >= 50% sure
if prop[res] >= 0.5:
results.append(res)
# step is 80% of the partial image's size (which is equivalent to the original image's height)
step = int(height * 0.8)
print("found valid result")
else:
# if there is no number found we take smaller steps
step = height // 20
print("step: ", step)
# recursive call with modified positions ( move on step variables )
return self.partial_img_rec(image, (left_x+step, left_y), (right_x+step, right_y), results=results)
def test_individual_digits(self):
""" test partial_img_rec with some individual digits (square shaped images)
saved in the folder 'individual_test' following the pattern 'number_digit.jpg' """
cnt_right, cnt_wrong = 0,0
folder_content = os.listdir(".\individual_test")
for imageName in folder_content:
# image file must be a jpg or png
assert imageName[-4:] == ".jpg" or imageName[-4:] == ".png"
correct_res = int(imageName[0])
image = Image.open(".\\individual_test\\" + imageName).convert("L")
# only square images in this test
if image.size[0] != image.size[1]:
print(imageName, " has the wrong proportions: ", image.size,". It has to be a square.")
continue
predicted_res = self.partial_img_rec(image, (0,0), (image.size[0], image.size[1]), results=[])
if predicted_res == []:
print("No prediction possible for ", imageName)
else:
predicted_res = predicted_res[0]
if predicted_res != correct_res:
print("error in partial_img-rec! Predicted ", predicted_res, ". The correct result would have been ", correct_res)
cnt_wrong += 1
else:
cnt_right += 1
print("correctly predicted ",imageName)
print(cnt_right, " out of ", cnt_right + cnt_wrong," digits were correctly recognised. The success rate is therefore ", (cnt_right / (cnt_right + cnt_wrong)) * 100," %.")
def multiple_digits(self, img):
""" takes as input an image without unnecessary whitespace surrounding the digits """
#assert type(img) == myImage
width, height = img.size
# start with the first quadratic part of the image
res_list = self.partial_img_rec(img, (0,0),(height ,height))
res_str =""
for elem in res_list:
res_str += str(elem)
return res_str
network = mnist_network()
network.test_individual_digits()
编辑
@Geecode的回答非常有帮助,现在网络可以正确预测包括上面展示的图片在内的一些图片。然而,整体成功率低于50%。你有任何想法如何改进吗?
返回错误结果的图像示例: