一个单层的基础神经网络实现手写字识别-手写识别神经网络

先上代码

import tensorflow 
 
from tensorflow.examples.tutorials.mnist import input_data 
 
import matplotlib.pyplot as plt 
 
 
 
# 普通的神经网络学习 
 
# 学习训练类 
 
class Normal: 
 
 
 
    weight = [] 
 
    biases = [] 
 
 
 
    def __init__(self): 
 
        self.times = 1000 
 
        self.mnist = [] 
 
        self.session = tensorflow.Session() 
 
        self.xs = tensorflow.placeholder(tensorflow.float32, [None, 784]) 
 
        self.ys = tensorflow.placeholder(tensorflow.float32, [None, 10]) 
 
        self.save_path = 'learn/result/normal.ckpt' 
 
 
 
    def run(self): 
 
        self.import_data() 
 
        self.train() 
 
        self.save() 
 
 
 
    def _setWeight(self,weight): 
 
        self.weight = weight 
 
 
 
    def _setBiases(self,biases): 
 
        self.biases = biases 
 
 
 
    def _getWeight(self): 
 
        return self.weight 
 
 
 
    def _getBiases(self): 
 
        return self.biases 
 
    # 训练 
 
    def train(self): 
 
 
 
        prediction = self.add_layer(self.xs, 784, 10, activation_function=tensorflow.nn.softmax) 
 
 
 
        cross_entropy = tensorflow.reduce_mean( 
 
            -tensorflow.reduce_sum( 
 
                self.ys * tensorflow.log(prediction) 
 
                , reduction_indices=[1]) 
 
        ) 
 
        train_step = tensorflow.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) 
 
 
 
        self.session.run(tensorflow.global_variables_initializer()) 
 
 
 
        for i in range(self.times): 
 
            batch_xs, batch_ys = self.mnist.train.next_batch(100) 
 
            self.session.run(train_step, feed_dict={self.xs: batch_xs, self.ys: batch_ys}) 
 
            if i % 50 == 0: 
 
                # images 变换为 labels，images相当于x，labels相当于y 
 
                accurary = self.computer_accurary( 
 
                    self.mnist.test.images, 
 
                    self.mnist.test.labels, 
 
                    prediction 
 
                ) 
 
 
 
    # 数据导入 
 
    def import_data(self): 
 
        self.mnist = input_data.read_data_sets('MNIST_data', one_hot=True) 
 
 
 
    # 数据保存 
 
    def save(self): 
 
        saver = tensorflow.train.Saver() 
 
        path = saver.save(self.session,self.save_path) 
 
 
 
    # 添加隐藏层 
 
    def add_layer(self,inputs,input_size,output_size,activation_function=None): 
 
 
 
        weight = tensorflow.Variable(tensorflow.random_normal([input_size,output_size]),dtype=tensorflow.float32,name='weight') 
 
 
 
        biases = tensorflow.Variable(tensorflow.zeros([1,output_size]) + 0.1,dtype=tensorflow.float32,name='biases') 
 
        Wx_plus_b = tensorflow.matmul(inputs,weight) + biases 
 
 
 
        self._setBiases(biases) 
 
        self._setWeight(weight) 
 
 
 
        if activation_function is None: 
 
            outputs = Wx_plus_b 
 
        else: 
 
            outputs = activation_function(Wx_plus_b,) 
 
 
 
        return outputs 
 
 
 
 
 
    # 计算结果数据与实际数据的正确率 
 
    def computer_accurary(self,x_data,y_data,tf_prediction): 
 
 
 
        prediction = self.session.run(tf_prediction,feed_dict={self.xs:x_data,self.ys:y_data}) 
 
 
 
        # 返回两个矩阵中***值的索引号位置，然后进行相应位置的值大小比较并在此位置设置为True/False 
 
        correct_predition = tensorflow.equal(tensorflow.argmax(prediction,1),tensorflow.argmax(y_data,1)) 
 
 
 
        # 进行数据格式转换，然后进行降维求平均值 
 
        accurary = tensorflow.reduce_mean(tensorflow.cast(correct_predition,tensorflow.float32)) 
 
 
 
        result = self.session.run(accurary,feed_dict={self.xs:x_data,self.ys:y_data}) 
 
 
 
        return result 
 
 
 
# 识别类 
 
class NormalRead(Normal): 
 
 
 
    input_size = 784 
 
    output_size = 10 
 
 
 
    def run(self): 
 
        self.import_data() 
 
        self.getSaver() 
 
        origin_input = self._getInput() 
 
        output = self.recognize(origin_input) 
 
 
 
        self._showImage(origin_input) 
 
        self._showOutput(output) 
 
        pass 
 
 
 
    # 显示识别结果 
 
    def _showOutput(self,output): 
 
        number = output.index(1) 
 
        print('识别到的数字:',number) 
 
 
 
    # 显示被识别图片 
 
    def _showImage(self,origin_input): 
 
        data = [] 
 
        tmp = [] 
 
        i = 1 
 
        # 原数据转换为可显示的矩阵 
 
        for v in origin_input[0]: 
 
            if i %28 == 0: 
 
                tmp.append(v) 
 
                data.append(tmp) 
 
                tmp = [] 
 
            else: 
 
                tmp.append(v) 
 
            i += 1 
 
 
 
        plt.figure() 
 
        plt.imshow(data, cmap='binary')  # 黑白显示 
 
        plt.show() 
 
 
 
 
 
    def _setBiases(self,biases): 
 
        self.biases = biases 
 
        pass 
 
 
 
    def _setWeight(self,weight): 
 
        self.weight = weight 
 
        pass 
 
 
 
    def _getBiases(self): 
 
        return self.biases 
 
 
 
    def _getWeight(self): 
 
        return self.weight 
 
 
 
    # 获取训练模型 
 
    def getSaver(self): 
 
        weight = tensorflow.Variable(tensorflow.random_normal([self.input_size, self.output_size]), dtype=tensorflow.float32,name='weight') 
 
 
 
        biases = tensorflow.Variable(tensorflow.zeros([1, self.output_size]) + 0.1, dtype=tensorflow.float32, name='biases') 
 
 
 
        saver = tensorflow.train.Saver() 
 
        saver.restore(self.session,self.save_path) 
 
 
 
        self._setWeight(weight) 
 
        self._setBiases(biases) 
 
 
 
    def recognize(self,origin_input): 
 
        input = tensorflow.placeholder(tensorflow.float32,[None,784]) 
 
        weight = self._getWeight() 
 
        biases = self._getBiases() 
 
 
 
        result = tensorflow.matmul(input,weight) + biases 
 
        resultSof = tensorflow.nn.softmax(result,) # 把结果集使用softmax进行激励 
 
        resultSig = tensorflow.nn.sigmoid(resultSof,) # 把结果集以sigmoid函数进行激励，用于后续分类 
 
        output = self.session.run(resultSig,{input:origin_input}) 
 
 
 
        output = output[0] 
 
 
 
        # 对识别结果进行分类处理 
 
        output_tmp = [] 
 
        for item in output: 
 
            if item < 0.6: 
 
                output_tmp.append(0) 
 
            else : 
 
                output_tmp.append(1) 
 
 
 
        return output_tmp 
 
 
 
    def _getInput(self): 
 
        inputs, y = self.mnist.train.next_batch(100); 
 
        return [inputs[50]]

以上是程序，整个程序基于TensorFlow来实现的，具体的TensorFlow安装我就不说了。

整个训练过程不做多说，我发现网上关于训练的教程很多，但是训练结果的教程很少。

整个程序里，通过tensorflow.train.Saver()的save进行训练结果模型进行存储，然后再用tensorflow.train.Saver()的restore进行模型恢复然后取到训练好的weight和baises。

这里要注意的一个地方是因为一次性随机取出100张手写图片进行批量训练的，我在取的时候其实也是批量随机取100张，但是我传入识别的是一张，通过以下这段程序：

def _getInput(self): 
 
        inputs, y = self.mnist.train.next_batch(100); 
 
        return [inputs[50]]

注意一下return这里的数据结构，其实是取这批量的第50张，实际上这段程序写成：

def _getInput(self): 
 
        inputs, y = self.mnist.train.next_batch(1); 
 
        return [inputs[0]]

会更好。

因为识别的时候是需要用到训练的隐藏层来进行的，所以在此我虽然识别的是一张图片，但是我必须要传入一个批量数据的这样一个结构。

然后再识别的地方，我使用了两个激励函数：

resultSof = tensorflow.nn.softmax(result,) # 把结果集使用softmax进行激励 
 
resultSig = tensorflow.nn.sigmoid(resultSof,) # 把结果集以sigmoid函数进行激励，用于后续分类

这里的话，***个softmax激励后的数据我发现得到的是以e为底的指数形式，转换成普通的浮点数来看，不是很清楚到底是什么，那么我在做识别数字判断的时候就不方便，所以再通过了一次sigmoid的激励。

后续我通过一个循环判断进行一次实际上的分类，这个原因首先要说到识别结果形式：

[0,0,0,0,0,0,0,0,1,0]

像以上这个数据，表示的是8，也就是说，数组下表第几位为1就表示是几，如0的表示:

[1,0,0,0,0,0,0,0,0,0]

而sigmoid函数在这个地方其实就是对每个位置的数据进行了分类，我发现如果分类值小于0.52这样的数据其实代表的是否，也就是说此位置的值对应的是0，大于0.52应该对应的是真，也就是1；而我在程序里取的是0.6为界限做判断。

实际上，这个界限值应该是在神经网络训练的时候取的，而不是看识别结果来进行凭感觉取的（虽然训练的时候的参数也是凭感觉取的）

这篇文章是我根据个人的一些理解来写的，后续如果发现有错误，我会在新文章说出来，但这篇文章不做保留，方便后续检查思考记录的时候知道到底怎么踩坑的。

以下是我上次写的sigmoid函数的文章：

https://segmentfault.com/a/11...

关于其他激励函数，可以网上找资料进行了解，很多基础性的数学知识，放到一些比较具体的应用，会显得非常的有意思。