Implementing a Simple Two-Layer Neural Network
This kicks off a series on implementing neural networks with Python and NumPy. This first post builds a simple two-layer network for the classic MNIST handwritten-digit task, reaching roughly 94% accuracy. Later posts will add more complex computations and tuning features.
```python
import numpy as np
import pickle
import os
from PIL import Image

# Activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Gradient of the activation function
def sigmoid_grad(x):
    return (1.0 - sigmoid(x)) * sigmoid(x)

# Output function: converts raw scores into a probability distribution
def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)  # subtract the per-column max to prevent overflow
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T
    x = x - np.max(x)  # prevent overflow
    return np.exp(x) / np.sum(np.exp(x))

# Mean squared error loss
def mean_squared_error(y, t):
    return 0.5 * np.sum((y - t) ** 2)
```
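A note on the max-subtraction inside softmax: adding a constant to every input leaves the result unchanged (the constant cancels between numerator and denominator), so shifting by the maximum only protects np.exp from overflowing. A quick throwaway check:

```python
a = np.array([1010.0, 1000.0, 990.0])
# Naively, np.exp(1010) overflows to inf and the result would be nan;
# with the max subtracted we effectively compute softmax([0, -10, -20]).
print(softmax(a))        # [9.99954600e-01 4.53978686e-05 2.06106005e-09]
print(softmax(a).sum())  # 1.0
```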
```python
# Convert integer labels to one-hot encoding
def one_hot(X):
    T = np.zeros((X.size, 10))
    for idx, row in enumerate(T):
        row[X[idx]] = 1
    return T

def load_mnist():
    # Download https://s3.amazonaws.com/img-datasets/mnist.npz to a local path first
    file = '/Users/justinhuang/src/ML/deeplearning/data/mnist.npz'
    f = np.load(file)
    x_train, y_train = f['x_train'], f['y_train']
    x_test, y_test = f['x_test'], f['y_test']
    # x_train has shape (60000, 28, 28): 60000 samples, each a 28*28 matrix.
    # Flatten each image into a 784-dimensional vector.
    x_train = x_train.reshape(60000, 784)
    x_test = x_test.reshape(10000, 784)
    # Normalize to 0.0~1.0: gray values range over 0-255, so dividing by 255 suffices.
    # Why normalize: 1) it prevents numerical overflow; 2) gradient descent needs it --
    # large input values produce large gradients, which would force us to retune the
    # learning rate, and we use a fixed learning rate here.
    x_train = x_train.astype(np.float32)
    x_train /= 255.0
    x_test = x_test.astype(np.float32)
    x_test /= 255.0
    # One-hot encode the labels, i.e. the form [0,1,0,0,0,0,0,0,0,0],
    # where the 1 marks the correct class.
    y_train = one_hot(y_train)
    y_test = one_hot(y_test)
    f.close()
    return (x_train, y_train), (x_test, y_test)
```
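A quick shape check of what load_mnist returns (another throwaway snippet; the expected values follow from the reshape, normalization, and one-hot steps above):

```python
(x_train, y_train), (x_test, y_test) = load_mnist()
print(x_train.shape, y_train.shape)  # (60000, 784) (60000, 10)
print(x_test.shape, y_test.shape)    # (10000, 784) (10000, 10)
print(x_train.min(), x_train.max())  # 0.0 1.0 after normalization
# The first training label is 5, so its one-hot row is:
print(y_train[0])  # [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
```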
```python
# Convert the npz file to png images for easier inspection. This isn't really
# needed for this example; it only helps when you want to eyeball the npz data.
def conv_npz_to_png():
    file = '/Users/justinhuang/src/ML/deeplearning/data/mnist.npz'
    baseDir = "./MNIST_zip"
    # Make sure the output directories exist before saving
    os.makedirs(baseDir + '/train', exist_ok=True)
    os.makedirs(baseDir + '/test', exist_ok=True)
    f = np.load(file)
    x_train, y_train = f['x_train'], f['y_train']
    x_test, y_test = f['x_test'], f['y_test']
    # Convert the training samples
    for i in range(x_train.shape[0]):
        new_im = Image.fromarray(x_train[i, :, :])
        # Record the label in the file name
        new_im.save(baseDir + '/train/No:%d label:%d.png' % (i, y_train[i]))
    # Convert the test samples
    for j in range(x_test.shape[0]):
        new_im = Image.fromarray(x_test[j, :, :])
        new_im.save(baseDir + '/test/No:%d label:%d.png' % (j, y_test[j]))
    f.close()
    print('completed')
```
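Be aware that conv_npz_to_png() writes all 70,000 images (60,000 training plus 10,000 test) as individual png files, so only run it when you actually want to browse the dataset on disk.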
```python
class twolayerNet:
    def __init__(self, input_size, hidden_size, output_size, init_std=0.01):
        self.params = {}
        self.params['w1'] = init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['w2'] = init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    # Forward pass / prediction
    def predict(self, x):
        # y = softmax(w2 * sigmoid(w1*x + b1) + b2)
        w1, w2 = self.params['w1'], self.params['w2']
        b1, b2 = self.params['b1'], self.params['b2']
        a1 = np.dot(x, w1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, w2) + b2
        y = softmax(a2)
        return y

    # Loss using mean squared error (for monitoring only; see gradient() below)
    def loss(self, x, t):
        y = self.predict(x)
        return mean_squared_error(y, t)

    # Classification accuracy
    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # Gradient computation (backpropagation)
    def gradient(self, x, t):
        w1, w2 = self.params['w1'], self.params['w2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}
        batch_size = x.shape[0]
        # Forward pass first, keeping the intermediate values
        a1 = np.dot(x, w1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, w2) + b2
        y = softmax(a2)
        # Backward pass: output layer (y) -> layer 2 (w2, b2)
        # -> activation (sigmoid_grad) -> layer 1 (w1, b1)
        # Output-layer gradient. Note: (y - t) / batch_size is the gradient of
        # softmax combined with cross-entropy loss, not of the MSE used in loss().
        dy = (y - t) / batch_size
        # Layer-2 gradients
        grads['w2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)
        dHidden2 = np.dot(dy, w2.T)
        # Gradient through the activation
        dSigmoid = sigmoid_grad(a1) * dHidden2
        # Layer-1 gradients
        grads['w1'] = np.dot(x.T, dSigmoid)
        grads['b1'] = np.sum(dSigmoid, axis=0)
        return grads
```
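Before training, it is worth checking the analytic backward pass against a numerical gradient. One subtlety: as noted in the comments, gradient() implements the gradient of softmax combined with cross-entropy, so the check has to differentiate a cross-entropy loss rather than the MSE in loss(). Below is a minimal sketch under that assumption; cross_entropy and numerical_gradient are helpers written just for this check, not part of the original code:

```python
def cross_entropy(y, t):
    # Batch-averaged cross-entropy; the 1e-7 guards against log(0)
    return -np.sum(t * np.log(y + 1e-7)) / y.shape[0]

def numerical_gradient(f, x, eps=1e-4):
    # Central-difference gradient of f with respect to the array x,
    # perturbing one element at a time (x is modified in place and restored)
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + eps
        fxh1 = f()
        x[idx] = orig - eps
        fxh2 = f()
        grad[idx] = (fxh1 - fxh2) / (2 * eps)
        x[idx] = orig
        it.iternext()
    return grad

# Compare on a tiny batch (x_train/y_train as returned by load_mnist above)
net = twolayerNet(input_size=784, hidden_size=10, output_size=10)
x, t = x_train[:3], y_train[:3]
grads = net.gradient(x, t)
for key in ('w1', 'b1', 'w2', 'b2'):
    num = numerical_gradient(lambda: cross_entropy(net.predict(x), t), net.params[key])
    # The max absolute difference should be tiny, around 1e-8 or smaller
    print(key, np.max(np.abs(grads[key] - num)))
```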
```python
# Load the data, split into training and test sets; x is the input, y the label
(x_train, y_train), (x_test, y_test) = load_mnist()

# Mini-batch training
# print(x_train.shape)  # prints (60000, 784)
# print(y_train.shape)  # prints (60000, 10)
train_size = x_train.shape[0]
# Number of samples per iteration
batch_size = 100
# Number of iterations
iters_num = 10000
# Learning rate
learning_rate = 0.1
iter_per_epoch = max(train_size / batch_size, 1)

network = twolayerNet(input_size=784, hidden_size=100, output_size=10)

# Training loop
for i in range(iters_num):
    # Randomly pick batch_size samples
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    y_batch = y_train[batch_mask]
    # Compute the gradients
    grads = network.gradient(x_batch, y_batch)
    # Gradient-descent update
    for key in ('w1', 'b1', 'w2', 'b2'):
        network.params[key] -= learning_rate * grads[key]
    # Compute the loss on this batch
    loss = network.loss(x_batch, y_batch)
    # Report once per epoch
    if i % iter_per_epoch == 0:
        # Compute and print the accuracy
        train_acc = network.accuracy(x_train, y_train)
        test_acc = network.accuracy(x_test, y_test)
        print("train accuracy:" + str(train_acc) + ";" + "test accuracy " + str(test_acc))
```
Output:
train accuracy:0.09863333333333334;test accuracy 0.0958
train accuracy:0.81785;test accuracy 0.8226
train accuracy:0.8816166666666667;test accuracy 0.8867
train accuracy:0.9;test accuracy 0.9022
train accuracy:0.90795;test accuracy 0.9101
train accuracy:0.9129666666666667;test accuracy 0.9148
train accuracy:0.9184833333333333;test accuracy 0.92
train accuracy:0.9220833333333334;test accuracy 0.9247
train accuracy:0.9253666666666667;test accuracy 0.927
train accuracy:0.9290333333333334;test accuracy 0.9302
train accuracy:0.9320166666666667;test accuracy 0.9325
train accuracy:0.9345666666666667;test accuracy 0.9359
train accuracy:0.9366666666666666;test accuracy 0.9368
train accuracy:0.9395666666666667;test accuracy 0.9398
train accuracy:0.9413833333333333;test accuracy 0.9408
train accuracy:0.9437833333333333;test accuracy 0.9432
train accuracy:0.9461833333333334;test accuracy 0.9454
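With training done, the network can be tried on individual test images, and the pickle import at the top (otherwise unused in this post) hints at saving the learned parameters for later reuse. A sketch of both; the file name two_layer_params.pkl is arbitrary:

```python
# Predict a single test image (slice with a range to keep the input 2-D)
idx = 0
probs = network.predict(x_test[idx:idx + 1])
print('predicted:', np.argmax(probs), 'actual:', np.argmax(y_test[idx]))

# Save the learned parameters for reuse
with open('two_layer_params.pkl', 'wb') as f:
    pickle.dump(network.params, f)
```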
Author: justin huang
Last updated: 2018-09-09