第12节:RNN原理及numpy实现
发布时间:2023-04-18 16:57:23
文章目录
RNN
RNN训练
- 隐状态更新:$s_t = \tanh(U x_t + W s_{t-1})$
- 分类器输出每个类输出的概率
- $\hat{y}_t = \mathrm{softmax}(V s_t)$
- 交叉熵损失函数
- 交叉熵损失函数:$E_t(y_t,\hat{y}_t) = -\sum_i y_{t,i}\log \hat{y}_{t,i}$。$y_t$ 是 one-hot 的标准答案(只有一个分量为 1,其余为 0);$\hat{y}_t$ 是同维度的概率向量,因此需要按分量累加。
- 整条序列的损失是各时刻损失之和:$E(y,\hat{y}) = \sum_{t} E_t(y_t,\hat{y}_t)$
- 优化使用 BPTT(随时间反向传播):RNN 在时刻 $t$ 的状态不仅依赖当前输入,还依赖上一时刻的记忆,因此梯度需要沿时间展开逐步回传。
- [外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-ariabhT6-1640942687639)(https://raw.githubusercontent.com/errolyan/tuchuang/master/uPic/BRjOxP.png)]
numpy复现
# -*- coding:utf-8 -*-
# /usr/bin/python
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
class rnn():
    """A minimal vanilla RNN implemented with numpy only, trained by BPTT.

    Weights:
        w0 -- input  -> hidden
        wh -- hidden -> hidden (recurrent)
        w1 -- hidden -> output
    Layer sizes and sequence length are inferred from the training data in
    train(); the hidden size and learning rate are constructor parameters.
    """

    def __init__(self, epoch=10000, ALPHA=0.1, HIDDEN_DIM=10, checkpoint_path="./"):
        '''Store hyper-parameters; data-dependent dims are filled in by train().'''
        self.BIN_DIM = 0           # timesteps per sample, set from x_train in train()
        self.INPUT_DIM = 0         # input features per step, set from x_train
        self.HIDDEN_DIM = HIDDEN_DIM
        self.OUTPUT_DIM = 0        # outputs per step, set from y_train
        self.ALPHA = ALPHA         # learning rate
        self.ITER_NUM = epoch
        # max(1, ...) fixes a crash: for epoch < 10 (resp. < 200) the bare
        # integer division was 0 and `i % 0` in train() raised ZeroDivisionError.
        self.LOG_ITER = max(1, self.ITER_NUM // 10)
        self.PLOT_ITER = max(1, self.ITER_NUM // 200)
        self.checkpoint_path = checkpoint_path
        self.accuracy = 0          # running hit counter maintained by acc()
        self.errs = []             # mean-error curve recorded by train(), read by show()
        self.accs = []             # accuracy curve recorded by train(), read by show()

    def init(self):
        '''Randomly initialize weights and zero the gradient accumulators.'''
        self.w0 = np.random.normal(0, 1, [self.INPUT_DIM, self.HIDDEN_DIM])
        self.w1 = np.random.normal(0, 1, [self.HIDDEN_DIM, self.OUTPUT_DIM])
        self.wh = np.random.normal(0, 2, [self.HIDDEN_DIM, self.HIDDEN_DIM])
        self.d0 = np.zeros_like(self.w0)
        self.d1 = np.zeros_like(self.w1)
        self.dh = np.zeros_like(self.wh)

    def saveweight(self):
        '''Pickle weights and gradient buffers to <checkpoint_path>/weight.pkl.'''
        weight = (self.w0, self.w1, self.wh, self.d0, self.d1, self.dh)
        name = os.path.join(self.checkpoint_path, "weight.pkl")
        with open(name, "wb") as wg:
            pickle.dump(weight, wg)

    def sigmoid(self, x):
        '''Logistic activation function.'''
        return 1 / (1 + np.exp(-x))

    def deriv_sigmoid(self, out):
        '''Sigmoid derivative, expressed in terms of the sigmoid output.'''
        return out * (1 - out)

    def bin2dec(self, b):
        '''Convert a bit sequence (most-significant bit first) to an integer.'''
        out = 0
        for i, x in enumerate(b[::-1]):
            out += x * pow(2, i)
        return out

    def forward_propagation(self, x_train, y_train):
        '''Run one full pass over the sequence (last row first).

        Returns the list of hidden states, the per-step output deltas
        (pre-scaled for BPTT), the summed absolute error, and the rounded
        binary prediction.
        '''
        overall_err = 0  # total absolute error over the whole sequence
        pred = np.zeros_like(y_train)
        output_deltas = list()
        hidden_values = list()
        hidden_values.append(np.zeros(self.HIDDEN_DIM))  # initial hidden state
        # Walk the sequence from the last row to the first (LSB-first data).
        for pos in range(self.BIN_DIM)[::-1]:
            X = np.array([x_train[pos]])  # shape=(1, INPUT_DIM)
            Y = np.array([y_train[pos]])  # shape=(1, OUTPUT_DIM)
            hidden = self.sigmoid(np.dot(X, self.w0) + np.dot(hidden_values[-1], self.wh))
            output = self.sigmoid(np.dot(hidden, self.w1))
            pred[pos] = np.round(output[0][0])
            # Raw (not squared) error; scaled by the sigmoid derivative below.
            output_err = Y - output
            output_deltas.append(output_err * self.deriv_sigmoid(output))
            hidden_values.append(hidden)
            overall_err += np.abs(output_err[0])
        return hidden_values, output_deltas, overall_err, pred

    def backpropagation(self, x_train, hidden_values, output_deltas):
        '''Backpropagation through time: accumulate gradients over all steps,
        then apply one (gradient-ascent style, error = Y - output) update.'''
        future_delta = np.zeros(self.HIDDEN_DIM)
        for pos in range(self.BIN_DIM):
            X = np.array([x_train[pos]])
            hidden = hidden_values[-(pos + 1)]
            prev_hidden = hidden_values[-(pos + 2)]
            output_delta = output_deltas[-(pos + 1)]
            # Hidden delta mixes the future timestep's delta and the local output delta.
            hidden_delta = (np.dot(future_delta, self.wh.T) + np.dot(output_delta, self.w1.T)) * self.deriv_sigmoid(hidden)
            self.d1 += np.dot(np.atleast_2d(hidden).T, output_delta)
            self.dh += np.dot(np.atleast_2d(prev_hidden).T, hidden_delta)
            self.d0 += np.dot(X.T, hidden_delta)
            future_delta = hidden_delta
        self.w1 += self.ALPHA * self.d1
        self.w0 += self.ALPHA * self.d0
        self.wh += self.ALPHA * self.dh
        # Reset accumulators for the next iteration.
        self.d1 *= 0
        self.d0 *= 0
        self.dh *= 0

    def acc(self, pred, c_dec):
        '''Count a hit when the decoded prediction equals the decimal target.

        Fix: the original incremented the undefined class attribute
        `rnn.accuracy` (AttributeError); use the instance counter instead.
        '''
        if (self.bin2dec(pred) == c_dec):
            self.accuracy += 1

    def train(self, x_train, y_train):
        '''Train on one (x, y) sequence pair; checkpoint weights at the end.

        Also records the error and accuracy curves on self.errs / self.accs
        so that show() can plot them (the original recorded nothing and
        show() crashed on undefined attributes).
        '''
        self.BIN_DIM = x_train.shape[0]
        self.INPUT_DIM = x_train.shape[1]
        self.OUTPUT_DIM = y_train.shape[1]
        self.init()
        self.errs = list()
        self.accs = list()
        error = 0
        self.accuracy = 0
        # Decimal value of the target bit string, used by acc().
        target_dec = self.bin2dec(y_train.flatten())
        for i in range(self.ITER_NUM + 1):
            hidden_values, output_deltas, overall_err, pred = self.forward_propagation(x_train, y_train)
            self.backpropagation(x_train, hidden_values, output_deltas)
            error += overall_err
            # Fix: accuracy was never updated before, so the curve was flat 0.
            self.acc(pred.flatten(), target_dec)
            if (i % self.PLOT_ITER == 0):
                self.errs.append(error / self.PLOT_ITER)
                self.accs.append(self.accuracy / self.PLOT_ITER)
                error = 0
                self.accuracy = 0
            if (i % self.LOG_ITER == 0):
                print('Iter', i)
                print("Error :", overall_err)
                print('----------')
        self.saveweight()

    def predict(self,):
        '''Reload checkpointed weights and run one step on a fixed input.'''
        # Load the weights saved by saveweight().
        name = os.path.join(self.checkpoint_path, "weight.pkl")
        with open(name, "rb") as wg:
            (self.w0, self.w1, self.wh, self.d0, self.d1, self.dh) = pickle.load(wg)
        # One forward step from a zero hidden state.
        X = np.array([[0, 0]])  # shape=(1, 2)
        hidden = self.sigmoid(np.dot(X, self.w0) + np.dot(np.zeros(self.HIDDEN_DIM), self.wh))
        output = self.sigmoid(np.dot(hidden, self.w1))
        result = np.round(output[0][0])
        print("预测结果为", result)

    def show(self,):
        '''Plot the error/accuracy curves recorded by the last train() call.

        Fix: the original read undefined class attrs rnn.errs / rnn.accs.
        '''
        plt.plot(self.errs, label='error')
        plt.plot(self.accs, label='accuracy')
        plt.legend()
        plt.show()
def generate_data():
    """Build one random binary-addition training sample: a + b = c.

    a and b are drawn from [0, 128) so c always fits in 8 bits.

    Returns:
        x_train -- int array of shape (9, 2): a leading [0, 0] row, then the
                   bits of (a, b) least-significant first.
        y_train -- int array of shape (9, 1): a leading [0] row, then the
                   bits of c least-significant first.
    """
    BIN_DIM = 8
    largest = pow(2, BIN_DIM)
    a_dec = np.random.randint(largest // 2)  # // keeps the bound an int
    b_dec = np.random.randint(largest // 2)
    c_dec = a_dec + b_dec
    # Lookup table: row d holds the 8-bit big-endian representation of d.
    decimal = np.array([range(largest)]).astype(np.uint8).T
    binary = np.unpackbits(decimal, axis=1)
    a_bin = binary[a_dec]
    b_bin = binary[b_dec]
    c_bin = binary[c_dec]
    # Collect rows in lists and build the arrays once: the original called
    # np.concatenate inside the loop, copying the whole array each iteration.
    x_rows = [[0, 0]]  # leading zero row kept for output compatibility
    y_rows = [[0]]
    for pos in reversed(range(BIN_DIM)):  # least-significant bit first
        x_rows.append([a_bin[pos], b_bin[pos]])
        y_rows.append([c_bin[pos]])
    return np.array(x_rows, dtype=int), np.array(y_rows, dtype=int)
if __name__ == "__main__":
    # Guard the demo so importing this module does not trigger a full
    # 10000-epoch training run as a side effect.
    x_train, y_train = generate_data()
    newrnn = rnn(epoch=10000, ALPHA=0.2, HIDDEN_DIM=10, checkpoint_path="./")
    newrnn.train(x_train, y_train)
    newrnn.predict()
相关文章
- linux下MongoDB的使用教程
- ps闪退是什么原因
- linux下如何关闭mongodb服务
- 免费代理IP – 免费HTTP代理IP_SOCKS5代理服务器_优质IP代理2022/12/02 14:15:03
- postgresql乱码怎么办
- linux如何卸载postgresql
- postgresql是否支持json类型
- hadoop是什么
- 如何查看是否安装postgresql
- postgresql启动不了如何解决
- postgresql服务启动报错1053怎么解决
- postgresql怎么启动服务
- 怎样删除postgresql
- postgresql安装错误的原因是什么
- postgresql服务怎么安装启动
- 怎么判断postgresql是否存在
- postgresql启动不了怎么解决
- 如何彻底卸载postgresql
- linux下如何查看是否安装postgresql
- linux下postgresql怎么启动