
Section 12: RNN Principles and a NumPy Implementation


RNN

x_t denotes the input at time t, o_t the output at time t, and s_t the memory (hidden state) at time t:

s_t = f(U x_t + W s_{t-1})
o_t = softmax(V s_t)
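
As a concrete illustration of the two formulas above, here is a minimal sketch of a single forward step in NumPy; the dimensions, the softmax helper, and the variable names other than U, W, V are assumptions for illustration, not part of the implementation later in this article:

import numpy as np

input_dim, hidden_dim, output_dim = 4, 8, 3        # assumed toy sizes
U = np.random.randn(hidden_dim, input_dim) * 0.1   # input-to-hidden weights
W = np.random.randn(hidden_dim, hidden_dim) * 0.1  # hidden-to-hidden (recurrent) weights
V = np.random.randn(output_dim, hidden_dim) * 0.1  # hidden-to-output weights

def softmax(z):
    z = z - z.max()          # shift for numerical stability
    e = np.exp(z)
    return e / e.sum()

x_t = np.random.randn(input_dim)   # input at time t
s_prev = np.zeros(hidden_dim)      # s_{t-1}, zero at the first step

s_t = np.tanh(U @ x_t + W @ s_prev)  # s_t = f(U x_t + W s_{t-1}), with f = tanh
o_t = softmax(V @ s_t)               # o_t = softmax(V s_t)
print(o_t.shape, o_t.sum())          # probability vector over the output classes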

Training an RNN

  • Hidden state: s_t = tanh(U x_t + W s_{t-1})
  • The classifier outputs a probability for each class:
    • ŷ_t = softmax(V s_t)
  • Cross-entropy loss:
    • E_t(y_t, ŷ_t) = -y_t log(ŷ_t). The ground truth y_t at time t is one-hot (exactly one entry is 1, all others are 0); ŷ_t has the same dimension but is a probability vector, so the per-step losses are summed over time (see the sketch after this list):
    • E(y, ŷ) = Σ_t E_t(y_t, ŷ_t)
  • Optimization uses BPTT (backpropagation through time), because the RNN's state depends not only on the current input but also on the previous memory, so gradients must flow back through earlier time steps.
    • (Figure: BPTT unrolling diagram — https://raw.githubusercontent.com/errolyan/tuchuang/master/uPic/BRjOxP.png)
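
Here is a minimal sketch of the per-step cross-entropy and its sum over time, assuming one-hot targets; the sequence length, class count, and variable names are illustrative and not taken from the implementation below:

import numpy as np

T, num_classes = 5, 3                                   # assumed sequence length and class count
y_hat = np.random.dirichlet(np.ones(num_classes), T)    # T probability vectors ŷ_t
y = np.eye(num_classes)[np.random.randint(num_classes, size=T)]  # T one-hot targets y_t

# E_t(y_t, ŷ_t) = -y_t · log(ŷ_t): only the true class contributes when y_t is one-hot
per_step = -np.sum(y * np.log(y_hat), axis=1)
total_loss = per_step.sum()                              # E(y, ŷ) = Σ_t E_t(y_t, ŷ_t)
print(per_step, total_loss)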

NumPy implementation

The code below trains a small RNN on a single binary-addition example (a + b = c): at each step the input is one bit of a and one bit of b, and the target is the corresponding bit of c.

#!/usr/bin/python
# -*- coding:utf-8 -*-

import os
import pickle
import numpy as np
import matplotlib.pyplot as plt

class rnn():

    def __init__(self, epoch=10000, ALPHA=0.1, HIDDEN_DIM=10, checkpoint_path="./"):
        '''Initialize hyperparameters'''
        # data-dependent dimensions, filled in by train()
        self.BIN_DIM = 0      # sequence length (number of bit positions)
        self.INPUT_DIM = 0    # input size per time step
        self.HIDDEN_DIM = HIDDEN_DIM
        self.OUTPUT_DIM = 0   # output size per time step
        self.ALPHA = ALPHA    # learning rate
        self.ITER_NUM = epoch
        self.LOG_ITER = self.ITER_NUM // 10    # print progress every LOG_ITER iterations
        self.PLOT_ITER = self.ITER_NUM // 200  # record error/accuracy every PLOT_ITER iterations
        self.checkpoint_path = checkpoint_path

    def init(self):
        '''Initialize weights and their gradient accumulators'''
        self.w0 = np.random.normal(0, 1, [self.INPUT_DIM, self.HIDDEN_DIM])   # input -> hidden
        self.w1 = np.random.normal(0, 1, [self.HIDDEN_DIM, self.OUTPUT_DIM])  # hidden -> output
        self.wh = np.random.normal(0, 2, [self.HIDDEN_DIM, self.HIDDEN_DIM])  # hidden -> hidden (recurrent)
        self.d0 = np.zeros_like(self.w0)
        self.d1 = np.zeros_like(self.w1)
        self.dh = np.zeros_like(self.wh)

    def saveweight(self):
        '''Save the weights to a pickle checkpoint'''
        weight = (self.w0, self.w1, self.wh, self.d0, self.d1, self.dh)
        name = os.path.join(self.checkpoint_path, "weight.pkl")
        with open(name, "wb") as wg:
            pickle.dump(weight, wg)

    def sigmoid(self, x):
        '''Sigmoid activation'''
        return 1 / (1 + np.exp(-x))

    def deriv_sigmoid(self, out):
        '''Derivative of the sigmoid, expressed in terms of its output'''
        return out * (1 - out)

    def bin2dec(self, b):
        '''Convert a binary array (most significant bit first) to its decimal value'''
        out = 0
        for i, x in enumerate(b[::-1]):
            out += x * pow(2, i)
        return out

    def forward_propagation(self, x_train, y_train):
        '''Forward propagation over the whole sequence'''
        overall_err = 0                # accumulated absolute error over the sequence
        pred = np.zeros_like(y_train)
        output_deltas = list()         # per-step output error * sigmoid derivative, kept for BPTT
        hidden_values = list()         # per-step hidden states, kept for BPTT
        hidden_values.append(np.zeros(self.HIDDEN_DIM))
        for pos in range(self.BIN_DIM)[::-1]:
            X = np.array([x_train[pos]])  # shape=(1, INPUT_DIM)
            Y = np.array([y_train[pos]])  # shape=(1, OUTPUT_DIM)
            hidden = self.sigmoid(np.dot(X, self.w0) + np.dot(hidden_values[-1], self.wh))
            output = self.sigmoid(np.dot(hidden, self.w1))
            pred[pos] = np.round(output[0][0])
            output_err = Y - output
            output_deltas.append(output_err * self.deriv_sigmoid(output))
            hidden_values.append(hidden)
            overall_err += np.abs(output_err[0])
        return hidden_values, output_deltas, overall_err, pred

    def backpropagation(self, x_train, hidden_values, output_deltas):
        '''Backpropagation through time (BPTT)'''
        future_delta = np.zeros(self.HIDDEN_DIM)
        # walk back through the time steps in the reverse of the forward order
        for pos in range(self.BIN_DIM):
            X = np.array([x_train[pos]])
            hidden = hidden_values[-(pos + 1)]
            prev_hidden = hidden_values[-(pos + 2)]
            output_delta = output_deltas[-(pos + 1)]
            hidden_delta = (np.dot(future_delta, self.wh.T) + np.dot(output_delta, self.w1.T)) * self.deriv_sigmoid(hidden)
            # accumulate the gradients for each weight matrix
            self.d1 += np.dot(np.atleast_2d(hidden).T, output_delta)
            self.dh += np.dot(np.atleast_2d(prev_hidden).T, hidden_delta)
            self.d0 += np.dot(X.T, hidden_delta)
            future_delta = hidden_delta
        # apply the accumulated gradients, then reset them
        self.w1 += self.ALPHA * self.d1
        self.w0 += self.ALPHA * self.d0
        self.wh += self.ALPHA * self.dh
        self.d1 *= 0
        self.d0 *= 0
        self.dh *= 0

    def acc(self, pred, c_dec):
        '''Return 1 if the predicted bits decode to the expected decimal value, else 0'''
        return int(self.bin2dec(pred) == c_dec)

    def train(self, x_train, y_train):
        '''Training loop'''
        self.BIN_DIM = x_train.shape[0]
        self.INPUT_DIM = x_train.shape[1]
        self.OUTPUT_DIM = y_train.shape[1]
        self.init()
        self.errs = list()  # averaged error, recorded every PLOT_ITER iterations
        self.accs = list()  # averaged exact-match accuracy, recorded every PLOT_ITER iterations
        error = 0
        accuracy = 0
        for i in range(self.ITER_NUM + 1):
            hidden_values, output_deltas, overall_err, pred = self.forward_propagation(x_train, y_train)
            self.backpropagation(x_train, hidden_values, output_deltas)
            error += overall_err
            accuracy += int(np.array_equal(pred, y_train))  # count exact matches of all predicted bits

            if (i % self.PLOT_ITER == 0):
                self.errs.append(error / self.PLOT_ITER)
                self.accs.append(accuracy / self.PLOT_ITER)
                error = 0
                accuracy = 0
            if (i % self.LOG_ITER == 0):
                print('Iter', i)
                print("Error :", overall_err)
                print('----------')
        self.saveweight()


    def predict(self,):
        '''Predict the first output bit for a fixed [0, 0] input using the saved weights'''
        # Load the weights from the checkpoint
        name = os.path.join(self.checkpoint_path, "weight.pkl")
        with open(name, "rb") as wg:
            (self.w0, self.w1, self.wh, self.d0, self.d1, self.dh) = pickle.load(wg)
        # Run a single forward step with a zero initial hidden state
        X = np.array([[0, 0]])  # shape=(1, 2)
        hidden = self.sigmoid(np.dot(X, self.w0) + np.dot(np.zeros(self.HIDDEN_DIM), self.wh))
        output = self.sigmoid(np.dot(hidden, self.w1))
        result = np.round(output[0][0])
        print("Prediction:", result)

    def show(self,):
        '''Plot the error and accuracy curves recorded during training'''
        plt.plot(self.errs, label='error')
        plt.plot(self.accs, label='accuracy')
        plt.legend()
        plt.show()

def generate_data():
    '''Build one binary-addition example a + b = c as a bit sequence'''
    BIN_DIM = 8
    largest = pow(2, BIN_DIM)
    a_dec = np.random.randint(largest // 2)
    b_dec = np.random.randint(largest // 2)
    c_dec = a_dec + b_dec
    # lookup table: decimal value -> 8-bit binary representation
    decimal = np.array([range(largest)]).astype(np.uint8).T
    binary = np.unpackbits(decimal, axis=1)
    a_bin = binary[a_dec]
    b_bin = binary[b_dec]
    c_bin = binary[c_dec]
    x_train = np.array([[0, 0]])
    y_train = np.array([[0]])
    # append the bits from least significant to most significant
    for pos in range(BIN_DIM)[::-1]:
        X = np.array([[a_bin[pos], b_bin[pos]]])  # shape=(1, 2): one bit of a and one bit of b
        x_train = np.concatenate((x_train, X), axis=0)
        Y = np.array([[c_bin[pos]]])  # shape=(1, 1): the corresponding bit of c
        y_train = np.concatenate((y_train, Y), axis=0)
    return x_train, y_train

x_train, y_train = generate_data()
newrnn = rnn(epoch=10000, ALPHA=0.2, HIDDEN_DIM=10, checkpoint_path="./")
newrnn.train(x_train, y_train)
newrnn.predict()
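
After training, the recorded curves can be displayed with the show() method defined above (this assumes matplotlib has a usable display backend):

newrnn.show()  # plots the averaged error and exact-match accuracy recorded during training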