使用 Transformers 实现文本分类
时间:2023-09-14 09:08:40
github: https://github.com/haibincoder/NlpSummary/tree/master/torchcode/classification
import copy
from torch import nn
import torch.nn.functional as F
import torch
import math
class Config(object):
    """Hyper-parameter container for the Transformer text classifier."""

    def __init__(self, vocab_size, embed_dim, label_num, max_length=32):
        # Optional pre-trained embedding matrix; None means train from scratch.
        self.embedding_pretrained = None
        self.num_classes = label_num    # number of target classes
        self.vocab_size = vocab_size    # vocabulary size, assigned at runtime
        self.embed_dim = embed_dim      # token embedding dimension
        self.num_head = 5               # attention heads per encoder layer
        self.dropout = 0.1
        self.hidden = 512               # feed-forward hidden size
        self.num_encoder = 2            # number of stacked encoder layers
        self.max_length = max_length    # fixed input sequence length
        self.lr = 1e-3                  # learning rate
class Model(nn.Module):
    """Transformer-encoder text classifier.

    Embeds token ids, runs them through a stack of encoder layers and
    projects the flattened sequence onto class logits.

    Input:  x   LongTensor of token ids, shape (batch, max_length)
    Output: logits, shape (batch, num_classes)
    """

    def __init__(self, config):
        super().__init__()
        if config.embedding_pretrained is not None:
            self.embedding = nn.Embedding.from_pretrained(
                config.embedding_pretrained, freeze=False)
        else:
            # Last vocabulary index is reserved as padding; its embedding
            # stays zero and receives no gradient.
            self.embedding = nn.Embedding(
                config.vocab_size, config.embed_dim,
                padding_idx=config.vocab_size - 1)
        # Fix: the original built one Encoder, registered it as
        # `self.encoder` (an extra, unused parameter set on the model) and
        # deep-copied it into the ModuleList, so every layer started from
        # identical weights.  Build independently initialised layers instead.
        self.encoders = nn.ModuleList(
            Encoder(config.embed_dim, config.num_head,
                    config.hidden, config.dropout)
            for _ in range(config.num_encoder))
        # Classifier head over the flattened (max_length * embed_dim) output.
        self.fc1 = nn.Linear(config.max_length * config.embed_dim,
                             config.num_classes)

    def forward(self, x):
        out = self.embedding(x)           # (batch, seq, embed_dim)
        for encoder in self.encoders:
            out = encoder(out)
        out = out.view(out.size(0), -1)   # flatten sequence for the linear head
        return self.fc1(out)
class Encoder(nn.Module):
    """One Transformer encoder layer: multi-head self-attention then FFN.

    Both sub-layers apply their own residual connection and LayerNorm.
    """

    def __init__(self, embed_dim, num_head, hidden, dropout=0.0):
        super().__init__()
        self.attention = MultiHeadAttention(embed_dim, num_head, dropout)
        self.feed_forward = Position_wise_Feed_Forward(embed_dim, hidden, dropout)

    def forward(self, x):
        return self.feed_forward(self.attention(x))
class MultiHeadAttention(nn.Module):
    """Multi-head self-attention with residual connection and LayerNorm.

    Input and output shape: (batch, seq_len, embed_dim).
    """

    def __init__(self, embed_dim, num_head, dropout=0.0):
        super().__init__()
        self.num_head = num_head
        assert embed_dim % num_head == 0, 'head num error'
        self.dim_head = embed_dim // num_head
        self.fc_q = nn.Linear(embed_dim, embed_dim)
        self.fc_k = nn.Linear(embed_dim, embed_dim)
        self.fc_v = nn.Linear(embed_dim, embed_dim)
        self.attention = Attention()
        self.fc = nn.Linear(embed_dim, embed_dim)
        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        batch_size, seq_len, _ = x.size()
        Q = self.fc_q(x)
        K = self.fc_k(x)
        V = self.fc_v(x)

        # Fix: the original `view(batch * num_head, -1, dim_head)` does not
        # split the embedding into heads -- it chops the (seq, embed) plane
        # row-major, so each "head" saw a mixture of positions and partial
        # feature slices.  Split the feature dimension first, then move the
        # head axis next to batch before collapsing them.
        def split_heads(t):
            # (B, S, E) -> (B, S, H, dh) -> (B, H, S, dh) -> (B*H, S, dh)
            return (t.view(batch_size, seq_len, self.num_head, self.dim_head)
                     .transpose(1, 2)
                     .reshape(batch_size * self.num_head, seq_len, self.dim_head))

        context = self.attention(split_heads(Q), split_heads(K), split_heads(V))
        # Inverse of split_heads: (B*H, S, dh) -> (B, S, H*dh == E).
        context = (context
                   .view(batch_size, self.num_head, seq_len, self.dim_head)
                   .transpose(1, 2)
                   .reshape(batch_size, seq_len, self.num_head * self.dim_head))

        out = self.dropout(self.fc(context))
        out = out + x            # residual connection
        return self.layer_norm(out)
'''
Scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V.
'''
class Attention(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, query, key, value):
        d_k = query.size(-1)
        # Scale by sqrt(d_k) so the logits' variance stays independent of d_k.
        scores = query.matmul(key.transpose(-2, -1)) / math.sqrt(d_k)
        weights = F.softmax(scores, dim=-1)
        return weights.matmul(value)
'''
Position-wise feed-forward sub-layer of the Transformer encoder:
two linear maps with a ReLU in between, plus residual + LayerNorm.
'''
class Position_wise_Feed_Forward(nn.Module):
    def __init__(self, dim_model, hidden, dropout=0.0):
        super().__init__()
        # Module creation order kept stable: fc1, fc2, dropout, layer_norm.
        self.fc1 = nn.Linear(dim_model, hidden)
        self.fc2 = nn.Linear(hidden, dim_model)
        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(dim_model)

    def forward(self, x):
        projected = self.fc2(F.relu(self.fc1(x)))
        projected = self.dropout(projected)
        # Residual connection followed by layer normalisation.
        return self.layer_norm(projected + x)
相关文章
- IIR数字滤波器的实现(C语言)
- 使用libsvm实现文本分类
- Java实现 蓝桥杯VIP 算法提高 栅格打印问题
- Java实现 蓝桥杯VIP 算法训练 接水问题
- Java实现 洛谷 P1064 金明的预算方案
- SKlearn实现鸢尾花分类
- 利用Jquery实现http长连接(LongPoll) {转}
- 项目:用Pygame实现一个简单的垃圾分类小游戏
- 【STM32F429的DSP教程】第47章 STM32F429的IIR带阻滤波器实现(支持逐个数据的实时滤波)
- ZZNUOJ_用C语言编写程序实现1158:矩阵的最大值(指针专题)(附完整源码)
- ML之shap:基于adult人口普查收入二分类预测数据集(预测年收入是否超过50k)利用Shap值对XGBoost模型实现可解释性案例之详细攻略
- ML之FE:数据处理—特征工程之特征选择常用方法之基于搜索策略的三种分类、基于评价准则划分的三种分类(Filter/Wrapper/Embedded)及其代码实现
- ML之shap:基于adult人口普查收入二分类预测数据集(预测年收入是否超过50k)利用shap决策图结合LightGBM模型实现异常值检测案例之详细攻略
- DL之GD:利用LogisticGD算法(梯度下降)依次基于一次函数和二次函数分布的数据集实现二分类预测(超平面可视化)
- TF之Transformer:基于tensorflow和Keras框架(特征编码+Tokenizer处理文本+保存模型)针对UCI新闻数据集利用Transformer算法实现新闻文本多分类案例
- ML之xgboost:基于xgboost(5f-CrVa)算法对HiggsBoson数据集(Kaggle竞赛)训练(模型保存+可视化)实现二分类预测
- Dropout和网络结构实现数据增广带来的不同影响分析
- ConvNeXt V2实战:使用ConvNeXt V2实现图像分类任务(一)
- MicroNet实战:使用MicroNet实现图像分类(二)
- Python实现贝叶斯优化器(Bayes_opt)优化循环神经网络回归模型(LSTM回归算法)项目实战
- Python实现BP神经网络ANN单隐层分类模型项目实战
- Python实现WOA智能鲸鱼优化算法优化支持向量机分类模型(SVC算法)项目实战
- 【项目实战】Python实现LightGBM分类模型(LGBMClassifier算法)项目实战
- 使用TextCNN实现文本分类
- Android kotlin 用RecyclerView(androidx+BRVAH3.0.6+AnyLayer)实现从顶部弹出下拉横式列表一二级分类菜单对话框功能
- DL之CNN:利用卷积神经网络算法(2→2,基于Keras的API-Functional)利用MNIST(手写数字图片识别)数据集实现多分类预测
- NLP进阶,使用TextRNN和TextRNN_ATT实现文本分类
- 嵌入式linux开发,单网卡绑定多IP,实现多网段的访问