Paper Notes (FGSM): Explaining and Harnessing Adversarial Examples

Paper Information

Title: Explaining and Harnessing Adversarial Examples
Authors: Ian J. Goodfellow, Jonathon Shlens, Christian Szegedy
Venue: ICLR 2015
Paper: download
Code: download
Video walkthrough: click

1 Introduction

  Adversarial attacks: an adversarial example is an input to which a small, deliberately crafted perturbation has been added so that the model misclassifies it with high confidence, even though the change is barely perceptible to a human. The paper argues that the main cause of this vulnerability is the locally linear behavior of neural networks in high-dimensional input spaces, and it introduces the fast gradient sign method (FGSM), a cheap one-step way of generating such perturbations that can also be used for adversarial training.

2 Method

  Perturbation:

    $\eta=\varepsilon \operatorname{sign}\left(\nabla_{x} J(\theta, x, y)\right)$

  Adversarial example:

    $\tilde{x}=x+\eta$

  That is:

    $\tilde{x}=x+\varepsilon \operatorname{sign}\left(\nabla_{x} J(\theta, x, y)\right)$
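
  Why the sign of the gradient: under the max-norm constraint $\|\eta\|_{\infty} \leq \varepsilon$, the paper's linear view of the cost function gives the first-order approximation

    $J(\theta, x+\eta, y) \approx J(\theta, x, y)+\eta^{\top} \nabla_{x} J(\theta, x, y)$

  and the right-hand side is maximized over that constraint set by $\eta=\varepsilon \operatorname{sign}\left(\nabla_{x} J(\theta, x, y)\right)$, i.e. pushing every input dimension by $\varepsilon$ in the direction that increases the loss.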

3 Code

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
# List of epsilon (perturbation strength) values to test
epsilons = [0, .05, .1, .15, .2, .25, .3]
# Path to the pretrained model (i.e. where the trained model weights file is stored)
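pretrained_model = "lenet_mnist_model.pth"  # assumed checkpoint filename; adjust to wherever your trained LeNet weights are saved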
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./datasets', train=False, download=True, transform=transforms.ToTensor()),
    batch_size=1, shuffle=True)

# Define the LeNet model
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# Initialize the network
model = Net().to(device)
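# Load the trained weights (pretrained_model is the assumed checkpoint path defined above)
model.load_state_dict(torch.load(pretrained_model, map_location=device))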

# Put the model in evaluation mode (dropout layers are disabled)
model.eval()

def fgsm_attack(image, epsilon, data_grad):
    """
    :param image: 需要攻击的图像
    :param epsilon: 扰动值的范围
    :param data_grad: 图像的梯度
    :return: 扰动后的图像
    """
    # 收集数据梯度的元素符号
    sign_data_grad = data_grad.sign()
    # 通过调整输入图像的每个像素来创建扰动图像
    perturbed_image = image + epsilon*sign_data_grad
    # 添加剪切以维持[0,1]范围
    perturbed_image = torch.clamp(perturbed_image, 0, 1)
    # 返回被扰动的图像
    return perturbed_image
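
# Example usage (illustrative): assuming `img` is a [1, 1, 28, 28] tensor in [0, 1] whose
# .grad has been populated by a backward pass, the attacked image would be
#   adv_img = fgsm_attack(img, 0.25, img.grad)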

def test( model, device, test_loader, epsilon ):
    correct = 0
    adv_examples = []

    # Loop over all examples in the test set
    for data, target in test_loader:
        # Send the data and the label to the device
        data, target = data.to(device), target.to(device)
        # Set requires_grad on the input tensor; this is essential for the attack
        data.requires_grad = True

        output = model(data)  # forward pass through the model
        init_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability

        if init_pred.item() != target.item():  # if the initial prediction is already wrong, skip the attack and move on
            continue

        loss = F.nll_loss(output, target)
        model.zero_grad()
        loss.backward()

        # Collect the gradient of the loss with respect to the input
        data_grad = data.grad.data

        # Call fgsm_attack to generate the perturbed image
        perturbed_data = fgsm_attack(data, epsilon, data_grad)

        # Re-classify the perturbed image
        output = model(perturbed_data)

        # Check whether the attack succeeded
        final_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
        if final_pred.item() == target.item():
            correct += 1
            # Special case: save a few epsilon = 0 examples
            if (epsilon == 0) and (len(adv_examples) < 5):
                adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
                adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) )
        else:
            # Save a few adversarial examples for later visualization
            if len(adv_examples) < 5:
                adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
                adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) )

    # Compute the final accuracy for this epsilon
    final_acc = correct/float(len(test_loader))
    print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, correct, len(test_loader), final_acc))

    # Return the accuracy and the adversarial examples
    return final_acc, adv_examples

test(model, device, test_loader, epsilon=0.1)
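
  To compare attack strengths, the epsilons list defined at the top can be swept instead of testing a single value. A minimal sketch, assuming matplotlib is available (the accumulator variables and the plotting code are illustrative additions):

import matplotlib.pyplot as plt

# Run the attack for every perturbation strength and collect the results
accuracies = []
examples = []
for eps in epsilons:
    acc, ex = test(model, device, test_loader, eps)
    accuracies.append(acc)
    examples.append(ex)

# Plot test accuracy as a function of epsilon
plt.figure(figsize=(5, 5))
plt.plot(epsilons, accuracies, "*-")
plt.xlabel("Epsilon")
plt.ylabel("Accuracy")
plt.title("Accuracy vs Epsilon")
plt.show()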