



  • 对抗样本生成模块支持安全工程师快速高效地生成对抗样本,用于攻击AI模型。

  • 对抗样本检测、防御模块支持用户检测过滤对抗样本、增强AI模型对于对抗样本的鲁棒性。

  • 评估模块提供多种指标全面评估对抗样本攻防性能。





  1. Copyimport sys
  2. import time
  3. import numpy as np
  4. from scipy.special import softmax
  6. from mindspore import dataset as ds
  7. import mindspore.common.dtype as mstype
  8. import mindspore.dataset.transforms.vision.c_transforms as CV
  9. import mindspore.dataset.transforms.c_transforms as C
  10. from mindspore.dataset.transforms.vision import Inter
  11. import mindspore.nn as nn
  12. import mindspore.ops.operations as P
  13. from mindspore.common.initializer import TruncatedNormal
  14. from mindspore import Model
  15. from mindspore import Tensor
  16. from mindspore import context
  17. from mindspore.train.serialization import load_checkpoint, load_param_into_net
  19. from mindarmour.attacks.gradient_method import FastGradientSignMethod
  20. from mindarmour.utils.logger import LogUtil
  21. from mindarmour.evaluations.attack_evaluation import AttackEvaluate
  # Execute in graph mode with Ascend as the device target.
  context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")

  # Tag attached to every message of this demo, and the logger that emits them.
  TAG = 'demo'
  LOGGER = LogUtil.get_instance()



  # generate training data
  def generate_mnist_dataset(data_path, batch_size=32, repeat_size=1,
                             num_parallel_workers=1, sparse=True):
      """
      Build the MNIST input pipeline for training or testing.

      Args:
          data_path: location handed to ``ds.MnistDataset``.
          batch_size: number of samples per batch; an incomplete trailing
              batch is dropped.
          repeat_size: count passed to ``repeat`` on the batched dataset.
          num_parallel_workers: worker count forwarded to every ``map`` call.
          sparse: when True the labels are only cast to int32; when False
              they are one-hot encoded over 10 classes and cast to float32.

      Returns:
          The mapped, shuffled, batched and repeated dataset.
      """
      mnist = ds.MnistDataset(data_path)

      # image operations: resize to 32x32, multiply by 1/255 with a shift
      # of 0.0, then apply HWC2CHW
      image_ops = [
          CV.Resize((32, 32), interpolation=Inter.LINEAR),
          CV.Rescale(1.0 / 255.0, 0.0),
          CV.HWC2CHW(),
      ]

      # label operations depend on whether sparse labels were requested
      one_hot = C.OneHot(10)
      if sparse:
          label_ops = [C.TypeCast(mstype.int32)]
      else:
          label_ops = [one_hot, C.TypeCast(mstype.float32)]

      # labels are mapped first, then images, each operation in its own map
      steps = [("label", op) for op in label_ops]
      steps += [("image", op) for op in image_ops]
      for column, operation in steps:
          mnist = mnist.map(input_columns=column, operations=operation,
                            num_parallel_workers=num_parallel_workers)

      # dataset-level operations
      mnist = mnist.shuffle(buffer_size=10000)
      mnist = mnist.batch(batch_size, drop_remainder=True)
      return mnist.repeat(repeat_size)



  • 定义LeNet模型网络。
  def conv(in_channels, out_channels, kernel_size, stride=1, padding=0):
      """
      Create a bias-free ``nn.Conv2d`` layer in "valid" pad mode whose
      weights are initialised by ``weight_variable()``.
      """
      layer_options = dict(
          kernel_size=kernel_size,
          stride=stride,
          padding=padding,
          weight_init=weight_variable(),
          has_bias=False,
          pad_mode="valid",
      )
      return nn.Conv2d(in_channels, out_channels, **layer_options)
  def fc_with_initialize(input_channels, out_channels):
      """
      Create an ``nn.Dense`` layer whose weight and bias each receive their
      own initializer from ``weight_variable()``.
      """
      return nn.Dense(input_channels, out_channels,
                      weight_init=weight_variable(),
                      bias_init=weight_variable())
  def weight_variable():
      """Return a new ``TruncatedNormal`` initializer built with 0.2."""
      initializer = TruncatedNormal(0.2)
      return initializer
  class LeNet5(nn.Cell):
      """
      LeNet network: two conv -> ReLU -> max-pool stages followed by three
      dense layers, the last of which has 10 outputs.
      """

      def __init__(self):
          super().__init__()
          # convolutional stages
          self.conv1 = conv(1, 6, 5)
          self.conv2 = conv(6, 16, 5)
          # dense layers; the first one consumes the 16*5*5 flattened features
          self.fc1 = fc_with_initialize(16*5*5, 120)
          self.fc2 = fc_with_initialize(120, 84)
          self.fc3 = fc_with_initialize(84, 10)
          # parameter-free operators shared by several stages
          self.relu = nn.ReLU()
          self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
          self.reshape = P.Reshape()

      def construct(self, x):
          """Run the forward pass and return the output of ``fc3``."""
          features = self.max_pool2d(self.relu(self.conv1(x)))
          features = self.max_pool2d(self.relu(self.conv2(features)))
          # flatten before the dense layers
          flat = self.reshape(features, (-1, 16*5*5))
          hidden = self.relu(self.fc1(flat))
          hidden = self.relu(self.fc2(hidden))
          return self.fc3(hidden)
  • 加载预训练的LeNet模型,您也可以训练并保存自己的MNIST模型,参考快速入门。利用上面定义的数据加载函数generate_mnist_dataset载入数据。
  # build the network and restore its parameters from the checkpoint file
  net = LeNet5()
  ckpt_name = './trained_ckpt_file/checkpoint_lenet-10_1875.ckpt'
  load_dict = load_checkpoint(ckpt_name)
  load_param_into_net(net, load_dict)

  # get test data (sparse=False selects one-hot float32 labels)
  batch_size = 32
  data_list = "./MNIST_unzip/test"
  dataset = generate_mnist_dataset(data_list, batch_size=batch_size,
                                   sparse=False)
  • 测试模型。
  # prediction accuracy before attack
  model = Model(net)
  batch_num = 3  # the number of batches of attacking samples
  test_images, test_labels, predict_labels = [], [], []

  # keep the first `batch_num` batches together with the model's predictions
  for i, data in enumerate(dataset.create_tuple_iterator(), start=1):
      images = data[0].astype(np.float32)
      test_images.append(images)
      test_labels.append(data[1])
      batch_logits = model.predict(Tensor(images)).asnumpy()
      predict_labels.append(np.argmax(batch_logits, axis=1))
      if i >= batch_num:
          break

  predict_labels = np.concatenate(predict_labels)
  # labels are one-hot here, so argmax recovers the class index
  true_labels = np.concatenate(test_labels).argmax(axis=1)
  accuracy = (predict_labels == true_labels).mean()
  LOGGER.info(TAG, "prediction accuracy before attacking is : %s", accuracy)


  1. prediction accuracy before attacking is : 0.9895833333333334



  # attacking
  # Concatenate the collected batches once; both the attack and the
  # evaluation below work on the full arrays.
  clean_images = np.concatenate(test_images)
  clean_labels = np.concatenate(test_labels)

  attack = FastGradientSignMethod(net, eps=0.3)
  # time.clock() was removed in Python 3.8; time.perf_counter() is the
  # supported clock for measuring elapsed intervals.
  start_time = time.perf_counter()
  adv_data = attack.batch_generate(clean_images, clean_labels, batch_size=32)
  stop_time = time.perf_counter()
  np.save('./adv_data', adv_data)

  pred_logits_adv = model.predict(Tensor(adv_data)).asnumpy()
  # rescale predict confidences into (0, 1).
  pred_logits_adv = softmax(pred_logits_adv, axis=1)
  pred_labels_adv = np.argmax(pred_logits_adv, axis=1)
  accuracy_adv = np.mean(np.equal(pred_labels_adv, true_labels))
  LOGGER.info(TAG, "prediction accuracy after attacking is : %s", accuracy_adv)

  # Images are moved from axis order (0, 1, 2, 3) to (0, 2, 3, 1) before
  # being handed to the evaluator.
  attack_evaluate = AttackEvaluate(clean_images.transpose(0, 2, 3, 1),
                                   clean_labels,
                                   adv_data.transpose(0, 2, 3, 1),
                                   pred_logits_adv)
  LOGGER.info(TAG, 'mis-classification rate of adversaries is : %s',
              attack_evaluate.mis_classification_rate())
  LOGGER.info(TAG, 'The average confidence of adversarial class is : %s',
              attack_evaluate.avg_conf_adv_class())
  LOGGER.info(TAG, 'The average confidence of true class is : %s',
              attack_evaluate.avg_conf_true_class())
  LOGGER.info(TAG, 'The average distance (l0, l2, linf) between original '
              'samples and adversarial samples are: %s',
              attack_evaluate.avg_lp_distance())
  LOGGER.info(TAG, 'The average structural similarity between original '
              'samples and adversarial samples are: %s',
              attack_evaluate.avg_ssim())
  # elapsed generation time divided by the number of attacked samples
  LOGGER.info(TAG, 'The average costing time is %s',
              (stop_time - start_time)/(batch_num*batch_size))


  1. prediction accuracy after attacking is : 0.052083
  2. mis-classification rate of adversaries is : 0.947917
  3. The average confidence of adversarial class is : 0.419824
  4. The average confidence of true class is : 0.070650
  5. The average distance (l0, l2, linf) between original samples and adversarial samples are: (1.698870, 0.465888, 0.300000)
  6. The average structural similarity between original samples and adversarial samples are: 0.332538
  7. The average costing time is 0.003125

对模型进行FGSM无目标攻击后,模型精度由98.9%降到5.2%,误分类率高达95%,成功攻击的对抗样本的预测类别的平均置信度(ACAC)为 0.419824,成功攻击的对抗样本的真实类别的平均置信度(ACTC)为 0.070650,同时给出了生成的对抗样本与原始样本的零范数距离、二范数距离和无穷范数距离,平均每个对抗样本与原始样本间的结构相似性为0.332538,平均每生成一张对抗样本所需时间为0.003125s。







  1. Copyfrom mindspore.nn import SoftmaxCrossEntropyWithLogits
  2. from mindarmour.defenses import NaturalAdversarialDefense
  loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False)
  opt = nn.Momentum(net.trainable_params(), 0.01, 0.09)

  # adversarial training of `net` on the collected test batches
  nad = NaturalAdversarialDefense(net, loss_fn=loss, optimizer=opt,
                                  bounds=(0.0, 1.0), eps=0.3)
  net.set_train()
  nad.batch_defense(np.concatenate(test_images), np.concatenate(test_labels),
                    batch_size=32, epochs=20)

  # get accuracy of test data on defensed model
  net.set_train(False)
  acc_list = []
  for i in range(batch_num):
      batch_inputs = test_images[i]
      batch_labels = test_labels[i]
      logits = net(Tensor(batch_inputs)).asnumpy()
      label_pred = np.argmax(logits, axis=1)
      acc_list.append(np.mean(np.argmax(batch_labels, axis=1) == label_pred))

  LOGGER.info(TAG, 'accuracy of TEST data on defensed model is : %s',
              np.mean(acc_list))

  # get accuracy of adv data on defensed model
  acc_list = []
  pred_logits_adv = []
  for i in range(batch_num):
      batch_inputs = adv_data[i * batch_size: (i + 1) * batch_size]
      batch_labels = test_labels[i]
      logits = net(Tensor(batch_inputs)).asnumpy()
      # Collect the logits of the ADVERSARIAL inputs here. Collecting them
      # in the clean-data loop made AttackEvaluate score clean predictions,
      # so its mis-classification rate was just the clean error rate.
      pred_logits_adv.append(logits)
      label_pred = np.argmax(logits, axis=1)
      acc_list.append(np.mean(np.argmax(batch_labels, axis=1) == label_pred))
  # rescale predict confidences into (0, 1).
  pred_logits_adv = softmax(np.concatenate(pred_logits_adv), axis=1)

  attack_evaluate = AttackEvaluate(np.concatenate(test_images),
                                   np.concatenate(test_labels),
                                   adv_data,
                                   pred_logits_adv)

  LOGGER.info(TAG, 'accuracy of adv data on defensed model is : %s',
              np.mean(acc_list))
  LOGGER.info(TAG, 'defense mis-classification rate of adversaries is : %s',
              attack_evaluate.mis_classification_rate())
  LOGGER.info(TAG, 'The average confidence of adversarial class is : %s',
              attack_evaluate.avg_conf_adv_class())
  LOGGER.info(TAG, 'The average confidence of true class is : %s',
              attack_evaluate.avg_conf_true_class())
  LOGGER.info(TAG, 'The average distance (l0, l2, linf) between original '
              'samples and adversarial samples are: %s',
              attack_evaluate.avg_lp_distance())


  1. accuracy of TEST data on defensed model is : 0.973958
  2. accuracy of adv data on defensed model is : 0.521835
  3. defense mis-classification rate of adversaries is : 0.026042
  4. The average confidence of adversarial class is : 0.67979
  5. The average confidence of true class is : 0.19144624
  6. The average distance (l0, l2, linf) between original samples and adversarial samples are: (1.544365, 0.439001, 0.300000)
