Merge pull request #4 from myuito3/dev
Dev
myuito3 authored Dec 3, 2023
2 parents 42b7781 + 8ee3f43 commit 3a7eb6b
Showing 32 changed files with 1,079 additions and 151 deletions.
16 changes: 11 additions & 5 deletions README.md
@@ -21,15 +21,21 @@ Currently supported attack methods are as follows:
| FGSM | White-box | 📃[Explaining and Harnessing Adversarial Examples](https://arxiv.org/abs/1412.6572) |
| I-FGSM (BIM) | White-box | 📃[Adversarial examples in the physical world](https://arxiv.org/abs/1607.02533) |
| MI-FGSM (MIM) | White-box | 📃[Boosting Adversarial Attacks with Momentum](https://arxiv.org/abs/1710.06081) |
| NI-FGSM | White-box | 📃[Nesterov Accelerated Gradient and Scale Invariance for Adversarial Attacks](https://arxiv.org/abs/1908.06281) |
| PGD | White-box | 📃[Towards Deep Learning Models Resistant to Adversarial Attacks](https://arxiv.org/abs/1706.06083) |
| SI-NI-FGSM | White-box | 📃[Nesterov Accelerated Gradient and Scale Invariance for Adversarial Attacks](https://arxiv.org/abs/1908.06281) |
| SignHunter | Black-box | 📃[Sign Bits Are All You Need for Black-Box Attacks](https://openreview.net/forum?id=SygW0TEFwH) |
| SimBA | Black-box | 📃[Simple Black-box Adversarial Attacks](https://arxiv.org/abs/1905.07121) |
| Square attack | Black-box | 📃[Square Attack: a query-efficient black-box adversarial attack via random search](https://arxiv.org/abs/1912.00049) |

### 💠 Defenses
Currently supported defense methods, including adversarially trained models, are as follows:

| Method | Type | References |
| :------------------ | :------------------ | :------------------ |
| Bit-Red | Input transform | 📃[Feature Squeezing: Detecting Adversarial Examples in Deep Neural Networks](https://arxiv.org/abs/1704.01155) |
| JPEG | Input transform | 📃[A study of the effect of JPG compression on adversarial images](https://arxiv.org/abs/1608.00853) |
| Randomization | Input transform | 📃[Mitigating Adversarial Effects Through Randomization](https://arxiv.org/abs/1711.01991) |
| TRADES | Adv. training | 📃[Theoretically Principled Trade-off between Robustness and Accuracy](https://arxiv.org/abs/1901.08573) |

### 🧩 Others
@@ -72,12 +78,12 @@ python advgrads_cli/attack.py --load_config configs/mnist.yaml
### ⚙ Description format of config files
The attack configs are managed in a YAML file. The main fields and variables are described below, followed by an example config sketch.

- `data`: _(str, required)_ Specify a dataset for which adversarial examples are to be generated.
- `model`: _(str, required)_ Select a model to be attacked. See [here](https://github.com/myuito3/AdvGrads/blob/main/advgrads/models/__init__.py) for currently supported models.
- `attacks`: _(list, required)_ A list of the attack methods to execute. Each entry can also set the hyperparameters defined for that method. The parameters available to all methods are as follows:
- `method`: _(str)_ Attack method. See [here](https://github.com/myuito3/AdvGrads/blob/main/advgrads/adversarial/__init__.py) for currently supported attack methods.
- `norm`: _(str)_ Norm for adversarial perturbations.
- `eps`: _(float)_ Maximum norm constraint.
- `max_iters`: _(int)_ Maximum number of iterations used in iterative methods.
- `targeted`: _(bool)_ Whether to perform targeted attacks, which aim to misclassify an adversarial example into a particular class.
- `thirdparty_defense`: _(str, optional)_ Third-party defense method. See [here](https://github.com/myuito3/AdvGrads/blob/main/advgrads/adversarial/__init__.py) for currently supported defense methods.
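
Below is a minimal config sketch assembled from the fields above. The dataset, model, and hyperparameter values are placeholders rather than values taken from the repository, so check the linked registries and the shipped `configs/mnist.yaml` for names and layout that actually exist.

```yaml
# Hypothetical example: field names follow the list above, values are illustrative.
data: mnist                   # placeholder dataset name
model: <model-name>           # placeholder; see the supported-models registry linked above
attacks:
  - method: i-fgsm            # key from the attack registry
    norm: l_inf
    eps: 0.3
    max_iters: 10
    targeted: false
    thirdparty_defense: jpeg  # optional; key from the defense registry
  - method: square
    norm: l_inf
    eps: 0.3
    max_iters: 1000
    targeted: false
```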
28 changes: 25 additions & 3 deletions advgrads/adversarial/__init__.py
@@ -12,40 +12,62 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""Init adversarial attacks/defenses methods."""
"""Init attack/defense method configs."""

from advgrads.adversarial.attacks.base_attack import AttackConfig
from advgrads.adversarial.attacks.deepfool import DeepFoolAttackConfig
from advgrads.adversarial.attacks.di_mi_fgsm import DiMiFgsmAttackConfig
from advgrads.adversarial.attacks.fgsm import FgsmAttackConfig
from advgrads.adversarial.attacks.i_fgsm import IFgsmAttackConfig
from advgrads.adversarial.attacks.mi_fgsm import MiFgsmAttackConfig
from advgrads.adversarial.attacks.ni_fgsm import NiFgsmAttackConfig
from advgrads.adversarial.attacks.pgd import PGDAttackConfig
from advgrads.adversarial.attacks.pi_fgsm import PiFgsmAttackConfig
from advgrads.adversarial.attacks.si_ni_fgsm import SiNiFgsmAttackConfig
from advgrads.adversarial.attacks.signhunter import SignHunterAttackConfig
from advgrads.adversarial.attacks.simba import SimBAAttackConfig
from advgrads.adversarial.attacks.square import SquareAttackConfig
from advgrads.adversarial.defenses.input_transform.base_defense import DefenseConfig
from advgrads.adversarial.defenses.input_transform.bit_depth_reduction import (
BitDepthReductionDefenseConfig,
)
from advgrads.adversarial.defenses.input_transform.jpeg_compression import (
JpegCompressionDefenseConfig,
)
from advgrads.adversarial.defenses.input_transform.randomization import (
RandomizationDefenseConfig,
)


def get_attack_config_class(name: str) -> AttackConfig:
assert name in all_attack_names, f"Attack method named '{name}' not found."
return attack_class_dict[name]


def get_defense_config_class(name: str) -> DefenseConfig:
assert name in all_defense_names, f"Defense method named '{name}' not found."
return defense_class_dict[name]


attack_class_dict = {
"deepfool": DeepFoolAttackConfig,
"di-mi-fgsm": DiMiFgsmAttackConfig,
"fgsm": FgsmAttackConfig,
"i_fgsm": IFgsmAttackConfig,
"mi_fgsm": MiFgsmAttackConfig,
"i-fgsm": IFgsmAttackConfig,
"mi-fgsm": MiFgsmAttackConfig,
"ni-fgsm": NiFgsmAttackConfig,
"pgd": PGDAttackConfig,
"pi-fgsm": PiFgsmAttackConfig,
"si-ni-fgsm": SiNiFgsmAttackConfig,
"signhunter": SignHunterAttackConfig,
"simba": SimBAAttackConfig,
"square": SquareAttackConfig,
}
all_attack_names = list(attack_class_dict.keys())

defense_class_dict = {
"bit-red": BitDepthReductionDefenseConfig,
"jpeg": JpegCompressionDefenseConfig,
"randomization": RandomizationDefenseConfig,
}
all_defense_names = list(defense_class_dict.keys())
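
As a usage sketch (not the repository's CLI code), the registries above map the method names used in a YAML config to their configuration classes, which can then be instantiated with that entry's hyperparameters; the keyword values below are illustrative, not defaults taken from the repository.

```python
# A minimal sketch, assuming the package is importable as below.
from advgrads.adversarial import get_attack_config_class, get_defense_config_class

attack_config_cls = get_attack_config_class("di-mi-fgsm")  # any key of attack_class_dict
attack_config = attack_config_cls(norm="l_inf", eps=8 / 255)  # illustrative values

defense_config_cls = get_defense_config_class("jpeg")      # any key of defense_class_dict
```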
49 changes: 34 additions & 15 deletions advgrads/adversarial/attacks/base_attack.py
@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""Base class for adversarial attack methods."""
"""Base class for attack methods."""

from abc import abstractmethod
from dataclasses import dataclass, field
from typing import Any, Dict, List, Literal, Optional, Type

import torch
from torch import Tensor
@@ -27,9 +27,12 @@
from advgrads.models.base_model import Model


NormType = Literal["l_0", "l_2", "l_inf"]


@dataclass
class AttackConfig(InstantiateConfig):
"""Configuration for attack methods."""
"""The base configuration class for attack methods."""

_target: Type = field(default_factory=lambda: Attack)
"""Target class to instantiate."""
@@ -39,7 +42,7 @@ class AttackConfig(InstantiateConfig):
"""Min value of image used to clip perturbed images."""
max_val: float = 1.0
"""Max value of image used to clip perturbed images."""
norm: Optional[NormType] = None
"""Norm bound of adversarial perturbations."""
eps: float = 0.0
"""Radius of a l_p ball."""
@@ -48,17 +51,29 @@


class Attack:
"""Base class for attack methods.
"""The base class for attack methods.
Args:
config: Configuration for attack methods.
norm_allow_list: List of supported perturbation norms. Each method defines this
within its own class.
"""

config: AttackConfig
norm_allow_list: List[NormType]

def __init__(self, config: AttackConfig, **kwargs) -> None:
self.config = config

if self.eps < 0:
raise ValueError(f"eps must be greater than or equal to 0, got {self.eps}.")
if self.max_iters < 0:
raise ValueError(
f"max_iters must be greater than or equal to 0, got {self.max_iters}."
)
if self.norm not in self.norm_allow_list:
raise ValueError(f"Method does not support {self.norm} perturbation norm.")

def __call__(self, *args: Any, **kwargs: Any) -> Dict[ResultHeadNames, Any]:
return self.get_outputs(*args, **kwargs)

@@ -112,14 +127,15 @@ def get_outputs(
Args:
x: Images to be searched for adversarial examples.
y: Ground truth labels of images.
model: A model to be attacked.
thirdparty_defense: Thirdparty defense method instance.
"""
attack_outputs = self.run_attack(x, y, model, **kwargs)
self.sanity_check(x, attack_outputs[ResultHeadNames.X_ADV])

# If a defensive method is defined, the process is performed here. This
# corresponds to Section 5.2 (GRAY BOX: IMAGE TRANSFORMATIONS AT TEST TIME) in
# the paper of Guo et al. [https://arxiv.org/pdf/1711.00117.pdf].
if thirdparty_defense is not None:
attack_outputs[ResultHeadNames.X_ADV] = thirdparty_defense(
attack_outputs[ResultHeadNames.X_ADV]
@@ -142,22 +158,25 @@ def get_outputs(
return attack_outputs

def sanity_check(self, x: Tensor, x_adv: Tensor) -> None:
"""Ensure that the amount of perturbation is properly controlled.
"""Ensure that the amount of perturbation is properly controlled. This method
is specifically used to check the amount of perturbation of norm-constrained
type attack methods.
Args:
x: Original images.
x_adv: Perturbed images.
"""
if self.eps > 0.0:
deltas = x_adv - x
if self.norm == "l_inf":
real = (
deltas.abs().max().half()
) # ignore slight differences within the decimal point
msg = f"Perturbations beyond the l_inf sphere ({real})."
elif self.norm == "l_2":
real = torch.norm(deltas.view(x.shape[0], -1), p=2, dim=-1).max()
msg = f"Perturbations beyond the l_2 sphere ({real})."
elif self.norm == "l_0":
raise NotImplementedError

assert real <= self.eps, msg
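
For intuition, here is a standalone sketch of the bound check that `sanity_check` performs, using dummy tensors; the shapes and `eps` value are illustrative and not tied to any dataset in the repository.

```python
import torch

eps = 8 / 255
x = torch.rand(4, 1, 28, 28)  # dummy batch of images in [0, 1]
# One FGSM-like l_inf step of size eps, clipped back to the valid pixel range.
x_adv = torch.clamp(x + eps * torch.sign(torch.randn_like(x)), 0.0, 1.0)

deltas = x_adv - x
linf = deltas.abs().max()                                        # l_inf radius
l2 = torch.norm(deltas.view(x.shape[0], -1), p=2, dim=-1).max()  # largest per-image l_2 norm

# Clipping can only shrink the perturbation, so the l_inf bound still holds.
assert linf <= eps + 1e-6
print(f"l_inf = {linf.item():.4f}, max l_2 = {l2.item():.4f}")
```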
14 changes: 8 additions & 6 deletions advgrads/adversarial/attacks/deepfool.py
@@ -12,26 +12,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""Implementation of the DeepFool attack.
"""The implementation of the DeepFool attack.
Paper: DeepFool: a simple and accurate method to fool deep neural networks
Url: https://arxiv.org/abs/1511.04599
"""

from dataclasses import dataclass, field
from typing import Dict, List, Type

import torch
from torch import Tensor

from advgrads.adversarial.attacks.base_attack import Attack, AttackConfig, NormType
from advgrads.adversarial.attacks.utils.result_heads import ResultHeadNames
from advgrads.models.base_model import Model


@dataclass
class DeepFoolAttackConfig(AttackConfig):
"""The configuration class for DeepFool attack."""
"""The configuration class for the DeepFool attack."""

_target: Type = field(default_factory=lambda: DeepFoolAttack)
"""Target class to instantiate."""
@@ -44,12 +44,14 @@ class DeepFoolAttack(Attack):
Args:
config: The DeepFool attack configuration.
norm_allow_list: List of supported perturbation norms.
"""

config: DeepFoolAttackConfig
norm_allow_list: List[NormType] = ["l_2"]

def __init__(self, config: DeepFoolAttackConfig) -> None:
super().__init__(config)

if self.targeted:
raise ValueError("DeepFool does not support targeted attack.")
123 changes: 123 additions & 0 deletions advgrads/adversarial/attacks/di_mi_fgsm.py
@@ -0,0 +1,123 @@
# Copyright 2023 Makoto Yuito. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""The implementation of the Diverse Inputs Momentum Iterative Fast Gradient Sign
Method (DI-MI-FGSM) attack. This method is referred to as Momentum Diverse Inputs
Iterative Fast Gradient Sign Method (M-DI2-FGSM) in the original paper.
Paper: Improving Transferability of Adversarial Examples with Input Diversity
Url: https://arxiv.org/abs/1803.06978
Original implementation: https://github.com/cihangxie/DI-2-FGSM
"""

import random
from dataclasses import dataclass, field
from typing import Dict, List, Type

import torch
import torch.nn.functional as F
from torch import Tensor

from advgrads.adversarial.attacks.base_attack import Attack, AttackConfig, NormType
from advgrads.adversarial.attacks.utils.result_heads import ResultHeadNames
from advgrads.models.base_model import Model


@dataclass
class DiMiFgsmAttackConfig(AttackConfig):
"""The configuration class for the DI-MI-FGSM attack."""

_target: Type = field(default_factory=lambda: DiMiFgsmAttack)
"""Target class to instantiate."""
max_resolution_ratio: float = 1.104
"""Ratio of the length of one side of the transformed image to one of the original
image. The default value is calculated w.r.t the ImageNet setting mentioned in the
original paper (330/299 = 1.1036)."""
keep_dims: bool = False
"""Whether to keep the original image size."""
prob: float = 0.5
"""Probability of using diverse inputs."""
momentum: float = 1.0
"""Momentum about the model."""


class DiMiFgsmAttack(Attack):
"""The class of the DI-MI-FGSM attack.
Args:
config: The DI-MI-FGSM attack configuration.
norm_allow_list: List of supported perturbation norms.
"""

config: DiMiFgsmAttackConfig
norm_allow_list: List[NormType] = ["l_inf"]

def input_diversity(self, x: Tensor) -> Tensor:
"""Apply diverse input patterns, i.e., random transformations, on the input
image x.
Args:
x: Images to be transformed.
"""
h, w = x.shape[2:]
h_final = int(h * self.config.max_resolution_ratio)
w_final = int(w * self.config.max_resolution_ratio)

# 1. random resize
h_resize = random.randint(h, h_final - 1)
w_resize = random.randint(w, w_final - 1)
x_resize = F.interpolate(x, size=[h_resize, w_resize], mode="nearest")

# 2. random padding
h_remain = h_final - h_resize
w_remain = w_final - w_resize
pad_top = random.randint(0, h_remain)
pad_left = random.randint(0, w_remain)
dim = [pad_left, w_remain - pad_left, pad_top, h_remain - pad_top]
x_pad = F.pad(x_resize, dim, mode="constant", value=0)

assert x_pad.shape[2:] == (h_final, w_final)
if self.config.keep_dims:
x_pad = F.interpolate(x_pad, size=[h, w], mode="nearest")

return x_pad if torch.rand(1) < self.config.prob else x

def run_attack(
self, x: Tensor, y: Tensor, model: Model
) -> Dict[ResultHeadNames, Tensor]:
x_adv = x
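# Use a fixed step size so that max_iters sign steps sum to the l_inf budget eps.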
alpha = self.eps / self.max_iters
accumulated_grads = torch.zeros_like(x)

for _ in range(self.max_iters):
x_adv = x_adv.clone().detach().requires_grad_(True)
model.zero_grad()

logits = model(self.input_diversity(x_adv))
loss = F.cross_entropy(logits, y)
if self.targeted:
loss *= -1
gradients = torch.autograd.grad(loss, [x_adv])[0].detach()

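# MI-FGSM momentum step: normalize the gradient by its mean absolute value and
# add the accumulated gradient scaled by the decay factor `momentum`.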
gradients = gradients / torch.mean(
torch.abs(gradients), dim=(1, 2, 3), keepdims=True
)
gradients = gradients + self.config.momentum * accumulated_grads
accumulated_grads = gradients.clone().detach()

x_adv = x_adv + alpha * torch.sign(gradients)
x_adv = torch.clamp(x_adv, min=self.min_val, max=self.max_val)

return {ResultHeadNames.X_ADV: x_adv}
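
To make the pieces above concrete, here is a minimal usage sketch. The `model` object is assumed to be an advgrads `Model` wrapping the classifier under attack, and all hyperparameter values are illustrative rather than taken from the repository's configs.

```python
# Hypothetical usage sketch; `model` and the hyperparameter values are assumptions.
import torch

from advgrads.adversarial.attacks.di_mi_fgsm import DiMiFgsmAttack, DiMiFgsmAttackConfig
from advgrads.adversarial.attacks.utils.result_heads import ResultHeadNames

config = DiMiFgsmAttackConfig(norm="l_inf", eps=16 / 255, max_iters=10, prob=0.5)
attack = DiMiFgsmAttack(config)

x = torch.rand(8, 3, 224, 224)    # dummy batch of images in [0, 1]
y = torch.randint(0, 1000, (8,))  # dummy ground-truth labels

# `model` is assumed to be an advgrads Model instance; the attack returns a dict
# keyed by ResultHeadNames, with the adversarial images under X_ADV.
outputs = attack(x, y, model, thirdparty_defense=None)
x_adv = outputs[ResultHeadNames.X_ADV]
```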