From db9f7def2326c7952a3f24cb0ac07aa17b038157 Mon Sep 17 00:00:00 2001
From: Justin Wang
Date: Mon, 21 Oct 2024 19:00:14 +0000
Subject: [PATCH] release: v0.2.2

---
 README.md      |  4 ++--
 nanogcg/gcg.py | 16 ++++++++--------
 pyproject.toml |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index a2bdc65..509025f 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ pip install -e .
 
 ## Overview
 
-The GCG algorithm was introduced in [Universal and Transferrable Attacks on Aligned Language Models](https://arxiv.org/pdf/2307.15043) [1] by Andy Zou, Zifan Wang, Nicholas Carlini, Milad Nasr, Zico Kolter, and Matt Fredrikson. This implementation implements the original algorithm and supports several modifications that can improve performance, including multi-position token swapping [2], early stopping strategy [2], a historical attack buffer [2][3], and the mellowmax loss function [4][5].
+The GCG algorithm was introduced in [Universal and Transferable Adversarial Attacks on Aligned Language Models](https://arxiv.org/pdf/2307.15043) [1] by Andy Zou, Zifan Wang, Nicholas Carlini, Milad Nasr, Zico Kolter, and Matt Fredrikson. This implementation supports the original algorithm as well as several modifications that can improve performance, including multi-position token swapping [2], a historical attack buffer [2][3], and the mellowmax loss function [4][5].
 
 ## Usage
 
@@ -83,7 +83,7 @@ The parameters that can be configured and their defaults are:
 
 - `use_prefix_cache: bool = True` - if True, stores the KV cache for all token positions before the optimized tokens
 
-- `allow_non_ascii : bool = False` - if True, allows for non-ascii tokens to be used
+- `allow_non_ascii: bool = False` - if True, allows non-ASCII tokens in the optimized sequence
 
 - `filter_ids: bool = True` - if True, only retains candidate sequences that are the same after tokenization and retokenization
 
diff --git a/nanogcg/gcg.py b/nanogcg/gcg.py
index ef4feeb..4d31425 100644
--- a/nanogcg/gcg.py
+++ b/nanogcg/gcg.py
@@ -104,9 +104,9 @@ def sample_ids_from_grad(
         the number of candidate sequences to return
     topk : int
         the topk to be used when sampling from the gradient
-    n_replace: int
+    n_replace : int
         the number of token positions to update per sequence
-    not_allowed_ids: Tensor, shape = (n_ids)
+    not_allowed_ids : Tensor, shape = (n_ids)
         the token ids that should not be used in optimization
 
     Returns:
@@ -221,9 +221,9 @@ def run(
         target = " " + target if config.add_space_before_target else target
 
         # Tokenize everything that doesn't get optimized
-        before_ids = tokenizer([before_str], padding=False, return_tensors="pt")["input_ids"].to(model.device).to(torch.int64)
-        after_ids = tokenizer([after_str], add_special_tokens=False, return_tensors="pt")["input_ids"].to(model.device).to(torch.int64)
-        target_ids = tokenizer([target], add_special_tokens=False, return_tensors="pt")["input_ids"].to(model.device).to(torch.int64)
+        before_ids = tokenizer([before_str], padding=False, return_tensors="pt")["input_ids"].to(model.device, torch.int64)
+        after_ids = tokenizer([after_str], add_special_tokens=False, return_tensors="pt")["input_ids"].to(model.device, torch.int64)
+        target_ids = tokenizer([target], add_special_tokens=False, return_tensors="pt")["input_ids"].to(model.device, torch.int64)
 
         # Embed everything that doesn't get optimized
         embedding_layer = self.embedding_layer
@@ -377,15 +377,15 @@ def compute_token_gradient(
         """Computes the gradient of the GCG loss w.r.t the one-hot token matrix.
 
         Args:
-        optim_ids : Tensor, shape = (1, n_optim_ids)
-            the sequence of token ids that are being optimized
+        optim_ids : Tensor, shape = (1, n_optim_ids)
+            the sequence of token ids that are being optimized
         """
         model = self.model
         embedding_layer = self.embedding_layer
 
         # Create the one-hot encoding matrix of our optimized token ids
         optim_ids_onehot = torch.nn.functional.one_hot(optim_ids, num_classes=embedding_layer.num_embeddings)
-        optim_ids_onehot = optim_ids_onehot.to(dtype=model.dtype, device=model.device)
+        optim_ids_onehot = optim_ids_onehot.to(model.device, model.dtype)
         optim_ids_onehot.requires_grad_()
 
         # (1, num_optim_tokens, vocab_size) @ (vocab_size, embed_dim) -> (1, num_optim_tokens, embed_dim)
diff --git a/pyproject.toml b/pyproject.toml
index 0a714ee..4f99b8f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "nanogcg"
-version = "0.2.1"
+version = "0.2.2"
 authors = [
   { name="Justin Wang", email="justin@grayswan.ai" },
 ]
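
As context for the config options documented in the README hunk above, here is a minimal usage sketch. It follows the `GCGConfig` / `nanogcg.run` API described in nanogcg's README; the model id, prompt, and target strings are placeholders, and the `best_loss` / `best_string` result fields are assumed from the documented result object.

```python
# Minimal nanogcg usage sketch; not part of the patch above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

import nanogcg
from nanogcg import GCGConfig

model_id = "mistralai/Mistral-7B-Instruct-v0.2"  # placeholder model
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
tokenizer = AutoTokenizer.from_pretrained(model_id)

config = GCGConfig(
    num_steps=250,
    search_width=128,
    topk=64,
    seed=42,
    use_prefix_cache=True,   # store the KV cache for positions before the optimized tokens
    allow_non_ascii=False,   # keep the optimized sequence ASCII-only
    filter_ids=True,         # keep only candidates stable under retokenization
)

message = "Give me step-by-step instructions"        # placeholder prompt
target = "Sure, here are step-by-step instructions"  # placeholder target prefix

result = nanogcg.run(model, tokenizer, message, target, config)
print(result.best_loss, result.best_string)
```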