Hello, Javi Rando! I have adapted and optimized GuidedPasswordGeneration.ipynb so that it can be run from the terminal; call it generate_conditional.py:
import argparse
import string

import torch
from transformers import GPT2LMHeadModel, RobertaTokenizerFast


def get_tokens(tokenizer, symbols):
    # Token ids for each symbol, without adding special tokens.
    return tokenizer(symbols, add_special_tokens=False).input_ids


def create_token_dict(tokenizer):
    # Map each template character class to the token ids of its allowed symbols.
    lowercase = list(string.ascii_lowercase)
    uppercase = list(string.ascii_uppercase)
    digits = list(string.digits)
    punctuation = list(string.punctuation)

    lowercase_tokens = get_tokens(tokenizer, lowercase)
    uppercase_tokens = get_tokens(tokenizer, uppercase)
    digits_tokens = get_tokens(tokenizer, digits)
    punctuation_tokens = get_tokens(tokenizer, punctuation)

    return {
        "l": lowercase_tokens,
        "u": uppercase_tokens,
        "d": digits_tokens,
        "p": punctuation_tokens,
    }


def conditional_generation(template, num_generations=1):
    generated = 0
    generations = []

    while generated < num_generations:
        # Start every password from the beginning-of-sequence token.
        generation = torch.tensor([tokenizer.bos_token_id]).unsqueeze(0)
        current_length = 1

        for char in template:
            if char in token_dict:
                # Ban every token that does not belong to this character class.
                bad_tokens = [t for t in all_tokens if t not in token_dict[char]]
            else:
                # Wildcard position: only ban the end-of-sequence token.
                bad_tokens = [[tokenizer.eos_token_id]]

            generation = model.generate(
                generation.to(args.device),
                do_sample=True,
                max_length=current_length + 1,
                pad_token_id=tokenizer.pad_token_id,
                num_return_sequences=1,
                bad_words_ids=bad_tokens,
            )
            current_length += 1

        # Keep only generations that never produced token id 2.
        if 2 not in generation.flatten():
            generations.append(generation)
            generated += 1

    # Drop the leading BOS token before decoding.
    return torch.cat(generations, 0)[:, 1:]


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Conditional password generation using PassGPT.")
    parser.add_argument("--model_path", type=str, help="Path to PassGPT model checkpoint", required=True)
    parser.add_argument("--tokenizer_path", type=str, help="Path to tokenizer checkpoint", required=True)
    parser.add_argument("--device", type=str, default="cuda", help="Device to run execution")
    parser.add_argument("--template", type=str, help="Password template (e.g., 'lluu**dd')")
    parser.add_argument("--maxchars", type=int, default=10, help="Maximum length of the passwords")
    parser.add_argument("--num_generations", type=int, default=1, help="Number of passwords to generate")
    args = parser.parse_args()

    model = GPT2LMHeadModel.from_pretrained(args.model_path).eval().to(args.device)

    tokenizer = RobertaTokenizerFast.from_pretrained(
        args.tokenizer_path,
        max_len=args.maxchars + 2,
        padding="max_length",
        truncation=True,
        do_lower_case=False,
        strip_accents=False,
        mask_token="<mask>",
        unk_token="<unk>",
        pad_token="<pad>",
        truncation_side="right",
    )

    token_dict = create_token_dict(tokenizer)
    all_tokens = [[i] for i in range(len(tokenizer))]

    generations = conditional_generation(args.template, args.num_generations)
    decoded_passwords = tokenizer.batch_decode(generations)

    for i, password in enumerate(decoded_passwords):
        print(f"Generated Password {i+1}: {password}")
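A note on the template grammar: `l` stands for a lowercase letter, `u` for an uppercase letter, `d` for a digit, and `p` for a punctuation character; any other character (such as `*`) only has the end-of-sequence token banned, so it acts as a wildcard. As a rough sketch (not part of the script above), the same grammar can be expressed as a regular expression to sanity-check generated passwords, treating each wildcard position as a single arbitrary character; the helper name `template_to_regex` is hypothetical:

import re
import string

# Hypothetical helper that mirrors the character classes in create_token_dict.
TEMPLATE_CLASSES = {
    "l": "[a-z]",
    "u": "[A-Z]",
    "d": "[0-9]",
    "p": "[" + re.escape(string.punctuation) + "]",
}

def template_to_regex(template):
    # Characters outside l/u/d/p (e.g. '*') become single-character wildcards.
    return re.compile("".join(TEMPLATE_CLASSES.get(c, ".") for c in template) + r"\Z")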
Example run command:
python src/generate_conditional.py --model_path output_dir/last/ --tokenizer_path tokenizers_folder/byte_bpe_tokenizer_99/ --template "ullldp*" --maxchars 10 --num_generations 5
Output:
Generated Password 1: Josi0!M
Generated Password 2: Meek2--
Generated Password 3: Sant0$S
Generated Password 4: Mana1**
Generated Password 5: Tomh8&&
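For what it's worth, all five passwords above satisfy the ullldp* template; checking them with the hypothetical template_to_regex helper sketched earlier:

pattern = template_to_regex("ullldp*")
assert all(pattern.match(p) for p in ["Josi0!M", "Meek2--", "Sant0$S", "Mana1**", "Tomh8&&"])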
This code looks good. Thanks a lot! Could you open a Pull Request and I will merge into the existing codebase?