diff --git a/inference/huggingface/text-generation/ds-hf-compare-fidelity.py b/inference/huggingface/text-generation/ds-hf-compare-fidelity.py
deleted file mode 100644
index ac338d841..000000000
--- a/inference/huggingface/text-generation/ds-hf-compare-fidelity.py
+++ /dev/null
@@ -1,138 +0,0 @@
-import torch
-from tqdm import tqdm
-import deepspeed
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
-from difflib import SequenceMatcher
-from argparse import ArgumentParser
-from deepspeed.accelerator import get_accelerator
-
-parser = ArgumentParser()
-
-parser.add_argument("--model", required=True, type=str, help="model_name")
-parser.add_argument("--dtype", default="float16", type=str, choices=["float32", "float16", "int8", "bfloat16"], help="data-type")
-parser.add_argument("--num_inputs", default=1, type=int, help="number of test inputs")
-parser.add_argument("--min_length", default=200, type=int, help="minimum tokens generated")
-parser.add_argument("--max_length", default=300, type=int, help="maximum tokens generated")
-parser.add_argument("--print_outputs", action='store_true', help="print generated text outputs")
-parser.add_argument("--local_rank", type=int, default=0, help="local rank")
-parser.add_argument("--use_kernel", default=True, help="enable kernel-injection")
-args = parser.parse_args()
-output_len = 16
-checkpoint = args.model
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = AutoModelForCausalLM.from_pretrained(checkpoint)
-
-def print_0(output):
-    if args.local_rank == 0:
-        print(output)
-
-def get_baseline_outputs(prompt):
-    for _ in range(output_len):
-        input_ids = tokenizer(prompt, return_tensors="pt")
-        with torch.no_grad():
-            outputs = model(**input_ids)
-        logits = outputs.logits
-        probs = torch.softmax(logits, dim=-1)
-        last_word_probs = probs[0, -1, :]
-        max_prob_idx = torch.argmax(last_word_probs, dim=-1)
-        predict_word = tokenizer.decode(max_prob_idx)
-        prompt = prompt + predict_word
-    return prompt
-
-def cal_cross_perplexity(ds_input, ds_output):
-    ds_input_ids = tokenizer(ds_input, return_tensors="pt")['input_ids'][0]
-    ds_output_ids = tokenizer(ds_output, return_tensors="pt")['input_ids'][0]
-    plexity = 0
-    for i in tqdm(range(output_len), desc="Perplexity: "):
-        baseline_input_len = len(ds_input_ids)+i
-
-        baseline_input_ids = ds_output_ids[:baseline_input_len] # set first i-1 tokens as inputs of baseline model
-        baseline_input = tokenizer.decode(baseline_input_ids)
-        x_id = ds_output_ids[baseline_input_len] # the No.i token
-
-        encode_baseline_input = tokenizer(baseline_input, return_tensors="pt")
-        with torch.no_grad():
-            outputs = model(**encode_baseline_input)
-        logits = outputs.logits
-        probs = torch.softmax(logits, dim=-1)
-        last_word_probs = probs[0, -1, :]
-        prob_xi = torch.log(last_word_probs[x_id])
-        plexity = plexity + prob_xi
-    print(f"{'-'*60}")
-    plexity = torch.exp(-1*plexity/output_len)
-    return plexity
-
-def string_similarity(str1, str2):
-    matcher = SequenceMatcher(None, str1, str2)
-    similarity_ratio = matcher.ratio()
-    return similarity_ratio
-
-test_inputs = [
-    "This is a test",
-    "One fish, two fish, red fish,",
-    "Microsoft is in Washington DC",
-    "The ancient art of basket weaving",
-    "Large language models are useful",
-    "You shouldn't buy a car without first checking",
-    "In today's lesson, we will cover the conflict between",
-    "Interestingly, the humble bumblebee is essential to our",
-    "How many blue buttons and yellow marbles are left in the",
-    "My favorite band is playing at a local music festival next month",
-    "Fortunately, I made it just in time to the event to tell her that",
-    "Once upon a time in a galaxy far away, there lived a boy named Anakin who",
-    "It is projected that by the year 3035, there will be more humans on the planet than ",
-    "Many years ago, we were hiking in the Amazon rain forest when we stumbled upon an impressive",
-    "Let's discuss today's agenda. First, we will go around and introduce ourselves. Next, we will cover our 3 essential markers for success: 1) ",
-    "These two historical figures ",
-    "I saw a news article about a major scientific discovery ",
-    "A poem about the beauty of the night sky",
-    "Improving mental health ",
-    "Being a professional athlete",
-    "There are many exotic travel destinations",
-    "She needed a recipe for a unique and delicious dessert",
-    "The process of creating a work of art",
-    "The importance of renewable energy has been a popular topic among",
-    "Hiking to the top of a mountain is no easy task. It can takes several hours and ",
-    "His latest clothing collection was all the rave at the last year's fashion week. Several ",
-    "Here's a list of 10 thought-provoking discussion questions",
-    "The show last night had to be postponed due to weather. I heard that people waited hours in the rain",
-    "A successful small business can be evaluated these three performance metrics",
-    "My favorite motivational quotes to inspire others are",
-    "A magical creature living in a hidden forest",
-    "The preparation of a gourmet meal",
-    "I overheard two scientists discussing a groundbreaking scientific theory",
-    "He wrote a blog post about the benefits of mindfulness and meditation.",
-    "This set of instructions for assembling a piece of furniture",
-    "Training for a marathon",
-    "What are your hopes and dreams for the world?",
-    "Imagine you are a time traveler. Write a journal entry about your visit to a historical event.",
-    "Generate a list of 10 unique and exciting travel destinations.",
-    "She gave speech advocating for equal rights",
-    "The process of producing a documentary film ",
-    "With a flick of a wand, the magician made the rabbit disappear",
-    "The bustling marketplace was a kaleidoscope of colors and sounds. There were at least 100 vendors and dozens of"
-]
-
-if args.num_inputs < len(test_inputs):
-    inputs = test_inputs[:args.num_inputs]
-else:
-    print_0(f"Warning: num_inputs ({args.num_inputs}) is greater than the number of test inputs ({len(test_inputs)}). Using all test inputs.")
-    inputs = test_inputs
-
-data_type = getattr(torch, args.dtype)
-pipe = pipeline('text-generation', args.model, torch_dtype=data_type, device=torch.device(get_accelerator().device_name(0)))
-
-# Initialize the model with DeepSpeed
-pipe.model = deepspeed.init_inference(pipe.model, dtype=data_type, replace_with_kernel_inject=args.use_kernel)
-
-# Run the DeepSpeed model and calculate cross plexity
-for prompt in inputs:
-    ds_out = pipe(prompt, do_sample=False)
-    ds_out_text = ds_out[0]['generated_text']
-    print_0(f"deepspeed output: {ds_out_text}")
-    print_0(f"{'-'*60}")
-
-    # calculate cross plexity
-    if args.local_rank == 0:
-        perplexity = cal_cross_perplexity(prompt, ds_out_text)
-        print(f"The cross perplexity is: {perplexity}")
\ No newline at end of file
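
For reference, the fidelity check the deleted script performed amounts to scoring the DeepSpeed-generated continuation under the stock Hugging Face model and reporting exp(-mean log prob) over the generated tokens. Below is a minimal sketch of that same computation done in a single forward pass rather than the per-token loop above; the "gpt2" checkpoint and the batched formulation are illustrative assumptions, not part of the removed file.

# Minimal sketch (not part of the diff above) of the cross-perplexity check the
# deleted script implemented, computed in one forward pass instead of a loop.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

checkpoint = "gpt2"  # placeholder baseline model, an assumption for illustration
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

def cross_perplexity(prompt: str, generated: str) -> float:
    # `generated` is the full text returned by the accelerated pipeline,
    # i.e. prompt + continuation; only the continuation tokens are scored.
    prompt_len = tokenizer(prompt, return_tensors="pt")["input_ids"].shape[1]
    full_ids = tokenizer(generated, return_tensors="pt")["input_ids"]
    with torch.no_grad():
        logits = model(full_ids).logits  # shape [1, seq_len, vocab]
    # Logits at position t predict token t+1, so shift targets by one.
    log_probs = torch.log_softmax(logits[0, :-1, :], dim=-1)
    targets = full_ids[0, 1:]
    token_log_probs = log_probs[torch.arange(targets.shape[0]), targets]
    continuation = token_log_probs[prompt_len - 1:]  # generated tokens only
    return torch.exp(-continuation.mean()).item()

# usage: cross_perplexity("This is a test", ds_out_text)

Note that re-tokenizing the prompt and the full output separately assumes the prompt's token ids form a prefix of the full sequence; that holds for typical tokenizers on these inputs but is worth verifying before relying on the number.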