From d3da4b22a95f2ab011cd031cdb52b89a6bc58574 Mon Sep 17 00:00:00 2001
From: GVN2307
Date: Thu, 12 Feb 2026 17:27:20 +0530
Subject: [PATCH] Fix inference paths, add requirements.txt, and update README

---
 README.md        | 37 ++++++++++++++++++++++------
 inference.py     | 64 +++++++++++++++++++++++++++++++-----------------
 requirements.txt |  7 ++++++
 3 files changed, 77 insertions(+), 31 deletions(-)
 create mode 100644 requirements.txt

diff --git a/README.md b/README.md
index fdd8f7e..7222d8d 100644
--- a/README.md
+++ b/README.md
@@ -83,14 +83,35 @@ This project provides the fine-tuned adapter weights:
 
 ---
 
-## Inference Example
-
-An example script (`inference.py`) is provided to demonstrate how to:
-- Load the Qwen2.5-Math-1.5B base model
-- Attach the fine-tuned LoRA adapter
-- Run step-by-step math inference
-
-Note: Running the script requires downloading the base model from Hugging Face.
+## Setup and Usage
+
+### Prerequisites
+- Python 3.8+
+- CUDA-enabled GPU (recommended, e.g., NVIDIA T4 or better)
+
+### Installation
+1. Clone the repository:
+   ```bash
+   git clone https://github.com/AshChadha-iitg/OpenMath.git
+   cd OpenMath
+   ```
+2. Install dependencies:
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+### Running Inference
+The `inference.py` script is configured to load the fine-tuned LoRA adapter from the current directory and run a sample math problem.
+
+```bash
+python inference.py
+```
+
+You can also import the `solve_problem` function into your own Python scripts:
+```python
+from inference import solve_problem
+print(solve_problem("Your math problem here"))
+```
 
 ---
 
diff --git a/inference.py b/inference.py
index a8d15bf..5c93670 100644
--- a/inference.py
+++ b/inference.py
@@ -22,9 +22,9 @@
 # CONFIG (MATCHES YOUR TRAINING)
 # ==========================
 BASE_MODEL = "Qwen/Qwen2.5-Math-1.5B"
-ADAPTER_PATH = "./openmath-lora" # <-- PUT YOUR ADAPTER HERE
+ADAPTER_PATH = "." # Weights are in the root directory
 
-# 4-bit QLoRA config (same as your T4 training)
+# 4-bit QLoRA config
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
@@ -35,9 +35,14 @@
 # ==========================
 # LOAD TOKENIZER + MODEL
 # ==========================
+print(f"Loading tokenizer and model: {BASE_MODEL}...")
 tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
 tokenizer.pad_token = tokenizer.eos_token
 
+# Check for CUDA
+if not torch.cuda.is_available():
+    print("Warning: CUDA is not available. Loading on CPU might be extremely slow or fail for 4-bit quantization.")
+
 base_model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL,
     quantization_config=bnb_config,
@@ -45,33 +50,46 @@
 )
 
 # Attach your fine-tuned LoRA adapter
-model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
-model.eval()
+print(f"Attaching adapter from {ADAPTER_PATH}...")
+try:
+    model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
+    model.eval()
+except Exception as e:
+    print(f"Error loading adapter: {e}")
+    exit(1)
 
 # Silence padding warning
 model.generation_config.pad_token_id = tokenizer.eos_token_id
 
 # ==========================
-# OPENMATH PROMPT (MUST MATCH TRAINING)
+# OPENMATH PROMPT
 # ==========================
-prompt = (
-"### Instruction:\n"
-"Solve the math problem step by step and give the final answer.\n\n"
-"### Problem:\n"
-"If a store sells pencils at 3 for $1, how much do 15 pencils cost?\n\n"
-"### Solution:\n"
-)
+def solve_problem(problem_text):
+    prompt = (
+        "### Instruction:\n"
+        "Solve the math problem step by step and give the final answer.\n\n"
+        "### Problem:\n"
+        f"{problem_text}\n\n"
+        "### Solution:\n"
+    )
 
-inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
-with torch.no_grad():
-    outputs = model.generate(
-        **inputs,
-        max_new_tokens=200,
-        do_sample=False, # deterministic (better for math)
-        repetition_penalty=1.1,
-        no_repeat_ngram_size=3,
-    )
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=512,
+            do_sample=False,
+            repetition_penalty=1.1,
+            no_repeat_ngram_size=3,
+        )
+
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-print("\n===== OPENMATH OUTPUT =====\n")
-print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+if __name__ == "__main__":
+    example_problem = "If a store sells pencils at 3 for $1, how much do 15 pencils cost?"
+    print("\n===== OPENMATH INFERENCE =====")
+    print(f"Problem: {example_problem}")
+    result = solve_problem(example_problem)
+    print("\n===== OUTPUT =====\n")
+    print(result)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f37e6dd
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+torch
+transformers
+peft
+bitsandbytes
+accelerate
+safetensors
+sentencepiece