Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 29 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,35 @@ This project provides the fine-tuned adapter weights:

---

## Inference Example

An example script (`inference.py`) is provided to demonstrate how to:
- Load the Qwen2.5-Math-1.5B base model
- Attach the fine-tuned LoRA adapter
- Run step-by-step math inference

Note: Running the script requires downloading the base model from Hugging Face.
## Setup and Usage

### Prerequisites
- Python 3.8+
- CUDA-enabled GPU (recommended, e.g., NVIDIA T4 or better)

### Installation
1. Clone the repository:
```bash
git clone https://github.com/AshChadha-iitg/OpenMath.git
cd OpenMath
```
2. Install dependencies:
```bash
pip install -r requirements.txt
```

### Running Inference
The `inference.py` script is configured to load the fine-tuned LoRA adapter from the current directory and run a sample math problem.

```bash
python inference.py
```

You can also import the `solve_problem` function into your own Python scripts:
```python
from inference import solve_problem
print(solve_problem("Your math problem here"))
```

---

Expand Down
64 changes: 41 additions & 23 deletions inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
# CONFIG (MATCHES YOUR TRAINING)
# ==========================
BASE_MODEL = "Qwen/Qwen2.5-Math-1.5B"
ADAPTER_PATH = "./openmath-lora" # <-- PUT YOUR ADAPTER HERE
ADAPTER_PATH = "." # Weights are in the root directory

# 4-bit QLoRA config (same as your T4 training)
# 4-bit QLoRA config
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
Expand All @@ -35,43 +35,61 @@
# ==========================
# LOAD TOKENIZER + MODEL
# ==========================
print(f"Loading tokenizer and model: {BASE_MODEL}...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# Qwen tokenizers ship without a pad token; reuse EOS so batching/generation works.
tokenizer.pad_token = tokenizer.eos_token

# Check for CUDA — 4-bit bitsandbytes quantization targets GPU kernels.
if not torch.cuda.is_available():
    print("Warning: CUDA is not available. Loading on CPU might be extremely slow or fail for 4-bit quantization.")

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",  # let accelerate place layers on available devices
)

# Attach the fine-tuned LoRA adapter on top of the quantized base model.
print(f"Attaching adapter from {ADAPTER_PATH}...")
try:
    model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
    model.eval()  # inference mode: disable dropout etc.
except Exception as e:
    print(f"Error loading adapter: {e}")
    # raise SystemExit instead of exit(): exit() is injected by the `site`
    # module and is not guaranteed under all interpreter invocations.
    raise SystemExit(1)

# Silence the "pad_token_id not set" warning emitted during generation.
model.generation_config.pad_token_id = tokenizer.eos_token_id

# ==========================
# OPENMATH PROMPT (format must match the one used during fine-tuning)
# ==========================
def solve_problem(problem_text):
    """Run step-by-step math inference on a single problem.

    Args:
        problem_text: The math problem, as plain text.

    Returns:
        The full decoded model output (prompt followed by the generated
        step-by-step solution), with special tokens stripped.
    """
    # Instruction/Problem/Solution template — must match the training format,
    # otherwise the LoRA adapter's learned behavior will not trigger.
    prompt = (
        "### Instruction:\n"
        "Solve the math problem step by step and give the final answer.\n\n"
        "### Problem:\n"
        f"{problem_text}\n\n"
        "### Solution:\n"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():  # no gradients needed at inference time
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=False,  # greedy decoding: deterministic, better for math
            repetition_penalty=1.1,
            no_repeat_ngram_size=3,  # curb degenerate step repetition
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

if __name__ == "__main__":
    # Demo: solve one sample problem when the script is executed directly.
    example_problem = "If a store sells pencils at 3 for $1, how much do 15 pencils cost?"
    print("\n===== OPENMATH INFERENCE =====")
    print(f"Problem: {example_problem}")
    result = solve_problem(example_problem)
    print("\n===== OUTPUT =====\n")
    print(result)
7 changes: 7 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
torch
transformers
peft
bitsandbytes
accelerate
safetensors
sentencepiece