Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion envs/MATH/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .env import Env, extract_answer, extract_groundtruth, judge_correct
from .data import get_train_test_dataset
from .data import get_dataset
from .prompt import COT_EXAMPLES, COT_TASK_DESC, PROBLEM_FORMAT_STR, SEP
25 changes: 19 additions & 6 deletions envs/MATH/data.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,35 @@
from pathlib import Path
import jsonlines
import json
import os
from torch.utils.data import Dataset


def get_train_test_dataset(*args, **kwargs):
    """Load the MATH train and test splits.

    Expects ``train_data_path`` and ``test_data_path`` in ``kwargs``, both
    given relative to the repository root (three directory levels above
    this file: envs/MATH/data.py -> envs/MATH -> envs -> root).

    Returns:
        (train_ds, test_ds): tuple of ``JsonlMathDataset`` instances.

    Raises:
        KeyError: if either path kwarg is missing.
    """
    root_dir = Path(__file__).parent.parent.parent  # repository base dir
    # str(): JsonlMathDataset.__init__ dispatches on str.endswith, which
    # pathlib.Path objects do not provide.
    train_ds = JsonlMathDataset(str(root_dir / kwargs["train_data_path"]))
    test_ds = JsonlMathDataset(str(root_dir / kwargs["test_data_path"]))
    return train_ds, test_ds

def get_dataset(**kwargs):
    """Build a JsonlMathDataset from ``kwargs['data_path']``, resolved against the repo root."""
    base_dir = Path(__file__).parent.parent.parent.resolve()
    full_path = os.path.join(base_dir, kwargs['data_path'])
    return JsonlMathDataset(full_path)


class JsonlMathDataset(Dataset):
def __init__(self, data_path):
super().__init__()
self.data = []
with jsonlines.open(data_path, "r") as reader:
for obj in reader:
self.data.append(obj)
if data_path.endswith(".jsonl"):
with jsonlines.open(data_path, "r") as reader:
for obj in reader:
self.data.append(obj)
elif data_path.endswith(".json"):
with open(data_path, "r") as reader:
self.data = json.load(reader)

def __len__(self) -> int:
    # Dataset size == number of examples loaded in __init__.
    return len(self.data)
Expand Down
11 changes: 7 additions & 4 deletions envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,20 @@

def get_env_datasets(env_name: str, **kwargs):
    """Import ``envs.<env_name>`` and build its dataset.

    Delegates to the env module's ``get_dataset`` entry point, forwarding
    all keyword arguments (e.g. ``data_path``).
    """
    task_module = import_module(f"envs.{env_name}")
    # Single return via the current get_dataset API; the superseded
    # get_train_test_dataset call is gone.
    return task_module.get_dataset(**kwargs)


def get_default_query_str_builder(env_name: str, **kwargs):
task_module = import_module(f"envs.{env_name}")
cot_task_desc = kwargs.get('cot_task_desc', task_module.COT_TASK_DESC)
cot_examples = kwargs.get('cot_examples', task_module.COT_EXAMPLES)
problem_format_str = kwargs.get('problem_format_str', task_module.PROBLEM_FORMAT_STR)

def fn(problem_input: str, is_few_shot: bool):
return task_module.Env.build_query_str(
cot_task_desc=task_module.COT_TASK_DESC,
cot_examples=task_module.COT_EXAMPLES,
problem_format_str=task_module.PROBLEM_FORMAT_STR,
cot_task_desc=cot_task_desc,
cot_examples=cot_examples,
problem_format_str=problem_format_str,
problem_input=problem_input,
is_few_shot=is_few_shot,
)
Expand Down
2 changes: 2 additions & 0 deletions envs/gsm8k/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .env import Env, extract_answer, extract_groundtruth, judge_correct
from .data import get_dataset
44 changes: 44 additions & 0 deletions envs/gsm8k/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from pathlib import Path
import jsonlines
import json
import os
from torch.utils.data import Dataset


def get_train_test_dataset(*args, **kwargs):
    """Load the GSM8K train and test splits.

    Expects ``train_data_path`` and ``test_data_path`` in ``kwargs``,
    relative to the repository root (three levels above this file).

    Returns:
        (train_ds, test_ds): tuple of ``JsonlMathDataset`` instances.

    Raises:
        KeyError: if either path kwarg is missing.
    """
    root_dir = Path(__file__).parent.parent.parent  # repository base dir
    train_data_path = kwargs["train_data_path"]
    test_data_path = kwargs["test_data_path"]
    # str(): JsonlMathDataset.__init__ dispatches on str.endswith, which
    # pathlib.Path objects do not provide.
    test_ds = JsonlMathDataset(str(root_dir / test_data_path))
    train_ds = JsonlMathDataset(str(root_dir / train_data_path))
    return train_ds, test_ds

def get_dataset(**kwargs):
    """Build a JsonlMathDataset from ``kwargs['data_path']`` (repo-root relative)."""
    repo_root = Path(__file__).parent.parent.parent.resolve()
    data_file = os.path.join(repo_root, kwargs['data_path'])
    return JsonlMathDataset(data_file)


class JsonlMathDataset(Dataset):
    """GSM8K dataset reader.

    ``.jsonl`` rows are normalized on load to ``{"question", "answer"}``
    with the rationale stripped (GSM8K answers end in ``#### <answer>``).
    ``.json`` files are loaded verbatim.
    """

    def __init__(self, data_path):
        """Load examples from a ``.jsonl`` or ``.json`` file.

        Args:
            data_path: path to the data file; ``str`` or ``os.PathLike``.
        """
        super().__init__()
        self.data = []
        # Coerce to str so pathlib.Path inputs survive the endswith checks.
        data_path = os.fspath(data_path)
        if data_path.endswith(".jsonl"):
            with jsonlines.open(data_path, "r") as reader:
                for obj in reader:
                    # Keep only the final answer after the '####' marker.
                    cleaned_answer = obj["answer"].split("####")[-1].strip()
                    self.data.append(
                        {"question": obj["question"], "answer": cleaned_answer}
                    )
        elif data_path.endswith(".json"):
            with open(data_path, "r") as reader:
                self.data = json.load(reader)

    def __len__(self):
        # Dataset size == number of examples loaded in __init__.
        return len(self.data)

    def __getitem__(self, index):
        x = self.data[index]
        # Rows loaded from .jsonl were normalized to {"question", "answer"};
        # fall back to MATH-style {"problem", "solution"} keys for raw .json
        # files. (The original always read "problem"/"solution", which raised
        # KeyError for every .jsonl row this class itself produced.)
        question = x.get("question", x.get("problem"))
        answer = x.get("answer", x.get("solution"))
        return {"question": question, "answer": answer}
1,319 changes: 1,319 additions & 0 deletions envs/gsm8k/dataset/socratic_test_1319.jsonl

Large diffs are not rendered by default.

1,319 changes: 1,319 additions & 0 deletions envs/gsm8k/dataset/test1319.jsonl

Large diffs are not rendered by default.

69 changes: 69 additions & 0 deletions envs/gsm8k/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import copy
import re
from typing import List, Optional
import numpy as np
from envs.base_env import CoTEnv, NoLegalActionException, INVALID_ANS
# from .verify_utils import extract_answer as extract_fn, grade_answer
from envs.MATH.parse_utils_qwen import extract_answer as extract_fn, parse_ground_truth
from envs.MATH.grader import math_equal

ANS_RE = None  # unused placeholder; answer extraction delegates to the qwen parser helpers below


def extract_answer(answer_str: str) -> str:
    """Extract the final answer from a model completion via the qwen MATH parser."""
    parsed = extract_fn(answer_str, data_name='math')
    return parsed


def extract_groundtruth(groundtruth_str: str) -> str:
    """Parse the ground-truth answer with the qwen MATH parser.

    NOTE(review): reuses envs.MATH's parse_ground_truth with
    data_name='math' — confirm it accepts gsm8k-style answer strings.
    """
    return parse_ground_truth(groundtruth_str, data_name='math')


def judge_correct(
    problem_str: str, extracted_groundtruth: Optional[str], answer: str
) -> bool:
    """Return True iff ``answer`` is mathematically equal to the ground truth.

    ``problem_str`` is accepted for interface compatibility with the other
    envs but is not consulted by the checker.
    """
    return math_equal(answer, extracted_groundtruth)


class Env(CoTEnv):
    """GSM8K chain-of-thought environment built on the shared CoTEnv base."""

    # NOTE(review): SEP is not imported anywhere in this module's visible
    # imports — as written this line looks like a NameError at
    # class-definition time. Confirm where SEP should come from (a gsm8k
    # prompt module, or envs.MATH.prompt as the other envs use).
    sep = SEP

    def __init__(
        self,
        config,
        math_problems,
        llm_gen_fn,
        task_desc_str: str,
        cot_example_str: str,
        problem_format_str: str,
        reset=True,
    ):
        # Thin pass-through constructor; all setup happens in CoTEnv.
        super().__init__(
            config,
            math_problems,
            llm_gen_fn,
            task_desc_str,
            cot_example_str,
            problem_format_str,
            reset,
        )

    def post_process_act(self, action: str):
        # Ensure every generated action string terminates with the step
        # separator so downstream step-splitting stays consistent.
        if not action.endswith(self.sep):
            action = action.strip() + self.sep

        return action

    def _is_correct(self, completion):
        # Extract the model's final answer and compare it with the stored
        # ground truth. NOTE(review): self.math_problem["answer"] is passed
        # directly as the already-extracted ground truth (extract_groundtruth
        # is not called here) — confirm it is preprocessed upstream.
        extracted_answer = extract_answer(completion)
        # print("Compare: {} -- {}".format(extrated_answer,
        # self.math_problem['answer']))
        # return extrated_answer == self.math_problem['answer']
        return judge_correct(
            self.math_problem["question"], self.math_problem["answer"], extracted_answer
        )

    def get_reward(self):
        """To implement based on learned reward model"""
        return 0
2 changes: 1 addition & 1 deletion envs/rstar/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .rstar_env import Env, extract_answer, extract_groundtruth, judge_correct
from .data import get_train_test_dataset
from .data import get_dataset
17 changes: 14 additions & 3 deletions envs/rstar/data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from pathlib import Path
import jsonlines
import os
import json
from torch.utils.data import Dataset


Expand All @@ -9,14 +11,23 @@ def get_train_test_dataset(*args, **kwargs):
train_ds = JsonlMathDataset(env_dir / "MATH/dataset/train.jsonl")
return train_ds, test_ds

def get_dataset(**kwargs):
    """Build a JsonlMathDataset from ``kwargs['data_path']``, resolved against the repo root.

    Mirrors envs.MATH.data.get_dataset and envs.gsm8k.data.get_dataset.
    """
    root_dir = Path(__file__).parent.parent.parent.resolve()
    # Leftover debug print of the resolved path removed for consistency
    # with the sibling envs' implementations.
    return JsonlMathDataset(os.path.join(root_dir, kwargs['data_path']))


class JsonlMathDataset(Dataset):
def __init__(self, data_path):
super().__init__()
self.data = []
with jsonlines.open(data_path, "r") as reader:
for obj in reader:
self.data.append(obj)
if data_path.endswith(".jsonl"):
with jsonlines.open(data_path, "r") as reader:
for obj in reader:
self.data.append(obj)
elif data_path.endswith(".json"):
with open(data_path, "r") as reader:
self.data = json.load(reader)

def __len__(self) -> int:
    # Dataset size == number of examples loaded in __init__.
    return len(self.data)
Expand Down
10 changes: 4 additions & 6 deletions envs/rstar/rstar_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,12 @@
import os
import numpy as np

from envs.MATH.env import CoTEnv
from envs.base_env import NoLegalActionException, ResetException
from envs.base_env import CoTEnv, NoLegalActionException, ResetException
from tqdm import tqdm

from reason.inference.lm_call import LMCallingConfig
from .rstar_utils import *
from .eval_src.Evaluator import MATHEvaluator, QwenMATHEvaluator
from envs.MATH.prompt import COT_EXAMPLES, COT_TASK_DESC, PROBLEM_FORMAT_STR, SEP

from pathlib import Path

Expand All @@ -38,9 +36,9 @@ def __init__(
config,
math_problems,
llm_gen_fn,
task_desc_str: str = COT_TASK_DESC,
cot_example_str: str = COT_EXAMPLES,
problem_format_str: str = PROBLEM_FORMAT_STR,
task_desc_str: str,
cot_example_str: str,
problem_format_str: str,
reset=True,
):
"""
Expand Down
18 changes: 18 additions & 0 deletions prm/infer_fns.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,22 @@ def _math_shepherd_infer_fn(input_str: str, model, tokenizer, device):
logits = model(input_id).logits[:,:,candidate_tokens]
scores = logits.softmax(dim=-1)[:,:,0]
step_scores = scores[input_id == step_tag_id]
return step_scores

@torch.inference_mode()
def _genrm_infer_fn(input_str: str, model, tokenizer, device):
    """Score reasoning steps with a generative reward model (Direct-GenRM).

    Gathers the logits of the 'Yes'/'No' candidate tokens at every
    position, softmaxes over the two candidates, and returns the 'Yes'
    probability at each step-tag position (double newline).

    NOTE(review): assumes tokenizer.encode("Yes No") yields exactly the
    two candidate token ids — verify for the tokenizer in use.
    """
    good_token, bad_token, step_tag = 'Yes', 'No', '\n\n'
    candidate_tokens = tokenizer.encode(f"{good_token} {bad_token}")
    step_tag_id = tokenizer.encode(f"{step_tag}")[-1]

    token_ids = tokenizer.encode(input_str)
    input_id = torch.tensor([token_ids], device=device)
    candidate_logits = model(input_id).logits[:, :, candidate_tokens]
    good_probs = candidate_logits.softmax(dim=-1)[:, :, 0]
    return good_probs[input_id == step_tag_id]
15 changes: 12 additions & 3 deletions reason/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Open O1 dev
# OpenR Reasoning


## Environment
Expand All @@ -16,14 +16,23 @@ Set `NUM_LM_WORKER` and `NUM_RM_WORKER`, as well as the model_path config, in `r
```
sh reason/llm_service/create_service_math_shepherd.sh
```
or run model in tensor parallel mode
```bash
sh reason/llm_service/create_service_vllm_tensor_parallel.sh
```
⚠️ When setting up multiple LM/RM services, you can rename each service with input `--model-names $NAME` and call by `$NAME` in later execution.

### Run Inference
```
export PYTHONPATH=$(pwd)
sh scripts/eval/cot_greedy.sh
sh scripts/eval/cot_rerank.sh
sh scripts/eval/beam_search.sh
sh scripts/eval/vanila_mcts.sh
```

### Run Experiments
See `scripts/eval/exp/` and run the scripts it contains.
### Run LLM-as-Judge
After running an inference script, log files can be found in the saving directory, containing `config.json`, `avg_result.json` and `record.json` files. Due to the limited capabilities of the rule-based answer checker, we can perform LLM-as-Judge on the generated reasoning pathways by running:
```bash
sh scripts/eval/llm_as_judge.sh
```
Loading