2 changes: 1 addition & 1 deletion benchmark_code/HLE/eval_hle.py
@@ -17,7 +17,7 @@ def extract_label(data):

def extract_pred(data):
if "predict_result" in data:
return f'["{data["predict_result"]}"]'
return str(data["predict_result"])
if "prediction" in data:
return str(data["prediction"])
if "choices" in data and isinstance(data["choices"], list):
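For reviewers, a minimal sketch of what the changed `extract_pred` branch returns, assuming `data` carries a plain string under `predict_result` (sample value invented; the other branches are unchanged):

```python
# Hypothetical sample; only the predict_result branch is illustrated here.
data = {"predict_result": 'The answer is "B".'}

old = f'["{data["predict_result"]}"]'  # old: '["The answer is "B"."]' -- embedded quotes corrupt the bracketed literal
new = str(data["predict_result"])      # new: 'The answer is "B".' -- the prediction is passed through unchanged

print(old)
print(new)
```

If downstream code ever parses the old bracketed form, predictions containing double quotes would break it, which is one plausible motivation for returning the raw string.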
42 changes: 42 additions & 0 deletions benchmark_code/HLE/preprocess.py
@@ -0,0 +1,42 @@
import os
import sys
sys.path.append(os.path.abspath(os.path.dirname(__file__)))
from eval_hle import generate_prompt
from transformers import AutoProcessor, PreTrainedTokenizer

def convert_sample_to_input_ids(
s:dict,
prompt_type: str,
tokenizer: PreTrainedTokenizer
):
question = s.get("question")
image = s.get("image", "")

if image:
question = "<|vision_start|><|image_pad|><|vision_end|>"+question
inputs = {
"prompt": generate_prompt(question),
"multi_modal_data": {"image": [image]}
}
return inputs
else:
contents = [
{"type": "text", "text": generate_prompt(question)}
]
messages = [
{
"role": "user",
"content": contents
}
]
final_prompt = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
enable_thinking=False
)
# When a multimodal Processor is passed in as the tokenizer, fall back to its inner tokenizer
if "Processor" in type(tokenizer).__name__:
tokenizer = tokenizer.tokenizer
input_ids = tokenizer.encode(final_prompt)
return input_ids
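A minimal usage sketch of the new helper, assuming a local tokenizer directory and a text-only sample; the model path and the `prompt_type` value below are placeholders, not values prescribed by this PR:

```python
# Illustrative only: exercising convert_sample_to_input_ids directly.
from transformers import AutoTokenizer
from benchmark_code.HLE.preprocess import convert_sample_to_input_ids

tokenizer = AutoTokenizer.from_pretrained("/path/to/Qwen3-Omni-30B-A3B-Thinking")  # placeholder path
sample = {"question": "What is the capital of France?", "image": ""}  # text-only sample

# Text-only samples yield a list of token ids; samples with an "image" field
# instead yield a dict with "prompt" and "multi_modal_data".
input_ids = convert_sample_to_input_ids(sample, prompt_type="default", tokenizer=tokenizer)
print(len(input_ids))
```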
19 changes: 19 additions & 0 deletions config/hle.yaml
@@ -0,0 +1,19 @@
# Steps (using Qwen/Qwen3-Omni-30B-A3B-Thinking as an example):
# 1. Download the dataset file (https://huggingface.co/datasets/cais/hle/blob/main/data/test-00000-of-00001.parquet) to a local path
# 2. Download the Qwen/Qwen3-Omni-30B-A3B-Thinking model locally, then install the dependencies needed to run it:
# pip install transformers==4.57.3
# pip install flash-attn --no-build-isolation
# pip install -r requirements.txt
# 3. Run: python run_pipeline.py --config config/hle.yaml --model_path {your_local_path_to_Qwen3-Omni-30B-A3B-Thinking} --backend hf

save_dir: results

preprocess: benchmark_code.HLE.preprocess
thinker_max_new_tokens: 4096

tasks:
HLE:
type: text
data_path: [YOUR_PATH_TO_TEST_PARQUET]
compare_func:
path: benchmark_code/HLE/eval_hle.py
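For orientation, a sketch of how the dotted `preprocess` module path and the `compare_func.path` file path above might be resolved by a config loader; this is an assumption about the pipeline (`run_pipeline.py` is not part of this diff), not its actual implementation:

```python
# Sketch: resolving the two kinds of paths used in config/hle.yaml.
# Assumes PyYAML; the loader shown here is illustrative, not the project's code.
import importlib
import importlib.util
import yaml

with open("config/hle.yaml") as f:
    cfg = yaml.safe_load(f)

# "benchmark_code.HLE.preprocess" -> regular module import
preprocess_mod = importlib.import_module(cfg["preprocess"])

# "benchmark_code/HLE/eval_hle.py" -> module loaded from a file path
cmp_path = cfg["tasks"]["HLE"]["compare_func"]["path"]
spec = importlib.util.spec_from_file_location("eval_hle", cmp_path)
compare_mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(compare_mod)

print(preprocess_mod.convert_sample_to_input_ids, compare_mod.extract_pred)
```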
12 changes: 11 additions & 1 deletion inference/backend/multimodal.py
@@ -1,5 +1,7 @@
import math
import librosa
import base64
from io import BytesIO
from PIL import Image


@@ -23,8 +25,16 @@ def load_multimodal_data(
audio: list[str] | None = None,
return_dict=False,
):
def open_image(x):
if x.startswith("data:image"):
x = x.split(',', 1)[1]
image_data = base64.b64decode(x)
return Image.open(BytesIO(image_data))
else:
return Image.open(x)

if image is not None:
image = [Image.open(x) for x in image]
image = [open_image(x) for x in image]
image = [to_rgb(x) for x in image]
# For Qwen2-VL, Qwen2.5-VL and Omni: work around a processor bug when an image's short side is smaller than 28
# The OCRBench dataset needs MIN_PIXELS adjusted
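A standalone sketch of the two image-source formats the new `open_image` closure accepts, a filesystem path and a base64 `data:image/...` URI; the file name and image below are made up for illustration:

```python
# Illustrative round trip: the same image opened from a path and from a data URI.
import base64
from io import BytesIO
from PIL import Image

img = Image.new("RGB", (32, 32), "red")
img.save("sample.png")

buf = BytesIO()
img.save(buf, format="PNG")
data_uri = "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()

from_path = Image.open("sample.png")                       # plain path input
payload = data_uri.split(",", 1)[1]                        # strip the "data:image/...;base64," prefix
from_uri = Image.open(BytesIO(base64.b64decode(payload)))  # decoded data-URI input

print(from_path.size, from_uri.size)  # (32, 32) (32, 32)
```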
4 changes: 2 additions & 2 deletions inference/backend/predict_async_hf.py
@@ -109,7 +109,7 @@ async def predict_sample(
else: # list[int]
input_len = len(inputs) # input_ids
input_tensors = dict(
inputs=torch.tensor([inputs], device=model.device),
input_ids=torch.tensor([inputs], device=model.device),
attention_mask=torch.ones((1, input_len), dtype=torch.long, device=model.device)
)

@@ -126,7 +126,7 @@ async def predict_sample(
**kwargs,
) # input + output, shape: (batch_size, total_len)

output = output_ids[0].tolist()[input_len:]
output = output_ids[0].flatten().tolist()[input_len:]

elif otype == 'turn_taking':
model = getattr(model, 'thinker', model) # Qwen2.5-Omni
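For context on the generation changes above: passing the tensor as `input_ids=` names the text-token argument explicitly, and `.flatten()` makes the output slicing robust if `generate` hands back an extra leading dimension. A standalone torch sketch of the slicing behavior (shapes are illustrative, not taken from a specific model):

```python
# Why .flatten().tolist()[input_len:] is safer than .tolist()[input_len:].
import torch

input_len = 3

usual = torch.tensor([[101, 102, 103, 9, 8, 7]])     # (batch, total_len)
print(usual[0].flatten().tolist()[input_len:])        # [9, 8, 7]

nested = torch.tensor([[[101, 102, 103, 9, 8, 7]]])   # hypothetical extra leading dim
# Without flatten(), nested[0].tolist() is [[101, ..., 7]] and the slice
# [input_len:] would drop whole rows instead of the prompt tokens.
print(nested[0].flatten().tolist()[input_len:])        # [9, 8, 7]
```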
25 changes: 19 additions & 6 deletions inference/data_server.py
@@ -3,6 +3,7 @@
import argparse
import random
import ujson as json
import base64
import asyncio
import time
import importlib
@@ -820,13 +821,25 @@ async def write_result_async(i_data, i_sample):

def write_result(i_data):
"""将推理结果写入jsonl文件"""
def default_serializer(obj):
return f"Lost data of JSON-unserializable type {obj.__class__.__name__}"
def dumps(r):
try:
json_kwargs = dict(
ensure_ascii=False,
escape_forward_slashes=False # must be set for ujson to match the standard-library json behavior
)
return json.dumps(r, **json_kwargs)
except Exception as e:
import json # fall back to the standard-library json for records ujson cannot serialize
json_kwargs = dict(
ensure_ascii=False,
default=default_serializer # handle values that are not JSON-serializable (e.g. bytes)
)
return json.dumps(r, **json_kwargs)
try:
d = file_list[i_data]
# print('save:', d.output_path)
json_kwargs = dict(
ensure_ascii=False,
escape_forward_slashes=False, # must be set for ujson to match the standard-library json behavior
)
os.makedirs(os.path.split(d.output_path)[0], exist_ok=True)
if d.n_write == -1:
d.n_write = 0
@@ -837,15 +850,15 @@ def write_result(i_data):
if args.no_order:
while d.results_new:
r = d.results_new.popleft()
f.write(json.dumps(r, **json_kwargs) + '\n')
f.write(dumps(r) + '\n')
d.n_write += 1
else:
while d.n_write < len(d.results):
i = d.n_write
r = d.results[i]
if r is None:
break
f.write(json.dumps(r, **json_kwargs) + '\n')
f.write(dumps(r) + '\n')
d.results[i] = None # free memory
d.n_write += 1

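A standalone sketch of the serialization fallback introduced in `write_result`: ujson is tried first (fast, but without a `default=` hook in this code path), and the standard-library `json` takes over for records containing values ujson cannot serialize, such as `bytes`. Names mirror the diff; the sample records are invented:

```python
import ujson  # assumed available, as in data_server.py

def default_serializer(obj):
    # Replace unserializable values instead of losing the whole record.
    return f"Lost data of JSON-unserializable type {obj.__class__.__name__}"

def dumps(record):
    try:
        # ujson-specific option keeps forward slashes unescaped, matching stdlib json.
        return ujson.dumps(record, ensure_ascii=False, escape_forward_slashes=False)
    except Exception:
        import json  # stdlib json supports the default= hook for unsupported types
        return json.dumps(record, ensure_ascii=False, default=default_serializer)

print(dumps({"text": "ok"}))                   # served by ujson
print(dumps({"audio": b"\x00\x01", "id": 1}))  # bytes trigger the stdlib fallback
```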
3 changes: 2 additions & 1 deletion requirements.txt
@@ -22,4 +22,5 @@ zmq
pybase64
sanic
rouge_chinese
jieba
jieba
librosa