From 8e0ddf16f81d76286c069e27b35f82dcbf8ed1bb Mon Sep 17 00:00:00 2001
From: jxq
Date: Fri, 5 Dec 2025 17:34:59 +0800
Subject: [PATCH 1/2] 1. Add a custom sample preprocessing script for the HLE
 benchmark; 2. Support loading base64-encoded images; 3. Fix a bug in
 inference/backend/predict_async_hf.py

---
 benchmark_code/HLE/eval_hle.py        |  2 +-
 benchmark_code/HLE/preprocess.py      | 42 +++++++++++++++++++++++++++
 inference/backend/multimodal.py       | 12 +++++++-
 inference/backend/predict_async_hf.py |  4 +--
 inference/data_server.py              | 25 ++++++++++++----
 requirements.txt                      |  3 +-
 6 files changed, 77 insertions(+), 11 deletions(-)
 create mode 100644 benchmark_code/HLE/preprocess.py

diff --git a/benchmark_code/HLE/eval_hle.py b/benchmark_code/HLE/eval_hle.py
index e32db04..cee6253 100644
--- a/benchmark_code/HLE/eval_hle.py
+++ b/benchmark_code/HLE/eval_hle.py
@@ -17,7 +17,7 @@ def extract_label(data):
 
 def extract_pred(data):
     if "predict_result" in data:
-        return f'["{data["predict_result"]}"]'
+        return str(data["predict_result"])
     if "prediction" in data:
         return str(data["prediction"])
     if "choices" in data and isinstance(data["choices"], list):
diff --git a/benchmark_code/HLE/preprocess.py b/benchmark_code/HLE/preprocess.py
new file mode 100644
index 0000000..11a3974
--- /dev/null
+++ b/benchmark_code/HLE/preprocess.py
@@ -0,0 +1,42 @@
+import os
+import sys
+sys.path.append(os.path.abspath(os.path.dirname(__file__)))
+from eval_hle import generate_prompt
+from transformers import AutoProcessor, PreTrainedTokenizer
+
+def convert_sample_to_input_ids(
+    s: dict,
+    prompt_type: str,
+    tokenizer: PreTrainedTokenizer
+):
+    question = s.get("question")
+    image = s.get("image", "")
+
+    if image:
+        question = "<|vision_start|><|image_pad|><|vision_end|>" + question
+        inputs = {
+            "prompt": generate_prompt(question),
+            "multi_modal_data": {"image": [image]}
+        }
+        return inputs
+    else:
+        contents = [
+            {"type": "text", "text": generate_prompt(question)}
+        ]
+        messages = [
+            {
+                "role": "user",
+                "content": contents
+            }
+        ]
+        final_prompt = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True,
+            enable_thinking=False
+        )
+        # Work around the tokenizer error raised when a multimodal Processor is passed in
+        if "Processor" in type(tokenizer).__name__:
+            tokenizer = tokenizer.tokenizer
+        input_ids = tokenizer.encode(final_prompt)
+        return input_ids
\ No newline at end of file
diff --git a/inference/backend/multimodal.py b/inference/backend/multimodal.py
index b77826c..fe02be0 100755
--- a/inference/backend/multimodal.py
+++ b/inference/backend/multimodal.py
@@ -1,5 +1,7 @@
 import math
 import librosa
+import base64
+from io import BytesIO
 from PIL import Image


@@ -23,8 +25,16 @@ def load_multimodal_data(
     audio: list[str] | None = None,
     return_dict=False,
 ):
+    def open_image(x):
+        if x.startswith("data:image"):
+            x = x.split(',', 1)[1]
+            image_data = base64.b64decode(x)
+            return Image.open(BytesIO(image_data))
+        else:
+            return Image.open(x)
+
     if image is not None:
-        image = [Image.open(x) for x in image]
+        image = [open_image(x) for x in image]
         image = [to_rgb(x) for x in image]
         # For Qwen2-VL, Qwen2.5-VL, Omni: work around the processor bug when the short side is smaller than 28
         # The OCRBench dataset needs MIN_PIXELS adjusted
diff --git a/inference/backend/predict_async_hf.py b/inference/backend/predict_async_hf.py
index 1c86052..2bce7a9 100755
--- a/inference/backend/predict_async_hf.py
+++ b/inference/backend/predict_async_hf.py
@@ -109,7 +109,7 @@ async def predict_sample(
     else:  # list[int]
         input_len = len(inputs)  # input_ids
         input_tensors = dict(
-            inputs=torch.tensor([inputs], device=model.device),
+            input_ids=torch.tensor([inputs], device=model.device),
             attention_mask=torch.ones((1, input_len), dtype=torch.long, device=model.device)
         )

@@ -126,7 +126,7 @@ async def predict_sample(
             **kwargs,
         )
         # input + output, shape: (batch_size, total_len)
-        output = output_ids[0].tolist()[input_len:]
+        output = output_ids[0].flatten().tolist()[input_len:]
     elif otype == 'turn_taking':
         model = getattr(model, 'thinker', model)  # Qwen2.5-Omni

diff --git a/inference/data_server.py b/inference/data_server.py
index ee6cfef..b5365fb 100755
--- a/inference/data_server.py
+++ b/inference/data_server.py
@@ -3,6 +3,7 @@
 import argparse
 import random
 import ujson as json
+import base64
 import asyncio
 import time
 import importlib
@@ -820,13 +821,25 @@ async def write_result_async(i_data, i_sample):
 
 def write_result(i_data):
     """Write inference results to a jsonl file"""
+    def default_serializer(obj):
+        return f"Lost data of JSON-unserializable type {obj.__class__.__name__}"
+    def dumps(r):
+        try:
+            json_kwargs = dict(
+                ensure_ascii=False,
+                escape_forward_slashes=False  # required for ujson to match the standard library (json) behavior
+            )
+            return json.dumps(r, **json_kwargs)
+        except Exception:
+            import json as std_json  # fall back to the standard library; aliased so the global ujson binding is not shadowed
+            json_kwargs = dict(
+                ensure_ascii=False,
+                default=default_serializer  # handle elements that cannot be JSON-serialized (e.g. bytes)
+            )
+            return std_json.dumps(r, **json_kwargs)
     try:
         d = file_list[i_data]
         # print('save:', d.output_path)
-        json_kwargs = dict(
-            ensure_ascii=False,
-            escape_forward_slashes=False,  # required for ujson to match the standard library (json) behavior
-        )
         os.makedirs(os.path.split(d.output_path)[0], exist_ok=True)
         if d.n_write == -1:
             d.n_write = 0
@@ -837,7 +850,7 @@ def write_result(i_data):
         if args.no_order:
             while d.results_new:
                 r = d.results_new.popleft()
-                f.write(json.dumps(r, **json_kwargs) + '\n')
+                f.write(dumps(r) + '\n')
                 d.n_write += 1
         else:
             while d.n_write < len(d.results):
@@ -845,7 +858,7 @@ def write_result(i_data):
                 r = d.results[i]
                 if r is None:
                     break
-                f.write(json.dumps(r, **json_kwargs) + '\n')
+                f.write(dumps(r) + '\n')
                 d.results[i] = None  # free memory
                 d.n_write += 1

diff --git a/requirements.txt b/requirements.txt
index 20a4e8b..0a88d7c 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -22,4 +22,5 @@ zmq
 pybase64
 sanic
 rouge_chinese
-jieba
\ No newline at end of file
+jieba
+librosa
\ No newline at end of file

From 808a081a6f72fdf3c27e80cdc90c73b16f17da05 Mon Sep 17 00:00:00 2001
From: jxq
Date: Fri, 5 Dec 2025 17:41:40 +0800
Subject: [PATCH 2/2] Add config/hle.yaml

---
 config/hle.yaml | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 config/hle.yaml

diff --git a/config/hle.yaml b/config/hle.yaml
new file mode 100644
index 0000000..4e77d30
--- /dev/null
+++ b/config/hle.yaml
@@ -0,0 +1,19 @@
+# Steps (using Qwen/Qwen3-Omni-30B-A3B-Thinking as an example):
+# 1. Download the dataset file (https://huggingface.co/datasets/cais/hle/blob/main/data/test-00000-of-00001.parquet) to a local path
+# 2. Download the Qwen/Qwen3-Omni-30B-A3B-Thinking model to a local path, then install the dependencies required to run it:
+#    pip install transformers==4.57.3
+#    pip install flash-attn --no-build-isolation
+#    pip install -r requirements.txt
+# 3. Run: python run_pipeline.py --config config/hle.yaml --model_path {your_local_path_to_Qwen3-Omni-30B-A3B-Thinking} --backend hf
+
+save_dir: results
+
+preprocess: benchmark_code.HLE.preprocess
+thinker_max_new_tokens: 4096
+
+tasks:
+  HLE:
+    type: text
+    data_path: [YOUR_PATH_TO_TEST_PARQUET]
+    compare_func:
+      path: benchmark_code/HLE/eval_hle.py
\ No newline at end of file
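
A quick way to sanity-check the new base64 image path before launching the full HLE run is to round-trip an image through a data:image/...;base64 URI, the same branch the open_image() helper added in inference/backend/multimodal.py takes. The sketch below is a minimal standalone check assuming only Pillow is installed; decode_data_uri() and the generated 32x32 test image are illustrative, not code from this repository.

import base64
from io import BytesIO

from PIL import Image


def decode_data_uri(x: str) -> Image.Image:
    # Mirrors the patched open_image() logic: accept either a
    # "data:image/...;base64," URI or a plain file path.
    if x.startswith("data:image"):
        x = x.split(",", 1)[1]  # strip the "data:image/png;base64," header
        return Image.open(BytesIO(base64.b64decode(x)))
    return Image.open(x)


if __name__ == "__main__":
    # Build a small in-memory PNG and wrap it in a data URI (illustrative sample only).
    buf = BytesIO()
    Image.new("RGB", (32, 32), color=(200, 30, 30)).save(buf, format="PNG")
    uri = "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode("ascii")

    img = decode_data_uri(uri)
    print(img.size, img.mode)  # expected: (32, 32) RGB

If the round trip prints (32, 32) RGB, a string built this way should be usable as a sample's "image" field and flow through convert_sample_to_input_ids and load_multimodal_data unchanged.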