From 8e0ddf16f81d76286c069e27b35f82dcbf8ed1bb Mon Sep 17 00:00:00 2001
From: jxq
Date: Fri, 5 Dec 2025 17:34:59 +0800
Subject: [PATCH 1/2] 1. Add a custom sample preprocessing script for the HLE
 benchmark; 2. Support loading base64-encoded images; 3. Fix a bug in
 inference/backend/predict_async_hf.py

---
 benchmark_code/HLE/eval_hle.py        |  2 +-
 benchmark_code/HLE/preprocess.py      | 42 +++++++++++++++++++++++++++
 inference/backend/multimodal.py       | 12 +++++++-
 inference/backend/predict_async_hf.py |  4 +--
 inference/data_server.py              | 25 ++++++++++++----
 requirements.txt                      |  3 +-
 6 files changed, 77 insertions(+), 11 deletions(-)
 create mode 100644 benchmark_code/HLE/preprocess.py

diff --git a/benchmark_code/HLE/eval_hle.py b/benchmark_code/HLE/eval_hle.py
index e32db04..cee6253 100644
--- a/benchmark_code/HLE/eval_hle.py
+++ b/benchmark_code/HLE/eval_hle.py
@@ -17,7 +17,7 @@ def extract_label(data):
 
 def extract_pred(data):
     if "predict_result" in data:
-        return f'["{data["predict_result"]}"]'
+        return str(data["predict_result"])
     if "prediction" in data:
         return str(data["prediction"])
     if "choices" in data and isinstance(data["choices"], list):
diff --git a/benchmark_code/HLE/preprocess.py b/benchmark_code/HLE/preprocess.py
new file mode 100644
index 0000000..11a3974
--- /dev/null
+++ b/benchmark_code/HLE/preprocess.py
@@ -0,0 +1,42 @@
+import os
+import sys
+sys.path.append(os.path.abspath(os.path.dirname(__file__)))
+from eval_hle import generate_prompt
+from transformers import AutoProcessor, PreTrainedTokenizer
+
+def convert_sample_to_input_ids(
+    s: dict,
+    prompt_type: str,
+    tokenizer: PreTrainedTokenizer
+):
+    question = s.get("question")
+    image = s.get("image", "")
+
+    if image:
+        question = "<|vision_start|><|image_pad|><|vision_end|>" + question
+        inputs = {
+            "prompt": generate_prompt(question),
+            "multi_modal_data": {"image": [image]}
+        }
+        return inputs
+    else:
+        contents = [
+            {"type": "text", "text": generate_prompt(question)}
+        ]
+        messages = [
+            {
+                "role": "user",
+                "content": contents
+            }
+        ]
+        final_prompt = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True,
+            enable_thinking=False
+        )
+        # Work around the tokenizer error raised when a multimodal Processor is passed in
+        if "Processor" in type(tokenizer).__name__:
+            tokenizer = tokenizer.tokenizer
+        input_ids = tokenizer.encode(final_prompt)
+        return input_ids
\ No newline at end of file
diff --git a/inference/backend/multimodal.py b/inference/backend/multimodal.py
index b77826c..fe02be0 100755
--- a/inference/backend/multimodal.py
+++ b/inference/backend/multimodal.py
@@ -1,5 +1,7 @@
 import math
 import librosa
+import base64
+from io import BytesIO
 from PIL import Image


@@ -23,8 +25,16 @@ def load_multimodal_data(
     audio: list[str] | None = None,
     return_dict=False,
 ):
+    def open_image(x):
+        if x.startswith("data:image"):
+            x = x.split(',', 1)[1]
+            image_data = base64.b64decode(x)
+            return Image.open(BytesIO(image_data))
+        else:
+            return Image.open(x)
+
     if image is not None:
-        image = [Image.open(x) for x in image]
+        image = [open_image(x) for x in image]
         image = [to_rgb(x) for x in image]
         # For Qwen2-VL, Qwen2.5-VL, Omni: work around the processor bug when the short side is smaller than 28
         # The OCRBench dataset needs MIN_PIXELS adjusted
diff --git a/inference/backend/predict_async_hf.py b/inference/backend/predict_async_hf.py
index 1c86052..2bce7a9 100755
--- a/inference/backend/predict_async_hf.py
+++ b/inference/backend/predict_async_hf.py
@@ -109,7 +109,7 @@ async def predict_sample(
     else:  # list[int]
         input_len = len(inputs)  # input_ids
         input_tensors = dict(
-            inputs=torch.tensor([inputs], device=model.device),
+            input_ids=torch.tensor([inputs], device=model.device),
             attention_mask=torch.ones((1, input_len), dtype=torch.long, device=model.device)
         )

@@ -126,7 +126,7 @@ async def predict_sample(
             **kwargs,
         )
         # input + output, shape: (batch_size, total_len)
-        output = output_ids[0].tolist()[input_len:]
+        output = output_ids[0].flatten().tolist()[input_len:]
     elif otype == 'turn_taking':
         model = getattr(model, 'thinker', model)  # Qwen2.5-Omni

diff --git a/inference/data_server.py b/inference/data_server.py
index ee6cfef..b5365fb 100755
--- a/inference/data_server.py
+++ b/inference/data_server.py
@@ -3,6 +3,7 @@
 import argparse
 import random
 import ujson as json
+import base64
 import asyncio
 import time
 import importlib
@@ -820,13 +821,25 @@ async def write_result_async(i_data, i_sample):
 
 def write_result(i_data):
     """Write inference results to a jsonl file"""
+    def default_serializer(obj):
+        return f"Lost data of JSON-unserializable type {obj.__class__.__name__}"
+    def dumps(r):
+        try:
+            json_kwargs = dict(
+                ensure_ascii=False,
+                escape_forward_slashes=False  # required for ujson to match the standard library (json) behavior
+            )
+            return json.dumps(r, **json_kwargs)
+        except Exception:
+            import json as std_json  # fall back to the standard library; aliased so the global ujson binding is not shadowed
+            json_kwargs = dict(
+                ensure_ascii=False,
+                default=default_serializer  # handle elements that cannot be JSON-serialized (e.g. bytes)
+            )
+            return std_json.dumps(r, **json_kwargs)
     try:
         d = file_list[i_data]
         # print('save:', d.output_path)
-        json_kwargs = dict(
-            ensure_ascii=False,
-            escape_forward_slashes=False,  # required for ujson to match the standard library (json) behavior
-        )
         os.makedirs(os.path.split(d.output_path)[0], exist_ok=True)
         if d.n_write == -1:
             d.n_write = 0
@@ -837,7 +850,7 @@ def write_result(i_data):
         if args.no_order:
             while d.results_new:
                 r = d.results_new.popleft()
-                f.write(json.dumps(r, **json_kwargs) + '\n')
+                f.write(dumps(r) + '\n')
                 d.n_write += 1
         else:
             while d.n_write < len(d.results):
@@ -845,7 +858,7 @@ def write_result(i_data):
                 r = d.results[i]
                 if r is None:
                     break
-                f.write(json.dumps(r, **json_kwargs) + '\n')
+                f.write(dumps(r) + '\n')
                 d.results[i] = None  # free memory
                 d.n_write += 1

diff --git a/requirements.txt b/requirements.txt
index 20a4e8b..0a88d7c 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -22,4 +22,5 @@ zmq
 pybase64
 sanic
 rouge_chinese
-jieba
\ No newline at end of file
+jieba
+librosa
\ No newline at end of file

From 808a081a6f72fdf3c27e80cdc90c73b16f17da05 Mon Sep 17 00:00:00 2001
From: jxq
Date: Fri, 5 Dec 2025 17:41:40 +0800
Subject: [PATCH 2/2] Add config/hle.yaml

---
 config/hle.yaml | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 config/hle.yaml

diff --git a/config/hle.yaml b/config/hle.yaml
new file mode 100644
index 0000000..4e77d30
--- /dev/null
+++ b/config/hle.yaml
@@ -0,0 +1,19 @@
+# Steps (using Qwen/Qwen3-Omni-30B-A3B-Thinking as an example):
+# 1. Download the dataset file (https://huggingface.co/datasets/cais/hle/blob/main/data/test-00000-of-00001.parquet) to a local path
+# 2. Download the Qwen/Qwen3-Omni-30B-A3B-Thinking model to a local path, then install the dependencies required to run it:
+#    pip install transformers==4.57.3
+#    pip install flash-attn --no-build-isolation
+#    pip install -r requirements.txt
+# 3. Run: python run_pipeline.py --config config/hle.yaml --model_path {your_local_path_to_Qwen3-Omni-30B-A3B-Thinking} --backend hf
+
+save_dir: results
+
+preprocess: benchmark_code.HLE.preprocess
+thinker_max_new_tokens: 4096
+
+tasks:
+  HLE:
+    type: text
+    data_path: [YOUR_PATH_TO_TEST_PARQUET]
+    compare_func:
+      path: benchmark_code/HLE/eval_hle.py
\ No newline at end of file
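
A quick way to sanity-check the new base64 image path before launching the full HLE run is to round-trip an image through a data:image/...;base64 URI, the same branch the open_image() helper added in inference/backend/multimodal.py takes. The sketch below is a minimal standalone check assuming only Pillow is installed; decode_data_uri() and the generated 32x32 test image are illustrative, not code from this repository.

import base64
from io import BytesIO

from PIL import Image


def decode_data_uri(x: str) -> Image.Image:
    # Mirrors the patched open_image() logic: accept either a
    # "data:image/...;base64," URI or a plain file path.
    if x.startswith("data:image"):
        x = x.split(",", 1)[1]  # strip the "data:image/png;base64," header
        return Image.open(BytesIO(base64.b64decode(x)))
    return Image.open(x)


if __name__ == "__main__":
    # Build a small in-memory PNG and wrap it in a data URI (illustrative sample only).
    buf = BytesIO()
    Image.new("RGB", (32, 32), color=(200, 30, 30)).save(buf, format="PNG")
    uri = "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode("ascii")

    img = decode_data_uri(uri)
    print(img.size, img.mode)  # expected: (32, 32) RGB

If the round trip prints (32, 32) RGB, a string built this way should be usable as a sample's "image" field and flow through convert_sample_to_input_ids and load_multimodal_data unchanged.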