-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Description
Checklist / 检查清单
- I have searched existing issues, and this is a new bug report. / 我已经搜索过现有的 issues,确认这是一个新的 bug report。
Bug Description / Bug 描述
export MODEL_PATH=/models/Qwen3.5-35B-A3B
export CUDA_VISIBLE_DEVICES=3
export CARD_NUM=1
vllm serve $MODEL_PATH \
--dtype 'bfloat16' \
--api-key 1234 \
--port 8866 \
--tensor-parallel-size 1 \
--gpu-memory-utilization 0.85 \
--pipeline-parallel-size 1
运行报错:
non-default args: {'model_tag': '/models/Qwen3.5-35B-A3B', 'port': 8866, 'api_key': ['1234'], 'model': '/models/Qwen3.5-35B-A3B', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.85}
(APIServer pid=400) Traceback (most recent call last):
(APIServer pid=400) File "/usr/local/bin/vllm", line 8, in <module>
(APIServer pid=400) sys.exit(main())
(APIServer pid=400) ^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/vllm/entrypoints/cli/main.py", line 73, in main
(APIServer pid=400) args.dispatch_function(args)
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/vllm/entrypoints/cli/serve.py", line 112, in cmd
(APIServer pid=400) uvloop.run(run_server(args))
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/uvloop/__init__.py", line 92, in run
(APIServer pid=400) return runner.run(wrapper())
(APIServer pid=400) ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/asyncio/runners.py", line 118, in run
(APIServer pid=400) return self._loop.run_until_complete(task)
(APIServer pid=400) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=400) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/uvloop/__init__.py", line 48, in wrapper
(APIServer pid=400) return await main
(APIServer pid=400) ^^^^^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 471, in run_server
(APIServer pid=400) await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 490, in run_server_worker
(APIServer pid=400) async with build_async_engine_client(
(APIServer pid=400) File "/usr/local/lib/python3.11/contextlib.py", line 210, in __aenter__
(APIServer pid=400) return await anext(self.gen)
(APIServer pid=400) ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 96, in build_async_engine_client
(APIServer pid=400) async with build_async_engine_client_from_engine_args(
(APIServer pid=400) File "/usr/local/lib/python3.11/contextlib.py", line 210, in __aenter__
(APIServer pid=400) return await anext(self.gen)
(APIServer pid=400) ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 122, in build_async_engine_client_from_engine_args
(APIServer pid=400) vllm_config = engine_args.create_engine_config(usage_context=usage_context)
(APIServer pid=400) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/vllm/engine/arg_utils.py", line 1477, in create_engine_config
(APIServer pid=400) model_config = self.create_model_config()
(APIServer pid=400) ^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/vllm/engine/arg_utils.py", line 1329, in create_model_config
(APIServer pid=400) return ModelConfig(
(APIServer pid=400) ^^^^^^^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/pydantic/_internal/_dataclasses.py", line 123, in __init__
(APIServer pid=400) s.__pydantic_validator__.validate_python(ArgsKwargs(args, kwargs), self_instance=s)
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/vllm/config/model.py", line 474, in __post_init__
(APIServer pid=400) hf_config = get_config(
(APIServer pid=400) ^^^^^^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/vllm/transformers_utils/config.py", line 628, in get_config
(APIServer pid=400) config_dict, config = config_parser.parse(
(APIServer pid=400) ^^^^^^^^^^^^^^^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/vllm/transformers_utils/config.py", line 163, in parse
(APIServer pid=400) config = config_class.from_pretrained(
(APIServer pid=400) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/modelscope/utils/hf_util/patcher.py", line 179, in patch_pretrained_model_name_or_path
(APIServer pid=400) return cls._from_pretrained_origin.__func__(cls, model_dir,
(APIServer pid=400) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/transformers/configuration_utils.py", line 552, in from_pretrained
(APIServer pid=400) return cls.from_dict(config_dict, **kwargs)
(APIServer pid=400) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/transformers/configuration_utils.py", line 714, in from_dict
(APIServer pid=400) config = cls(**config_dict)
(APIServer pid=400) ^^^^^^^^^^^^^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/vllm/transformers_utils/configs/qwen3_5_moe.py", line 192, in __init__
(APIServer pid=400) self.text_config = self.sub_configs["text_config"](**text_config)
(APIServer pid=400) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/vllm/transformers_utils/configs/qwen3_5_moe.py", line 121, in __init__
(APIServer pid=400) super().__init__(**kwargs)
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/transformers/configuration_utils.py", line 219, in __init__
(APIServer pid=400) kwargs = self.convert_rope_params_to_dict(
(APIServer pid=400) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=400) File "/usr/local/lib/python3.11/site-packages/transformers/modeling_rope_utils.py", line 651, in convert_rope_params_to_dict
(APIServer pid=400) ignore_keys_at_rope_validation = ignore_keys_at_rope_validation | {"partial_rotary_factor"}
(APIServer pid=400) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~
(APIServer pid=400) TypeError: unsupported operand type(s) for |: 'list' and 'set'
How to Reproduce / 如何复现
vllm部署推理服务的时候
Additional Information / 补充信息
No response