Skip to content

Commit 594d518

Browse files
committed
keep old api in vllm-integration
Signed-off-by: doujiang24 <doujiang24@gmail.com>
1 parent 7683324 commit 594d518

File tree

4 files changed

+89
-8
lines changed

4 files changed

+89
-8
lines changed

doc/en/vllm-integration.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ pip3 install -e .
3737
- If you encounter any problems that you cannot solve, please refer to the [vLLM official compilation guide](https://docs.vllm.ai/en/v0.6.4.post1/getting_started/installation.html#install-the-latest-code).
3838

3939
## Configuration
40+
### Prepare configuration file to Run Example over RDMA
4041

4142
- Prepare a _**mooncake.json**_ file for both Prefill and Decode instances
4243
- **You don't need to change the `prefill_url` and `decode_url` in the config file on the decode side; please use the identical config file.**
@@ -45,14 +46,33 @@ pip3 install -e .
4546
{
4647
"prefill_url": "192.168.0.137:13003",
4748
"decode_url": "192.168.0.139:13003",
48-
"metadata_server": "192.168.0.139:2379"
49+
"metadata_server": "192.168.0.139:2379",
50+
"protocol": "rdma",
51+
"device_name": "erdma_0"
4952
}
5053
```
5154
- "prefill_url": The IP address and port of the Prefill node.
5255
- The port in the URL is used to communicate with etcd server for metadata.
5356
- "decode_url": The IP address and port of the Decode node.
5457
- The port in the URL is used to communicate with etcd server for metadata.
5558
- "metadata_server": The etcd server of mooncake transfer engine.
59+
- "protocol": The protocol to be used for data transmission ("rdma" or "tcp").
60+
- "device_name": The device to be used for data transmission, required when "protocol" is set to "rdma". If multiple NIC devices are used, separate them with commas, such as "erdma_0,erdma_1". Please note that there must be no spaces between them.
61+
62+
63+
### Prepare configuration file to Run Example over TCP
64+
65+
- Prepare a _**mooncake.json**_ file for both Prefill and Decode instances
66+
```json
67+
{
68+
"prefill_url": "192.168.0.137:13003",
69+
"decode_url": "192.168.0.139:13003",
70+
"metadata_server": "192.168.0.139:2379",
71+
"protocol": "tcp",
72+
"device_name": ""
73+
}
74+
```
75+
5676

5777
## Run Example
5878
- Please change the IP addresses and ports in the following guide according to your env.

doc/zh/vllm-integration.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ pip3 install -e .
3737
- 如果遇到任何无法解决的问题,请参照[vLLM官方的编译指南](https://docs.vllm.ai/en/v0.6.4.post1/getting_started/installation.html#install-the-latest-code)
3838

3939
## 配置
40+
### 使用 RDMA 运行示例所需配置文件
4041

4142
- 为预填充和解码实例准备一个 mooncake.json 文件
4243
- **在解码实例侧,你无须更改配置文件里的 `prefill_url` 和 `decode_url`,使用完全相同的配置文件即可。**
@@ -45,14 +46,33 @@ pip3 install -e .
4546
{
4647
"prefill_url": "192.168.0.137:13003",
4748
"decode_url": "192.168.0.139:13003",
48-
"metadata_server": "192.168.0.139:2379"
49+
"metadata_server": "192.168.0.139:2379",
50+
"protocol": "rdma",
51+
"device_name": "erdma_0"
4952
}
5053
```
5154
- "prefill_url": 预填充节点的 IP 地址和端口。
5255
- URL 中的端口用于与 etcd 服务器通信以获取元数据。
5356
- "decode_url": 解码节点的 IP 地址和端口。
5457
- URL 中的端口用于与 etcd 服务器通信以获取元数据。
5558
- "metadata_server": mooncake 传输引擎的 etcd 服务器。
59+
- "protocol": 数据传输协议("rdma" 或 "tcp")。
60+
- "device_name": 用于数据传输的设备,当 "protocol" 设置为 "rdma" 时必填。如果使用多个 NIC 设备,它们可以用逗号分隔,如 "erdma_0,erdma_1"。请注意它们之间没有空格。
61+
62+
63+
### 使用 TCP 运行示例所需配置文件
64+
65+
- 为预填充和解码实例准备一个 mooncake.json 文件
66+
```json
67+
{
68+
"prefill_url": "192.168.0.137:13003",
69+
"decode_url": "192.168.0.139:13003",
70+
"metadata_server": "192.168.0.139:2379",
71+
"protocol": "tcp",
72+
"device_name": ""
73+
}
74+
```
75+
5676

5777
## 运行示例
5878
- 请根据您的环境更改以下指南中的 IP 地址和端口。

mooncake-integration/vllm/vllm_adaptor.cpp

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,24 @@ VLLMAdaptor::~VLLMAdaptor() {
2828
large_buffer_list_.clear();
2929
}
3030

31+
/// Converts a comma-separated list of device names into a comma-separated
/// list of double-quoted names, suitable for embedding inside a JSON array.
///
/// Example: `erdma_0,erdma_1` becomes `"erdma_0","erdma_1"`.
///
/// Leading and trailing whitespace around each name is stripped, and blank
/// entries (for example from `a,,b` or a trailing comma) are dropped, so the
/// result never contains an empty or space-padded device name.
///
/// @param device_names Comma-separated device names; may be empty.
/// @return The quoted, comma-joined names, or an empty string when the input
///         contains no non-blank name.
std::string formatDeviceNames(const std::string &device_names) {
    static const char kWhitespace[] = " \t\r\n";

    std::stringstream ss(device_names);
    std::string item;
    std::string formatted;
    while (std::getline(ss, item, ',')) {
        const std::string::size_type first = item.find_first_not_of(kWhitespace);
        // Nothing but whitespace (or nothing at all) between two commas.
        if (first == std::string::npos) continue;
        const std::string::size_type last = item.find_last_not_of(kWhitespace);

        // Emit the separator before every name except the first one; this
        // avoids needing to know the total number of names up front.
        if (!formatted.empty()) formatted += ",";
        formatted += "\"" + item.substr(first, last - first + 1) + "\"";
    }
    return formatted;
}
48+
3149
std::pair<std::string, std::string> parseConnectionString(
3250
const std::string &conn_string) {
3351
std::pair<std::string, std::string> result;
@@ -48,26 +66,46 @@ std::pair<std::string, std::string> parseConnectionString(
4866
}
4967

5068
int VLLMAdaptor::initialize(const char *local_hostname,
51-
const char *metadata_server) {
69+
const char *metadata_server, const char *protocol,
70+
const char *device_name) {
5271
auto conn_string = parseConnectionString(metadata_server);
53-
return initializeExt(local_hostname, conn_string.second.c_str(),
54-
conn_string.first.c_str());
72+
return initializeExt(local_hostname, conn_string.second.c_str(), protocol,
73+
device_name, conn_string.first.c_str());
5574
}
5675

5776
int VLLMAdaptor::initializeExt(const char *local_hostname,
5877
const char *metadata_server,
78+
const char *protocol, const char *device_name,
5979
const char *metadata_type) {
6080
std::string conn_string = metadata_server;
6181
if (conn_string.find("://") == std::string::npos)
6282
conn_string =
6383
std::string(metadata_type) + "://" + std::string(metadata_server);
6484

65-
engine_ = std::make_unique<TransferEngine>();
85+
// TODO: remove `false` in the future; it is only there to keep the same API in vllm.
86+
engine_ = std::make_unique<TransferEngine>(false);
6687
auto hostname_port = parseHostNameWithPort(local_hostname);
6788
int ret = engine_->init(conn_string, local_hostname,
6889
hostname_port.first.c_str(), hostname_port.second);
6990
if (ret) return -1;
7091

92+
xport_ = nullptr;
93+
if (strcmp(protocol, "rdma") == 0) {
94+
auto device_names = formatDeviceNames(device_name);
95+
std::string nic_priority_matrix =
96+
"{\"cpu:0\": [[" + device_names + "], []]}";
97+
void **args = (void **)malloc(2 * sizeof(void *));
98+
args[0] = (void *)nic_priority_matrix.c_str();
99+
args[1] = nullptr;
100+
xport_ = engine_->installTransport("rdma", args);
101+
} else if (strcmp(protocol, "tcp") == 0) {
102+
xport_ = engine_->installTransport("tcp", nullptr);
103+
} else {
104+
LOG(ERROR) << "Unsupported protocol";
105+
return -1;
106+
}
107+
108+
if (!xport_) return -1;
71109
free_list_.resize(kSlabSizeKBTabLen);
72110
doBuddyAllocate(kMaxClassId);
73111
return 0;
@@ -202,4 +240,4 @@ PYBIND11_MODULE(mooncake_vllm_adaptor, m) {
202240
.def("readBytesFromBuffer", &VLLMAdaptor::readBytesFromBuffer)
203241
.def("expRegisterMemory", &VLLMAdaptor::expRegisterMemory)
204242
.def("expUnregisterMemory", &VLLMAdaptor::expUnregisterMemory);
205-
}
243+
}

mooncake-integration/vllm/vllm_adaptor.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,11 @@ class VLLMAdaptor {
4444

4545
~VLLMAdaptor();
4646

47-
int initialize(const char *local_hostname, const char *metadata_server);
47+
int initialize(const char *local_hostname, const char *metadata_server,
48+
const char *protocol, const char *device_name);
4849

4950
int initializeExt(const char *local_hostname, const char *metadata_server,
51+
const char *protocol, const char *device_name,
5052
const char *metadata_type);
5153

5254
uintptr_t allocateManagedBuffer(size_t length);
@@ -84,6 +86,7 @@ class VLLMAdaptor {
8486

8587
private:
8688
std::shared_ptr<TransferEngine> engine_;
89+
Transport *xport_;
8790

8891
std::mutex mutex_;
8992
std::vector<std::stack<char *>> free_list_;

0 commit comments

Comments
 (0)