Skip to content

Commit 73fdb87

Browse files
authored
Merge branch 'main' into dev_accept_sim
2 parents d81f51c + cd32229 commit 73fdb87

37 files changed

+1917
-807
lines changed

.github/workflows/test.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
github.event.pull_request.draft == false
1919
runs-on: [self-hosted]
2020
container:
21-
image: lmsysorg/sglang:dev
21+
image: lmsysorg/sglang:v0.5.5 # we lock to this version to avoid repeated docker pull
2222
options: --gpus all --shm-size=2g --rm -v /dev/shm
2323
steps:
2424
- name: Checkout code
@@ -45,6 +45,7 @@ jobs:
4545
shell: bash
4646
run: |
4747
source sf/bin/activate
48+
export PYTHONPATH=$PWD
4849
python -m unittest discover -s ./tests -p "test_*.py" -v
4950
5051
- name: Save cache

configs/qwen2.5-7b-eagle3.json

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{
2+
"architectures": [
3+
"LlamaForCausalLMEagle3"
4+
],
5+
"attention_bias": false,
6+
"attention_dropout": 0.0,
7+
"bos_token_id": 151643,
8+
"eos_token_id": 151645,
9+
"hidden_act": "silu",
10+
"hidden_size": 3584,
11+
"initializer_range": 0.02,
12+
"intermediate_size": 18944,
13+
"max_position_embeddings": 32768,
14+
"max_window_layers": 28,
15+
"model_type": "llama",
16+
"num_attention_heads": 28,
17+
"num_hidden_layers": 1,
18+
"num_key_value_heads": 4,
19+
"rms_norm_eps": 1e-06,
20+
"rope_scaling": null,
21+
"rope_theta": 1000000.0,
22+
"sliding_window": 131072,
23+
"tie_word_embeddings": false,
24+
"torch_dtype": "bfloat16",
25+
"transformers_version": "4.51.0",
26+
"use_cache": true,
27+
"use_sliding_window": false,
28+
"vocab_size": 152064,
29+
"draft_vocab_size": 16000
30+
}

examples/prepare_hidden_states.sh

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
#
# Runs scripts/prepare_hidden_states.py under torchrun for the target model
# configured below and writes its output under
# $OUTPUT_ROOT_DIR/cache/hidden_states.
#
# Usage: bash examples/prepare_hidden_states.sh
# Edit the variables below to change the model, dataset or output location.

# Stop on the first failing command, unset variable or failed pipeline stage
# instead of carrying on with a half-configured run.
set -euo pipefail

# Locate the repository root from this file's own path so the script can be
# started from any working directory.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
ROOT_DIR=$(dirname -- "$SCRIPT_DIR")

# Put the repository root on the Python import path for the launched script.
export PYTHONPATH="$ROOT_DIR"

# Hugging Face repository id of the target model. The published id has no
# hyphen between "Qwen" and "2.5".
MODEL_PATH=Qwen/Qwen2.5-7B-Instruct
# Only used to build the output directory name below; kept as-is so existing
# output locations stay valid.
MODEL_NAME=Qwen-2.5-7B-Instruct
DATASET_PATH=/datasets/all_2w.jsonl
OUTPUT_ROOT_DIR=/specforge_output/$MODEL_NAME/2w
NUM_SAMPLES=20000
TP_SIZE=1
MAX_LENGTH=16384

# NOTE(review): DRAFT_CONFIG_PATH and OUTPUT_DIR are not referenced anywhere
# in this script - presumably meant for a later training step; confirm or
# remove.
# shellcheck disable=SC2034
DRAFT_CONFIG_PATH=./configs/qwen2.5-7b-eagle3.json
# shellcheck disable=SC2034
OUTPUT_DIR=$OUTPUT_ROOT_DIR/outputs

# Number of worker processes torchrun starts on this node.
NPROC_PER_NODE=8

# The entry point is addressed through ROOT_DIR rather than the current
# directory; every expansion is quoted so paths containing spaces or glob
# characters are passed through as single arguments.
torchrun \
  --nproc_per_node "$NPROC_PER_NODE" \
  "$ROOT_DIR/scripts/prepare_hidden_states.py" \
  --target-model-path "$MODEL_PATH" \
  --enable-aux-hidden-states \
  --data-path "$DATASET_PATH" \
  --chat-template qwen \
  --max-length "$MAX_LENGTH" \
  --tp-size "$TP_SIZE" \
  --output-path "$OUTPUT_ROOT_DIR/cache/hidden_states" \
  --num-samples "$NUM_SAMPLES" \
  --batch-size 4

requirements-rocm.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
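# Use the PyTorch ROCm wheel index that matches the ROCm version installed on your system.
# If you switch to a different index, also change the "+rocm6.3" suffix on the
# torch, torchaudio and torchvision pins below so they match it.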
2+
--extra-index-url https://download.pytorch.org/whl/rocm6.3
3+
4+
pre-commit
5+
torch==2.8.0+rocm6.3
6+
torchaudio==2.8.0+rocm6.3
7+
torchvision==0.23.0+rocm6.3
8+
transformers==4.57.1
9+
qwen-vl-utils==0.0.11
10+
datasets
11+
setuptools
12+
tqdm
13+
wandb
14+
psutil
15+
numpy
16+
accelerate
17+
pydantic
18+
sglang[all]==0.5.4
19+
openai-harmony

0 commit comments

Comments
 (0)