Skip to content

Commit 8142dbb

Browse files
Lin authored and gitee-org committed
!921 sync commit to master
Merge pull request !921 from Lin/update-master
2 parents 0510ba2 + 81d91f7 commit 8142dbb

38 files changed

+719
-261
lines changed

README.md

Lines changed: 13 additions & 13 deletions
Large diffs are not rendered by default.

configs/glm/run_glm_6b_finetune.yaml

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,17 @@ processor:
9090
# ==== dataset config ====
9191
train_dataset: &train_dataset
9292
data_loader:
93-
type: MindDataset
94-
dataset_dir: ""
93+
type: ADGenDataLoader
94+
dataset_dir: "/path/to/AdvertiseGen"
9595
shuffle: True
96+
phase: "train"
97+
tokenizer:
98+
type: ChatGLMTokenizer
99+
vocab_file: "/path/to/ice_text.model"
96100
input_columns: ["input_ids", "label", "position_ids", "attention_mask"]
101+
max_source_length: 64
102+
max_target_length: 64
103+
ignore_pad_token_for_loss: True
97104
num_parallel_workers: 8
98105
python_multiprocessing: False
99106
drop_remainder: True
@@ -104,14 +111,21 @@ train_dataset: &train_dataset
104111
seed: 0
105112

106113
train_dataset_task:
107-
type: CausalLanguageModelDataset
114+
type: KeyWordGenDataset
108115
dataset_config: *train_dataset
109116

110117
eval_dataset: &eval_dataset
111118
data_loader:
112-
type: MindDataset
113-
dataset_dir: ""
114-
shuffle: True
119+
type: ADGenDataLoader
120+
dataset_dir: "/path/to/AdvertiseGen"
121+
shuffle: False
122+
phase: "eval"
123+
tokenizer:
124+
type: ChatGLMTokenizer
125+
vocab_file: "/path/to/ice_text.model"
126+
max_source_length: 256
127+
max_target_length: 256
128+
ignore_pad_token_for_loss: True
115129
input_columns: ["input_ids", "label"]
116130
num_parallel_workers: 8
117131
python_multiprocessing: False
@@ -123,7 +137,7 @@ eval_dataset: &eval_dataset
123137
seed: 0
124138

125139
eval_dataset_task:
126-
type: CausalLanguageModelDataset
140+
type: KeyWordGenDataset
127141
dataset_config: *eval_dataset
128142

129143
# ==== runner config ====

configs/glm/run_glm_6b_infer.yaml

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ model:
5353
max_decode_length: 2048 # The maximum length of the generated words.
5454
is_enhanced_encoder: True
5555
is_npu_acceleration: True # sample in npu to acceleration
56-
checkpoint_name_or_path: "glm_6b"
56+
checkpoint_name_or_path: "glm_6b_chat"
5757
top_k: 1
5858
top_p: 1 # top_p not support for npu_acceleration yet
5959
repetition_penalty: 1
@@ -86,10 +86,17 @@ processor:
8686
# ==== dataset config ====
8787
train_dataset: &train_dataset
8888
data_loader:
89-
type: MindDataset
90-
dataset_dir: ""
89+
type: ADGenDataLoader
90+
dataset_dir: "/path/to/AdvertiseGen"
9191
shuffle: True
92+
phase: "train"
93+
tokenizer:
94+
type: ChatGLMTokenizer
95+
vocab_file: "/path/to/ice_text.model"
9296
input_columns: ["input_ids", "label", "position_ids", "attention_mask"]
97+
max_source_length: 64
98+
max_target_length: 64
99+
ignore_pad_token_for_loss: True
93100
num_parallel_workers: 8
94101
python_multiprocessing: False
95102
drop_remainder: True
@@ -100,14 +107,21 @@ train_dataset: &train_dataset
100107
seed: 0
101108

102109
train_dataset_task:
103-
type: CausalLanguageModelDataset
110+
type: KeyWordGenDataset
104111
dataset_config: *train_dataset
105112

106113
eval_dataset: &eval_dataset
107114
data_loader:
108-
type: MindDataset
109-
dataset_dir: ""
110-
shuffle: True
115+
type: ADGenDataLoader
116+
dataset_dir: "/path/to/AdvertiseGen"
117+
shuffle: False
118+
phase: "eval"
119+
tokenizer:
120+
type: ChatGLMTokenizer
121+
vocab_file: "/path/to/ice_text.model"
122+
max_source_length: 256
123+
max_target_length: 256
124+
ignore_pad_token_for_loss: True
111125
input_columns: ["input_ids", "label"]
112126
num_parallel_workers: 8
113127
python_multiprocessing: False
@@ -119,7 +133,7 @@ eval_dataset: &eval_dataset
119133
seed: 0
120134

121135
eval_dataset_task:
122-
type: CausalLanguageModelDataset
136+
type: KeyWordGenDataset
123137
dataset_config: *eval_dataset
124138

125139
# ==== runner config ====

configs/glm/run_glm_6b_lora.yaml

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ model:
5252
max_decode_length: 2048 # The maximum length of the generated words.
5353
is_enhanced_encoder: True
5454
is_npu_acceleration: False
55-
checkpoint_name_or_path: "glm_6b"
55+
checkpoint_name_or_path: "glm_6b_lora"
5656
top_k: 1
5757
top_p: 1
5858
repetition_penalty: 1
@@ -96,10 +96,17 @@ processor:
9696
# ==== dataset config ====
9797
train_dataset: &train_dataset
9898
data_loader:
99-
type: MindDataset
100-
dataset_dir: ""
99+
type: ADGenDataLoader
100+
dataset_dir: "/path/to/AdvertiseGen"
101101
shuffle: True
102+
phase: "train"
103+
tokenizer:
104+
type: ChatGLMTokenizer
105+
vocab_file: "/path/to/ice_text.model"
102106
input_columns: ["input_ids", "label", "position_ids", "attention_mask"]
107+
max_source_length: 64
108+
max_target_length: 64
109+
ignore_pad_token_for_loss: True
103110
num_parallel_workers: 8
104111
python_multiprocessing: False
105112
drop_remainder: True
@@ -110,14 +117,21 @@ train_dataset: &train_dataset
110117
seed: 0
111118

112119
train_dataset_task:
113-
type: CausalLanguageModelDataset
120+
type: KeyWordGenDataset
114121
dataset_config: *train_dataset
115122

116123
eval_dataset: &eval_dataset
117124
data_loader:
118-
type: MindDataset
119-
dataset_dir: ""
120-
shuffle: True
125+
type: ADGenDataLoader
126+
dataset_dir: "/path/to/AdvertiseGen"
127+
shuffle: False
128+
phase: "eval"
129+
tokenizer:
130+
type: ChatGLMTokenizer
131+
vocab_file: "/path/to/ice_text.model"
132+
max_source_length: 256
133+
max_target_length: 256
134+
ignore_pad_token_for_loss: True
121135
input_columns: ["input_ids", "label"]
122136
num_parallel_workers: 8
123137
python_multiprocessing: False
@@ -129,7 +143,7 @@ eval_dataset: &eval_dataset
129143
seed: 0
130144

131145
eval_dataset_task:
132-
type: CausalLanguageModelDataset
146+
type: KeyWordGenDataset
133147
dataset_config: *eval_dataset
134148

135149
# ==== runner config ====
@@ -169,7 +183,7 @@ lr_scale: False
169183
# parallel config
170184
use_parallel: False
171185
parallel:
172-
parallel_mode: 1 # 0-dataset, 1-semi, 2-auto, 3-hybrid
186+
parallel_mode: 0 # 0-dataset, 1-semi, 2-auto, 3-hybrid
173187
gradients_mean: False
174188
loss_repeated_mean: True
175189
enable_alltoall: False
@@ -179,7 +193,7 @@ parallel:
179193
strategy_ckpt_save_file: "./ckpt_strategy.ckpt"
180194
parallel_config:
181195
data_parallel: 1
182-
model_parallel: 4
196+
model_parallel: 1
183197
pipeline_stage: 1
184198
expert_parallel: 1
185199
optimizer_shard: False # optimizer shard

configs/glm/run_glm_6b_lora_infer.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ model:
5353
max_decode_length: 2048 # The maximum length of the generated words.
5454
is_enhanced_encoder: True
5555
is_npu_acceleration: True # sample in npu to acceleration
56-
checkpoint_name_or_path: "glm_6b"
56+
checkpoint_name_or_path: "glm_6b_lora_chat"
5757
top_k: 1
5858
top_p: 1 # top_p not support for npu_acceleration yet
5959
repetition_penalty: 1

configs/llama/run_llama_7b_lora.yaml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ optimizer:
2525
beta1: 0.9
2626
beta2: 0.95
2727
eps: 1.e-8
28-
learning_rate: 2.e-5
28+
learning_rate: 1.e-4
2929

3030
# lr sechdule
3131
lr_schedule:
3232
type: CosineWithWarmUpLR
33-
learning_rate: 2.e-5
33+
learning_rate: 1.e-4
3434
warmup_ratio: 0.03
3535
total_steps: -1 # -1 means it will load the total steps of the dataset
3636

@@ -166,7 +166,7 @@ model:
166166
# configurition of lora
167167
in_channels: 4096
168168
out_channels: 4096
169-
lora_rank: 8
169+
lora_rank: 16
170170
lora_alpha: 16
171171
lora_dropout: 0.05
172172

@@ -193,6 +193,9 @@ runner_wrapper:
193193
scale_window: 1000
194194
use_clip_grad: True
195195

196+
eval_callbacks:
197+
- type: ObsMonitor
198+
196199
auto_tune: False
197200
filepath_prefix: './autotune'
198201
autotune_per_step: 10

0 commit comments

Comments (0)