Skip to content

Commit 38c0c9e

Browse files
authored
OpenOCR compatibility code (#12033)
* OpenOCR compatibility code * update config and RepSVTR * SVTRv2 doc
1 parent 3e5934d commit 38c0c9e

File tree

11 files changed

+1747
-4
lines changed

11 files changed

+1747
-4
lines changed

configs/det/det_repsvtr_db.yml

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
Global:
2+
debug: false
3+
use_gpu: true
4+
epoch_num: &epoch_num 500
5+
log_smooth_window: 20
6+
print_batch_step: 100
7+
save_model_dir: ./output/det_repsvtr_db
8+
save_epoch_step: 10
9+
eval_batch_step:
10+
- 0
11+
- 1000
12+
cal_metric_during_train: false
13+
checkpoints:
14+
pretrained_model:
15+
save_inference_dir: null
16+
use_visualdl: false
17+
infer_img: doc/imgs_en/img_10.jpg
18+
save_res_path: ./checkpoints/det_db/predicts_db.txt
19+
distributed: true
20+
21+
Architecture:
22+
model_type: det
23+
algorithm: DB
24+
Transform: null
25+
Backbone:
26+
name: RepSVTR_det
27+
Neck:
28+
name: RSEFPN
29+
out_channels: 96
30+
shortcut: True
31+
Head:
32+
name: DBHead
33+
k: 50
34+
35+
Loss:
36+
name: DBLoss
37+
balance_loss: true
38+
main_loss_type: DiceLoss
39+
alpha: 5
40+
beta: 10
41+
ohem_ratio: 3
42+
43+
Optimizer:
44+
name: Adam
45+
beta1: 0.9
46+
beta2: 0.999
47+
lr:
48+
name: Cosine
49+
learning_rate: 0.001 # (8*8c) presumably batch size 8 per card x 8 cards; rescale if your setup differs
50+
warmup_epoch: 2
51+
regularizer:
52+
name: L2
53+
factor: 5.0e-05
54+
55+
PostProcess:
56+
name: DBPostProcess
57+
thresh: 0.3
58+
box_thresh: 0.6
59+
max_candidates: 1000
60+
unclip_ratio: 1.5
61+
62+
Metric:
63+
name: DetMetric
64+
main_indicator: hmean
65+
66+
Train:
67+
dataset:
68+
name: SimpleDataSet
69+
data_dir: ./train_data/icdar2015/text_localization/
70+
label_file_list:
71+
- ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
72+
ratio_list: [1.0]
73+
transforms:
74+
- DecodeImage:
75+
img_mode: BGR
76+
channel_first: false
77+
- DetLabelEncode: null
78+
- CopyPaste: null
79+
- IaaAugment:
80+
augmenter_args:
81+
- type: Fliplr
82+
args:
83+
p: 0.5
84+
- type: Affine
85+
args:
86+
rotate:
87+
- -10
88+
- 10
89+
- type: Resize
90+
args:
91+
size:
92+
- 0.5
93+
- 3
94+
- EastRandomCropData:
95+
size:
96+
- 640
97+
- 640
98+
max_tries: 50
99+
keep_ratio: true
100+
- MakeBorderMap:
101+
shrink_ratio: 0.4
102+
thresh_min: 0.3
103+
thresh_max: 0.7
104+
total_epoch: *epoch_num
105+
- MakeShrinkMap:
106+
shrink_ratio: 0.4
107+
min_text_size: 8
108+
total_epoch: *epoch_num
109+
- NormalizeImage:
110+
scale: 1./255.
111+
mean:
112+
- 0.485
113+
- 0.456
114+
- 0.406
115+
std:
116+
- 0.229
117+
- 0.224
118+
- 0.225
119+
order: hwc
120+
- ToCHWImage: null
121+
- KeepKeys:
122+
keep_keys:
123+
- image
124+
- threshold_map
125+
- threshold_mask
126+
- shrink_map
127+
- shrink_mask
128+
loader:
129+
shuffle: true
130+
drop_last: false
131+
batch_size_per_card: 8
132+
num_workers: 8
133+
134+
Eval:
135+
dataset:
136+
name: SimpleDataSet
137+
data_dir: ./train_data/icdar2015/text_localization/
138+
label_file_list:
139+
- ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
140+
transforms:
141+
- DecodeImage:
142+
img_mode: BGR
143+
channel_first: false
144+
- DetLabelEncode: null
145+
- DetResizeForTest:
146+
- NormalizeImage:
147+
scale: 1./255.
148+
mean:
149+
- 0.485
150+
- 0.456
151+
- 0.406
152+
std:
153+
- 0.229
154+
- 0.224
155+
- 0.225
156+
order: hwc
157+
- ToCHWImage: null
158+
- KeepKeys:
159+
keep_keys:
160+
- image
161+
- shape
162+
- polys
163+
- ignore_tags
164+
loader:
165+
shuffle: false
166+
drop_last: false
167+
batch_size_per_card: 1
168+
num_workers: 2
169+
profiler_options: null
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
Global:
2+
debug: false
3+
use_gpu: true
4+
epoch_num: 200
5+
log_smooth_window: 20
6+
print_batch_step: 10
7+
save_model_dir: ./output/rec_repsvtr_gtc
8+
save_epoch_step: 10
9+
eval_batch_step: [0, 1000]
10+
cal_metric_during_train: False
11+
pretrained_model:
12+
checkpoints:
13+
save_inference_dir:
14+
use_visualdl: false
15+
infer_img: doc/imgs_words/ch/word_1.jpg
16+
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
17+
max_text_length: &max_text_length 25
18+
infer_mode: false
19+
use_space_char: true
20+
distributed: true
21+
save_res_path: ./output/rec/predicts_repsvtr.txt
22+
23+
Optimizer:
24+
name: AdamW
25+
beta1: 0.9
26+
beta2: 0.999
27+
epsilon: 1.e-8
28+
weight_decay: 0.025
29+
no_weight_decay_name: norm
30+
one_dim_param_no_weight_decay: True
31+
lr:
32+
name: Cosine
33+
learning_rate: 0.001 # 8gpus 192bs
34+
warmup_epoch: 5
35+
36+
37+
Architecture:
38+
model_type: rec
39+
algorithm: SVTR_HGNet
40+
Transform:
41+
Backbone:
42+
name: RepSVTR
43+
Head:
44+
name: MultiHead
45+
head_list:
46+
- CTCHead:
47+
Neck:
48+
name: svtr
49+
dims: 256
50+
depth: 2
51+
hidden_dims: 256
52+
kernel_size: [1, 3]
53+
use_guide: True
54+
Head:
55+
fc_decay: 0.00001
56+
- NRTRHead:
57+
nrtr_dim: 384
58+
max_text_length: *max_text_length
59+
num_decoder_layers: 2
60+
61+
Loss:
62+
name: MultiLoss
63+
loss_config_list:
64+
- CTCLoss:
65+
- NRTRLoss:
66+
67+
PostProcess:
68+
name: CTCLabelDecode
69+
70+
Metric:
71+
name: RecMetric
72+
main_indicator: acc
73+
74+
75+
Train:
76+
dataset:
77+
name: MultiScaleDataSet
78+
ds_width: false
79+
data_dir: ./train_data/
80+
ext_op_transform_idx: 1
81+
label_file_list:
82+
- ./train_data/train_list.txt
83+
transforms:
84+
- DecodeImage:
85+
img_mode: BGR
86+
channel_first: false
87+
- RecAug:
88+
- MultiLabelEncode:
89+
gtc_encode: NRTRLabelEncode
90+
- KeepKeys:
91+
keep_keys:
92+
- image
93+
- label_ctc
94+
- label_gtc
95+
- length
96+
- valid_ratio
97+
sampler:
98+
name: MultiScaleSampler
99+
scales: [[320, 32], [320, 48], [320, 64]]
100+
first_bs: &bs 192
101+
fix_bs: false
102+
divided_factor: [8, 16] # w, h
103+
is_training: True
104+
loader:
105+
shuffle: true
106+
batch_size_per_card: *bs
107+
drop_last: true
108+
num_workers: 8
109+
Eval:
110+
dataset:
111+
name: SimpleDataSet
112+
data_dir: ./train_data
113+
label_file_list:
114+
- ./train_data/val_list.txt
115+
transforms:
116+
- DecodeImage:
117+
img_mode: BGR
118+
channel_first: false
119+
- MultiLabelEncode:
120+
gtc_encode: NRTRLabelEncode
121+
- RecResizeImg:
122+
image_shape: [3, 48, 320]
123+
- KeepKeys:
124+
keep_keys:
125+
- image
126+
- label_ctc
127+
- label_gtc
128+
- length
129+
- valid_ratio
130+
loader:
131+
shuffle: false
132+
drop_last: false
133+
batch_size_per_card: 128
134+
num_workers: 4

0 commit comments

Comments
 (0)