@@ -178,147 +178,248 @@ PaddleX combines model information and runtime environment information to provid
178
178
<table>
179
179
<tr>
180
180
<th>Pipeline</th>
181
- <th>Pipeline Module</th>
182
- <th>Specific Models </th>
181
+ <th>Module</th>
182
+ <th>Model Support List</th>
183
183
</tr>
184
+
184
185
<tr>
185
- <td>General Image Classification</td>
186
- <td>Image Classification</td>
187
- <td>ResNet18<br/>ResNet34<details>
188
- <summary><b>more</b></summary>ResNet50<br/>ResNet101<br/>ResNet152<br/>ResNet18_vd<br/>ResNet34_vd<br/>ResNet50_vd<br/>ResNet101_vd<br/>ResNet152_vd<br/>ResNet200_vd<br/>PP-LCNet_x0_25<br/>PP-LCNet_x0_35<br/>PP-LCNet_x0_5<br/>PP-LCNet_x0_75<br/>PP-LCNet_x1_0<br/>PP-LCNet_x1_5<br/>PP-LCNet_x2_0<br/>PP-LCNet_x2_5<br/>PP-LCNetV2_small<br/>PP-LCNetV2_base<br/>PP-LCNetV2_large<br/>MobileNetV3_large_x0_35<br/>MobileNetV3_large_x0_5<br/>MobileNetV3_large_x0_75<br/>MobileNetV3_large_x1_0<br/>MobileNetV3_large_x1_25<br/>MobileNetV3_small_x0_35<br/>MobileNetV3_small_x0_5<br/>MobileNetV3_small_x0_75<br/>MobileNetV3_small_x1_0<br/>MobileNetV3_small_x1_25<br/>ConvNeXt_tiny<br/>ConvNeXt_small<br/>ConvNeXt_base_224<br/>ConvNeXt_base_384<br/>ConvNeXt_large_224<br/>ConvNeXt_large_384<br/>MobileNetV1_x0_25<br/>MobileNetV1_x0_5<br/>MobileNetV1_x0_75<br/>MobileNetV1_x1_0<br/>MobileNetV2_x0_25<br/>MobileNetV2_x0_5<br/>MobileNetV2_x1_0<br/>MobileNetV2_x1_5<br/>MobileNetV2_x2_0<br/>SwinTransformer_tiny_patch4_window7_224<br/>SwinTransformer_small_patch4_window7_224<br/>SwinTransformer_base_patch4_window7_224<br/>SwinTransformer_base_patch4_window12_384<br/>SwinTransformer_large_patch4_window7_224<br/>SwinTransformer_large_patch4_window12_384<br/>PP-HGNet_small<br/>PP-HGNet_tiny<br/>PP-HGNet_base<br/>PP-HGNetV2-B0<br/>PP-HGNetV2-B1<br/>PP-HGNetV2-B2<br/>PP-HGNetV2-B3<br/>PP-HGNetV2-B4<br/>PP-HGNetV2-B5<br/>PP-HGNetV2-B6<br/>CLIP_vit_base_patch16_224<br/>CLIP_vit_large_patch14_224</details></td>
186
+ <td rowspan="2">OCR</td>
187
+ <td>Text Detection</td>
188
+ <td>✅</td>
189
189
</tr>
190
190
191
191
<tr>
192
- <td>General Object Detection</td>
193
- <td>Object Detection</td>
194
- <td>PP-YOLOE_plus-S<br/>PP-YOLOE_plus-M<details>
195
- <summary><b>more</b></summary>PP-YOLOE_plus-L<br/>PP-YOLOE_plus-X<br/>YOLOX-N<br/>YOLOX-T<br/>YOLOX-S<br/>YOLOX-M<br/>YOLOX-L<br/>YOLOX-X<br/>YOLOv3-DarkNet53<br/>YOLOv3-ResNet50_vd_DCN<br/>YOLOv3-MobileNetV3<br/>RT-DETR-R18<br/>RT-DETR-R50<br/>RT-DETR-L<br/>RT-DETR-H<br/>RT-DETR-X<br/>PicoDet-S<br/>PicoDet-L</details></td>
192
+ <td>Text Recognition</td>
193
+ <td>✅</td>
196
194
</tr>
197
195
198
196
<tr>
199
- <td>General Semantic Segmentation</td>
200
- <td>Semantic Segmentation</td>
201
- <td>Deeplabv3-R50<br/>Deeplabv3-R101<details>
202
- <summary><b>more</b></summary>Deeplabv3_Plus-R50<br/>Deeplabv3_Plus-R101<br/>PP-LiteSeg-T<br/>OCRNet_HRNet-W48<br/>OCRNet_HRNet-W18<br/>SeaFormer_tiny<br/>SeaFormer_small<br/>SeaFormer_base<br/>SeaFormer_large<br/>SegFormer-B0<br/>SegFormer-B1<br/>SegFormer-B2<br/>SegFormer-B3<br/>SegFormer-B4<br/>SegFormer-B5</details></td>
197
+ <td rowspan="7">PP-ChatOCRv3</td>
198
+ <td>Table Recognition</td>
199
+ <td>✅</td>
203
200
</tr>
204
201
205
202
<tr>
206
- <td>General Instance Segmentation</td>
207
- <td>Instance Segmentation</td>
208
- <td>Mask-RT-DETR-L<br/>Mask-RT-DETR-H</td>
203
+ <td>Layout Detection</td>
204
+ <td>✅</td>
205
+ </tr>
206
+
207
+ <tr>
208
+ <td>Text Detection</td>
209
+ <td>✅</td>
209
210
</tr>
210
211
211
212
<tr>
212
- <td rowspan="3">Seal Text Recognition</td>
213
- <td>Layout Analysis</td>
214
- <td>PicoDet-S_layout_3cls<br/>PicoDet-S_layout_17cls<details>
215
- <summary><b>more</b></summary>PicoDet-L_layout_3cls<br/>PicoDet-L_layout_17cls<br/>RT-DETR-H_layout_3cls<br/>RT-DETR-H_layout_17cls</details></td>
213
+ <td>Text Recognition</td>
214
+ <td>✅</td>
216
215
</tr>
217
216
218
217
<tr>
219
218
<td>Seal Text Detection</td>
220
- <td>PP-OCRv4_server_seal_det<br/>PP-OCRv4_mobile_seal_det </td>
219
+ <td>✅ </td>
221
220
</tr>
222
221
223
222
<tr>
224
- <td>Text Recognition</td>
225
- <td>PP-OCRv4_mobile_rec<br/>PP-OCRv4_server_rec</td>
223
+ <td>Text Image Unwarping</td>
224
+ <td>✅</td>
225
+ </tr>
226
+
227
+ <tr>
228
+ <td>Document Image Orientation Classification</td>
229
+ <td>✅</td>
230
+ </tr>
231
+
232
+ <tr>
233
+ <td rowspan="4">Table Recognition</td>
234
+ <td>Layout Detection</td>
235
+ <td>✅</td>
236
+ </tr>
237
+
238
+ <tr>
239
+ <td>Table Recognition</td>
240
+ <td>✅</td>
226
241
</tr>
227
242
228
243
<tr>
229
- <td rowspan="2">General OCR</td>
230
244
<td>Text Detection</td>
231
- <td>PP-OCRv4_server_det<br/>PP-OCRv4_mobile_det </td>
245
+ <td>✅ </td>
232
246
</tr>
233
247
234
248
<tr>
235
249
<td>Text Recognition</td>
236
- <td>PP-OCRv4_server_rec<br/>PP-OCRv4_mobile_rec<br/>ch_RepSVTR_rec<br/>ch_SVTRv2_rec </td>
250
+ <td>✅ </td>
237
251
</tr>
238
252
239
253
<tr>
240
- <td rowspan="5">General Table Recognition</td>
241
- <td>Layout Detection</td>
242
- <td>PicoDet_layout_1x</td>
254
+ <td>Object Detection</td>
255
+ <td>Object Detection</td>
256
+ <td>FasterRCNN-Swin-Tiny-FPN ❌<br>CenterNet-DLA-34 ❌<br>CenterNet-ResNet50 ❌</td>
257
+ </tr>
258
+
259
+ <tr>
260
+ <td>Instance Segmentation</td>
261
+ <td>Instance Segmentation</td>
262
+ <td>Mask-RT-DETR-S ❌</td>
263
+ </tr>
264
+
265
+ <tr>
266
+ <td>Image Classification</td>
267
+ <td>Image Classification</td>
268
+ <td>✅</td>
269
+ </tr>
270
+
271
+ <tr>
272
+ <td>Semantic Segmentation</td>
273
+ <td>Semantic Segmentation</td>
274
+ <td>✅</td>
275
+ </tr>
276
+
277
+ <tr>
278
+ <td>Time Series Forecasting</td>
279
+ <td>Time Series Forecasting</td>
280
+ <td>❌</td>
281
+ </tr>
282
+
283
+ <tr>
284
+ <td>Time Series Anomaly Detection</td>
285
+ <td>Time Series Anomaly Detection</td>
286
+ <td>❌</td>
243
287
</tr>
244
288
245
289
<tr>
246
- <td rowspan="2">Table Recognition</td>
247
- <td>SLANet</td>
290
+ <td>Time Series Classification</td>
291
+ <td>Time Series Classification</td>
292
+ <td>❌</td>
248
293
</tr>
249
294
250
295
<tr>
251
- <td>SLANet_plus</td>
296
+ <td>Small Object Detection</td>
297
+ <td>Small Object Detection</td>
298
+ <td>✅</td>
299
+ </tr>
300
+
301
+ <tr>
302
+ <td>Multi-Label Image Classification</td>
303
+ <td>Multi-Label Image Classification</td>
304
+ <td>✅</td>
305
+ </tr>
306
+
307
+ <tr>
308
+ <td>Image Anomaly Detection</td>
309
+ <td>Unsupervised Anomaly Detection</td>
310
+ <td>✅</td>
311
+ </tr>
312
+
313
+ <tr>
314
+ <td rowspan="8">Layout Parsing</td>
315
+ <td>Table Structure Recognition</td>
316
+ <td>✅</td>
317
+ </tr>
318
+
319
+ <tr>
320
+ <td>Layout Region Analysis</td>
321
+ <td>✅</td>
252
322
</tr>
253
323
254
324
<tr>
255
325
<td>Text Detection</td>
256
- <td>PP-OCRv4_server_det<br/>PP-OCRv4_mobile_det </td>
326
+ <td>✅ </td>
257
327
</tr>
258
328
259
329
<tr>
260
330
<td>Text Recognition</td>
261
- <td>PP-OCRv4_server_rec<br/>PP-OCRv4_mobile_rec<br/>ch_RepSVTR_rec<br/>ch_SVTRv2_rec</td>
331
+ <td>✅</td>
332
+ </tr>
333
+
334
+ <tr>
335
+ <td>Formula Recognition</td>
336
+ <td>❌</td>
262
337
</tr>
263
338
264
339
<tr>
265
- <td rowspan="15">Document Scene Information Extraction v3</td>
266
- <td rowspan="2">Table Recognition</td>
267
- <td>SLANet</td>
340
+ <td>Seal Text Detection</td>
341
+ <td>✅</td>
268
342
</tr>
269
343
270
344
<tr>
271
- <td>SLANet_plus</td>
345
+ <td>Text Image Unwarping</td>
346
+ <td>✅</td>
347
+ </tr>
348
+
349
+ <tr>
350
+ <td>Document Image Orientation Classification</td>
351
+ <td>✅</td>
272
352
</tr>
273
353
274
354
<tr>
355
+ <td rowspan="2">Formula Recognition</td>
275
356
<td>Layout Detection</td>
276
- <td>PicoDet_layout_1x </td>
357
+ <td>❌ </td>
277
358
</tr>
278
359
279
360
<tr>
280
- <td rowspan="2">Text Detection </td>
281
- <td>PP-OCRv4_server_det </td>
361
+ <td>Formula Recognition </td>
362
+ <td>❌ </td>
282
363
</tr>
283
364
284
365
<tr>
285
- <td>PP-OCRv4_mobile_det</td>
366
+ <td rowspan="3">Seal Recognition</td>
367
+ <td>Layout Region Analysis</td>
368
+ <td>✅</td>
286
369
</tr>
287
370
288
371
<tr>
289
- <td rowspan="4"> Text Recognition </td>
290
- <td>PP-OCRv4_server_rec </td>
372
+ <td>Seal Text Detection </td>
373
+ <td>✅ </td>
291
374
</tr>
292
375
293
376
<tr>
294
- <td>PP-OCRv4_mobile_rec</td>
377
+ <td>Text Recognition</td>
378
+ <td>✅</td>
295
379
</tr>
296
380
297
381
<tr>
298
- <td>ch_RepSVTR_rec</td>
382
+ <td rowspan="2">Image Recognition</td>
383
+ <td>Subject Detection</td>
384
+ <td>✅</td>
299
385
</tr>
300
386
301
387
<tr>
302
- <td>ch_SVTRv2_rec</td>
388
+ <td>Image Feature</td>
389
+ <td>✅</td>
303
390
</tr>
304
391
305
392
<tr>
306
- <td rowspan="2">Seal Text Detection</td>
307
- <td>PP-OCRv4_server_seal_det</td>
393
+ <td rowspan="2">Pedestrian Attribute Recognition</td>
394
+ <td>Pedestrian Detection</td>
395
+ <td>❌</td>
308
396
</tr>
309
397
310
398
<tr>
311
- <td>PP-OCRv4_mobile_seal_det</td>
399
+ <td>Pedestrian Attribute Recognition</td>
400
+ <td>❌</td>
312
401
</tr>
313
402
314
403
<tr>
315
- <td>Text Image Rectification</td>
316
- <td>UVDoc</td>
404
+ <td rowspan="2">Vehicle Attribute Recognition</td>
405
+ <td>Vehicle Detection</td>
406
+ <td>❌</td>
317
407
</tr>
318
408
319
409
<tr>
320
- <td>Document Image Orientation Classification</td>
321
- <td>PP-LCNet_x1_0_doc_ori</td>
410
+ <td>Vehicle Attribute Recognition</td>
411
+ <td>❌</td>
412
+ </tr>
413
+
414
+ <tr>
415
+ <td rowspan="2">Face Recognition</td>
416
+ <td>Face Detection</td>
417
+ <td>✅</td>
418
+ </tr>
419
+
420
+ <tr>
421
+ <td>Face Feature</td>
422
+ <td>✅</td>
322
423
</tr>
323
424
324
425
</table>
0 commit comments