Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add the detext to beta dataset preparer #1874

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions configs/textdet/_base_/datasets/detext.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As dataset preparer will generate this config, we can remove it from this PR. Same for `configs/textrecog/_base_/datasets/detext.py` and `configs/textspotting/_base_/datasets/detext.py`.

Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Root directory of the prepared DeTEXT detection data.
detext_textdet_data_root = 'data/detext'

# Training split. filter_cfg drops samples with no ground truth and
# images whose shorter side is below 32 px.
detext_textdet_train = {
    'type': 'OCRDataset',
    'data_root': detext_textdet_data_root,
    'ann_file': 'textdet_train.json',
    'filter_cfg': {
        'filter_empty_gt': True,
        'min_size': 32,
    },
    'pipeline': None,
}

# Test split; no filtering, evaluated in test mode.
detext_textdet_test = {
    'type': 'OCRDataset',
    'data_root': detext_textdet_data_root,
    'ann_file': 'textdet_test.json',
    'test_mode': True,
    'pipeline': None,
}
14 changes: 14 additions & 0 deletions configs/textrecog/_base_/datasets/detext.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Root directory of the prepared DeTEXT recognition data.
detext_textrecog_data_root = 'data/detext'

# Training split for text recognition.
detext_textrecog_train = {
    'type': 'OCRDataset',
    'data_root': detext_textrecog_data_root,
    'ann_file': 'textrecog_train.json',
    'pipeline': None,
}

# Test split; evaluated in test mode.
detext_textrecog_test = {
    'type': 'OCRDataset',
    'data_root': detext_textrecog_data_root,
    'ann_file': 'textrecog_test.json',
    'test_mode': True,
    'pipeline': None,
}
15 changes: 15 additions & 0 deletions configs/textspotting/_base_/datasets/detext.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Root directory of the prepared DeTEXT text-spotting data.
detext_textspotting_data_root = 'data/detext'

# Training split. filter_cfg drops samples with no ground truth and
# images whose shorter side is below 32 px.
detext_textspotting_train = {
    'type': 'OCRDataset',
    'data_root': detext_textspotting_data_root,
    'ann_file': 'textspotting_train.json',
    'filter_cfg': {
        'filter_empty_gt': True,
        'min_size': 32,
    },
    'pipeline': None,
}

# Test split; no filtering, evaluated in test mode.
detext_textspotting_test = {
    'type': 'OCRDataset',
    'data_root': detext_textspotting_data_root,
    'ann_file': 'textspotting_test.json',
    'test_mode': True,
    'pipeline': None,
}
31 changes: 31 additions & 0 deletions dataset_zoo/detext/metafile.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
Name: 'DETEXT'
Paper:
  Title: A Database for Evaluating Text Extraction from Biomedical Literature Figures
  # NOTE: the paper was published in PLoS ONE (see the DOI in the BibTeX
  # below); the previous URL/venue pointed at an unrelated ESWA paper.
  URL: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0126200
  Venue: PLoS ONE
  Year: '2015'
  BibTeX: '@article{article,
    author = {Yin, Xu-Cheng and Yang, Chun and Pei, Wei-Yi and Man, Haixia and Zhang, Jun and Learned-Miller, Erik and Yu, Hong},
    year = {2015},
    month = {05},
    pages = {e0126200},
    title = {DeTEXT: A Database for Evaluating Text Extraction from Biomedical Literature Figures},
    volume = {10},
    journal = {PloS one},
    doi = {10.1371/journal.pone.0126200}}'
Data:
  Website: https://rrc.cvc.uab.es/?ch=9
  Language:
    - English
  Scene:
    - biomedical
  Granularity:
    - Word
  Tasks:
    - textrecog
    - textdet
    - textspotting
  License:
    Type: CC BY 1.0
    Link: https://creativecommons.org/licenses/by/1.0/
  Format: .txt
63 changes: 63 additions & 0 deletions dataset_zoo/detext/textdet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Dataset preparer config for DeTEXT text detection.
# Data is downloaded from the ICDAR RRC Challenge 9 site (rrc.cvc.uab.es).
data_root = 'data/detext'
cache_path = 'data/cache'

# Training split: download images + txt ground truth, pair them by file
# name, parse the txt annotations and dump them as json.
train_preparer = dict(
    obtainer=dict(
        type='NaiveDataObtainer',
        cache_path=cache_path,
        files=[
            dict(
                url=('https://rrc.cvc.uab.es/downloads/'
                     'ch9_training_images.zip'),
                save_name='detext_textdet_train_img.zip',
                md5='e07161d6af1ef2f81f9ba0d2f904e377',
                content=['image'],
                mapping=[['detext_textdet_train_img', 'textdet_imgs/train']],
            ),
            dict(
                url=('https://rrc.cvc.uab.es/downloads/'
                     'ch9_training_localization_transcription_gt.zip'),
                save_name='detext_textdet_train_gt.zip',
                md5='ae4dfe155e61dcfeadd80f6b0fd15626',
                content=['annotation'],
                mapping=[['detext_textdet_train_gt', 'annotations/train']],
            ),
        ],
    ),
    gatherer=dict(
        type='PairGatherer',
        img_suffixes=['.jpg'],
        rule=[r'(\w+)\.jpg', r'gt_\1.txt'],
    ),
    parser=dict(type='DetextDetAnnParser', encoding='utf-8-sig'),
    packer=dict(type='TextDetPacker'),
    dumper=dict(type='JsonDumper'),
)

# Test split mirrors the training one. The validation set also contains
# upper-case '.JPG' files, hence the extra suffix and the case-tolerant
# pairing rule.
test_preparer = dict(
    obtainer=dict(
        type='NaiveDataObtainer',
        cache_path=cache_path,
        files=[
            dict(
                url=('https://rrc.cvc.uab.es/downloads/'
                     'ch9_validation_images.zip'),
                save_name='detext_textdet_test_img.zip',
                md5='c6ffe0abe6f2d7b4d70e6883257308e0',
                content=['image'],
                mapping=[['detext_textdet_test_img', 'textdet_imgs/test']],
            ),
            dict(
                url=('https://rrc.cvc.uab.es/downloads/'
                     'ch9_validation_localization_transcription_gt.zip'),
                save_name='detext_textdet_test_gt.zip',
                md5='075c4b27ab2848c90ad5e87d9f922bc3',
                content=['annotation'],
                mapping=[['detext_textdet_test_gt', 'annotations/test']],
            ),
        ],
    ),
    gatherer=dict(
        type='PairGatherer',
        img_suffixes=['.jpg', '.JPG'],
        rule=[r'img_(\d+)\.([jJ][pP][gG])', r'gt_img_\1.txt'],
    ),
    parser=dict(type='DetextDetAnnParser', encoding='utf-8-sig'),
    packer=dict(type='TextDetPacker'),
    dumper=dict(type='JsonDumper'),
)

# Intermediate directories removed after preparation finishes.
delete = ['detext_textdet_train_img', 'annotations', 'detext_textdet_test_img']
config_generator = dict(type='TextDetConfigGenerator')
9 changes: 9 additions & 0 deletions dataset_zoo/detext/textrecog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Dataset preparer config for DeTEXT text recognition.
# Inherits the download/gather/parse pipeline from the detection config.
_base_ = ['textdet.py']

# Reuse the images already gathered under the detection image folders.
_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
_base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'

# Swap the packer so samples are packed in recognition format
# (name suggests instances are cropped from the det images — confirm).
_base_.train_preparer.packer.type = 'TextRecogCropPacker'
_base_.test_preparer.packer.type = 'TextRecogCropPacker'

config_generator = dict(type='TextRecogConfigGenerator')
8 changes: 8 additions & 0 deletions dataset_zoo/detext/textspotting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Dataset preparer config for DeTEXT text spotting.
# Inherits the download/gather/parse pipeline from the detection config.
_base_ = ['textdet.py']
# Reuse the images already gathered under the detection image folders.
_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
_base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'

# Swap the packer so samples are packed in the text-spotting format.
_base_.train_preparer.packer.type = 'TextSpottingPacker'
_base_.test_preparer.packer.type = 'TextSpottingPacker'

config_generator = dict(type='TextSpottingConfigGenerator')
3 changes: 2 additions & 1 deletion mmocr/datasets/preparers/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .base import BaseParser
from .coco_parser import COCOTextDetAnnParser
from .ctw1500_parser import CTW1500AnnParser
from .detext_parser import DetextDetAnnParser
from .funsd_parser import FUNSDTextDetAnnParser
from .icdar_txt_parser import (ICDARTxtTextDetAnnParser,
ICDARTxtTextRecogAnnParser)
Expand All @@ -18,5 +19,5 @@
'TotaltextTextDetAnnParser', 'WildreceiptKIEAnnParser',
'COCOTextDetAnnParser', 'SVTTextDetAnnParser', 'FUNSDTextDetAnnParser',
'SROIETextDetAnnParser', 'NAFAnnParser', 'CTW1500AnnParser',
'SynthTextAnnParser', 'MJSynthAnnParser'
'SynthTextAnnParser', 'MJSynthAnnParser', 'DetextDetAnnParser'
]
68 changes: 68 additions & 0 deletions mmocr/datasets/preparers/parsers/detext_parser.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that DetextDetAnnParser is exactly the same as ICDARTxtTextDetAnnParser? In that case, this implementation can be removed, and we can reuse ICDARTxtTextDetAnnParser in DeTEXT's config.

Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional, Tuple

from mmocr.registry import DATA_PARSERS
from mmocr.utils import bbox2poly
from .base import BaseParser


@DATA_PARSERS.register_module()
class DetextDetAnnParser(BaseParser):
    """DeTEXT txt-format text detection annotation parser.

    The original annotations of this dataset are stored in txt files,
    one instance per line, in the following format:

        x1, y1, x2, y2, x3, y3, x4, y4, transcription

    Args:
        separator (str): The separator between each element in a line.
            Defaults to ','.
        ignore (str): The text to be ignored. Defaults to '###'.
        format (str): The format of the annotation. Defaults to
            'x1,y1,x2,y2,x3,y3,x4,y4,trans'.
        encoding (str): The encoding of the annotation file. Defaults to
            'utf-8'.
        remove_strs (List[str], optional): Used to remove redundant strings
            in the transcription. Defaults to None.
        mode (str, optional): The mode of the box converter. Supported modes
            are 'xywh' and 'xyxy'. Defaults to None, which means the
            coordinates are already a polygon and are used as-is.
        **kwargs: Keyword arguments passed to :class:`BaseParser`, e.g.
            ``nproc`` for the number of parsing processes.
    """

    def __init__(self,
                 separator: str = ',',
                 ignore: str = '###',
                 format: str = 'x1,y1,x2,y2,x3,y3,x4,y4,trans',
                 encoding: str = 'utf-8',
                 remove_strs: Optional[List[str]] = None,
                 mode: Optional[str] = None,
                 **kwargs) -> None:
        self.sep = separator
        self.format = format
        self.encoding = encoding
        self.ignore = ignore
        self.mode = mode
        self.remove_strs = remove_strs
        super().__init__(**kwargs)

    def parse_file(self, img_path: str, ann_path: str) -> Tuple:
        """Parse all annotations of a single image.

        Args:
            img_path (str): Path to the image file.
            ann_path (str): Path to the txt annotation file.

        Returns:
            Tuple: ``(img_path, instances)``, where ``instances`` is a list
            of dicts with keys ``poly``, ``text`` and ``ignore``.
        """
        instances = list()
        for anno in self.loader(ann_path, self.sep, self.format,
                                self.encoding):
            anno = list(anno.values())
            # Strip redundant substrings (e.g. stray quotes) from every
            # field before converting coordinates.
            if self.remove_strs is not None:
                for strs in self.remove_strs:
                    for i in range(len(anno)):
                        if strs in anno[i]:
                            anno[i] = anno[i].replace(strs, '')
            # All fields except the last one are polygon coordinates;
            # the last field is the transcription.
            poly = list(map(float, anno[0:-1]))
            if self.mode is not None:
                # Convert an 'xywh'/'xyxy' box into a quadrilateral.
                poly = bbox2poly(poly, self.mode)
                poly = poly.tolist()
            text = anno[-1]
            instances.append(
                dict(poly=poly, text=text, ignore=text == self.ignore))

        return img_path, instances