diff --git a/configs/textdet/_base_/datasets/detext.py b/configs/textdet/_base_/datasets/detext.py
new file mode 100644
index 000000000..c934563b4
--- /dev/null
+++ b/configs/textdet/_base_/datasets/detext.py
@@ -0,0 +1,15 @@
+detext_textdet_data_root = 'data/detext'
+
+detext_textdet_train = dict(
+    type='OCRDataset',
+    data_root=detext_textdet_data_root,
+    ann_file='textdet_train.json',
+    filter_cfg=dict(filter_empty_gt=True, min_size=32),
+    pipeline=None)
+
+detext_textdet_test = dict(
+    type='OCRDataset',
+    data_root=detext_textdet_data_root,
+    ann_file='textdet_test.json',
+    test_mode=True,
+    pipeline=None)
diff --git a/configs/textrecog/_base_/datasets/detext.py b/configs/textrecog/_base_/datasets/detext.py
new file mode 100644
index 000000000..05c1a0a10
--- /dev/null
+++ b/configs/textrecog/_base_/datasets/detext.py
@@ -0,0 +1,14 @@
+detext_textrecog_data_root = 'data/detext'
+
+detext_textrecog_train = dict(
+    type='OCRDataset',
+    data_root=detext_textrecog_data_root,
+    ann_file='textrecog_train.json',
+    pipeline=None)
+
+detext_textrecog_test = dict(
+    type='OCRDataset',
+    data_root=detext_textrecog_data_root,
+    ann_file='textrecog_test.json',
+    test_mode=True,
+    pipeline=None)
diff --git a/configs/textspotting/_base_/datasets/detext.py b/configs/textspotting/_base_/datasets/detext.py
new file mode 100644
index 000000000..a25b6ed16
--- /dev/null
+++ b/configs/textspotting/_base_/datasets/detext.py
@@ -0,0 +1,15 @@
+detext_textspotting_data_root = 'data/detext'
+
+detext_textspotting_train = dict(
+    type='OCRDataset',
+    data_root=detext_textspotting_data_root,
+    ann_file='textspotting_train.json',
+    filter_cfg=dict(filter_empty_gt=True, min_size=32),
+    pipeline=None)
+
+detext_textspotting_test = dict(
+    type='OCRDataset',
+    data_root=detext_textspotting_data_root,
+    ann_file='textspotting_test.json',
+    test_mode=True,
+    pipeline=None)
diff --git a/dataset_zoo/detext/metafile.yml b/dataset_zoo/detext/metafile.yml
new file mode 100644
index 000000000..51421422e
--- /dev/null
+++ b/dataset_zoo/detext/metafile.yml
@@ -0,0 +1,31 @@
+Name: 'DETEXT'
+Paper:
+  Title: A Database for Evaluating Text Extraction from Biomedical Literature Figures
+  URL: https://doi.org/10.1371/journal.pone.0126200
+  Venue: PLoS ONE
+  Year: '2015'
+  BibTeX: '@article{article,
+    author = {Yin, Xu-Cheng and Yang, Chun and Pei, Wei-Yi and Man, Haixia and Zhang, Jun and Learned-Miller, Erik and Yu, Hong},
+    year = {2015},
+    month = {05},
+    pages = {e0126200},
+    title = {DeTEXT: A Database for Evaluating Text Extraction from Biomedical Literature Figures},
+    volume = {10},
+    journal = {PloS one},
+    doi = {10.1371/journal.pone.0126200}}'
+Data:
+  Website: https://rrc.cvc.uab.es/?ch=9
+  Language:
+    - English
+  Scene:
+    - biomedical
+  Granularity:
+    - Word
+  Tasks:
+    - textrecog
+    - textdet
+    - textspotting
+  License:
+    Type: CC BY 1.0
+    Link: https://creativecommons.org/licenses/by/1.0/
+  Format: .txt
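Note: the three `_base_` dataset files above only point at the converted annotations; a downstream model config is expected to import them and attach its own pipeline. A minimal sketch of that wiring for text detection follows; the model base file, pipeline names and dataloader settings are illustrative assumptions, not part of this patch.

# Hypothetical configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_detext.py
_base_ = [
    '_base_dbnet_resnet18_fpnc.py',  # assumed model base defining *_pipeline
    '../_base_/datasets/detext.py',  # added by this patch
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_sgd_1200e.py',
]

# Reuse the dataset dicts from the base file and plug in the pipelines.
detext_textdet_train = _base_.detext_textdet_train
detext_textdet_train.pipeline = _base_.train_pipeline
detext_textdet_test = _base_.detext_textdet_test
detext_textdet_test.pipeline = _base_.test_pipeline

train_dataloader = dict(
    batch_size=16,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=detext_textdet_train)

val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=detext_textdet_test)

test_dataloader = val_dataloader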
diff --git a/dataset_zoo/detext/textdet.py b/dataset_zoo/detext/textdet.py
new file mode 100644
index 000000000..06f790ddc
--- /dev/null
+++ b/dataset_zoo/detext/textdet.py
@@ -0,0 +1,63 @@
+data_root = 'data/detext'
+cache_path = 'data/cache'
+
+train_preparer = dict(
+    obtainer=dict(
+        type='NaiveDataObtainer',
+        cache_path=cache_path,
+        files=[
+            dict(
+                url='https://rrc.cvc.uab.es/downloads/'
+                'ch9_training_images.zip',
+                save_name='detext_textdet_train_img.zip',
+                md5='e07161d6af1ef2f81f9ba0d2f904e377',
+                content=['image'],
+                mapping=[['detext_textdet_train_img', 'textdet_imgs/train']]),
+            dict(
+                url='https://rrc.cvc.uab.es/downloads/'
+                'ch9_training_localization_transcription_gt.zip',
+                save_name='detext_textdet_train_gt.zip',
+                md5='ae4dfe155e61dcfeadd80f6b0fd15626',
+                content=['annotation'],
+                mapping=[['detext_textdet_train_gt', 'annotations/train']]),
+        ]),
+    gatherer=dict(
+        type='PairGatherer',
+        img_suffixes=['.jpg'],
+        rule=[r'(\w+)\.jpg', r'gt_\1.txt']),
+    parser=dict(type='DetextDetAnnParser', encoding='utf-8-sig'),
+    packer=dict(type='TextDetPacker'),
+    dumper=dict(type='JsonDumper'),
+)
+
+test_preparer = dict(
+    obtainer=dict(
+        type='NaiveDataObtainer',
+        cache_path=cache_path,
+        files=[
+            dict(
+                url='https://rrc.cvc.uab.es/downloads/'
+                'ch9_validation_images.zip',
+                save_name='detext_textdet_test_img.zip',
+                md5='c6ffe0abe6f2d7b4d70e6883257308e0',
+                content=['image'],
+                mapping=[['detext_textdet_test_img', 'textdet_imgs/test']]),
+            dict(
+                url='https://rrc.cvc.uab.es/downloads/'
+                'ch9_validation_localization_transcription_gt.zip',
+                save_name='detext_textdet_test_gt.zip',
+                md5='075c4b27ab2848c90ad5e87d9f922bc3',
+                content=['annotation'],
+                mapping=[['detext_textdet_test_gt', 'annotations/test']]),
+        ]),
+    gatherer=dict(
+        type='PairGatherer',
+        img_suffixes=['.jpg', '.JPG'],
+        rule=[r'img_(\d+)\.([jJ][pP][gG])', r'gt_img_\1.txt']),
+    parser=dict(type='DetextDetAnnParser', encoding='utf-8-sig'),
+    packer=dict(type='TextDetPacker'),
+    dumper=dict(type='JsonDumper'),
+)
+
+delete = ['detext_textdet_train_img', 'annotations', 'detext_textdet_test_img']
+config_generator = dict(type='TextDetConfigGenerator')
diff --git a/dataset_zoo/detext/textrecog.py b/dataset_zoo/detext/textrecog.py
new file mode 100644
index 000000000..c4436e074
--- /dev/null
+++ b/dataset_zoo/detext/textrecog.py
@@ -0,0 +1,9 @@
+_base_ = ['textdet.py']
+
+_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
+_base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
+
+_base_.train_preparer.packer.type = 'TextRecogCropPacker'
+_base_.test_preparer.packer.type = 'TextRecogCropPacker'
+
+config_generator = dict(type='TextRecogConfigGenerator')
diff --git a/dataset_zoo/detext/textspotting.py b/dataset_zoo/detext/textspotting.py
new file mode 100644
index 000000000..4681c61af
--- /dev/null
+++ b/dataset_zoo/detext/textspotting.py
@@ -0,0 +1,8 @@
+_base_ = ['textdet.py']
+_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
+_base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
+
+_base_.train_preparer.packer.type = 'TextSpottingPacker'
+_base_.test_preparer.packer.type = 'TextSpottingPacker'
+
+config_generator = dict(type='TextSpottingConfigGenerator')
diff --git a/mmocr/datasets/preparers/parsers/__init__.py b/mmocr/datasets/preparers/parsers/__init__.py
index fd3794710..81a44ffd6 100644
--- a/mmocr/datasets/preparers/parsers/__init__.py
+++ b/mmocr/datasets/preparers/parsers/__init__.py
@@ -2,6 +2,7 @@
 from .base import BaseParser
 from .coco_parser import COCOTextDetAnnParser
 from .ctw1500_parser import CTW1500AnnParser
+from .detext_parser import DetextDetAnnParser
 from .funsd_parser import FUNSDTextDetAnnParser
 from .icdar_txt_parser import (ICDARTxtTextDetAnnParser,
                                ICDARTxtTextRecogAnnParser)
@@ -18,5 +19,5 @@
     'TotaltextTextDetAnnParser', 'WildreceiptKIEAnnParser',
     'COCOTextDetAnnParser', 'SVTTextDetAnnParser', 'FUNSDTextDetAnnParser',
     'SROIETextDetAnnParser', 'NAFAnnParser', 'CTW1500AnnParser',
-    'SynthTextAnnParser', 'MJSynthAnnParser'
+    'SynthTextAnnParser', 'MJSynthAnnParser', 'DetextDetAnnParser'
 ]
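For reference, the dataset_zoo configs above are consumed by MMOCR's Dataset Preparer, which downloads the archives, runs the gatherer/parser/packer pipeline defined here, and emits the base dataset configs shown earlier. The invocation is roughly the following; exact flags may differ between MMOCR versions.

python tools/dataset_converters/prepare_dataset.py detext --task textdet
python tools/dataset_converters/prepare_dataset.py detext --task textrecog
python tools/dataset_converters/prepare_dataset.py detext --task textspotting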
diff --git a/mmocr/datasets/preparers/parsers/detext_parser.py b/mmocr/datasets/preparers/parsers/detext_parser.py
new file mode 100644
index 000000000..feb9ccf2c
--- /dev/null
+++ b/mmocr/datasets/preparers/parsers/detext_parser.py
@@ -0,0 +1,68 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import List, Optional, Tuple
+
+from mmocr.registry import DATA_PARSERS
+from mmocr.utils import bbox2poly
+from .base import BaseParser
+
+
+@DATA_PARSERS.register_module()
+class DetextDetAnnParser(BaseParser):
+    """DeTEXT Txt Format Text Detection Annotation Parser.
+
+    The annotations of this dataset are stored in txt files, where each
+    line has the following format:
+    x1, y1, x2, y2, x3, y3, x4, y4, transcription
+
+    Args:
+        separator (str): The separator between each element in a line.
+            Defaults to ','.
+        ignore (str): The text to be ignored. Defaults to '###'.
+        format (str): The format of the annotation. Defaults to
+            'x1,y1,x2,y2,x3,y3,x4,y4,trans'.
+        encoding (str): The encoding of the annotation file. Defaults to
+            'utf-8'.
+        nproc (int): The number of processes to parse the annotation.
+            Defaults to 1.
+        remove_strs (List[str], optional): Used to remove redundant strings
+            in the transcription. Defaults to None.
+        mode (str, optional): The mode of the box converter. Supported modes
+            are 'xywh' and 'xyxy'. Defaults to None.
+    """
+
+    def __init__(self,
+                 separator: str = ',',
+                 ignore: str = '###',
+                 format: str = 'x1,y1,x2,y2,x3,y3,x4,y4,trans',
+                 encoding: str = 'utf-8',
+                 remove_strs: Optional[List[str]] = None,
+                 mode: Optional[str] = None,
+                 **kwargs) -> None:
+        self.sep = separator
+        self.format = format
+        self.encoding = encoding
+        self.ignore = ignore
+        self.mode = mode
+        self.remove_strs = remove_strs
+        super().__init__(**kwargs)
+
+    def parse_file(self, img_path: str, ann_path: str) -> Tuple:
+        """Parse single annotation."""
+        instances = list()
+        for anno in self.loader(ann_path, self.sep, self.format,
+                                self.encoding):
+            anno = list(anno.values())
+            if self.remove_strs is not None:
+                for strs in self.remove_strs:
+                    for i in range(len(anno)):
+                        if strs in anno[i]:
+                            anno[i] = anno[i].replace(strs, '')
+            poly = list(map(float, anno[0:-1]))
+            if self.mode is not None:
+                poly = bbox2poly(poly, self.mode)
+                poly = poly.tolist()
+            text = anno[-1]
+            instances.append(
+                dict(poly=poly, text=text, ignore=text == self.ignore))
+
+        return img_path, instances
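To make the parsing step concrete, here is a small standalone sketch that is not part of the patch and bypasses BaseParser and its loader; it applies the same per-line logic as parse_file above to one made-up DETEXT-style annotation line, assuming the default 'x1,y1,x2,y2,x3,y3,x4,y4,trans' format and a transcription without commas.

# Illustrative only: mirrors the body of DetextDetAnnParser.parse_file for one line.
line = '158,128,411,128,411,181,158,181,Apoptosis'  # made-up example line

fields = line.split(',')              # stands in for the format-driven loader
poly = list(map(float, fields[:-1]))  # eight corner coordinates of the quad
text = fields[-1]                     # transcription
instance = dict(poly=poly, text=text, ignore=text == '###')

print(instance)
# {'poly': [158.0, 128.0, 411.0, 128.0, 411.0, 181.0, 158.0, 181.0],
#  'text': 'Apoptosis', 'ignore': False}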