From 195c27538b7949631358715f5a18233145ffcb50 Mon Sep 17 00:00:00 2001 From: liukuikun <641417025@qq.com> Date: Tue, 21 Mar 2023 01:36:14 +0800 Subject: [PATCH 1/2] Paper List --- docs/en/_static/js/collapsed.js | 9 + docs/en/conf.py | 1 + docs/en/index.rst | 6 + docs/en/paper_zoo.py | 386 ++++++++++++++++++ docs/en/paper_zoo/index.rst | 14 + ... and Grouping for Scene Text Detection.yml | 71 ++++ ...ction with differentiable binarization.yml | 78 ++++ paper_zoo/textdet/algorithm_template.yml | 46 +++ ...ning for Robust Scene Text Recognition.yml | 78 ++++ paper_zoo/textrecog/algorithm_template.yml | 56 +++ 10 files changed, 745 insertions(+) create mode 100755 docs/en/paper_zoo.py create mode 100644 docs/en/paper_zoo/index.rst create mode 100644 paper_zoo/textdet/algorithm/Few Could Be Better Than All: Feature Sampling and Grouping for Scene Text Detection.yml create mode 100644 paper_zoo/textdet/algorithm/Real-time scene text detection with differentiable binarization.yml create mode 100644 paper_zoo/textdet/algorithm_template.yml create mode 100644 paper_zoo/textrecog/algorithm/Perceiving Stroke-Semantic Context: Hierarchical Contrastive Learning for Robust Scene Text Recognition.yml create mode 100644 paper_zoo/textrecog/algorithm_template.yml diff --git a/docs/en/_static/js/collapsed.js b/docs/en/_static/js/collapsed.js index 3124c144d..eb593cb70 100644 --- a/docs/en/_static/js/collapsed.js +++ b/docs/en/_static/js/collapsed.js @@ -1 +1,10 @@ var collapsedSections = ['Migration Guides', 'API Reference'] + +$(document).ready(function () { + $('.model-summary').DataTable({ + "stateSave": false, + "lengthChange": false, + "pageLength": 20, + "order": [] + }); +}); diff --git a/docs/en/conf.py b/docs/en/conf.py index 3082f7583..2aa00905e 100644 --- a/docs/en/conf.py +++ b/docs/en/conf.py @@ -181,6 +181,7 @@ def builder_inited_handler(app): subprocess.run(['./stats.py']) subprocess.run(['./dataset_zoo.py']) subprocess.run(['./project_zoo.py']) + 
subprocess.run(['./paper_zoo.py']) def setup(app): diff --git a/docs/en/index.rst b/docs/en/index.rst index 07bb9a3f5..ca15af9e8 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -60,6 +60,12 @@ You can switch between English and Chinese in the lower-left corner of the layou textrecog_models.md kie_models.md +.. toctree:: + :maxdepth: 2 + :caption: Paper Zoo + + paper_zoo/index.rst + .. toctree:: :maxdepth: 2 :caption: Notes diff --git a/docs/en/paper_zoo.py b/docs/en/paper_zoo.py new file mode 100755 index 000000000..82994b57b --- /dev/null +++ b/docs/en/paper_zoo.py @@ -0,0 +1,386 @@ +#!/usr/bin/env python +import os +import os.path as osp + +import yaml +from tabulate import tabulate + + +class BaseAlgorithmInfo: + + def __init__(self, info) -> None: + self.info = info + self.set_base_info() + self.set_model_info() + self.set_exp_info() + + def set_base_info(self): + # used for overview table + self.BaseTitle = self.get_info_value('Title') + self.Title = self.get_info_value( + 'Title', 'get_title_format', dir='paper') + self.Venue = self.get_info_value('Venue') + self.Year = self.get_info_value('Year') + self.URL = self.get_info_value('URL', method='get_paper_url_format') + self.Institution = self.get_info_value('Lab/Company') + self.Bibtex = self.get_info_value('Bibtex') + self.Code = self.get_info_value('Code', method='get_link_format') + + def set_model_info(self): + self.Abstract = self.get_info_value('Abstract') + self.Network = self.get_info_value('Network Structure', + 'get_network_format') + + def set_exp_info(self): + self.Device = self.get_info_value('Experiments.Metadata.Device') + self.FLOPs = self.get_info_value( + 'Experiments.Metadata.FLOPs', + 'get_fps_flops_format', + device=self.Device) + self.Params = self.get_info_value('Experiments.Metadata.Params') + self.TrainDataset = self.get_info_value( + 'Experiments.Metadata.Training Data') + + def get_info_value( + self, + key, + method='get_item_format', + source=None, + 
return_default='N/A', + **method_params, + ): + key_list = key.split('.') + if source is None: + source = self.info + for k in key_list: + if k not in source: + return return_default + source = source[k] + if method is not None: + return getattr(self, method)(source, **method_params) + return source + + def get_fps_flops_format(self, inputs, device='N/A'): + if inputs == 'N/A': + return inputs + if device == 'N/A': + return '{:.2f}'.format(inputs) + return '{:.2f}-{}'.format(inputs, device) + + def url_format(self, url, name='URL'): + if url != 'N/A': + return '[{}]({})'.format(name, url) + return 'N/A' + + def get_paper_url_format(self, url): + venue_url = '[Venue link]({})'.format(url['Venue']) if \ + url['Venue'] != 'N/A' else '' + arxiv_url = '[arXiv link]({})'.format(url['Arxiv']) if \ + url['Arxiv'] != 'N/A' else '' + return '
'.join([arxiv_url, venue_url]).strip('
') + + def get_link_format(self, url): + return '[Link](' + url + ')' if url != 'N/A' else 'N/A' + + def get_item_format(self, item): + if isinstance(item, list): + return '
'.join(item) + elif isinstance(item, (str, int)): + return item + + def get_title_format(self, title, dir): + name = title + '.md' + name = name.replace(' ', '%20') + return '[{}]({})'.format(title, osp.join(dir, name)) + + def get_abbr_format(self, abbr, dir): + name = self.BaseTitle + '.md' + name = name.replace(' ', '%20') + return '[{}]({})'.format(abbr, osp.join(dir, name)) + + def get_network_format(self, network): + return '
\n\n'.format(network) + + def __getitem__(self, key): + return getattr(self, key, 'N/A') + + +class RecogAlgorithmInfo(BaseAlgorithmInfo): + + def set_exp_info(self): + super().set_exp_info() + results = self.info['Experiments']['Results'] + self.Abbr = self.get_info_value( + 'Experiments.Name', 'get_abbr_format', dir='paper') + self.test_dataset_list = list() + self.metric_list = set() + for res in results: + self.test_dataset_list.append(res['Test Data']) + for metric in res['Metrics']: + self.metric_list.add(metric) + setattr(self, metric + '_' + res['Test Data'], + res['Metrics'][metric]) + self.Times = self.get_info_value('Experiments.Metadata.InferenceTime') + + def set_model_info(self): + super().set_model_info() + self.Learning_Method = self.get_info_value('Learning Method') + self.Language_Modality = self.get_info_value('Language Modality') + self.Architecture = self.get_info_value('Architecture') + + +class DetAlgorithmInfo(BaseAlgorithmInfo): + + def set_exp_info(self): + super().set_exp_info() + results = self.info['Experiments']['Results'] + self.Abbr = self.get_info_value( + 'Experiments.Name', 'get_abbr_format', dir='paper') + self.test_dataset_list = list() + self.metric_list = set() + for res in results: + self.test_dataset_list.append(res['Test Data']) + for metric in res['Metrics']: + self.metric_list.add(metric) + setattr(self, metric + '_' + res['Test Data'], + res['Metrics'][metric]) + setattr(self, 'FPS_' + res['Test Data'], res.get('FPS', 'N/A')) + + def set_model_info(self): + super().set_model_info() + self.Method = self.get_info_value('Method') + + +AlgorithmMapping = dict(textrecog=RecogAlgorithmInfo, textdet=DetAlgorithmInfo) + + +class BaseAlgorithmPaperList: + overview_header = ['Title', 'Venue', 'Year', 'Institution', 'URL', 'Code'] + + def __init__(self, paper_root, task, save_dir) -> None: + papers_dir = osp.join(paper_root, task, 'algorithm') + paper_paths = os.listdir(papers_dir) + paper_paths.sort() + self.algorithm_list = [ + 
AlgorithmMapping[task](yaml.safe_load( + open(osp.join(papers_dir, paper_path)))) + for paper_path in paper_paths + ] + + self.task = task + self.papers_dir = save_dir + + self.algorithm_dir = osp.join(self.papers_dir, 'algorithm') + os.makedirs(self.algorithm_dir, exist_ok=True) + + self.table_cfg = dict( + tablefmt='pipe', + floatfmt='.2f', + numalign='right', + stralign='center') + + def gen_algorithm(self): + self.gen_algorithm_single(osp.join(self.algorithm_dir, 'paper')) + self.gen_algorithm_overview( + osp.join(self.algorithm_dir, 'overview.md')) + # self.gen_algorithm_venue(osp.join(self.algorithm_dir, 'venue.md')) + # self.gen_algorithm_method(osp.join(self.algorithm_dir, 'method.md')) + self.gen_sota(osp.join(self.algorithm_dir, 'sota.md')) + + def gen_algorithm_overview(self, save_path): + rows = list() + for paper in self.algorithm_list: + row = [getattr(paper, head) for head in self.overview_header] + rows.append(row) + + with open(save_path, 'w') as f: + f.write('# Overview\n') + f.write("""```{table}\n:class: model-summary\n""") + f.write(tabulate(rows, self.overview_header, **self.table_cfg)) + f.write('\n```\n') + + def gen_sota(self, save_path): + pass + + def gen_algorithm_single(self, paper_dirs): + pass + + +class TextRecogPaperList(BaseAlgorithmPaperList): + sota_header = ['Abbr', 'Venue', 'Year', 'TrainDataset'] + + def gen_sota(self, save_path): + benchmark_dataset_list = [ + 'IIIT5K', 'SVT', 'IC13', 'IC15', 'SVTP', 'CUTE' + ] + metric_name = 'WAICS' + rows = list() + for paper in self.algorithm_list: + results = [ + getattr(paper, f'{metric_name}_{dataset}', 'N/A') + for dataset in benchmark_dataset_list + ] + + # remove 'N/A' in avg_list + avg_list = [x for x in results if x != 'N/A'] + # if all results are 'N/A', we don't append it to the sota table + if len(avg_list) != 0: + avg = sum(avg_list) / len(avg_list) + else: + continue + results.append(avg) + row = [getattr(paper, head) for head in self.sota_header] + results + rows.append(row) 
+ # sort average accuracy from small to large + rows = sorted(rows, key=lambda x: x[-1]) + with open(save_path, 'w') as f: + f.write('# SOTA\n') + f.write("""```{table}\n:class: model-summary\n""") + header = self.sota_header + benchmark_dataset_list + ['Avg'] + f.write(tabulate(rows, header, **self.table_cfg)) + f.write('\n```\n') + + def gen_algorithm_single(self, paper_dirs): + overview_header = ['Venue', 'Year', 'Institution', 'URL', 'Code'] + model_header = [ + 'Architecture', 'Learning Method', 'Language Modality', 'Times', + 'FLOPs', 'Params' + ] + if not os.path.exists(paper_dirs): + os.makedirs(paper_dirs) + for paper in self.algorithm_list: + file_name = paper.BaseTitle + '.md' + file_path = os.path.join(paper_dirs, file_name) + + with open(file_path, 'w') as f: + f.write('# {}\n'.format(paper.BaseTitle)) + + f.write('## Overview\n\n\n') + row = [getattr(paper, head, 'N/A') for head in overview_header] + + f.write(tabulate([row], overview_header, **self.table_cfg)) + f.write('\n\n') + f.write('## Model\n\n') + f.write('### Abstract\n\n') + f.write(paper['Abstract'] + '\n\n') + f.write(paper.Network) + f.write('### Model information\n\n') + + rows = [ + getattr(paper, head.replace(' ', '_'), 'N/A') + for head in model_header + ] + f.write(tabulate([rows], model_header, **self.table_cfg)) + f.write('\n\n') + f.write('## Results\n\n') + + results_header = ['Metric', 'Training DataSets' + ] + paper.test_dataset_list + + rows = list() + for metric in paper.metric_list: + + row = [ + metric, + paper.TrainDataset, + ] + row += [ + getattr(paper, f'{metric}_{key}') + for key in paper.test_dataset_list + ] + rows.append(row) + f.write(tabulate(rows, results_header, **self.table_cfg)) + f.write('\n\n') + f.write('## Citation\n\n') + f.write('```bibtex\n{}\n```\n'.format(paper['Bibtex'])) + + +class TextDetPaperList(BaseAlgorithmPaperList): + sota_header = ['Abbr', 'Venue', 'Year'] + + def gen_sota(self, save_path): + benchmark_dataset_list = ['ICDAR2015', 'CTW500'] 
+ metric_list = ['Precision', 'Recall', 'Hmean'] + rows = list() + for paper in self.algorithm_list: + results = [ + getattr(paper, f'{metric_name}_{dataset}', 'N/A') + for dataset in benchmark_dataset_list + for metric_name in metric_list + ] + + row = [getattr(paper, head) for head in self.sota_header] + results + rows.append(row) + # sort average accuracy from small to large + rows = sorted(rows, key=lambda x: x[-1]) + with open(save_path, 'w') as f: + f.write('# SOTA\n') + f.write("""```{table}\n:class: model-summary\n""") + header = self.sota_header + [ + f'{dataset}_{metric_name}' + for dataset in benchmark_dataset_list + for metric_name in metric_list + ] + f.write(tabulate(rows, header, **self.table_cfg)) + f.write('\n```\n') + + def gen_algorithm_single(self, paper_dirs): + overview_header = ['Venue', 'Year', 'Institution', 'URL', 'Code'] + model_header = ['Method', 'FPS', 'FLOPs', 'Params'] + if not os.path.exists(paper_dirs): + os.makedirs(paper_dirs) + for paper in self.algorithm_list: + file_name = paper.BaseTitle + '.md' + file_path = os.path.join(paper_dirs, file_name) + + with open(file_path, 'w') as f: + f.write('# {}\n'.format(paper.BaseTitle)) + + f.write('## Overview\n\n\n') + row = [getattr(paper, head, 'N/A') for head in overview_header] + + f.write(tabulate([row], overview_header, **self.table_cfg)) + f.write('\n\n') + f.write('## Model\n\n') + f.write('### Abstract\n\n') + f.write(paper['Abstract'] + '\n\n') + f.write(paper.Network) + f.write('### Model information\n\n') + + rows = [ + getattr(paper, head.replace(' ', '_'), 'N/A') + for head in model_header + ] + f.write(tabulate([rows], model_header, **self.table_cfg)) + f.write('\n\n') + f.write('## Results\n\n') + + results_header = ['Metric'] + paper.test_dataset_list + + rows = list() + for metric in paper.metric_list: + + row = [metric] + row += [ + getattr(paper, f'{metric}_{key}') + for key in paper.test_dataset_list + ] + rows.append(row) + f.write(tabulate(rows, results_header, 
**self.table_cfg)) + f.write('\n\n') + f.write('## Citation\n\n') + f.write('```bibtex\n{}\n```\n'.format(paper['Bibtex'])) + + +papers_dir = '../../paper_zoo' +# papers_dir = 'paper_zoo' + +save_dir = osp.join('docs/en/paper_zoo', 'textrecog') +paper_list = TextRecogPaperList(papers_dir, 'textrecog', save_dir) +os.makedirs(save_dir, exist_ok=True) +paper_list.gen_algorithm() + +save_dir = osp.join('docs/en/paper_zoo', 'textdet') +paper_list = TextDetPaperList(papers_dir, 'textdet', save_dir) +os.makedirs(save_dir, exist_ok=True) +paper_list.gen_algorithm() diff --git a/docs/en/paper_zoo/index.rst b/docs/en/paper_zoo/index.rst new file mode 100644 index 000000000..dea812219 --- /dev/null +++ b/docs/en/paper_zoo/index.rst @@ -0,0 +1,14 @@ +Text Recognition +************** + +.. toctree:: + :maxdepth: 1 + + textrecog/overview.md + textrecog/sota.md + +.. toctree:: + :maxdepth: 1 + + textdet/overview.md + textdet/sota.md diff --git a/paper_zoo/textdet/algorithm/Few Could Be Better Than All: Feature Sampling and Grouping for Scene Text Detection.yml b/paper_zoo/textdet/algorithm/Few Could Be Better Than All: Feature Sampling and Grouping for Scene Text Detection.yml new file mode 100644 index 000000000..72f3b9d7f --- /dev/null +++ b/paper_zoo/textdet/algorithm/Few Could Be Better Than All: Feature Sampling and Grouping for Scene Text Detection.yml @@ -0,0 +1,71 @@ +Title: 'Few Could Be Better Than All: Feature Sampling and Grouping for Scene Text Detection' +Abbreviation: 'FSGNet' +Venue: CVPR +Year: 2022 +Lab/Company: + - NetEase + - Huazhong University of Science and Technology +URL: + Venue: 'https://openaccess.thecvf.com/content/CVPR2022/papers/Tang_Few_Could_Be_Better_Than_All_Feature_Sampling_and_Grouping_CVPR_2022_paper.pdf' + Arxiv: 'https://arxiv.org/abs/2203.15221' +Bibtex: '@InProceedings{Tang_2022_CVPR, + author = {Tang, Jingqun and Zhang, Wenqing and Liu, Hongye and Yang, MingKun and Jiang, Bo and Hu, Guanglong and Bai, Xiang}, + title = {Few Could Be 
Better Than All: Feature Sampling and Grouping for Scene Text Detection}, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2022}, + pages = {4563-4572} +}' +Code: N/A + +Experiments: + Name: FSGNet + Metadata: + Device: N/A + FLOPs: 35.9 + Params: 38.3 + Results: + - Training Data: ICDAR2015 + Test Data: ICDAR2015 + Metrics: + Hmean: 0.891 + Precision: 0.909 + Recall: 0.873 + FPS: 12.9 + - Training Data: CTW1500 + Test Data: CTW1500 + Metrics: + Hmean: 0.852 + Precision: 0.881 + Recall: 0.824 + FPS: N/A + - Training Data: TotalText + Test Data: TotalText + Metrics: + Hmean: 0.881 + Precision: 0.907 + Recall: 0.857 + FPS: N/A + + +Abstract: 'Recently, transformer-based methods have achieved promising +progresses in object detection, as they can eliminate the post-processes like +NMS and enrich the deep representations. However, these methods cannot well +cope with scene text due to its extreme variance of scales and aspect ratios. +In this paper, we present a simple yet effective transformer-based architecture +for scene text detection. Different from previous approaches that learn robust +deep representations of scene text in a holistic manner, our method performs +scene text detection based on a few representative features, which avoids the +disturbance by background and reduces the computational cost. Specifically, we +first select a few representative features at all scales that are highly +relevant to foreground text. Then, we adopt a transformer for modeling the +relationship of the sampled features, which effectively divides them into +reasonable groups. As each feature group corresponds to a text instance, its +bounding box can be easily obtained without any post-processing operation. +Using the basic feature pyramid network for feature extraction, our method +consistently achieves state-of-the-art results on several popular datasets for +scene text detection.' 
+Network Structure: 'https://user-images.githubusercontent.com/24622904/226288868-d0798fc9-f9a5-4b60-b4da-bed071dc21f9.png' + +# Optional +Method: RegressionBased diff --git a/paper_zoo/textdet/algorithm/Real-time scene text detection with differentiable binarization.yml b/paper_zoo/textdet/algorithm/Real-time scene text detection with differentiable binarization.yml new file mode 100644 index 000000000..11cb7b6c6 --- /dev/null +++ b/paper_zoo/textdet/algorithm/Real-time scene text detection with differentiable binarization.yml @@ -0,0 +1,78 @@ +Title: 'Real-time scene text detection with differentiable binarization' +Abbreviation: 'DBNet' +Venue: AAAI +Year: 2020 +Lab/Company: + - Huazhong University of Science and Technology + - Megvii + - Shanghai Jiao Tong University + - Onlyou Tech. +URL: + Venue: https://ojs.aaai.org/index.php/AAAI/article/view/6812/6666 + Arxiv: https://arxiv.org/abs/1911.08947 +Bibtex: '@inproceedings{liao2020real, + title={Real-time scene text detection with differentiable binarization}, + author={Liao, Minghui and Wan, Zhaoyi and Yao, Cong and Chen, Kai and Bai, Xiang}, + booktitle={Proceedings of the AAAI conference on artificial intelligence}, + volume={34}, + number={07}, + pages={11474--11481}, + year={2020} +}' +Code: https://github.com/open-mmlab/mmocr + +Experiments: + Name: DBNet-ResNet50 + Metadata: + Device: N/A + Params: N/A + FLOPs: N/A + Results: + - Training Data: ICDAR2015 + Test Data: ICDAR2015 + Metrics: + Hmean: 0.873 + Precision: 0.918 + Recall: 0.832 + FPS: 12 + - Training Data: CTW1500 + Test Data: CTW1500 + Metrics: + Hmean: 0.834 + Precision: 0.869 + Recall: 0.802 + FPS: 22 + - Training Data: TotalText + Test Data: TotalText + Metrics: + Hmean: 0.847 + Precision: 0.871 + Recall: 0.825 + FPS: 22 +Abstract: 'Recently, segmentation-based methods are quite popular in +scene text detection, as the segmentation results can more accurately +describe scene text of various shapes such as curve +text. 
However, the post-processing of binarization is essential +for segmentation-based detection, which converts probability +maps produced by a segmentation method into bounding +boxes/regions of text. In this paper, we propose a module +named Differentiable Binarization (DB), which can perform +the binarization process in a segmentation network. Optimized +along with a DB module, a segmentation network can +adaptively set the thresholds for binarization, which not only +simplifies the post-processing but also enhances the performance +of text detection. Based on a simple segmentation network, +we validate the performance improvements of DB on +five benchmark datasets, which consistently achieves state-of-the-art +results, in terms of both detection accuracy and +speed. In particular, with a light-weight backbone, the performance +improvements by DB are significant so that we +can look for an ideal tradeoff between detection accuracy +and efficiency. Specifically, with a backbone of ResNet-18, +our detector achieves an F-measure of 82.8, running at 62 +FPS, on the MSRA-TD500 dataset. Code is available at: +https://github.com/MhLiao/DB.' +Network Structure: # URL + +# Optional +Method: SegmentationBase diff --git a/paper_zoo/textdet/algorithm_template.yml b/paper_zoo/textdet/algorithm_template.yml new file mode 100644 index 000000000..9b0ba4395 --- /dev/null +++ b/paper_zoo/textdet/algorithm_template.yml @@ -0,0 +1,46 @@ +Title: N/A +Venue: ICCV/arXiv/ECCV/... 
+Year: N/A +Lab/Company: N/A +URL: + Venue: N/A + Arxiv: N/A +Bibtex: N/A +Code: N/A +# If the same network has different results on the same dataset, +# select the best one +Experiments: + Name: N/A + Metadata: + Device: N/A + FLOPs: N/A + Params: N/A + Results: + - Test Data: ICDAR2015 + Pre-training Data: N/A + Metrics: + Hmean: N/A + Precision: N/A + Recall: N/A + FPS: N/A + - Test Data: CTW1500 + Metrics: + Hmean: N/A + Precision: N/A + Recall: N/A + FPS: N/A + - Test Data: TotalText + Metrics: + Hmean: N/A + Precision: N/A + Recall: N/A + FPS: N/A + +Abstract: +Network Structure: # URL + +# Optional +Method: + - SegmentationBase + - RegressionBase + - Hybrid diff --git a/paper_zoo/textrecog/algorithm/Perceiving Stroke-Semantic Context: Hierarchical Contrastive Learning for Robust Scene Text Recognition.yml b/paper_zoo/textrecog/algorithm/Perceiving Stroke-Semantic Context: Hierarchical Contrastive Learning for Robust Scene Text Recognition.yml new file mode 100644 index 000000000..9da391b9b --- /dev/null +++ b/paper_zoo/textrecog/algorithm/Perceiving Stroke-Semantic Context: Hierarchical Contrastive Learning for Robust Scene Text Recognition.yml @@ -0,0 +1,78 @@ +Title: 'Perceiving Stroke-Semantic Context: Hierarchical Contrastive Learning for Robust Scene Text Recognition' +Abbreviation: PerSec +Venue: AAAI +Year: 2022 +Lab/Company: + - Tencent YouTu Lab + - University of Science and Technology of China +URL: + Venue: 'https://www.aaai.org/AAAI22Papers/AAAI-785.LiuH.pdf' + Arxiv: N/A +Bibtex: '@inproceedings{liu2022perceiving, + title={Perceiving Stroke-Semantic Context: Hierarchical Contrastive Learning for Robust Scene Text Recognition}, + author={Liu, Hao and Wang, Bin and Bao, Zhimin and Xue, Mobai and Kang, Sheng and Jiang, Deqiang and Liu, Yinsong and Ren, Bo}, + year={2022}, + organization={AAAI}}' +Code: N/A + +Experiments: + Name: PerSec + Metadata: + Device: N/A + InferenceTime: 100 + FLOPs: 30 + Params: 20 + Training Data: MJ\ST\SA\Real + Results: + - Test 
Data: IIIT5K + Metrics: + WAICS: 88.1 + - Test Data: SVT + Metrics: + WAICS: 96.7 + - Test Data: IC13 + Metrics: + WAICS: 73.6 + - Test Data: IC15 + Metrics: + WAICS: 77.7 + - Test Data: SVTP + Metrics: + WAICS: 72.7 + - Test Data: CUTE + Metrics: + WAICS: 83.8 + + +Abstract: 'We introduce Perceiving Stroke-Semantic Context (PerSec), a new +approach to self-supervised representation learning tailored for Scene Text +Recognition (STR) task. Considering scene text images carry both visual and +semantic properties, we equip our PerSec with dual context perceivers which +can contrast and learn latent representations from low-level stroke and +high-level semantic contextual spaces simultaneously via hierarchical +contrastive learning on unlabeled text image data. Experiments in un- and +semi-supervised learning settings on STR benchmarks demonstrate our +proposed framework can yield a more robust representation for both +CTC-based and attention-based decoders than other contrastive learning +methods. To fully investigate the potential of our method, we also +collect a dataset of 100 million unlabeled text images, named UTI-100M, +covering 5 scenes and 4 languages. By leveraging hundred-million-level +unlabeled data, our PerSec shows significant performance improvement +when fine-tuning the learned representation on the labeled data. +Furthermore, we observe that the representation learned by PerSec +presents great generalization, especially under few labeled data scenes.' 
+ +Network Structure: 'https://user-images.githubusercontent.com/24622904/211475019-ac656c48-3bc0-41e7-ae23-6aa8be8e0287.png' + + +Architecture: + - CTC + - Attention + - Transformer +Learning Method: + - Self-Supervised + - Supervised +Language Modality: + - Implicit Language Model + +Paper Reading URL: 'https://mp.weixin.qq.com/s?__biz=MzI1ODk1ODI5Mw==&mid=2247489751&idx=1&sn=38430279107d2a53827adec7884b9ce2&chksm=ea016e6ddd76e77b5ecdafc8bffd57da538751e273147fa3706e5d22e0385f01d446bdb031d0&scene=126&&sessionid=1670397988#rd' diff --git a/paper_zoo/textrecog/algorithm_template.yml b/paper_zoo/textrecog/algorithm_template.yml new file mode 100644 index 000000000..4e3c87828 --- /dev/null +++ b/paper_zoo/textrecog/algorithm_template.yml @@ -0,0 +1,56 @@ +Title: N/A +Venue: ICCV/arXiv/ECCV/... +Year: N/A +Lab/Company: N/A +URL: + Venue: N/A + Arxiv: N/A +Bibtex: N/A +Code: N/A +# If the same network has different results on the same dataset, +# select the best one +Experiments: + Name: N/A + Metadata: + Device: N/A + FLOPs: N/A + Params: N/A + InferenceTime: N/A + Training Data: MJ\ST\SA\Real + Results: + - Test Data: IIIT5K + Metrics: + WAICS: N/A + - Test Data: SVT + Metrics: + WAICS: N/A + - Test Data: IC13 + Metrics: + WAICS: N/A + - Test Data: IC15 + Metrics: + WAICS: N/A + - Test Data: SVTP + Metrics: + WAICS: N/A + - Test Data: CUTE + Metrics: + WAICS: N/A + +Abstract: +Network Structure: # URL + +# Optional +Architecture: + - CTC + - Attention + - Transformer +Learning Method: + - Self-Supervised + - Semi-Supervised + - Contrastive + - Supervised +Language Modality: + - Implicit Language Model + - Explicit Language Model + - Language Free Model From 26b4debc50521a1731056069cca869f16da17b54 Mon Sep 17 00:00:00 2001 From: liukuikun <641417025@qq.com> Date: Tue, 21 Mar 2023 22:19:19 +0800 Subject: [PATCH 2/2] update index --- docs/en/_static/js/collapsed.js | 9 --------- docs/en/paper_zoo.py | 15 ++++++++++----- docs/en/paper_zoo/index.rst | 14 ++++++++------ 3 files 
changed, 18 insertions(+), 20 deletions(-) diff --git a/docs/en/_static/js/collapsed.js b/docs/en/_static/js/collapsed.js index eb593cb70..3124c144d 100644 --- a/docs/en/_static/js/collapsed.js +++ b/docs/en/_static/js/collapsed.js @@ -1,10 +1 @@ var collapsedSections = ['Migration Guides', 'API Reference'] - -$(document).ready(function () { - $('.model-summary').DataTable({ - "stateSave": false, - "lengthChange": false, - "pageLength": 20, - "order": [] - }); -}); diff --git a/docs/en/paper_zoo.py b/docs/en/paper_zoo.py index 82994b57b..3ad9bb579 100755 --- a/docs/en/paper_zoo.py +++ b/docs/en/paper_zoo.py @@ -195,7 +195,8 @@ def gen_algorithm_overview(self, save_path): with open(save_path, 'w') as f: f.write('# Overview\n') - f.write("""```{table}\n:class: model-summary\n""") + f.write('```{table}\n:class: model-summary nowrap field-list ' + 'table table-hover\n') f.write(tabulate(rows, self.overview_header, **self.table_cfg)) f.write('\n```\n') @@ -235,7 +236,9 @@ def gen_sota(self, save_path): rows = sorted(rows, key=lambda x: x[-1]) with open(save_path, 'w') as f: f.write('# SOTA\n') - f.write("""```{table}\n:class: model-summary\n""") + f.write( + '```{table}\n:class: model-summary nowrap field-list table ' + 'table-hover\n') header = self.sota_header + benchmark_dataset_list + ['Avg'] f.write(tabulate(rows, header, **self.table_cfg)) f.write('\n```\n') @@ -315,7 +318,9 @@ def gen_sota(self, save_path): rows = sorted(rows, key=lambda x: x[-1]) with open(save_path, 'w') as f: f.write('# SOTA\n') - f.write("""```{table}\n:class: model-summary\n""") + f.write( + '```{table}\n:class: model-summary nowrap field-list table ' + 'table-hover\n') header = self.sota_header + [ f'{dataset}_{metric_name}' for dataset in benchmark_dataset_list @@ -375,12 +380,12 @@ def gen_algorithm_single(self, paper_dirs): papers_dir = '../../paper_zoo' # papers_dir = 'paper_zoo' -save_dir = osp.join('docs/en/paper_zoo', 'textrecog') +save_dir = osp.join('paper_zoo', 'textrecog') 
paper_list = TextRecogPaperList(papers_dir, 'textrecog', save_dir) os.makedirs(save_dir, exist_ok=True) paper_list.gen_algorithm() -save_dir = osp.join('docs/en/paper_zoo', 'textdet') +save_dir = osp.join('paper_zoo', 'textdet') paper_list = TextDetPaperList(papers_dir, 'textdet', save_dir) os.makedirs(save_dir, exist_ok=True) paper_list.gen_algorithm() diff --git a/docs/en/paper_zoo/index.rst b/docs/en/paper_zoo/index.rst index dea812219..429872a6d 100644 --- a/docs/en/paper_zoo/index.rst +++ b/docs/en/paper_zoo/index.rst @@ -1,14 +1,16 @@ -Text Recognition -************** +Text Detection +*************************** .. toctree:: :maxdepth: 1 - textrecog/overview.md - textrecog/sota.md + textdet/algorithm/overview.md + textdet/algorithm/sota.md +Text Recognition +*************************** .. toctree:: :maxdepth: 1 - textdet/overview.md - textdet/sota.md + textrecog/algorithm/overview.md + textrecog/algorithm/sota.md