From 817ee351a18ff34d8b30d1c9d23aaa59edb5090c Mon Sep 17 00:00:00 2001 From: Sergei Izmailov Date: Fri, 22 May 2020 00:06:42 +0300 Subject: [PATCH 1/7] Add name suffixes to search index --- documentation/python.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/documentation/python.py b/documentation/python.py index d6d9139f..d1b651a9 100755 --- a/documentation/python.py +++ b/documentation/python.py @@ -2296,7 +2296,14 @@ def render_page(state: State, path, input_filename, env): def is_html_safe(string): return '<' not in string and '>' not in string and '&' not in string and '"' not in string and '\'' not in string -def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers=True, merge_prefixes=True) -> bytearray: +_snake_case_point ='_[^_]' +_snake_case_point_re = re.compile(_snake_case_point) +_camel_case_point = '[^A-Z][A-Z].' +_camel_case_point_re = re.compile(_camel_case_point) +_camel_or_snake_case_point_re = re.compile('({})|({})'.format(_snake_case_point, _camel_case_point)) + +def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers=True, + add_snake_case_suffixes=True, add_camel_case_suffixes=True, merge_prefixes=True) -> bytearray: trie = Trie() map = ResultMap() @@ -2360,6 +2367,22 @@ def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers= if hasattr(result, 'params') and result.params is not None: trie.insert(name.lower() + '()', index_args, lookahead_barriers=lookahead_barriers + [len(name)] if add_lookahead_barriers else []) + if add_camel_case_suffixes and add_snake_case_suffixes: + prefix_end_re = _camel_or_snake_case_point_re + elif add_camel_case_suffixes: + prefix_end_re = _camel_case_point_re + elif add_snake_case_suffixes: + prefix_end_re = _snake_case_point_re + else: + prefix_end_re = None + if prefix_end_re: + for m in prefix_end_re.finditer(result.name.lstrip('__')): + name = result.name[m.start(0)+1:] + print(result.name, 
name) + trie.insert(name.lower(), index) + if hasattr(result, 'params') and result.params is not None: + trie.insert(name.lower() + '()', index_args, lookahead_barriers=[len(name)]) + # Add this symbol to total symbol count symbol_count += 1 From ef1d802801217ad2027b71924286f22baaf4c80b Mon Sep 17 00:00:00 2001 From: Sergei Izmailov Date: Fri, 22 May 2020 00:29:31 +0300 Subject: [PATCH 2/7] Expose name suffix search options to higher level function --- documentation/python.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/documentation/python.py b/documentation/python.py index d1b651a9..17598e23 100755 --- a/documentation/python.py +++ b/documentation/python.py @@ -2392,7 +2392,7 @@ def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers= return serialize_search_data(trie, map, search_type_map, symbol_count, merge_subtrees=merge_subtrees, merge_prefixes=merge_prefixes) -def run(basedir, config, *, templates=default_templates, search_add_lookahead_barriers=True, search_merge_subtrees=True, search_merge_prefixes=True): +def run(basedir, config, *, templates=default_templates, search_add_lookahead_barriers=True, search_add_snake_case_suffixes=True, search_add_camel_case_suffixes=True, search_merge_subtrees=True, search_merge_prefixes=True): # Populate the INPUT, if not specified, make it absolute if config['INPUT'] is None: config['INPUT'] = basedir else: config['INPUT'] = os.path.join(basedir, config['INPUT']) @@ -2618,7 +2618,7 @@ def fetch_class_index(entry): if not state.config['SEARCH_DISABLED']: logging.debug("building search data for {} symbols".format(len(state.search))) - data = build_search_data(state, add_lookahead_barriers=search_add_lookahead_barriers, merge_subtrees=search_merge_subtrees, merge_prefixes=search_merge_prefixes) + data = build_search_data(state, add_lookahead_barriers=search_add_lookahead_barriers, add_snake_case_suffixes=search_add_snake_case_suffixes, 
add_camel_case_suffixes=search_add_camel_case_suffixes, merge_subtrees=search_merge_subtrees, merge_prefixes=search_merge_prefixes) # Joining twice, first before passing those to the URL formatter and # second after. If SEARCH_DOWNLOAD_BINARY is a string, use that as a From 60860f34fafb70426509a8875051b440405b821d Mon Sep 17 00:00:00 2001 From: Sergei Izmailov Date: Fri, 22 May 2020 02:40:37 +0300 Subject: [PATCH 3/7] Adjust tests (might be better to keep tests untouched and adjust run parameters instead) --- documentation/test_python/test_search.py | 75 ++++++++++++++++-------- 1 file changed, 50 insertions(+), 25 deletions(-) diff --git a/documentation/test_python/test_search.py b/documentation/test_python/test_search.py index 2bb395c5..8f55d078 100644 --- a/documentation/test_python/test_search.py +++ b/documentation/test_python/test_search.py @@ -43,7 +43,7 @@ def test(self): serialized = f.read() search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0] #print(search_data_pretty) - self.assertEqual(len(serialized), 2269) + self.assertEqual(len(serialized), 2585) self.assertEqual(search_data_pretty, """ 21 symbols search [14] @@ -84,9 +84,12 @@ def test(self): || sub [28] || | .$ || | data_in_a_submodule [27] +|loth [8] +|| s [9] |ub [28] || .$ || data_in_a_submodule [27] +|| module [27] foo [7, 21] || .$ || enum [0] @@ -104,15 +107,15 @@ def test(self): || withslots [9] || | .$ || | im_a_sloth [8] -|unc_with_params [12] -|| | ($ -|| | ) [13] -|| tion [22] +|unction [10, 22] || | ($ -|| | ) [23] +|| | ) [11, 23] || | _with_params [24] || | | ($ || | | ) [25] +|| _with_params [12] +|| | ($ +|| | ) [13] enum [0] | .$ | a_value [1] @@ -122,26 +125,42 @@ def test(self): ||| ($ ||| ) [4] ||property [5] +||sloth [8] +|||ubmodule [27] ||function [10] ||| ($ ||| ) [11] |nother [2] +value [1] +method [3, 17, 19, 15] +| ($ +| ) [4, 18, 20, 16] +property [5] +|arams [12, 24] +|| ($ +|| ) [13, 25] +|ybind [26] +|| .$ +|| foo [21] +|| | .$ +|| | 
overloaded_method [17, 19, 15] +|| | ($ +|| | ) [18, 20, 16] +|| unction [22] +|| | ($ +|| | ) [23] +|| | _with_params [24] +|| | | ($ +|| | | ) [25] data_declaration [6] -| in_a_submodule [27] +|| in_a_submodule [27] +|eclaration [6] im_a_sloth [8] -pybind [26] -| .$ -| foo [21] -| | .$ -| | overloaded_method [17, 19, 15] -| | ($ -| | ) [18, 20, 16] -| unction [22] -| | ($ -| | ) [23] -| | _with_params [24] -| | | ($ -| | | ) [25] +|n_a_submodule [27] +withslots [9] +| _params [12, 24] +| | ($ +| | ) [13, 25] overloaded_method [17, 19, 15] | ($ | ) [18, 20, 16] @@ -196,18 +215,24 @@ def test(self): serialized = f.read() search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0] #print(search_data_pretty) - self.assertEqual(len(serialized), 633) + self.assertEqual(len(serialized), 755) # The parameters get cut off with an ellipsis self.assertEqual(search_data_pretty, """ 3 symbols search_long_suffix_length [4] -| .$ -| many_parameters [0, 2] -| ($ -| ) [1, 3] +|| .$ +|| many_parameters [0, 2] +|| ($ +|| ) [1, 3] +|uffix_length [4] many_parameters [0, 2] | ($ | ) [1, 3] +parameters [0, 2] +| ($ +| ) [1, 3] +long_suffix_length [4] +|ength [4] 0: .many_parameters(arg0: typing.Tuple[float, int, str, typing.List[…) [prefix=4[:30], suffix_length=53, type=FUNCTION] -> #many_parameters-06151 1: [prefix=0[:52], suffix_length=51, type=FUNCTION] -> 2: .many_parameters(arg0: typing.Tuple[int, float, str, typing.List[…) [prefix=4[:30], suffix_length=53, type=FUNCTION] -> #many_parameters-31300 From 45fd0307a36ba7a04baa5c0f5a626aa1a9cf6e06 Mon Sep 17 00:00:00 2001 From: Sergei Izmailov Date: Mon, 25 May 2020 23:49:22 +0300 Subject: [PATCH 4/7] Remove debug print --- documentation/python.py | 1 - 1 file changed, 1 deletion(-) diff --git a/documentation/python.py b/documentation/python.py index 17598e23..02cb1652 100755 --- a/documentation/python.py +++ b/documentation/python.py @@ -2378,7 +2378,6 @@ def build_search_data(state: State, merge_subtrees=True, 
add_lookahead_barriers= if prefix_end_re: for m in prefix_end_re.finditer(result.name.lstrip('__')): name = result.name[m.start(0)+1:] - print(result.name, name) trie.insert(name.lower(), index) if hasattr(result, 'params') and result.params is not None: trie.insert(name.lower() + '()', index_args, lookahead_barriers=[len(name)]) From 0f94a3c828846851699704315bc954d0af2077f6 Mon Sep 17 00:00:00 2001 From: Sergei Izmailov Date: Tue, 9 Jun 2020 16:38:54 +0300 Subject: [PATCH 5/7] Remove repeated key --- documentation/python.py | 1 - 1 file changed, 1 deletion(-) diff --git a/documentation/python.py b/documentation/python.py index 02cb1652..2a23d87c 100755 --- a/documentation/python.py +++ b/documentation/python.py @@ -139,7 +139,6 @@ def default_id_formatter(type: EntryType, path: List[str]) -> str: 'INPUT_MODULES': [], 'INPUT_PAGES': [], 'INPUT_DOCS': [], - 'OUTPUT': 'output', 'THEME_COLOR': '#22272e', 'FAVICON': 'favicon-dark.png', 'STYLESHEETS': [ From 29b40ec4e80cd84f7b7c8cdc88e7dfc83e8723e1 Mon Sep 17 00:00:00 2001 From: Sergei Izmailov Date: Tue, 9 Jun 2020 16:47:05 +0300 Subject: [PATCH 6/7] Add CamelCase/snake_case suffixes to doxygen.py --- documentation/doxygen.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/documentation/doxygen.py b/documentation/doxygen.py index 82641407..65e79612 100755 --- a/documentation/doxygen.py +++ b/documentation/doxygen.py @@ -2318,7 +2318,14 @@ def extract_link(link): if state.doxyfile['M_FAVICON']: state.doxyfile['M_FAVICON'] = (state.doxyfile['M_FAVICON'], mimetypes.guess_type(state.doxyfile['M_FAVICON'])[0]) -def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers=True, merge_prefixes=True) -> bytearray: +_snake_case_point ='_[^_]' +_camel_case_point = '[^A-Z][A-Z].' 
+_snake_case_point_re = re.compile(_snake_case_point) +_camel_case_point_re = re.compile(_camel_case_point) +_camel_or_snake_case_point_re = re.compile('({})|({})'.format(_snake_case_point, _camel_case_point)) + +def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers=True, add_snake_case_suffixes=True, + add_camel_case_suffixes=True, merge_prefixes=True) -> bytearray: trie = Trie() map = ResultMap() @@ -2400,6 +2407,21 @@ def strip_tags(text): keyword_index = map.add(title, '', alias=index, suffix_length=suffix_length) trie.insert(search.lower(), keyword_index) + if add_camel_case_suffixes and add_snake_case_suffixes: + prefix_end_re = _camel_or_snake_case_point_re + elif add_camel_case_suffixes: + prefix_end_re = _camel_case_point_re + elif add_snake_case_suffixes: + prefix_end_re = _snake_case_point_re + else: + prefix_end_re = None + if prefix_end_re: + for m in prefix_end_re.finditer(result.name.lstrip('__')): + name = result.name[m.start(0)+1:] + trie.insert(name.lower(), index) + if hasattr(result, 'params') and result.params is not None: + trie.insert(name.lower() + '()', index_args, lookahead_barriers=[len(name)]) + # Add this symbol and all its aliases to total symbol count symbol_count += len(result.keywords) + 1 @@ -3495,7 +3517,9 @@ def parse_value(var): default_wildcard = '*.xml' default_templates = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'templates/doxygen/') -def run(state: State, *, templates=default_templates, wildcard=default_wildcard, index_pages=default_index_pages, search_add_lookahead_barriers=True, search_merge_subtrees=True, search_merge_prefixes=True, sort_globbed_files=False): +def run(state: State, *, templates=default_templates, wildcard=default_wildcard, index_pages=default_index_pages, + search_add_lookahead_barriers=True, add_snake_case_suffixes=True, add_camel_case_suffixes=True, + search_merge_subtrees=True, search_merge_prefixes=True, sort_globbed_files=False): xml_input = 
os.path.join(state.basedir, state.doxyfile['OUTPUT_DIRECTORY'], state.doxyfile['XML_OUTPUT']) xml_files_metadata = [os.path.join(xml_input, f) for f in glob.glob(os.path.join(xml_input, "*.xml"))] xml_files = [os.path.join(xml_input, f) for f in glob.glob(os.path.join(xml_input, wildcard))] @@ -3620,7 +3644,9 @@ def rtrim(value): return value.rstrip() if not state.doxyfile['M_SEARCH_DISABLED']: logging.debug("building search data for {} symbols".format(len(state.search))) - data = build_search_data(state, add_lookahead_barriers=search_add_lookahead_barriers, merge_subtrees=search_merge_subtrees, merge_prefixes=search_merge_prefixes) + data = build_search_data(state, add_lookahead_barriers=search_add_lookahead_barriers, + add_snake_case_suffixes=add_snake_case_suffixes, add_camel_case_suffixes=add_camel_case_suffixes, + merge_subtrees=search_merge_subtrees, merge_prefixes=search_merge_prefixes) if state.doxyfile['M_SEARCH_DOWNLOAD_BINARY']: with open(os.path.join(html_output, searchdata_filename), 'wb') as f: From 43ff8e343b39d50f1860ad0f759318293e01f201 Mon Sep 17 00:00:00 2001 From: Sergei Izmailov Date: Tue, 9 Jun 2020 17:07:31 +0300 Subject: [PATCH 7/7] Update doxygen search tests --- documentation/test_doxygen/test_search.py | 88 +++++++++++++++-------- 1 file changed, 58 insertions(+), 30 deletions(-) diff --git a/documentation/test_doxygen/test_search.py b/documentation/test_doxygen/test_search.py index 8da0e12a..3fefb0eb 100755 --- a/documentation/test_doxygen/test_search.py +++ b/documentation/test_doxygen/test_search.py @@ -44,7 +44,7 @@ def test(self): serialized = f.read() search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0] #print(search_data_pretty) - self.assertEqual(len(serialized), 4836) + self.assertEqual(len(serialized), 5112) self.assertEqual(search_data_pretty, """ 53 symbols deprecated_macro [0] @@ -83,17 +83,29 @@ def test(self): || class [53] || struct [54] || union [58] -|ir [24] +|ir [24, 23] || /$ || file.h [9] -macro 
[3] +macro [3, 0] +|| ($ +|| ) [1] || _function [5] -|| ($ -|| ) [6] -|| _with_params [7] -|| | ($ -|| | ) [8] +|| | ($ +|| | ) [6] +|| | _with_params [7] +|| | | ($ +|| | | ) [8] |in() [49] +file.h [9, 2] +|unction [5] +|| ($ +|| ) [6] +|| _with_params [7] +|| | ($ +|| | ) [8] +|oo [11, 14, 18, 16, 37] +|| ($ +|| ) [12, 15, 19, 17, 38] glmacro() [4] | file() [10] | |oo() [13] @@ -109,11 +121,13 @@ def test(self): | namespace() [51] | struct() [56] | union() [60] -file.h [9] -|oo [11, 14, 18, 16] -|| ($ -|| ) [12, 15, 19, 17] -namespace [50] +with_params [7] +| ($ +| ) [8] +params [7] +| ($ +| ) [8] +namespace [50, 39] | :$ | :class [20] | | :$ @@ -128,29 +142,31 @@ def test(self): | variable [48] | struct [55] | union [59] -class [20] +class [20, 53] | :$ | :foo [11, 14, 18, 16] | ($ | ) [12, 15, 19, 17] +list [22] a group [29, 28] -| page [52] -| | $ -| | 0xc2 -| | 0xbb -| | subpage [57] -value [41, 31] -| riable [48] -enum [44, 34] +||page [52] +||| $ +||| 0xc2 +||| 0xbb +||| subpage [57] +|brief [40] +value [41, 31, 33] +| riable [48, 36] +enum [44, 34, 32] | :$ | :deprecatedvalue [33] | onlyabrief [40] | value [41] +typedef [46, 35] onlyabrief [40] -typedef [46] -struct [55] +struct [55, 54] |ubpage [57] -union [59] +union [59, 58] 0: DEPRECATED_MACRO(a, b, c) [suffix_length=9, deprecated, type=DEFINE] -> DeprecatedFile_8h.html#a7f8376730349fef9ff7d103b0245a13e 1: [prefix=0[:56], suffix_length=7, deprecated, type=DEFINE] -> 2: /DeprecatedFile.h [prefix=23[:0], deprecated, type=FILE] -> DeprecatedFile_8h.html @@ -239,18 +255,30 @@ def test(self): serialized = f.read() search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0] #print(search_data_pretty) - self.assertEqual(len(serialized), 473) + self.assertEqual(len(serialized), 489) # The parameters get cut off with an ellipsis self.assertEqual(search_data_pretty, """ 2 symbols file.h [2] -| :$ -| :averylongfunctionname [0] -| ($ -| ) [1] +|| :$ +|| :averylongfunctionname [0] +|| ($ +|| ) [1] 
+|unctionname [0] +|| ($ +|| ) [1] averylongfunctionname [0] | ($ | ) [1] +verylongfunctionname [0] +| ($ +| ) [1] +longfunctionname [0] +| ($ +| ) [1] +name [0] +| ($ +| ) [1] 0: ::aVeryLongFunctionName(const std::reference_wrapper #a1e9a11887275938ef5541070955c9d9c 1: [prefix=0[:46], suffix_length=51, type=FUNC] -> 2: File.h [type=FILE] -> File_8h.html