Search name suffixes #149

Draft
wants to merge 7 commits into master
32 changes: 29 additions & 3 deletions documentation/doxygen.py
@@ -2318,7 +2318,14 @@ def extract_link(link):
if state.doxyfile['M_FAVICON']:
state.doxyfile['M_FAVICON'] = (state.doxyfile['M_FAVICON'], mimetypes.guess_type(state.doxyfile['M_FAVICON'])[0])

def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers=True, merge_prefixes=True) -> bytearray:
_snake_case_point = '_[^_]'
_camel_case_point = '[^A-Z][A-Z].'
_snake_case_point_re = re.compile(_snake_case_point)
_camel_case_point_re = re.compile(_camel_case_point)
_camel_or_snake_case_point_re = re.compile('({})|({})'.format(_snake_case_point, _camel_case_point))

def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers=True, add_snake_case_suffixes=True,
add_camel_case_suffixes=True, merge_prefixes=True) -> bytearray:
trie = Trie()
map = ResultMap()

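The two patterns added above mark the positions where a new searchable suffix starts: an underscore followed by a non-underscore for snake_case names, and a lower-to-upper transition for camelCase ones. A minimal sketch of what they match, with the regex strings copied from the hunk above and the sample names chosen purely for illustration:

import re

# Copied from the hunk above; a suffix begins one character after each match start.
_snake_case_point_re = re.compile('_[^_]')
_camel_case_point_re = re.compile('[^A-Z][A-Z].')

# Snake case: the matches sit on the underscores of the name.
print([m.start() for m in _snake_case_point_re.finditer('data_in_a_submodule')])    # [4, 7, 9]

# Camel case: the matches start one character before each internal capital.
print([m.start() for m in _camel_case_point_re.finditer('aVeryLongFunctionName')])  # [0, 4, 8, 16]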
@@ -2400,6 +2407,21 @@ def strip_tags(text):
keyword_index = map.add(title, '', alias=index, suffix_length=suffix_length)
trie.insert(search.lower(), keyword_index)

if add_camel_case_suffixes and add_snake_case_suffixes:
prefix_end_re = _camel_or_snake_case_point_re
elif add_camel_case_suffixes:
prefix_end_re = _camel_case_point_re
elif add_snake_case_suffixes:
prefix_end_re = _snake_case_point_re
else:
prefix_end_re = None
if prefix_end_re:
for m in prefix_end_re.finditer(result.name.lstrip('__')):
name = result.name[m.start(0)+1:]
trie.insert(name.lower(), index)
if hasattr(result, 'params') and result.params is not None:
trie.insert(name.lower() + '()', index_args, lookahead_barriers=[len(name)])

# Add this symbol and all its aliases to total symbol count
symbol_count += len(result.keywords) + 1

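For every match found by the selected pattern, the loop above re-inserts the tail of the name (starting one character past the match start) as an extra trie key, and for callables also the same suffix with '()' appended and a lookahead barrier at the position of the opening parenthesis. A rough standalone sketch of the suffix extraction alone, with the Trie/ResultMap plumbing and the leading-underscore handling left out:

import re

_camel_or_snake_case_point_re = re.compile('(_[^_])|([^A-Z][A-Z].)')

def name_suffixes(name):
    # One suffix per camel/snake split point, lowercased the same way the
    # loop above lowercases keys before inserting them into the trie.
    return [name[m.start(0) + 1:].lower()
            for m in _camel_or_snake_case_point_re.finditer(name)]

print(name_suffixes('aVeryLongFunctionName'))
# ['verylongfunctionname', 'longfunctionname', 'functionname', 'name']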
@@ -3495,7 +3517,9 @@ def parse_value(var):
default_wildcard = '*.xml'
default_templates = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'templates/doxygen/')

def run(state: State, *, templates=default_templates, wildcard=default_wildcard, index_pages=default_index_pages, search_add_lookahead_barriers=True, search_merge_subtrees=True, search_merge_prefixes=True, sort_globbed_files=False):
def run(state: State, *, templates=default_templates, wildcard=default_wildcard, index_pages=default_index_pages,
search_add_lookahead_barriers=True, add_snake_case_suffixes=True, add_camel_case_suffixes=True,
search_merge_subtrees=True, search_merge_prefixes=True, sort_globbed_files=False):
xml_input = os.path.join(state.basedir, state.doxyfile['OUTPUT_DIRECTORY'], state.doxyfile['XML_OUTPUT'])
xml_files_metadata = [os.path.join(xml_input, f) for f in glob.glob(os.path.join(xml_input, "*.xml"))]
xml_files = [os.path.join(xml_input, f) for f in glob.glob(os.path.join(xml_input, wildcard))]
@@ -3620,7 +3644,9 @@ def rtrim(value): return value.rstrip()
if not state.doxyfile['M_SEARCH_DISABLED']:
logging.debug("building search data for {} symbols".format(len(state.search)))

data = build_search_data(state, add_lookahead_barriers=search_add_lookahead_barriers, merge_subtrees=search_merge_subtrees, merge_prefixes=search_merge_prefixes)
data = build_search_data(state, add_lookahead_barriers=search_add_lookahead_barriers,
add_snake_case_suffixes=add_snake_case_suffixes, add_camel_case_suffixes=add_camel_case_suffixes,
merge_subtrees=search_merge_subtrees, merge_prefixes=search_merge_prefixes)

if state.doxyfile['M_SEARCH_DOWNLOAD_BINARY']:
with open(os.path.join(html_output, searchdata_filename), 'wb') as f:
29 changes: 25 additions & 4 deletions documentation/python.py
@@ -139,7 +139,6 @@ def default_id_formatter(type: EntryType, path: List[str]) -> str:
'INPUT_MODULES': [],
'INPUT_PAGES': [],
'INPUT_DOCS': [],
'OUTPUT': 'output',
'THEME_COLOR': '#22272e',
'FAVICON': 'favicon-dark.png',
'STYLESHEETS': [
@@ -2296,7 +2295,14 @@ def render_page(state: State, path, input_filename, env):
def is_html_safe(string):
return '<' not in string and '>' not in string and '&' not in string and '"' not in string and '\'' not in string

def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers=True, merge_prefixes=True) -> bytearray:
_snake_case_point = '_[^_]'
_snake_case_point_re = re.compile(_snake_case_point)
_camel_case_point = '[^A-Z][A-Z].'
_camel_case_point_re = re.compile(_camel_case_point)
_camel_or_snake_case_point_re = re.compile('({})|({})'.format(_snake_case_point, _camel_case_point))

def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers=True,
add_snake_case_suffixes=True, add_camel_case_suffixes=True, merge_prefixes=True) -> bytearray:
trie = Trie()
map = ResultMap()

@@ -2360,6 +2366,21 @@ def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers=
if hasattr(result, 'params') and result.params is not None:
trie.insert(name.lower() + '()', index_args, lookahead_barriers=lookahead_barriers + [len(name)] if add_lookahead_barriers else [])

if add_camel_case_suffixes and add_snake_case_suffixes:
prefix_end_re = _camel_or_snake_case_point_re
elif add_camel_case_suffixes:
prefix_end_re = _camel_case_point_re
elif add_snake_case_suffixes:
prefix_end_re = _snake_case_point_re
else:
prefix_end_re = None
if prefix_end_re:
for m in prefix_end_re.finditer(result.name.lstrip('__')):
name = result.name[m.start(0)+1:]
trie.insert(name.lower(), index)
if hasattr(result, 'params') and result.params is not None:
trie.insert(name.lower() + '()', index_args, lookahead_barriers=[len(name)])

# Add this symbol to total symbol count
symbol_count += 1

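The Python generator gets the same treatment as the Doxygen one above. A quick sketch of what the new loop contributes for one of the snake_case names that appears in the expected test output below, with the combined pattern copied from the definitions above:

import re

_camel_or_snake_case_point_re = re.compile('(_[^_])|([^A-Z][A-Z].)')

name = 'data_in_a_submodule'
for m in _camel_or_snake_case_point_re.finditer(name):
    print(name[m.start(0) + 1:].lower())
# in_a_submodule
# a_submodule
# submodule

Callables additionally get each suffix with '()' appended, guarded by a lookahead barrier at the position of the opening parenthesis, mirroring what the existing full-name insertion a few lines above already does.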
@@ -2369,7 +2390,7 @@ def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers=

return serialize_search_data(trie, map, search_type_map, symbol_count, merge_subtrees=merge_subtrees, merge_prefixes=merge_prefixes)

def run(basedir, config, *, templates=default_templates, search_add_lookahead_barriers=True, search_merge_subtrees=True, search_merge_prefixes=True):
def run(basedir, config, *, templates=default_templates, search_add_lookahead_barriers=True, search_add_snake_case_suffixes=True, search_add_camel_case_suffixes=True, search_merge_subtrees=True, search_merge_prefixes=True):
# Populate the INPUT, if not specified, make it absolute
if config['INPUT'] is None: config['INPUT'] = basedir
else: config['INPUT'] = os.path.join(basedir, config['INPUT'])
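The new keyword arguments are plumbed through run() and forwarded to build_search_data() further down. A purely illustrative call, assuming one wanted to turn the suffix indexing off; the basedir and config values are placeholders, only the keyword names come from the signature above:

run('path/to/project', config,
    search_add_snake_case_suffixes=False,   # no foo_bar -> "bar" entries
    search_add_camel_case_suffixes=False)   # no fooBar -> "bar" entries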
@@ -2595,7 +2616,7 @@ def fetch_class_index(entry):
if not state.config['SEARCH_DISABLED']:
logging.debug("building search data for {} symbols".format(len(state.search)))

data = build_search_data(state, add_lookahead_barriers=search_add_lookahead_barriers, merge_subtrees=search_merge_subtrees, merge_prefixes=search_merge_prefixes)
data = build_search_data(state, add_lookahead_barriers=search_add_lookahead_barriers, add_snake_case_suffixes=search_add_snake_case_suffixes, add_camel_case_suffixes=search_add_camel_case_suffixes, merge_subtrees=search_merge_subtrees, merge_prefixes=search_merge_prefixes)

# Joining twice, first before passing those to the URL formatter and
# second after. If SEARCH_DOWNLOAD_BINARY is a string, use that as a
88 changes: 58 additions & 30 deletions documentation/test_doxygen/test_search.py
@@ -44,7 +44,7 @@ def test(self):
serialized = f.read()
search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0]
#print(search_data_pretty)
self.assertEqual(len(serialized), 4836)
self.assertEqual(len(serialized), 5112)
self.assertEqual(search_data_pretty, """
53 symbols
deprecated_macro [0]
@@ -83,17 +83,29 @@ def test(self):
|| class [53]
|| struct [54]
|| union [58]
|ir [24]
|ir [24, 23]
|| /$
|| file.h [9]
macro [3]
macro [3, 0]
|| ($
|| ) [1]
|| _function [5]
|| ($
|| ) [6]
|| _with_params [7]
|| | ($
|| | ) [8]
|| | ($
|| | ) [6]
|| | _with_params [7]
|| | | ($
|| | | ) [8]
|in() [49]
file.h [9, 2]
|unction [5]
|| ($
|| ) [6]
|| _with_params [7]
|| | ($
|| | ) [8]
|oo [11, 14, 18, 16, 37]
|| ($
|| ) [12, 15, 19, 17, 38]
glmacro() [4]
| file() [10]
| |oo() [13]
@@ -109,11 +121,13 @@ def test(self):
| namespace() [51]
| struct() [56]
| union() [60]
file.h [9]
|oo [11, 14, 18, 16]
|| ($
|| ) [12, 15, 19, 17]
namespace [50]
with_params [7]
| ($
| ) [8]
params [7]
| ($
| ) [8]
namespace [50, 39]
| :$
| :class [20]
| | :$
@@ -128,29 +142,31 @@ def test(self):
| variable [48]
| struct [55]
| union [59]
class [20]
class [20, 53]
| :$
| :foo [11, 14, 18, 16]
| ($
| ) [12, 15, 19, 17]
list [22]
a group [29, 28]
| page [52]
| | $
| | 0xc2
| | 0xbb
| | subpage [57]
value [41, 31]
| riable [48]
enum [44, 34]
||page [52]
||| $
||| 0xc2
||| 0xbb
||| subpage [57]
|brief [40]
value [41, 31, 33]
| riable [48, 36]
enum [44, 34, 32]
| :$
| :deprecatedvalue [33]
| onlyabrief [40]
| value [41]
typedef [46, 35]
onlyabrief [40]
typedef [46]
struct [55]
struct [55, 54]
|ubpage [57]
union [59]
union [59, 58]
0: DEPRECATED_MACRO(a, b, c) [suffix_length=9, deprecated, type=DEFINE] -> DeprecatedFile_8h.html#a7f8376730349fef9ff7d103b0245a13e
1: [prefix=0[:56], suffix_length=7, deprecated, type=DEFINE] ->
2: /DeprecatedFile.h [prefix=23[:0], deprecated, type=FILE] -> DeprecatedFile_8h.html
@@ -239,18 +255,30 @@ def test(self):
serialized = f.read()
search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0]
#print(search_data_pretty)
self.assertEqual(len(serialized), 473)
self.assertEqual(len(serialized), 489)
# The parameters get cut off with an ellipsis
self.assertEqual(search_data_pretty, """
2 symbols
file.h [2]
| :$
| :averylongfunctionname [0]
| ($
| ) [1]
|| :$
|| :averylongfunctionname [0]
|| ($
|| ) [1]
|unctionname [0]
|| ($
|| ) [1]
averylongfunctionname [0]
| ($
| ) [1]
verylongfunctionname [0]
| ($
| ) [1]
longfunctionname [0]
| ($
| ) [1]
name [0]
| ($
| ) [1]
0: ::aVeryLongFunctionName(const std::reference_wrapper<const std::vector<s…) [prefix=2[:12], suffix_length=53, type=FUNC] -> #a1e9a11887275938ef5541070955c9d9c
1: [prefix=0[:46], suffix_length=51, type=FUNC] ->
2: File.h [type=FILE] -> File_8h.html
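The expected dumps in these tests come from the pretty_print() helper already used above; when a change like this one shifts the serialized size and the trie layout, one way to refresh the literals is to temporarily re-enable the commented-out print in each test:

search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0]
print(search_data_pretty)   # paste the output back into the assertEqual literal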
75 changes: 50 additions & 25 deletions documentation/test_python/test_search.py
@@ -43,7 +43,7 @@ def test(self):
serialized = f.read()
search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0]
#print(search_data_pretty)
self.assertEqual(len(serialized), 2269)
self.assertEqual(len(serialized), 2585)
self.assertEqual(search_data_pretty, """
21 symbols
search [14]
@@ -84,9 +84,12 @@ def test(self):
|| sub [28]
|| | .$
|| | data_in_a_submodule [27]
|loth [8]
|| s [9]
|ub [28]
|| .$
|| data_in_a_submodule [27]
|| module [27]
foo [7, 21]
|| .$
|| enum [0]
@@ -104,15 +107,15 @@ def test(self):
|| withslots [9]
|| | .$
|| | im_a_sloth [8]
|unc_with_params [12]
|| | ($
|| | ) [13]
|| tion [22]
|unction [10, 22]
|| | ($
|| | ) [23]
|| | ) [11, 23]
|| | _with_params [24]
|| | | ($
|| | | ) [25]
|| _with_params [12]
|| | ($
|| | ) [13]
enum [0]
| .$
| a_value [1]
@@ -122,26 +125,42 @@ def test(self):
||| ($
||| ) [4]
||property [5]
||sloth [8]
|||ubmodule [27]
||function [10]
||| ($
||| ) [11]
|nother [2]
value [1]
method [3, 17, 19, 15]
| ($
| ) [4, 18, 20, 16]
property [5]
|arams [12, 24]
|| ($
|| ) [13, 25]
|ybind [26]
|| .$
|| foo [21]
|| | .$
|| | overloaded_method [17, 19, 15]
|| | ($
|| | ) [18, 20, 16]
|| unction [22]
|| | ($
|| | ) [23]
|| | _with_params [24]
|| | | ($
|| | | ) [25]
data_declaration [6]
| in_a_submodule [27]
|| in_a_submodule [27]
|eclaration [6]
im_a_sloth [8]
pybind [26]
| .$
| foo [21]
| | .$
| | overloaded_method [17, 19, 15]
| | ($
| | ) [18, 20, 16]
| unction [22]
| | ($
| | ) [23]
| | _with_params [24]
| | | ($
| | | ) [25]
|n_a_submodule [27]
withslots [9]
| _params [12, 24]
| | ($
| | ) [13, 25]
overloaded_method [17, 19, 15]
| ($
| ) [18, 20, 16]
@@ -196,18 +215,24 @@ def test(self):
serialized = f.read()
search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0]
#print(search_data_pretty)
self.assertEqual(len(serialized), 633)
self.assertEqual(len(serialized), 755)
# The parameters get cut off with an ellipsis
self.assertEqual(search_data_pretty, """
3 symbols
search_long_suffix_length [4]
| .$
| many_parameters [0, 2]
| ($
| ) [1, 3]
|| .$
|| many_parameters [0, 2]
|| ($
|| ) [1, 3]
|uffix_length [4]
many_parameters [0, 2]
| ($
| ) [1, 3]
parameters [0, 2]
| ($
| ) [1, 3]
long_suffix_length [4]
|ength [4]
0: .many_parameters(arg0: typing.Tuple[float, int, str, typing.List[…) [prefix=4[:30], suffix_length=53, type=FUNCTION] -> #many_parameters-06151
1: [prefix=0[:52], suffix_length=51, type=FUNCTION] ->
2: .many_parameters(arg0: typing.Tuple[int, float, str, typing.List[…) [prefix=4[:30], suffix_length=53, type=FUNCTION] -> #many_parameters-31300