Skip to content

Commit

Permalink
Exclude cf from language guess
Browse files: browse the repository at this point in the history
  • Loading branch information
hubertbossot committed Apr 21, 2024
1 parent da975ac commit 9571aae
Showing 1 changed file with 26 additions and 23 deletions.
49 changes: 26 additions & 23 deletions src/wiktionary/fr_wiktionary_functions.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1031,35 +1031,38 @@ def add_language_code_with_named_parameter_to_template(
page_content = page_content[page_content.find('}}')+2:]
return final_page_content, page_content

else:
if debug_level > 0:
print(' "lang=" already present')

regex_lang = r'^[^{}]+\| *lang(?:gue|1)? *= *([a-zA-Z\-]*)'
p = re.compile(regex_lang)
m = p.match(page_content)
if m is None:
if debug_level > 0:
print(' weird case')
return next_template(final_page_content, page_content)

start = end = 0
old_language_code = ''
if m.span(1) is not None:
[start, end] = m.span(1)
old_language_code = page_content[start:end]
if debug_level > 0:
print(' "lang=" ' + old_language_code)
if debug_level > 0:
print(' "lang=" already present')

if language_code == old_language_code:
return next_template(final_page_content, page_content)
if current_template == 'cf':
return next_template(final_page_content, page_content)

# Correct language code with the paragraph's one
regex_lang = r'^[^{}]+\| *lang(?:gue|1)? *= *([a-zA-Z\-]*)'
p = re.compile(regex_lang)
m = p.match(page_content)
if m is None:
if debug_level > 0:
print(' "lang=" correction to ' + language_code)
page_content = page_content[:start] + language_code + page_content[end:]
print(' weird case')
return next_template(final_page_content, page_content)

start = end = 0
old_language_code = ''
if m.span(1) is not None:
[start, end] = m.span(1)
old_language_code = page_content[start:end]
if debug_level > 0:
print(' "lang=" ' + old_language_code)

if language_code == old_language_code:
return next_template(final_page_content, page_content)

if debug_level > 0:
print(' "lang=" correction to ' + language_code)
page_content = page_content[:start] + language_code + page_content[end:]

return next_template(final_page_content, page_content)


def next_template(final_page_content, current_page_content, current_template=None, language_code=None):
if language_code is None:
Expand Down

0 comments on commit 9571aae

Please sign in to comment.