Skip to content

Commit

Permalink
Fix wrong |flexion removal
Browse files Browse the repository at this point in the history
  • Loading branch information
hubertbossot committed Sep 29, 2024
1 parent ab9abd0 commit 1b77866
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 17 deletions.
12 changes: 8 additions & 4 deletions src/wiktionary/fr_wiktionary_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,10 +393,16 @@ def treat_page(page):
if debug_level > 1:
print(' Definition paragraph')
add_language_code = True # Paragraphe avec code langue dans les modèles lexicaux

if language_code is None:
if debug_level > 0:
pywikibot.output(" find language_code...")
# TODO: gérer les {{S|étymologie}} en milieu d'article
language_code = page_content[
end_position + 1 + len(section) + 1:page_content.find('}}')].replace('|flexion', '')
if debug_level > 0:
pywikibot.output(" language_code found: " + language_code)

# TODO: num=, genre=...
summary = summary + ' ajout du {{langue|' + language_code + '}} manquant'
page_content = '== {{langue|' + language_code + '}} ==\n' + final_page_content[
Expand Down Expand Up @@ -677,8 +683,7 @@ def treat_page(page):
final_page_content, page_content = next_template(final_page_content, page_content,
current_template, 'nocat=1')
else:
final_page_content, page_content = next_template(
final_page_content, page_content)
final_page_content, page_content = next_template(final_page_content, page_content)

elif p < limit8:
if debug_level > 0:
Expand Down Expand Up @@ -794,8 +799,7 @@ def treat_page(page):
page_content.find('}}') < page_content.find('{{') or page_content.find('{{') == -1):
if debug_level > 1:
print(' possible duplicated "lang=" in ' + current_template)
final_page_content, page_content = next_template(
final_page_content, page_content)
final_page_content, page_content = next_template(final_page_content, page_content)
# TODO bug with nested templates:
# https://fr.wiktionary.org/w/index.php?title=Utilisateur:JackBot/test_unitaire&diff=prev&oldid=25811164
# regex = r'({{' + re.escape(current_template) + r')\|lang=' + language_code + '(\|[^{}]*({{(.*?)}}|.)*[^{}]*\|lang=' + language_code + ')'
Expand Down
24 changes: 11 additions & 13 deletions src/wiktionary/fr_wiktionary_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1100,13 +1100,11 @@ def add_language_code_with_named_parameter_to_template(

def next_template(final_page_content, current_page_content, current_template=None, language_code=None):
    """Advance past the next ``}}`` of the page text still to be processed.

    Args:
        final_page_content: Text already processed; the output is appended to it.
        current_page_content: Text still to be processed.
        current_template: Text written before ``'|' + language_code + '}}'``
            when ``language_code`` is given. Must be a string in that case
            (concatenating ``None`` raises ``TypeError``).
        language_code: When ``None``, everything up to and including the next
            ``}}`` is copied verbatim. Otherwise that span is dropped and
            ``current_template + '|' + language_code + '}}'`` is appended
            instead.

    Returns:
        A ``(final_page_content, current_page_content)`` tuple: the extended
        processed text and the remaining text after the consumed ``}}``.
    """
    # Locate the end of the template once; both branches and the final slice
    # share the same cut position.
    # NOTE(review): when no '}}' is present, find() returns -1, so the cut
    # position is 1 and exactly one character is consumed. This matches the
    # previous behaviour; confirm callers rely on it before changing it.
    end_position = current_page_content.find('}}') + 2

    if language_code is None:
        final_page_content = final_page_content + current_page_content[:end_position]
    else:
        final_page_content = final_page_content + current_template + '|' + language_code + '}}'

    current_page_content = current_page_content[end_position:]
    return final_page_content, current_page_content


Expand Down Expand Up @@ -1683,7 +1681,9 @@ def format_sections(page_content, summary):

# Normalize sections title casing
for f in re.findall(r'{{S\|([^}]+)}}', page_content):
page_content = page_content.replace(f, f.lower())
# Do not touch false ISO codes like "gallo-italique de Sicile"
if ' ' not in f:
page_content = page_content.replace(f, f.lower())

# Replace deprecated aliases with language parameter
page_content = page_content.replace('{{S|adj|', '{{S|adjectif|')
Expand Down Expand Up @@ -2938,15 +2938,13 @@ def treat_conjugation(page_content, final_page_content, summary, current_templat
else:
page_content = '|groupe=3' + page_content

if (page_content.find(language_code) != -1 and page_content.find(language_code) < page_content.find(
'}}')) or language_code == 'fr':
final_page_content, page_content = next_template(
final_page_content, page_content)
if (page_content.find(language_code) != -1 and page_content.find(language_code) < page_content.find('}}')) \
or language_code == 'fr':
final_page_content, page_content = next_template(final_page_content, page_content)
else:
if page_content.find('|nocat=1') != -1:
page_content = page_content[:page_content.find('|nocat=1')] + page_content[
page_content.find('|nocat=1') + len(
'|nocat=1'):]
page_content.find('|nocat=1') + len('|nocat=1'):]
final_page_content = final_page_content + '|' + language_code + '}}'
page_content = page_content[page_content.find('}}') + 2:]
return page_content, final_page_content, summary
Expand Down

0 comments on commit 1b77866

Please sign in to comment.