From a3e3c7c5bd92cb83f527a073b509e75c8cf1dd66 Mon Sep 17 00:00:00 2001 From: anezih <90565940+anezih@users.noreply.github.com> Date: Sat, 21 Oct 2023 20:45:00 +0300 Subject: [PATCH] 2.4.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://sozluk.gov.tr/dosyalar/SozlugunKullanimiIleIlgiliAciklamalar.pdf madde düzeni ilke 9'a göre: - noktalı virgülden sonra gelen - ► iminden sonra gelen - bk.'dan sonra gelen eş ve yakın anlamlı sözcüklere, Türkçe karşılıklara ve yaygın kullanımlara bağlantı eklendi. --- src/gts_convert.py | 6 +++--- src/kindle.py | 4 ++-- src/util.py | 43 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/src/gts_convert.py b/src/gts_convert.py index b1f6a6e..f0d4c18 100644 --- a/src/gts_convert.py +++ b/src/gts_convert.py @@ -9,9 +9,9 @@ from infl import INFL, GlosSource, Unmunched from kindle import kindle_glos -from util import fix_df_hws, fix_quotes, local_json, out_dir +from util import fix_df_hws, fix_quotes, gondermeler, local_json, out_dir -VERSION = (2, 4, 1) +VERSION = (2, 4, 2) LAST_ID = 99501 DUZELTME_IMLERI_DICT = { "Â" : "A", "â" : "a", @@ -71,7 +71,7 @@ def create_dictionaries(dictionary: list[dict[str,str]], infl_dicts: list[INFL], ozellikler += f'{i["tam_adi"]}{", " if i != a["ozelliklerListe"][-1] else ""}' if len(ozellikler) > 1: anlam += f'[{ozellikler}] ' - anlam += f'{fix_quotes(a["anlam"])}
' + anlam += f'{gondermeler(fix_quotes(a["anlam"]))}
' if a.get("orneklerListe"): for i in a.get("orneklerListe"): anlam += f'
{i["ornek"]}
' diff --git a/src/kindle.py b/src/kindle.py index 17cb32c..0585eb2 100644 --- a/src/kindle.py +++ b/src/kindle.py @@ -1,7 +1,7 @@ from pyglossary.glossary_v2 import Glossary from infl import INFL -from util import fix_quotes +from util import fix_quotes, gondermeler def kindle_glos(glos: Glossary, dictionary: list[dict[str,str]], infl_dicts: list[INFL], normalize) -> Glossary: entry_id_pairs: dict[str,str] = {it["madde"].strip() : f"{idx}".zfill(8) for idx, it in enumerate(dictionary, start=1)} @@ -49,7 +49,7 @@ def get_id(madde: str) -> str: ozellikler += f'{i["tam_adi"]}{", " if i != a["ozelliklerListe"][-1] else ""}' if len(ozellikler) > 1: anlam += f'[{ozellikler}] ' - anlam += f'{fix_quotes(a["anlam"])}
' + anlam += f'{gondermeler(fix_quotes(a["anlam"]), entry_id_pairs)}
' if a.get("orneklerListe"): for i in a.get("orneklerListe"): anlam += f'
{i["ornek"]}
' diff --git a/src/util.py b/src/util.py index fafcbaf..393def7 100644 --- a/src/util.py +++ b/src/util.py @@ -1,5 +1,7 @@ +import html import json import os +import re import tarfile import requests @@ -87,4 +89,43 @@ def local_json(fname: str, LAST_ID: int) -> list[dict]: if not e['madde']: arr.remove(e) arr.sort(key=lambda x: (x["madde"].strip().encode("utf-8").lower(), x["madde"].strip())) - return arr \ No newline at end of file + return arr + +def gondermeler(defi: str, entry_id_pairs: dict[str, str] = None) -> str: + res = re.search(r"(?:;|►|bk\.)\s?([^.:]+)", defi) + if not res: + return defi + else: + new_defi = defi + pointed_words_temp = [word.strip() for word in res.group(1).split(",")] + pointed_words = [] + for pwt in pointed_words_temp: + if " (" in pwt: + pointed_words.append(pwt[:pwt.index(" (")]) + else: + pointed_words.append(pwt) + pointed_words.sort( + key = lambda x: len(x), + reverse = True + ) + # kindle + if entry_id_pairs: + for pw in pointed_words: + kindle_pointed_word_id = entry_id_pairs.get(pw) + if not kindle_pointed_word_id: + return defi + else: + new_defi = new_defi.replace( + pw, + f'{pw}', + 1 + ) + return new_defi + # stardict + for pw in pointed_words: + new_defi = new_defi.replace( + pw, + f'{pw}', + 1 + ) + return new_defi \ No newline at end of file