2.4.2

https://sozluk.gov.tr/dosyalar/SozlugunKullanimiIleIlgiliAciklamalar.pdf madde düzeni ilke 9'a göre: - noktalı virgülden sonra gelen - ► iminden sonra gelen - bk.'dan sonra gelen eş ve yakın anlamlı sözcüklere, Türkçe karşılıklara ve yaygın kullanımlara bağlantı eklendi.
anezih · Oct 21, 2023 · a3e3c7c · a3e3c7c
1 parent 1840e47
commit a3e3c7c
Show file tree

Hide file tree

Showing 3 changed files with 47 additions and 6 deletions.
diff --git a/src/gts_convert.py b/src/gts_convert.py
@@ -9,9 +9,9 @@
 
 from infl import INFL, GlosSource, Unmunched
 from kindle import kindle_glos
-from util import fix_df_hws, fix_quotes, local_json, out_dir
+from util import fix_df_hws, fix_quotes, gondermeler, local_json, out_dir
 
-VERSION = (2, 4, 1)
+VERSION = (2, 4, 2)
 LAST_ID = 99501
 DUZELTME_IMLERI_DICT = {
     "Â" : "A", "â" : "a",
@@ -71,7 +71,7 @@ def create_dictionaries(dictionary: list[dict[str,str]], infl_dicts: list[INFL],
                     ozellikler += f'<i>{i["tam_adi"]}{", " if i != a["ozelliklerListe"][-1] else ""}</i>'
                 if len(ozellikler) > 1:
                     anlam += f'<span style="color:#696969">[{ozellikler}]</span> '
-            anlam += f'{fix_quotes(a["anlam"])}<br/>'
+            anlam += f'{gondermeler(fix_quotes(a["anlam"]))}<br/>'
             if a.get("orneklerListe"):
                 for i in a.get("orneklerListe"):
                     anlam += f'<br/><span style="margin-left:1.3em;margin-right:1.3em">▪ <i>{i["ornek"]}</i></span><br/>'

diff --git a/src/kindle.py b/src/kindle.py
@@ -1,7 +1,7 @@
 from pyglossary.glossary_v2 import Glossary
 
 from infl import INFL
-from util import fix_quotes
+from util import fix_quotes, gondermeler
 
 def kindle_glos(glos: Glossary, dictionary: list[dict[str,str]], infl_dicts: list[INFL], normalize) -> Glossary:
     entry_id_pairs: dict[str,str] = {it["madde"].strip() : f"{idx}".zfill(8) for idx, it in enumerate(dictionary, start=1)}
@@ -49,7 +49,7 @@ def get_id(madde: str) -> str:
                     ozellikler += f'<i>{i["tam_adi"]}{", " if i != a["ozelliklerListe"][-1] else ""}</i>'
                 if len(ozellikler) > 1:
                     anlam += f'<span style="color:#696969">[{ozellikler}]</span> '
-            anlam += f'{fix_quotes(a["anlam"])}<br/>'
+            anlam += f'{gondermeler(fix_quotes(a["anlam"]), entry_id_pairs)}<br/>'
             if a.get("orneklerListe"):
                 for i in a.get("orneklerListe"):
                     anlam += f'<br/><span style="margin-left:1.3em;margin-right:1.3em">▪ <i>{i["ornek"]}</i></span><br/>'

diff --git a/src/util.py b/src/util.py
@@ -1,5 +1,7 @@
+import html
 import json
 import os
+import re
 import tarfile
 
 import requests
@@ -87,4 +89,43 @@ def local_json(fname: str, LAST_ID: int) -> list[dict]:
         if not e['madde']:
             arr.remove(e)
     arr.sort(key=lambda x: (x["madde"].strip().encode("utf-8").lower(), x["madde"].strip()))
-    return arr
+    return arr
+
def gondermeler(defi: str, entry_id_pairs: "dict[str, str] | None" = None) -> str:
    """Turn the cross-referenced words of a definition into hyperlinks.

    The first ``;``, ``►`` or ``bk.`` marker in *defi* starts a reference
    span that runs up to the next ``.`` or ``:`` (or the end of the text).
    The span is a comma-separated list of headwords; each headword may be
    followed by a parenthesised note such as ``kara (I)``, which is kept in
    the output but left outside the link.

    Args:
        defi: Definition text to process.
        entry_id_pairs: Mapping of headword to Kindle entry id. When given
            (and non-empty) links are written as ``<a href="#ID">``;
            otherwise StarDict-style ``bword://`` links are written.

    Returns:
        *defi* with every referenced headword wrapped in an ``<a>`` tag.
        *defi* is returned unchanged when it has no marker, or, in Kindle
        mode, when any referenced headword has no entry id.
    """
    res = re.search(r"(?:;|►|bk\.)\s?([^.:]+)", defi)
    if not res:
        return defi

    # Rebuild the reference span item by item instead of running
    # str.replace() over the whole definition: that keeps text before the
    # marker untouched and prevents a short headword from being replaced
    # inside an anchor that was inserted for a longer one.
    linked_parts = []
    for part in res.group(1).split(","):
        lead_len = len(part) - len(part.lstrip())
        # Drop a trailing parenthesised note ("kara (I)" -> "kara").
        word = part.strip().partition(" (")[0].rstrip()
        if not word:
            # Empty list item (e.g. trailing comma): nothing to link.
            linked_parts.append(part)
            continue
        if entry_id_pairs:
            # kindle
            target_id = entry_id_pairs.get(word)
            if not target_id:
                # All-or-nothing: one unknown headword cancels every link.
                return defi
            anchor = f'<a href="#{target_id}">{word}</a>'
        else:
            # stardict
            anchor = f'<a href="bword://{html.escape(word)}">{word}</a>'
        linked_parts.append(
            part[:lead_len] + anchor + part[lead_len + len(word):]
        )

    return defi[:res.start(1)] + ",".join(linked_parts) + defi[res.end(1):]