From a3e3c7c5bd92cb83f527a073b509e75c8cf1dd66 Mon Sep 17 00:00:00 2001
From: anezih <90565940+anezih@users.noreply.github.com>
Date: Sat, 21 Oct 2023 20:45:00 +0300
Subject: [PATCH] 2.4.2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
https://sozluk.gov.tr/dosyalar/SozlugunKullanimiIleIlgiliAciklamalar.pdf madde düzeni ilke 9'a göre:
- noktalı virgülden sonra gelen
- ► iminden sonra gelen
- bk.'dan sonra gelen
eş ve yakın anlamlı sözcüklere, Türkçe karşılıklara ve yaygın kullanımlara bağlantı eklendi.
---
src/gts_convert.py | 6 +++---
src/kindle.py | 4 ++--
src/util.py | 43 ++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 47 insertions(+), 6 deletions(-)
diff --git a/src/gts_convert.py b/src/gts_convert.py
index b1f6a6e..f0d4c18 100644
--- a/src/gts_convert.py
+++ b/src/gts_convert.py
@@ -9,9 +9,9 @@
from infl import INFL, GlosSource, Unmunched
from kindle import kindle_glos
-from util import fix_df_hws, fix_quotes, local_json, out_dir
+from util import fix_df_hws, fix_quotes, gondermeler, local_json, out_dir
-VERSION = (2, 4, 1)
+VERSION = (2, 4, 2)
LAST_ID = 99501
DUZELTME_IMLERI_DICT = {
"Â" : "A", "â" : "a",
@@ -71,7 +71,7 @@ def create_dictionaries(dictionary: list[dict[str,str]], infl_dicts: list[INFL],
ozellikler += f'{i["tam_adi"]}{", " if i != a["ozelliklerListe"][-1] else ""}'
if len(ozellikler) > 1:
anlam += f'[{ozellikler}] '
- anlam += f'{fix_quotes(a["anlam"])}
'
+ anlam += f'{gondermeler(fix_quotes(a["anlam"]))}
'
if a.get("orneklerListe"):
for i in a.get("orneklerListe"):
anlam += f'
▪ {i["ornek"]}
'
diff --git a/src/kindle.py b/src/kindle.py
index 17cb32c..0585eb2 100644
--- a/src/kindle.py
+++ b/src/kindle.py
@@ -1,7 +1,7 @@
from pyglossary.glossary_v2 import Glossary
from infl import INFL
-from util import fix_quotes
+from util import fix_quotes, gondermeler
def kindle_glos(glos: Glossary, dictionary: list[dict[str,str]], infl_dicts: list[INFL], normalize) -> Glossary:
entry_id_pairs: dict[str,str] = {it["madde"].strip() : f"{idx}".zfill(8) for idx, it in enumerate(dictionary, start=1)}
@@ -49,7 +49,7 @@ def get_id(madde: str) -> str:
ozellikler += f'{i["tam_adi"]}{", " if i != a["ozelliklerListe"][-1] else ""}'
if len(ozellikler) > 1:
anlam += f'[{ozellikler}] '
- anlam += f'{fix_quotes(a["anlam"])}
'
+ anlam += f'{gondermeler(fix_quotes(a["anlam"]), entry_id_pairs)}
'
if a.get("orneklerListe"):
for i in a.get("orneklerListe"):
anlam += f'
▪ {i["ornek"]}
'
diff --git a/src/util.py b/src/util.py
index fafcbaf..393def7 100644
--- a/src/util.py
+++ b/src/util.py
@@ -1,5 +1,7 @@
+import html
import json
import os
+import re
import tarfile
import requests
@@ -87,4 +89,43 @@ def local_json(fname: str, LAST_ID: int) -> list[dict]:
if not e['madde']:
arr.remove(e)
arr.sort(key=lambda x: (x["madde"].strip().encode("utf-8").lower(), x["madde"].strip()))
- return arr
\ No newline at end of file
+ return arr
+
+def gondermeler(defi: str, entry_id_pairs: dict[str, str] = None) -> str:  # Process the cross-referenced words in one definition string; entry_id_pairs maps headword -> id and is passed only by the kindle caller.
+ res = re.search(r"(?:;|►|bk\.)\s?([^.:]+)", defi)  # First ';', '►' or 'bk.' marker, at most one whitespace char, then capture everything up to the next '.' or ':'.
+ if not res:  # No marker in the definition.
+ return defi  # Definition is returned unchanged.
+ else:
+ new_defi = defi  # Working copy; defi itself is kept so it can be returned untouched on bail-out.
+ pointed_words_temp = [word.strip() for word in res.group(1).split(",")]  # Captured text is a comma-separated list of referenced words.
+ pointed_words = []
+ for pwt in pointed_words_temp:
+ if " (" in pwt:
+ pointed_words.append(pwt[:pwt.index(" (")])  # Keep only the text before the first " (" (drops a trailing parenthetical).
+ else:
+ pointed_words.append(pwt)
+ pointed_words.sort(  # Longest words first.
+ key = lambda x: len(x),
+ reverse = True  # NOTE(review): presumably so a longer phrase is handled before a shorter word it contains - confirm.
+ )
+ # Kindle path: taken only when entry_id_pairs is truthy (None or an empty dict falls through to the stardict path).
+ if entry_id_pairs:
+ for pw in pointed_words:
+ kindle_pointed_word_id = entry_id_pairs.get(pw)  # Id of the referenced headword, or None when it is not a known entry.
+ if not kindle_pointed_word_id:
+ return defi  # One unknown word aborts everything: the original text is returned, discarding replacements already made.
+ else:
+ new_defi = new_defi.replace(  # Replaces the first occurrence of pw anywhere in the text, not only after the marker.
+ pw,
+ f'{pw}',  # NOTE(review): as shown this equals pw (a no-op) and the id is unused; link markup was probably lost from this copy - confirm against the repository.
+ 1
+ )
+ return new_defi
+ # StarDict path: no id lookup, every extracted word is processed.
+ for pw in pointed_words:
+ new_defi = new_defi.replace(  # First occurrence of pw anywhere in the text.
+ pw,
+ f'{pw}',  # NOTE(review): as shown this equals pw (a no-op); link markup was probably lost from this copy - confirm against the repository.
+ 1
+ )
+ return new_defi
\ No newline at end of file