rootwork · getzze · Sep 27, 2023
diff --git a/generate_character_list.py b/generate_character_list.py
@@ -2,20 +2,26 @@
 Download the latest unicode tables from  https://www.unicode.org and create a .txt file
 containing all the names, blocks and character codes
 """
+import sys
 import os
 import logging
 from urllib import request
 
 curr_path = os.path.dirname(__file__)
 logging.basicConfig(level=logging.DEBUG)
 
+# Python 3 has no `unichr` builtin; alias it to `chr` (used when logging unknown codes)
+if sys.version_info[0] >= 3:
+    unichr = chr
+
+BASE_URL = "https://www.unicode.org/Public/UCD/latest/ucd"
 
 def get_blocks():
     """ Download the info file for Unicode blocks.
     """
     logging.info("Downloading block data...")
-    req = request.urlopen("https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt")
-    content = req.read().decode()
+    with request.urlopen(f"{BASE_URL}/Blocks.txt") as req:
+        content = req.read().decode()
     logging.info("Done")
     return content
 
@@ -24,10 +30,8 @@ def get_data():
     """ Download the info file for Unicode blocks.
     """
     logging.info("Downloading character data...")
-    req = request.urlopen(
-        "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"
-    )
-    content = req.read().decode()
+    with request.urlopen(f"{BASE_URL}/UnicodeData.txt") as req:
+        content = req.read().decode()
     logging.info("Done")
     return content
 
@@ -61,18 +65,19 @@ def locate_block(code, left=0, right=len(indices)):
         [start, end] = indices[half]
         if start > code:
             return locate_block(code, left, right=half)
-        elif end < code:
+        if end < code:
             return locate_block(code, half, right=right)
-        else:
-            return blocks[half]
+        return blocks[half]
 
     return locate_block
 
 
-def main():
-    """ Read the character and block data and unite them to a text file containing the following fields:
-    `<character name>   <character comment> <code>  <block name>`
-    seperated by tab characters.
+def main(out: str = "unicode_list.txt"):
+    """Create the file with Unicode characters.
+
+    Read the character and block data and unite them to a text file
+    containing the following fields, separated by tab characters:
+    `<character name> <character comment> <code> <block name>`
     """
     get_block = load_blocks()
     characters = clean(get_data())
@@ -90,20 +95,31 @@ def main():
         try:
             num = int(code, 16)
         except ValueError:
-            logging.warn("Could not convert " + code)
+            logging.warning("Could not convert %s", code)
             continue
 
         # Find the character's block
         blk = get_block(num)
         if blk is not None:
             output.append("\t".join((name, comment, code, blk)))
         else:
-            logging.warn("Code %s not found in any block, char: %s", num, unichr(num))
+            logging.warning("Code %s not found in any block, char: %s", num, unichr(num))
             output.append(name + "\t" + comment + "\t" + code + "\t")
 
-    with open("unicode_list.txt", "w") as target:
+    with open(out, "w", encoding="utf-8") as target:
         target.write("\n".join(output))
 
 
 if __name__ == "__main__":
-    main()
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "path",
+        nargs="?",  # optional positional: without it argparse never uses `default`
+        help="the output path where to save the Unicode list.",
+        default="unicode_list.txt",
+    )
+
+    args = parser.parse_args()
+
+    main(args.path)
diff --git a/main.py b/main.py
@@ -1,18 +1,24 @@
 import os
 import sys
 import codecs
-from os.path import join
-
-import subprocess # for pip autoinstallation
-
-from ulauncher.search.SortedList import SortedList
+import time
+import math
+import shutil
+import html.entities
+import asyncio
+import logging
+from typing import Dict, Optional
+from operator import itemgetter
+
+from ulauncher.utils.fuzzy_search import get_score
 from ulauncher.api.client.Extension import Extension
 from ulauncher.api.client.EventListener import EventListener
-from ulauncher.api.shared.event import KeywordQueryEvent, ItemEnterEvent
+from ulauncher.api.shared.event import KeywordQueryEvent
 from ulauncher.api.shared.item.ExtensionResultItem import ExtensionResultItem
 from ulauncher.api.shared.action.RenderResultListAction import RenderResultListAction
 from ulauncher.api.shared.action.CopyToClipboardAction import CopyToClipboardAction
-from ulauncher.api.shared.action.HideWindowAction import HideWindowAction
+
+logger = logging.getLogger(__name__)
 
 # Be compatible with both python 2 and 3
 if sys.version_info[0] >= 3:
@@ -27,108 +33,173 @@
 </svg>
 """
 
-# For pip autoinstallation
-def ensure_import(package):
-    try:
-        return __import__(package)
-    except ImportError:
-        subprocess.call([sys.executable, "-m", "pip", "install", "--user", package])
-    return __import__(package)
+ExtensionPreferences = Dict[str, str]
+UnicodeCharPreferences = Dict[str, int]
 
-# For HTML entity conversion
-htmlentities = ensure_import("htmlentities")
 
 class UnicodeChar:
-    """ Container class for unicode characters
-    """
+    """Container class for unicode characters."""
 
     def __init__(self, name, comment, block, code):
-        self.name = name if name != '<control>' else comment
+        self.name = name if name != "<control>" else comment
         self.comment = comment
         self.block = block
         self.code = code
         self.character = unichr(int(code, 16))
 
     def get_search_name(self):
-        """ Called by `ulauncher.search.SortedList` to get the string
-        that should be used in searches
-        """
-        return ' '.join([self.character, self.code, self.name, self.comment])
+        """Called to get the string that should be used in searches."""
+        return " ".join([self.character, self.code, self.name, self.comment])
 
 
 class UnicodeCharExtension(Extension):
+    unicode_path: str = "unicode_list.txt"
+
     def __init__(self):
-        super(UnicodeCharExtension, self).__init__()
+        super().__init__()
         check_cache_dir()
         self._load_character_table()
         self.subscribe(KeywordQueryEvent, KeywordQueryEventListener())
 
+    def get_filename(self) -> str:
+        """Default filename of the Unicode list."""
+        return os.path.join(FILE_PATH, self.unicode_path)
+
     def _load_character_table(self):
-        """ Read the data file and load to memory
-        """
+        """Read the data file and load to memory."""
+        filename = self.get_filename()
+
         self.character_list = []
-        with open(join(FILE_PATH, "unicode_list.txt"), "r") as f:
+        with open(filename, "r", encoding="utf-8") as f:
             for line in f.readlines():
                 name, comment, code, block = line.strip().split("\t")
                 character = UnicodeChar(name, comment, block, code)
                 self.character_list.append(character)
 
+    @staticmethod
+    async def refresh_unicode_list(path: str, preferences: UnicodeCharPreferences):
+        """Regenerate the Unicode list at `path` if it is older than `update_interval` days."""
+        # Get timestamp of the last time the file was modified
+        timestamp = os.path.getmtime(path)
+        # Number of whole days since the file was modified (86400 seconds per day)
+        age = math.floor((time.time() - timestamp) / 86400)
+
+        update_interval = preferences["update_interval"]
+        # An interval of 0 or less disables the automatic update.
+        if 0 < update_interval < age:
+            await UnicodeCharExtension.update_unicode_list(path)
+
+    @staticmethod
+    async def update_unicode_list(path: str):
+        """Re-generate an old Unicode list file; a failed update is logged, not raised."""
+        # Save the file to a backup file if there is no backup.
+        backup = path + ".bkp"
+        if not os.path.isfile(backup):
+            logger.info("backup the file with Unicode list to: %s", backup)
+            shutil.copyfile(path, backup)
+        import generate_character_list
+        logger.info("regenerate the file with Unicode list: %s", path)
+        try:  # a failed download/write must not break the query handler
+            generate_character_list.main(path)
+        except OSError:
+            logger.exception("could not regenerate the Unicode list: %s", path)
+
+    @staticmethod
+    def get_preferences(
+        input_preferences: ExtensionPreferences,
+    ) -> UnicodeCharPreferences:
+        """Parse preferences to the correct types."""
+        preferences: UnicodeCharPreferences = {
+            "result_limit": int(input_preferences["result_limit"]),
+            "min_score": int(input_preferences["min_score"]),
+            "update_interval": int(input_preferences["update_interval"]),
+        }
+
+        return preferences
+
+    def search(self, query: str, preferences: UnicodeCharPreferences):
+        """Return a list of result sorted by relevance to the query."""
+        limit = preferences["result_limit"]
+        min_score = preferences["min_score"]
+
+        results = []
+        for c in self.character_list:
+            score = get_score(query, c.get_search_name())
+            if score >= min_score:
+                results.append((score, c))
+                results = sorted(results, reverse=True, key=itemgetter(0))
+                if len(results) > limit:
+                    results = results[:limit]
+
+        return [c for (s, c) in results]
+
 
 class KeywordQueryEventListener(EventListener):
     def on_event(self, event, extension):
+        preferences = extension.get_preferences(extension.preferences)
+        # Re-generate unicode list if it is too old.
+        coro = extension.refresh_unicode_list(extension.get_filename(), preferences)
+        # Run the coroutine to completion; this blocks until the check/refresh is done.
+        asyncio.run(coro)
+
         items = []
-        arg = event.get_argument()
-        if arg:
-            result_list = SortedList(arg, min_score=99, limit=10)
-            result_list.extend(extension.character_list)
-            for char in result_list:
+        query = (event.get_argument() or "").strip()  # argument may be None (keyword only)
+        if query:
+            # Return best characters matching the query, ordered by score.
+            results = extension.search(query, preferences)
+            for char in results:
                 image_path = get_character_icon(char)
-                encoded = htmlentities.encode(char.character)
-                if "&" in encoded:
-                    sep = " - "
-                    html = encoded
-                else:
-                    sep = ""
-                    html = ""
+                html_val = html_encode(char.character)
+                html_str = ""
+                if html_val:
+                    html_str = f" - HTML: {html_val}"
+
                 items.append(
                     ExtensionResultItem(
                         icon=image_path,
-                        name=char.name.capitalize() + " - " + char.character,
-                        description=char.block + " - Alt+Enter: " + html + sep + "Code: U+" + char.code,
+                        name=f"{char.name.capitalize()} - {char.character}",
+                        description=f"{char.block}{html_str} - Alt+Enter: U+{char.code}",
                         on_enter=CopyToClipboardAction(char.character),
-                        on_alt_enter=CopyToClipboardAction(html),
+                        on_alt_enter=CopyToClipboardAction(char.code),
                     )
                 )
         return RenderResultListAction(items)
 
+
+def html_encode(char: str) -> Optional[str]:
+    """Return the HTML entity (e.g. "&amp;") of a unicode char, or None if it has none."""
+    # codepoint2name maps a code point to its entity name, without "&" and ";".
+    # A single .get() avoids looking the code point up twice.
+    name = html.entities.codepoint2name.get(ord(char))
+    return f"&{name};" if name is not None else None
+
+
 def get_character_icon(char):
-    """ Check if there is an existing icon for this character and return its path
+    """Check if there is an existing icon for this character and return its path
     or create a new one and return its path.
     """
-    path = FILE_PATH + "images/cache/icon_%s.svg" % char.code
+    path = os.path.join(FILE_PATH, f"images/cache/icon_{char.code}.svg")
     if os.path.isfile(path):
         return path
     return create_character_icon(char)
 
 
 def create_character_icon(char, font="sans-serif"):
-    """ Create an SVG file containing the unicode glyph for char to be used
+    """Create an SVG file containing the unicode glyph for char to be used
     as a result icon.
 
     Note: this could be avoided by providing a gtk.PixBuf without creating a file,
     but ulauncher pickles the returned results, so it doesn't work currently.
     """
-    icon = ICON_TEMPLATE.replace("{symbol}", char.character).replace("{font}", font)
-    with codecs.open(
-        os.path.join(FILE_PATH, "images/cache/icon_%s.svg" % char.code), "w", "utf-8"
-    ) as target:
+    icon = ICON_TEMPLATE.format(symbol=char.character, font=font)
+    path = os.path.join(FILE_PATH, f"images/cache/icon_{char.code}.svg")
+    with codecs.open(path, "w", "utf-8") as target:
         target.write(icon)
-    return os.path.join(FILE_PATH, "images/cache/icon_%s.svg" % char.code)
+    return path
 
 
 def check_cache_dir(path="images/cache"):
-    """ Check if the cache directory exists and if not create it.
-    """
+    """Check if the cache directory exists and if not create it."""
     path = os.path.join(FILE_PATH, path)
     if not os.path.isdir(path):
         os.mkdir(path)

diff --git a/manifest.json b/manifest.json
@@ -8,12 +8,33 @@
     "query_debounce": 0.5
   },
   "preferences": [
+    {
+      "id": "result_limit",
+      "type": "input",
+      "name": "Result limit",
+      "description": "Number of results that should be returned.",
+      "default_value": "10"
+    },
+    {
+      "id": "min_score",
+      "type": "input",
+      "name": "Minimum score",
+      "description": "Only display results with a matching score higher than this. If too high, e.g. higher than 200, no results will be shown.",
+      "default_value": "0"
+    },
     {
       "id": "symbol",
       "type": "keyword",
       "name": "Symbol",
       "description": "Search symbols in ASCII and Unicode. Enter to copy the symbol, alt+enter to copy the HTML entity. Dark mode friendly.",
       "default_value": "sym"
+    },
+    {
+      "id": "update_interval",
+      "type": "input",
+      "name": "Update interval",
+      "description": "Time interval (in days) after which the cached list of unicode symbols is updated by downloading the newest list. Set to -1 to never update the list.",
+      "default_value": "90"
     }
   ]
 }
diff --git a/versions.json b/versions.json
@@ -1,4 +1,5 @@
 [
   { "required_api_version": "^1.0.0", "commit": "API_v1" },
-  { "required_api_version": "^2.0.0", "commit": "master" }
+  { "required_api_version": "^2.0.0", "commit": "master" },
+  { "required_api_version": "^3.0.0", "commit": "api-v3" }
 ]