diff --git a/generate_character_list.py b/generate_character_list.py index efbe13f..c1d0647 100644 --- a/generate_character_list.py +++ b/generate_character_list.py @@ -2,6 +2,7 @@ Download the latest unicode tables from https://www.unicode.org and create a .txt file containing all the names, blocks and character codes """ +import sys import os import logging from urllib import request @@ -9,13 +10,18 @@ curr_path = os.path.dirname(__file__) logging.basicConfig(level=logging.DEBUG) +# Be compatible with both python 2 and 3 +if sys.version_info[0] >= 3: + unichr = chr + +BASE_URL = "https://www.unicode.org/Public/UCD/latest/ucd" def get_blocks(): """ Download the info file for Unicode blocks. """ logging.info("Downloading block data...") - req = request.urlopen("https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt") - content = req.read().decode() + with request.urlopen(f"{BASE_URL}/Blocks.txt") as req: + content = req.read().decode() logging.info("Done") return content @@ -24,10 +30,8 @@ def get_data(): """ Download the info file for Unicode blocks. """ logging.info("Downloading character data...") - req = request.urlopen( - "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt" - ) - content = req.read().decode() + with request.urlopen(f"{BASE_URL}/UnicodeData.txt") as req: + content = req.read().decode() logging.info("Done") return content @@ -61,18 +65,19 @@ def locate_block(code, left=0, right=len(indices)): [start, end] = indices[half] if start > code: return locate_block(code, left, right=half) - elif end < code: + if end < code: return locate_block(code, half, right=right) - else: - return blocks[half] + return blocks[half] return locate_block -def main(): - """ Read the character and block data and unite them to a text file containing the following fields: - ` ` - seperated by tab characters. +def main(out: str = "unicode_list.txt"): + """Create the file with Unicode characters. 
+ + Read the character and block data and unite them to a text file + containing the following fields, separated by tab characters: + ` ` """ get_block = load_blocks() characters = clean(get_data()) @@ -90,7 +95,7 @@ def main(): try: num = int(code, 16) except ValueError: - logging.warn("Could not convert " + code) + logging.warning("Could not convert %s", code) continue # Find the character's block @@ -98,12 +103,24 @@ def main(): if blk is not None: output.append("\t".join((name, comment, code, blk))) else: - logging.warn("Code %s not found in any block, char: %s", num, unichr(num)) + logging.warning("Code %s not found in any block, char: %s", num, unichr(num)) output.append(name + "\t" + comment + "\t" + code + "\t") - with open("unicode_list.txt", "w") as target: + with open(out, "w", encoding="utf-8") as target: target.write("\n".join(output)) if __name__ == "__main__": - main() + import argparse + parser = argparse.ArgumentParser() + parser.add_argument( + "path", + nargs="?", + type=str, + help="the output path where to save the Unicode list.", + default="unicode_list.txt", + ) + + args = parser.parse_args() + + main(args.path) diff --git a/main.py b/main.py index 2d43c69..2564f20 100644 --- a/main.py +++ b/main.py @@ -1,18 +1,24 @@ import os import sys import codecs -from os.path import join - -import subprocess # for pip autoinstallation - -from ulauncher.search.SortedList import SortedList +import time +import math +import shutil +import html.entities +import asyncio +import logging +from typing import Dict, Optional +from operator import itemgetter + +from ulauncher.utils.fuzzy_search import get_score from ulauncher.api.client.Extension import Extension from ulauncher.api.client.EventListener import EventListener -from ulauncher.api.shared.event import KeywordQueryEvent, ItemEnterEvent +from ulauncher.api.shared.event import KeywordQueryEvent from ulauncher.api.shared.item.ExtensionResultItem import ExtensionResultItem from 
ulauncher.api.shared.action.RenderResultListAction import RenderResultListAction from ulauncher.api.shared.action.CopyToClipboardAction import CopyToClipboardAction -from ulauncher.api.shared.action.HideWindowAction import HideWindowAction + +logger = logging.getLogger(__name__) # Be compatible with both python 2 and 3 if sys.version_info[0] >= 3: @@ -27,108 +33,173 @@ """ -# For pip autoinstallation -def ensure_import(package): - try: - return __import__(package) - except ImportError: - subprocess.call([sys.executable, "-m", "pip", "install", "--user", package]) - return __import__(package) +ExtensionPreferences = Dict[str, str] +UnicodeCharPreferences = Dict[str, int] -# For HTML entity conversion -htmlentities = ensure_import("htmlentities") class UnicodeChar: - """ Container class for unicode characters - """ + """Container class for unicode characters.""" def __init__(self, name, comment, block, code): - self.name = name if name != '' else comment + self.name = name if name != "" else comment self.comment = comment self.block = block self.code = code self.character = unichr(int(code, 16)) def get_search_name(self): - """ Called by `ulauncher.search.SortedList` to get the string - that should be used in searches - """ - return ' '.join([self.character, self.code, self.name, self.comment]) + """Called to get the string that should be used in searches.""" + return " ".join([self.character, self.code, self.name, self.comment]) class UnicodeCharExtension(Extension): + unicode_path: str = "unicode_list.txt" + def __init__(self): - super(UnicodeCharExtension, self).__init__() + super().__init__() check_cache_dir() self._load_character_table() self.subscribe(KeywordQueryEvent, KeywordQueryEventListener()) + def get_filename(self) -> str: + """Default filename of the Unicode list.""" + return os.path.join(FILE_PATH, self.unicode_path) + def _load_character_table(self): - """ Read the data file and load to memory - """ + """Read the data file and load to memory.""" + 
filename = self.get_filename() + self.character_list = [] - with open(join(FILE_PATH, "unicode_list.txt"), "r") as f: + with open(filename, "r", encoding="utf-8") as f: for line in f.readlines(): name, comment, code, block = line.strip().split("\t") character = UnicodeChar(name, comment, block, code) self.character_list.append(character) + @staticmethod + async def refresh_unicode_list(path: str, preferences: UnicodeCharPreferences): + """Check if the Unicode list file needs refresh.""" + # Get timestamp of the last time the file was modified + timestamp = os.path.getmtime(path) + # Number of days since the file was modified + age = math.floor((time.time() - timestamp) / (24 * 3600)) + + update_interval = preferences["update_interval"] + + if 0 < update_interval < age: + await UnicodeCharExtension.update_unicode_list(path) + + @staticmethod + async def update_unicode_list(path: str): + """Re-generate an old Unicode list file.""" + # Save the file to a backup file if there is no backup. + backup = path + ".bkp" + if not os.path.isfile(backup): + logger.info("backup the file with Unicode list to: %s", backup) + shutil.copyfile(path, backup) + + import generate_character_list + + # Regenerate file with unicode list + logger.info("regenerate the file with Unicode list: %s", path) + generate_character_list.main(path) + + @staticmethod + def get_preferences( + input_preferences: ExtensionPreferences, + ) -> UnicodeCharPreferences: + """Parse preferences to the correct types.""" + preferences: UnicodeCharPreferences = { + "result_limit": int(input_preferences["result_limit"]), + "min_score": int(input_preferences["min_score"]), + "update_interval": int(input_preferences["update_interval"]), + } + + return preferences + + def search(self, query: str, preferences: UnicodeCharPreferences): + """Return a list of result sorted by relevance to the query.""" + limit = preferences["result_limit"] + min_score = preferences["min_score"] + + results = [] + for c in self.character_list: + 
score = get_score(query, c.get_search_name()) + if score >= min_score: + results.append((score, c)) + results = sorted(results, reverse=True, key=itemgetter(0)) + if len(results) > limit: + results = results[:limit] + + return [c for (s, c) in results] + class KeywordQueryEventListener(EventListener): def on_event(self, event, extension): + preferences = extension.get_preferences(extension.preferences) + # Re-generate unicode list if it is too old. + coro = extension.refresh_unicode_list(extension.get_filename(), preferences) + # start the event loop and execute the coroutine + asyncio.run(coro) + items = [] - arg = event.get_argument() - if arg: - result_list = SortedList(arg, min_score=99, limit=10) - result_list.extend(extension.character_list) - for char in result_list: + query = (event.get_argument() or "").strip() + if query: + # Return best characters matching the query, ordered by score. + results = extension.search(query, preferences) + for char in results: image_path = get_character_icon(char) - encoded = htmlentities.encode(char.character) - if "&" in encoded: - sep = " - " - html = encoded - else: - sep = "" - html = "" + html_val = html_encode(char.character) + html_str = "" + if html_val: + html_str = f" - HTML: {html_val}" + items.append( ExtensionResultItem( icon=image_path, - name=char.name.capitalize() + " - " + char.character, - description=char.block + " - Alt+Enter: " + html + sep + "Code: U+" + char.code, + name=f"{char.name.capitalize()} - {char.character}", + description=f"{char.block}{html_str} - Alt+Enter: U+{char.code}", on_enter=CopyToClipboardAction(char.character), - on_alt_enter=CopyToClipboardAction(html), + on_alt_enter=CopyToClipboardAction(char.code), ) ) return RenderResultListAction(items) + +def html_encode(char: str) -> Optional[str]: + """Get the html encoded str corresponding to the unicode char, if it exists.""" + if ord(char) in html.entities.codepoint2name: + html_var = html.entities.codepoint2name[ord(char)] + return 
f"&{html_var};" + return None + + def get_character_icon(char): - """ Check if there is an existing icon for this character and return its path + """Check if there is an existing icon for this character and return its path or create a new one and return its path. """ - path = FILE_PATH + "images/cache/icon_%s.svg" % char.code + path = os.path.join(FILE_PATH, f"images/cache/icon_{char.code}.svg") if os.path.isfile(path): return path return create_character_icon(char) def create_character_icon(char, font="sans-serif"): - """ Create an SVG file containing the unicode glyph for char to be used + """Create an SVG file containing the unicode glyph for char to be used as a result icon. Note: this could be avoided by providing a gtk.PixBuf without creating a file, but ulauncher pickles the returned results, so it doesn't work currently. """ - icon = ICON_TEMPLATE.replace("{symbol}", char.character).replace("{font}", font) - with codecs.open( - os.path.join(FILE_PATH, "images/cache/icon_%s.svg" % char.code), "w", "utf-8" - ) as target: + icon = ICON_TEMPLATE.format(symbol=char.character, font=font) + path = os.path.join(FILE_PATH, f"images/cache/icon_{char.code}.svg") + with codecs.open(path, "w", "utf-8") as target: target.write(icon) - return os.path.join(FILE_PATH, "images/cache/icon_%s.svg" % char.code) + return path def check_cache_dir(path="images/cache"): - """ Check if the cache directory exists and if not create it. 
- """ + """Check if the cache directory exists and if not create it.""" path = os.path.join(FILE_PATH, path) if not os.path.isdir(path): os.mkdir(path) diff --git a/manifest.json b/manifest.json index 8527ee9..c84fb79 100644 --- a/manifest.json +++ b/manifest.json @@ -8,12 +8,33 @@ "query_debounce": 0.5 }, "preferences": [ + { + "id": "result_limit", + "type": "input", + "name": "Result limit", + "description": "Number of results that should be returned.", + "default_value": "10" + }, + { + "id": "min_score", + "type": "input", + "name": "Minimum score", + "description": "Only display results with a matching score higher than this. If too high, e.g. higher than 200, no results will be shown.", + "default_value": "0" + }, { "id": "symbol", "type": "keyword", "name": "Symbol", "description": "Search symbols in ASCII and Unicode. Enter to copy the symbol, alt+enter to copy the HTML entity. Dark mode friendly.", "default_value": "sym" + }, + { + "id": "update_interval", + "type": "input", + "name": "Update interval", + "description": "Time interval (in days) after which the cached list of unicode symbols is updated by downloading the newest list. Set to -1 to never update the list.", + "default_value": "90" } ] } diff --git a/versions.json b/versions.json index b179cdb..eb0523c 100644 --- a/versions.json +++ b/versions.json @@ -1,4 +1,5 @@ [ { "required_api_version": "^1.0.0", "commit": "API_v1" }, - { "required_api_version": "^2.0.0", "commit": "master" } + { "required_api_version": "^2.0.0", "commit": "master" }, + { "required_api_version": "^3.0.0", "commit": "api-v3" } ]